├── .github
├── CODEOWNERS
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ ├── documentation-request.md
│ ├── feature_request.md
│ └── submit-question.md
├── PULL_REQUEST_TEMPLATE.md
└── workflows
│ ├── action-helper
│ ├── Dockerfile
│ ├── action.yml
│ ├── entrypoint.sh
│ └── python
│ │ ├── auto-merge
│ │ ├── cleanup-bot-branch
│ │ ├── submodule-sync
│ │ └── utils.py
│ ├── add-to-project.yml
│ ├── auto-merge.yml
│ ├── blossom-ci.yml
│ ├── clang-format.yml
│ ├── license-header-check.yml
│ └── signoff-check.yml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── NOTICE
├── README.md
├── SECURITY.md
├── build
├── apply-patches
├── build-in-docker
├── build-info
├── buildcpp.sh
├── run-in-docker
├── sanitizer-java
│ └── bin
│ │ └── java
├── submodule-check
├── unapply-patches
└── win
│ ├── create-wsl2.ps1
│ └── setup-wsl2.sh
├── ci
├── Dockerfile
├── Jenkinsfile.premerge
├── check-cuda-dependencies.sh
├── deploy.sh
├── fuzz-test.sh
├── nightly-build.sh
├── premerge-build.sh
├── settings.xml
└── submodule-sync.sh
├── docs
├── img
│ └── memory_state_machine.png
└── memory_management.md
├── patches
└── noop.patch
├── pom.xml
├── src
├── main
│ ├── cpp
│ │ ├── CMakeLists.txt
│ │ ├── benchmarks
│ │ │ ├── CMakeLists.txt
│ │ │ ├── bloom_filter.cu
│ │ │ ├── cast_long_to_binary_string.cpp
│ │ │ ├── cast_string_to_float.cpp
│ │ │ ├── common
│ │ │ │ ├── generate_input.cu
│ │ │ │ ├── generate_input.hpp
│ │ │ │ └── random_distribution_factory.cuh
│ │ │ ├── get_json_object.cu
│ │ │ ├── parse_uri.cpp
│ │ │ └── row_conversion.cpp
│ │ ├── cmake
│ │ │ ├── get_flatbuffers.cmake
│ │ │ └── get_spdlog.cmake
│ │ ├── faultinj
│ │ │ ├── CMakeLists.txt
│ │ │ ├── README.md
│ │ │ └── faultinj.cu
│ │ ├── profiler
│ │ │ ├── CMakeLists.txt
│ │ │ ├── ProfilerJni.cpp
│ │ │ ├── profiler_debug.cpp
│ │ │ ├── profiler_debug.hpp
│ │ │ ├── profiler_schema.cpp.in
│ │ │ ├── profiler_serializer.cpp
│ │ │ ├── profiler_serializer.hpp
│ │ │ └── spark_rapids_profile_converter.cpp
│ │ ├── src
│ │ │ ├── BloomFilterJni.cpp
│ │ │ ├── CaseWhenJni.cpp
│ │ │ ├── CastStringJni.cpp
│ │ │ ├── DateTimeUtilsJni.cpp
│ │ │ ├── DecimalUtilsJni.cpp
│ │ │ ├── GpuTimeZoneDBJni.cpp
│ │ │ ├── HashJni.cpp
│ │ │ ├── HistogramJni.cpp
│ │ │ ├── HostTableJni.cpp
│ │ │ ├── HyperLogLogPlusPlusHostUDFJni.cpp
│ │ │ ├── JSONUtilsJni.cpp
│ │ │ ├── KudoGpuSerializerJni.cpp
│ │ │ ├── ListSliceJni.cpp
│ │ │ ├── MapJni.cpp
│ │ │ ├── NativeParquetJni.cpp
│ │ │ ├── NumberConverterJni.cpp
│ │ │ ├── ParseURIJni.cpp
│ │ │ ├── RegexRewriteUtilsJni.cpp
│ │ │ ├── RowConversionJni.cpp
│ │ │ ├── SparkResourceAdaptorJni.cpp
│ │ │ ├── SubStringIndexJni.cpp
│ │ │ ├── TaskPriorityJni.cpp
│ │ │ ├── ZOrderJni.cpp
│ │ │ ├── bloom_filter.cu
│ │ │ ├── bloom_filter.hpp
│ │ │ ├── case_when.cu
│ │ │ ├── case_when.hpp
│ │ │ ├── cast_decimal_to_string.cu
│ │ │ ├── cast_float_to_string.cu
│ │ │ ├── cast_long_to_binary_string.cu
│ │ │ ├── cast_string.cu
│ │ │ ├── cast_string.hpp
│ │ │ ├── cast_string_to_datetime.cu
│ │ │ ├── cast_string_to_float.cu
│ │ │ ├── cast_string_to_timestamp_common.hpp
│ │ │ ├── datetime_rebase.cu
│ │ │ ├── datetime_truncate.cu
│ │ │ ├── datetime_utils.cuh
│ │ │ ├── datetime_utils.hpp
│ │ │ ├── decimal_utils.cu
│ │ │ ├── decimal_utils.hpp
│ │ │ ├── emptyfile.cpp
│ │ │ ├── format_float.cu
│ │ │ ├── from_json_to_raw_map.cu
│ │ │ ├── from_json_to_raw_map_debug.cuh
│ │ │ ├── from_json_to_structs.cu
│ │ │ ├── ftos_converter.cuh
│ │ │ ├── get_json_object.cu
│ │ │ ├── get_json_object.hpp
│ │ │ ├── hash.cuh
│ │ │ ├── hash.hpp
│ │ │ ├── histogram.cu
│ │ │ ├── histogram.hpp
│ │ │ ├── hive_hash.cu
│ │ │ ├── host_table_view.hpp
│ │ │ ├── hyper_log_log_plus_plus.cu
│ │ │ ├── hyper_log_log_plus_plus.hpp
│ │ │ ├── hyper_log_log_plus_plus_const.hpp
│ │ │ ├── hyper_log_log_plus_plus_host_udf.cu
│ │ │ ├── hyper_log_log_plus_plus_host_udf.hpp
│ │ │ ├── json_parser.cuh
│ │ │ ├── json_utils.cu
│ │ │ ├── json_utils.hpp
│ │ │ ├── list_slice.cu
│ │ │ ├── list_slice.hpp
│ │ │ ├── map.cu
│ │ │ ├── map.hpp
│ │ │ ├── murmur_hash.cu
│ │ │ ├── murmur_hash.cuh
│ │ │ ├── number_converter.cu
│ │ │ ├── number_converter.hpp
│ │ │ ├── parse_uri.cu
│ │ │ ├── parse_uri.hpp
│ │ │ ├── regex_rewrite_utils.cu
│ │ │ ├── regex_rewrite_utils.hpp
│ │ │ ├── row_conversion.cu
│ │ │ ├── row_conversion.hpp
│ │ │ ├── shuffle_assemble.cu
│ │ │ ├── shuffle_split.cu
│ │ │ ├── shuffle_split.hpp
│ │ │ ├── shuffle_split_detail.hpp
│ │ │ ├── spark_rapids_jni_version.cpp.in
│ │ │ ├── spark_rapids_jni_version.h
│ │ │ ├── substring_index.cu
│ │ │ ├── substring_index.hpp
│ │ │ ├── task_priority.hpp
│ │ │ ├── timezones.cu
│ │ │ ├── timezones.hpp
│ │ │ ├── utilities.cu
│ │ │ ├── utilities.hpp
│ │ │ ├── version.hpp
│ │ │ ├── xxhash64.cu
│ │ │ ├── zorder.cu
│ │ │ └── zorder.hpp
│ │ └── tests
│ │ │ ├── CMakeLists.txt
│ │ │ ├── bloom_filter.cu
│ │ │ ├── cast_decimal_to_string.cpp
│ │ │ ├── cast_float_to_string.cpp
│ │ │ ├── cast_long_to_binary_string.cpp
│ │ │ ├── cast_string.cpp
│ │ │ ├── datetime_rebase.cpp
│ │ │ ├── format_float.cpp
│ │ │ ├── hash.cpp
│ │ │ ├── hyper_log_log_plus_plus.cu
│ │ │ ├── list_slice.cpp
│ │ │ ├── number_converter.cpp
│ │ │ ├── parse_uri.cpp
│ │ │ ├── row_conversion.cpp
│ │ │ ├── shuffle_split.cu
│ │ │ ├── substring_index.cpp
│ │ │ ├── test_utilities.hpp
│ │ │ ├── timezones.cpp
│ │ │ └── utilities.cpp
│ ├── fbs
│ │ └── profiler.fbs
│ └── java
│ │ └── com
│ │ └── nvidia
│ │ └── spark
│ │ └── rapids
│ │ └── jni
│ │ ├── Arms.java
│ │ ├── BloomFilter.java
│ │ ├── CaseWhen.java
│ │ ├── CastException.java
│ │ ├── CastStrings.java
│ │ ├── CpuRetryOOM.java
│ │ ├── CpuSplitAndRetryOOM.java
│ │ ├── DateTimeRebase.java
│ │ ├── DateTimeUtils.java
│ │ ├── DecimalUtils.java
│ │ ├── GpuListSliceUtils.java
│ │ ├── GpuOOM.java
│ │ ├── GpuRetryOOM.java
│ │ ├── GpuSplitAndRetryOOM.java
│ │ ├── GpuSubstringIndexUtils.java
│ │ ├── GpuTimeZoneDB.java
│ │ ├── Hash.java
│ │ ├── Histogram.java
│ │ ├── HostTable.java
│ │ ├── HyperLogLogPlusPlusHostUDF.java
│ │ ├── JSONUtils.java
│ │ ├── Map.java
│ │ ├── NumberConverter.java
│ │ ├── OffHeapOOM.java
│ │ ├── Pair.java
│ │ ├── ParquetFooter.java
│ │ ├── ParseURI.java
│ │ ├── Preconditions.java
│ │ ├── Profiler.java
│ │ ├── RegexRewriteUtils.java
│ │ ├── RmmSpark.java
│ │ ├── RmmSparkThreadState.java
│ │ ├── RowConversion.java
│ │ ├── SparkPlatformType.java
│ │ ├── SparkResourceAdaptor.java
│ │ ├── TaskPriority.java
│ │ ├── ThreadStateRegistry.java
│ │ ├── Version.java
│ │ ├── ZOrder.java
│ │ ├── kudo
│ │ ├── ByteArrayOutputStreamWriter.java
│ │ ├── ColumnOffsetInfo.java
│ │ ├── ColumnViewInfo.java
│ │ ├── DataOutputStreamWriter.java
│ │ ├── DataWriter.java
│ │ ├── DumpOption.java
│ │ ├── KudoGpuSerializer.java
│ │ ├── KudoHostMergeResult.java
│ │ ├── KudoSerializer.java
│ │ ├── KudoTable.java
│ │ ├── KudoTableHeader.java
│ │ ├── KudoTableHeaderCalc.java
│ │ ├── KudoTableMerger.java
│ │ ├── MergeMetrics.java
│ │ ├── MergeOptions.java
│ │ ├── MergedInfoCalc.java
│ │ ├── OpenByteArrayOutputStream.java
│ │ ├── OpenByteArrayOutputStreamWriter.java
│ │ ├── SliceInfo.java
│ │ ├── SlicedBufferSerializer.java
│ │ ├── SlicedValidityBufferInfo.java
│ │ ├── TableBuilder.java
│ │ ├── WriteInput.java
│ │ └── WriteMetrics.java
│ │ └── schema
│ │ ├── HostColumnsVisitor.java
│ │ ├── SchemaVisitor.java
│ │ ├── SimpleSchemaVisitor.java
│ │ └── Visitors.java
└── test
│ ├── cpp
│ └── faultinj
│ │ └── test_faultinj.json
│ └── java
│ └── com
│ └── nvidia
│ └── spark
│ └── rapids
│ └── jni
│ ├── BloomFilterTest.java
│ ├── CaseWhenTest.java
│ ├── CastStringsTest.java
│ ├── DateTimeUtilsTest.java
│ ├── DecimalUtilsTest.java
│ ├── FromJsonToRawMapTest.java
│ ├── GetJsonObjectTest.java
│ ├── GpuListSliceUtilsTest.java
│ ├── GpuSubstringIndexUtilsTest.java
│ ├── HashTest.java
│ ├── HilbertIndexTest.java
│ ├── HistogramTest.java
│ ├── HostTableTest.java
│ ├── InterleaveBitsTest.java
│ ├── LimitingOffHeapAllocForTests.java
│ ├── MapTest.java
│ ├── NumberConverterTest.java
│ ├── ParseURITest.java
│ ├── RegexRewriteUtilsTest.java
│ ├── RmmSparkMonteCarlo.java
│ ├── RmmSparkTest.java
│ ├── RowConversionTest.java
│ ├── TimeZoneTest.java
│ └── kudo
│ ├── KudoConcatValidityTest.java
│ ├── KudoGpuSerializerTest.java
│ └── KudoSerializerTest.java
└── thirdparty
└── cudf-pins
├── add_dependency_pins.cmake
├── rapids-cmake.sha
├── setup.cmake
└── versions.json
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | # Build-related
18 | /.github/ @NVIDIA/sparkrapids-cicd-codeowners
19 | /build/ @NVIDIA/sparkrapids-cicd-codeowners
20 | /ci/ @NVIDIA/sparkrapids-cicd-codeowners
21 | /patches/ @NVIDIA/sparkrapids-cicd-codeowners
22 | pom.xml @NVIDIA/sparkrapids-cicd-codeowners
23 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a bug report to help us improve the RAPIDS Accelerator JNI for Apache Spark
4 | title: "[BUG]"
5 | labels: "? - Needs Triage, bug"
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **Steps/Code to reproduce bug**
14 | Please provide a list of steps or a code sample to reproduce the issue.
15 | Avoid posting private or sensitive data.
16 |
17 | **Expected behavior**
18 | A clear and concise description of what you expected to happen.
19 |
20 | **Environment details (please complete the following information)**
21 | - Environment location: [Standalone, YARN, Kubernetes, Cloud(specify cloud provider)]
22 | - Spark configuration settings related to the issue
23 |
24 | **Additional context**
25 | Add any other context about the problem here.
26 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentation-request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Documentation request
3 | about: Report incorrect or needed documentation
4 | title: "[DOC]"
5 | labels: "? - Needs Triage, documentation"
6 | assignees: ''
7 |
8 | ---
9 |
10 | ## Report incorrect documentation
11 |
12 | **Location of incorrect documentation**
13 | Provide links and line numbers if applicable.
14 |
15 | **Describe the problems or issues found in the documentation**
16 | A clear and concise description of what you found to be incorrect.
17 |
18 | **Steps taken to verify documentation is incorrect**
19 | List any steps you have taken:
20 |
21 | **Suggested fix for documentation**
22 | Detail proposed changes to fix the documentation if you have any.
23 |
24 | ---
25 |
26 | ## Report needed documentation
27 |
28 | **Report needed documentation**
29 | A clear and concise description of what documentation you believe is needed and why.
30 |
31 | **Describe the documentation you'd like**
32 | A clear and concise description of what you want to happen.
33 |
34 | **Steps taken to search for needed documentation**
35 | List any steps you have taken:
36 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for the RAPIDS Accelerator JNI for Apache Spark
4 | title: "[FEA]"
5 | labels: "? - Needs Triage, feature request"
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I wish the RAPIDS Accelerator JNI for Apache Spark would [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context, code examples, or references to existing implementations about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/submit-question.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Submit question
3 | about: Ask a general question about RAPIDS Accelerator JNI for Apache Spark
4 | title: "[QST]"
5 | labels: "? - Needs Triage, question"
6 | assignees: ''
7 |
8 | ---
9 |
10 | **What is your question?**
11 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
36 |
--------------------------------------------------------------------------------
/.github/workflows/action-helper/Dockerfile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022-2023, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | FROM python:alpine
16 |
17 | WORKDIR /
18 | COPY python /python
19 | COPY entrypoint.sh .
20 | RUN chmod -R +x /python /entrypoint.sh
21 | # pin urllib3<2.0 for https://github.com/psf/requests/issues/6432
22 | RUN pip install requests "urllib3<2.0"
23 |
24 | ENTRYPOINT ["/entrypoint.sh"]
25 |
--------------------------------------------------------------------------------
/.github/workflows/action-helper/action.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # TODO: move this to an independent repo as a public Github Action
16 | name: 'action helper'
17 | description: 'helper for github-related operations'
18 | inputs:
19 | operator:
20 | required: true
21 | description: 'specify operator, e.g. auto-merge'
22 | runs:
23 | using: 'docker'
24 | image: 'Dockerfile'
25 | args:
26 | - ${{ inputs.operator }}
27 |
--------------------------------------------------------------------------------
/.github/workflows/action-helper/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh -l
2 | #
3 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 |
18 | set -e
19 |
20 | if [[ $# -ne 1 ]]; then
21 | echo "ERROR: invalid number of parameters, should be exactly one"
22 | exit 1
23 | fi
24 |
25 | case $1 in
26 |
27 | auto-merge)
28 | /python/auto-merge --delete_head=True
29 | ;;
30 |
31 | *)
32 | echo "ERROR: unknown parameter: $1"
33 | ;;
34 | esac
35 |
--------------------------------------------------------------------------------
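The `operator` input declared in action.yml is passed straight through as the container argument, which the entrypoint above dispatches to the matching helper under /python; the helpers themselves (shown next) read their remaining parameters from environment variables such as OWNER, REPO, HEAD, BASE and TOKEN. A minimal sketch of exercising the image locally, with placeholder repository, branch, and token values:

# Build the action-helper image and invoke the auto-merge operator by hand;
# every -e value below is a placeholder, not a value used by the real workflows.
docker build -t action-helper .github/workflows/action-helper
docker run --rm \
  -e OWNER=NVIDIA -e REPO=spark-rapids-jni \
  -e HEAD=bot-example-branch -e BASE=main \
  -e TOKEN="<github-token>" \
  action-helper auto-merge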
/.github/workflows/action-helper/python/auto-merge:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2022, NVIDIA CORPORATION.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import sys
18 | from argparse import ArgumentParser
19 |
20 | from utils import EnvDefault, PullRequest, strtobool
21 |
22 |
23 | def main():
24 | parser = ArgumentParser(description="Automerge")
25 | parser.add_argument("--owner", action=EnvDefault, env="OWNER",
26 | help="repo owner, will try to use env OWNER if empty")
27 | parser.add_argument("--repo", action=EnvDefault, env="REPO",
28 | help="repo name, will try to use env REPO if empty")
29 | parser.add_argument("--head", action=EnvDefault, env="HEAD",
30 | help="HEAD ref, will try to use env HEAD if empty")
31 | parser.add_argument("--base", action=EnvDefault, env="BASE",
32 | help="Base ref, will try to use env BASE if empty")
33 | parser.add_argument("--token", action=EnvDefault, env="TOKEN",
34 | help="github token, will try to use env TOKEN if empty")
35 | parser.add_argument("--delete_head", default=False, type=lambda x: bool(strtobool(x)),
36 | help="if delete HEAD branch after auto-merge")
37 | args = parser.parse_args()
38 |
39 | pr = PullRequest(head_owner=args.owner, head=args.head, head_token=args.token,
40 | base_owner=args.owner, repo=args.repo, base=args.base, base_token=args.token)
41 | try:
42 | if exist := pr.get_open():
43 | number = exist[0].get('number')
44 | sha = exist[0].get('head').get('sha')
45 | else:
46 | params = {
47 | # head share the same owner/repo with base in auto-merge
48 | 'title': f"[auto-merge] {pr.head} to {pr.base} [skip ci] [bot]",
49 | 'head': f"{pr.head_owner}:{pr.head}",
50 | 'base': pr.base,
51 | 'body': f"auto-merge triggered by github actions on `{pr.head}` to "
52 | f"create a PR keeping `{pr.base}` up-to-date. "
53 | "If this PR is unable to be merged due to conflicts, "
54 | "it will remain open until manually fixed.",
55 | 'maintainer_can_modify': True
56 | }
57 | number, sha, term = pr.create(params)
58 | if term:
59 | sys.exit(0)
60 | pr.auto_merge(number, sha)
61 | if args.delete_head:
62 | pr.delete_head()
63 | except Exception as e:
64 | print(e)
65 | sys.exit(1)
66 |
67 |
68 | if __name__ == '__main__':
69 | main()
70 |
--------------------------------------------------------------------------------
/.github/workflows/action-helper/python/cleanup-bot-branch:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2022, NVIDIA CORPORATION.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import sys
18 | from argparse import ArgumentParser
19 |
20 | from utils import EnvDefault, PullRequest
21 |
22 |
23 | def main():
24 | parser = ArgumentParser(description="Cleanup bot branch")
25 | parser.add_argument("--owner", action=EnvDefault, env="OWNER",
26 | help="repo owner, will try to use env OWNER if empty")
27 | parser.add_argument("--repo", action=EnvDefault, env="REPO",
28 | help="repo name, will try to use env REPO if empty")
29 | parser.add_argument("--head", action=EnvDefault, env="HEAD",
30 | help="HEAD ref, will try to use env HEAD if empty")
31 | parser.add_argument("--base", action=EnvDefault, env="BASE",
32 | help="Base ref, will try to use env BASE if empty")
33 | parser.add_argument("--token", action=EnvDefault, env="TOKEN",
34 | help="github token, will try to use env TOKEN if empty")
35 | args = parser.parse_args()
36 |
37 | try:
38 | if not args.head.startswith('bot-'):
39 | raise Exception(f"Cannot delete {args.head}, the script is only allowed to delete branch w/ bot-* prefix")
40 |
41 | pr = PullRequest(head_owner=args.owner, head=args.head, head_token=args.token,
42 | base_owner=args.owner, repo=args.repo, base=args.base, base_token=args.token)
43 | if exist := pr.get_open():
44 | number = exist[0].get('number')
45 | raise Exception(f"Cannot delete {pr.head}, pull request #{number} is still open")
46 | else:
47 | pr.delete_head()
48 | except Exception as e:
49 | print(e)
50 | sys.exit(1)
51 |
52 |
53 | if __name__ == '__main__':
54 | main()
55 |
--------------------------------------------------------------------------------
/.github/workflows/add-to-project.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022-2025, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | name: Add new issues and pull requests to project
16 |
17 | on:
18 | issues:
19 | types:
20 | - opened
21 | pull_request_target:
22 | types:
23 | - opened
24 |
25 | jobs:
26 | Add-to-project:
27 | if: github.repository_owner == 'NVIDIA' # avoid adding issues from forks
28 | runs-on: ubuntu-latest
29 | steps:
30 | - name: add-to-project
31 | uses: NVIDIA/spark-rapids-common/add-to-project@main
32 | with:
33 | token: ${{ secrets.PROJECT_TOKEN }}
34 |
--------------------------------------------------------------------------------
/.github/workflows/clang-format.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023-2024, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # A workflow to check clang format
16 | name: clang format check
17 |
18 | on:
19 | pull_request:
20 | types: [opened, synchronize, reopened]
21 |
22 | jobs:
23 | pre-commit:
24 | runs-on: ubuntu-latest
25 | steps:
26 | - uses: actions/checkout@v4
27 | with:
28 | submodules: true
29 | - uses: actions/setup-python@v5
30 | with:
31 | python-version: "3.10"
32 | - uses: pre-commit/action@v3.0.0
33 | with:
34 | extra_args: clang-format --all-files
--------------------------------------------------------------------------------
/.github/workflows/license-header-check.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # A workflow to check copyright/license header
16 | name: license header check
17 |
18 | on:
19 | pull_request:
20 | types: [opened, synchronize, reopened]
21 |
22 | jobs:
23 | license-header-check:
24 | runs-on: ubuntu-latest
25 | if: "!contains(github.event.pull_request.title, '[bot]')"
26 | steps:
27 | - name: Get checkout depth
28 | run: |
29 | echo "PR_FETCH_DEPTH=$(( ${{ github.event.pull_request.commits }} + 10 ))" >> $GITHUB_ENV
30 |
31 | - name: Checkout code
32 | uses: actions/checkout@v4
33 | with:
34 | fetch-depth: ${{ env.PR_FETCH_DEPTH }}
35 |
36 | - name: license-header-check
37 | uses: NVIDIA/spark-rapids-common/license-header-check@main
38 | with:
39 | included_file_patterns: |
40 | *.cpp,
41 | *.hpp,
42 | *.cu,
43 | *.cuh,
44 | *.java,
45 | *.sh,
46 | *Dockerfile*,
47 | *Jenkinsfile*,
48 | *.yml,
49 | *.yaml,
50 | *.txt,
51 | *.xml,
52 | *.fbs,
53 | build/*
54 | excluded_file_patterns: |
55 | thirdparty/*
56 |
--------------------------------------------------------------------------------
/.github/workflows/signoff-check.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022-2024, NVIDIA CORPORATION.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # A workflow to check if PR got sign-off
16 | name: signoff check
17 |
18 | on:
19 | pull_request_target:
20 | types: [opened, synchronize, reopened]
21 |
22 | jobs:
23 | signoff-check:
24 | runs-on: ubuntu-latest
25 | steps:
26 | - name: signoff
27 | uses: NVIDIA/spark-rapids-common/signoff-check@main
28 | with:
29 | owner: ${{ github.repository_owner }}
30 | repo: spark-rapids-jni
31 | pull_number: ${{ github.event.number }}
32 | token: ${{ secrets.GITHUB_TOKEN }}
33 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Common
2 | __pycache__
3 | *.py[cod]
4 | *$py.class
5 | *.a
6 | *.o
7 | *.so
8 | *.dylib
9 | .cache
10 | .vscode
11 | *.swp
12 | *.pytest_cache
13 | DartConfiguration.tcl
14 | .DS_Store
15 | *.manifest
16 | *.spec
17 | .nfs*
18 | .clangd
19 | *#*#
20 | *.#*
21 | *.pyc
22 | *.pyo
23 | *.swp
24 | *~
25 |
26 | ## Patching
27 | *.diff
28 | *.orig
29 | *.rej
30 |
31 | ## Eclipse IDE
32 | .project
33 | .cproject
34 | .settings
35 |
36 | ## IntelliJ IDE
37 | .idea/
38 | .idea_modules/
39 | *.iml
40 | *.ipr
41 | *.iws
42 |
43 | #Java
44 | dependency-reduced-pom.xml
45 | hs_err*.log
46 | target/
47 |
48 | ## VSCode IDE
49 | .vscode
50 |
51 | #Generated files
52 | cufile.log
53 | rmm_log.txt
54 | sanitizer_for_pid_*.log
55 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "thirdparty/cudf"]
2 | path = thirdparty/cudf
3 | url = https://github.com/rapidsai/cudf.git
4 | branch = branch-25.06
5 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022, NVIDIA CORPORATION.
2 |
3 | repos:
4 | - repo: https://github.com/pre-commit/mirrors-clang-format
5 | rev: v16.0.1
6 | hooks:
7 | - id: clang-format
8 | files: \.(cu|cuh|h|hpp|cpp|inl)$
9 | types_or: [file]
10 | args: ['-fallback-style=none', '-style=file:thirdparty/cudf/.clang-format']
11 |
--------------------------------------------------------------------------------
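The hook above formats CUDA/C++ sources with the .clang-format that ships in the cudf submodule, and the clang-format workflow (see .github/workflows/clang-format.yml) runs it through pre-commit with `clang-format --all-files`. A minimal local equivalent, assuming pre-commit is installed from PyPI:

# Run the same clang-format check locally; the submodule must be present because the
# style file is thirdparty/cudf/.clang-format.
pip install pre-commit
git submodule update --init --recursive
pre-commit run clang-format --all-files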
/NOTICE:
--------------------------------------------------------------------------------
1 | RAPIDS Accelerator JNI For Apache Spark
2 | Copyright (c) 2022-2024, NVIDIA CORPORATION
3 |
4 | --------------------------------------------------------------------------------
5 |
6 | This project includes code from ryu (https://github.com/ulfjack/ryu).
7 |
8 | Copyright (2018) Ulf Adams and contributors.
9 |
10 | Licensed under the Apache License, Version 2.0 (the "License");
11 | you may not use this file except in compliance with the License.
12 | You may obtain a copy of the License at
13 |
14 | http://www.apache.org/licenses/LICENSE-2.0
15 |
16 | Unless required by applicable law or agreed to in writing, software
17 | distributed under the License is distributed on an "AS IS" BASIS,
18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 | See the License for the specific language governing permissions and
20 | limitations under the License.
21 |
22 | --------------------------------------------------------------------------------
23 |
24 | This project includes code from flatbuffers (https://github.com/google/flatbuffers).
25 |
26 | Copyright 2021 Google Inc. All rights reserved.
27 |
28 | Licensed under the Apache License, Version 2.0 (the "License");
29 | you may not use this file except in compliance with the License.
30 | You may obtain a copy of the License at
31 |
32 | http://www.apache.org/licenses/LICENSE-2.0
33 |
34 | Unless required by applicable law or agreed to in writing, software
35 | distributed under the License is distributed on an "AS IS" BASIS,
36 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
37 | See the License for the specific language governing permissions and
38 | limitations under the License.
39 |
40 | --------------------------------------------------------------------------------
41 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RAPIDS Accelerator JNI For Apache Spark
2 |
3 | This repository contains native support code for the
4 | [RAPIDS Accelerator for Apache Spark](https://github.com/NVIDIA/spark-rapids).
5 |
6 | [](https://deepwiki.com/NVIDIA/spark-rapids-jni)
7 |
8 | ## Building From Source
9 |
10 | See the [build instructions in the contributing guide](CONTRIBUTING.md#building-from-source).
11 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | ## Security
2 |
3 | NVIDIA is dedicated to the security and trust of our software products and services, including all
4 | source code repositories managed through our organization.
5 |
6 | If you need to report a security issue, please use the appropriate contact points outlined
7 | below. **Please do not report security vulnerabilities through GitHub/GitLab.**
8 |
9 | ## Reporting Potential Security Vulnerability in an NVIDIA Product
10 |
11 | To report a potential security vulnerability in any NVIDIA product:
12 | - Web: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html)
13 | - E-Mail: psirt@nvidia.com
14 | - We encourage you to use the following PGP key for secure email communication: [NVIDIA public PGP Key for communication](https://www.nvidia.com/en-us/security/pgp-key)
15 | - Please include the following information:
16 | - Product/Driver name and version/branch that contains the vulnerability
17 |
--------------------------------------------------------------------------------
/build/apply-patches:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 |
19 | set -e
20 |
21 | BASE_DIR=$( git rev-parse --show-toplevel )
22 |
23 | PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")}
24 |
25 | CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")}
26 |
27 | # Applying patches to CUDF is problematic in a number of ways, but ultimately it comes down to
28 | # making sure that a user can do development work in spark-rapids-jni without the patches
29 | # getting in the way
30 | # The operations I really want to support no matter what state CUDF is in are
31 | # 1) Build the repo from scratch
32 | # 2) Rebuild the repo without having to clean and start over
33 | # 3) upmerge to a new version of the plugin including updating the cudf submodule
34 | #
35 | # Building from scratch is simple. We want clean to unapply any patches and
36 | # build to apply them. But if we want to rebuild without a clean we need to know what
37 | # state the CUDF repo is in. Did we apply patches to it or not. The fastest way to do this
38 | # is to save some state files about what happened. But a user could mess with CUDF directly
39 | # so we want to have ways to double check that they are indeed correct.
40 |
41 | FULLY_PATCHED_FILE="$CUDF_DIR/spark-rapids-jni.patch"
42 |
43 | pushd "$CUDF_DIR"
44 |
45 | PATCH_FILES=$(find "$PATCH_DIR" -type f -not -empty)
46 |
47 | if [ -z "$PATCH_FILES" ] ; then
48 | echo "No patches to apply"
49 | exit 0
50 | fi
51 |
52 | CHANGED_FILES=$(git status --porcelain --untracked-files=no)
53 |
54 | if [ \( -s "$FULLY_PATCHED_FILE" \) -a \( -n "$CHANGED_FILES" \) ] ; then
55 | if git apply -R --check "$FULLY_PATCHED_FILE" ; then
56 | echo "Patches appear to have been applied already"
57 | exit 0
58 | fi
59 | fi
60 |
61 | if [ -n "$CHANGED_FILES" ] ; then
62 | echo "Error: CUDF repository has uncommitted changes. No patches will be applied. Please clean the repository so we can try and add the needed patches"
63 | echo "$CHANGED_FILES"
64 | exit 1
65 | fi
66 |
67 | find "$PATCH_DIR" -maxdepth 1 -type f -print0 | sort -zV | while IFS= read -r -d '' file; do
68 | echo "patching with: $file"
69 | git apply -v "$file"
70 | done
71 |
72 | git diff > "$FULLY_PATCHED_FILE"
73 |
74 | popd
75 |
--------------------------------------------------------------------------------
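Both apply-patches and unapply-patches key off the combined diff that apply-patches writes to thirdparty/cudf/spark-rapids-jni.patch, which is how a later run can tell whether the submodule is already patched. A short sketch of the manual flow the comments above describe, run from the repository root:

build/apply-patches     # applies patches/* to thirdparty/cudf and records the combined
                        # diff in thirdparty/cudf/spark-rapids-jni.patch
# ... build or develop against the patched cudf ...
build/unapply-patches   # reverses the recorded diff and deletes the state file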
/build/build-in-docker:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 |
19 | # Build the spark-rapids-jni artifact in a Docker container with devtoolset
20 |
21 | set -e
22 |
23 | # Base paths relative to this script's location
24 | SCRIPTDIR=$(cd $(dirname $0); pwd)
25 |
26 | LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"}
27 | USE_GDS=${USE_GDS:-ON}
28 | export CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"}
29 | # Make CUDA_VERSION consistent with the file run-in-docker
30 | export CUDA_VERSION=${CUDA_VERSION:-11.8.0}
31 | CUDA_CLASSIFIER=cuda${CUDA_VERSION%%.*}
32 | BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON}
33 |
34 | if (( $# == 0 )); then
35 | echo "Usage: $0 "
36 | exit 1
37 | fi
38 |
39 | # Set env for the arm64 build. The possible values of 'uname -m': [x86_64/i386/aarch64/mips/...]
40 | if [ "$(uname -m)" == "aarch64" ]; then
41 | USE_GDS="OFF" # The GDS cuFiles RDMA libraries are not included in the arm64 CUDA toolkit.
42 | BUILD_FAULTINJ="OFF" # libcupti_static.a, linked by cufaultinj, does not exist in the arm64 CUDA toolkit.
43 | fi
44 |
45 | $SCRIPTDIR/run-in-docker mvn \
46 | -Dmaven.repo.local=$LOCAL_MAVEN_REPO \
47 | -DUSE_GDS=$USE_GDS \
48 | -DBUILD_FAULTINJ=${BUILD_FAULTINJ} \
49 | -Dcuda.version=$CUDA_CLASSIFIER \
50 | "$@"
51 |
--------------------------------------------------------------------------------
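Everything after the script name is forwarded to mvn inside the container via run-in-docker, so the script is driven with ordinary Maven goals. An illustrative invocation (the goals and CUDA version here are examples, not requirements):

CUDA_VERSION=12.0.1 build/build-in-docker clean package -DskipTests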
/build/build-info:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | #
4 | # Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 |
19 | # This script generates the build info.
20 | # Arguments:
21 | # version - The current version of the project
22 | # git_path - The path to the repository
23 | # libcudf_path - The path to the libcudf library
24 | set -e
25 | set -o pipefail
26 |
27 | echo_build_properties() {
28 | version=$1
29 | git_path=$2
30 | libcudf_path=$3
31 | shift 3
32 | echo version=$version
33 | echo user=$(whoami)
34 | echo revision=$(cd "$git_path" && git rev-parse HEAD)
35 | echo branch=$(cd "$git_path" && git rev-parse --abbrev-ref HEAD)
36 | echo date=$(date -u +%Y-%m-%dT%H:%M:%SZ)
37 | echo url=$(cd "$git_path" && git config --get remote.origin.url)
38 | gpu_architectures=$(
39 | cuobjdump "$libcudf_path" | gawk '
40 | match($0, /arch = sm_([0-9]+)/, tmp) {
41 | arch[tmp[1]] = 1
42 | }
43 |
44 | END {
45 | n = asorti(arch)
46 | if (n == 0) {
47 | print "ERROR: No fatbin ELF / PTX code sections found" > "/dev/stderr"
48 | exit(1)
49 | }
50 | s = arch[1]
51 | for (i = 2; i <= n; i++) {
52 | s = s ";" arch[i]
53 | }
54 | print s
55 | }
56 | '
57 | )
58 | echo "gpu_architectures=$gpu_architectures"
59 | for arg in "$@"; do
60 | echo $arg
61 | done
62 | }
63 |
64 | echo_build_properties "$@"
65 |
--------------------------------------------------------------------------------
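The script takes three positional arguments (project version, git repository path, libcudf library path) followed by optional key=value pairs that are echoed through verbatim, and prints Java-style properties on stdout. An illustrative call, where the libcudf.so path is a placeholder for whatever the Maven build produced:

build/build-info 25.06.0-SNAPSHOT . target/cmake-build/libcudf.so cuda.version=cuda12
# prints lines such as:
#   version=25.06.0-SNAPSHOT
#   revision=<git sha>
#   gpu_architectures=70;75;80;...
#   cuda.version=cuda12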
/build/sanitizer-java/bin/java:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 |
18 | # This special Java executable is specified to the "jvm" configuration of
19 | # the surefire plugin to intercept forking the processes for tests. Then
20 | # the tests will run with the compute-sanitizer tool.
21 | exec compute-sanitizer --tool memcheck \
22 | --launch-timeout 600 \
23 | --error-exitcode -2 \
24 | --log-file "./sanitizer_for_pid_%p.log" \
25 | java "$@"
26 |
--------------------------------------------------------------------------------
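This wrapper only takes effect when the build points the surefire plugin's jvm option at it; ci/nightly-build.sh toggles that through the USE_SANITIZER property (the wiring itself presumably lives in pom.xml, which is not reproduced here). Under that assumption, a sanitized test run would look roughly like:

mvn test -DUSE_SANITIZER=ON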
/build/submodule-check:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 |
19 | set -e
20 |
21 | outdated_modules=$(git submodule status | grep -v "^ " || true)
22 | if [[ -n $outdated_modules ]]; then
23 | echo >&2 "ERROR: submodules out of date: $outdated_modules. To fix: git submodule update --init --recursive"
24 | exit 1
25 | fi
26 |
--------------------------------------------------------------------------------
/build/unapply-patches:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 |
19 | set -e
20 |
21 | BASE_DIR=$( git rev-parse --show-toplevel )
22 |
23 | PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")}
24 |
25 | CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")}
26 |
27 | # Applying patches to CUDF is problematic in a number of ways, but ultimately it comes down to
28 | # making sure that a user can do development work in spark-rapids-jni without the patches
29 | # getting in the way
30 | # The operations I really want to support no matter what state CUDF is in are
31 | # 1) Build the repo from scratch
32 | # 2) Rebuild the repo without having to clean and start over
33 | # 3) upmerge to a new version of the plugin including updating the cudf submodule
34 | #
35 | # Building from scratch is simple. We want clean to unapply any patches and
36 | # build to apply them. But if we want to rebuild without a clean we need to know what
37 | # state the CUDF repo is in. Did we apply patches to it or not. The fastest way to do this
38 | # is to save some state files about what happened. But a user could mess with CUDF directly
39 | # so we want to have ways to double check that they are indeed correct.
40 |
41 | FULLY_PATCHED_FILE="$CUDF_DIR/spark-rapids-jni.patch"
42 |
43 | pushd "$CUDF_DIR"
44 |
45 | PATCH_FILES=$(find "$PATCH_DIR" -type f -not -empty)
46 |
47 | if [ -z "$PATCH_FILES" ] ; then
48 | echo "No patches to remove"
49 | exit 0
50 | fi
51 |
52 | CHANGED_FILES=$(git status --porcelain --untracked-files=no)
53 |
54 | if [ \( -s "$FULLY_PATCHED_FILE" \) -a \( -n "$CHANGED_FILES" \) ] ; then
55 | if git apply --check -R "$FULLY_PATCHED_FILE"; then
56 | echo "Patches appear to have been applied, so going to remove them"
57 | git apply -R -v "$FULLY_PATCHED_FILE"
58 | rm -f "$FULLY_PATCHED_FILE"
59 |
60 | # Check for modifications, again
61 | if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then
62 | echo "Error: CUDF repository has uncommitted changes. You might want to clean it manually if you know that is expected"
63 | git status --porcelain --untracked-files=no
64 | exit 1
65 | fi
66 |
67 | exit 0
68 | else
69 | echo "Files are changed, but in a way where the full patch file $FULLY_PATCHED_FILE does not apply in reverse to remove them"
70 | exit 1
71 | fi
72 | fi
73 |
74 | if [ -n "$CHANGED_FILES" ] ; then
75 | echo "Error: CUDF repository has uncommitted changes, but does not appear to have been patched. Please clean it and try again."
76 | echo "$CHANGED_FILES"
77 | exit 1
78 | else
79 | echo "No changes in CUDF repository to remove"
80 | fi
81 |
82 | popd
83 |
--------------------------------------------------------------------------------
/build/win/create-wsl2.ps1:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | wsl -d Ubuntu build/win/setup-wsl2.sh
18 |
--------------------------------------------------------------------------------
/build/win/setup-wsl2.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 |
19 | # Last tested:
20 | # Edition Windows 10 Enterprise
21 | # Version 21H2
22 | # OS build 19044.1645
23 | # Experience Windows Feature Experience Pack 120.2212.4170.0
24 | # NVIDIA Display Driver 473.47
25 |
26 | # add WSL2 user to passwordless sudoers if desired
27 | # sudo visudo /etc/sudoers.d/wsl2-sudo
28 |
29 | # Docker
30 | sudo apt-get -y install apt-transport-https ca-certificates curl software-properties-common
31 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
32 | echo \
33 | "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \
34 | $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
35 | sudo apt-get update
36 | sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-compose-plugin
37 | sudo usermod -a -G docker $USER
38 | sudo service docker start
39 | docker run hello-world
40 |
41 | distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
42 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
43 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
44 | sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit
45 | sudo service docker restart
46 |
47 |
48 | # CUDA
49 | # Initial instructions
50 | # https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&Distribution=WSL-Ubuntu&target_version=2.0&target_type=deb_network
51 | distroArch="wsl-ubuntu/x86_64"
52 | wget https://developer.download.nvidia.com/compute/cuda/repos/${distroArch}/cuda-wsl-ubuntu.pin
53 | sudo mv cuda-wsl-ubuntu.pin /etc/apt/preferences.d/cuda-repository-pin-600
54 | # Instructions for fetching keys modified per
55 | # https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212772#install-new-cuda-keyring-package-3
56 | sudo apt-key del 7fa2af80
57 | wget https://developer.download.nvidia.com/compute/cuda/repos/${distroArch}/cuda-keyring_1.0-1_all.deb
58 | sudo dpkg -i cuda-keyring_1.0-1_all.deb
59 | sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${distroArch}/ /"
60 | sudo apt-get update
61 | sudo apt-get -y install cuda-toolkit-11-7
--------------------------------------------------------------------------------
/ci/check-cuda-dependencies.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 |
18 | # common script to help check if packaged *.so files have a dynamic link to the CUDA Runtime
19 |
20 | set -exo pipefail
21 |
22 | jar_path=$1
23 | tmp_path=/tmp/"jni-$(date "+%Y%m%d%H%M%S")"
24 | unzip -j "${jar_path}" "*64/Linux/*.so" -d "${tmp_path}"
25 |
26 | find "$tmp_path" -type f -name "*.so" | while read -r so_file; do
27 | # Check if *.so file has a dynamic link to CUDA Runtime
28 | if objdump -p "$so_file" | grep NEEDED | grep -qi cudart; then
29 | echo "Dynamic link to CUDA Runtime found in $so_file..."
30 | ldd "$so_file"
31 | exit 1
32 | else
33 | echo "No dynamic link to CUDA Runtime found in $so_file"
34 | fi
35 | done
36 |
--------------------------------------------------------------------------------
/ci/fuzz-test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 |
18 | # NOTE:
19 | # run fuzz test after build
20 | # required jars: jni, jni-tests, slf4j-api
21 |
22 | set -ex
23 |
24 | WORKSPACE=${WORKSPACE:-$PWD}
25 | M2DIR=${M2DIR:-"$HOME/.m2"}
26 |
27 | SLF4J_VER=$(mvn help:evaluate -Dexpression=slf4j.version -q -DforceStdout)
28 | CLASSPATH=${CLASSPATH:-"$WORKSPACE/target/*:$M2DIR/repository/org/slf4j/slf4j-api/$SLF4J_VER/slf4j-api-$SLF4J_VER.jar"}
29 |
30 | java -cp "$CLASSPATH" \
31 | com.nvidia.spark.rapids.jni.RmmSparkMonteCarlo \
32 | --taskMaxMiB=2048 --gpuMiB=3072 --skewed --allocMode=ASYNC
33 |
--------------------------------------------------------------------------------
/ci/nightly-build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 |
18 | set -ex
19 |
20 | nvidia-smi
21 |
22 | git submodule update --init --recursive
23 |
24 | MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B"
25 | # cuda11 or cuda12
26 | CUDA_VER=${CUDA_VER:-cuda`nvcc --version | sed -n 's/^.*release \([0-9]\+\)\..*$/\1/p'`}
27 | PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
28 | USE_GDS=${USE_GDS:-ON}
29 | USE_SANITIZER=${USE_SANITIZER:-ON}
30 | BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON}
31 | ARM64=${ARM64:-false}
32 | artifact_suffix="${CUDA_VER}"
33 |
34 | profiles="source-javadoc"
35 | if [ "${ARM64}" == "true" ]; then
36 | profiles="${profiles},arm64"
37 | USE_GDS="OFF"
38 | USE_SANITIZER="ON"
39 | BUILD_FAULTINJ="OFF"
40 | artifact_suffix="${artifact_suffix}-arm64"
41 | fi
42 |
43 | ${MVN} clean package ${MVN_MIRROR} \
44 | -P${profiles} \
45 | -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \
46 | -Dlibcudf.build.configure=true \
47 | -DUSE_GDS=${USE_GDS} -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \
48 | -DBUILD_TESTS=ON -DBUILD_BENCHMARKS=ON -DBUILD_FAULTINJ=${BUILD_FAULTINJ} -Dcuda.version=$CUDA_VER \
49 | -DUSE_SANITIZER=${USE_SANITIZER}
50 |
51 | build_name=$(${MVN} help:evaluate -Dexpression=project.build.finalName -q -DforceStdout)
52 | . ci/check-cuda-dependencies.sh "target/${build_name}-${artifact_suffix}.jar"
53 |
--------------------------------------------------------------------------------
/ci/premerge-build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 |
18 | set -ex
19 |
20 | nvidia-smi
21 |
22 | git submodule update --init --recursive
23 |
24 | MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B"
25 | PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
26 | ${MVN} verify ${MVN_MIRROR} \
27 | -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \
28 | -Dlibcudf.build.configure=true \
29 | -DUSE_GDS=ON -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \
30 | -DBUILD_TESTS=ON -DBUILD_BENCHMARKS=ON
31 |
32 | build_name=$(${MVN} help:evaluate -Dexpression=project.build.finalName -q -DforceStdout)
33 | cuda_version=$(${MVN} help:evaluate -Dexpression=cuda.version -q -DforceStdout)
34 | . ci/check-cuda-dependencies.sh "target/${build_name}-${cuda_version}.jar"
35 |
--------------------------------------------------------------------------------
/docs/img/memory_state_machine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/spark-rapids-jni/d3aca7a5a423fcbaf0834baf5e24f091f05eee6c/docs/img/memory_state_machine.png
--------------------------------------------------------------------------------
/patches/noop.patch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/spark-rapids-jni/d3aca7a5a423fcbaf0834baf5e24f091f05eee6c/patches/noop.patch
--------------------------------------------------------------------------------
/src/main/cpp/benchmarks/bloom_filter.cu:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include
18 |
19 | #include
20 |
21 | #include
22 | #include
23 | #include
24 |
25 | static void bloom_filter_put(nvbench::state& state)
26 | {
27 | constexpr int num_rows = 150'000'000;
28 | constexpr int num_hashes = 3;
29 |
30 | // create the bloom filter
31 | cudf::size_type const bloom_filter_bytes = state.get_int64("bloom_filter_bytes");
32 | cudf::size_type const bloom_filter_longs = bloom_filter_bytes / sizeof(int64_t);
33 | auto bloom_filter = spark_rapids_jni::bloom_filter_create(num_hashes, bloom_filter_longs);
34 |
35 | // create a column of hashed values
36 | data_profile_builder builder;
37 | builder.no_validity();
38 | auto const src = create_random_table({{cudf::type_id::INT64}}, row_count{num_rows}, builder);
39 | auto const input = spark_rapids_jni::xxhash64(*src);
40 |
41 | auto const stream = cudf::get_default_stream();
42 | state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
43 | state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
44 | [&](nvbench::launch& launch, auto& timer) {
45 | timer.start();
46 | spark_rapids_jni::bloom_filter_put(*bloom_filter, *input);
47 | stream.synchronize();
48 | timer.stop();
49 | });
50 |
51 | size_t const bytes_read = num_rows * sizeof(int64_t);
52 | size_t const bytes_written = num_rows * sizeof(cudf::bitmask_type) * num_hashes;
53 | auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
54 | state.add_element_count(std::size_t{num_rows}, "Rows Inserted");
55 | state.add_global_memory_reads(bytes_read, "Bytes read");
56 | state.add_global_memory_writes(bytes_written, "Bytes written");
57 | state.add_element_count(static_cast<double>(bytes_written) / time, "Write bytes/sec");
58 | }
59 |
60 | NVBENCH_BENCH(bloom_filter_put)
61 | .set_name("Bloom Filter Put")
62 | .add_int64_axis("bloom_filter_bytes",
63 | {512 * 1024, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024});
64 |
--------------------------------------------------------------------------------
/src/main/cpp/benchmarks/cast_long_to_binary_string.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include
18 |
19 | #include
20 |
21 | #include
22 |
23 | #include
24 | #include
25 |
26 | static void long_to_binary_string(nvbench::state& state)
27 | {
28 | auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
29 |
30 | auto const input_table = create_random_table({cudf::type_id::INT64}, row_count{num_rows});
31 | auto const long_col = input_table->get_column(0);
32 | auto const stream = cudf::get_default_stream();
33 | state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
34 | state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
35 | spark_rapids_jni::long_to_binary_string(long_col, stream);
36 | });
37 | }
38 |
39 | NVBENCH_BENCH(long_to_binary_string)
40 | .set_name("Long to Binary String Cast")
41 | .add_int64_axis("num_rows", {100'000, 500'000, 1'000'000, 5'000'000, 10'000'000});
42 |
--------------------------------------------------------------------------------
/src/main/cpp/benchmarks/cast_string_to_float.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include
18 |
19 | #include
20 |
21 | #include
22 | #include
23 |
24 | #include
25 | #include
26 |
27 | void string_to_float(nvbench::state& state)
28 | {
29 | cudf::size_type const n_rows{(cudf::size_type)state.get_int64("num_rows")};
30 | auto const float_tbl = create_random_table({cudf::type_id::FLOAT32}, row_count{n_rows});
31 | auto const float_col = float_tbl->get_column(0);
32 | auto const string_col = cudf::strings::from_floats(float_col.view());
33 |
34 | state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
35 | auto rows = spark_rapids_jni::string_to_float(cudf::data_type{cudf::type_id::FLOAT32},
36 | string_col->view(),
37 | false,
38 | cudf::get_default_stream());
39 | });
40 | }
41 |
42 | NVBENCH_BENCH(string_to_float)
43 | .set_name("Strings to Float Cast")
44 | .add_int64_axis("num_rows", {1 * 1024 * 1024, 100 * 1024 * 1024});
45 |
--------------------------------------------------------------------------------
/src/main/cpp/cmake/get_flatbuffers.cmake:
--------------------------------------------------------------------------------
1 | # =============================================================================
2 | # Copyright (c) 2024-2025, NVIDIA CORPORATION.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5 | # in compliance with the License. You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software distributed under the License
10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 | # or implied. See the License for the specific language governing permissions and limitations under
12 | # the License.
13 | # =============================================================================
14 |
15 | # Use CPM to find or clone flatbuffers
16 | function(find_and_configure_flatbuffers VERSION)
17 |
18 | rapids_cpm_find(
19 | flatbuffers ${VERSION}
20 | GLOBAL_TARGETS flatbuffers
21 | CPM_ARGS
22 | GIT_REPOSITORY https://github.com/google/flatbuffers.git
23 | GIT_TAG v${VERSION}
24 | GIT_SHALLOW TRUE
25 | )
26 |
27 | include("${rapids-cmake-dir}/export/find_package_root.cmake")
28 | rapids_export_find_package_root(
29 | BUILD flatbuffers "${flatbuffers_BINARY_DIR}" EXPORT_SET profilerjni-exports
30 | )
31 |
32 | endfunction()
33 |
34 | find_and_configure_flatbuffers(24.3.25)
35 |
--------------------------------------------------------------------------------
/src/main/cpp/cmake/get_spdlog.cmake:
--------------------------------------------------------------------------------
1 | # =============================================================================
2 | # Copyright (c) 2024-2025, NVIDIA CORPORATION.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5 | # in compliance with the License. You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software distributed under the License
10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 | # or implied. See the License for the specific language governing permissions and limitations under
12 | # the License.
13 | # =============================================================================
14 |
15 | # Use CPM to find or clone spdlog
16 | function(find_and_configure_spdlog)
17 |
18 | set(CPM_DOWNLOAD_spdlog ON)
19 | include(${rapids-cmake-dir}/cpm/spdlog.cmake)
20 | rapids_cpm_spdlog(
21 | FMT_OPTION "EXTERNAL_FMT_HO"
22 | )
23 | set_target_properties(spdlog PROPERTIES POSITION_INDEPENDENT_CODE ON)
24 |
25 | endfunction()
26 |
27 | find_and_configure_spdlog()
28 |
--------------------------------------------------------------------------------
/src/main/cpp/faultinj/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | #=============================================================================
2 | # Copyright (c) 2022-2023, NVIDIA CORPORATION.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #=============================================================================
16 |
17 | project(
18 | CUDA_FAULT_INJECTION
19 | VERSION 22.08.00
20 | LANGUAGES C CXX CUDA
21 | )
22 |
23 | set(CMAKE_CXX_STANDARD 17)
24 |
25 | add_library(
26 | cufaultinj SHARED
27 | faultinj.cu
28 | )
29 |
30 | target_link_libraries(
31 | cufaultinj PRIVATE spdlog::spdlog_header_only
32 | )
33 |
34 | target_link_libraries(
35 | cufaultinj PRIVATE CUDA::cupti_static
36 | )
37 |
--------------------------------------------------------------------------------
/src/main/cpp/profiler/profiler_debug.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | #include
20 |
21 | #include
22 | #include
23 |
24 | namespace spark_rapids_jni::profiler {
25 |
26 | std::string activity_kind_to_string(CUpti_ActivityKind kind);
27 |
28 | void print_cupti_buffer(uint8_t* buffer, size_t valid_size);
29 |
30 | } // namespace spark_rapids_jni::profiler
31 |
--------------------------------------------------------------------------------
/src/main/cpp/profiler/profiler_schema.cpp.in:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | namespace spark_rapids_jni::profiler {
18 | char const* Profiler_Schema = R"raw(@SPARK_RAPIDS_JNI_PROFILER_SCHEMA@)raw";
19 | }
20 |
--------------------------------------------------------------------------------
/src/main/cpp/profiler/profiler_serializer.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | #include "profiler_generated.h"
20 |
21 | #include
22 | #include
23 | #include
24 |
25 | #include
26 | #include
27 |
28 | namespace spark_rapids_jni::profiler {
29 |
30 | // Serializes profile data as flatbuffers
31 | struct profiler_serializer {
32 | profiler_serializer(
33 | JNIEnv* env, jobject writer, size_t buffer_size, size_t flush_threshold, bool capture_allocs);
34 | void process_cupti_buffer(uint8_t* buffer, size_t valid_size);
35 | void flush();
36 |
37 | private:
38 | void write_profile_header();
39 | void process_api_activity(CUpti_ActivityAPI const*);
40 | void process_device_activity(CUpti_ActivityDevice4 const*);
41 | void process_dropped_records(size_t num_dropped);
42 | void process_marker_activity(CUpti_ActivityMarker2 const*);
43 | void process_marker_data(CUpti_ActivityMarkerData const*);
44 | void process_memcpy(CUpti_ActivityMemcpy5 const*);
45 | void process_memset(CUpti_ActivityMemset4 const*);
46 | void process_kernel(CUpti_ActivityKernel8 const*);
47 | void process_overhead(CUpti_ActivityOverhead const*);
48 | void report_num_dropped_records();
49 | void write_current_fb();
50 |
51 | JNIEnv* env_;
52 | jmethodID j_write_method_;
53 | jobject j_writer_;
54 | size_t flush_threshold_;
55 | bool capture_allocs_;
56 | flatbuffers::FlatBufferBuilder fbb_;
57 | std::vector> api_offsets_;
58 | std::vector> device_offsets_;
59 | std::vector> dropped_offsets_;
60 | std::vector> kernel_offsets_;
61 | std::vector> marker_offsets_;
62 | std::vector> marker_data_offsets_;
63 | std::vector> memcpy_offsets_;
64 | std::vector> memset_offsets_;
65 | std::vector> overhead_offsets_;
66 | };
67 |
68 | } // namespace spark_rapids_jni::profiler
69 |
--------------------------------------------------------------------------------
/src/main/cpp/src/CaseWhenJni.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include "case_when.hpp"
18 | #include "cudf_jni_apis.hpp"
19 |
20 | extern "C" {
21 |
22 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CaseWhen_selectFirstTrueIndex(
23 | JNIEnv* env, jclass, jlongArray bool_cols)
24 | {
25 | JNI_NULL_CHECK(env, bool_cols, "array of column handles is null", 0);
26 | try {
27 | cudf::jni::auto_set_device(env);
28 | cudf::jni::native_jpointerArray<cudf::column_view> n_cudf_bool_columns(env, bool_cols);
29 | auto bool_column_views = n_cudf_bool_columns.get_dereferenced();
30 | return cudf::jni::release_as_jlong(
31 | spark_rapids_jni::select_first_true_index(cudf::table_view(bool_column_views)));
32 | }
33 | CATCH_STD(env, 0);
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/cpp/src/DateTimeUtilsJni.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include "cudf_jni_apis.hpp"
18 | #include "datetime_utils.hpp"
19 |
20 | extern "C" {
21 |
22 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_DateTimeUtils_rebaseGregorianToJulian(
23 | JNIEnv* env, jclass, jlong input)
24 | {
25 | JNI_NULL_CHECK(env, input, "input column is null", 0);
26 |
27 | try {
28 | cudf::jni::auto_set_device(env);
29 | auto const input_cv = reinterpret_cast<cudf::column_view const*>(input);
30 | auto output = spark_rapids_jni::rebase_gregorian_to_julian(*input_cv);
31 | return reinterpret_cast<jlong>(output.release());
32 | }
33 | CATCH_STD(env, 0);
34 | }
35 |
36 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_DateTimeUtils_rebaseJulianToGregorian(
37 | JNIEnv* env, jclass, jlong input)
38 | {
39 | JNI_NULL_CHECK(env, input, "input column is null", 0);
40 |
41 | try {
42 | cudf::jni::auto_set_device(env);
43 | auto const input_cv = reinterpret_cast<cudf::column_view const*>(input);
44 | auto output = spark_rapids_jni::rebase_julian_to_gregorian(*input_cv);
45 | return reinterpret_cast<jlong>(output.release());
46 | }
47 | CATCH_STD(env, 0);
48 | }
49 |
50 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_DateTimeUtils_truncateWithColumnFormat(
51 | JNIEnv* env, jclass, jlong datetime, jlong format)
52 | {
53 | JNI_NULL_CHECK(env, datetime, "input datetime is null", 0);
54 | JNI_NULL_CHECK(env, format, "input format is null", 0);
55 |
56 | try {
57 | cudf::jni::auto_set_device(env);
58 |
59 | auto const datetime_cv = reinterpret_cast<cudf::column_view const*>(datetime);
60 | auto const format_cv = reinterpret_cast<cudf::column_view const*>(format);
61 | return reinterpret_cast<jlong>(spark_rapids_jni::truncate(*datetime_cv, *format_cv).release());
62 | }
63 | CATCH_STD(env, 0);
64 | }
65 |
66 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_DateTimeUtils_truncateWithScalarFormat(
67 | JNIEnv* env, jclass, jlong datetime, jstring format)
68 | {
69 | JNI_NULL_CHECK(env, datetime, "input datetime is null", 0);
70 |
71 | try {
72 | cudf::jni::auto_set_device(env);
73 |
74 | auto const datetime_cv = reinterpret_cast<cudf::column_view const*>(datetime);
75 | auto const format_jstr = cudf::jni::native_jstring(env, format);
76 | auto const format = std::string(format_jstr.get(), format_jstr.size_bytes());
77 | return reinterpret_cast<jlong>(spark_rapids_jni::truncate(*datetime_cv, format).release());
78 | }
79 | CATCH_STD(env, 0);
80 | }
81 |
82 | } // extern "C"
83 |
--------------------------------------------------------------------------------
/src/main/cpp/src/HashJni.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include "cudf_jni_apis.hpp"
18 | #include "dtype_utils.hpp"
19 | #include "hash.hpp"
20 | #include "jni_utils.hpp"
21 |
22 | extern "C" {
23 |
24 | JNIEXPORT jint JNICALL Java_com_nvidia_spark_rapids_jni_Hash_getMaxStackDepth(JNIEnv* env, jclass)
25 | {
26 | return spark_rapids_jni::MAX_STACK_DEPTH;
27 | }
28 |
29 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Hash_murmurHash32(
30 | JNIEnv* env, jclass, jint seed, jlongArray column_handles)
31 | {
32 | JNI_NULL_CHECK(env, column_handles, "array of column handles is null", 0);
33 |
34 | try {
35 | cudf::jni::auto_set_device(env);
36 | auto column_views =
37 | cudf::jni::native_jpointerArray<cudf::column_view>{env, column_handles}.get_dereferenced();
38 | return cudf::jni::release_as_jlong(
39 | spark_rapids_jni::murmur_hash3_32(cudf::table_view{column_views}, seed));
40 | }
41 | CATCH_STD(env, 0);
42 | }
43 |
44 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Hash_xxhash64(JNIEnv* env,
45 | jclass,
46 | jlong seed,
47 | jlongArray column_handles)
48 | {
49 | JNI_NULL_CHECK(env, column_handles, "array of column handles is null", 0);
50 |
51 | try {
52 | cudf::jni::auto_set_device(env);
53 | auto column_views =
54 | cudf::jni::native_jpointerArray<cudf::column_view>{env, column_handles}.get_dereferenced();
55 | return cudf::jni::release_as_jlong(
56 | spark_rapids_jni::xxhash64(cudf::table_view{column_views}, seed));
57 | }
58 | CATCH_STD(env, 0);
59 | }
60 |
61 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Hash_hiveHash(JNIEnv* env,
62 | jclass,
63 | jlongArray column_handles)
64 | {
65 | JNI_NULL_CHECK(env, column_handles, "array of column handles is null", 0);
66 |
67 | try {
68 | cudf::jni::auto_set_device(env);
69 | auto column_views =
70 | cudf::jni::native_jpointerArray<cudf::column_view>{env, column_handles}.get_dereferenced();
71 | return cudf::jni::release_as_jlong(spark_rapids_jni::hive_hash(cudf::table_view{column_views}));
72 | }
73 | CATCH_STD(env, 0);
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/src/main/cpp/src/HistogramJni.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include "cudf_jni_apis.hpp"
18 | #include "histogram.hpp"
19 |
20 | extern "C" {
21 |
22 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Histogram_createHistogramIfValid(
23 | JNIEnv* env, jclass, jlong values_handle, jlong frequencies_handle, jboolean output_as_lists)
24 | {
25 | JNI_NULL_CHECK(env, values_handle, "values_handle is null", 0);
26 | JNI_NULL_CHECK(env, frequencies_handle, "frequencies_handle is null", 0);
27 |
28 | try {
29 | cudf::jni::auto_set_device(env);
30 |
31 | auto const values = reinterpret_cast<cudf::column_view const*>(values_handle);
32 | auto const frequencies = reinterpret_cast<cudf::column_view const*>(frequencies_handle);
33 | return cudf::jni::ptr_as_jlong(
34 | spark_rapids_jni::create_histogram_if_valid(*values, *frequencies, output_as_lists)
35 | .release());
36 | }
37 | CATCH_STD(env, 0);
38 | }
39 |
40 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Histogram_percentileFromHistogram(
41 | JNIEnv* env, jclass, jlong input_handle, jdoubleArray jpercentages, jboolean output_as_lists)
42 | {
43 | JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0);
44 | JNI_NULL_CHECK(env, jpercentages, "jpercentages is null", 0);
45 |
46 | try {
47 | cudf::jni::auto_set_device(env);
48 |
49 | auto const input = reinterpret_cast<cudf::column_view const*>(input_handle);
50 | auto const percentages = [&] {
51 | auto const native_percentages = cudf::jni::native_jdoubleArray(env, jpercentages);
52 | return std::vector<double>(native_percentages.begin(), native_percentages.end());
53 | }();
54 | return cudf::jni::ptr_as_jlong(
55 | spark_rapids_jni::percentile_from_histogram(*input, percentages, output_as_lists).release());
56 | }
57 | CATCH_STD(env, 0);
58 | }
59 |
60 | } // extern "C"
61 |
--------------------------------------------------------------------------------
/src/main/cpp/src/HyperLogLogPlusPlusHostUDFJni.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include "cudf_jni_apis.hpp"
18 | #include "hyper_log_log_plus_plus.hpp"
19 | #include "hyper_log_log_plus_plus_host_udf.hpp"
20 |
21 | extern "C" {
22 |
23 | JNIEXPORT jlong JNICALL
24 | Java_com_nvidia_spark_rapids_jni_HyperLogLogPlusPlusHostUDF_createHLLPPHostUDF(JNIEnv* env,
25 | jclass,
26 | jint agg_type,
27 | int precision)
28 | {
29 | try {
30 | auto udf_ptr = [&] {
31 | // The value of agg_type must be kept in sync with
32 | // `HyperLogLogPlusPlusHostUDF.java#AggregationType`.
33 | switch (agg_type) {
34 | case 0: return spark_rapids_jni::create_hllpp_reduction_host_udf(precision);
35 | case 1: return spark_rapids_jni::create_hllpp_reduction_merge_host_udf(precision);
36 | case 2: return spark_rapids_jni::create_hllpp_groupby_host_udf(precision);
37 | case 3: return spark_rapids_jni::create_hllpp_groupby_merge_host_udf(precision);
38 | default: CUDF_FAIL("Invalid aggregation type.");
39 | }
40 | }();
41 | CUDF_EXPECTS(udf_ptr != nullptr, "Invalid HyperLogLogPlusPlus(HLLPP) UDF instance.");
42 |
43 | return reinterpret_cast<jlong>(udf_ptr);
44 | }
45 | CATCH_STD(env, 0);
46 | }
47 |
48 | JNIEXPORT jlong JNICALL
49 | Java_com_nvidia_spark_rapids_jni_HyperLogLogPlusPlusHostUDF_estimateDistinctValueFromSketches(
50 | JNIEnv* env, jclass, jlong sketches, jint precision)
51 | {
52 | JNI_NULL_CHECK(env, sketches, "Sketch column is null", 0);
53 | try {
54 | cudf::jni::auto_set_device(env);
55 | auto const sketch_view = reinterpret_cast<cudf::column_view const*>(sketches);
56 | return cudf::jni::ptr_as_jlong(
57 | spark_rapids_jni::estimate_from_hll_sketches(*sketch_view, precision).release());
58 | }
59 | CATCH_STD(env, 0);
60 | }
61 |
62 | } // extern "C"
63 |
--------------------------------------------------------------------------------
/src/main/cpp/src/MapJni.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include "cudf_jni_apis.hpp"
18 | #include "jni_utils.hpp"
19 | #include "map.hpp"
20 |
21 | extern "C" {
22 |
23 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Map_sort(JNIEnv* env,
24 | jclass,
25 | jlong map_handle,
26 | jboolean is_descending)
27 | {
28 | JNI_NULL_CHECK(env, map_handle, "column is null", 0);
29 |
30 | try {
31 | cudf::jni::auto_set_device(env);
32 | auto sort_order = is_descending ? cudf::order::DESCENDING : cudf::order::ASCENDING;
33 | cudf::column_view const& map_view = *reinterpret_cast<cudf::column_view const*>(map_handle);
34 | return cudf::jni::release_as_jlong(spark_rapids_jni::sort_map_column(map_view, sort_order));
35 | }
36 |
37 | CATCH_STD(env, 0);
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/cpp/src/RegexRewriteUtilsJni.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include "cudf_jni_apis.hpp"
18 | #include "dtype_utils.hpp"
19 | #include "jni_utils.hpp"
20 | #include "regex_rewrite_utils.hpp"
21 |
22 | extern "C" {
23 |
24 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_RegexRewriteUtils_literalRangePattern(
25 | JNIEnv* env, jclass, jlong input, jlong target, jint d, jint start, jint end)
26 | {
27 | JNI_NULL_CHECK(env, input, "input column is null", 0);
28 | JNI_NULL_CHECK(env, target, "target is null", 0);
29 |
30 | try {
31 | cudf::jni::auto_set_device(env);
32 |
33 | cudf::column_view* cv = reinterpret_cast<cudf::column_view*>(input);
34 | cudf::strings_column_view scv(*cv);
35 | cudf::string_scalar* ss_scalar = reinterpret_cast<cudf::string_scalar*>(target);
36 | return cudf::jni::release_as_jlong(
37 | spark_rapids_jni::literal_range_pattern(scv, *ss_scalar, d, start, end));
38 | }
39 | CATCH_STD(env, 0);
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/cpp/src/SubStringIndexJni.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include "cudf_jni_apis.hpp"
18 | #include "substring_index.hpp"
19 |
20 | extern "C" {
21 |
22 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_GpuSubstringIndexUtils_substringIndex(
23 | JNIEnv* env, jclass, jlong strings_handle, jlong delimiter, jint count)
24 | {
25 | JNI_NULL_CHECK(env, strings_handle, "strings column handle is null", 0);
26 | JNI_NULL_CHECK(env, delimiter, "delimiter scalar handle is null", 0);
27 | try {
28 | cudf::jni::auto_set_device(env);
29 | auto const input = reinterpret_cast<cudf::column_view const*>(strings_handle);
30 | auto const strings_column = cudf::strings_column_view{*input};
31 | cudf::string_scalar* ss_scalar = reinterpret_cast<cudf::string_scalar*>(delimiter);
32 | return cudf::jni::release_as_jlong(
33 | spark_rapids_jni::substring_index(strings_column, *ss_scalar, count));
34 | }
35 | CATCH_STD(env, 0);
36 | }
37 | } // extern "C"
38 |
--------------------------------------------------------------------------------
/src/main/cpp/src/TaskPriorityJni.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include "cudf_jni_apis.hpp"
18 | #include "jni_utils.hpp"
19 | #include "task_priority.hpp"
20 |
21 | namespace {
22 | // Track the next priority to assign and maintain a map of attempt_id to priority
23 | static long next_task_priority = std::numeric_limits<long>::max() - 1;
24 | static std::mutex priority_mutex;
25 | static std::unordered_map<long, long> attempt_priorities;
26 | } // namespace
27 |
28 | namespace spark_rapids_jni {
29 |
30 | long get_task_priority(long attempt_id)
31 | {
32 | if (attempt_id == -1) {
33 | // Special case: -1 always gets highest priority
34 | return std::numeric_limits<long>::max();
35 | }
36 |
37 | std::lock_guard lock(priority_mutex);
38 | auto it = attempt_priorities.find(attempt_id);
39 | if (it != attempt_priorities.end()) {
40 | // Return existing priority for this attempt_id
41 | return it->second;
42 | }
43 |
44 | // Assign new priority for this attempt_id
45 | long priority = next_task_priority--;
46 | attempt_priorities[attempt_id] = priority;
47 | return priority;
48 | }
49 |
50 | void task_done(long attempt_id)
51 | {
52 | if (attempt_id == -1) {
53 | return; // Nothing to do for special case
54 | }
55 |
56 | std::lock_guard lock(priority_mutex);
57 | attempt_priorities.erase(attempt_id);
58 | }
59 |
60 | } // namespace spark_rapids_jni
61 |
62 | extern "C" {
63 |
64 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_TaskPriority_getTaskPriority(
65 | JNIEnv* env, jclass, jlong task_attempt_id)
66 | {
67 | return spark_rapids_jni::get_task_priority(task_attempt_id);
68 | }
69 |
70 | JNIEXPORT void JNICALL Java_com_nvidia_spark_rapids_jni_TaskPriority_taskDone(JNIEnv* env,
71 | jclass,
72 | jlong task_attempt_id)
73 | {
74 | spark_rapids_jni::task_done(task_attempt_id);
75 | }
76 | }
77 |
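Note (editorial, not part of the repository): a minimal sketch of the priority semantics implemented above, assuming a fresh single-threaded process with no other callers, and assuming task_priority.hpp declares the two functions defined in this file.

#include "task_priority.hpp"

#include <cassert>
#include <limits>

void priority_semantics_example()
{
  // attempt_id -1 always gets the maximum priority.
  assert(spark_rapids_jni::get_task_priority(-1) == std::numeric_limits<long>::max());
  // New attempt ids receive strictly decreasing priorities; repeat lookups are stable.
  long const p1 = spark_rapids_jni::get_task_priority(100);
  long const p2 = spark_rapids_jni::get_task_priority(200);  // lower than p1 when no one else calls in between
  assert(p2 < p1);
  assert(spark_rapids_jni::get_task_priority(100) == p1);
  // Once the task finishes, its mapping is released.
  spark_rapids_jni::task_done(100);
}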
--------------------------------------------------------------------------------
/src/main/cpp/src/ZOrderJni.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include "cudf_jni_apis.hpp"
18 | #include "dtype_utils.hpp"
19 | #include "zorder.hpp"
20 |
21 | extern "C" {
22 |
23 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ZOrder_interleaveBits(
24 | JNIEnv* env, jclass, jlongArray input_columns)
25 | {
26 | JNI_NULL_CHECK(env, input_columns, "input is null", 0);
27 |
28 | try {
29 | cudf::jni::auto_set_device(env);
30 | cudf::jni::native_jpointerArray<cudf::column_view> n_input_columns(env, input_columns);
31 | cudf::table_view tbl(n_input_columns.get_dereferenced());
32 |
33 | return cudf::jni::ptr_as_jlong(spark_rapids_jni::interleave_bits(tbl).release());
34 | }
35 | CATCH_STD(env, 0);
36 | }
37 |
38 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ZOrder_hilbertIndex(
39 | JNIEnv* env, jclass, jint num_bits, jlongArray input_columns)
40 | {
41 | JNI_NULL_CHECK(env, input_columns, "input is null", 0);
42 |
43 | try {
44 | cudf::jni::auto_set_device(env);
45 | cudf::jni::native_jpointerArray<cudf::column_view> n_input_columns(env, input_columns);
46 | cudf::table_view tbl(n_input_columns.get_dereferenced());
47 |
48 | return cudf::jni::ptr_as_jlong(spark_rapids_jni::hilbert_index(num_bits, tbl).release());
49 | }
50 | CATCH_STD(env, 0);
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/src/main/cpp/src/case_when.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | #include
20 | #include
21 |
22 | #include
23 | #include
24 |
25 | #include
26 |
27 | namespace spark_rapids_jni {
28 |
29 | /**
30 | *
31 | * Select the column index of the first true value in the bool columns.
32 | * For rows that do not contain any true value, use the end index (the number of columns).
33 | *
34 | * e.g.:
35 | * column 0 in table: true, false, false, false
36 | * column 1 in table: false, true, false, false
37 | * column 2 in table: false, false, true, false
38 | *
39 | * 1st row is: true, false, false; first true index is 0
40 | * 2nd row is: false, true, false; first true index is 1
41 | * 3rd row is: false, false, true; first true index is 2
42 | * 4th row is: false, false, false; do not find true, set index to the end index 3
43 | *
44 | * output column: 0, 1, 2, 3
45 | * In the `case when` context, index 3 here means using the NULL value.
46 | *
47 | */
48 | std::unique_ptr<cudf::column> select_first_true_index(
49 | cudf::table_view const& when_bool_columns,
50 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
51 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
52 |
53 | } // namespace spark_rapids_jni
54 |
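Note (editorial, not part of the repository): a minimal sketch of how select_first_true_index could be driven from host code, assuming cudf's test column wrappers are available; the data mirrors the example in the header comment above.

#include "case_when.hpp"

#include <cudf_test/column_wrapper.hpp>

#include <cudf/column/column.hpp>
#include <cudf/table/table_view.hpp>

std::unique_ptr<cudf::column> first_true_example()
{
  using bools = cudf::test::fixed_width_column_wrapper<bool>;
  // The three WHEN predicate columns from the documentation example, four rows each.
  bools when0{true, false, false, false};
  bools when1{false, true, false, false};
  bools when2{false, false, true, false};
  // Expected output per the comment above: 0, 1, 2, 3 (3 == no predicate matched, i.e. the NULL branch).
  return spark_rapids_jni::select_first_true_index(cudf::table_view{{when0, when1, when2}});
}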
--------------------------------------------------------------------------------
/src/main/cpp/src/cast_string_to_timestamp_common.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | #include
20 |
21 | namespace spark_rapids_jni {
22 |
23 | /**
24 | * Timezone type
25 | * Used in casting string with timezone to timestamp
26 | */
27 | enum class TZ_TYPE : uint8_t {
28 |
29 | // No timezone is specified in the string, indicating that the default timezone should be used.
30 | NOT_SPECIFIED = 0,
31 |
32 | // Fixed offset timezone
33 | // String starts with UT/GMT/UTC/[+-], and it's valid.
34 | // E.g: +08:00, +08, +1:02:30, -010203, GMT+8, UTC+8:00, UT+8
35 | // E.g: +01:2:03
36 | FIXED_TZ = 1,
37 |
38 | // Not FIXED_TZ; it's a valid timezone string.
39 | // E.g.: java.time.ZoneId.SHORT_IDS: CTT
40 | // E.g.: Region-based timezone: America/Los_Angeles
41 | OTHER_TZ = 2,
42 |
43 | // Invalid timezone.
44 | // String starts with UT/GMT/UTC/[+-], but it's invalid.
45 | // E.g: UTC+19:00, GMT+19:00, max offset is 18 hours
46 | // E.g: GMT+01:2:03, +01:2:03, special case
47 | // E.g: non-exist-timezone
48 | INVALID_TZ = 3
49 | };
50 |
51 | } // namespace spark_rapids_jni
52 |
--------------------------------------------------------------------------------
/src/main/cpp/src/datetime_utils.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | #include
20 |
21 | namespace spark_rapids_jni {
22 | std::unique_ptr<cudf::column> rebase_gregorian_to_julian(
23 | cudf::column_view const& input,
24 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
25 | rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
26 |
27 | std::unique_ptr<cudf::column> rebase_julian_to_gregorian(
28 | cudf::column_view const& input,
29 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
30 | rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
31 |
32 | std::unique_ptr<cudf::column> truncate(
33 | cudf::column_view const& datetime,
34 | cudf::column_view const& format,
35 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
36 | rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
37 |
38 | std::unique_ptr<cudf::column> truncate(
39 | cudf::column_view const& datetime,
40 | std::string const& format,
41 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
42 | rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
43 |
44 | } // namespace spark_rapids_jni
45 |
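Note (editorial, not part of the repository): a minimal sketch of the scalar-format truncate overload above, assuming cudf's test column wrappers and assuming "YEAR" is one of the accepted format strings.

#include "datetime_utils.hpp"

#include <cudf_test/column_wrapper.hpp>

#include <cudf/column/column.hpp>
#include <cudf/wrappers/timestamps.hpp>

#include <string>

void truncate_to_year_example()
{
  // Dates stored as days since the Unix epoch.
  cudf::test::fixed_width_column_wrapper<cudf::timestamp_D, cudf::timestamp_D::rep> dates{0, 100, 400};
  // Truncate every row to the first day of its year ("YEAR" is assumed to be a valid format string).
  auto truncated = spark_rapids_jni::truncate(dates, std::string{"YEAR"});
}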
--------------------------------------------------------------------------------
/src/main/cpp/src/decimal_utils.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | #include
20 | #include
21 | #include
22 |
23 | #include
24 |
25 | #include
26 |
27 | namespace cudf::jni {
28 |
29 | std::unique_ptr<cudf::column> multiply_decimal128(
30 | cudf::column_view const& a,
31 | cudf::column_view const& b,
32 | int32_t product_scale,
33 | bool const cast_interim_result,
34 | rmm::cuda_stream_view stream = cudf::get_default_stream());
35 |
36 | std::unique_ptr<cudf::column> divide_decimal128(
37 | cudf::column_view const& a,
38 | cudf::column_view const& b,
39 | int32_t quotient_scale,
40 | rmm::cuda_stream_view stream = cudf::get_default_stream());
41 |
42 | std::unique_ptr<cudf::column> integer_divide_decimal128(
43 | cudf::column_view const& a,
44 | cudf::column_view const& b,
45 | int32_t quotient_scale,
46 | rmm::cuda_stream_view stream = cudf::get_default_stream());
47 |
48 | std::unique_ptr<cudf::column> remainder_decimal128(
49 | cudf::column_view const& a,
50 | cudf::column_view const& b,
51 | int32_t remainder_scale,
52 | rmm::cuda_stream_view stream = cudf::get_default_stream());
53 |
54 | std::unique_ptr<cudf::column> add_decimal128(
55 | cudf::column_view const& a,
56 | cudf::column_view const& b,
57 | int32_t quotient_scale,
58 | rmm::cuda_stream_view stream = cudf::get_default_stream());
59 |
60 | std::unique_ptr<cudf::column> sub_decimal128(
61 | cudf::column_view const& a,
62 | cudf::column_view const& b,
63 | int32_t quotient_scale,
64 | rmm::cuda_stream_view stream = cudf::get_default_stream());
65 |
66 | /**
67 | * @brief Cast floating point values to decimals, matching the behavior of Spark.
68 | *
69 | * @param input The input column, which is either FLOAT32 or FLOAT64 type
70 | * @param output_type The output decimal type
71 | * @param precision The maximum number of digits that will be preserved in the output
72 | * @param stream CUDA stream used for device memory operations and kernel launches
73 | * @param mr Device memory resource used to allocate the returned column's device memory
74 | * @return A cudf column containing the cast result and a boolean value indicating whether the cast
75 | operation has failed for any input rows
76 | */
77 | std::pair<std::unique_ptr<cudf::column>, bool> floating_point_to_decimal(
78 | cudf::column_view const& input,
79 | cudf::data_type output_type,
80 | int32_t precision,
81 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
82 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
83 |
84 | } // namespace cudf::jni
85 |
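Note (editorial, not part of the repository): a minimal sketch of floating_point_to_decimal above, assuming cudf's test column wrappers; the chosen scale and precision are illustrative only.

#include "decimal_utils.hpp"

#include <cudf_test/column_wrapper.hpp>

#include <cudf/column/column.hpp>
#include <cudf/types.hpp>

void float_to_decimal_example()
{
  cudf::test::fixed_width_column_wrapper<double> input{1.5, 2.25, 1e30};
  // Ask for DECIMAL128 with scale -2 (two digits after the decimal point) and precision 38.
  auto [result, failed] = cudf::jni::floating_point_to_decimal(
    input, cudf::data_type{cudf::type_id::DECIMAL128, -2}, 38);
  // `failed` reports whether any row could not be represented with the requested precision.
}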
--------------------------------------------------------------------------------
/src/main/cpp/src/emptyfile.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2022, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | // Intentionally empty
18 |
--------------------------------------------------------------------------------
/src/main/cpp/src/get_json_object.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | #include
20 |
21 | #include
22 |
23 | #include
24 | #include
25 |
26 | namespace spark_rapids_jni {
27 |
28 | /**
29 | * @brief The maximum supported depth that a JSON path can reach.
30 | */
31 | constexpr int MAX_JSON_PATH_DEPTH = 16;
32 |
33 | /**
34 | * @brief Type of instruction in a JSON path.
35 | */
36 | enum class path_instruction_type : int8_t { WILDCARD, INDEX, NAMED };
37 |
38 | /**
39 | * @brief Extract JSON object from a JSON string based on the specified JSON path.
40 | *
41 | * If the input JSON string is invalid, or it does not contain the object at the given path, a null
42 | * will be returned.
43 | */
44 | std::unique_ptr<cudf::column> get_json_object(
45 | cudf::strings_column_view const& input,
46 | std::vector> const& instructions,
47 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
48 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
49 |
50 | /**
51 | * @brief Extract multiple JSON objects from a JSON string based on the specified JSON paths.
52 | *
53 | * This function processes all the JSON paths in parallel, which may be faster than calling
54 | * `get_json_object` on the individual JSON paths. However, it may consume much more GPU
55 | * memory, proportional to the number of JSON paths.
56 | * @param input the input string column to parse JSON from
57 | * @param json_paths the path operations to extract
58 | * @param memory_budget_bytes a memory budget for temporary memory usage if > 0
59 | * @param parallel_override if this value is greater than 0 then it specifies the
60 | * number of paths to process in parallel (this will cause the
61 | * `memory_budget_bytes` parameter to be ignored)
62 | */
63 | std::vector<std::unique_ptr<cudf::column>> get_json_object_multiple_paths(
64 | cudf::strings_column_view const& input,
65 | std::vector>> const&
66 | json_paths,
67 | int64_t memory_budget_bytes,
68 | int32_t parallel_override,
69 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
70 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
71 |
72 | } // namespace spark_rapids_jni
73 |
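Note (editorial, not part of the repository): the element types of the instruction tuples were lost in this dump, so the (type, name, index) layout below is an assumption; the sketch only shows how a Spark JSON path decomposes into path_instruction_type entries.

#include "get_json_object.hpp"

#include <cstdint>
#include <string>
#include <tuple>
#include <vector>

// Assumed tuple layout: (instruction type, field name for NAMED, array index for INDEX).
using path_instruction = std::tuple<spark_rapids_jni::path_instruction_type, std::string, int64_t>;

// $.store.book[0].title  ->  NAMED("store"), NAMED("book"), INDEX(0), NAMED("title")
std::vector<path_instruction> store_book_title_path()
{
  using t = spark_rapids_jni::path_instruction_type;
  return {
    {t::NAMED, "store", -1},
    {t::NAMED, "book", -1},
    {t::INDEX, "", 0},
    {t::NAMED, "title", -1},
  };
}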
--------------------------------------------------------------------------------
/src/main/cpp/src/hash.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | #include
20 | #include
21 |
22 | #include
23 | #include
24 |
25 | namespace spark_rapids_jni {
26 |
27 | constexpr int64_t DEFAULT_XXHASH64_SEED = 42;
28 | constexpr int MAX_STACK_DEPTH = 8;
29 |
30 | /**
31 | * @brief Computes the murmur32 hash value of each row in the input set of columns.
32 | *
33 | * @param input The table of columns to hash
34 | * @param seed Optional seed value to use for the hash function
35 | * @param stream CUDA stream used for device memory operations and kernel launches
36 | * @param mr Device memory resource used to allocate the returned column's device memory
37 | *
38 | * @returns A column where each row is the hash of the corresponding row from the input.
39 | */
40 | std::unique_ptr<cudf::column> murmur_hash3_32(
41 | cudf::table_view const& input,
42 | uint32_t seed = 0,
43 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
44 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
45 |
46 | /**
47 | * @brief Computes the xxhash64 hash value of each row in the input set of columns.
48 | *
49 | * @param input The table of columns to hash
50 | * @param seed Optional seed value to use for the hash function
51 | * @param stream CUDA stream used for device memory operations and kernel launches
52 | * @param mr Device memory resource used to allocate the returned column's device memory
53 | *
54 | * @returns A column where each row is the hash of the corresponding row from the input.
55 | */
56 | std::unique_ptr<cudf::column> xxhash64(
57 | cudf::table_view const& input,
58 | int64_t seed = DEFAULT_XXHASH64_SEED,
59 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
60 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
61 |
62 | /**
63 | * @brief Computes the Hive hash value of each row in the input set of columns.
64 | *
65 | * @param input The table of columns to hash
66 | * @param stream CUDA stream used for device memory operations and kernel launches
67 | * @param mr Device memory resource used to allocate the returned column's device memory
68 | *
69 | * @returns A column where each row is the hash of the corresponding row from the input.
70 | */
71 | std::unique_ptr<cudf::column> hive_hash(
72 | cudf::table_view const& input,
73 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
74 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
75 |
76 | } // namespace spark_rapids_jni
77 |
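Note (editorial, not part of the repository): a minimal sketch of the row-hash helpers above, assuming cudf's test column wrappers.

#include "hash.hpp"

#include <cudf_test/column_wrapper.hpp>

#include <cudf/column/column.hpp>
#include <cudf/table/table_view.hpp>

void hash_rows_example()
{
  cudf::test::fixed_width_column_wrapper<int64_t> ids{1, 2, 3};
  cudf::test::strings_column_wrapper names{"a", "b", "c"};
  cudf::table_view input{{ids, names}};
  // One 32-bit Spark murmur hash per input row, using seed 42.
  auto murmur = spark_rapids_jni::murmur_hash3_32(input, 42);
  // One 64-bit hash per input row, using DEFAULT_XXHASH64_SEED.
  auto xx = spark_rapids_jni::xxhash64(input);
}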
--------------------------------------------------------------------------------
/src/main/cpp/src/hyper_log_log_plus_plus_const.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | namespace spark_rapids_jni {
20 |
21 | /**
22 | * @brief The number of bits that is required for a HLLPP register value.
23 | *
24 | * This number is determined by the maximum number of leading binary zeros a
25 | * hashcode can produce. This is equal to the number of bits the hashcode
26 | * returns. The current implementation uses a 64-bit hashcode, which means at most
27 | * 6 bits are needed to store the number of leading zeros.
28 | */
29 | constexpr int REGISTER_VALUE_BITS = 6;
30 |
31 | /**
32 | * @brief The number of registers that can be stored in a single long.
33 | * It's 64 / 6 = 10.
34 | */
35 | constexpr int REGISTERS_PER_LONG = 64 / REGISTER_VALUE_BITS;
36 |
37 | } // namespace spark_rapids_jni
38 |
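A standalone sketch (not part of the header above) of how ten 6-bit register values can be packed into one 64-bit word, assuming registers are stored starting at the least-significant bits:

#include <cstdint>

// Read register `idx` (0..9) out of a packed 64-bit word; 0x3F == (1 << 6) - 1.
inline uint64_t get_register(uint64_t word, int idx)
{
  return (word >> (idx * 6)) & 0x3Fu;
}

// Write a 6-bit value into register `idx` and return the updated word.
inline uint64_t set_register(uint64_t word, int idx, uint64_t value)
{
  auto const shift = idx * 6;
  return (word & ~(uint64_t{0x3F} << shift)) | ((value & 0x3F) << shift);
}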
--------------------------------------------------------------------------------
/src/main/cpp/src/hyper_log_log_plus_plus_host_udf.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | #include
20 |
21 | namespace spark_rapids_jni {
22 |
23 | cudf::host_udf_base* create_hllpp_reduction_host_udf(int precision);
24 |
25 | cudf::host_udf_base* create_hllpp_reduction_merge_host_udf(int precision);
26 |
27 | cudf::host_udf_base* create_hllpp_groupby_host_udf(int precision);
28 |
29 | cudf::host_udf_base* create_hllpp_groupby_merge_host_udf(int precision);
30 |
31 | } // namespace spark_rapids_jni
32 |
--------------------------------------------------------------------------------
/src/main/cpp/src/map.cu:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include "map.hpp"
18 |
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include
26 |
27 | namespace spark_rapids_jni {
28 |
29 | std::unique_ptr<cudf::column> sort_map_column(cudf::column_view const& input,
30 | cudf::order sort_order,
31 | rmm::cuda_stream_view stream,
32 | rmm::device_async_resource_ref mr)
33 | {
34 | CUDF_EXPECTS(input.type().id() == cudf::type_id::LIST,
35 | "maps_column_view input must be LIST type");
36 | if (input.size() == 0) { return cudf::make_empty_column(input.type()); }
37 |
38 | auto const lists_of_structs = cudf::lists_column_view(input);
39 | auto const structs = lists_of_structs.child();
40 | CUDF_EXPECTS(structs.type().id() == cudf::type_id::STRUCT,
41 | "maps_column_view input must have exactly 1 child (STRUCT) column.");
42 | CUDF_EXPECTS(structs.num_children() == 2,
43 | "maps_column_view key-value struct must have exactly 2 children.");
44 | auto keys = structs.child(0);
45 | auto values = structs.child(1);
46 | CUDF_EXPECTS(structs.null_count() == 0, "maps_column_view key-value struct must have no null.");
47 | CUDF_EXPECTS(keys.null_count() == 0, "maps_column_view keys must have no null.");
48 | auto segments = lists_of_structs.offsets();
49 |
50 | auto sorted = cudf::segmented_sort_by_key(cudf::table_view{{structs}},
51 | cudf::table_view{{keys}},
52 | segments,
53 | {sort_order},
54 | {}, // Map keys MUST not be null
55 | stream,
56 | mr);
57 |
58 | return cudf::make_lists_column(input.size(),
59 | std::make_unique<cudf::column>(segments),  // copy segment offsets
60 | std::move(sorted->release().front()), // child column
61 | input.null_count(),
62 | cudf::copy_bitmask(input, stream, mr),
63 | stream,
64 | mr);
65 | }
66 |
67 | } // namespace spark_rapids_jni
68 |
--------------------------------------------------------------------------------
/src/main/cpp/src/map.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include
18 | #include
19 | #include
20 |
21 | #include
22 |
23 | namespace spark_rapids_jni {
24 |
25 | /**
26 |  * Sort the entries of each map in a map column according to the map's keys.
27 |  * Note:
28 |  * The map keys MUST NOT be null.
29 |  * Maps are assumed not to contain duplicate keys.
30 |  * Nested maps inside the `KEY` column are not normalized or sorted; only the
31 |  * first-level LIST(STRUCT(KEY, VALUE)) is treated as the map type.
32 |  *
33 |  * @param input Input map column; must be of LIST(STRUCT(KEY, VALUE)) type.
34 |  * @param sort_order Ascending or descending order
35 |  * @return Sorted map column according to the sort order of the keys in each map.
36 | * @throws cudf::logic_error If the input column is not a LIST(STRUCT(KEY, VALUE)) column or the
37 | * keys contain nulls.
38 | */
39 | std::unique_ptr<cudf::column> sort_map_column(
40 | cudf::column_view const& input,
41 | cudf::order sort_order,
42 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
43 | rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
44 |
45 | } // namespace spark_rapids_jni
46 |
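A minimal usage sketch for sort_map_column, assuming `map_col` is a LIST(STRUCT(KEY, VALUE)) column whose keys contain no nulls:

#include <cudf/column/column_view.hpp>
#include <cudf/types.hpp>

#include "map.hpp"

void sort_maps_example(cudf::column_view const& map_col)
{
  // Sort each map's entries by key in ascending order on the default stream.
  auto ascending = spark_rapids_jni::sort_map_column(map_col, cudf::order::ASCENDING);

  // Descending order is selected the same way.
  auto descending = spark_rapids_jni::sort_map_column(map_col, cudf::order::DESCENDING);
}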
--------------------------------------------------------------------------------
/src/main/cpp/src/number_converter.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | #include
20 | #include
21 | #include
22 |
23 | #include
24 | #include
25 |
26 | namespace spark_rapids_jni {
27 |
28 | using convert_number_t = std::variant;
29 |
30 | /**
31 | *
32 |  * @brief Convert numbers (in a string column) between different number bases. If `to_base` > 0
33 |  * the result is unsigned, otherwise it is signed. Space characters (ASCII 32) are trimmed first.
34 |  * Returns null if len(trim_ascii_32(str)) == 0. Returns all nulls if `from_base` or `to_base` is
35 |  * not in the range [2, 36].
36 | *
37 | * e.g.:
38 | * convert('11', 2, 10) = '3'
39 | * convert('F', 16, 10) = '15'
40 | * convert('17', 10, 16) = '11'
41 | *
42 | * @param input the input string column or string scalar
43 | * @param from_base the number base of input, valid range is [2, 36]
44 | * @param to_base the number base of output, valid range is [2, 36]
45 | *
46 | * @return the string column contains numbers with `to_base` base
47 | */
48 | std::unique_ptr<cudf::column> convert(
49 | convert_number_t const& input,
50 | convert_number_t const& from_base,
51 | convert_number_t const& to_base,
52 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
53 | rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
54 |
55 | /**
56 | *
57 |  * @brief Check whether overflow occurs when converting numbers (in a string column) between
58 |  * different number bases. This check is used in ANSI mode. For more details, refer to the
59 |  * convert function.
60 | *
61 | * @param input the input string column or string scalar
62 | * @param from_base the number base of input, valid range is [2, 36]
63 | * @param to_base the number base of output, valid range is [2, 36]
64 | *
65 | * @return If overflow occurs, return true; otherwise, return false.
66 | */
67 | bool is_convert_overflow(
68 | convert_number_t const& input,
69 | convert_number_t const& from_base,
70 | convert_number_t const& to_base,
71 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
72 | rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
73 |
74 | } // namespace spark_rapids_jni
75 |
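A host-side illustration of the base-conversion semantics documented above (this is not the GPU implementation; it ignores signs, trimming and overflow and only reproduces the three examples in the comment):

#include <cstdint>
#include <string>

std::string conv_example(std::string const& str, int from_base, int to_base)
{
  // Parse the digits in from_base.
  uint64_t v = 0;
  for (char c : str) {
    int const digit = (c >= '0' && c <= '9') ? c - '0'
                      : (c >= 'A' && c <= 'Z') ? c - 'A' + 10
                                               : c - 'a' + 10;
    v = v * from_base + digit;
  }
  // Format the value in to_base.
  if (v == 0) { return "0"; }
  std::string out;
  while (v > 0) {
    int const d = static_cast<int>(v % to_base);
    out.insert(out.begin(), d < 10 ? static_cast<char>('0' + d) : static_cast<char>('A' + d - 10));
    v /= to_base;
  }
  return out;
}

// conv_example("11", 2, 10) == "3"
// conv_example("F", 16, 10) == "15"
// conv_example("17", 10, 16) == "11"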
--------------------------------------------------------------------------------
/src/main/cpp/src/regex_rewrite_utils.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | #include
20 | #include
21 | #include
22 |
23 | namespace spark_rapids_jni {
24 | /**
25 |  * @brief Check if each input string contains the regex pattern `literal[start-end]{len,}`, i.e.
26 |  * the literal string followed by at least `len` characters whose codepoints fall in the range
27 |  * `start` to `end`.
28 |  *
29 |  * @param input Column of strings to check for the pattern.
30 | * @param literal UTF-8 encoded string to check in strings column.
31 | * @param len Minimum number of characters to check after the literal.
32 | * @param start Minimum UTF-8 codepoint value to check for in the range.
33 | * @param end Maximum UTF-8 codepoint value to check for in the range.
34 | * @param stream CUDA stream used for device memory operations and kernel launches.
35 | * @param mr Device memory resource used to allocate the returned column's device memory.
36 | */
37 | std::unique_ptr<cudf::column> literal_range_pattern(
38 | cudf::strings_column_view const& input,
39 | cudf::string_scalar const& literal,
40 | int const len,
41 | int const start,
42 | int const end,
43 | rmm::cuda_stream_view stream = rmm::cuda_stream_default,
44 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
45 | } // namespace spark_rapids_jni
46 |
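A minimal usage sketch, assuming `strs` holds the strings to test; this checks for the literal "ID" followed by at least 3 characters in the codepoint range ['0', '9'], i.e. the pattern ID[0-9]{3,}:

#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/strings_column_view.hpp>

#include "regex_rewrite_utils.hpp"

void literal_range_example(cudf::strings_column_view const& strs)
{
  auto const literal = cudf::string_scalar("ID");
  // Returns a column of per-row match results.
  auto matches = spark_rapids_jni::literal_range_pattern(strs, literal, 3, '0', '9');
}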
--------------------------------------------------------------------------------
/src/main/cpp/src/row_conversion.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | #include
20 | #include
21 | #include
22 |
23 | #include
24 | #include
25 |
26 | #include
27 |
28 | namespace spark_rapids_jni {
29 |
30 | std::vector<std::unique_ptr<cudf::column>> convert_to_rows_fixed_width_optimized(
31 | cudf::table_view const& tbl,
32 | // TODO need something for validity
33 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
34 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
35 |
36 | std::vector<std::unique_ptr<cudf::column>> convert_to_rows(
37 | cudf::table_view const& tbl,
38 | // TODO need something for validity
39 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
40 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
41 |
42 | std::unique_ptr<cudf::table> convert_from_rows_fixed_width_optimized(
43 | cudf::lists_column_view const& input,
44 |   std::vector<cudf::data_type> const& schema,
45 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
46 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
47 |
48 | std::unique_ptr<cudf::table> convert_from_rows(
49 | cudf::lists_column_view const& input,
50 |   std::vector<cudf::data_type> const& schema,
51 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
52 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
53 |
54 | } // namespace spark_rapids_jni
55 |
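A minimal round-trip sketch, assuming the row-format output is a vector of list-of-bytes columns (one per batch) as suggested by the return types above:

#include <vector>

#include <cudf/lists/lists_column_view.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>

#include "row_conversion.hpp"

void row_conversion_example(cudf::table_view const& tbl,
                            std::vector<cudf::data_type> const& schema)
{
  // Columnar -> row format; each element is one batch of packed rows.
  auto row_batches = spark_rapids_jni::convert_to_rows(tbl);

  // Row format -> columnar, using the original column types as the schema.
  if (!row_batches.empty()) {
    auto const rows = cudf::lists_column_view(row_batches.front()->view());
    auto table      = spark_rapids_jni::convert_from_rows(rows, schema);
  }
}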
--------------------------------------------------------------------------------
/src/main/cpp/src/shuffle_split_detail.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include
18 |
19 | namespace spark_rapids_jni {
20 |
21 | namespace detail {
22 |
23 | /**
24 | * @brief Functor for computing size of data elements for a given cudf type.
25 | *
26 |  * Note: column types which themselves inherently have no data (strings, lists,
27 | * structs) return 0.
28 | */
29 | struct size_of_helper {
30 |   template <typename T>
31 |   constexpr std::enable_if_t<not cudf::is_fixed_width<T>() && !std::is_same_v<T, cudf::string_view>,
32 |                              size_t>
33 |   operator()() const
34 |   {
35 |     return 0;
36 |   }
37 |
38 |   template <typename T>
39 |   constexpr std::enable_if_t<not cudf::is_fixed_width<T>() && std::is_same_v<T, cudf::string_view>,
40 |                              size_t>
41 |   operator()() const
42 |   {
43 |     return sizeof(cudf::device_storage_type_t<int8_t>);
44 |   }
45 |
46 |   template <typename T>
47 |   constexpr std::enable_if_t<cudf::is_fixed_width<T>(), size_t> __device__
48 |   operator()() const noexcept
49 |   {
50 |     return sizeof(cudf::device_storage_type_t<T>);
51 |   }
52 | };
53 |
54 | /**
55 | * @brief Header for each partition.
56 | *
57 | * The values are stored in big-endian format.
58 | */
59 | struct partition_header {
60 | uint32_t magic_number;
61 | uint32_t row_index; // row index in the source table that this partition started at
62 | uint32_t num_rows;
63 | uint32_t validity_size;
64 | uint32_t offset_size;
65 | uint32_t total_size; // size of validity+offset+data sections
66 | uint32_t num_flattened_columns;
67 | };
68 |
69 | // Alignment values for each buffer type, applied at the end of that buffer type's data
70 | // in each partition. For example, all of the grouped-together validity buffers for
71 | // a given partition get a final 4-byte alignment applied before the offset buffers begin.
72 | constexpr size_t validity_pad = 4;
73 | constexpr size_t offset_pad = 4;
74 | constexpr size_t data_pad = 4;
75 |
76 | /**
77 | * @brief Compute per-partition metadata size.
78 | */
79 | constexpr size_t compute_per_partition_metadata_size(size_t total_columns)
80 | {
81 | auto const has_validity_length = (total_columns + 7) / 8; // has-validity bit per column
82 | return sizeof(partition_header) + has_validity_length;
83 | }
84 |
85 | // align all column size allocations to this boundary so that all output column buffers
86 | // start at that alignment.
87 | static constexpr std::size_t split_align = 64;
88 |
89 | /**
90 | * @brief Buffer type enum
91 | *
92 | * Note: these values matter. Don't rearrange them.
93 | */
94 | enum class buffer_type { VALIDITY = 0, OFFSETS = 1, DATA = 2 };
95 |
96 | } // namespace detail
97 |
98 | } // namespace spark_rapids_jni
99 |
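A small worked example of compute_per_partition_metadata_size: with 10 flattened columns the has-validity bitmask needs (10 + 7) / 8 = 2 bytes on top of the fixed partition_header (seven uint32_t fields, 28 bytes), so each partition carries 30 bytes of metadata before any padding:

#include "shuffle_split_detail.hpp"  // the header above

static_assert(spark_rapids_jni::detail::compute_per_partition_metadata_size(10) ==
                sizeof(spark_rapids_jni::detail::partition_header) + 2,
              "10 flattened columns -> header + 2 bytes of has-validity bits");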
--------------------------------------------------------------------------------
/src/main/cpp/src/spark_rapids_jni_version.cpp.in:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include "spark_rapids_jni_version.h"
18 |
19 | namespace spark_rapids_jni {
20 |
21 | char const Version[] = "@CMAKE_PROJECT_VERSION@ @SPARK_RAPIDS_JNI_COMMIT_DETAILS@";
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/src/main/cpp/src/spark_rapids_jni_version.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | namespace spark_rapids_jni {
20 |
21 | extern char const Version[];
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/src/main/cpp/src/substring_index.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | #pragma once
17 |
18 | #include
19 | #include
20 | #include
21 |
22 | #include
23 |
24 | namespace spark_rapids_jni {
25 |
26 | /**
27 |  * @brief Returns the substring of each string before `count` occurrences of the delimiter.
28 |  *
29 |  * @param strings Strings column
30 |  * @param delimiter The delimiter string used to slice each string
31 |  * @param count The number of delimiter occurrences to include
32 |  * @return A strings column containing the results
33 | */
34 | std::unique_ptr<cudf::column> substring_index(
35 | cudf::strings_column_view const& strings,
36 | cudf::string_scalar const& delimiter,
37 | cudf::size_type count,
38 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
39 |
40 | } // namespace spark_rapids_jni
41 |
--------------------------------------------------------------------------------
/src/main/cpp/src/task_priority.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | namespace spark_rapids_jni {
20 |
21 | /**
22 |  * Get the priority for a task. Tasks with larger priority values
23 |  * should get access to resources ahead of tasks with smaller
24 |  * priority values.
25 | */
26 | long get_task_priority(long attempt_id);
27 |
28 | /**
29 | * Inform the system that a particular task is finished.
30 | */
31 | void task_done(long attempt_id);
32 |
33 | } // namespace spark_rapids_jni
34 |
--------------------------------------------------------------------------------
/src/main/cpp/src/utilities.cu:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include
18 | #include
19 | #include
20 | #include
21 |
22 | #include
23 | #include
24 | #include
25 | #include
26 |
27 | #include
28 |
29 | namespace spark_rapids_jni {
30 |
31 | std::unique_ptr<rmm::device_buffer> bitmask_bitwise_or(
32 |   std::vector<cudf::device_span<cudf::bitmask_type const>> const& input,
33 | rmm::cuda_stream_view stream,
34 | rmm::device_async_resource_ref mr)
35 | {
36 | CUDF_EXPECTS(input.size() > 0, "Empty input");
37 | auto const mask_size = (*input.begin()).size();
38 | CUDF_EXPECTS(
39 | std::all_of(
40 | input.begin(), input.end(), [mask_size](auto mask) { return mask.size() == mask_size; }),
41 | "Encountered size mismatch in inputs");
42 | if (mask_size == 0) {
43 |     return std::make_unique<rmm::device_buffer>(rmm::device_buffer{0, stream, mr});
44 | }
45 |
46 | // move the pointers to the gpu
47 |   std::vector<cudf::bitmask_type const*> h_input(input.size());
48 | std::transform(
49 | input.begin(), input.end(), h_input.begin(), [](auto mask) { return mask.data(); });
50 | auto d_input = cudf::detail::make_device_uvector_async(
51 | h_input, stream, rmm::mr::get_current_device_resource());
52 |
53 |   std::unique_ptr<rmm::device_buffer> out =
54 |     std::make_unique<rmm::device_buffer>(mask_size * sizeof(cudf::bitmask_type), stream, mr);
55 | thrust::transform(rmm::exec_policy(stream),
56 | thrust::make_counting_iterator(0),
57 | thrust::make_counting_iterator(0) + mask_size,
58 |                     static_cast<cudf::bitmask_type*>(out->data()),
59 |                     cuda::proclaim_return_type<cudf::bitmask_type>(
60 | [buffers = d_input.data(),
61 | num_buffers = input.size()] __device__(cudf::size_type word_index) {
62 | cudf::bitmask_type out = buffers[0][word_index];
63 | for (auto idx = 1; idx < num_buffers; idx++) {
64 | out |= buffers[idx][word_index];
65 | }
66 | return out;
67 | }));
68 |
69 | return out;
70 | }
71 |
72 | } // namespace spark_rapids_jni
73 |
--------------------------------------------------------------------------------
/src/main/cpp/src/utilities.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | #include
20 | #include
21 | #include
22 |
23 | #include
24 | #include
25 | #include
26 |
27 | namespace spark_rapids_jni {
28 |
29 | /**
30 | * @brief Bitwise-or an array of equally-sized bitmask buffers into a single output buffer
31 | *
32 | * @param input The array of input bitmask buffers.
33 | * @param stream CUDA stream used for device memory operations and kernel launches.
34 |  * @param mr Device memory resource used to allocate the returned buffer's memory.
35 | *
36 | */
37 | std::unique_ptr<rmm::device_buffer> bitmask_bitwise_or(
38 |   std::vector<cudf::device_span<cudf::bitmask_type const>> const& input,
39 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
40 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
41 |
42 | } // namespace spark_rapids_jni
43 |
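A minimal usage sketch, assuming the input element type is the device span of bitmask words shown in the signature above and that both spans cover the same number of words:

#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include "utilities.hpp"

void or_masks_example(cudf::device_span<cudf::bitmask_type const> a,
                      cudf::device_span<cudf::bitmask_type const> b)
{
  // The result buffer holds a[i] | b[i] for every bitmask word i.
  auto merged = spark_rapids_jni::bitmask_bitwise_or({a, b});
}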
--------------------------------------------------------------------------------
/src/main/cpp/src/version.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | namespace spark_rapids_jni {
20 |
21 | /**
22 | * @brief Enum class representing different Spark platform types.
23 | * The values must match the ordinal values defined in SparkPlatformType.java.
24 | * - VANILLA_SPARK: Represents the standard Apache Spark platform.
25 | * - DATABRICKS: Represents the Databricks platform.
26 | * - CLOUDERA: Represents the Cloudera platform.
27 | * - NUM_PLATFORMS: Represents the total number of platforms defined.
28 | */
29 | enum class spark_platform_type { VANILLA_SPARK = 0, DATABRICKS, CLOUDERA, NUM_PLATFORMS };
30 |
31 | class spark_system {
32 | public:
33 | /**
34 | * @brief Constructor to initialize the spark system with platform type and version.
35 |    * NOTE: The `platform_ordinal` MUST be kept in sync with SparkPlatformType.java
36 | * @param platform_ordinal The platform ordinal value.
37 | * @param major Major version number.
38 | * @param minor Minor version number.
39 | * @param patch Patch version number.
40 | */
41 | spark_system(int platform_ordinal, int major_, int minor_, int patch_)
42 |     : platform_type{static_cast<spark_platform_type>(platform_ordinal)},
43 | major{major_},
44 | minor{minor_},
45 | patch{patch_}
46 | {
47 | }
48 |
49 | bool is_vanilla_spark() const { return platform_type == spark_platform_type::VANILLA_SPARK; }
50 | bool is_databricks() const { return platform_type == spark_platform_type::DATABRICKS; }
51 |
52 | bool is_version_eq(int major_, int minor_, int patch_) const
53 | {
54 | return major == major_ && minor == minor_ && patch == patch_;
55 | }
56 |
57 | bool is_version_ge(int major_, int minor_, int patch_) const
58 | {
59 | return (major > major_) || (major == major_ && minor > minor_) ||
60 | (major == major_ && minor == minor_ && patch >= patch_);
61 | }
62 |
63 | bool is_vanilla_320() const { return is_vanilla_spark() && is_version_eq(3, 2, 0); }
64 |
65 | bool is_vanilla_400_or_later() const { return is_vanilla_spark() && is_version_ge(4, 0, 0); }
66 |
67 | bool is_databricks_14_3_or_later() const { return is_databricks() && is_version_ge(14, 3, 0); }
68 |
69 | private:
70 | spark_platform_type platform_type;
71 | int major, minor, patch;
72 | };
73 |
74 | } // namespace spark_rapids_jni
75 |
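A minimal sketch of how the platform and version checks compose, using ordinal 0 (VANILLA_SPARK) and an arbitrary 3.5.1 version as example inputs:

#include <cassert>

#include "version.hpp"

int main()
{
  spark_rapids_jni::spark_system sys{0 /*VANILLA_SPARK*/, 3, 5, 1};
  assert(sys.is_vanilla_spark());
  assert(sys.is_version_ge(3, 2, 0));    // 3.5.1 >= 3.2.0
  assert(!sys.is_vanilla_320());         // equality check against exactly 3.2.0
  assert(!sys.is_vanilla_400_or_later());
  return 0;
}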
--------------------------------------------------------------------------------
/src/main/cpp/src/zorder.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #pragma once
18 |
19 | #include
20 | #include
21 |
22 | #include
23 | #include
24 |
25 | #include
26 |
27 | namespace spark_rapids_jni {
28 |
29 | std::unique_ptr<cudf::column> interleave_bits(
30 | cudf::table_view const& tbl,
31 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
32 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
33 |
34 | std::unique_ptr<cudf::column> hilbert_index(
35 | int32_t const num_bits,
36 | cudf::table_view const& tbl,
37 | rmm::cuda_stream_view stream = cudf::get_default_stream(),
38 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
39 |
40 | } // namespace spark_rapids_jni
41 |
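A minimal usage sketch, assuming `tbl` holds the integer columns to encode and that 10 bits per dimension is an acceptable Hilbert-index precision:

#include <cudf/table/table_view.hpp>

#include "zorder.hpp"

void zorder_example(cudf::table_view const& tbl)
{
  // Interleave the bits of each row's values to produce one Z-order (Morton) key per row.
  auto z_keys = spark_rapids_jni::interleave_bits(tbl);

  // Hilbert curve index using 10 bits per dimension.
  auto h_keys = spark_rapids_jni::hilbert_index(10, tbl);
}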
--------------------------------------------------------------------------------
/src/main/cpp/tests/cast_long_to_binary_string.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include
18 | #include
19 |
20 | #include
21 |
22 | #include
23 |
24 | #include
25 |
26 | using namespace cudf;
27 |
28 | constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::FIRST_ERROR};
29 |
30 | struct LongToBinaryStringTests : public cudf::test::BaseFixture {};
31 |
32 | TEST_F(LongToBinaryStringTests, FromLongToBinary)
33 | {
34 |   auto const longs = cudf::test::fixed_width_column_wrapper<int64_t>{
35 |     0L, 1L, 10L, -1L, std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::min()};
36 |
37 | auto results = spark_rapids_jni::long_to_binary_string(longs, cudf::get_default_stream());
38 |
39 | auto const expected = cudf::test::strings_column_wrapper{
40 | "0",
41 | "1",
42 | "1010",
43 | "1111111111111111111111111111111111111111111111111111111111111111",
44 | "111111111111111111111111111111111111111111111111111111111111111",
45 | "1000000000000000000000000000000000000000000000000000000000000000"};
46 |
47 | CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/cpp/tests/list_slice.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include
18 | #include
19 |
20 | #include
21 |
22 | #include
23 |
24 | using namespace cudf;
25 |
26 | constexpr test::debug_output_level verbosity{test::debug_output_level::FIRST_ERROR};
27 |
28 | struct ListSliceTests : public test::BaseFixture {};
29 |
30 | TEST_F(ListSliceTests, ListSliceTest)
31 | {
32 |   auto const list_col = test::lists_column_wrapper<int32_t>{{0, 1}, {2, 3, 7, 8}, {4, 5}};
33 | {
34 | size_type start = 1;
35 | size_type length = 2;
36 |
37 | auto results = spark_rapids_jni::list_slice(list_col, start, length);
38 |
39 |     auto const expected = test::lists_column_wrapper<int32_t>{{0, 1}, {2, 3}, {4, 5}};
40 |
41 | CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
42 | }
43 | {
44 | size_type start = 1;
45 |     auto const length = test::fixed_width_column_wrapper<size_type>{0, 1, 2};
46 |
47 | auto results = spark_rapids_jni::list_slice(list_col, start, length);
48 |
49 |     auto const expected = test::lists_column_wrapper<int32_t>{{}, {2}, {4, 5}};
50 |
51 | CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
52 | }
53 | {
54 |     auto const start = test::fixed_width_column_wrapper<size_type>{1, 2, 2};
55 | size_type length = 2;
56 |
57 | auto results = spark_rapids_jni::list_slice(list_col, start, length);
58 |
59 |     auto const expected = test::lists_column_wrapper<int32_t>{{0, 1}, {3, 7}, {5}};
60 |
61 | CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
62 | }
63 | {
64 |     auto const start = test::fixed_width_column_wrapper<size_type>{1, 2, 1};
65 |     auto const length = test::fixed_width_column_wrapper<size_type>{0, 1, 2};
66 |
67 | auto results = spark_rapids_jni::list_slice(list_col, start, length);
68 |
69 |     auto const expected = test::lists_column_wrapper<int32_t>{{}, {3}, {4, 5}};
70 |
71 | CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity);
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/src/main/cpp/tests/substring_index.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include
18 | #include
19 | #include
20 |
21 | #include
22 | #include
23 | #include
24 | #include
25 |
26 | #include
27 | #include
28 | #include
29 |
30 | #include
31 |
32 | #include
33 | #include
34 |
35 | using namespace cudf;
36 |
37 | struct SubstringIndexTests : public test::BaseFixture {};
38 |
39 | TEST_F(SubstringIndexTests, ScalarDelimiter)
40 | {
41 | auto col0 = test::strings_column_wrapper({"www.yahoo.com",
42 | "www.apache..org",
43 | "tennis...com",
44 | "nvidia....com",
45 | "google...........com",
46 | "microsoft...c.....co..m"});
47 |
48 | auto exp_results = test::strings_column_wrapper(
49 | {"www.yahoo.com", "www.apache.", "tennis..", "nvidia..", "google..", "microsoft.."});
50 |
51 | auto results =
52 | spark_rapids_jni::substring_index(strings_column_view{col0}, string_scalar("."), 3);
53 | CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results);
54 | }
55 |
--------------------------------------------------------------------------------
/src/main/cpp/tests/test_utilities.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #include
18 | #include
19 | #include
20 |
21 | template <typename T>
22 | std::vector random_values(std::size_t size)
23 | {
24 | std::vector values(size);
25 |
26 | using T1 = T;
27 | using uniform_distribution =
28 |     typename std::conditional_t<std::is_same_v<T1, bool>,
29 |                                 std::bernoulli_distribution,
30 |                                 std::conditional_t<std::is_floating_point_v<T1>,
31 |                                                    std::uniform_real_distribution<T1>,
32 |                                                    std::uniform_int_distribution<T1>>>;
33 |
34 | static constexpr auto seed = 0xf00d;
35 | static std::mt19937 engine{seed};
36 | static uniform_distribution dist{};
37 | std::generate_n(values.begin(), size, [&]() { return T{dist(engine)}; });
38 |
39 | return values;
40 | }
41 |
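A small usage sketch: build a test column from values produced by the helper above (the wrapper type and its iterator constructor come from the cudf test utilities):

#include <cudf_test/column_wrapper.hpp>

inline cudf::test::fixed_width_column_wrapper<int32_t> random_int32_column()
{
  auto const vals = random_values<int32_t>(1000);
  return cudf::test::fixed_width_column_wrapper<int32_t>(vals.begin(), vals.end());
}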
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/CaseWhen.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | import ai.rapids.cudf.*;
20 |
21 |
22 | /**
23 |  * Execute SQL `case when` semantics.
24 |  * If there are multiple branches and each branch uses a scalar to generate its value,
25 |  * then this class is fast to use because it does not generate temporary string columns.
26 | *
27 | * E.g.:
28 | * SQL is:
29 | * select
30 | * case
31 | * when bool_1_expr then "value_1"
32 | * when bool_2_expr then "value_2"
33 | * when bool_3_expr then "value_3"
34 | * else "value_else"
35 | * end
36 | * from tab
37 | *
38 | * Execution steps:
39 |  * Execute the bool expressions to get bool columns, e.g.:
40 |  * bool column 1: [true, false, false, false] // bool_1_expr result
41 |  * bool column 2: [false, true, false, false] // bool_2_expr result
42 |  * bool column 3: [false, false, true, false] // bool_3_expr result
43 |  * Execute `selectFirstTrueIndex` to get the column index of the first true in the bool columns.
44 |  * Generate a column to store the scalars: "value_1", "value_2", "value_3", "value_else"
45 | * Execute `Table.gather` to generate the final output column
46 | *
47 | */
48 | public class CaseWhen {
49 |
50 | /**
51 | *
52 |  * Select the column index of the first true value in the bool columns.
53 |  * For a row that does not contain any true value, use the end index (the number of columns).
54 | *
55 | * e.g.:
56 | * column 0: true, false, false, false
57 | * column 1: false, true, false, false
58 | * column 2: false, false, true, false
59 | *
60 |  * 1st row is: true, false, false; first true index is 0
61 |  * 2nd row is: false, true, false; first true index is 1
62 |  * 3rd row is: false, false, true; first true index is 2
63 | * 4th row is: false, false, false; do not find true, set index to the end index 3
64 | *
65 | * output column: 0, 1, 2, 3
66 | * In the `case when` context, here 3 index means using NULL value.
67 | *
68 | */
69 | public static ColumnVector selectFirstTrueIndex(ColumnVector[] boolColumns) {
70 | for (ColumnVector cv : boolColumns) {
71 | assert(cv.getType().equals(DType.BOOL8)) : "Columns must be bools";
72 | }
73 |
74 | long[] boolHandles = new long[boolColumns.length];
75 | for (int i = 0; i < boolColumns.length; ++i) {
76 | boolHandles[i] = boolColumns[i].getNativeView();
77 | }
78 |
79 | return new ColumnVector(selectFirstTrueIndex(boolHandles));
80 | }
81 |
82 | private static native long selectFirstTrueIndex(long[] boolHandles);
83 | }
84 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/CastException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2022, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.nvidia.spark.rapids.jni;
17 |
18 | /**
19 |  * CastException is an exception thrown by the JNI in the event of a casting error.
20 | */
21 | public class CastException extends RuntimeException {
22 | private final int rowWithError;
23 | private final String stringWithError;
24 |
25 | CastException(String stringWithError, int rowWithError) {
26 | super("Error casting data on row " + String.valueOf(rowWithError) + ": " + stringWithError);
27 |
28 | this.rowWithError = rowWithError;
29 | this.stringWithError = stringWithError;
30 | }
31 |
32 | public int getRowWithError() {
33 | return rowWithError;
34 | }
35 |
36 | public String getStringWithError() {
37 | return stringWithError;
38 | }
39 | }
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/CpuRetryOOM.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | /**
20 | * A special version of an out of memory error that indicates we ran out of off heap memory, but
21 | * should roll back to a point when all memory for the task is spillable and then retry the
22 | * operation.
23 | */
24 | public class CpuRetryOOM extends OffHeapOOM {
25 | public CpuRetryOOM() {
26 | super();
27 | }
28 |
29 | public CpuRetryOOM(String message) {
30 | super(message);
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/CpuSplitAndRetryOOM.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | /**
20 | * A special version of an out of memory error that indicates we ran out of off heap memory, but
21 | * should roll back to a point when all memory for the task is spillable and then retry the
22 | * operation with the input data split to make it ideally use less off heap memory overall.
23 | */
24 | public class CpuSplitAndRetryOOM extends OffHeapOOM {
25 | public CpuSplitAndRetryOOM() {
26 | super();
27 | }
28 |
29 | public CpuSplitAndRetryOOM(String message) {
30 | super(message);
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/DateTimeRebase.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | import ai.rapids.cudf.*;
20 |
21 | /**
22 | * This will be removed after the plugin picks up DateTimeUtils class.
23 | */
24 | public class DateTimeRebase {
25 | static {
26 | NativeDepsLoader.loadNativeDeps();
27 | }
28 |
29 | public static ColumnVector rebaseGregorianToJulian(ColumnView input) {
30 | return DateTimeUtils.rebaseGregorianToJulian(input);
31 | }
32 |
33 | public static ColumnVector rebaseJulianToGregorian(ColumnView input) {
34 | return DateTimeUtils.rebaseJulianToGregorian(input);
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/GpuOOM.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | /**
20 | * A special version of an out of memory error that indicates we ran out of GPU memory. This is
21 | * mostly to avoid a fatal error that would force the worker process to restart. This should be
22 | * recoverable on the GPU.
23 | */
24 | public class GpuOOM extends RuntimeException {
25 | public GpuOOM() {
26 | super();
27 | }
28 |
29 | public GpuOOM(String message) {
30 | super(message);
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/GpuRetryOOM.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | /**
20 | * A special version of an out of memory error that indicates we ran out of GPU memory, but should
21 | * roll back to a point when all memory for the task is spillable and then retry the operation.
22 | */
23 | public class GpuRetryOOM extends GpuOOM {
24 | public GpuRetryOOM() {
25 | super();
26 | }
27 |
28 | public GpuRetryOOM(String message) {
29 | super(message);
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/GpuSplitAndRetryOOM.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | /**
20 | * A special version of an out of memory error that indicates we ran out of GPU memory, but should
21 | * roll back to a point when all memory for the task is spillable and then retry the operation
22 | * with the input data split to make it ideally use less GPU memory overall.
23 | */
24 | public class GpuSplitAndRetryOOM extends GpuOOM {
25 | public GpuSplitAndRetryOOM() {
26 | super();
27 | }
28 |
29 | public GpuSplitAndRetryOOM(String message) {
30 | super(message);
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/GpuSubstringIndexUtils.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | import ai.rapids.cudf.*;
20 |
21 | public class GpuSubstringIndexUtils {
22 | static{
23 | NativeDepsLoader.loadNativeDeps();
24 | }
25 |
26 | public static ColumnVector substringIndex(ColumnView cv, Scalar delimiter, int count){
27 | return new ColumnVector(substringIndex(cv.getNativeView(), delimiter.getScalarHandle(), count));
28 | }
29 |
30 | private static native long substringIndex(long columnView, long delimiter, int count) throws CudfException;
31 | }
32 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/Map.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | import ai.rapids.cudf.ColumnVector;
20 | import ai.rapids.cudf.ColumnView;
21 | import ai.rapids.cudf.CudfException;
22 | import ai.rapids.cudf.DType;
23 | import ai.rapids.cudf.NativeDepsLoader;
24 |
25 | /**
26 | * APIs for map column operations.
27 | */
28 | public class Map {
29 | static {
30 | NativeDepsLoader.loadNativeDeps();
31 | }
32 |
33 | /**
34 |    * Sort the entries of each map in a map column according to the map's keys.
35 |    * Note:
36 |    * The map keys MUST NOT be null.
37 |    * Maps are assumed not to contain duplicate keys.
38 |    * Nested maps inside the `KEY` column are not normalized or sorted; only the
39 |    * first-level LIST(STRUCT(KEY, VALUE)) is treated as the map type.
40 |    *
41 |    * @param cv Input map column; must be of LIST(STRUCT(KEY, VALUE))
42 |    * type.
43 | * @param isDescending True if sort in descending order, false if sort in
44 | * ascending order
45 |    * @return Sorted map column according to the sort order of the keys in each map.
46 | * @throws CudfException If the input column is not a LIST(STRUCT(KEY, VALUE))
47 | * column or the keys contain nulls.
48 | */
49 | public static ColumnVector sort(ColumnView cv, boolean isDescending) {
50 | assert (cv.getType().equals(DType.LIST));
51 | long r = sort(cv.getNativeView(), isDescending);
52 | return new ColumnVector(r);
53 | }
54 |
55 | private static native long sort(long handle, boolean isDescending) throws CudfException;
56 | }
57 |
58 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/OffHeapOOM.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | /**
20 | * A special version of an out of memory error that indicates we ran out of off heap CPU memory.
21 | * This is mostly to avoid a fatal error that would force the worker process to restart. This
22 | * should be recoverable.
23 | */
24 | public class OffHeapOOM extends RuntimeException {
25 | public OffHeapOOM() {
26 | super();
27 | }
28 |
29 | public OffHeapOOM(String message) {
30 | super(message);
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/Pair.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | /**
20 | * A utility class for holding a pair of values.
21 | */
22 | public class Pair<K, V> {
23 | private final K left;
24 | private final V right;
25 |
26 | public Pair(K left, V right) {
27 | this.left = left;
28 | this.right = right;
29 | }
30 |
31 | public K getLeft() {
32 | return left;
33 | }
34 |
35 | public V getRight() {
36 | return right;
37 | }
38 |
39 |   public static <K, V> Pair<K, V> of(K left, V right) {
40 | return new Pair<>(left, right);
41 | }
42 | }
43 |
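A minimal usage sketch (the values are illustrative):

    Pair<String, Long> rowCount = Pair.of("rowCount", 1024L);
    String name = rowCount.getLeft();   // "rowCount"
    long value = rowCount.getRight();   // 1024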
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/Preconditions.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | import java.util.function.Supplier;
20 |
21 | /**
22 | * This class contains utility methods for checking preconditions.
23 | */
24 | public class Preconditions {
25 | /**
26 | * Check if the condition is true, otherwise throw an IllegalStateException with the given message.
27 | */
28 | public static void ensure(boolean condition, String message) {
29 | if (!condition) {
30 | throw new IllegalStateException(message);
31 | }
32 | }
33 |
34 | /**
35 | * Check if the condition is true, otherwise throw an IllegalStateException with the given message supplier.
36 | */
37 |   public static void ensure(boolean condition, Supplier<String> messageSupplier) {
38 | if (!condition) {
39 | throw new IllegalStateException(messageSupplier.get());
40 | }
41 | }
42 |
43 | /**
44 | * Check if the value is non-negative, otherwise throw an IllegalArgumentException with the given message.
45 | * @param value the value to check
46 | * @param name the name of the value
47 | * @return the value if it is non-negative
48 | * @throws IllegalArgumentException if the value is negative
49 | */
50 | public static int ensureNonNegative(int value, String name) {
51 | if (value < 0) {
52 | throw new IllegalArgumentException(name + " must be non-negative, but was " + value);
53 | }
54 | return value;
55 | }
56 |
57 | /**
58 | * Check if the value is non-negative, otherwise throw an IllegalArgumentException with the given message.
59 | * @param value the value to check
60 | * @param name the name of the value
61 | * @return the value if it is non-negative
62 | * @throws IllegalArgumentException if the value is negative
63 | */
64 | public static long ensureNonNegative(long value, String name) {
65 | if (value < 0) {
66 | throw new IllegalArgumentException(name + " must be non-negative, but was " + value);
67 | }
68 | return value;
69 | }
70 | }
71 |
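A minimal usage sketch (the local variables requestedRows, buffer, offset and limit are hypothetical):

    int rows = Preconditions.ensureNonNegative(requestedRows, "requestedRows");
    Preconditions.ensure(buffer != null, "buffer must be allocated before writing");
    // The supplier form defers building the message until the check actually fails.
    Preconditions.ensure(offset <= limit, () -> "offset " + offset + " exceeds limit " + limit);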
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/RegexRewriteUtils.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | import ai.rapids.cudf.*;
20 |
21 | public class RegexRewriteUtils {
22 | static {
23 | NativeDepsLoader.loadNativeDeps();
24 | }
25 |
26 | /**
27 |    * @brief Check if the input strings contain the regex pattern `literal[start-end]{len,}`, i.e. a
28 |    * literal string followed by at least len characters whose codepoints are in the range
29 |    * [start, end].
30 | *
31 | * @param input Column of strings to check for literal.
32 | * @param literal UTF-8 encoded string to check in strings column.
33 | * @param len Minimum number of characters to check after the literal.
34 | * @param start Minimum UTF-8 codepoint value to check for in the range.
35 | * @param end Maximum UTF-8 codepoint value to check for in the range.
36 | * @return ColumnVector of booleans where true indicates the string contains the pattern.
37 | */
38 | public static ColumnVector literalRangePattern(ColumnVector input, Scalar literal, int len, int start, int end) {
39 | assert(input.getType().equals(DType.STRING)) : "column must be a String";
40 | return new ColumnVector(literalRangePattern(input.getNativeView(), literal.getScalarHandle(), len, start, end));
41 | }
42 |
43 | private static native long literalRangePattern(long input, long literal, int len, int start, int end);
44 | }
45 |
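A small worked example mirroring RegexRewriteUtilsTest below: with literal "abc", len 3 and the codepoint range 48-57 ('0'-'9'), the call checks for "abc" followed by at least three digits.

    try (ColumnVector input = ColumnVector.fromStrings("abc123", "abc12");
         Scalar literal = Scalar.fromString("abc");
         ColumnVector matches = RegexRewriteUtils.literalRangePattern(input, literal, 3, 48, 57)) {
      // matches == [true, false]: "abc123" has three digits after "abc", "abc12" only two
    }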
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/RmmSparkThreadState.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | /**
20 | * The state of a given thread according to RmmSpark. This is intended really for debugging and
21 | * testing only.
22 | */
23 | public enum RmmSparkThreadState {
24 | UNKNOWN(-1), // thread is not associated with anything...
25 | THREAD_RUNNING(0), // task thread running normally
26 | THREAD_ALLOC(1), // task thread in the middle of doing an allocation
27 | THREAD_ALLOC_FREE(2), // task thread in the middle of doing an allocation and a free happened
28 | THREAD_BLOCKED(3), // task thread that is temporarily blocked
29 | THREAD_BUFN_THROW(4), // task thread that should throw an exception to roll back before blocking
30 | THREAD_BUFN_WAIT(5), // task thread that threw an exception to roll back and now should
31 | // block the next time alloc is called
32 | THREAD_BUFN(6), // task thread that is blocked until higher priority tasks start to succeed
33 | THREAD_SPLIT_THROW(7), // task thread that should throw an exception to split input and retry
34 | THREAD_REMOVE_THROW(8); // task thread that is being removed and needs to throw an exception
35 |
36 | private final int nativeId;
37 |
38 | RmmSparkThreadState(int nativeId) {
39 | this.nativeId = nativeId;
40 | }
41 |
42 | static RmmSparkThreadState fromNativeId(int nativeId) {
43 | for (RmmSparkThreadState ts : RmmSparkThreadState.values()) {
44 | if (ts.nativeId == nativeId) {
45 | return ts;
46 | }
47 | }
48 | throw new IllegalArgumentException("Could not find an ID for " + nativeId);
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/SparkPlatformType.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | /**
20 | * Enum representing the platform.
21 | * NOTE: MUST keep sync with version.hpp
22 | * The ordinal values are used to represent the platform in JNI calls.
23 | */
24 | public enum SparkPlatformType {
25 | // ordinal 0 is vanilla Spark, JNI and kernel use 0 representing Spark
26 | VANILLA_SPARK,
27 |
28 | // ordinal 1 is Databricks, JNI and kernel use 1 representing Databricks
29 | DATABRICKS,
30 |
31 | // ordinal 2 is Cloudera, JNI and kernel use 2 representing Cloudera
32 | CLOUDERA;
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/TaskPriority.java:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * Copyright (c) 2025, NVIDIA CORPORATION.
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | *
17 | */
18 |
19 | package com.nvidia.spark.rapids.jni;
20 |
21 | import ai.rapids.cudf.NativeDepsLoader;
22 |
23 | /**
24 |  * Get the priority for any task. If the priority for one task is larger than the priority for another
25 |  * task, then the task with the larger priority value should get access to resources before the task
26 |  * with the lower priority value.
27 | */
28 | public class TaskPriority {
29 | static {
30 | NativeDepsLoader.loadNativeDeps();
31 | }
32 |
33 | public static native long getTaskPriority(long taskAttemptId);
34 |
35 | public static native void taskDone(long taskAttemptId);
36 | }
37 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/ThreadStateRegistry.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | import org.slf4j.Logger;
20 | import org.slf4j.LoggerFactory;
21 |
22 | import java.util.HashMap;
23 | import java.util.HashSet;
24 |
25 | /**
26 | * This is used to allow us to map a native thread id to a java thread so we can look at the
27 | * state from a java perspective.
28 | */
29 | class ThreadStateRegistry {
30 | private static final Logger LOG = LoggerFactory.getLogger(ThreadStateRegistry.class);
31 |
32 |   private static final HashMap<Long, Thread> knownThreads = new HashMap<>();
33 |
34 | public static synchronized void addThread(long nativeId, Thread t) {
35 | knownThreads.put(nativeId, t);
36 | }
37 |
38 | // Typically called from JNI
39 | public static synchronized void removeThread(long threadId) {
40 | knownThreads.remove(threadId);
41 | }
42 |
43 | // This is likely called from JNI
44 | public static synchronized boolean isThreadBlocked(long nativeId) {
45 | Thread t = knownThreads.get(nativeId);
46 | if (t == null || !t.isAlive()) {
47 | // Dead is as good as blocked. This is mostly for tests, not so much for
48 | // production
49 | return true;
50 | }
51 | Thread.State state = t.getState();
52 | switch (state) {
53 | case BLOCKED:
54 | // fall through
55 | case WAITING:
56 | // fall through
57 | case TIMED_WAITING:
58 | return true;
59 | case TERMINATED:
60 | // Technically there is a race with `!t.isAlive` check above, and dead is as good as
61 | // blocked.
62 | return true;
63 | default:
64 | return false;
65 | }
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/Version.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | public class Version {
20 | private final int platformOrdinal;
21 | private final int major;
22 | private final int minor;
23 | private final int patch;
24 |
25 | public Version(SparkPlatformType platform, int major, int minor, int patch) {
26 | this.platformOrdinal = platform.ordinal();
27 | this.major = major;
28 | this.minor = minor;
29 | this.patch = patch;
30 | }
31 |
32 | /**
33 | * Note: this is used in the JNI code and kernel code, so it must match the
34 | * enum SparkPlatformType in com.nvidia.spark.rapids.jni.SparkPlatformType.
35 | */
36 | public int getPlatformOrdinal() {
37 | return platformOrdinal;
38 | }
39 |
40 | public int getMajor() {
41 | return major;
42 | }
43 |
44 | public int getMinor() {
45 | return minor;
46 | }
47 |
48 | public int getPatch() {
49 | return patch;
50 | }
51 | }
52 |
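A minimal construction sketch (the version numbers are illustrative):

    Version version = new Version(SparkPlatformType.DATABRICKS, 3, 4, 1);
    int platform = version.getPlatformOrdinal(); // 1, the ordinal DATABRICKS carries per SparkPlatformType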
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/ColumnViewInfo.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni.kudo;
18 |
19 | import ai.rapids.cudf.*;
20 |
21 | import java.util.Optional;
22 |
23 | import static com.nvidia.spark.rapids.jni.Preconditions.ensureNonNegative;
24 |
25 | class ColumnViewInfo {
26 | private final DType dtype;
27 | private final ColumnOffsetInfo offsetInfo;
28 | private final int nullCount;
29 | private final int rowCount;
30 |
31 | public ColumnViewInfo(DType dtype, ColumnOffsetInfo offsetInfo,
32 | int nullCount, int rowCount) {
33 | ensureNonNegative(nullCount, "nullCount");
34 | ensureNonNegative(rowCount, "rowCount");
35 | this.dtype = dtype;
36 | this.offsetInfo = offsetInfo;
37 | this.nullCount = nullCount;
38 | this.rowCount = rowCount;
39 | }
40 |
41 | ColumnView buildColumnView(DeviceMemoryBuffer buffer, ColumnView[] childrenView) {
42 | long baseAddress = buffer.getAddress();
43 |
44 | if (dtype.isNestedType()) {
45 | return new ColumnView(dtype, rowCount, Optional.of((long)nullCount),
46 | offsetInfo.getValidityBuffer(baseAddress),
47 | offsetInfo.getOffsetBuffer(baseAddress),
48 | childrenView);
49 | } else {
50 | return new ColumnView(dtype, rowCount, Optional.of((long)nullCount),
51 | offsetInfo.getDataBuffer(baseAddress),
52 | offsetInfo.getValidityBuffer(baseAddress),
53 | offsetInfo.getOffsetBuffer(baseAddress));
54 | }
55 | }
56 |
57 | @Override
58 | public String toString() {
59 | return "ColumnViewInfo{" +
60 | "dtype=" + dtype +
61 | ", offsetInfo=" + offsetInfo +
62 | ", nullCount=" + nullCount +
63 | ", rowCount=" + rowCount +
64 | '}';
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/DataOutputStreamWriter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni.kudo;
18 |
19 | import ai.rapids.cudf.HostMemoryBuffer;
20 |
21 | import java.io.DataOutputStream;
22 | import java.io.IOException;
23 |
24 | /**
25 | * Visible for testing
26 | */
27 | class DataOutputStreamWriter implements DataWriter {
28 | private final byte[] arrayBuffer = new byte[1024];
29 | private final DataOutputStream dout;
30 |
31 | public DataOutputStreamWriter(DataOutputStream dout) {
32 | this.dout = dout;
33 | }
34 |
35 | @Override
36 | public void writeInt(int i) throws IOException {
37 | dout.writeInt(i);
38 | }
39 |
40 | @Override
41 | public void copyDataFrom(HostMemoryBuffer src, long srcOffset, long len) throws IOException {
42 | long dataLeft = len;
43 | while (dataLeft > 0) {
44 | int amountToCopy = (int) Math.min(arrayBuffer.length, dataLeft);
45 | src.getBytes(arrayBuffer, 0, srcOffset, amountToCopy);
46 | dout.write(arrayBuffer, 0, amountToCopy);
47 | srcOffset += amountToCopy;
48 | dataLeft -= amountToCopy;
49 | }
50 | }
51 |
52 | @Override
53 | public void flush() throws IOException {
54 | dout.flush();
55 | }
56 |
57 | @Override
58 | public void write(byte[] arr, int offset, int length) throws IOException {
59 | dout.write(arr, offset, length);
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/DataWriter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni.kudo;
18 |
19 | import ai.rapids.cudf.HostMemoryBuffer;
20 |
21 | import java.io.IOException;
22 |
23 | /**
24 | * Output data writer for kudo serializer.
25 | */
26 | public interface DataWriter {
27 |
28 | /**
29 | * Write int in network byte order.
30 | */
31 | void writeInt(int i) throws IOException;
32 |
33 | /**
34 | * Reserve space in the buffer for the given size.
35 | */
36 | default void reserve(int size) throws IOException {}
37 |
38 | /**
39 | * Copy data from src starting at srcOffset and going for len bytes.
40 | *
41 | * @param src where to copy from.
42 | * @param srcOffset offset to start at.
43 | * @param len amount to copy.
44 | */
45 | void copyDataFrom(HostMemoryBuffer src, long srcOffset, long len) throws IOException;
46 |
47 | void flush() throws IOException;
48 |
49 | /**
50 | * Copy part of byte array to this writer.
51 | */
52 | void write(byte[] arr, int offset, int length) throws IOException;
53 | }
54 |
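A minimal sketch of how code in this package might drive a DataWriter through the DataOutputStreamWriter above (the payload array is hypothetical and exception handling is omitted):

    byte[] payload = new byte[]{1, 2, 3, 4};
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataWriter writer = new DataOutputStreamWriter(new DataOutputStream(bytes));
    writer.writeInt(payload.length);           // length prefix in network byte order
    writer.write(payload, 0, payload.length);  // followed by the raw bytes
    writer.flush();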
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/DumpOption.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni.kudo;
18 |
19 | public enum DumpOption {
20 | Always,
21 | OnFailure,
22 | Never
23 | }
24 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/MergeMetrics.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni.kudo;
18 |
19 | public class MergeMetrics {
20 | // The time it took to calculate combined header in nanoseconds
21 | private final long calcHeaderTime;
22 | // The time it took to merge the buffers into the host buffer in nanoseconds
23 | private final long mergeIntoHostBufferTime;
24 | // The time it took to convert the host buffer into a contiguous table in nanoseconds
25 | private final long convertToTableTime;
26 |
27 | public MergeMetrics(long calcHeaderTime, long mergeIntoHostBufferTime,
28 | long convertToTableTime) {
29 | this.calcHeaderTime = calcHeaderTime;
30 | this.mergeIntoHostBufferTime = mergeIntoHostBufferTime;
31 | this.convertToTableTime = convertToTableTime;
32 | }
33 |
34 | public long getCalcHeaderTime() {
35 | return calcHeaderTime;
36 | }
37 |
38 | public long getMergeIntoHostBufferTime() {
39 | return mergeIntoHostBufferTime;
40 | }
41 |
42 | public long getConvertToTableTime() {
43 | return convertToTableTime;
44 | }
45 |
46 | public static Builder builder() {
47 | return new Builder();
48 | }
49 |
50 | public static Builder builder(MergeMetrics metrics) {
51 | return new Builder()
52 | .calcHeaderTime(metrics.calcHeaderTime)
53 | .mergeIntoHostBufferTime(metrics.mergeIntoHostBufferTime)
54 | .convertToTableTime(metrics.convertToTableTime);
55 | }
56 |
57 |
58 | public static class Builder {
59 | private long calcHeaderTime;
60 | private long mergeIntoHostBufferTime;
61 | private long convertToTableTime;
62 |
63 | public Builder calcHeaderTime(long calcHeaderTime) {
64 | this.calcHeaderTime = calcHeaderTime;
65 | return this;
66 | }
67 |
68 | public Builder mergeIntoHostBufferTime(long mergeIntoHostBufferTime) {
69 | this.mergeIntoHostBufferTime = mergeIntoHostBufferTime;
70 | return this;
71 | }
72 |
73 | public Builder convertToTableTime(long convertToTableTime) {
74 | this.convertToTableTime = convertToTableTime;
75 | return this;
76 | }
77 |
78 | public MergeMetrics build() {
79 | return new MergeMetrics(calcHeaderTime, mergeIntoHostBufferTime, convertToTableTime);
80 | }
81 | }
82 | }
83 |
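A minimal sketch of the builder (the timing variables are hypothetical, in nanoseconds):

    MergeMetrics metrics = MergeMetrics.builder()
        .calcHeaderTime(headerNanos)
        .mergeIntoHostBufferTime(mergeNanos)
        .convertToTableTime(convertNanos)
        .build();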
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/MergeOptions.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni.kudo;
18 |
19 | import java.io.OutputStream;
20 | import java.util.function.Supplier;
21 |
22 | public class MergeOptions {
23 | private final DumpOption dumpOption;
24 |   private final Supplier<OutputStream> outputStreamSupplier;
25 |   private final String filePath;
26 | 
27 |   public MergeOptions(DumpOption dumpOption, Supplier<OutputStream> outputStreamSupplier, String filePath) {
28 | this.dumpOption = dumpOption;
29 | this.outputStreamSupplier = outputStreamSupplier;
30 | this.filePath = filePath;
31 | }
32 |
33 | public DumpOption getDumpOption() {
34 | return dumpOption;
35 | }
36 |
37 |   public Supplier<OutputStream> getOutputStreamSupplier() {
38 | return outputStreamSupplier;
39 | }
40 |
41 | public String getFilePath() {
42 | return filePath;
43 | }
44 | }
45 |
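A minimal construction sketch that dumps the merge inputs only when a merge fails (the path is illustrative; java.io imports omitted):

    Supplier<OutputStream> dumpTarget = () -> {
      try {
        return new FileOutputStream("/tmp/kudo-merge-dump.bin");
      } catch (FileNotFoundException e) {
        throw new UncheckedIOException(e);
      }
    };
    MergeOptions options = new MergeOptions(DumpOption.OnFailure, dumpTarget, "/tmp/kudo-merge-dump.bin");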
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/OpenByteArrayOutputStreamWriter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni.kudo;
18 |
19 | import static java.lang.Math.toIntExact;
20 | import static java.util.Objects.requireNonNull;
21 |
22 | import ai.rapids.cudf.HostMemoryBuffer;
23 | import java.io.IOException;
24 |
25 | /**
26 |  * Adapter class that helps avoid an extra memory copy when the shuffle manager uses
27 |  * {@link OpenByteArrayOutputStream} during serialization.
28 | */
29 | public class OpenByteArrayOutputStreamWriter implements DataWriter {
30 | private final OpenByteArrayOutputStream out;
31 |
32 | public OpenByteArrayOutputStreamWriter(OpenByteArrayOutputStream bout) {
33 | requireNonNull(bout, "Byte array output stream can't be null");
34 | this.out = bout;
35 | }
36 |
37 | @Override
38 | public void reserve(int size) throws IOException {
39 | out.reserve(size);
40 | }
41 |
42 | @Override
43 | public void writeInt(int v) throws IOException {
44 | out.reserve(4 + out.size());
45 | out.write((v >>> 24) & 0xFF);
46 | out.write((v >>> 16) & 0xFF);
47 | out.write((v >>> 8) & 0xFF);
48 | out.write((v >>> 0) & 0xFF);
49 | }
50 |
51 | @Override
52 | public void copyDataFrom(HostMemoryBuffer src, long srcOffset, long len) throws IOException {
53 | out.write(src, srcOffset, toIntExact(len));
54 | }
55 |
56 | @Override
57 | public void flush() throws IOException {
58 | }
59 |
60 | @Override
61 | public void write(byte[] arr, int offset, int length) throws IOException {
62 | out.write(arr, offset, length);
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/SliceInfo.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni.kudo;
18 |
19 | public class SliceInfo {
20 | final int offset;
21 | final int rowCount;
22 | private final SlicedValidityBufferInfo validityBufferInfo;
23 |
24 | SliceInfo(int offset, int rowCount) {
25 | this.offset = offset;
26 | this.rowCount = rowCount;
27 | this.validityBufferInfo = SlicedValidityBufferInfo.calc(offset, rowCount);
28 | }
29 |
30 | SlicedValidityBufferInfo getValidityBufferInfo() {
31 | return validityBufferInfo;
32 | }
33 |
34 | public int getOffset() {
35 | return offset;
36 | }
37 |
38 | public int getRowCount() {
39 | return rowCount;
40 | }
41 |
42 | @Override
43 | public String toString() {
44 | return "SliceInfo{" +
45 | "offset=" + offset +
46 | ", rowCount=" + rowCount +
47 | ", validityBufferInfo=" + validityBufferInfo +
48 | '}';
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/SlicedValidityBufferInfo.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni.kudo;
18 |
19 | /**
20 | * A simple utility class to hold information about serializing/deserializing sliced validity buffer.
21 | */
22 | class SlicedValidityBufferInfo {
23 | private final int bufferOffset;
24 | private final int bufferLength;
25 | /// The bit offset within the buffer where the slice starts
26 | private final int beginBit;
27 |
28 | SlicedValidityBufferInfo(int bufferOffset, int bufferLength, int beginBit) {
29 | this.bufferOffset = bufferOffset;
30 | this.bufferLength = bufferLength;
31 | this.beginBit = beginBit;
32 | }
33 |
34 | @Override
35 | public String toString() {
36 | return "SlicedValidityBufferInfo{" + "bufferOffset=" + bufferOffset + ", bufferLength=" + bufferLength +
37 | ", beginBit=" + beginBit + '}';
38 | }
39 |
40 | public int getBufferOffset() {
41 | return bufferOffset;
42 | }
43 |
44 | public int getBufferLength() {
45 | return bufferLength;
46 | }
47 |
48 | public int getBeginBit() {
49 | return beginBit;
50 | }
51 |
52 | static SlicedValidityBufferInfo calc(int rowOffset, int numRows) {
53 | if (rowOffset < 0) {
54 | throw new IllegalArgumentException("rowOffset must be >= 0, but was " + rowOffset);
55 | }
56 | if (numRows < 0) {
57 | throw new IllegalArgumentException("numRows must be >= 0, but was " + numRows);
58 | }
59 | int bufferOffset = rowOffset / 8;
60 | int beginBit = rowOffset % 8;
61 | int bufferLength = 0;
62 | if (numRows > 0) {
63 | bufferLength = (rowOffset + numRows - 1) / 8 - bufferOffset + 1;
64 | }
65 | return new SlicedValidityBufferInfo(bufferOffset, bufferLength, beginBit);
66 | }
67 | }
68 |
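A worked example of calc, following the arithmetic above: rowOffset = 10 and numRows = 20 cover validity bits 10..29, which live in bytes 1..3 of the unsliced buffer, starting at bit 2 of byte 1.

    SlicedValidityBufferInfo info = SlicedValidityBufferInfo.calc(10, 20);
    // bufferOffset = 10 / 8 = 1, beginBit = 10 % 8 = 2, bufferLength = (10 + 20 - 1) / 8 - 1 + 1 = 3
    assert info.getBufferOffset() == 1;
    assert info.getBeginBit() == 2;
    assert info.getBufferLength() == 3;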
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/WriteMetrics.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni.kudo;
18 |
19 | /**
20 |  * This class contains metrics for serializing tables using the kudo format.
21 | */
22 | public class WriteMetrics {
23 | private long copyBufferTime;
24 | private long writtenBytes;
25 |
26 |
27 | public WriteMetrics() {
28 | this.copyBufferTime = 0;
29 | this.writtenBytes = 0;
30 | }
31 |
32 | /**
33 | * Get the time spent on copying the buffer.
34 | */
35 | public long getCopyBufferTime() {
36 | return copyBufferTime;
37 | }
38 |
39 | public void addCopyBufferTime(long time) {
40 | copyBufferTime += time;
41 | }
42 |
43 | /**
44 | * Get the number of bytes written.
45 | */
46 | public long getWrittenBytes() {
47 | return writtenBytes;
48 | }
49 |
50 | public void addWrittenBytes(long bytes) {
51 | writtenBytes += bytes;
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/schema/HostColumnsVisitor.java:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * Copyright (c) 2024-2025, NVIDIA CORPORATION.
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | *
17 | */
18 |
19 | package com.nvidia.spark.rapids.jni.schema;
20 |
21 | import ai.rapids.cudf.HostColumnVectorCore;
22 |
23 | import java.util.List;
24 |
25 | /**
26 | * A post order visitor for visiting a list of host columns in a schema.
27 | *
28 | *
29 | *
30 | * For example, if we have three columns A, B, and C with following types:
31 | *
32 | *
33 |  * - A: struct { int a1; long a2 }
34 |  * - B: list { int b1 }
35 |  * - C: string c1
36 | *
37 | *
38 | * The order of visiting will be:
39 | *
40 | * - Previsit struct column A
41 | * - Visit primitive column a1
42 | * - Visit primitive column a2
43 | * - Visit struct column A
44 | * - Previsit list column B
45 | * - Visit primitive column b1
46 | * - Visit list column B
47 | * - Visit primitive column c1
48 | *
49 | *
50 | *
51 | *
52 | */
53 | public interface HostColumnsVisitor {
54 | /**
55 | * Visit a struct column before any of its children.
56 | * @param col the struct column to visit
57 | */
58 | void preVisitStruct(HostColumnVectorCore col);
59 |
60 | // TODO no one uses this, do we even want it???
61 | /**
62 | * Visit a struct column.
63 | * @param col the struct column to visit
64 | */
65 | void visitStruct(HostColumnVectorCore col);
66 |
67 | /**
68 | * Visit a list column before actually visiting its child.
69 | * @param col the list column to visit
70 | */
71 | void preVisitList(HostColumnVectorCore col);
72 |
73 | /**
74 | * Visit a list column after visiting its child.
75 | * @param col the list column to visit
76 | */
77 | void visitList(HostColumnVectorCore col);
78 |
79 | /**
80 | * Visit a column that is a primitive type.
81 | * @param col the column to visit
82 | */
83 | void visit(HostColumnVectorCore col);
84 |
85 | /**
86 | * The processing is all done
87 | */
88 | void done();
89 | }
90 |
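A minimal implementing sketch that simply counts the primitive columns it sees (illustrative only). For the columns A, B and C described in the Javadoc above, visiting them with this visitor would count four primitive columns (a1, a2, b1 and c1).

    class PrimitiveColumnCounter implements HostColumnsVisitor {
      private int primitiveColumns = 0;

      @Override public void preVisitStruct(HostColumnVectorCore col) {}
      @Override public void visitStruct(HostColumnVectorCore col) {}
      @Override public void preVisitList(HostColumnVectorCore col) {}
      @Override public void visitList(HostColumnVectorCore col) {}
      @Override public void visit(HostColumnVectorCore col) { primitiveColumns++; }
      @Override public void done() {}

      int getPrimitiveColumns() { return primitiveColumns; }
    }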
--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/schema/SimpleSchemaVisitor.java:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * Copyright (c) 2025, NVIDIA CORPORATION.
4 | *
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | *
17 | */
18 |
19 | package com.nvidia.spark.rapids.jni.schema;
20 |
21 | import ai.rapids.cudf.Schema;
22 |
23 | /**
24 |  * A schema visitor similar to {@link SchemaVisitor} but with a simplified interface; please refer
25 |  * to {@link SchemaVisitor} for more details.
26 |  *
27 |  * This interface removes the generic types and return values to simplify the API, which avoids
28 |  * unnecessary allocations while visiting the schema.
29 | */
30 | public interface SimpleSchemaVisitor {
31 | /**
32 | * Visit the top level schema.
33 | * @param schema the top level schema to visit
34 | */
35 | void visitTopSchema(Schema schema);
36 |
37 | /**
38 | * Visit a struct schema before actually visiting its children.
39 | * @param structType the struct schema to visit
40 | */
41 | void preVisitStruct(Schema structType);
42 |
43 | /**
44 | * Visit a struct schema.
45 | * @param structType the struct schema to visit
46 | */
47 | void visitStruct(Schema structType);
48 |
49 | /**
50 | * Visit a list schema before actually visiting its child.
51 | * @param listType the list schema to visit
52 | */
53 | void preVisitList(Schema listType);
54 |
55 | /**
56 | * Visit a list schema after visiting its child.
57 | * @param listType the list schema to visit
58 | */
59 | void visitList(Schema listType);
60 |
61 | /**
62 | * Visit a primitive type.
63 | * @param primitiveType the primitive type to visit
64 | */
65 | void visit(Schema primitiveType);
66 | }
67 |
--------------------------------------------------------------------------------
/src/test/cpp/faultinj/test_faultinj.json:
--------------------------------------------------------------------------------
1 | {
2 | "logLevel": 1,
3 | "seed": 12345,
4 | "dynamic": true,
5 | "cudaRuntimeFaults": {
6 | "cudaLaunchKernel_ptsz": {
7 | "percent": 0,
8 | "injectionType": 0,
9 | "interceptionCount": 1
10 | }
11 | },
12 | "cudaDriverFaults": {
13 | "*": {
14 | "percent": 0,
15 | "injectionType": 2,
16 | "substituteReturnCode": 2,
17 | "interceptionCount": 1000
18 | },
19 | "cuLaunchKernel_ptsz": {
20 | "percent": 0,
21 | "injectionType": 2,
22 | "substituteReturnCode": 2,
23 | "interceptionCount": 1000
24 | }
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/src/test/java/com/nvidia/spark/rapids/jni/CaseWhenTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | import ai.rapids.cudf.*;
20 |
21 | import org.junit.jupiter.api.Test;
22 |
23 | import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual;
24 |
25 | public class CaseWhenTest {
26 |
27 | @Test
28 | void selectIndexTest() {
29 | try (
30 | ColumnVector b0 = ColumnVector.fromBooleans(
31 | true, false, false, false);
32 | ColumnVector b1 = ColumnVector.fromBooleans(
33 | true, true, false, false);
34 | ColumnVector b2 = ColumnVector.fromBooleans(
35 | false, false, true, false);
36 | ColumnVector b3 = ColumnVector.fromBooleans(
37 | true, true, true, false);
38 | ColumnVector expected = ColumnVector.fromInts(0, 1, 2, 4)) {
39 | ColumnVector[] boolColumns = new ColumnVector[] { b0, b1, b2, b3 };
40 | try (ColumnVector actual = CaseWhen.selectFirstTrueIndex(boolColumns)) {
41 | assertColumnsAreEqual(expected, actual);
42 | }
43 | }
44 | }
45 |
46 | @Test
47 | void selectIndexTestWithNull() {
48 | try (
49 | ColumnVector b0 = ColumnVector.fromBoxedBooleans(
50 | null, false, false, null, false);
51 | ColumnVector b1 = ColumnVector.fromBoxedBooleans(
52 | null, null, false, true, true);
53 | ColumnVector b2 = ColumnVector.fromBoxedBooleans(
54 | null, null, false, true, false);
55 | ColumnVector b3 = ColumnVector.fromBoxedBooleans(
56 | null, null, null, true, null);
57 | ColumnVector expected = ColumnVector.fromInts(4, 4, 4, 1, 1)) {
58 | ColumnVector[] boolColumns = new ColumnVector[] { b0, b1, b2, b3 };
59 | try (ColumnVector actual = CaseWhen.selectFirstTrueIndex(boolColumns)) {
60 | assertColumnsAreEqual(expected, actual);
61 | }
62 | }
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/src/test/java/com/nvidia/spark/rapids/jni/GpuSubstringIndexUtilsTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | import ai.rapids.cudf.AssertUtils;
20 | import ai.rapids.cudf.ColumnVector;
21 | import ai.rapids.cudf.Scalar;
22 | import ai.rapids.cudf.Table;
23 | import org.junit.jupiter.api.Test;
24 |
25 | import java.util.ArrayList;
26 | import java.util.List;
27 |
28 | public class GpuSubstringIndexUtilsTest {
29 | @Test
30 | void gpuSubstringIndexTest(){
31 | Table.TestBuilder tb = new Table.TestBuilder();
32 | tb.column( "www.apache.org");
33 | tb.column("www.apache");
34 | tb.column("www");
35 | tb.column("");
36 | tb.column("org");
37 | tb.column("apache.org");
38 | tb.column("www.apache.org");
39 | tb.column("");
40 | tb.column("大千世界大");
41 | tb.column("www||apache");
42 |
43 | try(Table expected = tb.build()){
44 | Table.TestBuilder tb2 = new Table.TestBuilder();
45 | tb2.column("www.apache.org");
46 | tb2.column("www.apache.org");
47 | tb2.column("www.apache.org");
48 | tb2.column("www.apache.org");
49 | tb2.column("www.apache.org");
50 | tb2.column("www.apache.org");
51 | tb2.column("www.apache.org");
52 | tb2.column("");
53 | tb2.column("大千世界大千世界");
54 | tb2.column("www||apache||org");
55 |
56 | Scalar dotScalar = Scalar.fromString(".");
57 | Scalar cnChar = Scalar.fromString("千");
58 | Scalar verticalBar = Scalar.fromString("||");
59 | Scalar[] delimiterArray = new Scalar[]{dotScalar, dotScalar, dotScalar, dotScalar,dotScalar, dotScalar, dotScalar, dotScalar, cnChar, verticalBar};
60 | int[] countArray = new int[]{3, 2, 1, 0, -1, -2, -3, -2, 2, 2};
61 |         List<ColumnVector> result = new ArrayList<>();
62 | try (Table origTable = tb2.build()){
63 | for(int i = 0; i < origTable.getNumberOfColumns(); i++){
64 | ColumnVector string_col = origTable.getColumn(i);
65 | result.add(GpuSubstringIndexUtils.substringIndex(string_col, delimiterArray[i], countArray[i]));
66 | }
67 | try (Table result_tbl = new Table(
68 | result.toArray(new ColumnVector[result.size()]))){
69 | AssertUtils.assertTablesAreEqual(expected, result_tbl);
70 | }
71 | }finally {
72 | result.forEach(ColumnVector::close);
73 | }
74 | }
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/src/test/java/com/nvidia/spark/rapids/jni/HistogramTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | import ai.rapids.cudf.AssertUtils;
20 | import ai.rapids.cudf.ColumnVector;
21 |
22 | import org.junit.jupiter.api.Test;
23 |
24 | public class HistogramTest {
25 | @Test
26 | void testZeroFrequency() {
27 | try (ColumnVector values = ColumnVector.fromInts(5, 10, 30);
28 | ColumnVector freqs = ColumnVector.fromLongs(1, 0, 1);
29 | ColumnVector histogram = Histogram.createHistogramIfValid(values, freqs, true);
30 | ColumnVector percentiles = Histogram.percentileFromHistogram(histogram, new double[]{1},
31 | false);
32 | ColumnVector expected = ColumnVector.fromBoxedDoubles(5.0, null, 30.0)) {
33 | AssertUtils.assertColumnsAreEqual(percentiles, expected);
34 | }
35 | }
36 |
37 | @Test
38 | void testAllNulls() {
39 | try (ColumnVector values = ColumnVector.fromBoxedInts(null, null, null);
40 | ColumnVector freqs = ColumnVector.fromLongs(1, 2, 3);
41 | ColumnVector histogram = Histogram.createHistogramIfValid(values, freqs, true);
42 | ColumnVector percentiles = Histogram.percentileFromHistogram(histogram, new double[]{0.5},
43 | false);
44 | ColumnVector expected = ColumnVector.fromBoxedDoubles(null, null, null)) {
45 | AssertUtils.assertColumnsAreEqual(percentiles, expected);
46 | }
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/src/test/java/com/nvidia/spark/rapids/jni/LimitingOffHeapAllocForTests.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | import ai.rapids.cudf.HostMemoryBuffer;
20 |
21 | import java.util.Optional;
22 |
23 | /**
24 | * This provides a way to allocate and deallocate off heap buffers using the RmmSpark APIs for
25 | * retry on allocations.
26 | */
27 | public class LimitingOffHeapAllocForTests {
28 | private static long limit;
29 | private static long amountAllocated = 0;
30 | public static synchronized void setLimit(long limit) {
31 | LimitingOffHeapAllocForTests.limit = limit;
32 | if (amountAllocated > 0) {
33 | throw new IllegalStateException("PREVIOUS TEST LEAKED MEMORY!!!");
34 | }
35 | }
36 |
37 |   private static Optional<HostMemoryBuffer> allocInternal(long amount, boolean blocking) {
38 |     Optional<HostMemoryBuffer> ret = Optional.empty();
39 | boolean wasOom = true;
40 | boolean isRecursive = RmmSpark.preCpuAlloc(amount, blocking);
41 | try {
42 | synchronized (LimitingOffHeapAllocForTests.class) {
43 | if (amountAllocated + amount <= limit) {
44 | amountAllocated += amount;
45 | wasOom = false;
46 | HostMemoryBuffer buff = HostMemoryBuffer.allocate(amount);
47 | final long ptr = buff.getAddress();
48 | buff.setEventHandler(refCount -> {
49 | if (refCount == 0) {
50 | synchronized (LimitingOffHeapAllocForTests.class) {
51 | amountAllocated -= amount;
52 | }
53 | RmmSpark.cpuDeallocate(ptr, amount);
54 | }
55 | });
56 | ret = Optional.of(buff);
57 | }
58 | }
59 | } finally {
60 | if (ret.isPresent()) {
61 | RmmSpark.postCpuAllocSuccess(ret.get().getAddress(), amount, blocking, isRecursive);
62 | } else {
63 | RmmSpark.postCpuAllocFailed(wasOom, blocking, isRecursive);
64 | }
65 | }
66 | return ret;
67 | }
68 |
69 | /**
70 | * Do a non-blocking allocation
71 | * @param amount the amount to allocate
72 |    * @return the allocated buffer if it fit within the limit, otherwise empty.
73 |    */
74 |   public static Optional<HostMemoryBuffer> tryAlloc(long amount) {
75 | return allocInternal(amount, false);
76 | }
77 |
78 | /**
79 | * Do a blocking allocation
80 | * @param amount the amount to allocate
81 | * @return the allocated buffer
82 | */
83 | public static HostMemoryBuffer alloc(long amount) {
84 |     Optional<HostMemoryBuffer> ret = Optional.empty();
85 | while (!ret.isPresent()) {
86 | ret = allocInternal(amount, true);
87 | }
88 | return ret.get();
89 | }
90 | }
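A minimal usage sketch for a test (the sizes are illustrative):

    LimitingOffHeapAllocForTests.setLimit(1024 * 1024);  // 1 MiB budget for this test
    // Non-blocking: empty if the request does not fit under the limit right now.
    Optional<HostMemoryBuffer> maybe = LimitingOffHeapAllocForTests.tryAlloc(512 * 1024);
    maybe.ifPresent(HostMemoryBuffer::close);
    // Blocking: retries until the allocation fits; closing the buffer returns it to the budget.
    try (HostMemoryBuffer buff = LimitingOffHeapAllocForTests.alloc(256 * 1024)) {
      // use buff
    }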
--------------------------------------------------------------------------------
/src/test/java/com/nvidia/spark/rapids/jni/MapTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | import ai.rapids.cudf.*;
20 |
21 | import org.junit.jupiter.api.Test;
22 |
23 | import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual;
24 |
25 | import java.util.Arrays;
26 | import java.util.List;
27 |
28 | public class MapTest {
29 |
30 | @Test
31 | void sort() {
32 |     // Map is List<Struct<key, value>>
33 |     List<HostColumnVector.StructData> map1 = Arrays.asList(
34 |         new HostColumnVector.StructData(Arrays.asList(5, 2)),
35 |         new HostColumnVector.StructData(Arrays.asList(4, 1)));
36 |     List<HostColumnVector.StructData> map2 = Arrays.asList(
37 |         new HostColumnVector.StructData(Arrays.asList(2, 1)),
38 |         new HostColumnVector.StructData(Arrays.asList(4, 3)));
39 | 
40 |     List<HostColumnVector.StructData> sorted_map1 = Arrays.asList(
41 |         new HostColumnVector.StructData(Arrays.asList(4, 1)),
42 |         new HostColumnVector.StructData(Arrays.asList(5, 2)));
43 |     List<HostColumnVector.StructData> sorted_map2 = map2;
44 |
45 | HostColumnVector.StructType structType = new HostColumnVector.StructType(true,
46 | Arrays.asList(new HostColumnVector.BasicType(true, DType.INT32),
47 | new HostColumnVector.BasicType(true, DType.INT32)));
48 | try (ColumnVector cv = ColumnVector.fromLists(
49 | new HostColumnVector.ListType(true, structType), map1, map2);
50 | ColumnVector res = Map.sort(cv, false);
51 | ColumnVector expected = ColumnVector.fromLists(
52 | new HostColumnVector.ListType(true, structType), sorted_map1, sorted_map2)) {
53 |
54 | assertColumnsAreEqual(expected, res);
55 | }
56 | }
57 |
58 | }
59 |
60 |
--------------------------------------------------------------------------------
/src/test/java/com/nvidia/spark/rapids/jni/RegexRewriteUtilsTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.nvidia.spark.rapids.jni;
18 |
19 | import ai.rapids.cudf.ColumnVector;
20 | import ai.rapids.cudf.Scalar;
21 | import org.junit.jupiter.api.Test;
22 |
23 | import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual;
24 |
25 | public class RegexRewriteUtilsTest {
26 |
27 | @Test
28 | void testLiteralRangePattern() {
29 | int d = 3;
30 | try (ColumnVector inputCv = ColumnVector.fromStrings(
31 | "abc123", "aabc123", "aabc12", "abc1232", "aabc1232");
32 | Scalar pattern = Scalar.fromString("abc");
33 | ColumnVector expected = ColumnVector.fromBooleans(true, true, false, true, true);
34 | ColumnVector actual = RegexRewriteUtils.literalRangePattern(inputCv, pattern, d, 48, 57)) {
35 | assertColumnsAreEqual(expected, actual);
36 | }
37 | }
38 |
39 | @Test
40 | void testLiteralRangePatternChinese() {
41 | int d = 2;
42 | try (ColumnVector inputCv = ColumnVector.fromStrings(
43 | "数据砖块", "火花-急流英伟达", "英伟达Nvidia", "火花-急流");
44 | Scalar pattern = Scalar.fromString("英");
45 | ColumnVector expected = ColumnVector.fromBooleans(false, true, true, false);
46 | ColumnVector actual = RegexRewriteUtils.literalRangePattern(inputCv, pattern, d, 19968, 40869)) { // 19968..40869 are U+4E00..U+9FA5 (CJK Unified Ideographs)
47 | assertColumnsAreEqual(expected, actual);
48 | }
49 | }
50 |
51 | }
52 |
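Note: from the fixtures in this test, RegexRewriteUtils.literalRangePattern(input, pattern, d, start, end) appears to check whether each string contains the literal pattern followed by at least d code points in the inclusive range [start, end], i.e. the equivalent of the regex abc[0-9]{3,} for testLiteralRangePattern. The JDK-only sketch below is an illustrative oracle for cross-checking the expected booleans under that reading; the class and method names are made up for the example and are not part of the library:

import java.util.regex.Pattern;

public class LiteralRangePatternOracle {
  // Build a java.util.regex equivalent: the quoted literal followed by at least
  // `count` code points in [start, end] (both given as integer code points).
  static Pattern build(String literal, int count, int start, int end) {
    String range = String.format("[\\x{%X}-\\x{%X}]", start, end);
    return Pattern.compile(Pattern.quote(literal) + range + "{" + count + ",}");
  }

  public static void main(String[] args) {
    Pattern p = build("abc", 3, 48, 57);  // same arguments as testLiteralRangePattern
    for (String s : new String[]{"abc123", "aabc123", "aabc12", "abc1232", "aabc1232"}) {
      System.out.println(s + " -> " + p.matcher(s).find());  // true, true, false, true, true
    }
  }
}

Built with ("英", 2, 19968, 40869), the arguments of testLiteralRangePatternChinese, the same oracle reproduces the expected false, true, true, false for the four CJK inputs.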
--------------------------------------------------------------------------------
/thirdparty/cudf-pins/add_dependency_pins.cmake:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | if(NOT DEFINED rapids-cmake-dir)
18 | include(../cudf/cmake/rapids_config.cmake)
19 | endif()
20 |
21 | include(rapids-cpm)
22 | rapids_cpm_init()
23 |
24 | function(add_override_if_requested)
25 | if(CUDF_DEPENDENCY_PIN_MODE STREQUAL pinned)
26 | include(${rapids-cmake-dir}/cpm/package_override.cmake)
27 | rapids_cpm_package_override(${CMAKE_CURRENT_FUNCTION_LIST_DIR}/versions.json)
28 |
29 | message(STATUS "Pinning CUDF dependencies to values found in ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/versions.json")
30 | else()
31 | include(${rapids-cmake-dir}/cpm/generate_pinned_versions.cmake)
32 | rapids_cpm_generate_pinned_versions(OUTPUT ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/versions.json)
33 |
34 | message(STATUS "Building with latest CUDF dependencies (saving pinned versions to ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/versions.json)")
35 | endif()
36 | endfunction()
37 | add_override_if_requested()
38 |
--------------------------------------------------------------------------------
/thirdparty/cudf-pins/rapids-cmake.sha:
--------------------------------------------------------------------------------
1 | c0330a832746dabf4c4c518072af6771ce654330
2 |
--------------------------------------------------------------------------------
/thirdparty/cudf-pins/setup.cmake:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | string(TOLOWER "${CUDF_DEPENDENCY_PIN_MODE}" CUDF_DEPENDENCY_PIN_MODE)
18 | if(NOT (CUDF_DEPENDENCY_PIN_MODE STREQUAL pinned OR
19 | CUDF_DEPENDENCY_PIN_MODE STREQUAL latest))
20 | message(FATAL_ERROR "The CUDF_DEPENDENCY_PIN_MODE variable must be set to either `pinned` or `latest`.")
21 | endif()
22 |
23 | function(set_rapids_cmake_pin_sha1)
24 | set(rapids-cmake-sha "${rapids-cmake-sha}" PARENT_SCOPE)
25 |
26 | message(STATUS "Pinning rapids-cmake SHA1 to ${rapids-cmake-sha}")
27 | endfunction()
28 |
29 | # We need to set the rapids-cmake SHA1 before any CMake code in libcudf is executed when
30 | # we are in pin mode. Otherwise we will use the latest rapids-cmake version since that
31 | # is what cudf does via `fetch_rapids.cmake`
32 | if(CUDF_DEPENDENCY_PIN_MODE STREQUAL pinned)
33 | # Extract the rapids sha1 from the file
34 | file(READ "${CMAKE_CURRENT_LIST_DIR}/rapids-cmake.sha" rapids-cmake-sha)
35 | string(STRIP "${rapids-cmake-sha}" rapids-cmake-sha)
36 | string(REPLACE "\n" "" rapids-cmake-sha "${rapids-cmake-sha}")
37 | set(rapids-cmake-sha "${rapids-cmake-sha}" CACHE STRING "rapids-cmake sha to use" FORCE)
38 | message(STATUS "Pinning rapids-cmake SHA1 [${rapids-cmake-sha}]")
39 | else()
40 | set(rapids-cmake-fetch-via-git "ON" CACHE STRING "Make sure rapids-cmake is cloned so we can get SHA value" FORCE)
41 | endif()
42 |
43 | # We need to use a project() call hook, since rapids-cmake cpm_init()
44 | # can't be called from a `-C` CMake file
45 | set(CMAKE_PROJECT_TOP_LEVEL_INCLUDES "${CMAKE_CURRENT_LIST_DIR}/add_dependency_pins.cmake" CACHE FILEPATH "" )
46 |
--------------------------------------------------------------------------------