├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── documentation-request.md │ ├── feature_request.md │ └── submit-question.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── action-helper │ ├── Dockerfile │ ├── action.yml │ ├── entrypoint.sh │ └── python │ │ ├── auto-merge │ │ ├── cleanup-bot-branch │ │ ├── submodule-sync │ │ └── utils.py │ ├── add-to-project.yml │ ├── auto-merge.yml │ ├── blossom-ci.yml │ ├── clang-format.yml │ ├── license-header-check.yml │ └── signoff-check.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── SECURITY.md ├── build ├── apply-patches ├── build-in-docker ├── build-info ├── buildcpp.sh ├── run-in-docker ├── sanitizer-java │ └── bin │ │ └── java ├── submodule-check ├── unapply-patches └── win │ ├── create-wsl2.ps1 │ └── setup-wsl2.sh ├── ci ├── Dockerfile ├── Jenkinsfile.premerge ├── check-cuda-dependencies.sh ├── deploy.sh ├── fuzz-test.sh ├── nightly-build.sh ├── premerge-build.sh ├── settings.xml └── submodule-sync.sh ├── docs ├── img │ └── memory_state_machine.png └── memory_management.md ├── patches └── noop.patch ├── pom.xml ├── src ├── main │ ├── cpp │ │ ├── CMakeLists.txt │ │ ├── benchmarks │ │ │ ├── CMakeLists.txt │ │ │ ├── bloom_filter.cu │ │ │ ├── cast_long_to_binary_string.cpp │ │ │ ├── cast_string_to_float.cpp │ │ │ ├── common │ │ │ │ ├── generate_input.cu │ │ │ │ ├── generate_input.hpp │ │ │ │ └── random_distribution_factory.cuh │ │ │ ├── get_json_object.cu │ │ │ ├── parse_uri.cpp │ │ │ └── row_conversion.cpp │ │ ├── cmake │ │ │ ├── get_flatbuffers.cmake │ │ │ └── get_spdlog.cmake │ │ ├── faultinj │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ └── faultinj.cu │ │ ├── profiler │ │ │ ├── CMakeLists.txt │ │ │ ├── ProfilerJni.cpp │ │ │ ├── profiler_debug.cpp │ │ │ ├── profiler_debug.hpp │ │ │ ├── profiler_schema.cpp.in │ │ │ ├── profiler_serializer.cpp │ │ │ ├── profiler_serializer.hpp │ │ │ └── 
spark_rapids_profile_converter.cpp │ │ ├── src │ │ │ ├── BloomFilterJni.cpp │ │ │ ├── CaseWhenJni.cpp │ │ │ ├── CastStringJni.cpp │ │ │ ├── DateTimeUtilsJni.cpp │ │ │ ├── DecimalUtilsJni.cpp │ │ │ ├── GpuTimeZoneDBJni.cpp │ │ │ ├── HashJni.cpp │ │ │ ├── HistogramJni.cpp │ │ │ ├── HostTableJni.cpp │ │ │ ├── HyperLogLogPlusPlusHostUDFJni.cpp │ │ │ ├── JSONUtilsJni.cpp │ │ │ ├── KudoGpuSerializerJni.cpp │ │ │ ├── ListSliceJni.cpp │ │ │ ├── MapJni.cpp │ │ │ ├── NativeParquetJni.cpp │ │ │ ├── NumberConverterJni.cpp │ │ │ ├── ParseURIJni.cpp │ │ │ ├── RegexRewriteUtilsJni.cpp │ │ │ ├── RowConversionJni.cpp │ │ │ ├── SparkResourceAdaptorJni.cpp │ │ │ ├── SubStringIndexJni.cpp │ │ │ ├── TaskPriorityJni.cpp │ │ │ ├── ZOrderJni.cpp │ │ │ ├── bloom_filter.cu │ │ │ ├── bloom_filter.hpp │ │ │ ├── case_when.cu │ │ │ ├── case_when.hpp │ │ │ ├── cast_decimal_to_string.cu │ │ │ ├── cast_float_to_string.cu │ │ │ ├── cast_long_to_binary_string.cu │ │ │ ├── cast_string.cu │ │ │ ├── cast_string.hpp │ │ │ ├── cast_string_to_datetime.cu │ │ │ ├── cast_string_to_float.cu │ │ │ ├── cast_string_to_timestamp_common.hpp │ │ │ ├── datetime_rebase.cu │ │ │ ├── datetime_truncate.cu │ │ │ ├── datetime_utils.cuh │ │ │ ├── datetime_utils.hpp │ │ │ ├── decimal_utils.cu │ │ │ ├── decimal_utils.hpp │ │ │ ├── emptyfile.cpp │ │ │ ├── format_float.cu │ │ │ ├── from_json_to_raw_map.cu │ │ │ ├── from_json_to_raw_map_debug.cuh │ │ │ ├── from_json_to_structs.cu │ │ │ ├── ftos_converter.cuh │ │ │ ├── get_json_object.cu │ │ │ ├── get_json_object.hpp │ │ │ ├── hash.cuh │ │ │ ├── hash.hpp │ │ │ ├── histogram.cu │ │ │ ├── histogram.hpp │ │ │ ├── hive_hash.cu │ │ │ ├── host_table_view.hpp │ │ │ ├── hyper_log_log_plus_plus.cu │ │ │ ├── hyper_log_log_plus_plus.hpp │ │ │ ├── hyper_log_log_plus_plus_const.hpp │ │ │ ├── hyper_log_log_plus_plus_host_udf.cu │ │ │ ├── hyper_log_log_plus_plus_host_udf.hpp │ │ │ ├── json_parser.cuh │ │ │ ├── json_utils.cu │ │ │ ├── json_utils.hpp │ │ │ ├── list_slice.cu │ │ │ ├── 
list_slice.hpp │ │ │ ├── map.cu │ │ │ ├── map.hpp │ │ │ ├── murmur_hash.cu │ │ │ ├── murmur_hash.cuh │ │ │ ├── number_converter.cu │ │ │ ├── number_converter.hpp │ │ │ ├── parse_uri.cu │ │ │ ├── parse_uri.hpp │ │ │ ├── regex_rewrite_utils.cu │ │ │ ├── regex_rewrite_utils.hpp │ │ │ ├── row_conversion.cu │ │ │ ├── row_conversion.hpp │ │ │ ├── shuffle_assemble.cu │ │ │ ├── shuffle_split.cu │ │ │ ├── shuffle_split.hpp │ │ │ ├── shuffle_split_detail.hpp │ │ │ ├── spark_rapids_jni_version.cpp.in │ │ │ ├── spark_rapids_jni_version.h │ │ │ ├── substring_index.cu │ │ │ ├── substring_index.hpp │ │ │ ├── task_priority.hpp │ │ │ ├── timezones.cu │ │ │ ├── timezones.hpp │ │ │ ├── utilities.cu │ │ │ ├── utilities.hpp │ │ │ ├── version.hpp │ │ │ ├── xxhash64.cu │ │ │ ├── zorder.cu │ │ │ └── zorder.hpp │ │ └── tests │ │ │ ├── CMakeLists.txt │ │ │ ├── bloom_filter.cu │ │ │ ├── cast_decimal_to_string.cpp │ │ │ ├── cast_float_to_string.cpp │ │ │ ├── cast_long_to_binary_string.cpp │ │ │ ├── cast_string.cpp │ │ │ ├── datetime_rebase.cpp │ │ │ ├── format_float.cpp │ │ │ ├── hash.cpp │ │ │ ├── hyper_log_log_plus_plus.cu │ │ │ ├── list_slice.cpp │ │ │ ├── number_converter.cpp │ │ │ ├── parse_uri.cpp │ │ │ ├── row_conversion.cpp │ │ │ ├── shuffle_split.cu │ │ │ ├── substring_index.cpp │ │ │ ├── test_utilities.hpp │ │ │ ├── timezones.cpp │ │ │ └── utilities.cpp │ ├── fbs │ │ └── profiler.fbs │ └── java │ │ └── com │ │ └── nvidia │ │ └── spark │ │ └── rapids │ │ └── jni │ │ ├── Arms.java │ │ ├── BloomFilter.java │ │ ├── CaseWhen.java │ │ ├── CastException.java │ │ ├── CastStrings.java │ │ ├── CpuRetryOOM.java │ │ ├── CpuSplitAndRetryOOM.java │ │ ├── DateTimeRebase.java │ │ ├── DateTimeUtils.java │ │ ├── DecimalUtils.java │ │ ├── GpuListSliceUtils.java │ │ ├── GpuOOM.java │ │ ├── GpuRetryOOM.java │ │ ├── GpuSplitAndRetryOOM.java │ │ ├── GpuSubstringIndexUtils.java │ │ ├── GpuTimeZoneDB.java │ │ ├── Hash.java │ │ ├── Histogram.java │ │ ├── HostTable.java │ │ ├── HyperLogLogPlusPlusHostUDF.java 
│ │ ├── JSONUtils.java │ │ ├── Map.java │ │ ├── NumberConverter.java │ │ ├── OffHeapOOM.java │ │ ├── Pair.java │ │ ├── ParquetFooter.java │ │ ├── ParseURI.java │ │ ├── Preconditions.java │ │ ├── Profiler.java │ │ ├── RegexRewriteUtils.java │ │ ├── RmmSpark.java │ │ ├── RmmSparkThreadState.java │ │ ├── RowConversion.java │ │ ├── SparkPlatformType.java │ │ ├── SparkResourceAdaptor.java │ │ ├── TaskPriority.java │ │ ├── ThreadStateRegistry.java │ │ ├── Version.java │ │ ├── ZOrder.java │ │ ├── kudo │ │ ├── ByteArrayOutputStreamWriter.java │ │ ├── ColumnOffsetInfo.java │ │ ├── ColumnViewInfo.java │ │ ├── DataOutputStreamWriter.java │ │ ├── DataWriter.java │ │ ├── DumpOption.java │ │ ├── KudoGpuSerializer.java │ │ ├── KudoHostMergeResult.java │ │ ├── KudoSerializer.java │ │ ├── KudoTable.java │ │ ├── KudoTableHeader.java │ │ ├── KudoTableHeaderCalc.java │ │ ├── KudoTableMerger.java │ │ ├── MergeMetrics.java │ │ ├── MergeOptions.java │ │ ├── MergedInfoCalc.java │ │ ├── OpenByteArrayOutputStream.java │ │ ├── OpenByteArrayOutputStreamWriter.java │ │ ├── SliceInfo.java │ │ ├── SlicedBufferSerializer.java │ │ ├── SlicedValidityBufferInfo.java │ │ ├── TableBuilder.java │ │ ├── WriteInput.java │ │ └── WriteMetrics.java │ │ └── schema │ │ ├── HostColumnsVisitor.java │ │ ├── SchemaVisitor.java │ │ ├── SimpleSchemaVisitor.java │ │ └── Visitors.java └── test │ ├── cpp │ └── faultinj │ │ └── test_faultinj.json │ └── java │ └── com │ └── nvidia │ └── spark │ └── rapids │ └── jni │ ├── BloomFilterTest.java │ ├── CaseWhenTest.java │ ├── CastStringsTest.java │ ├── DateTimeUtilsTest.java │ ├── DecimalUtilsTest.java │ ├── FromJsonToRawMapTest.java │ ├── GetJsonObjectTest.java │ ├── GpuListSliceUtilsTest.java │ ├── GpuSubstringIndexUtilsTest.java │ ├── HashTest.java │ ├── HilbertIndexTest.java │ ├── HistogramTest.java │ ├── HostTableTest.java │ ├── InterleaveBitsTest.java │ ├── LimitingOffHeapAllocForTests.java │ ├── MapTest.java │ ├── NumberConverterTest.java │ ├── ParseURITest.java │ ├── 
RegexRewriteUtilsTest.java │ ├── RmmSparkMonteCarlo.java │ ├── RmmSparkTest.java │ ├── RowConversionTest.java │ ├── TimeZoneTest.java │ └── kudo │ ├── KudoConcatValidityTest.java │ ├── KudoGpuSerializerTest.java │ └── KudoSerializerTest.java └── thirdparty └── cudf-pins ├── add_dependency_pins.cmake ├── rapids-cmake.sha ├── setup.cmake └── versions.json /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # Build-related 18 | /.github/ @NVIDIA/sparkrapids-cicd-codeowners 19 | /build/ @NVIDIA/sparkrapids-cicd-codeowners 20 | /ci/ @NVIDIA/sparkrapids-cicd-codeowners 21 | /patches/ @NVIDIA/sparkrapids-cicd-codeowners 22 | pom.xml @NVIDIA/sparkrapids-cicd-codeowners 23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a bug report to help us improve the RAPIDS Accelerator JNI for Apache Spark 4 | title: "[BUG]" 5 | labels: "? - Needs Triage, bug" 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 
12 | 13 | **Steps/Code to reproduce bug** 14 | Please provide a list of steps or a code sample to reproduce the issue. 15 | Avoid posting private or sensitive data. 16 | 17 | **Expected behavior** 18 | A clear and concise description of what you expected to happen. 19 | 20 | **Environment details (please complete the following information)** 21 | - Environment location: [Standalone, YARN, Kubernetes, Cloud(specify cloud provider)] 22 | - Spark configuration settings related to the issue 23 | 24 | **Additional context** 25 | Add any other context about the problem here. 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Documentation request 3 | about: Report incorrect or needed documentation 4 | title: "[DOC]" 5 | labels: "? - Needs Triage, documentation" 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Report incorrect documentation 11 | 12 | **Location of incorrect documentation** 13 | Provide links and line numbers if applicable. 14 | 15 | **Describe the problems or issues found in the documentation** 16 | A clear and concise description of what you found to be incorrect. 17 | 18 | **Steps taken to verify documentation is incorrect** 19 | List any steps you have taken: 20 | 21 | **Suggested fix for documentation** 22 | Detail proposed changes to fix the documentation if you have any. 23 | 24 | --- 25 | 26 | ## Report needed documentation 27 | 28 | **Report needed documentation** 29 | A clear and concise description of what documentation you believe is needed and why. 30 | 31 | **Describe the documentation you'd like** 32 | A clear and concise description of what you want to happen. 
33 | 34 | **Steps taken to search for needed documentation** 35 | List any steps you have taken: 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for the RAPIDS Accelerator JNI for Apache Spark 4 | title: "[FEA]" 5 | labels: "? - Needs Triage, feature request" 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I wish the RAPIDS Accelerator JNI for Apache Spark would [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context, code examples, or references to existing implementations about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/submit-question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Submit question 3 | about: Ask a general question about RAPIDS Accelerator JNI for Apache Spark 4 | title: "[QST]" 5 | labels: "? 
- Needs Triage, question" 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What is your question?** 11 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 36 | -------------------------------------------------------------------------------- /.github/workflows/action-helper/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2023, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM python:alpine 16 | 17 | WORKDIR / 18 | COPY python /python 19 | COPY entrypoint.sh . 20 | RUN chmod -R +x /python /entrypoint.sh 21 | # pin urllib3<2.0 for https://github.com/psf/requests/issues/6432 22 | RUN pip install requests "urllib3<2.0" 23 | 24 | ENTRYPOINT ["/entrypoint.sh"] 25 | -------------------------------------------------------------------------------- /.github/workflows/action-helper/action.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # TODO: move this to an independent repo as a public Github Action 16 | name: 'action helper' 17 | description: 'helper for github-related operations' 18 | inputs: 19 | operator: 20 | required: true 21 | description: 'specify operator, e.g. auto-merge' 22 | runs: 23 | using: 'docker' 24 | image: 'Dockerfile' 25 | args: 26 | - ${{ inputs.operator }} 27 | -------------------------------------------------------------------------------- /.github/workflows/action-helper/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -l 2 | # 3 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | set -e 19 | 20 | if [[ $# -ne 1 ]]; then 21 | echo "ERROR: invalid number of parameters, should be exact one" 22 | exit 1 23 | fi 24 | 25 | case $1 in 26 | 27 | auto-merge) 28 | /python/auto-merge --delete_head=True 29 | ;; 30 | 31 | *) 32 | echo "ERROR: unknown parameter: $1" 33 | ;; 34 | esac 35 | -------------------------------------------------------------------------------- /.github/workflows/action-helper/python/auto-merge: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2022, NVIDIA CORPORATION. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | import sys 18 | from argparse import ArgumentParser 19 | 20 | from utils import EnvDefault, PullRequest, strtobool 21 | 22 | 23 | def main(): 24 | parser = ArgumentParser(description="Automerge") 25 | parser.add_argument("--owner", action=EnvDefault, env="OWNER", 26 | help="github token, will try use env OWNER if empty") 27 | parser.add_argument("--repo", action=EnvDefault, env="REPO", 28 | help="repo name, will try use env REPO if empty") 29 | parser.add_argument("--head", action=EnvDefault, env="HEAD", 30 | help="HEAD ref, will try use env HEAD if empty") 31 | parser.add_argument("--base", action=EnvDefault, env="BASE", 32 | help="Base ref, will try use env BASE if empty") 33 | parser.add_argument("--token", action=EnvDefault, env="TOKEN", 34 | help="github token, will try use env TOKEN if empty") 35 | parser.add_argument("--delete_head", default=False, type=lambda x: bool(strtobool(x)), 36 | help="if delete HEAD branch after auto-merge") 37 | args = parser.parse_args() 38 | 39 | pr = PullRequest(head_owner=args.owner, head=args.head, head_token=args.token, 40 | base_owner=args.owner, repo=args.repo, base=args.base, base_token=args.token) 41 | try: 42 | if exist := pr.get_open(): 43 | number = exist[0].get('number') 44 | sha = exist[0].get('head').get('sha') 45 | else: 46 | params = { 47 | # head share the same owner/repo with base in auto-merge 48 | 'title': f"[auto-merge] {pr.head} to {pr.base} [skip ci] [bot]", 49 | 'head': f"{pr.head_owner}:{pr.head}", 50 | 'base': pr.base, 51 | 'body': f"auto-merge triggered by github actions on `{pr.head}` to " 52 | f"create a PR keeping `{pr.base}` up-to-date. 
" 53 | "If this PR is unable to be merged due to conflicts, " 54 | "it will remain open until manually fix.", 55 | 'maintainer_can_modify': True 56 | } 57 | number, sha, term = pr.create(params) 58 | if term: 59 | sys.exit(0) 60 | pr.auto_merge(number, sha) 61 | if args.delete_head: 62 | pr.delete_head() 63 | except Exception as e: 64 | print(e) 65 | sys.exit(1) 66 | 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /.github/workflows/action-helper/python/cleanup-bot-branch: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2022, NVIDIA CORPORATION. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | import sys 18 | from argparse import ArgumentParser 19 | 20 | from utils import EnvDefault, PullRequest 21 | 22 | 23 | def main(): 24 | parser = ArgumentParser(description="Cleanup bot branch") 25 | parser.add_argument("--owner", action=EnvDefault, env="OWNER", 26 | help="github token, will try use env OWNER if empty") 27 | parser.add_argument("--repo", action=EnvDefault, env="REPO", 28 | help="repo name, will try use env REPO if empty") 29 | parser.add_argument("--head", action=EnvDefault, env="HEAD", 30 | help="HEAD ref, will try use env HEAD if empty") 31 | parser.add_argument("--base", action=EnvDefault, env="BASE", 32 | help="Base ref, will try use env BASE if empty") 33 | parser.add_argument("--token", action=EnvDefault, env="TOKEN", 34 | help="github token, will try use env TOKEN if empty") 35 | args = parser.parse_args() 36 | 37 | try: 38 | if not args.head.startswith('bot-'): 39 | raise Exception(f"Cannot delete {args.head}, the script is only allowed to delete branch w/ bot-* prefix") 40 | 41 | pr = PullRequest(head_owner=args.owner, head=args.head, head_token=args.token, 42 | base_owner=args.owner, repo=args.repo, base=args.base, base_token=args.token) 43 | if exist := pr.get_open(): 44 | number = exist[0].get('number') 45 | raise Exception(f"Cannot delete {pr.head}, pull request #{number} is still open") 46 | else: 47 | pr.delete_head() 48 | except Exception as e: 49 | print(e) 50 | sys.exit(1) 51 | 52 | 53 | if __name__ == '__main__': 54 | main() 55 | -------------------------------------------------------------------------------- /.github/workflows/add-to-project.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2025, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | name: Add new issues and pull requests to project 16 | 17 | on: 18 | issues: 19 | types: 20 | - opened 21 | pull_request_target: 22 | types: 23 | - opened 24 | 25 | jobs: 26 | Add-to-project: 27 | if: github.repository_owner == 'NVIDIA' # avoid adding issues from forks 28 | runs-on: ubuntu-latest 29 | steps: 30 | - name: add-to-project 31 | uses: NVIDIA/spark-rapids-common/add-to-project@main 32 | with: 33 | token: ${{ secrets.PROJECT_TOKEN }} 34 | -------------------------------------------------------------------------------- /.github/workflows/clang-format.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023-2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | # A workflow to check clang format 16 | name: clang format check 17 | 18 | on: 19 | pull_request: 20 | types: [opened, synchronize, reopened] 21 | 22 | jobs: 23 | pre-commit: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v4 27 | with: 28 | submodules: true 29 | - uses: actions/setup-python@v5 30 | with: 31 | python-version: "3.10" 32 | - uses: pre-commit/action@v3.0.0 33 | with: 34 | extra_args: clang-format --all-files -------------------------------------------------------------------------------- /.github/workflows/license-header-check.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | # A workflow to check copyright/license header 16 | name: license header check 17 | 18 | on: 19 | pull_request: 20 | types: [opened, synchronize, reopened] 21 | 22 | jobs: 23 | license-header-check: 24 | runs-on: ubuntu-latest 25 | if: "!contains(github.event.pull_request.title, '[bot]')" 26 | steps: 27 | - name: Get checkout depth 28 | run: | 29 | echo "PR_FETCH_DEPTH=$(( ${{ github.event.pull_request.commits }} + 10 ))" >> $GITHUB_ENV 30 | 31 | - name: Checkout code 32 | uses: actions/checkout@v4 33 | with: 34 | fetch-depth: ${{ env.PR_FETCH_DEPTH }} 35 | 36 | - name: license-header-check 37 | uses: NVIDIA/spark-rapids-common/license-header-check@main 38 | with: 39 | included_file_patterns: | 40 | *.cpp, 41 | *.hpp, 42 | *.cu, 43 | *.cuh, 44 | *.java, 45 | *.sh, 46 | *Dockerfile*, 47 | *Jenkinsfile*, 48 | *.yml, 49 | *.yaml, 50 | *.txt, 51 | *.xml, 52 | *.fbs, 53 | build/* 54 | excluded_file_patterns: | 55 | thirdparty/* 56 | -------------------------------------------------------------------------------- /.github/workflows/signoff-check.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022-2024, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | # A workflow to check if PR got sign-off 16 | name: signoff check 17 | 18 | on: 19 | pull_request_target: 20 | types: [opened, synchronize, reopened] 21 | 22 | jobs: 23 | signoff-check: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - name: signoff 27 | uses: NVIDIA/spark-rapids-common/signoff-check@main 28 | with: 29 | owner: ${{ github.repository_owner }} 30 | repo: spark-rapids-jni 31 | pull_number: ${{ github.event.number }} 32 | token: ${{ secrets.GITHUB_TOKEN }} 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Common 2 | __pycache__ 3 | *.py[cod] 4 | *$py.class 5 | *.a 6 | *.o 7 | *.so 8 | *.dylib 9 | .cache 10 | .vscode 11 | *.swp 12 | *.pytest_cache 13 | DartConfiguration.tcl 14 | .DS_Store 15 | *.manifest 16 | *.spec 17 | .nfs* 18 | .clangd 19 | *#*# 20 | *.#* 21 | *.pyc 22 | *.pyo 23 | *.swp 24 | *~ 25 | 26 | ## Patching 27 | *.diff 28 | *.orig 29 | *.rej 30 | 31 | ## Eclipse IDE 32 | .project 33 | .cproject 34 | .settings 35 | 36 | ## IntelliJ IDE 37 | .idea/ 38 | .idea_modules/ 39 | *.iml 40 | *.ipr 41 | *.iws 42 | 43 | #Java 44 | dependency-reduced-pom.xml 45 | hs_err*.log 46 | target/ 47 | 48 | ## VSCode IDE 49 | .vscode 50 | 51 | #Generated files 52 | cufile.log 53 | rmm_log.txt 54 | sanitizer_for_pid_*.log 55 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "thirdparty/cudf"] 2 | path = thirdparty/cudf 3 | url = https://github.com/rapidsai/cudf.git 4 | branch = branch-25.06 5 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022, NVIDIA CORPORATION. 
2 | 3 | repos: 4 | - repo: https://github.com/pre-commit/mirrors-clang-format 5 | rev: v16.0.1 6 | hooks: 7 | - id: clang-format 8 | files: \.(cu|cuh|h|hpp|cpp|inl)$ 9 | types_or: [file] 10 | args: ['-fallback-style=none', '-style=file:thirdparty/cudf/.clang-format'] 11 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | RAPIDS Accelerator JNI For Apache Spark 2 | Copyright (c) 2022-2024, NVIDIA CORPORATION 3 | 4 | -------------------------------------------------------------------------------- 5 | 6 | This project includes code from ryu (https://github.com/ulfjack/ryu). 7 | 8 | Copyright (2018) Ulf Adams and contributors. 9 | 10 | Licensed under the Apache License, Version 2.0 (the "License"); 11 | you may not use this file except in compliance with the License. 12 | You may obtain a copy of the License at 13 | 14 | http://www.apache.org/licenses/LICENSE-2.0 15 | 16 | Unless required by applicable law or agreed to in writing, software 17 | distributed under the License is distributed on an "AS IS" BASIS, 18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | See the License for the specific language governing permissions and 20 | limitations under the License. 21 | 22 | -------------------------------------------------------------------------------- 23 | 24 | This project includes code from flatbuffers (https://github.com/google/flatbuffers). 25 | 26 | Copyright 2021 Google Inc. All rights reserved. 27 | 28 | Licensed under the Apache License, Version 2.0 (the "License"); 29 | you may not use this file except in compliance with the License. 
30 | You may obtain a copy of the License at 31 | 32 | http://www.apache.org/licenses/LICENSE-2.0 33 | 34 | Unless required by applicable law or agreed to in writing, software 35 | distributed under the License is distributed on an "AS IS" BASIS, 36 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 37 | See the License for the specific language governing permissions and 38 | limitations under the License. 39 | 40 | -------------------------------------------------------------------------------- 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RAPIDS Accelerator JNI For Apache Spark 2 | 3 | This repository contains native support code for the 4 | [RAPIDS Accelerator for Apache Spark](https://github.com/NVIDIA/spark-rapids). 5 | 6 | [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/NVIDIA/spark-rapids-jni) 7 | 8 | ## Building From Source 9 | 10 | See the [build instructions in the contributing guide](CONTRIBUTING.md#building-from-source). 11 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Security 2 | 3 | NVIDIA is dedicated to the security and trust of our software products and services, including all 4 | source code repositories managed through our organization. 5 | 6 | If you need to report a security issue, please use the appropriate contact points outlined 7 | below. 
**Please do not report security vulnerabilities through GitHub/GitLab.** 8 | 9 | ## Reporting Potential Security Vulnerability in an NVIDIA Product 10 | 11 | To report a potential security vulnerability in any NVIDIA product: 12 | - Web: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html) 13 | - E-Mail: psirt@nvidia.com 14 | - We encourage you to use the following PGP key for secure email communication: [NVIDIA public PGP Key for communication](https://www.nvidia.com/en-us/security/pgp-key) 15 | - Please include the following information: 16 | - Product/Driver name and version/branch that contains the vulnerability 17 | -------------------------------------------------------------------------------- /build/apply-patches: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | set -e 20 | 21 | BASE_DIR=$( git rev-parse --show-toplevel ) 22 | 23 | PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")} 24 | 25 | CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")} 26 | 27 | # Applying patches to CUDF is problematic in a number of ways. 
But ultimately it comes down to 28 | # making sure that a user can do development work in spark-rapids-jni without the patches 29 | # getting in the way 30 | # The operations I really want to support no matter what state CUDF is in are 31 | # 1) Build the repo from scratch 32 | # 2) Rebuild the repo without having to clean and start over 33 | # 3) upmerge to a new version of the plugin including updating the cudf submodule 34 | # 35 | # Building from scratch is simple. We want clean to unapply any patches and 36 | # build to apply them. But if we want to rebuild without a clean we need to know what 37 | # state the CUDF repo is in. Did we apply patches to it or not. The fastest way to do this 38 | # is to save some state files about what happened. But a user could mess with CUDF directly 39 | # so we want to have ways to double check that they are indeed correct. 40 | 41 | FULLY_PATCHED_FILE="$CUDF_DIR/spark-rapids-jni.patch" 42 | 43 | pushd "$CUDF_DIR" 44 | 45 | PATCH_FILES=$(find "$PATCH_DIR" -type f -not -empty) 46 | 47 | if [ -z "$PATCH_FILES" ] ; then 48 | echo "No patches to apply" 49 | exit 0 50 | fi 51 | 52 | CHANGED_FILES=$(git status --porcelain --untracked-files=no) 53 | 54 | if [ \( -s "$FULLY_PATCHED_FILE" \) -a \( -n "$CHANGED_FILES" \) ] ; then 55 | if git apply -R --check "$FULLY_PATCHED_FILE" ; then 56 | echo "Patches appear to have been applied already" 57 | exit 0 58 | fi 59 | fi 60 | 61 | if [ -n "$CHANGED_FILES" ] ; then 62 | echo "Error: CUDF repository has uncommitted changes. No patches will be applied. 
Please clean the repository so we can try and add the needed patches" 63 | echo "$CHANGED_FILES" # show the modified files reported by git status above 64 | exit 1 65 | fi 66 | 67 | find "$PATCH_DIR" -maxdepth 1 -type f -not -empty -print0 | sort -zV | while IFS= read -r -d '' file; do # skip empty placeholder patches, matching the PATCH_FILES check above 68 | echo "patching with: $file" 69 | git apply -v "$file" 70 | done 71 | 72 | git diff > "$FULLY_PATCHED_FILE" 73 | 74 | popd 75 | -------------------------------------------------------------------------------- /build/build-in-docker: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | # Build the spark-rapids-jni artifact in a Docker container with devtoolset 20 | 21 | set -e 22 | 23 | # Base paths relative to this script's location 24 | SCRIPTDIR=$(cd $(dirname $0); pwd) 25 | 26 | LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} 27 | USE_GDS=${USE_GDS:-ON} 28 | export CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"} 29 | # Make CUDA_VERSION consistent with the file run-in-docker 30 | export CUDA_VERSION=${CUDA_VERSION:-11.8.0} 31 | CUDA_CLASSIFIER=cuda${CUDA_VERSION%%.*} 32 | BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON} 33 | 34 | if (( $# == 0 )); then 35 | echo "Usage: $0 " 36 | exit 1 37 | fi 38 | 39 | # Set env for arm64 build, The possible values of 'uname -m' : [x86_64/i386/aarch64/mips/...] 
40 | if [ "$(uname -m)" == "aarch64" ]; then 41 | USE_GDS="OFF" # The GDS cuFiles RDMA libraries are not included in the arm64 CUDA toolkit. 42 | BUILD_FAULTINJ="OFF" # libcupti_static.a linked by cufaultinj, does not exist in the arm64 CUDA toolkit. 43 | fi 44 | 45 | $SCRIPTDIR/run-in-docker mvn \ 46 | -Dmaven.repo.local=$LOCAL_MAVEN_REPO \ 47 | -DUSE_GDS=$USE_GDS \ 48 | -DBUILD_FAULTINJ=${BUILD_FAULTINJ} \ 49 | -Dcuda.version=$CUDA_CLASSIFIER \ 50 | "$@" 51 | -------------------------------------------------------------------------------- /build/build-info: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | # This script generates the build info. 
20 | # Arguments: 21 | # version - The current version of the project 22 | # git_path - The path to the repository 23 | # libcudf_path - The path to the libcudf library 24 | set -e 25 | set -o pipefail 26 | 27 | echo_build_properties() { 28 | version=$1 29 | git_path=$2 30 | libcudf_path=$3 31 | shift 3 32 | echo version=$version 33 | echo user=$(whoami) 34 | echo revision=$(cd "$git_path" && git rev-parse HEAD) 35 | echo branch=$(cd "$git_path" && git rev-parse --abbrev-ref HEAD) 36 | echo date=$(date -u +%Y-%m-%dT%H:%M:%SZ) 37 | echo url=$(cd "$git_path" && git config --get remote.origin.url) 38 | gpu_architectures=$( 39 | cuobjdump "$libcudf_path" | gawk ' 40 | match($0, /arch = sm_([0-9]+)/, tmp) { 41 | arch[tmp[1]] = 1 42 | } 43 | 44 | END { 45 | n = asorti(arch) 46 | if (n == 0) { 47 | print "ERROR: No fatbin ELF / PTX code sections found" > "/dev/stderr" 48 | exit(1) 49 | } 50 | s = arch[1] 51 | for (i = 2; i <= n; i++) { 52 | s = s ";" arch[i] 53 | } 54 | print s 55 | } 56 | ' 57 | ) 58 | echo "gpu_architectures=$gpu_architectures" 59 | for arg in "$@"; do 60 | echo $arg 61 | done 62 | } 63 | 64 | echo_build_properties "$@" 65 | -------------------------------------------------------------------------------- /build/sanitizer-java/bin/java: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # This special Java executable is specified to the "jvm" configuration of the 19 | # the surefire plugin to intercept forking the processes for tests. Then 20 | # the tests will run with the compute-sanitizer tool. 21 | exec compute-sanitizer --tool memcheck \ 22 | --launch-timeout 600 \ 23 | --error-exitcode -2 \ 24 | --log-file "./sanitizer_for_pid_%p.log" \ 25 | java "$@" 26 | -------------------------------------------------------------------------------- /build/submodule-check: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | set -e 20 | 21 | outdated_modules=$(git submodule status | grep -v "^ " || true) 22 | if [[ -n $outdated_modules ]]; then 23 | echo >&2 "ERROR: submodules out of date: $outdated_modules. To fix: git submodule update --init --recursive" 24 | exit 1 25 | fi 26 | -------------------------------------------------------------------------------- /build/unapply-patches: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | set -e 20 | 21 | BASE_DIR=$( git rev-parse --show-toplevel ) 22 | 23 | PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")} 24 | 25 | CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")} 26 | 27 | # Applying patches to CUDF is problematic in a number of ways. But ultimately it comes down to 28 | # making sure that a user can do development work in spark-rapids-jni without the patches 29 | # getting in the way 30 | # The operations I really want to support no matter what state CUDF is in are 31 | # 1) Build the repo from scratch 32 | # 2) Rebuild the repo without having to clean and start over 33 | # 3) upmerge to a new version of the plugin including updating the cudf submodule 34 | # 35 | # Building from scratch is simple. We want clean to unapply any patches and 36 | # build to apply them. But if we want to rebuild without a clean we need to know what 37 | # state the CUDF repo is in. Did we apply patches to it or not. The fastest way to do this 38 | # is to save some state files about what happened. But a user could mess with CUDF directly 39 | # so we want to have ways to double check that they are indeed correct. 
40 | 41 | FULLY_PATCHED_FILE="$CUDF_DIR/spark-rapids-jni.patch" 42 | 43 | pushd "$CUDF_DIR" 44 | 45 | PATCH_FILES=$(find "$PATCH_DIR" -type f -not -empty) 46 | 47 | if [ -z "$PATCH_FILES" ] ; then 48 | echo "No patches to remove" 49 | exit 0 50 | fi 51 | 52 | CHANGED_FILES=$(git status --porcelain --untracked-files=no) 53 | 54 | if [ \( -s "$FULLY_PATCHED_FILE" \) -a \( -n "$CHANGED_FILES" \) ] ; then 55 | if git apply --check -R "$FULLY_PATCHED_FILE"; then 56 | echo "Patches appear to have been applied, so going to remove them" 57 | git apply -R -v "$FULLY_PATCHED_FILE" 58 | rm -f "$FULLY_PATCHED_FILE" 59 | 60 | # Check for modifications, again 61 | if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then 62 | echo "Error: CUDF repository has uncommitted changes. You might want to clean in manually if you know that is expected" 63 | git status --porcelain --untracked-files=no 64 | exit 1 65 | fi 66 | 67 | exit 0 68 | else 69 | echo "Files are changed, but in a way where the full path file does not apply to remove them $FULL_PATCHED_FILE" 70 | exit 1 71 | fi 72 | fi 73 | 74 | if [ -n "$CHANGED_FILES" ] ; then 75 | echo "Error: CUDF repository has uncommitted changes, but does not appear to have been patched. Please clean it and try again." 76 | echo "$CHANGED_FILE" 77 | exit 1 78 | else 79 | echo "No changes in CUDF repository to remove" 80 | fi 81 | 82 | popd 83 | -------------------------------------------------------------------------------- /build/win/create-wsl2.ps1: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | wsl -d Ubuntu build/win/setup-wsl2.sh 18 | -------------------------------------------------------------------------------- /build/win/setup-wsl2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | # Last tested: 20 | # Edition Windows 10 Enterprise 21 | # Version 21H2 22 | # OS build 19044.1645 23 | # Experience Windows Feature Experience Pack 120.2212.4170.0 24 | # NVIDIA Display Driver 473.47 25 | 26 | # add WSL2 user to passwordless sudoers if desired 27 | # sudo visudo /etc/sudoers.d/wsl2-sudo 28 | 29 | # Docker 30 | sudo apt-get -y install apt-transport-https ca-certificates curl software-properties-common 31 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg 32 | echo \ 33 | "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \ 34 | $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null 35 | sudo apt-get update 36 | sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-compose-plugin 37 | sudo usermod -a -G docker $USER 38 | sudo service docker start 39 | docker run hello-world 40 | 41 | distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID) 42 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - 43 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list 44 | sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit 45 | sudo service docker restart 46 | 47 | 48 | # CUDA 49 | # Initial instructions 50 | # https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&Distribution=WSL-Ubuntu&target_version=2.0&target_type=deb_network 51 | distroArch="wsl-ubuntu/x86_64" 52 | wget https://developer.download.nvidia.com/compute/cuda/repos/${distroArch}/cuda-wsl-ubuntu.pin 53 | sudo mv cuda-wsl-ubuntu.pin /etc/apt/preferences.d/cuda-repository-pin-600 54 | # Instructions for fetching keys modified per 55 | # https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212772#install-new-cuda-keyring-package-3 56 | sudo apt-key del 7fa2af80 57 | wget https://developer.download.nvidia.com/compute/cuda/repos/${distroArch}/cuda-keyring_1.0-1_all.deb 58 | sudo dpkg -i cuda-keyring_1.0-1_all.deb 59 | sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${distroArch}/ /" 60 | sudo apt-get update 61 | sudo apt-get -y install cuda-toolkit-11-7 -------------------------------------------------------------------------------- /ci/check-cuda-dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # common script to help check if packaged *.so files have dynamical link to CUDA Runtime 19 | 20 | set -exo pipefail 21 | 22 | jar_path=$1 23 | tmp_path=/tmp/"jni-$(date "+%Y%m%d%H%M%S")" 24 | unzip -j "${jar_path}" "*64/Linux/*.so" -d "${tmp_path}" 25 | 26 | find "$tmp_path" -type f -name "*.so" | while read -r so_file; do 27 | # Check if *.so file has a dynamic link to CUDA Runtime 28 | if objdump -p "$so_file" | grep NEEDED | grep -qi cudart; then 29 | echo "Dynamic link to CUDA Runtime found in $so_file..." 30 | ldd "$so_file" 31 | exit 1 32 | else 33 | echo "No dynamic link to CUDA Runtime found in $so_file" 34 | fi 35 | done 36 | -------------------------------------------------------------------------------- /ci/fuzz-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | # NOTE: 19 | # run fuzz test after build 20 | # required jars: jni, jni-tests, slf4j-api 21 | 22 | set -ex 23 | 24 | WORKSPACE=${WORKSPACE:-$PWD} 25 | M2DIR=${M2DIR:-"$HOME/.m2"} 26 | 27 | SLF4J_VER=$(mvn help:evaluate -Dexpression=slf4j.version -q -DforceStdout) 28 | CLASSPATH=${CLASSPATH:-"$WORKSPACE/target/*:$M2DIR/repository/org/slf4j/slf4j-api/$SLF4J_VER/slf4j-api-$SLF4J_VER.jar"} 29 | 30 | java -cp "$CLASSPATH" \ 31 | com.nvidia.spark.rapids.jni.RmmSparkMonteCarlo \ 32 | --taskMaxMiB=2048 --gpuMiB=3072 --skewed --allocMode=ASYNC 33 | -------------------------------------------------------------------------------- /ci/nightly-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | set -ex 19 | 20 | nvidia-smi 21 | 22 | git submodule update --init --recursive 23 | 24 | MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B" 25 | # cuda11 or cuda12 26 | CUDA_VER=${CUDA_VER:-cuda`nvcc --version | sed -n 's/^.*release \([0-9]\+\)\..*$/\1/p'`} 27 | PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} 28 | USE_GDS=${USE_GDS:-ON} 29 | USE_SANITIZER=${USE_SANITIZER:-ON} 30 | BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON} 31 | ARM64=${ARM64:-false} 32 | artifact_suffix="${CUDA_VER}" 33 | 34 | profiles="source-javadoc" 35 | if [ "${ARM64}" == "true" ]; then 36 | profiles="${profiles},arm64" 37 | USE_GDS="OFF" 38 | USE_SANITIZER="ON" 39 | BUILD_FAULTINJ="OFF" 40 | artifact_suffix="${artifact_suffix}-arm64" 41 | fi 42 | 43 | ${MVN} clean package ${MVN_MIRROR} \ 44 | -P${profiles} \ 45 | -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \ 46 | -Dlibcudf.build.configure=true \ 47 | -DUSE_GDS=${USE_GDS} -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \ 48 | -DBUILD_TESTS=ON -DBUILD_BENCHMARKS=ON -DBUILD_FAULTINJ=${BUILD_FAULTINJ} -Dcuda.version=$CUDA_VER \ 49 | -DUSE_SANITIZER=${USE_SANITIZER} 50 | 51 | build_name=$(${MVN} help:evaluate -Dexpression=project.build.finalName -q -DforceStdout) 52 | . ci/check-cuda-dependencies.sh "target/${build_name}-${artifact_suffix}.jar" 53 | -------------------------------------------------------------------------------- /ci/premerge-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | set -ex 19 | 20 | nvidia-smi 21 | 22 | git submodule update --init --recursive 23 | 24 | MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B" 25 | PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} 26 | ${MVN} verify ${MVN_MIRROR} \ 27 | -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \ 28 | -Dlibcudf.build.configure=true \ 29 | -DUSE_GDS=ON -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \ 30 | -DBUILD_TESTS=ON -DBUILD_BENCHMARKS=ON 31 | 32 | build_name=$(${MVN} help:evaluate -Dexpression=project.build.finalName -q -DforceStdout) 33 | cuda_version=$(${MVN} help:evaluate -Dexpression=cuda.version -q -DforceStdout) 34 | . 
ci/check-cuda-dependencies.sh "target/${build_name}-${cuda_version}.jar" 35 | -------------------------------------------------------------------------------- /docs/img/memory_state_machine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-jni/d3aca7a5a423fcbaf0834baf5e24f091f05eee6c/docs/img/memory_state_machine.png -------------------------------------------------------------------------------- /patches/noop.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/spark-rapids-jni/d3aca7a5a423fcbaf0834baf5e24f091f05eee6c/patches/noop.patch -------------------------------------------------------------------------------- /src/main/cpp/benchmarks/bloom_filter.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | static void bloom_filter_put(nvbench::state& state) 26 | { 27 | constexpr int num_rows = 150'000'000; 28 | constexpr int num_hashes = 3; 29 | 30 | // create the bloom filter 31 | cudf::size_type const bloom_filter_bytes = state.get_int64("bloom_filter_bytes"); 32 | cudf::size_type const bloom_filter_longs = bloom_filter_bytes / sizeof(int64_t); 33 | auto bloom_filter = spark_rapids_jni::bloom_filter_create(num_hashes, bloom_filter_longs); 34 | 35 | // create a column of hashed values 36 | data_profile_builder builder; 37 | builder.no_validity(); 38 | auto const src = create_random_table({{cudf::type_id::INT64}}, row_count{num_rows}, builder); 39 | auto const input = spark_rapids_jni::xxhash64(*src); 40 | 41 | auto const stream = cudf::get_default_stream(); 42 | state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); 43 | state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, 44 | [&](nvbench::launch& launch, auto& timer) { 45 | timer.start(); 46 | spark_rapids_jni::bloom_filter_put(*bloom_filter, *input); 47 | stream.synchronize(); 48 | timer.stop(); 49 | }); 50 | 51 | size_t const bytes_read = num_rows * sizeof(int64_t); 52 | size_t const bytes_written = num_rows * sizeof(cudf::bitmask_type) * num_hashes; 53 | auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); 54 | state.add_element_count(std::size_t{num_rows}, "Rows Inserted"); 55 | state.add_global_memory_reads(bytes_read, "Bytes read"); 56 | state.add_global_memory_writes(bytes_written, "Bytes written"); 57 | state.add_element_count(static_cast(bytes_written) / time, "Write bytes/sec"); 58 | } 59 | 60 | NVBENCH_BENCH(bloom_filter_put) 61 | .set_name("Bloom Filter Put") 62 | .add_int64_axis("bloom_filter_bytes", 63 | {512 * 1024, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024}); 64 | 
-------------------------------------------------------------------------------- /src/main/cpp/benchmarks/cast_long_to_binary_string.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | 23 | #include 24 | #include 25 | 26 | static void long_to_binary_string(nvbench::state& state) 27 | { 28 | auto const num_rows = static_cast(state.get_int64("num_rows")); 29 | 30 | auto const input_table = create_random_table({cudf::type_id::INT64}, row_count{num_rows}); 31 | auto const long_col = input_table->get_column(0); 32 | auto const stream = cudf::get_default_stream(); 33 | state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); 34 | state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { 35 | spark_rapids_jni::long_to_binary_string(long_col, stream); 36 | }); 37 | } 38 | 39 | NVBENCH_BENCH(long_to_binary_string) 40 | .set_name("Long to Binary String Cast") 41 | .add_int64_axis("num_rows", {100'000, 500'000, 1'000'000, 5'000'000, 10'000'000}); 42 | -------------------------------------------------------------------------------- /src/main/cpp/benchmarks/cast_string_to_float.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2024, NVIDIA 
CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | 27 | void string_to_float(nvbench::state& state) 28 | { 29 | cudf::size_type const n_rows{(cudf::size_type)state.get_int64("num_rows")}; 30 | auto const float_tbl = create_random_table({cudf::type_id::FLOAT32}, row_count{n_rows}); 31 | auto const float_col = float_tbl->get_column(0); 32 | auto const string_col = cudf::strings::from_floats(float_col.view()); 33 | 34 | state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { 35 | auto rows = spark_rapids_jni::string_to_float(cudf::data_type{cudf::type_id::FLOAT32}, 36 | string_col->view(), 37 | false, 38 | cudf::get_default_stream()); 39 | }); 40 | } 41 | 42 | NVBENCH_BENCH(string_to_float) 43 | .set_name("Strings to Float Cast") 44 | .add_int64_axis("num_rows", {1 * 1024 * 1024, 100 * 1024 * 1024}); 45 | -------------------------------------------------------------------------------- /src/main/cpp/cmake/get_flatbuffers.cmake: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # Copyright (c) 2024-2025, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | # in compliance with the License. 
You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License 10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | # or implied. See the License for the specific language governing permissions and limitations under 12 | # the License. 13 | # ============================================================================= 14 | 15 | # Use CPM (through rapids_cpm_find) to find or clone flatbuffers at git tag v<VERSION>, then register flatbuffers_BINARY_DIR as a BUILD find_package root in the profilerjni-exports export set. 16 | function(find_and_configure_flatbuffers VERSION) 17 | 18 | rapids_cpm_find( 19 | flatbuffers ${VERSION} 20 | GLOBAL_TARGETS flatbuffers 21 | CPM_ARGS 22 | GIT_REPOSITORY https://github.com/google/flatbuffers.git 23 | GIT_TAG v${VERSION} 24 | GIT_SHALLOW TRUE 25 | ) 26 | 27 | include("${rapids-cmake-dir}/export/find_package_root.cmake") 28 | rapids_export_find_package_root( 29 | BUILD flatbuffers "${flatbuffers_BINARY_DIR}" EXPORT_SET profilerjni-exports 30 | ) 31 | 32 | endfunction() 33 | 34 | find_and_configure_flatbuffers(24.3.25) 35 | -------------------------------------------------------------------------------- /src/main/cpp/cmake/get_spdlog.cmake: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # Copyright (c) 2024-2025, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | # in compliance with the License. You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License 10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | # or implied.
See the License for the specific language governing permissions and limitations under 12 | # the License. 13 | # ============================================================================= 14 | 15 | # Use CPM to find or clone spdlog: sets CPM_DOWNLOAD_spdlog ON, calls rapids_cpm_spdlog with FMT_OPTION "EXTERNAL_FMT_HO", then marks the spdlog target as position-independent code. 16 | function(find_and_configure_spdlog) 17 | 18 | set(CPM_DOWNLOAD_spdlog ON) 19 | include("${rapids-cmake-dir}/cpm/spdlog.cmake") 20 | rapids_cpm_spdlog( 21 | FMT_OPTION "EXTERNAL_FMT_HO" 22 | ) 23 | set_target_properties(spdlog PROPERTIES POSITION_INDEPENDENT_CODE ON) 24 | 25 | endfunction() 26 | 27 | find_and_configure_spdlog() 28 | -------------------------------------------------------------------------------- /src/main/cpp/faultinj/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | #============================================================================= 2 | # Copyright (c) 2022-2023, NVIDIA CORPORATION. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | #============================================================================= 16 | 17 | project( 18 | CUDA_FAULT_INJECTION 19 | VERSION 22.08.00 20 | LANGUAGES C CXX CUDA 21 | ) 22 | # NOTE(review): the only source below is faultinj.cu (CUDA); CMAKE_CXX_STANDARD does not set the CUDA standard (CMAKE_CUDA_STANDARD does) - confirm the CUDA standard is set elsewhere. 23 | set(CMAKE_CXX_STANDARD 17) 24 | # Shared library built from the single CUDA source faultinj.cu. 25 | add_library( 26 | cufaultinj SHARED 27 | faultinj.cu 28 | ) 29 | # Private link dependency: header-only spdlog. 30 | target_link_libraries( 31 | cufaultinj PRIVATE spdlog::spdlog_header_only 32 | ) 33 | # Private link dependency: static CUPTI. 34 | target_link_libraries( 35 | cufaultinj PRIVATE CUDA::cupti_static 36 | ) 37 | -------------------------------------------------------------------------------- /src/main/cpp/profiler/profiler_debug.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | namespace spark_rapids_jni::profiler { 25 | // Maps a CUPTI activity kind to a std::string (the implementation is not part of this header). 26 | std::string activity_kind_to_string(CUpti_ActivityKind kind); 27 | // NOTE(review): presumably prints the CUPTI activity records held in the first valid_size bytes of buffer - confirm against the implementation. 28 | void print_cupti_buffer(uint8_t* buffer, size_t valid_size); 29 | 30 | } // namespace spark_rapids_jni::profiler 31 | -------------------------------------------------------------------------------- /src/main/cpp/profiler/profiler_schema.cpp.in: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | // Template source: exposes the profiler schema text as a C string inside a raw string literal. NOTE(review): the @SPARK_RAPIDS_JNI_PROFILER_SCHEMA@ placeholder is presumably substituted by the build at configure time - confirm. 17 | namespace spark_rapids_jni::profiler { 18 | char const* Profiler_Schema = R"raw(@SPARK_RAPIDS_JNI_PROFILER_SCHEMA@)raw"; 19 | } // namespace spark_rapids_jni::profiler 20 | -------------------------------------------------------------------------------- /src/main/cpp/profiler/profiler_serializer.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | #pragma once 18 | 19 | #include "profiler_generated.h" 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | 28 | namespace spark_rapids_jni::profiler { 29 | 30 | // Serializes profile data as flatbuffers. Public surface: a constructor taking a JNI env/writer plus buffer and flush settings, process_cupti_buffer() for a CUPTI buffer, and flush(). 31 | struct profiler_serializer { 32 | profiler_serializer( 33 | JNIEnv* env, jobject writer, size_t buffer_size, size_t flush_threshold, bool capture_allocs); 34 | void process_cupti_buffer(uint8_t* buffer, size_t valid_size); 35 | void flush(); 36 | 37 | private: 38 | void write_profile_header(); 39 | void process_api_activity(CUpti_ActivityAPI const*); 40 | void process_device_activity(CUpti_ActivityDevice4 const*); 41 | void process_dropped_records(size_t num_dropped); 42 | void process_marker_activity(CUpti_ActivityMarker2 const*); 43 | void process_marker_data(CUpti_ActivityMarkerData const*); 44 | void process_memcpy(CUpti_ActivityMemcpy5 const*); 45 | void process_memset(CUpti_ActivityMemset4 const*); 46 | void process_kernel(CUpti_ActivityKernel8 const*); 47 | void process_overhead(CUpti_ActivityOverhead const*); 48 | void report_num_dropped_records(); 49 | void write_current_fb(); 50 | // Members: JNI handles, the flush threshold, the alloc-capture flag, the FlatBufferBuilder, and one offsets vector per record type. 51 | JNIEnv* env_; 52 | jmethodID j_write_method_; 53 | jobject j_writer_; 54 | size_t flush_threshold_; 55 | bool capture_allocs_; 56 | flatbuffers::FlatBufferBuilder fbb_; 57 | std::vector> api_offsets_; 58 | std::vector> device_offsets_; 59 | std::vector> dropped_offsets_; 60 | std::vector> kernel_offsets_; 61 | std::vector> marker_offsets_; 62 | std::vector> marker_data_offsets_; 63 | std::vector> memcpy_offsets_; 64 | std::vector> memset_offsets_; 65 | std::vector> overhead_offsets_; 66 | }; 67 | 68 | } // namespace spark_rapids_jni::profiler 69 | -------------------------------------------------------------------------------- /src/main/cpp/src/CaseWhenJni.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION.
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "case_when.hpp" 18 | #include "cudf_jni_apis.hpp" 19 | 20 | extern "C" { 21 | // JNI wrapper for CaseWhen.selectFirstTrueIndex: wraps the column handles in bool_cols as a cudf::table_view, calls spark_rapids_jni::select_first_true_index and returns the released result as a jlong handle (0 is passed to the check/catch macros as the error return value). 22 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CaseWhen_selectFirstTrueIndex( 23 | JNIEnv* env, jclass, jlongArray bool_cols) 24 | { 25 | JNI_NULL_CHECK(env, bool_cols, "array of column handles is null", 0); 26 | try { 27 | cudf::jni::auto_set_device(env); 28 | cudf::jni::native_jpointerArray n_cudf_bool_columns(env, bool_cols); 29 | auto bool_column_views = n_cudf_bool_columns.get_dereferenced(); 30 | return cudf::jni::release_as_jlong( 31 | spark_rapids_jni::select_first_true_index(cudf::table_view(bool_column_views))); 32 | } 33 | CATCH_STD(env, 0); 34 | } 35 | } // extern "C" 36 | -------------------------------------------------------------------------------- /src/main/cpp/src/DateTimeUtilsJni.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "cudf_jni_apis.hpp" 18 | #include "datetime_utils.hpp" 19 | 20 | extern "C" { 21 | // JNI wrapper: null-checks the input handle, calls spark_rapids_jni::rebase_gregorian_to_julian on the pointed-to column and returns the released result as a jlong (0 is passed to the check/catch macros as the error return value). 22 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_DateTimeUtils_rebaseGregorianToJulian( 23 | JNIEnv* env, jclass, jlong input) 24 | { 25 | JNI_NULL_CHECK(env, input, "input column is null", 0); 26 | 27 | try { 28 | cudf::jni::auto_set_device(env); 29 | auto const input_cv = reinterpret_cast(input); 30 | auto output = spark_rapids_jni::rebase_gregorian_to_julian(*input_cv); 31 | return reinterpret_cast(output.release()); 32 | } 33 | CATCH_STD(env, 0); 34 | } 35 | // JNI wrapper: same pattern for spark_rapids_jni::rebase_julian_to_gregorian. 36 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_DateTimeUtils_rebaseJulianToGregorian( 37 | JNIEnv* env, jclass, jlong input) 38 | { 39 | JNI_NULL_CHECK(env, input, "input column is null", 0); 40 | 41 | try { 42 | cudf::jni::auto_set_device(env); 43 | auto const input_cv = reinterpret_cast(input); 44 | auto output = spark_rapids_jni::rebase_julian_to_gregorian(*input_cv); 45 | return reinterpret_cast(output.release()); 46 | } 47 | CATCH_STD(env, 0); 48 | } 49 | // JNI wrapper for spark_rapids_jni::truncate where the format is passed as a second native handle; both handles are null-checked. 50 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_DateTimeUtils_truncateWithColumnFormat( 51 | JNIEnv* env, jclass, jlong datetime, jlong format) 52 | { 53 | JNI_NULL_CHECK(env, datetime, "input datetime is null", 0); 54 | JNI_NULL_CHECK(env, format, "input format is null", 0); 55 | 56 | try { 57 | cudf::jni::auto_set_device(env); 58 | 59 | auto const datetime_cv = reinterpret_cast(datetime); 60 | auto const format_cv = reinterpret_cast(format); 61 | return
reinterpret_cast(spark_rapids_jni::truncate(*datetime_cv, *format_cv).release()); 62 | } 63 | CATCH_STD(env, 0); 64 | } 65 | // JNI wrapper for spark_rapids_jni::truncate where the format is a Java string that is copied into a std::string. 66 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_DateTimeUtils_truncateWithScalarFormat( 67 | JNIEnv* env, jclass, jlong datetime, jstring format) 68 | { 69 | JNI_NULL_CHECK(env, datetime, "input datetime is null", 0); 70 | // NOTE(review): unlike the column-format overload, `format` is not null-checked here - confirm native_jstring tolerates a null jstring. 71 | try { 72 | cudf::jni::auto_set_device(env); 73 | 74 | auto const datetime_cv = reinterpret_cast(datetime); 75 | auto const format_jstr = cudf::jni::native_jstring(env, format); 76 | auto const format = std::string(format_jstr.get(), format_jstr.size_bytes()); 77 | return reinterpret_cast(spark_rapids_jni::truncate(*datetime_cv, format).release()); 78 | } 79 | CATCH_STD(env, 0); 80 | } 81 | 82 | } // extern "C" 83 | -------------------------------------------------------------------------------- /src/main/cpp/src/HashJni.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | #include "cudf_jni_apis.hpp" 18 | #include "dtype_utils.hpp" 19 | #include "hash.hpp" 20 | #include "jni_utils.hpp" 21 | 22 | extern "C" { 23 | // Returns spark_rapids_jni::MAX_STACK_DEPTH to Java. 24 | JNIEXPORT jint JNICALL Java_com_nvidia_spark_rapids_jni_Hash_getMaxStackDepth(JNIEnv* env, jclass) 25 | { 26 | return spark_rapids_jni::MAX_STACK_DEPTH; 27 | } 28 | // JNI wrapper: builds a cudf::table_view from column_handles, calls spark_rapids_jni::murmur_hash3_32 with `seed` and returns the released result as a jlong handle. 29 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Hash_murmurHash32( 30 | JNIEnv* env, jclass, jint seed, jlongArray column_handles) 31 | { 32 | JNI_NULL_CHECK(env, column_handles, "array of column handles is null", 0); 33 | 34 | try { 35 | cudf::jni::auto_set_device(env); 36 | auto column_views = 37 | cudf::jni::native_jpointerArray{env, column_handles}.get_dereferenced(); 38 | return cudf::jni::release_as_jlong( 39 | spark_rapids_jni::murmur_hash3_32(cudf::table_view{column_views}, seed)); 40 | } 41 | CATCH_STD(env, 0); 42 | } 43 | // JNI wrapper: same pattern for spark_rapids_jni::xxhash64; the seed is a jlong here. 44 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Hash_xxhash64(JNIEnv* env, 45 | jclass, 46 | jlong seed, 47 | jlongArray column_handles) 48 | { 49 | JNI_NULL_CHECK(env, column_handles, "array of column handles is null", 0); 50 | 51 | try { 52 | cudf::jni::auto_set_device(env); 53 | auto column_views = 54 | cudf::jni::native_jpointerArray{env, column_handles}.get_dereferenced(); 55 | return cudf::jni::release_as_jlong( 56 | spark_rapids_jni::xxhash64(cudf::table_view{column_views}, seed)); 57 | } 58 | CATCH_STD(env, 0); 59 | } 60 | // JNI wrapper: same pattern for spark_rapids_jni::hive_hash, which is called without a seed. 61 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Hash_hiveHash(JNIEnv* env, 62 | jclass, 63 | jlongArray column_handles) 64 | { 65 | JNI_NULL_CHECK(env, column_handles, "array of column handles is null", 0); 66 | 67 | try { 68 | cudf::jni::auto_set_device(env); 69 | auto column_views = 70 | cudf::jni::native_jpointerArray{env, column_handles}.get_dereferenced(); 71 | return cudf::jni::release_as_jlong(spark_rapids_jni::hive_hash(cudf::table_view{column_views})); 72 | } 73 | CATCH_STD(env, 0); 74 | } 75 | } // extern "C" 76 | 
-------------------------------------------------------------------------------- /src/main/cpp/src/HistogramJni.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "cudf_jni_apis.hpp" 18 | #include "histogram.hpp" 19 | 20 | extern "C" { 21 | // JNI wrapper: null-checks both handles, calls spark_rapids_jni::create_histogram_if_valid on the pointed-to values/frequencies and returns the released result as a jlong handle. 22 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Histogram_createHistogramIfValid( 23 | JNIEnv* env, jclass, jlong values_handle, jlong frequencies_handle, jboolean output_as_lists) 24 | { 25 | JNI_NULL_CHECK(env, values_handle, "values_handle is null", 0); 26 | JNI_NULL_CHECK(env, frequencies_handle, "frequencies_handle is null", 0); 27 | 28 | try { 29 | cudf::jni::auto_set_device(env); 30 | 31 | auto const values = reinterpret_cast(values_handle); 32 | auto const frequencies = reinterpret_cast(frequencies_handle); 33 | return cudf::jni::ptr_as_jlong( 34 | spark_rapids_jni::create_histogram_if_valid(*values, *frequencies, output_as_lists) 35 | .release()); 36 | } 37 | CATCH_STD(env, 0); 38 | } 39 | // JNI wrapper: copies the Java double array into a std::vector and calls spark_rapids_jni::percentile_from_histogram; returns the released result as a jlong handle. 40 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Histogram_percentileFromHistogram( 41 | JNIEnv* env, jclass, jlong input_handle, jdoubleArray jpercentages, jboolean output_as_lists) 42 | { 43 | JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0); 44 | JNI_NULL_CHECK(env, jpercentages, "jpercentages is
null", 0); 45 | 46 | try { 47 | cudf::jni::auto_set_device(env); 48 | 49 | auto const input = reinterpret_cast(input_handle); 50 | auto const percentages = [&] { 51 | auto const native_percentages = cudf::jni::native_jdoubleArray(env, jpercentages); 52 | return std::vector(native_percentages.begin(), native_percentages.end()); 53 | }(); 54 | return cudf::jni::ptr_as_jlong( 55 | spark_rapids_jni::percentile_from_histogram(*input, percentages, output_as_lists).release()); 56 | } 57 | CATCH_STD(env, 0); 58 | } 59 | 60 | } // extern "C" 61 | -------------------------------------------------------------------------------- /src/main/cpp/src/HyperLogLogPlusPlusHostUDFJni.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "cudf_jni_apis.hpp" 18 | #include "hyper_log_log_plus_plus.hpp" 19 | #include "hyper_log_log_plus_plus_host_udf.hpp" 20 | 21 | extern "C" { 22 | // JNI wrapper: selects one of the four create_hllpp_*_host_udf factories by agg_type (0-3), passes `precision` to it and returns the resulting pointer as a jlong; any other agg_type fails via CUDF_FAIL. 23 | JNIEXPORT jlong JNICALL 24 | Java_com_nvidia_spark_rapids_jni_HyperLogLogPlusPlusHostUDF_createHLLPPHostUDF(JNIEnv* env, 25 | jclass, 26 | jint agg_type, 27 | jint precision) 28 | { 29 | try { 30 | auto udf_ptr = [&] { 31 | // The value of agg_type must be kept in sync with 32 | // `HyperLogLogPlusPlusHostUDF.java#AggregationType`.
33 | switch (agg_type) { 34 | case 0: return spark_rapids_jni::create_hllpp_reduction_host_udf(precision); 35 | case 1: return spark_rapids_jni::create_hllpp_reduction_merge_host_udf(precision); 36 | case 2: return spark_rapids_jni::create_hllpp_groupby_host_udf(precision); 37 | case 3: return spark_rapids_jni::create_hllpp_groupby_merge_host_udf(precision); 38 | default: CUDF_FAIL("Invalid aggregation type."); 39 | } 40 | }(); 41 | CUDF_EXPECTS(udf_ptr != nullptr, "Invalid HyperLogLogPlusPlus(HLLPP) UDF instance."); 42 | 43 | return reinterpret_cast(udf_ptr); 44 | } 45 | CATCH_STD(env, 0); 46 | } 47 | // JNI wrapper: null-checks the sketch column handle, calls spark_rapids_jni::estimate_from_hll_sketches with `precision` and returns the released result as a jlong handle. 48 | JNIEXPORT jlong JNICALL 49 | Java_com_nvidia_spark_rapids_jni_HyperLogLogPlusPlusHostUDF_estimateDistinctValueFromSketches( 50 | JNIEnv* env, jclass, jlong sketches, jint precision) 51 | { 52 | JNI_NULL_CHECK(env, sketches, "Sketch column is null", 0); 53 | try { 54 | cudf::jni::auto_set_device(env); 55 | auto const sketch_view = reinterpret_cast(sketches); 56 | return cudf::jni::ptr_as_jlong( 57 | spark_rapids_jni::estimate_from_hll_sketches(*sketch_view, precision).release()); 58 | } 59 | CATCH_STD(env, 0); 60 | } 61 | 62 | } // extern "C" 63 | -------------------------------------------------------------------------------- /src/main/cpp/src/MapJni.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "cudf_jni_apis.hpp" 18 | #include "jni_utils.hpp" 19 | #include "map.hpp" 20 | 21 | extern "C" { 22 | 23 | JNIEXPORT jlong Java_com_nvidia_spark_rapids_jni_Map_sort(JNIEnv* env, 24 | jclass, 25 | jlong map_haldle, 26 | jboolean is_descending) 27 | { 28 | JNI_NULL_CHECK(env, map_haldle, "column is null", 0); 29 | 30 | try { 31 | cudf::jni::auto_set_device(env); 32 | auto sort_order = is_descending ? cudf::order::DESCENDING : cudf::order::ASCENDING; 33 | cudf::column_view const& map_view = *reinterpret_cast(map_haldle); 34 | return cudf::jni::release_as_jlong(spark_rapids_jni::sort_map_column(map_view, sort_order)); 35 | } 36 | 37 | CATCH_STD(env, 0); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/cpp/src/RegexRewriteUtilsJni.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "cudf_jni_apis.hpp" 18 | #include "dtype_utils.hpp" 19 | #include "jni_utils.hpp" 20 | #include "regex_rewrite_utils.hpp" 21 | 22 | extern "C" { 23 | // JNI wrapper: views `input` as a strings column and `target` as a string scalar, calls spark_rapids_jni::literal_range_pattern(scv, *ss_scalar, d, start, end) and returns the released result as a jlong handle. 24 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_RegexRewriteUtils_literalRangePattern( 25 | JNIEnv* env, jclass, jlong input, jlong target, jint d, jint start, jint end) 26 | { 27 | JNI_NULL_CHECK(env, input, "input column is null", 0); 28 | JNI_NULL_CHECK(env, target, "target is null", 0); 29 | 30 | try { 31 | cudf::jni::auto_set_device(env); 32 | 33 | cudf::column_view* cv = reinterpret_cast(input); 34 | cudf::strings_column_view scv(*cv); 35 | cudf::string_scalar* ss_scalar = reinterpret_cast(target); 36 | return cudf::jni::release_as_jlong( 37 | spark_rapids_jni::literal_range_pattern(scv, *ss_scalar, d, start, end)); 38 | } 39 | CATCH_STD(env, 0); 40 | } 41 | } // extern "C" 42 | -------------------------------------------------------------------------------- /src/main/cpp/src/SubStringIndexJni.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | #include "cudf_jni_apis.hpp" 18 | #include "substring_index.hpp" 19 | 20 | extern "C" { 21 | // JNI wrapper: views strings_handle as a strings column and `delimiter` as a string scalar, calls spark_rapids_jni::substring_index with `count` and returns the released result as a jlong handle. 22 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_GpuSubstringIndexUtils_substringIndex( 23 | JNIEnv* env, jclass, jlong strings_handle, jlong delimiter, jint count) 24 | { 25 | JNI_NULL_CHECK(env, strings_handle, "strings column handle is null", 0); 26 | JNI_NULL_CHECK(env, delimiter, "delimiter scalar handle is null", 0); 27 | try { 28 | cudf::jni::auto_set_device(env); 29 | auto const input = reinterpret_cast(strings_handle); 30 | auto const strings_column = cudf::strings_column_view{*input}; 31 | cudf::string_scalar* ss_scalar = reinterpret_cast(delimiter); 32 | return cudf::jni::release_as_jlong( 33 | spark_rapids_jni::substring_index(strings_column, *ss_scalar, count)); 34 | } 35 | CATCH_STD(env, 0); 36 | } 37 | } // extern "C" 38 | -------------------------------------------------------------------------------- /src/main/cpp/src/TaskPriorityJni.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | #include "cudf_jni_apis.hpp" 18 | #include "jni_utils.hpp" 19 | #include "task_priority.hpp" 20 | 21 | namespace { 22 | // Track the next priority to assign and maintain a map of attempt_id to priority 23 | static long next_task_priority = std::numeric_limits::max() - 1; 24 | static std::mutex priority_mutex; 25 | static std::unordered_map attempt_priorities; 26 | } // namespace 27 | 28 | namespace spark_rapids_jni { 29 | // Returns the priority for attempt_id: -1 always maps to the maximum value; any other id gets a priority on first use (taken from a decreasing counter) and the same value on later calls until task_done() erases it. 30 | long get_task_priority(long attempt_id) 31 | { 32 | if (attempt_id == -1) { 33 | // Special case: -1 always gets highest priority 34 | return std::numeric_limits::max(); 35 | } 36 | 37 | std::lock_guard lock(priority_mutex); 38 | auto it = attempt_priorities.find(attempt_id); 39 | if (it != attempt_priorities.end()) { 40 | // Return existing priority for this attempt_id 41 | return it->second; 42 | } 43 | 44 | // Assign new priority for this attempt_id 45 | long priority = next_task_priority--; 46 | attempt_priorities[attempt_id] = priority; 47 | return priority; 48 | } 49 | // Erases the stored priority for attempt_id under the mutex; -1 is never stored, so it is ignored. 50 | void task_done(long attempt_id) 51 | { 52 | if (attempt_id == -1) { 53 | return; // Nothing to do for special case 54 | } 55 | 56 | std::lock_guard lock(priority_mutex); 57 | attempt_priorities.erase(attempt_id); 58 | } 59 | 60 | } // namespace spark_rapids_jni 61 | 62 | extern "C" { 63 | // JNI entry points. Bodies are wrapped in try/CATCH_STD, as in the other JNI files, so that a C++ exception (for example from the map insert or the mutex) is handled by the macro instead of escaping the extern "C" function. 64 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_TaskPriority_getTaskPriority( 65 | JNIEnv* env, jclass, jlong task_attempt_id) 66 | { 67 | try { 68 | return spark_rapids_jni::get_task_priority(task_attempt_id); 69 | } 70 | CATCH_STD(env, 0); 71 | } 72 | 73 | JNIEXPORT void JNICALL Java_com_nvidia_spark_rapids_jni_TaskPriority_taskDone(JNIEnv* env, 74 | jclass, 75 | jlong task_attempt_id) 76 | { 77 | try { 78 | spark_rapids_jni::task_done(task_attempt_id); 79 | } 80 | CATCH_STD(env, ); 81 | } 82 | } // extern "C" 83 | -------------------------------------------------------------------------------- /src/main/cpp/src/ZOrderJni.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2024,
NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "cudf_jni_apis.hpp" 18 | #include "dtype_utils.hpp" 19 | #include "zorder.hpp" 20 | 21 | extern "C" { 22 | // JNI wrapper: builds a cudf::table_view from input_columns, calls spark_rapids_jni::interleave_bits and returns the released result as a jlong handle. 23 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ZOrder_interleaveBits( 24 | JNIEnv* env, jclass, jlongArray input_columns) 25 | { 26 | JNI_NULL_CHECK(env, input_columns, "input is null", 0); 27 | 28 | try { 29 | cudf::jni::auto_set_device(env); 30 | cudf::jni::native_jpointerArray n_input_columns(env, input_columns); 31 | cudf::table_view tbl(n_input_columns.get_dereferenced()); 32 | 33 | return cudf::jni::ptr_as_jlong(spark_rapids_jni::interleave_bits(tbl).release()); 34 | } 35 | CATCH_STD(env, 0); 36 | } 37 | // JNI wrapper: same pattern for spark_rapids_jni::hilbert_index, which also receives num_bits. 38 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ZOrder_hilbertIndex( 39 | JNIEnv* env, jclass, jint num_bits, jlongArray input_columns) 40 | { 41 | JNI_NULL_CHECK(env, input_columns, "input is null", 0); 42 | 43 | try { 44 | cudf::jni::auto_set_device(env); 45 | cudf::jni::native_jpointerArray n_input_columns(env, input_columns); 46 | cudf::table_view tbl(n_input_columns.get_dereferenced()); 47 | 48 | return cudf::jni::ptr_as_jlong(spark_rapids_jni::hilbert_index(num_bits, tbl).release()); 49 | } 50 | CATCH_STD(env, 0); 51 | } 52 | } // extern "C" 53 | -------------------------------------------------------------------------------- /src/main/cpp/src/case_when.hpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | namespace spark_rapids_jni { 28 | 29 | /** 30 | * 31 | * Select the column index of the first true value across the bool columns. 32 | * For a row that does not contain true, use the end index (the number of columns). 33 | * 34 | * e.g.: 35 | * column 0 in table: true, false, false, false 36 | * column 1 in table: false, true, false, false 37 | * column 2 in table: false, false, true, false 38 | * 39 | * 1st row is: true, false, false; first true index is 0 40 | * 2nd row is: false, true, false; first true index is 1 41 | * 3rd row is: false, false, true; first true index is 2 42 | * 4th row is: false, false, false; no true found, so the index is set to the end index 3 43 | * 44 | * output column: 0, 1, 2, 3 45 | * In the `case when` context, an index of 3 here means using the NULL value.
46 | * 47 | */ 48 | std::unique_ptr select_first_true_index( 49 | cudf::table_view const& when_bool_columns, 50 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 51 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 52 | 53 | } // namespace spark_rapids_jni 54 | -------------------------------------------------------------------------------- /src/main/cpp/src/cast_string_to_timestamp_common.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace spark_rapids_jni { 22 | 23 | /** 24 | * Timezone type 25 | * Used in casting string with timezone to timestamp 26 | */ 27 | enum class TZ_TYPE : uint8_t { 28 | 29 | // Not specified timezone in the string, indicate to use the default timezone. 30 | NOT_SPECIFIED = 0, 31 | 32 | // Fixed offset timezone 33 | // String starts with UT/GMT/UTC/[+-], and it's valid. 34 | // E.g: +08:00, +08, +1:02:30, -010203, GMT+8, UTC+8:00, UT+8 35 | // E.g: +01:2:03 36 | FIXED_TZ = 1, 37 | 38 | // Not FIXED_TZ, it's a valid timezone string. 39 | // E.g.: java.time.ZoneId.SHORT_IDS: CTT 40 | // E.g.: Region-based timezone: America/Los_Angeles 41 | OTHER_TZ = 2, 42 | 43 | // Invalid timezone. 44 | // String starts with UT/GMT/UTC/[+-], but it's invalid. 
45 | // E.g: UTC+19:00, GMT+19:00, max offset is 18 hours 46 | // E.g: GMT+01:2:03, +01:2:03, special case 47 | // E.g: non-exist-timezone 48 | INVALID_TZ = 3 49 | }; 50 | 51 | } // namespace spark_rapids_jni 52 | -------------------------------------------------------------------------------- /src/main/cpp/src/datetime_utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace spark_rapids_jni { 22 | std::unique_ptr rebase_gregorian_to_julian( 23 | cudf::column_view const& input, 24 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 25 | rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); 26 | 27 | std::unique_ptr rebase_julian_to_gregorian( 28 | cudf::column_view const& input, 29 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 30 | rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); 31 | 32 | std::unique_ptr truncate( 33 | cudf::column_view const& datetime, 34 | cudf::column_view const& format, 35 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 36 | rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); 37 | 38 | std::unique_ptr truncate( 39 | cudf::column_view const& datetime, 40 | std::string const& format, 41 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 42 | rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); 43 | 44 | } // namespace spark_rapids_jni 45 | -------------------------------------------------------------------------------- /src/main/cpp/src/decimal_utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | #include 26 | 27 | namespace cudf::jni { 28 | 29 | std::unique_ptr multiply_decimal128( 30 | cudf::column_view const& a, 31 | cudf::column_view const& b, 32 | int32_t product_scale, 33 | bool const cast_interim_result, 34 | rmm::cuda_stream_view stream = cudf::get_default_stream()); 35 | 36 | std::unique_ptr divide_decimal128( 37 | cudf::column_view const& a, 38 | cudf::column_view const& b, 39 | int32_t quotient_scale, 40 | rmm::cuda_stream_view stream = cudf::get_default_stream()); 41 | 42 | std::unique_ptr integer_divide_decimal128( 43 | cudf::column_view const& a, 44 | cudf::column_view const& b, 45 | int32_t quotient_scale, 46 | rmm::cuda_stream_view stream = cudf::get_default_stream()); 47 | 48 | std::unique_ptr remainder_decimal128( 49 | cudf::column_view const& a, 50 | cudf::column_view const& b, 51 | int32_t remainder_scale, 52 | rmm::cuda_stream_view stream = cudf::get_default_stream()); 53 | 54 | std::unique_ptr add_decimal128( 55 | cudf::column_view const& a, 56 | cudf::column_view const& b, 57 | int32_t quotient_scale, 58 | rmm::cuda_stream_view stream = cudf::get_default_stream()); 59 | 60 | std::unique_ptr sub_decimal128( 61 | cudf::column_view const& a, 62 | cudf::column_view const& b, 63 | int32_t quotient_scale, 64 | rmm::cuda_stream_view stream = cudf::get_default_stream()); 65 | 66 | /** 67 | * @brief Cast floating point values to decimals, matching the behavior of Spark. 
68 | * 69 | * @param input The input column, which is either FLOAT32 or FLOAT64 type 70 | * @param output_type The output decimal type 71 | * @param precision The maximum number of digits that will be preserved in the output 72 | * @param stream CUDA stream used for device memory operations and kernel launches 73 | * @param mr Device memory resource used to allocate the returned column's device memory 74 | * @return A cudf column containing the cast result and a boolean value indicating whether the cast 75 | operation has failed for any input rows 76 | */ 77 | std::pair, bool> floating_point_to_decimal( 78 | cudf::column_view const& input, 79 | cudf::data_type output_type, 80 | int32_t precision, 81 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 82 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 83 | 84 | } // namespace cudf::jni 85 | -------------------------------------------------------------------------------- /src/main/cpp/src/emptyfile.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | // Intentionally empty 18 | -------------------------------------------------------------------------------- /src/main/cpp/src/get_json_object.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | #include 22 | 23 | #include 24 | #include 25 | 26 | namespace spark_rapids_jni { 27 | 28 | /** 29 | * @brief The maximum supported depth that a JSON path can reach. 30 | */ 31 | constexpr int MAX_JSON_PATH_DEPTH = 16; 32 | 33 | /** 34 | * @brief Type of instruction in a JSON path. 35 | */ 36 | enum class path_instruction_type : int8_t { WILDCARD, INDEX, NAMED }; 37 | 38 | /** 39 | * @brief Extract JSON object from a JSON string based on the specified JSON path. 40 | * 41 | * If the input JSON string is invalid, or it does not contain the object at the given path, a null 42 | * will be returned. 43 | */ 44 | std::unique_ptr get_json_object( 45 | cudf::strings_column_view const& input, 46 | std::vector> const& instructions, 47 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 48 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 49 | 50 | /** 51 | * @brief Extract multiple JSON objects from a JSON string based on the specified JSON paths. 
52 | * 53 | * This function processes all the JSON paths in parallel, which may be faster than calling 54 | * to `get_json_object` on the individual JSON paths. However, it may consume much more GPU 55 | * memory, proportional to the number of JSON paths. 56 | * @param input the input string column to parse JSON from 57 | * @param json_paths the path operations to extract 58 | * @param memory_budget_bytes a memory budget for temporary memory usage if > 0 59 | * @param parallel_override if this value is greater than 0 then it specifies the 60 | * number of paths to process in parallel (this will cause the 61 | * `memory_budget_bytes` parameter to be ignored) 62 | */ 63 | std::vector> get_json_object_multiple_paths( 64 | cudf::strings_column_view const& input, 65 | std::vector>> const& 66 | json_paths, 67 | int64_t memory_budget_bytes, 68 | int32_t parallel_override, 69 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 70 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 71 | 72 | } // namespace spark_rapids_jni 73 | -------------------------------------------------------------------------------- /src/main/cpp/src/hash.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | namespace spark_rapids_jni { 26 | 27 | constexpr int64_t DEFAULT_XXHASH64_SEED = 42; 28 | constexpr int MAX_STACK_DEPTH = 8; 29 | 30 | /** 31 | * @brief Computes the murmur32 hash value of each row in the input set of columns. 32 | * 33 | * @param input The table of columns to hash 34 | * @param seed Optional seed value to use for the hash function 35 | * @param stream CUDA stream used for device memory operations and kernel launches 36 | * @param mr Device memory resource used to allocate the returned column's device memory 37 | * 38 | * @returns A column where each row is the hash of a column from the input. 39 | */ 40 | std::unique_ptr murmur_hash3_32( 41 | cudf::table_view const& input, 42 | uint32_t seed = 0, 43 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 44 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 45 | 46 | /** 47 | * @brief Computes the xxhash64 hash value of each row in the input set of columns. 48 | * 49 | * @param input The table of columns to hash 50 | * @param seed Optional seed value to use for the hash function 51 | * @param stream CUDA stream used for device memory operations and kernel launches 52 | * @param mr Device memory resource used to allocate the returned column's device memory 53 | * 54 | * @returns A column where each row is the hash of a column from the input. 55 | */ 56 | std::unique_ptr xxhash64( 57 | cudf::table_view const& input, 58 | int64_t seed = DEFAULT_XXHASH64_SEED, 59 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 60 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 61 | 62 | /** 63 | * @brief Computes the Hive hash value of each row in the input set of columns. 
64 | * 65 | * @param input The table of columns to hash 66 | * @param stream CUDA stream used for device memory operations and kernel launches 67 | * @param mr Device memory resource used to allocate the returned column's device memory 68 | * 69 | * @returns A column where each row is the hash of a column from the input. 70 | */ 71 | std::unique_ptr hive_hash( 72 | cudf::table_view const& input, 73 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 74 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 75 | 76 | } // namespace spark_rapids_jni 77 | -------------------------------------------------------------------------------- /src/main/cpp/src/hyper_log_log_plus_plus_const.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | namespace spark_rapids_jni { 20 | 21 | /** 22 | * @brief The number of bits that is required for a HLLPP register value. 23 | * 24 | * This number is determined by the maximum number of leading binary zeros a 25 | * hashcode can produce. This is equal to the number of bits the hashcode 26 | * returns. The current implementation uses a 64-bit hashcode, this means 6-bits 27 | * are (at most) needed to store the number of leading zeros. 
28 | */ 29 | constexpr int REGISTER_VALUE_BITS = 6; 30 | 31 | /** 32 | * @brief The number of registers that can be stored in a single long. 33 | * It's 64 / 6 = 10. 34 | */ 35 | constexpr int REGISTERS_PER_LONG = 64 / REGISTER_VALUE_BITS; 36 | 37 | } // namespace spark_rapids_jni 38 | -------------------------------------------------------------------------------- /src/main/cpp/src/hyper_log_log_plus_plus_host_udf.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | namespace spark_rapids_jni { 22 | 23 | cudf::host_udf_base* create_hllpp_reduction_host_udf(int precision); 24 | 25 | cudf::host_udf_base* create_hllpp_reduction_merge_host_udf(int precision); 26 | 27 | cudf::host_udf_base* create_hllpp_groupby_host_udf(int precision); 28 | 29 | cudf::host_udf_base* create_hllpp_groupby_merge_host_udf(int precision); 30 | 31 | } // namespace spark_rapids_jni 32 | -------------------------------------------------------------------------------- /src/main/cpp/src/map.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "map.hpp" 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | namespace spark_rapids_jni { 28 | 29 | std::unique_ptr sort_map_column(cudf::column_view const& input, 30 | cudf::order sort_order, 31 | rmm::cuda_stream_view stream, 32 | rmm::device_async_resource_ref mr) 33 | { 34 | CUDF_EXPECTS(input.type().id() == cudf::type_id::LIST, 35 | "maps_column_view input must be LIST type"); 36 | if (input.size() == 0) { return cudf::make_empty_column(input.type()); } 37 | 38 | auto const lists_of_structs = cudf::lists_column_view(input); 39 | auto const structs = lists_of_structs.child(); 40 | CUDF_EXPECTS(structs.type().id() == cudf::type_id::STRUCT, 41 | "maps_column_view input must have exactly 1 child (STRUCT) column."); 42 | CUDF_EXPECTS(structs.num_children() == 2, 43 | "maps_column_view key-value struct must have exactly 2 children."); 44 | auto keys = structs.child(0); 45 | auto values = structs.child(1); 46 | CUDF_EXPECTS(structs.null_count() == 0, "maps_column_view key-value struct must have no null."); 47 | CUDF_EXPECTS(keys.null_count() == 0, "maps_column_view keys must have no null."); 48 | auto segments = lists_of_structs.offsets(); 49 | 50 | auto sorted = cudf::segmented_sort_by_key(cudf::table_view{{structs}}, 51 | cudf::table_view{{keys}}, 52 | segments, 53 | {sort_order}, 54 | {}, // 
Map keys MUST not be null 55 | stream, 56 | mr); 57 | 58 | return cudf::make_lists_column(input.size(), 59 | std::make_unique(segments), // copy segment offsets 60 | std::move(sorted->release().front()), // child column 61 | input.null_count(), 62 | cudf::copy_bitmask(input, stream, mr), 63 | stream, 64 | mr); 65 | } 66 | 67 | } // namespace spark_rapids_jni 68 | -------------------------------------------------------------------------------- /src/main/cpp/src/map.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | namespace spark_rapids_jni { 24 | 25 | /** 26 | * Sort entries for each map in map column according to the keys of each map. 27 | * Note: 28 | * The keys of map MUST not be null. 29 | * Assume that maps do not have duplicate keys. 30 | * Do not normalize/sort the nested maps in `KEY` column; this means 31 | * only the first level LIST(STRUCT(KEY, VALUE)) is considered as map type. 32 | * 33 | * @param input Input map column, should be of LIST(STRUCT(KEY, VALUE)) type. 34 | * @param sort_order Ascending or descending order 35 | * @return Sorted map according to the sort order of the key column in map.
36 | * @throws cudf::logic_error If the input column is not a LIST(STRUCT(KEY, VALUE)) column or the 37 | * keys contain nulls. 38 | */ 39 | std::unique_ptr sort_map_column( 40 | cudf::column_view const& input, 41 | cudf::order sort_order, 42 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 43 | rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); 44 | 45 | } // namespace spark_rapids_jni 46 | -------------------------------------------------------------------------------- /src/main/cpp/src/number_converter.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | 26 | namespace spark_rapids_jni { 27 | 28 | using convert_number_t = std::variant; 29 | 30 | /** 31 | * 32 | * @brief Convert numbers(in string column) between different number bases. If toBase>0 the result 33 | * is unsigned, otherwise it is signed. First trim the space characters (ASCII 32). Return null if 34 | * len(trim_ascii_32(str)) == 0. 
Return all nulls if `from_base` or `to_base` is not in range [2, 35 | * 36] 36 | * 37 | * e.g.: 38 | * convert('11', 2, 10) = '3' 39 | * convert('F', 16, 10) = '15' 40 | * convert('17', 10, 16) = '11' 41 | * 42 | * @param input the input string column or string scalar 43 | * @param from_base the number base of input, valid range is [2, 36] 44 | * @param to_base the number base of output, valid range is [2, 36] 45 | * 46 | * @return the string column contains numbers with `to_base` base 47 | */ 48 | std::unique_ptr convert( 49 | convert_number_t const& input, 50 | convert_number_t const& from_base, 51 | convert_number_t const& to_base, 52 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 53 | rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); 54 | 55 | /** 56 | * 57 | * @brief Check if overflow occurs for converting numbers(in string column) between different 58 | * number bases. This is for the checking when it's ANSI mode. For more details, please refer to the 59 | * convert function. 60 | * 61 | * @param input the input string column or string scalar 62 | * @param from_base the number base of input, valid range is [2, 36] 63 | * @param to_base the number base of output, valid range is [2, 36] 64 | * 65 | * @return If overflow occurs, return true; otherwise, return false. 66 | */ 67 | bool is_convert_overflow( 68 | convert_number_t const& input, 69 | convert_number_t const& from_base, 70 | convert_number_t const& to_base, 71 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 72 | rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); 73 | 74 | } // namespace spark_rapids_jni 75 | -------------------------------------------------------------------------------- /src/main/cpp/src/regex_rewrite_utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | namespace spark_rapids_jni { 24 | /** 25 | * @brief Check if input string contains regex pattern `literal[start-end]{len,}`, which means 26 | * a literal string followed by a range of characters in the range of start to end, with at least 27 | * len characters. 28 | * 29 | * @param input Column of strings to check for literal. 30 | * @param literal UTF-8 encoded string to check in the input column. 31 | * @param len Minimum number of characters to check after the literal. 32 | * @param start Minimum UTF-8 codepoint value to check for in the range. 33 | * @param end Maximum UTF-8 codepoint value to check for in the range. 34 | * @param stream CUDA stream used for device memory operations and kernel launches. 35 | * @param mr Device memory resource used to allocate the returned column's device memory.
36 | */ 37 | std::unique_ptr literal_range_pattern( 38 | cudf::strings_column_view const& input, 39 | cudf::string_scalar const& literal, 40 | int const len, 41 | int const start, 42 | int const end, 43 | rmm::cuda_stream_view stream = rmm::cuda_stream_default, 44 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 45 | } // namespace spark_rapids_jni 46 | -------------------------------------------------------------------------------- /src/main/cpp/src/row_conversion.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | namespace spark_rapids_jni { 29 | 30 | std::vector> convert_to_rows_fixed_width_optimized( 31 | cudf::table_view const& tbl, 32 | // TODO need something for validity 33 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 34 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 35 | 36 | std::vector> convert_to_rows( 37 | cudf::table_view const& tbl, 38 | // TODO need something for validity 39 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 40 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 41 | 42 | std::unique_ptr convert_from_rows_fixed_width_optimized( 43 | cudf::lists_column_view const& input, 44 | std::vector const& schema, 45 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 46 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 47 | 48 | std::unique_ptr convert_from_rows( 49 | cudf::lists_column_view const& input, 50 | std::vector const& schema, 51 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 52 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 53 | 54 | } // namespace spark_rapids_jni 55 | -------------------------------------------------------------------------------- /src/main/cpp/src/shuffle_split_detail.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | namespace spark_rapids_jni { 20 | 21 | namespace detail { 22 | 23 | /** 24 | * @brief Functor for computing size of data elements for a given cudf type. 25 | * 26 | * Note: columns types which themselves inherently have no data (strings, lists, 27 | * structs) return 0. 28 | */ 29 | struct size_of_helper { 30 | template 31 | constexpr std::enable_if_t() && !std::is_same_v, 32 | size_t> 33 | operator()() const 34 | { 35 | return 0; 36 | } 37 | 38 | template 39 | constexpr std::enable_if_t() && std::is_same_v, 40 | size_t> 41 | operator()() const 42 | { 43 | return sizeof(cudf::device_storage_type_t); 44 | } 45 | 46 | template 47 | constexpr std::enable_if_t(), size_t> __device__ 48 | operator()() const noexcept 49 | { 50 | return sizeof(cudf::device_storage_type_t); 51 | } 52 | }; 53 | 54 | /** 55 | * @brief Header for each partition. 56 | * 57 | * The values are stored in big-endian format. 58 | */ 59 | struct partition_header { 60 | uint32_t magic_number; 61 | uint32_t row_index; // row index in the source table that this partition started at 62 | uint32_t num_rows; 63 | uint32_t validity_size; 64 | uint32_t offset_size; 65 | uint32_t total_size; // size of validity+offset+data sections 66 | uint32_t num_flattened_columns; 67 | }; 68 | 69 | // alignment values for each validity type, as applied at the end of that data type 70 | // in each partition. 
so for example all of the grouped-together validity buffers for 71 | // a given partition will have a final 4 byte alignment applied before the offset buffers begin 72 | constexpr size_t validity_pad = 4; 73 | constexpr size_t offset_pad = 4; 74 | constexpr size_t data_pad = 4; 75 | 76 | /** 77 | * @brief Compute per-partition metadata size. 78 | */ 79 | constexpr size_t compute_per_partition_metadata_size(size_t total_columns) 80 | { 81 | auto const has_validity_length = (total_columns + 7) / 8; // has-validity bit per column 82 | return sizeof(partition_header) + has_validity_length; 83 | } 84 | 85 | // align all column size allocations to this boundary so that all output column buffers 86 | // start at that alignment. 87 | static constexpr std::size_t split_align = 64; 88 | 89 | /** 90 | * @brief Buffer type enum 91 | * 92 | * Note: these values matter. Don't rearrange them. 93 | */ 94 | enum class buffer_type { VALIDITY = 0, OFFSETS = 1, DATA = 2 }; 95 | 96 | } // namespace detail 97 | 98 | } // namespace spark_rapids_jni 99 | -------------------------------------------------------------------------------- /src/main/cpp/src/spark_rapids_jni_version.cpp.in: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "spark_rapids_jni_version.h" 18 | 19 | namespace spark_rapids_jni { 20 | 21 | char const Version[] = "@CMAKE_PROJECT_VERSION@ @SPARK_RAPIDS_JNI_COMMIT_DETAILS@"; 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/main/cpp/src/spark_rapids_jni_version.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | namespace spark_rapids_jni { 20 | 21 | extern char const Version[]; 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/main/cpp/src/substring_index.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | 24 | namespace spark_rapids_jni { 25 | 26 | /** 27 | * @brief Returns the substring of strings before count occurrence of the delimiter delim. 28 | * 29 | * @param strings Strings column 30 | * @param delimiter The delimiter string used to slice string 31 | * @param count Specify the occurrence of the delimiter 32 | * @return A string column used to store the result 33 | */ 34 | std::unique_ptr substring_index( 35 | cudf::strings_column_view const& strings, 36 | cudf::string_scalar const& delimiter, 37 | cudf::size_type count, 38 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 39 | 40 | } // namespace spark_rapids_jni 41 | -------------------------------------------------------------------------------- /src/main/cpp/src/task_priority.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | namespace spark_rapids_jni { 20 | 21 | /** 22 | * Get the priority for a task. Larger priorities mean the task 23 | * should get priority access to resources compared to smaller 24 | * priority numbers. 
25 | */ 26 | long get_task_priority(long attempt_id); 27 | 28 | /** 29 | * Inform the system that a particular task is finished. 30 | */ 31 | void task_done(long attempt_id); 32 | 33 | } // namespace spark_rapids_jni 34 | -------------------------------------------------------------------------------- /src/main/cpp/src/utilities.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | namespace spark_rapids_jni { 30 | 31 | std::unique_ptr bitmask_bitwise_or( 32 | std::vector> const& input, 33 | rmm::cuda_stream_view stream, 34 | rmm::device_async_resource_ref mr) 35 | { 36 | CUDF_EXPECTS(input.size() > 0, "Empty input"); 37 | auto const mask_size = (*input.begin()).size(); 38 | CUDF_EXPECTS( 39 | std::all_of( 40 | input.begin(), input.end(), [mask_size](auto mask) { return mask.size() == mask_size; }), 41 | "Encountered size mismatch in inputs"); 42 | if (mask_size == 0) { 43 | return std::make_unique(rmm::device_buffer{0, stream, mr}); 44 | } 45 | 46 | // move the pointers to the gpu 47 | std::vector h_input(input.size()); 48 | std::transform( 49 | input.begin(), input.end(), h_input.begin(), [](auto mask) { return mask.data(); }); 50 | auto d_input = cudf::detail::make_device_uvector_async( 51 | h_input, stream, rmm::mr::get_current_device_resource()); 52 | 53 | std::unique_ptr out = 54 | std::make_unique(mask_size * sizeof(cudf::bitmask_type), stream, mr); 55 | thrust::transform(rmm::exec_policy(stream), 56 | thrust::make_counting_iterator(0), 57 | thrust::make_counting_iterator(0) + mask_size, 58 | static_cast(out->data()), 59 | cuda::proclaim_return_type( 60 | [buffers = d_input.data(), 61 | num_buffers = input.size()] __device__(cudf::size_type word_index) { 62 | cudf::bitmask_type out = buffers[0][word_index]; 63 | for (auto idx = 1; idx < num_buffers; idx++) { 64 | out |= buffers[idx][word_index]; 65 | } 66 | return out; 67 | })); 68 | 69 | return out; 70 | } 71 | 72 | } // namespace spark_rapids_jni 73 | -------------------------------------------------------------------------------- /src/main/cpp/src/utilities.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | namespace spark_rapids_jni { 28 | 29 | /** 30 | * @brief Bitwise-or an array of equally-sized bitmask buffers into a single output buffer 31 | * 32 | * @param input The array of input bitmask buffers. 33 | * @param stream CUDA stream used for device memory operations and kernel launches. 34 | * @param mr Device memory resource used to allocate the returned buffer's memory. 35 | * 36 | */ 37 | std::unique_ptr bitmask_bitwise_or( 38 | std::vector> const& input, 39 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 40 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 41 | 42 | } // namespace spark_rapids_jni 43 | -------------------------------------------------------------------------------- /src/main/cpp/src/version.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | namespace spark_rapids_jni { 20 | 21 | /** 22 | * @brief Enum class representing different Spark platform types. 23 | * The values must match the ordinal values defined in SparkPlatformType.java. 24 | * - VANILLA_SPARK: Represents the standard Apache Spark platform. 25 | * - DATABRICKS: Represents the Databricks platform. 26 | * - CLOUDERA: Represents the Cloudera platform. 27 | * - NUM_PLATFORMS: Represents the total number of platforms defined. 28 | */ 29 | enum class spark_platform_type { VANILLA_SPARK = 0, DATABRICKS, CLOUDERA, NUM_PLATFORMS }; 30 | 31 | class spark_system { 32 | public: 33 | /** 34 | * @brief Constructor to initialize the spark system with platform type and version. 35 | * NOTE: The `platform_ordinal` MUST keep sync with SparkPlatformType.java 36 | * @param platform_ordinal The platform ordinal value. 37 | * @param major Major version number. 38 | * @param minor Minor version number. 39 | * @param patch Patch version number. 
40 | */ 41 | spark_system(int platform_ordinal, int major_, int minor_, int patch_) 42 | : platform_type{static_cast(platform_ordinal)}, 43 | major{major_}, 44 | minor{minor_}, 45 | patch{patch_} 46 | { 47 | } 48 | 49 | bool is_vanilla_spark() const { return platform_type == spark_platform_type::VANILLA_SPARK; } 50 | bool is_databricks() const { return platform_type == spark_platform_type::DATABRICKS; } 51 | 52 | bool is_version_eq(int major_, int minor_, int patch_) const 53 | { 54 | return major == major_ && minor == minor_ && patch == patch_; 55 | } 56 | 57 | bool is_version_ge(int major_, int minor_, int patch_) const 58 | { 59 | return (major > major_) || (major == major_ && minor > minor_) || 60 | (major == major_ && minor == minor_ && patch >= patch_); 61 | } 62 | 63 | bool is_vanilla_320() const { return is_vanilla_spark() && is_version_eq(3, 2, 0); } 64 | 65 | bool is_vanilla_400_or_later() const { return is_vanilla_spark() && is_version_ge(4, 0, 0); } 66 | 67 | bool is_databricks_14_3_or_later() const { return is_databricks() && is_version_ge(14, 3, 0); } 68 | 69 | private: 70 | spark_platform_type platform_type; 71 | int major, minor, patch; 72 | }; 73 | 74 | } // namespace spark_rapids_jni 75 | -------------------------------------------------------------------------------- /src/main/cpp/src/zorder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | namespace spark_rapids_jni { 28 | 29 | std::unique_ptr interleave_bits( 30 | cudf::table_view const& tbl, 31 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 32 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 33 | 34 | std::unique_ptr hilbert_index( 35 | int32_t const num_bits, 36 | cudf::table_view const& tbl, 37 | rmm::cuda_stream_view stream = cudf::get_default_stream(), 38 | rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 39 | 40 | } // namespace spark_rapids_jni 41 | -------------------------------------------------------------------------------- /src/main/cpp/tests/cast_long_to_binary_string.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | #include 23 | 24 | #include 25 | 26 | using namespace cudf; 27 | 28 | constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::FIRST_ERROR}; 29 | 30 | struct LongToBinaryStringTests : public cudf::test::BaseFixture {}; 31 | 32 | TEST_F(LongToBinaryStringTests, FromLongToBinary) 33 | { 34 | auto const longs = cudf::test::fixed_width_column_wrapper{ 35 | 0L, 1L, 10L, -1L, std::numeric_limits::max(), std::numeric_limits::min()}; 36 | 37 | auto results = spark_rapids_jni::long_to_binary_string(longs, cudf::get_default_stream()); 38 | 39 | auto const expected = cudf::test::strings_column_wrapper{ 40 | "0", 41 | "1", 42 | "1010", 43 | "1111111111111111111111111111111111111111111111111111111111111111", 44 | "111111111111111111111111111111111111111111111111111111111111111", 45 | "1000000000000000000000000000000000000000000000000000000000000000"}; 46 | 47 | CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity); 48 | } 49 | -------------------------------------------------------------------------------- /src/main/cpp/tests/list_slice.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | #include 23 | 24 | using namespace cudf; 25 | 26 | constexpr test::debug_output_level verbosity{test::debug_output_level::FIRST_ERROR}; 27 | 28 | struct ListSliceTests : public test::BaseFixture {}; 29 | 30 | TEST_F(ListSliceTests, ListSliceTest) 31 | { 32 | auto const list_col = test::lists_column_wrapper{{0, 1}, {2, 3, 7, 8}, {4, 5}}; 33 | { 34 | size_type start = 1; 35 | size_type length = 2; 36 | 37 | auto results = spark_rapids_jni::list_slice(list_col, start, length); 38 | 39 | auto const expected = test::lists_column_wrapper{{0, 1}, {2, 3}, {4, 5}}; 40 | 41 | CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity); 42 | } 43 | { 44 | size_type start = 1; 45 | auto const length = test::fixed_width_column_wrapper{0, 1, 2}; 46 | 47 | auto results = spark_rapids_jni::list_slice(list_col, start, length); 48 | 49 | auto const expected = test::lists_column_wrapper{{}, {2}, {4, 5}}; 50 | 51 | CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity); 52 | } 53 | { 54 | auto const start = test::fixed_width_column_wrapper{1, 2, 2}; 55 | size_type length = 2; 56 | 57 | auto results = spark_rapids_jni::list_slice(list_col, start, length); 58 | 59 | auto const expected = test::lists_column_wrapper{{0, 1}, {3, 7}, {5}}; 60 | 61 | CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity); 62 | } 63 | { 64 | auto const start = test::fixed_width_column_wrapper{1, 2, 1}; 65 | auto const length = test::fixed_width_column_wrapper{0, 1, 2}; 66 | 67 | auto results = spark_rapids_jni::list_slice(list_col, start, length); 68 | 69 | auto const expected = test::lists_column_wrapper{{}, {3}, {4, 5}}; 70 | 71 | CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, verbosity); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/main/cpp/tests/substring_index.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #include 31 | 32 | #include 33 | #include 34 | 35 | using namespace cudf; 36 | 37 | struct SubstringIndexTests : public test::BaseFixture {}; 38 | 39 | TEST_F(SubstringIndexTests, ScalarDelimiter) 40 | { 41 | auto col0 = test::strings_column_wrapper({"www.yahoo.com", 42 | "www.apache..org", 43 | "tennis...com", 44 | "nvidia....com", 45 | "google...........com", 46 | "microsoft...c.....co..m"}); 47 | 48 | auto exp_results = test::strings_column_wrapper( 49 | {"www.yahoo.com", "www.apache.", "tennis..", "nvidia..", "google..", "microsoft.."}); 50 | 51 | auto results = 52 | spark_rapids_jni::substring_index(strings_column_view{col0}, string_scalar("."), 3); 53 | CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results); 54 | } 55 | -------------------------------------------------------------------------------- /src/main/cpp/tests/test_utilities.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | template 22 | std::vector random_values(std::size_t size) 23 | { 24 | std::vector values(size); 25 | 26 | using T1 = T; 27 | using uniform_distribution = 28 | typename std::conditional_t, 29 | std::bernoulli_distribution, 30 | std::conditional_t, 31 | std::uniform_real_distribution, 32 | std::uniform_int_distribution>>; 33 | 34 | static constexpr auto seed = 0xf00d; 35 | static std::mt19937 engine{seed}; 36 | static uniform_distribution dist{}; 37 | std::generate_n(values.begin(), size, [&]() { return T{dist(engine)}; }); 38 | 39 | return values; 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/CaseWhen.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | import ai.rapids.cudf.*; 20 | 21 | 22 | /** 23 | * Execute SQL `case when` semantics. 24 | * If there are multiple branches and each branch uses a scalar to generate its value, 25 | * then it's fast to use this class because it does not generate temp string columns. 26 | * 27 | * E.g.: 28 | * SQL is: 29 | * select 30 | * case 31 | * when bool_1_expr then "value_1" 32 | * when bool_2_expr then "value_2" 33 | * when bool_3_expr then "value_3" 34 | * else "value_else" 35 | * end 36 | * from tab 37 | * 38 | * Execution steps: 39 | * Execute bool exprs to get bool columns, e.g., gets: 40 | * bool column 1: [true, false, false, false] // bool_1_expr result 41 | * bool column 2: [false, true, false, false] // bool_2_expr result 42 | * bool column 3: [false, false, true, false] // bool_3_expr result 43 | * Execute `selectFirstTrueIndex` to get the column index for the first true in bool columns. 44 | * Generate a column to store scalars: "value_1", "value_2", "value_3", "value_else" 45 | * Execute `Table.gather` to generate the final output column 46 | * 47 | */ 48 | public class CaseWhen { 49 | 50 | /** 51 | * 52 | * Select the column index for the first true in bool columns. 53 | * For a row that does not contain true, use the end index (number of columns). 54 | * 55 | * e.g.: 56 | * column 0: true, false, false, false 57 | * column 1: false, true, false, false 58 | * column 2: false, false, true, false 59 | * 60 | * 1st row is: true, false, false; first true index is 0 61 | * 2nd row is: false, true, false; first true index is 1 62 | * 3rd row is: false, false, true; first true index is 2 63 | * 4th row is: false, false, false; do not find true, set index to the end index 3 64 | * 65 | * output column: 0, 1, 2, 3 66 | * In the `case when` context, index 3 here means using the NULL value. 
67 | * 68 | */ 69 | public static ColumnVector selectFirstTrueIndex(ColumnVector[] boolColumns) { 70 | for (ColumnVector cv : boolColumns) { 71 | assert(cv.getType().equals(DType.BOOL8)) : "Columns must be bools"; 72 | } 73 | 74 | long[] boolHandles = new long[boolColumns.length]; 75 | for (int i = 0; i < boolColumns.length; ++i) { 76 | boolHandles[i] = boolColumns[i].getNativeView(); 77 | } 78 | 79 | return new ColumnVector(selectFirstTrueIndex(boolHandles)); 80 | } 81 | 82 | private static native long selectFirstTrueIndex(long[] boolHandles); 83 | } 84 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/CastException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nvidia.spark.rapids.jni; 17 | 18 | /** 19 | * CastException is an exception thrown by the JNI in the event of a casting error. 
20 | */ 21 | public class CastException extends RuntimeException { 22 | private final int rowWithError; 23 | private final String stringWithError; 24 | 25 | CastException(String stringWithError, int rowWithError) { 26 | super("Error casting data on row " + String.valueOf(rowWithError) + ": " + stringWithError); 27 | 28 | this.rowWithError = rowWithError; 29 | this.stringWithError = stringWithError; 30 | } 31 | 32 | public int getRowWithError() { 33 | return rowWithError; 34 | } 35 | 36 | public String getStringWithError() { 37 | return stringWithError; 38 | } 39 | } -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/CpuRetryOOM.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | /** 20 | * A special version of an out of memory error that indicates we ran out of off heap memory, but 21 | * should roll back to a point when all memory for the task is spillable and then retry the 22 | * operation. 
23 | */ 24 | public class CpuRetryOOM extends OffHeapOOM { 25 | public CpuRetryOOM() { 26 | super(); 27 | } 28 | 29 | public CpuRetryOOM(String message) { 30 | super(message); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/CpuSplitAndRetryOOM.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | /** 20 | * A special version of an out of memory error that indicates we ran out of off heap memory, but 21 | * should roll back to a point when all memory for the task is spillable and then retry the 22 | * operation with the input data split to make it ideally use less off heap memory overall. 23 | */ 24 | public class CpuSplitAndRetryOOM extends OffHeapOOM { 25 | public CpuSplitAndRetryOOM() { 26 | super(); 27 | } 28 | 29 | public CpuSplitAndRetryOOM(String message) { 30 | super(message); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/DateTimeRebase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | import ai.rapids.cudf.*; 20 | 21 | /** 22 | * This will be removed after the plugin picks up DateTimeUtils class. 23 | */ 24 | public class DateTimeRebase { 25 | static { 26 | NativeDepsLoader.loadNativeDeps(); 27 | } 28 | 29 | public static ColumnVector rebaseGregorianToJulian(ColumnView input) { 30 | return DateTimeUtils.rebaseGregorianToJulian(input); 31 | } 32 | 33 | public static ColumnVector rebaseJulianToGregorian(ColumnView input) { 34 | return DateTimeUtils.rebaseJulianToGregorian(input); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/GpuOOM.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | /** 20 | * A special version of an out of memory error that indicates we ran out of GPU memory. This is 21 | * mostly to avoid a fatal error that would force the worker process to restart. This should be 22 | * recoverable on the GPU. 23 | */ 24 | public class GpuOOM extends RuntimeException { 25 | public GpuOOM() { 26 | super(); 27 | } 28 | 29 | public GpuOOM(String message) { 30 | super(message); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/GpuRetryOOM.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | /** 20 | * A special version of an out of memory error that indicates we ran out of GPU memory, but should 21 | * roll back to a point when all memory for the task is spillable and then retry the operation. 
22 | */ 23 | public class GpuRetryOOM extends GpuOOM { 24 | /** Creates a GpuRetryOOM without a detail message. */ public GpuRetryOOM() { 25 | super(); 26 | } 27 | 28 | /** Creates a GpuRetryOOM whose detail message is the given string. */ public GpuRetryOOM(String message) { 29 | super(message); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/GpuSplitAndRetryOOM.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | /** 20 | * A special version of an out of memory error that indicates we ran out of GPU memory, but should 21 | * roll back to a point when all memory for the task is spillable and then retry the operation 22 | * with the input data split to make it ideally use less GPU memory overall. 23 | */ 24 | public class GpuSplitAndRetryOOM extends GpuOOM { 25 | /** Creates a GpuSplitAndRetryOOM without a detail message. */ public GpuSplitAndRetryOOM() { 26 | super(); 27 | } 28 | 29 | /** Creates a GpuSplitAndRetryOOM whose detail message is the given string. */ public GpuSplitAndRetryOOM(String message) { 30 | super(message); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/GpuSubstringIndexUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | import ai.rapids.cudf.*; 20 | 21 | /** Java entry point for the native substringIndex operation. NOTE(review): presumably mirrors Spark's substring_index semantics - confirm against the native implementation. */ public class GpuSubstringIndexUtils { 22 | static{ 23 | NativeDepsLoader.loadNativeDeps(); 24 | } 25 | 26 | /** Passes the column's native view, the delimiter's scalar handle and count to the native call, and wraps the returned handle in a new ColumnVector. */ public static ColumnVector substringIndex(ColumnView cv, Scalar delimiter, int count){ 27 | return new ColumnVector(substringIndex(cv.getNativeView(), delimiter.getScalarHandle(), count)); 28 | } 29 | 30 | /** Native implementation; the returned long is handed to the ColumnVector constructor by the public wrapper. */ private static native long substringIndex(long columnView, long delimiter, int count) throws CudfException; 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/Map.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | import ai.rapids.cudf.ColumnVector; 20 | import ai.rapids.cudf.ColumnView; 21 | import ai.rapids.cudf.CudfException; 22 | import ai.rapids.cudf.DType; 23 | import ai.rapids.cudf.NativeDepsLoader; 24 | 25 | /** 26 | * APIs for map column operations. 27 | */ 28 | public class Map { 29 | static { 30 | NativeDepsLoader.loadNativeDeps(); 31 | } 32 | 33 | /** 34 | * Sort the entries of each map in a map column according to the keys of that map. 35 | * Note: 36 | * The keys of a map MUST NOT be null. 37 | * Maps are assumed not to have duplicate keys. 38 | * Nested maps in the `KEY` column are not normalized/sorted; this means 39 | * only the first level LIST(STRUCT(KEY, VALUE)) is treated as a map type. 40 | * 41 | * @param cv Input map column, should be of LIST(STRUCT(KEY, VALUE)) 42 | * type. 43 | * @param isDescending True to sort in descending order, false to sort in 44 | * ascending order 45 | * @return Sorted map according to the sort order of the key column in map. 46 | * @throws CudfException If the input column is not a LIST(STRUCT(KEY, VALUE)) 47 | * column or the keys contain nulls. 48 | */ 49 | public static ColumnVector sort(ColumnView cv, boolean isDescending) { 50 | /* Only the top-level type is checked here, and only when JVM assertions are enabled. */ assert (cv.getType().equals(DType.LIST)); 51 | long r = sort(cv.getNativeView(), isDescending); 52 | return new ColumnVector(r); 53 | } 54 | 55 | /** Native implementation; the returned long is wrapped in a ColumnVector by the public overload. */ private static native long sort(long handle, boolean isDescending) throws CudfException; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/OffHeapOOM.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | /** 20 | * A special version of an out of memory error that indicates we ran out of off heap CPU memory. 21 | * This is mostly to avoid a fatal error that would force the worker process to restart. This 22 | * should be recoverable. It is an unchecked exception (it extends RuntimeException). 23 | */ 24 | public class OffHeapOOM extends RuntimeException { 25 | /** Creates an OffHeapOOM without a detail message. */ public OffHeapOOM() { 26 | super(); 27 | } 28 | 29 | /** Creates an OffHeapOOM whose detail message is the given string. */ public OffHeapOOM(String message) { 30 | super(message); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/Pair.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | /** 20 | * A utility class for holding a pair of values. 
21 | */ 22 | public class Pair<K, V> { 23 | private final K left; 24 | private final V right; 25 | 26 | /** Creates a pair holding the two given values as-is (no null checks are performed). */ public Pair(K left, V right) { 27 | this.left = left; 28 | this.right = right; 29 | } 30 | 31 | /** @return the left value passed at construction */ public K getLeft() { 32 | return left; 33 | } 34 | 35 | /** @return the right value passed at construction */ public V getRight() { 36 | return right; 37 | } 38 | 39 | /** Static factory equivalent to {@code new Pair<>(left, right)}; lets the compiler infer K and V. */ public static <K, V> Pair<K, V> of(K left, V right) { 40 | return new Pair<>(left, right); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/Preconditions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | import java.util.function.Supplier; 20 | 21 | /** 22 | * This class contains utility methods for checking preconditions. 23 | */ 24 | public class Preconditions { 25 | /** 26 | * Check if the condition is true, otherwise throw an IllegalStateException with the given message. 27 | */ 28 | public static void ensure(boolean condition, String message) { 29 | if (!condition) { 30 | throw new IllegalStateException(message); 31 | } 32 | } 33 | 34 | /** 35 | * Check if the condition is true, otherwise throw an IllegalStateException whose message is produced by the given supplier (the supplier is only invoked when the condition is false).
36 | */ 37 | public static void ensure(boolean condition, Supplier<String> messageSupplier) { 38 | if (!condition) { 39 | throw new IllegalStateException(messageSupplier.get()); 40 | } 41 | } 42 | 43 | /** 44 | * Check if the value is non-negative, otherwise throw an IllegalArgumentException whose message contains the given name and the offending value. 45 | * @param value the value to check 46 | * @param name the name of the value 47 | * @return the value if it is non-negative 48 | * @throws IllegalArgumentException if the value is negative 49 | */ 50 | public static int ensureNonNegative(int value, String name) { 51 | if (value < 0) { 52 | throw new IllegalArgumentException(name + " must be non-negative, but was " + value); 53 | } 54 | return value; 55 | } 56 | 57 | /** 58 | * Check if the value is non-negative, otherwise throw an IllegalArgumentException whose message contains the given name and the offending value. 59 | * @param value the value to check 60 | * @param name the name of the value 61 | * @return the value if it is non-negative 62 | * @throws IllegalArgumentException if the value is negative 63 | */ 64 | public static long ensureNonNegative(long value, String name) { 65 | if (value < 0) { 66 | throw new IllegalArgumentException(name + " must be non-negative, but was " + value); 67 | } 68 | return value; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/RegexRewriteUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | import ai.rapids.cudf.*; 20 | 21 | public class RegexRewriteUtils { 22 | static { 23 | NativeDepsLoader.loadNativeDeps(); 24 | } 25 | 26 | /** 27 | * Check if input string contains regex pattern `literal[start-end]{len,}`, which means 28 | * a literal string followed by a range of characters in the range of start to end, with at least 29 | * len characters. 30 | * 31 | * @param input Column of strings to check for literal. 32 | * @param literal UTF-8 encoded string to check in strings column. 33 | * @param len Minimum number of characters to check after the literal. 34 | * @param start Minimum UTF-8 codepoint value to check for in the range. 35 | * @param end Maximum UTF-8 codepoint value to check for in the range. 36 | * @return ColumnVector of booleans where true indicates the string contains the pattern. 
37 | */ 38 | public static ColumnVector literalRangePattern(ColumnVector input, Scalar literal, int len, int start, int end) { 39 | /* The type check below only runs when JVM assertions are enabled. */ assert(input.getType().equals(DType.STRING)) : "column must be a String"; 40 | return new ColumnVector(literalRangePattern(input.getNativeView(), literal.getScalarHandle(), len, start, end)); 41 | } 42 | 43 | /** Native implementation; receives the column's native view and the scalar handle, and returns the long that the public overload wraps in a ColumnVector. */ private static native long literalRangePattern(long input, long literal, int len, int start, int end); 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/RmmSparkThreadState.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | /** 20 | * The state of a given thread according to RmmSpark. This is intended really for debugging and 21 | * testing only. 22 | */ 23 | public enum RmmSparkThreadState { 24 | UNKNOWN(-1), // thread is not associated with anything... 
25 | THREAD_RUNNING(0), // task thread running normally 26 | THREAD_ALLOC(1), // task thread in the middle of doing an allocation 27 | THREAD_ALLOC_FREE(2), // task thread in the middle of doing an allocation and a free happened 28 | THREAD_BLOCKED(3), // task thread that is temporarily blocked 29 | THREAD_BUFN_THROW(4), // task thread that should throw an exception to roll back before blocking 30 | THREAD_BUFN_WAIT(5), // task thread that threw an exception to roll back and now should 31 | // block the next time alloc is called 32 | THREAD_BUFN(6), // task thread that is blocked until higher priority tasks start to succeed 33 | THREAD_SPLIT_THROW(7), // task thread that should throw an exception to split input and retry 34 | THREAD_REMOVE_THROW(8); // task thread that is being removed and needs to throw an exception 35 | 36 | /** Integer id stored for this constant; fromNativeId matches against it. */ private final int nativeId; 37 | 38 | RmmSparkThreadState(int nativeId) { 39 | this.nativeId = nativeId; 40 | } 41 | 42 | /** Scans values() for the constant whose nativeId equals the argument; throws IllegalArgumentException when no constant matches. */ static RmmSparkThreadState fromNativeId(int nativeId) { 43 | for (RmmSparkThreadState ts : RmmSparkThreadState.values()) { 44 | if (ts.nativeId == nativeId) { 45 | return ts; 46 | } 47 | } 48 | throw new IllegalArgumentException("Could not find an ID for " + nativeId); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/SparkPlatformType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | /** 20 | * Enum representing the platform. 21 | * NOTE: MUST be kept in sync with version.hpp. 22 | * The ordinal values are used to represent the platform in JNI calls, so the declaration order below must not change. 23 | */ 24 | public enum SparkPlatformType { 25 | // ordinal 0 is vanilla Spark, JNI and kernel use 0 representing Spark 26 | VANILLA_SPARK, 27 | 28 | // ordinal 1 is Databricks, JNI and kernel use 1 representing Databricks 29 | DATABRICKS, 30 | 31 | // ordinal 2 is Cloudera, JNI and kernel use 2 representing Cloudera 32 | CLOUDERA; 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/TaskPriority.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * Copyright (c) 2025, NVIDIA CORPORATION. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | * 17 | */ 18 | 19 | package com.nvidia.spark.rapids.jni; 20 | 21 | import ai.rapids.cudf.NativeDepsLoader; 22 | 23 | /** 24 | * Get the priority for any task. If the priority for one task is larger than the priority for another task, 25 | * then it means that the first task (larger number) should get access to resources before the task with 26 | * the lower priority value. 27 | */ 28 | public class TaskPriority { 29 | static { 30 | NativeDepsLoader.loadNativeDeps(); 31 | } 32 | 33 | /** Native call that returns the priority value for the given task attempt id. */ public static native long getTaskPriority(long taskAttemptId); 34 | 35 | /** Native call reporting that the given task attempt is done. NOTE(review): presumably lets the native side drop its state for that task - confirm. */ public static native void taskDone(long taskAttemptId); 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/ThreadStateRegistry.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | import org.slf4j.Logger; 20 | import org.slf4j.LoggerFactory; 21 | 22 | import java.util.HashMap; 23 | import java.util.HashSet; 24 | 25 | /** 26 | * This is used to allow us to map a native thread id to a java thread so we can look at the 27 | * state from a java perspective. 
28 | */ 29 | class ThreadStateRegistry { 30 | private static final Logger LOG = LoggerFactory.getLogger(ThreadStateRegistry.class); 31 | 32 | /** Native thread id to Java thread; every access goes through the synchronized static methods below. */ private static final HashMap<Long, Thread> knownThreads = new HashMap<>(); 33 | 34 | /** Registers (or replaces) the Java thread associated with the given native thread id. */ public static synchronized void addThread(long nativeId, Thread t) { 35 | knownThreads.put(nativeId, t); 36 | } 37 | 38 | // Typically called from JNI 39 | public static synchronized void removeThread(long threadId) { 40 | knownThreads.remove(threadId); 41 | } 42 | 43 | // This is likely called from JNI 44 | /** Returns true when the thread is unknown, not alive, or in the BLOCKED, WAITING, TIMED_WAITING or TERMINATED state; false otherwise. */ public static synchronized boolean isThreadBlocked(long nativeId) { 45 | Thread t = knownThreads.get(nativeId); 46 | if (t == null || !t.isAlive()) { 47 | // Dead is as good as blocked. This is mostly for tests, not so much for 48 | // production 49 | return true; 50 | } 51 | Thread.State state = t.getState(); 52 | switch (state) { 53 | case BLOCKED: 54 | // fall through 55 | case WAITING: 56 | // fall through 57 | case TIMED_WAITING: 58 | return true; 59 | case TERMINATED: 60 | // Technically there is a race with `!t.isAlive` check above, and dead is as good as 61 | // blocked. 62 | return true; 63 | default: 64 | return false; 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/Version.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | /** Immutable holder for a platform ordinal together with major, minor and patch numbers. */ public class Version { 20 | private final int platformOrdinal; 21 | private final int major; 22 | private final int minor; 23 | private final int patch; 24 | 25 | /** Stores platform.ordinal() (not the enum constant itself) along with the three version numbers; no validation is performed. */ public Version(SparkPlatformType platform, int major, int minor, int patch) { 26 | this.platformOrdinal = platform.ordinal(); 27 | this.major = major; 28 | this.minor = minor; 29 | this.patch = patch; 30 | } 31 | 32 | /** 33 | * Note: this is used in the JNI code and kernel code, so it must match the 34 | * enum SparkPlatformType in com.nvidia.spark.rapids.jni.SparkPlatformType. 35 | */ 36 | public int getPlatformOrdinal() { 37 | return platformOrdinal; 38 | } 39 | 40 | /** @return the major number given to the constructor */ public int getMajor() { 41 | return major; 42 | } 43 | 44 | /** @return the minor number given to the constructor */ public int getMinor() { 45 | return minor; 46 | } 47 | 48 | /** @return the patch number given to the constructor */ public int getPatch() { 49 | return patch; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/kudo/ColumnViewInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni.kudo; 18 | 19 | import ai.rapids.cudf.*; 20 | 21 | import java.util.Optional; 22 | 23 | import static com.nvidia.spark.rapids.jni.Preconditions.ensureNonNegative; 24 | 25 | /** Describes one column (type, buffer offsets, null count, row count) so that a ColumnView can later be built over a device buffer. */ class ColumnViewInfo { 26 | private final DType dtype; 27 | private final ColumnOffsetInfo offsetInfo; 28 | private final int nullCount; 29 | private final int rowCount; 30 | 31 | /** Rejects negative nullCount or rowCount with IllegalArgumentException (via ensureNonNegative), then stores all four values. */ public ColumnViewInfo(DType dtype, ColumnOffsetInfo offsetInfo, 32 | int nullCount, int rowCount) { 33 | ensureNonNegative(nullCount, "nullCount"); 34 | ensureNonNegative(rowCount, "rowCount"); 35 | this.dtype = dtype; 36 | this.offsetInfo = offsetInfo; 37 | this.nullCount = nullCount; 38 | this.rowCount = rowCount; 39 | } 40 | 41 | /** Builds a ColumnView whose buffers are resolved by offsetInfo relative to buffer.getAddress(). Nested dtypes use the validity and offset buffers plus childrenView; all other dtypes use the data, validity and offset buffers and ignore childrenView. */ ColumnView buildColumnView(DeviceMemoryBuffer buffer, ColumnView[] childrenView) { 42 | long baseAddress = buffer.getAddress(); 43 | 44 | if (dtype.isNestedType()) { 45 | return new ColumnView(dtype, rowCount, Optional.of((long)nullCount), 46 | offsetInfo.getValidityBuffer(baseAddress), 47 | offsetInfo.getOffsetBuffer(baseAddress), 48 | childrenView); 49 | } else { 50 | return new ColumnView(dtype, rowCount, Optional.of((long)nullCount), 51 | offsetInfo.getDataBuffer(baseAddress), 52 | offsetInfo.getValidityBuffer(baseAddress), 53 | offsetInfo.getOffsetBuffer(baseAddress)); 54 | } 55 | } 56 | 57 | @Override 58 | public String toString() { 59 | return "ColumnViewInfo{" + 60 | "dtype=" + dtype + 61 | ", offsetInfo=" + offsetInfo + 62 | ", nullCount=" + nullCount + 63 | ", rowCount=" + rowCount + 64 | '}'; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/kudo/DataOutputStreamWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni.kudo; 18 | 19 | import ai.rapids.cudf.HostMemoryBuffer; 20 | 21 | import java.io.DataOutputStream; 22 | import java.io.IOException; 23 | 24 | /** 25 | * DataWriter that forwards every write to a wrapped DataOutputStream. Visible for testing. 26 | */ 27 | class DataOutputStreamWriter implements DataWriter { 28 | /* Scratch array reused by copyDataFrom; host data is staged through it in chunks of at most 1024 bytes. */ private final byte[] arrayBuffer = new byte[1024]; 29 | private final DataOutputStream dout; 30 | 31 | public DataOutputStreamWriter(DataOutputStream dout) { 32 | this.dout = dout; 33 | } 34 | 35 | @Override 36 | public void writeInt(int i) throws IOException { 37 | dout.writeInt(i); 38 | } 39 | 40 | @Override 41 | public void copyDataFrom(HostMemoryBuffer src, long srcOffset, long len) throws IOException { 42 | long dataLeft = len; 43 | /* Copy chunk by chunk: read up to arrayBuffer.length bytes from src into the scratch array, write them to dout, then advance. */ while (dataLeft > 0) { 44 | int amountToCopy = (int) Math.min(arrayBuffer.length, dataLeft); 45 | src.getBytes(arrayBuffer, 0, srcOffset, amountToCopy); 46 | dout.write(arrayBuffer, 0, amountToCopy); 47 | srcOffset += amountToCopy; 48 | dataLeft -= amountToCopy; 49 | } 50 | } 51 | 52 | @Override 53 | public void flush() throws IOException { 54 | dout.flush(); 55 | } 56 | 57 | @Override 58 | public void write(byte[] arr, int offset, int length) throws IOException { 59 | dout.write(arr, offset, length); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- 
/src/main/java/com/nvidia/spark/rapids/jni/kudo/DataWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni.kudo; 18 | 19 | import ai.rapids.cudf.HostMemoryBuffer; 20 | 21 | import java.io.IOException; 22 | 23 | /** 24 | * Output data writer for kudo serializer. 25 | */ 26 | public interface DataWriter { 27 | 28 | /** 29 | * Write int in network byte order. 30 | */ 31 | void writeInt(int i) throws IOException; 32 | 33 | /** 34 | * Reserve space in the buffer for the given size. The default implementation does nothing. 35 | */ 36 | default void reserve(int size) throws IOException {} 37 | 38 | /** 39 | * Copy data from src starting at srcOffset and going for len bytes. 40 | * 41 | * @param src where to copy from. 42 | * @param srcOffset offset to start at. 43 | * @param len amount to copy. 44 | */ 45 | void copyDataFrom(HostMemoryBuffer src, long srcOffset, long len) throws IOException; 46 | 47 | /** Flush any buffered output to the underlying destination. */ void flush() throws IOException; 48 | 49 | /** 50 | * Copy length bytes of the byte array, starting at offset, to this writer. 
51 | */ 52 | void write(byte[] arr, int offset, int length) throws IOException; 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/kudo/DumpOption.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni.kudo; 18 | 19 | /** Three-valued dump setting carried by MergeOptions. NOTE(review): presumably selects when kudo data is dumped for debugging (always, only on failure, or never) - confirm against the code that reads it. */ public enum DumpOption { 20 | Always, 21 | OnFailure, 22 | Never 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/kudo/MergeMetrics.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni.kudo; 18 | 19 | /** Immutable set of three timing values (in nanoseconds) recorded for a merge; instances are created directly or through the nested Builder. */ public class MergeMetrics { 20 | // The time it took to calculate combined header in nanoseconds 21 | private final long calcHeaderTime; 22 | // The time it took to merge the buffers into the host buffer in nanoseconds 23 | private final long mergeIntoHostBufferTime; 24 | // The time it took to convert the host buffer into a contiguous table in nanoseconds 25 | private final long convertToTableTime; 26 | 27 | public MergeMetrics(long calcHeaderTime, long mergeIntoHostBufferTime, 28 | long convertToTableTime) { 29 | this.calcHeaderTime = calcHeaderTime; 30 | this.mergeIntoHostBufferTime = mergeIntoHostBufferTime; 31 | this.convertToTableTime = convertToTableTime; 32 | } 33 | 34 | public long getCalcHeaderTime() { 35 | return calcHeaderTime; 36 | } 37 | 38 | public long getMergeIntoHostBufferTime() { 39 | return mergeIntoHostBufferTime; 40 | } 41 | 42 | public long getConvertToTableTime() { 43 | return convertToTableTime; 44 | } 45 | 46 | /** @return a new Builder whose three values all start at 0 */ public static Builder builder() { 47 | return new Builder(); 48 | } 49 | 50 | /** @return a new Builder pre-populated with the three values of the given metrics */ public static Builder builder(MergeMetrics metrics) { 51 | return new Builder() 52 | .calcHeaderTime(metrics.calcHeaderTime) 53 | .mergeIntoHostBufferTime(metrics.mergeIntoHostBufferTime) 54 | .convertToTableTime(metrics.convertToTableTime); 55 | } 56 | 57 | 58 | /** Mutable builder; each setter stores its value and returns this builder for chaining. */ public static class Builder { 59 | private long calcHeaderTime; 60 | private long mergeIntoHostBufferTime; 61 | private long convertToTableTime; 62 | 63 | public Builder calcHeaderTime(long calcHeaderTime) { 64 | this.calcHeaderTime = calcHeaderTime; 65 | return this; 66 | } 67 | 68 | public Builder mergeIntoHostBufferTime(long mergeIntoHostBufferTime) { 69 | this.mergeIntoHostBufferTime = mergeIntoHostBufferTime; 70 | return this; 71 | } 72 | 73 | public Builder convertToTableTime(long convertToTableTime) { 74 | this.convertToTableTime = convertToTableTime; 75 | return this; 76 | } 77 | 78 | public MergeMetrics build() { 79 | 
return new MergeMetrics(calcHeaderTime, mergeIntoHostBufferTime, convertToTableTime); 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/kudo/MergeOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni.kudo; 18 | 19 | import java.io.OutputStream; 20 | import java.util.function.Supplier; 21 | 22 | /** Immutable bundle of a DumpOption, a supplier of output streams and a file path; the values are stored and returned as given, without validation. */ public class MergeOptions { 23 | private final DumpOption dumpOption; 24 | private final Supplier<OutputStream> outputStreamSupplier; 25 | private final String filePath; 26 | 27 | public MergeOptions(DumpOption dumpOption, Supplier<OutputStream> outputStreamSupplier, String filePath) { 28 | this.dumpOption = dumpOption; 29 | this.outputStreamSupplier = outputStreamSupplier; 30 | this.filePath = filePath; 31 | } 32 | 33 | public DumpOption getDumpOption() { 34 | return dumpOption; 35 | } 36 | 37 | public Supplier<OutputStream> getOutputStreamSupplier() { 38 | return outputStreamSupplier; 39 | } 40 | 41 | public String getFilePath() { 42 | return filePath; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/kudo/OpenByteArrayOutputStreamWriter.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni.kudo; 18 | 19 | import static java.lang.Math.toIntExact; 20 | import static java.util.Objects.requireNonNull; 21 | 22 | import ai.rapids.cudf.HostMemoryBuffer; 23 | import java.io.IOException; 24 | 25 | /** 26 | * Adapter class which helps to save memory copy when shuffle manager uses 27 | * {@link OpenByteArrayOutputStream} during serialization. 
28 | */ 29 | public class OpenByteArrayOutputStreamWriter implements DataWriter { 30 | private final OpenByteArrayOutputStream out; 31 | 32 | public OpenByteArrayOutputStreamWriter(OpenByteArrayOutputStream bout) { 33 | requireNonNull(bout, "Byte array output stream can't be null"); 34 | this.out = bout; 35 | } 36 | 37 | @Override 38 | public void reserve(int size) throws IOException { 39 | out.reserve(size); 40 | } 41 | 42 | @Override 43 | public void writeInt(int v) throws IOException { 44 | out.reserve(4 + out.size()); 45 | out.write((v >>> 24) & 0xFF); 46 | out.write((v >>> 16) & 0xFF); 47 | out.write((v >>> 8) & 0xFF); 48 | out.write((v >>> 0) & 0xFF); 49 | } 50 | 51 | @Override 52 | public void copyDataFrom(HostMemoryBuffer src, long srcOffset, long len) throws IOException { 53 | out.write(src, srcOffset, toIntExact(len)); 54 | } 55 | 56 | @Override 57 | public void flush() throws IOException { 58 | } 59 | 60 | @Override 61 | public void write(byte[] arr, int offset, int length) throws IOException { 62 | out.write(arr, offset, length); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/kudo/SliceInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni.kudo; 18 | 19 | public class SliceInfo { 20 | final int offset; 21 | final int rowCount; 22 | private final SlicedValidityBufferInfo validityBufferInfo; 23 | 24 | SliceInfo(int offset, int rowCount) { 25 | this.offset = offset; 26 | this.rowCount = rowCount; 27 | this.validityBufferInfo = SlicedValidityBufferInfo.calc(offset, rowCount); 28 | } 29 | 30 | SlicedValidityBufferInfo getValidityBufferInfo() { 31 | return validityBufferInfo; 32 | } 33 | 34 | public int getOffset() { 35 | return offset; 36 | } 37 | 38 | public int getRowCount() { 39 | return rowCount; 40 | } 41 | 42 | @Override 43 | public String toString() { 44 | return "SliceInfo{" + 45 | "offset=" + offset + 46 | ", rowCount=" + rowCount + 47 | ", validityBufferInfo=" + validityBufferInfo + 48 | '}'; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/kudo/SlicedValidityBufferInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni.kudo; 18 | 19 | /** 20 | * A simple utility class to hold information about serializing/deserializing sliced validity buffer. 
21 | */ 22 | class SlicedValidityBufferInfo { 23 | private final int bufferOffset; 24 | private final int bufferLength; 25 | /// The bit offset within the buffer where the slice starts 26 | private final int beginBit; 27 | 28 | SlicedValidityBufferInfo(int bufferOffset, int bufferLength, int beginBit) { 29 | this.bufferOffset = bufferOffset; 30 | this.bufferLength = bufferLength; 31 | this.beginBit = beginBit; 32 | } 33 | 34 | @Override 35 | public String toString() { 36 | return "SlicedValidityBufferInfo{" + "bufferOffset=" + bufferOffset + ", bufferLength=" + bufferLength + 37 | ", beginBit=" + beginBit + '}'; 38 | } 39 | 40 | public int getBufferOffset() { 41 | return bufferOffset; 42 | } 43 | 44 | public int getBufferLength() { 45 | return bufferLength; 46 | } 47 | 48 | public int getBeginBit() { 49 | return beginBit; 50 | } 51 | 52 | static SlicedValidityBufferInfo calc(int rowOffset, int numRows) { 53 | if (rowOffset < 0) { 54 | throw new IllegalArgumentException("rowOffset must be >= 0, but was " + rowOffset); 55 | } 56 | if (numRows < 0) { 57 | throw new IllegalArgumentException("numRows must be >= 0, but was " + numRows); 58 | } 59 | int bufferOffset = rowOffset / 8; 60 | int beginBit = rowOffset % 8; 61 | int bufferLength = 0; 62 | if (numRows > 0) { 63 | bufferLength = (rowOffset + numRows - 1) / 8 - bufferOffset + 1; 64 | } 65 | return new SlicedValidityBufferInfo(bufferOffset, bufferLength, beginBit); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/kudo/WriteMetrics.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni.kudo; 18 | 19 | /** 20 | * This class contains metrics for serializing table using kudo format. 21 | */ 22 | public class WriteMetrics { 23 | private long copyBufferTime; 24 | private long writtenBytes; 25 | 26 | 27 | public WriteMetrics() { 28 | this.copyBufferTime = 0; 29 | this.writtenBytes = 0; 30 | } 31 | 32 | /** 33 | * Get the time spent on copying the buffer. 34 | */ 35 | public long getCopyBufferTime() { 36 | return copyBufferTime; 37 | } 38 | 39 | public void addCopyBufferTime(long time) { 40 | copyBufferTime += time; 41 | } 42 | 43 | /** 44 | * Get the number of bytes written. 45 | */ 46 | public long getWrittenBytes() { 47 | return writtenBytes; 48 | } 49 | 50 | public void addWrittenBytes(long bytes) { 51 | writtenBytes += bytes; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/schema/HostColumnsVisitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.nvidia.spark.rapids.jni.schema; 20 | 21 | import ai.rapids.cudf.HostColumnVectorCore; 22 | 23 | import java.util.List; 24 | 25 | /** 26 | * A post order visitor for visiting a list of host columns in a schema. 27 | * 28 | *

 *
 * For example, if we have three columns A, B, and C with following types:
 *
 * <ul>
 *   <li>A: struct { int a1; long a2}</li>
 *   <li>B: list { int b1}</li>
 *   <li>C: string c1</li>
 * </ul>
 *
 * The order of visiting will be:
 * <ol>
 *   <li>Previsit struct column A</li>
 *   <li>Visit primitive column a1</li>
 *   <li>Visit primitive column a2</li>
 *   <li>Visit struct column A</li>
 *   <li>Previsit list column B</li>
 *   <li>Visit primitive column b1</li>
 *   <li>Visit list column B</li>
 *   <li>Visit primitive column c1</li>
 * </ol>
 *
51 | * 52 | */ 53 | public interface HostColumnsVisitor { 54 | /** 55 | * Visit a struct column before any of its children. 56 | * @param col the struct column to visit 57 | */ 58 | void preVisitStruct(HostColumnVectorCore col); 59 | 60 | // TODO no one uses this, do we even want it??? 61 | /** 62 | * Visit a struct column. 63 | * @param col the struct column to visit 64 | */ 65 | void visitStruct(HostColumnVectorCore col); 66 | 67 | /** 68 | * Visit a list column before actually visiting its child. 69 | * @param col the list column to visit 70 | */ 71 | void preVisitList(HostColumnVectorCore col); 72 | 73 | /** 74 | * Visit a list column after visiting its child. 75 | * @param col the list column to visit 76 | */ 77 | void visitList(HostColumnVectorCore col); 78 | 79 | /** 80 | * Visit a column that is a primitive type. 81 | * @param col the column to visit 82 | */ 83 | void visit(HostColumnVectorCore col); 84 | 85 | /** 86 | * The processing is all done 87 | */ 88 | void done(); 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/com/nvidia/spark/rapids/jni/schema/SimpleSchemaVisitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * Copyright (c) 2025, NVIDIA CORPORATION. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | * 17 | */ 18 | 19 | package com.nvidia.spark.rapids.jni.schema; 20 | 21 | import ai.rapids.cudf.Schema; 22 | 23 | /** 24 | * A schema visitor similar to {@link SchemaVisitor} but with a simplified interface, please refer 25 | * to {@link SchemaVisitor} for more details. 26 | *
27 | * This interface removed generic types and return values to simplify the interface, which could 28 | * avoid unnecessary allocation during visiting schema. 29 | */ 30 | public interface SimpleSchemaVisitor { 31 | /** 32 | * Visit the top level schema. 33 | * @param schema the top level schema to visit 34 | */ 35 | void visitTopSchema(Schema schema); 36 | 37 | /** 38 | * Visit a struct schema before actually visiting its children. 39 | * @param structType the struct schema to visit 40 | */ 41 | void preVisitStruct(Schema structType); 42 | 43 | /** 44 | * Visit a struct schema. 45 | * @param structType the struct schema to visit 46 | */ 47 | void visitStruct(Schema structType); 48 | 49 | /** 50 | * Visit a list schema before actually visiting its child. 51 | * @param listType the list schema to visit 52 | */ 53 | void preVisitList(Schema listType); 54 | 55 | /** 56 | * Visit a list schema after visiting its child. 57 | * @param listType the list schema to visit 58 | */ 59 | void visitList(Schema listType); 60 | 61 | /** 62 | * Visit a primitive type. 
63 | * @param primitiveType the primitive type to visit 64 | */ 65 | void visit(Schema primitiveType); 66 | } 67 | -------------------------------------------------------------------------------- /src/test/cpp/faultinj/test_faultinj.json: -------------------------------------------------------------------------------- 1 | { 2 | "logLevel": 1, 3 | "seed": 12345, 4 | "dynamic": true, 5 | "cudaRuntimeFaults": { 6 | "cudaLaunchKernel_ptsz": { 7 | "percent": 0, 8 | "injectionType": 0, 9 | "interceptionCount": 1 10 | } 11 | }, 12 | "cudaDriverFaults": { 13 | "*": { 14 | "percent": 0, 15 | "injectionType": 2, 16 | "substituteReturnCode": 2, 17 | "interceptionCount": 1000 18 | }, 19 | "cuLaunchKernel_ptsz": { 20 | "percent": 0, 21 | "injectionType": 2, 22 | "substituteReturnCode": 2, 23 | "interceptionCount": 1000 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/test/java/com/nvidia/spark/rapids/jni/CaseWhenTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | import ai.rapids.cudf.*; 20 | 21 | import org.junit.jupiter.api.Test; 22 | 23 | import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; 24 | 25 | public class CaseWhenTest { 26 | 27 | @Test 28 | void selectIndexTest() { 29 | try ( 30 | ColumnVector b0 = ColumnVector.fromBooleans( 31 | true, false, false, false); 32 | ColumnVector b1 = ColumnVector.fromBooleans( 33 | true, true, false, false); 34 | ColumnVector b2 = ColumnVector.fromBooleans( 35 | false, false, true, false); 36 | ColumnVector b3 = ColumnVector.fromBooleans( 37 | true, true, true, false); 38 | ColumnVector expected = ColumnVector.fromInts(0, 1, 2, 4)) { 39 | ColumnVector[] boolColumns = new ColumnVector[] { b0, b1, b2, b3 }; 40 | try (ColumnVector actual = CaseWhen.selectFirstTrueIndex(boolColumns)) { 41 | assertColumnsAreEqual(expected, actual); 42 | } 43 | } 44 | } 45 | 46 | @Test 47 | void selectIndexTestWithNull() { 48 | try ( 49 | ColumnVector b0 = ColumnVector.fromBoxedBooleans( 50 | null, false, false, null, false); 51 | ColumnVector b1 = ColumnVector.fromBoxedBooleans( 52 | null, null, false, true, true); 53 | ColumnVector b2 = ColumnVector.fromBoxedBooleans( 54 | null, null, false, true, false); 55 | ColumnVector b3 = ColumnVector.fromBoxedBooleans( 56 | null, null, null, true, null); 57 | ColumnVector expected = ColumnVector.fromInts(4, 4, 4, 1, 1)) { 58 | ColumnVector[] boolColumns = new ColumnVector[] { b0, b1, b2, b3 }; 59 | try (ColumnVector actual = CaseWhen.selectFirstTrueIndex(boolColumns)) { 60 | assertColumnsAreEqual(expected, actual); 61 | } 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/test/java/com/nvidia/spark/rapids/jni/GpuSubstringIndexUtilsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | import ai.rapids.cudf.AssertUtils; 20 | import ai.rapids.cudf.ColumnVector; 21 | import ai.rapids.cudf.Scalar; 22 | import ai.rapids.cudf.Table; 23 | import org.junit.jupiter.api.Test; 24 | 25 | import java.util.ArrayList; 26 | import java.util.List; 27 | 28 | public class GpuSubstringIndexUtilsTest { 29 | @Test 30 | void gpuSubstringIndexTest(){ 31 | Table.TestBuilder tb = new Table.TestBuilder(); 32 | tb.column( "www.apache.org"); 33 | tb.column("www.apache"); 34 | tb.column("www"); 35 | tb.column(""); 36 | tb.column("org"); 37 | tb.column("apache.org"); 38 | tb.column("www.apache.org"); 39 | tb.column(""); 40 | tb.column("大千世界大"); 41 | tb.column("www||apache"); 42 | 43 | try(Table expected = tb.build()){ 44 | Table.TestBuilder tb2 = new Table.TestBuilder(); 45 | tb2.column("www.apache.org"); 46 | tb2.column("www.apache.org"); 47 | tb2.column("www.apache.org"); 48 | tb2.column("www.apache.org"); 49 | tb2.column("www.apache.org"); 50 | tb2.column("www.apache.org"); 51 | tb2.column("www.apache.org"); 52 | tb2.column(""); 53 | tb2.column("大千世界大千世界"); 54 | tb2.column("www||apache||org"); 55 | 56 | Scalar dotScalar = Scalar.fromString("."); 57 | Scalar cnChar = Scalar.fromString("千"); 58 | Scalar verticalBar = Scalar.fromString("||"); 59 | Scalar[] delimiterArray = new Scalar[]{dotScalar, dotScalar, dotScalar, 
dotScalar,dotScalar, dotScalar, dotScalar, dotScalar, cnChar, verticalBar}; 60 | int[] countArray = new int[]{3, 2, 1, 0, -1, -2, -3, -2, 2, 2}; 61 | List result = new ArrayList<>(); 62 | try (Table origTable = tb2.build()){ 63 | for(int i = 0; i < origTable.getNumberOfColumns(); i++){ 64 | ColumnVector string_col = origTable.getColumn(i); 65 | result.add(GpuSubstringIndexUtils.substringIndex(string_col, delimiterArray[i], countArray[i])); 66 | } 67 | try (Table result_tbl = new Table( 68 | result.toArray(new ColumnVector[result.size()]))){ 69 | AssertUtils.assertTablesAreEqual(expected, result_tbl); 70 | } 71 | }finally { 72 | result.forEach(ColumnVector::close); 73 | } 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/test/java/com/nvidia/spark/rapids/jni/HistogramTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | import ai.rapids.cudf.AssertUtils; 20 | import ai.rapids.cudf.ColumnVector; 21 | 22 | import org.junit.jupiter.api.Test; 23 | 24 | public class HistogramTest { 25 | @Test 26 | void testZeroFrequency() { 27 | try (ColumnVector values = ColumnVector.fromInts(5, 10, 30); 28 | ColumnVector freqs = ColumnVector.fromLongs(1, 0, 1); 29 | ColumnVector histogram = Histogram.createHistogramIfValid(values, freqs, true); 30 | ColumnVector percentiles = Histogram.percentileFromHistogram(histogram, new double[]{1}, 31 | false); 32 | ColumnVector expected = ColumnVector.fromBoxedDoubles(5.0, null, 30.0)) { 33 | AssertUtils.assertColumnsAreEqual(percentiles, expected); 34 | } 35 | } 36 | 37 | @Test 38 | void testAllNulls() { 39 | try (ColumnVector values = ColumnVector.fromBoxedInts(null, null, null); 40 | ColumnVector freqs = ColumnVector.fromLongs(1, 2, 3); 41 | ColumnVector histogram = Histogram.createHistogramIfValid(values, freqs, true); 42 | ColumnVector percentiles = Histogram.percentileFromHistogram(histogram, new double[]{0.5}, 43 | false); 44 | ColumnVector expected = ColumnVector.fromBoxedDoubles(null, null, null)) { 45 | AssertUtils.assertColumnsAreEqual(percentiles, expected); 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/test/java/com/nvidia/spark/rapids/jni/LimitingOffHeapAllocForTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | import ai.rapids.cudf.HostMemoryBuffer; 20 | 21 | import java.util.Optional; 22 | 23 | /** 24 | * This provides a way to allocate and deallocate off heap buffers using the RmmSpark APIs for 25 | * retry on allocations. 26 | */ 27 | public class LimitingOffHeapAllocForTests { 28 | private static long limit; 29 | private static long amountAllocated = 0; 30 | public static synchronized void setLimit(long limit) { 31 | LimitingOffHeapAllocForTests.limit = limit; 32 | if (amountAllocated > 0) { 33 | throw new IllegalStateException("PREVIOUS TEST LEAKED MEMORY!!!"); 34 | } 35 | } 36 | 37 | private static Optional allocInternal(long amount, boolean blocking) { 38 | Optional ret = Optional.empty(); 39 | boolean wasOom = true; 40 | boolean isRecursive = RmmSpark.preCpuAlloc(amount, blocking); 41 | try { 42 | synchronized (LimitingOffHeapAllocForTests.class) { 43 | if (amountAllocated + amount <= limit) { 44 | amountAllocated += amount; 45 | wasOom = false; 46 | HostMemoryBuffer buff = HostMemoryBuffer.allocate(amount); 47 | final long ptr = buff.getAddress(); 48 | buff.setEventHandler(refCount -> { 49 | if (refCount == 0) { 50 | synchronized (LimitingOffHeapAllocForTests.class) { 51 | amountAllocated -= amount; 52 | } 53 | RmmSpark.cpuDeallocate(ptr, amount); 54 | } 55 | }); 56 | ret = Optional.of(buff); 57 | } 58 | } 59 | } finally { 60 | if (ret.isPresent()) { 61 | RmmSpark.postCpuAllocSuccess(ret.get().getAddress(), amount, blocking, isRecursive); 62 | } else { 63 | 
RmmSpark.postCpuAllocFailed(wasOom, blocking, isRecursive); 64 | } 65 | } 66 | return ret; 67 | } 68 | 69 | /** 70 | * Do a non-blocking allocation 71 | * @param amount the amount to allocate 72 | * @return the allocated buffer or not. 73 | */ 74 | public static Optional tryAlloc(long amount) { 75 | return allocInternal(amount, false); 76 | } 77 | 78 | /** 79 | * Do a blocking allocation 80 | * @param amount the amount to allocate 81 | * @return the allocated buffer 82 | */ 83 | public static HostMemoryBuffer alloc(long amount) { 84 | Optional ret = Optional.empty(); 85 | while (!ret.isPresent()) { 86 | ret = allocInternal(amount, true); 87 | } 88 | return ret.get(); 89 | } 90 | } -------------------------------------------------------------------------------- /src/test/java/com/nvidia/spark/rapids/jni/MapTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | import ai.rapids.cudf.*; 20 | 21 | import org.junit.jupiter.api.Test; 22 | 23 | import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; 24 | 25 | import java.util.Arrays; 26 | import java.util.List; 27 | 28 | public class MapTest { 29 | 30 | @Test 31 | void sort() { 32 | // Map is List> 33 | List map1 = Arrays.asList( 34 | new HostColumnVector.StructData(Arrays.asList(5, 2)), 35 | new HostColumnVector.StructData(Arrays.asList(4, 1))); 36 | List map2 = Arrays.asList( 37 | new HostColumnVector.StructData(Arrays.asList(2, 1)), 38 | new HostColumnVector.StructData(Arrays.asList(4, 3))); 39 | 40 | List sorted_map1 = Arrays.asList( 41 | new HostColumnVector.StructData(Arrays.asList(4, 1)), 42 | new HostColumnVector.StructData(Arrays.asList(5, 2))); 43 | List sorted_map2 = map2; 44 | 45 | HostColumnVector.StructType structType = new HostColumnVector.StructType(true, 46 | Arrays.asList(new HostColumnVector.BasicType(true, DType.INT32), 47 | new HostColumnVector.BasicType(true, DType.INT32))); 48 | try (ColumnVector cv = ColumnVector.fromLists( 49 | new HostColumnVector.ListType(true, structType), map1, map2); 50 | ColumnVector res = Map.sort(cv, false); 51 | ColumnVector expected = ColumnVector.fromLists( 52 | new HostColumnVector.ListType(true, structType), sorted_map1, sorted_map2)) { 53 | 54 | assertColumnsAreEqual(expected, res); 55 | } 56 | } 57 | 58 | } 59 | 60 | -------------------------------------------------------------------------------- /src/test/java/com/nvidia/spark/rapids/jni/RegexRewriteUtilsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.nvidia.spark.rapids.jni; 18 | 19 | import ai.rapids.cudf.ColumnVector; 20 | import ai.rapids.cudf.Scalar; 21 | import org.junit.jupiter.api.Test; 22 | 23 | import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; 24 | 25 | public class RegexRewriteUtilsTest { 26 | 27 | @Test 28 | void testLiteralRangePattern() { 29 | int d = 3; 30 | try (ColumnVector inputCv = ColumnVector.fromStrings( 31 | "abc123", "aabc123", "aabc12", "abc1232", "aabc1232"); 32 | Scalar pattern = Scalar.fromString("abc"); 33 | ColumnVector expected = ColumnVector.fromBooleans(true, true, false, true, true); 34 | ColumnVector actual = RegexRewriteUtils.literalRangePattern(inputCv, pattern, d, 48, 57)) { 35 | assertColumnsAreEqual(expected, actual); 36 | } 37 | } 38 | 39 | @Test 40 | void testLiteralRangePatternChinese() { 41 | int d = 2; 42 | try (ColumnVector inputCv = ColumnVector.fromStrings( 43 | "数据砖块", "火花-急流英伟达", "英伟达Nvidia", "火花-急流"); 44 | Scalar pattern = Scalar.fromString("英"); 45 | ColumnVector expected = ColumnVector.fromBooleans(false, true, true, false); 46 | ColumnVector actual = RegexRewriteUtils.literalRangePattern(inputCv, pattern, d, 19968, 40869)) { 47 | assertColumnsAreEqual(expected, actual); 48 | } 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /thirdparty/cudf-pins/add_dependency_pins.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2024-2025, NVIDIA CORPORATION. 
# All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Load cudf's rapids_config.cmake only when rapids-cmake-dir has not been defined yet.
if(NOT DEFINED rapids-cmake-dir)
  include(../cudf/cmake/rapids_config.cmake)
endif()

include(rapids-cpm)
rapids_cpm_init()

# Either pin the CPM packages to the versions.json stored next to this file, or regenerate
# that file, depending on CUDF_DEPENDENCY_PIN_MODE.
function(add_override_if_requested)
  # Any mode other than `pinned`: record the resolved versions and stop here.
  if(NOT CUDF_DEPENDENCY_PIN_MODE STREQUAL pinned)
    include(${rapids-cmake-dir}/cpm/generate_pinned_versions.cmake)
    rapids_cpm_generate_pinned_versions(OUTPUT ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/versions.json)

    message(STATUS "Building with latest CUDF dependencies (saving pinned versions to ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/versions.json)")
    return()
  endif()

  # `pinned` mode: override the package versions with the recorded ones.
  include(${rapids-cmake-dir}/cpm/package_override.cmake)
  rapids_cpm_package_override(${CMAKE_CURRENT_FUNCTION_LIST_DIR}/versions.json)

  message(STATUS "Pinning CUDF dependencies to values found in ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/versions.json")
endfunction()
add_override_if_requested()
-------------------------------------------------------------------------------- /thirdparty/cudf-pins/rapids-cmake.sha: -------------------------------------------------------------------------------- 1 | c0330a832746dabf4c4c518072af6771ce654330 2 | -------------------------------------------------------------------------------- /thirdparty/cudf-pins/setup.cmake: -------------------------------------------------------------------------------- 1 | # 2 | #
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Normalise the mode to lower case so the comparisons below are case-insensitive, then
# reject anything that is neither `pinned` nor `latest` (including an unset/empty value).
string(TOLOWER "${CUDF_DEPENDENCY_PIN_MODE}" CUDF_DEPENDENCY_PIN_MODE)
if(NOT CUDF_DEPENDENCY_PIN_MODE STREQUAL pinned AND
   NOT CUDF_DEPENDENCY_PIN_MODE STREQUAL latest)
  message(FATAL_ERROR "The CUDF_DEPENDENCY_PIN_MODE variable must be set to either `pinned` or `latest`.")
endif()

# set_rapids_cmake_pin_sha1()
#
# Takes no arguments. Logs the current value of `rapids-cmake-sha` and re-exports that
# same value into the caller's scope.
function(set_rapids_cmake_pin_sha1)
  message(STATUS "Pinning rapids-cmake SHA1 to ${rapids-cmake-sha}")

  set(rapids-cmake-sha "${rapids-cmake-sha}" PARENT_SCOPE)
endfunction()

# We need to set the rapids-cmake SHA1 before any CMake code in libcudf is executed when
# we are in pin mode.
# Otherwise we will use the latest rapids-cmake version since that
# is what cudf does via `fetch_rapids.cmake`
if(CUDF_DEPENDENCY_PIN_MODE STREQUAL pinned)
  # Extract the rapids sha1 from the file
  file(READ "${CMAKE_CURRENT_LIST_DIR}/rapids-cmake.sha" rapids-cmake-sha)
  # string(STRIP <string> <output_variable>): the input value comes first and the
  # destination variable second. This trims leading/trailing whitespace (newline,
  # carriage return, spaces, tabs) from the value read from the file.
  string(STRIP "${rapids-cmake-sha}" rapids-cmake-sha)
  # Also drop any newline left inside the value.
  string(REPLACE "\n" "" rapids-cmake-sha "${rapids-cmake-sha}")
  # FORCE so the SHA read from the file replaces any previously cached value.
  set(rapids-cmake-sha "${rapids-cmake-sha}" CACHE STRING "rapids-cmake sha to use" FORCE)
  message(STATUS "Pinning rapids-cmake SHA1 [${rapids-cmake-sha}]")
else()
  set(rapids-cmake-fetch-via-git "ON" CACHE STRING "Make sure rapids-cmake is cloned so we can get SHA value" FORCE)
endif()

# We need to use a project() call hook, since rapids-cmake cpm_init()
# can't be called from a `-C` CMake file
set(CMAKE_PROJECT_TOP_LEVEL_INCLUDES "${CMAKE_CURRENT_LIST_DIR}/add_dependency_pins.cmake" CACHE FILEPATH "" )

--------------------------------------------------------------------------------