├── .github
    ├── CODEOWNERS
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   ├── documentation-request.md
    │   ├── feature_request.md
    │   └── submit-question.md
    ├── PULL_REQUEST_TEMPLATE.md
    └── workflows
    │   ├── action-helper
    │       ├── Dockerfile
    │       ├── action.yml
    │       ├── entrypoint.sh
    │       └── python
    │       │   ├── auto-merge
    │       │   ├── cleanup-bot-branch
    │       │   ├── submodule-sync
    │       │   └── utils.py
    │   ├── add-to-project.yml
    │   ├── auto-merge.yml
    │   ├── blossom-ci.yml
    │   ├── clang-format.yml
    │   ├── license-header-check.yml
    │   └── signoff-check.yml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── NOTICE
├── README.md
├── SECURITY.md
├── build
    ├── apply-patches
    ├── build-in-docker
    ├── build-info
    ├── buildcpp.sh
    ├── run-in-docker
    ├── sanitizer-java
    │   └── bin
    │   │   └── java
    ├── submodule-check
    ├── unapply-patches
    └── win
    │   ├── create-wsl2.ps1
    │   └── setup-wsl2.sh
├── ci
    ├── Dockerfile
    ├── Jenkinsfile.premerge
    ├── check-cuda-dependencies.sh
    ├── deploy.sh
    ├── fuzz-test.sh
    ├── nightly-build.sh
    ├── premerge-build.sh
    ├── settings.xml
    └── submodule-sync.sh
├── docs
    ├── img
    │   └── memory_state_machine.png
    └── memory_management.md
├── patches
    └── noop.patch
├── pom.xml
├── src
    ├── main
    │   ├── cpp
    │   │   ├── CMakeLists.txt
    │   │   ├── benchmarks
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── bloom_filter.cu
    │   │   │   ├── cast_long_to_binary_string.cpp
    │   │   │   ├── cast_string_to_float.cpp
    │   │   │   ├── common
    │   │   │   │   ├── generate_input.cu
    │   │   │   │   ├── generate_input.hpp
    │   │   │   │   └── random_distribution_factory.cuh
    │   │   │   ├── get_json_object.cu
    │   │   │   ├── parse_uri.cpp
    │   │   │   └── row_conversion.cpp
    │   │   ├── cmake
    │   │   │   ├── get_flatbuffers.cmake
    │   │   │   └── get_spdlog.cmake
    │   │   ├── faultinj
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── README.md
    │   │   │   └── faultinj.cu
    │   │   ├── profiler
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── ProfilerJni.cpp
    │   │   │   ├── profiler_debug.cpp
    │   │   │   ├── profiler_debug.hpp
    │   │   │   ├── profiler_schema.cpp.in
    │   │   │   ├── profiler_serializer.cpp
    │   │   │   ├── profiler_serializer.hpp
    │   │   │   └── spark_rapids_profile_converter.cpp
    │   │   ├── src
    │   │   │   ├── BloomFilterJni.cpp
    │   │   │   ├── CaseWhenJni.cpp
    │   │   │   ├── CastStringJni.cpp
    │   │   │   ├── DateTimeUtilsJni.cpp
    │   │   │   ├── DecimalUtilsJni.cpp
    │   │   │   ├── GpuTimeZoneDBJni.cpp
    │   │   │   ├── HashJni.cpp
    │   │   │   ├── HistogramJni.cpp
    │   │   │   ├── HostTableJni.cpp
    │   │   │   ├── HyperLogLogPlusPlusHostUDFJni.cpp
    │   │   │   ├── JSONUtilsJni.cpp
    │   │   │   ├── KudoGpuSerializerJni.cpp
    │   │   │   ├── ListSliceJni.cpp
    │   │   │   ├── MapJni.cpp
    │   │   │   ├── NativeParquetJni.cpp
    │   │   │   ├── NumberConverterJni.cpp
    │   │   │   ├── ParseURIJni.cpp
    │   │   │   ├── RegexRewriteUtilsJni.cpp
    │   │   │   ├── RowConversionJni.cpp
    │   │   │   ├── SparkResourceAdaptorJni.cpp
    │   │   │   ├── SubStringIndexJni.cpp
    │   │   │   ├── TaskPriorityJni.cpp
    │   │   │   ├── ZOrderJni.cpp
    │   │   │   ├── bloom_filter.cu
    │   │   │   ├── bloom_filter.hpp
    │   │   │   ├── case_when.cu
    │   │   │   ├── case_when.hpp
    │   │   │   ├── cast_decimal_to_string.cu
    │   │   │   ├── cast_float_to_string.cu
    │   │   │   ├── cast_long_to_binary_string.cu
    │   │   │   ├── cast_string.cu
    │   │   │   ├── cast_string.hpp
    │   │   │   ├── cast_string_to_datetime.cu
    │   │   │   ├── cast_string_to_float.cu
    │   │   │   ├── cast_string_to_timestamp_common.hpp
    │   │   │   ├── datetime_rebase.cu
    │   │   │   ├── datetime_truncate.cu
    │   │   │   ├── datetime_utils.cuh
    │   │   │   ├── datetime_utils.hpp
    │   │   │   ├── decimal_utils.cu
    │   │   │   ├── decimal_utils.hpp
    │   │   │   ├── emptyfile.cpp
    │   │   │   ├── format_float.cu
    │   │   │   ├── from_json_to_raw_map.cu
    │   │   │   ├── from_json_to_raw_map_debug.cuh
    │   │   │   ├── from_json_to_structs.cu
    │   │   │   ├── ftos_converter.cuh
    │   │   │   ├── get_json_object.cu
    │   │   │   ├── get_json_object.hpp
    │   │   │   ├── hash.cuh
    │   │   │   ├── hash.hpp
    │   │   │   ├── histogram.cu
    │   │   │   ├── histogram.hpp
    │   │   │   ├── hive_hash.cu
    │   │   │   ├── host_table_view.hpp
    │   │   │   ├── hyper_log_log_plus_plus.cu
    │   │   │   ├── hyper_log_log_plus_plus.hpp
    │   │   │   ├── hyper_log_log_plus_plus_const.hpp
    │   │   │   ├── hyper_log_log_plus_plus_host_udf.cu
    │   │   │   ├── hyper_log_log_plus_plus_host_udf.hpp
    │   │   │   ├── json_parser.cuh
    │   │   │   ├── json_utils.cu
    │   │   │   ├── json_utils.hpp
    │   │   │   ├── list_slice.cu
    │   │   │   ├── list_slice.hpp
    │   │   │   ├── map.cu
    │   │   │   ├── map.hpp
    │   │   │   ├── murmur_hash.cu
    │   │   │   ├── murmur_hash.cuh
    │   │   │   ├── number_converter.cu
    │   │   │   ├── number_converter.hpp
    │   │   │   ├── parse_uri.cu
    │   │   │   ├── parse_uri.hpp
    │   │   │   ├── regex_rewrite_utils.cu
    │   │   │   ├── regex_rewrite_utils.hpp
    │   │   │   ├── row_conversion.cu
    │   │   │   ├── row_conversion.hpp
    │   │   │   ├── shuffle_assemble.cu
    │   │   │   ├── shuffle_split.cu
    │   │   │   ├── shuffle_split.hpp
    │   │   │   ├── shuffle_split_detail.hpp
    │   │   │   ├── spark_rapids_jni_version.cpp.in
    │   │   │   ├── spark_rapids_jni_version.h
    │   │   │   ├── substring_index.cu
    │   │   │   ├── substring_index.hpp
    │   │   │   ├── task_priority.hpp
    │   │   │   ├── timezones.cu
    │   │   │   ├── timezones.hpp
    │   │   │   ├── utilities.cu
    │   │   │   ├── utilities.hpp
    │   │   │   ├── version.hpp
    │   │   │   ├── xxhash64.cu
    │   │   │   ├── zorder.cu
    │   │   │   └── zorder.hpp
    │   │   └── tests
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── bloom_filter.cu
    │   │   │   ├── cast_decimal_to_string.cpp
    │   │   │   ├── cast_float_to_string.cpp
    │   │   │   ├── cast_long_to_binary_string.cpp
    │   │   │   ├── cast_string.cpp
    │   │   │   ├── datetime_rebase.cpp
    │   │   │   ├── format_float.cpp
    │   │   │   ├── hash.cpp
    │   │   │   ├── hyper_log_log_plus_plus.cu
    │   │   │   ├── list_slice.cpp
    │   │   │   ├── number_converter.cpp
    │   │   │   ├── parse_uri.cpp
    │   │   │   ├── row_conversion.cpp
    │   │   │   ├── shuffle_split.cu
    │   │   │   ├── substring_index.cpp
    │   │   │   ├── test_utilities.hpp
    │   │   │   ├── timezones.cpp
    │   │   │   └── utilities.cpp
    │   ├── fbs
    │   │   └── profiler.fbs
    │   └── java
    │   │   └── com
    │   │       └── nvidia
    │   │           └── spark
    │   │               └── rapids
    │   │                   └── jni
    │   │                       ├── Arms.java
    │   │                       ├── BloomFilter.java
    │   │                       ├── CaseWhen.java
    │   │                       ├── CastException.java
    │   │                       ├── CastStrings.java
    │   │                       ├── CpuRetryOOM.java
    │   │                       ├── CpuSplitAndRetryOOM.java
    │   │                       ├── DateTimeRebase.java
    │   │                       ├── DateTimeUtils.java
    │   │                       ├── DecimalUtils.java
    │   │                       ├── GpuListSliceUtils.java
    │   │                       ├── GpuOOM.java
    │   │                       ├── GpuRetryOOM.java
    │   │                       ├── GpuSplitAndRetryOOM.java
    │   │                       ├── GpuSubstringIndexUtils.java
    │   │                       ├── GpuTimeZoneDB.java
    │   │                       ├── Hash.java
    │   │                       ├── Histogram.java
    │   │                       ├── HostTable.java
    │   │                       ├── HyperLogLogPlusPlusHostUDF.java
    │   │                       ├── JSONUtils.java
    │   │                       ├── Map.java
    │   │                       ├── NumberConverter.java
    │   │                       ├── OffHeapOOM.java
    │   │                       ├── Pair.java
    │   │                       ├── ParquetFooter.java
    │   │                       ├── ParseURI.java
    │   │                       ├── Preconditions.java
    │   │                       ├── Profiler.java
    │   │                       ├── RegexRewriteUtils.java
    │   │                       ├── RmmSpark.java
    │   │                       ├── RmmSparkThreadState.java
    │   │                       ├── RowConversion.java
    │   │                       ├── SparkPlatformType.java
    │   │                       ├── SparkResourceAdaptor.java
    │   │                       ├── TaskPriority.java
    │   │                       ├── ThreadStateRegistry.java
    │   │                       ├── Version.java
    │   │                       ├── ZOrder.java
    │   │                       ├── kudo
    │   │                           ├── ByteArrayOutputStreamWriter.java
    │   │                           ├── ColumnOffsetInfo.java
    │   │                           ├── ColumnViewInfo.java
    │   │                           ├── DataOutputStreamWriter.java
    │   │                           ├── DataWriter.java
    │   │                           ├── DumpOption.java
    │   │                           ├── KudoGpuSerializer.java
    │   │                           ├── KudoHostMergeResult.java
    │   │                           ├── KudoSerializer.java
    │   │                           ├── KudoTable.java
    │   │                           ├── KudoTableHeader.java
    │   │                           ├── KudoTableHeaderCalc.java
    │   │                           ├── KudoTableMerger.java
    │   │                           ├── MergeMetrics.java
    │   │                           ├── MergeOptions.java
    │   │                           ├── MergedInfoCalc.java
    │   │                           ├── OpenByteArrayOutputStream.java
    │   │                           ├── OpenByteArrayOutputStreamWriter.java
    │   │                           ├── SliceInfo.java
    │   │                           ├── SlicedBufferSerializer.java
    │   │                           ├── SlicedValidityBufferInfo.java
    │   │                           ├── TableBuilder.java
    │   │                           ├── WriteInput.java
    │   │                           └── WriteMetrics.java
    │   │                       └── schema
    │   │                           ├── HostColumnsVisitor.java
    │   │                           ├── SchemaVisitor.java
    │   │                           ├── SimpleSchemaVisitor.java
    │   │                           └── Visitors.java
    └── test
    │   ├── cpp
    │       └── faultinj
    │       │   └── test_faultinj.json
    │   └── java
    │       └── com
    │           └── nvidia
    │               └── spark
    │                   └── rapids
    │                       └── jni
    │                           ├── BloomFilterTest.java
    │                           ├── CaseWhenTest.java
    │                           ├── CastStringsTest.java
    │                           ├── DateTimeUtilsTest.java
    │                           ├── DecimalUtilsTest.java
    │                           ├── FromJsonToRawMapTest.java
    │                           ├── GetJsonObjectTest.java
    │                           ├── GpuListSliceUtilsTest.java
    │                           ├── GpuSubstringIndexUtilsTest.java
    │                           ├── HashTest.java
    │                           ├── HilbertIndexTest.java
    │                           ├── HistogramTest.java
    │                           ├── HostTableTest.java
    │                           ├── InterleaveBitsTest.java
    │                           ├── LimitingOffHeapAllocForTests.java
    │                           ├── MapTest.java
    │                           ├── NumberConverterTest.java
    │                           ├── ParseURITest.java
    │                           ├── RegexRewriteUtilsTest.java
    │                           ├── RmmSparkMonteCarlo.java
    │                           ├── RmmSparkTest.java
    │                           ├── RowConversionTest.java
    │                           ├── TimeZoneTest.java
    │                           └── kudo
    │                               ├── KudoConcatValidityTest.java
    │                               ├── KudoGpuSerializerTest.java
    │                               └── KudoSerializerTest.java
└── thirdparty
    └── cudf-pins
        ├── add_dependency_pins.cmake
        ├── rapids-cmake.sha
        ├── setup.cmake
        └── versions.json


/.github/CODEOWNERS:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | 
17 | # Build-related
18 | /.github/  @NVIDIA/sparkrapids-cicd-codeowners
19 | /build/    @NVIDIA/sparkrapids-cicd-codeowners
20 | /ci/       @NVIDIA/sparkrapids-cicd-codeowners
21 | /patches/  @NVIDIA/sparkrapids-cicd-codeowners
22 | pom.xml    @NVIDIA/sparkrapids-cicd-codeowners
23 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a bug report to help us improve the RAPIDS Accelerator JNI for Apache Spark
 4 | title: "[BUG]"
 5 | labels: "? - Needs Triage, bug"
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **Steps/Code to reproduce bug**
14 | Please provide a list of steps or a code sample to reproduce the issue.
15 | Avoid posting private or sensitive data.
16 | 
17 | **Expected behavior**
18 | A clear and concise description of what you expected to happen.
19 | 
20 | **Environment details (please complete the following information)**
21 |  - Environment location: [Standalone, YARN, Kubernetes, Cloud(specify cloud provider)]
22 |  - Spark configuration settings related to the issue
23 | 
24 | **Additional context**
25 | Add any other context about the problem here.
26 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentation-request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Documentation request
 3 | about: Report incorrect or needed documentation
 4 | title: "[DOC]"
 5 | labels: "? - Needs Triage, documentation"
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | ## Report incorrect documentation
11 | 
12 | **Location of incorrect documentation**
13 | Provide links and line numbers if applicable.
14 | 
15 | **Describe the problems or issues found in the documentation**
16 | A clear and concise description of what you found to be incorrect.
17 | 
18 | **Steps taken to verify documentation is incorrect**
19 | List any steps you have taken:
20 | 
21 | **Suggested fix for documentation**
22 | Detail proposed changes to fix the documentation if you have any.
23 | 
24 | ---
25 | 
26 | ## Report needed documentation
27 | 
28 | **Report needed documentation**
29 | A clear and concise description of what documentation you believe is needed and why.
30 | 
31 | **Describe the documentation you'd like**
32 | A clear and concise description of what you want to happen.
33 | 
34 | **Steps taken to search for needed documentation**
35 | List any steps you have taken:
36 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for the RAPIDS Accelerator JNI for Apache Spark
 4 | title: "[FEA]"
 5 | labels: "? - Needs Triage, feature request"
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I wish the RAPIDS Accelerator JNI for Apache Spark would [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context, code examples, or references to existing implementations about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/submit-question.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Submit question
 3 | about: Ask a general question about RAPIDS Accelerator JNI for Apache Spark
 4 | title: "[QST]"
 5 | labels: "? - Needs Triage, question"
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **What is your question?**
11 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | <!--
 2 | 
 3 | Thank you for contributing to RAPIDS Accelerator for Apache Spark!
 4 | 
 5 | Here are some guidelines to help the review process go smoothly.
 6 | 
 7 | 1. Please write a description in this text box of the changes that are being
 8 |    made.
 9 | 
10 | 2. Please ensure that you have written unit tests for the changes made/features
11 |    added.
12 | 
13 | 3. If you are closing an issue please use one of the automatic closing words as
14 |    noted here: https://help.github.com/articles/closing-issues-using-keywords/
15 | 
16 | 4. If your pull request is not ready for review but you want to make use of the
17 |    continuous integration testing facilities please create a draft pull request
18 |    or prefix the pull request summary with `[WIP]`.
19 | 
20 | 5. If your pull request is ready to be reviewed without requiring additional
21 |    work on top of it then remove any `[WIP]` prefix in the summary and
22 |    restore it from draft status if necessary.
23 | 
24 | 6. Once all work has been done and review has taken place please do not add
25 |    features or make changes out of the scope of those requested by the reviewer
26 |    (doing this just adds delays as already reviewed code ends up having to be
27 |    re-reviewed/it is hard to tell what is new etc!). Further, please avoid
28 |    rebasing your branch during the review process, as this causes the context
29 |    of any comments made by reviewers to be lost. If conflicts occur during
30 |    review then they should be resolved by merging into the branch used for
31 |    making the pull request.
32 | 
33 | Many thanks in advance for your cooperation!
34 | 
35 | -->
36 | 


--------------------------------------------------------------------------------
/.github/workflows/action-helper/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022-2023, NVIDIA CORPORATION.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
# Image backing the 'action helper' docker action: a Python runtime plus the
# helper scripts copied from this directory.
FROM python:alpine

WORKDIR /
# Ship the helper scripts and the dispatcher entrypoint into the image root.
COPY python /python
COPY entrypoint.sh .
# The helper scripts are invoked directly by path, so they must be executable.
RUN chmod -R +x /python /entrypoint.sh
# pin urllib3<2.0 for https://github.com/psf/requests/issues/6432
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir requests "urllib3<2.0"

ENTRYPOINT ["/entrypoint.sh"]
25 | 


--------------------------------------------------------------------------------
/.github/workflows/action-helper/action.yml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022, NVIDIA CORPORATION.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # TODO: move this to an independent repo as a public Github Action
16 | name: 'action helper'
17 | description: 'helper for github-related operations'
18 | inputs:
19 |   operator:
20 |     required: true
21 |     description: 'specify operator, e.g. auto-merge'
22 | runs:
23 |   using: 'docker'
24 |   image: 'Dockerfile'
25 |   args:
26 |     - ${{ inputs.operator }}
27 | 


--------------------------------------------------------------------------------
/.github/workflows/action-helper/entrypoint.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh -l
 2 | #
 3 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
# Abort immediately if any command below exits with a non-zero status.
set -e

# Exactly one argument is required: the name of the operator to run.
# POSIX `[` is used instead of the bash-only `[[` because the shebang is /bin/sh.
if [ "$#" -ne 1 ]; then
    echo "ERROR: invalid number of parameters, should be exact one" >&2
    exit 1
fi

# Dispatch on the operator name.
case "$1" in

    auto-merge)
        /python/auto-merge --delete_head=True
        ;;

    *)
        # An unrecognised operator is a failure: exit non-zero so the calling
        # workflow step is marked failed instead of silently succeeding.
        echo "ERROR: unknown parameter: $1" >&2
        exit 1
        ;;
esac
35 | 


--------------------------------------------------------------------------------
/.github/workflows/action-helper/python/auto-merge:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright (c) 2022, NVIDIA CORPORATION.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import sys
18 | from argparse import ArgumentParser
19 | 
20 | from utils import EnvDefault, PullRequest, strtobool
21 | 
22 | 
def main():
    """Create (or reuse) a pull request from HEAD to BASE and auto-merge it.

    Command-line options --owner, --repo, --head, --base and --token use the
    ``EnvDefault`` action with the environment variables OWNER, REPO, HEAD,
    BASE and TOKEN respectively. ``--delete_head`` (default False) is parsed
    through ``strtobool`` and controls whether the HEAD branch is deleted
    after the merge.

    Exit status:
        0 -- early, when ``pr.create`` returns a truthy third value
             (NOTE(review): presumably "nothing to merge" -- confirm in utils).
        1 -- when any step raises; the exception is printed first.
    """
    parser = ArgumentParser(description="Automerge")
    # The original help text for --owner was a copy of the --token help.
    parser.add_argument("--owner", action=EnvDefault, env="OWNER",
                        help="repo owner, will try use env OWNER if empty")
    parser.add_argument("--repo", action=EnvDefault, env="REPO",
                        help="repo name, will try use env REPO if empty")
    parser.add_argument("--head", action=EnvDefault, env="HEAD",
                        help="HEAD ref, will try use env HEAD if empty")
    parser.add_argument("--base", action=EnvDefault, env="BASE",
                        help="Base ref, will try use env BASE if empty")
    parser.add_argument("--token", action=EnvDefault, env="TOKEN",
                        help="github token, will try use env TOKEN if empty")
    parser.add_argument("--delete_head", default=False, type=lambda x: bool(strtobool(x)),
                        help="if delete HEAD branch after auto-merge")
    args = parser.parse_args()

    # Head and base live in the same owner/repo and share one token.
    pr = PullRequest(head_owner=args.owner, head=args.head, head_token=args.token,
                     base_owner=args.owner, repo=args.repo, base=args.base, base_token=args.token)
    try:
        if exist := pr.get_open():
            # Reuse the first already-open pull request.
            number = exist[0].get('number')
            sha = exist[0].get('head').get('sha')
        else:
            params = {
                # head share the same owner/repo with base in auto-merge
                'title': f"[auto-merge] {pr.head} to {pr.base} [skip ci] [bot]",
                'head': f"{pr.head_owner}:{pr.head}",
                'base': pr.base,
                'body': f"auto-merge triggered by github actions on `{pr.head}` to "
                        f"create a PR keeping `{pr.base}` up-to-date. "
                        "If this PR is unable to be merged due to conflicts, "
                        "it will remain open until manually fix.",
                'maintainer_can_modify': True
            }
            number, sha, term = pr.create(params)
            if term:
                # sys.exit raises SystemExit, which `except Exception` below
                # does not catch, so this really exits with status 0.
                sys.exit(0)
        pr.auto_merge(number, sha)
        if args.delete_head:
            pr.delete_head()
    except Exception as e:
        print(e)
        sys.exit(1)
66 | 
67 | 
68 | if __name__ == '__main__':
69 |     main()
70 | 


--------------------------------------------------------------------------------
/.github/workflows/action-helper/python/cleanup-bot-branch:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright (c) 2022, NVIDIA CORPORATION.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import sys
18 | from argparse import ArgumentParser
19 | 
20 | from utils import EnvDefault, PullRequest
21 | 
22 | 
def main():
    """Delete a ``bot-*`` HEAD branch once no pull request for it is open.

    Command-line options --owner, --repo, --head, --base and --token use the
    ``EnvDefault`` action with the environment variables OWNER, REPO, HEAD,
    BASE and TOKEN respectively.

    The branch is deleted only when its name starts with ``bot-`` and
    ``pr.get_open()`` returns nothing; otherwise an exception describing the
    refusal is raised, printed, and the process exits with status 1.
    """
    parser = ArgumentParser(description="Cleanup bot branch")
    # The original help text for --owner was a copy of the --token help.
    parser.add_argument("--owner", action=EnvDefault, env="OWNER",
                        help="repo owner, will try use env OWNER if empty")
    parser.add_argument("--repo", action=EnvDefault, env="REPO",
                        help="repo name, will try use env REPO if empty")
    parser.add_argument("--head", action=EnvDefault, env="HEAD",
                        help="HEAD ref, will try use env HEAD if empty")
    parser.add_argument("--base", action=EnvDefault, env="BASE",
                        help="Base ref, will try use env BASE if empty")
    parser.add_argument("--token", action=EnvDefault, env="TOKEN",
                        help="github token, will try use env TOKEN if empty")
    args = parser.parse_args()

    try:
        # Safety guard: this script must never delete a non-bot branch.
        if not args.head.startswith('bot-'):
            raise Exception(f"Cannot delete {args.head}, the script is only allowed to delete branch w/ bot-* prefix")

        pr = PullRequest(head_owner=args.owner, head=args.head, head_token=args.token,
                         base_owner=args.owner, repo=args.repo, base=args.base, base_token=args.token)
        if exist := pr.get_open():
            # Refuse to delete a branch that still backs an open pull request.
            number = exist[0].get('number')
            raise Exception(f"Cannot delete {pr.head}, pull request #{number} is still open")
        else:
            pr.delete_head()
    except Exception as e:
        print(e)
        sys.exit(1)
51 | 
52 | 
53 | if __name__ == '__main__':
54 |     main()
55 | 


--------------------------------------------------------------------------------
/.github/workflows/add-to-project.yml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022-2025, NVIDIA CORPORATION.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | name: Add new issues and pull requests to project
16 | 
17 | on:
18 |   issues:
19 |     types:
20 |       - opened
21 |   pull_request_target:
22 |     types:
23 |       - opened
24 | 
25 | jobs:
26 |   Add-to-project:
27 |     if: github.repository_owner == 'NVIDIA' # avoid adding issues from forks
28 |     runs-on: ubuntu-latest
29 |     steps:
30 |       - name: add-to-project
31 |         uses: NVIDIA/spark-rapids-common/add-to-project@main
32 |         with:
33 |           token: ${{ secrets.PROJECT_TOKEN }}
34 | 


--------------------------------------------------------------------------------
/.github/workflows/clang-format.yml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023-2024, NVIDIA CORPORATION.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # A workflow to check clang format
16 | name: clang format check
17 | 
18 | on:
19 |   pull_request:
20 |     types: [opened, synchronize, reopened]
21 | 
22 | jobs:
23 |   pre-commit:
24 |     runs-on: ubuntu-latest
25 |     steps:
26 |     - uses: actions/checkout@v4
27 |       with:
28 |         submodules: true
29 |     - uses: actions/setup-python@v5
30 |       with:
31 |         python-version: "3.10"
32 |     - uses: pre-commit/action@v3.0.0
33 |       with:
34 |         extra_args: clang-format --all-files


--------------------------------------------------------------------------------
/.github/workflows/license-header-check.yml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024, NVIDIA CORPORATION.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # A workflow to check copyright/license header
16 | name: license header check
17 | 
18 | on:
19 |   pull_request:
20 |     types: [opened, synchronize, reopened]
21 | 
22 | jobs:
23 |   license-header-check:
24 |     runs-on: ubuntu-latest
25 |     if: "!contains(github.event.pull_request.title, '[bot]')"
26 |     steps:
27 |       - name: Get checkout depth
28 |         run: |
29 |           echo "PR_FETCH_DEPTH=$(( ${{ github.event.pull_request.commits }} + 10 ))" >> $GITHUB_ENV
30 | 
31 |       - name: Checkout code
32 |         uses: actions/checkout@v4
33 |         with:
34 |           fetch-depth: ${{ env.PR_FETCH_DEPTH }}
35 | 
36 |       - name: license-header-check
37 |         uses: NVIDIA/spark-rapids-common/license-header-check@main
38 |         with:
39 |           included_file_patterns: |
40 |             *.cpp,
41 |             *.hpp,
42 |             *.cu,
43 |             *.cuh,
44 |             *.java,
45 |             *.sh,
46 |             *Dockerfile*,
47 |             *Jenkinsfile*,
48 |             *.yml,
49 |             *.yaml,
50 |             *.txt,
51 |             *.xml,
52 |             *.fbs,
53 |             build/*
54 |           excluded_file_patterns: |
55 |             thirdparty/*
56 | 


--------------------------------------------------------------------------------
/.github/workflows/signoff-check.yml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022-2024, NVIDIA CORPORATION.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # A workflow to check if PR got sign-off
16 | name: signoff check
17 | 
18 | on:
19 |   pull_request_target:
20 |     types: [opened, synchronize, reopened]
21 | 
22 | jobs:
23 |   signoff-check:
24 |     runs-on: ubuntu-latest
25 |     steps:
26 |       - name: signoff
27 |         uses: NVIDIA/spark-rapids-common/signoff-check@main
28 |         with:
29 |           owner: ${{ github.repository_owner }}
30 |           repo: spark-rapids-jni
31 |           pull_number: ${{ github.event.number }}
32 |           token: ${{ secrets.GITHUB_TOKEN }}
33 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | ## Common
 2 | __pycache__
 3 | *.py[cod]
 4 | *$py.class
 5 | *.a
 6 | *.o
 7 | *.so
 8 | *.dylib
 9 | .cache
10 | .vscode
11 | *.swp
12 | *.pytest_cache
13 | DartConfiguration.tcl
14 | .DS_Store
15 | *.manifest
16 | *.spec
17 | .nfs*
18 | .clangd
19 | *#*#
20 | *.#*
21 | *.pyc
22 | *.pyo
23 | *.swp
24 | *~
25 | 
26 | ## Patching
27 | *.diff
28 | *.orig
29 | *.rej
30 | 
31 | ## Eclipse IDE
32 | .project
33 | .cproject
34 | .settings
35 | 
36 | ## IntelliJ IDE
37 | .idea/
38 | .idea_modules/
39 | *.iml
40 | *.ipr
41 | *.iws
42 | 
43 | #Java
44 | dependency-reduced-pom.xml
45 | hs_err*.log
46 | target/
47 | 
48 | ## VSCode IDE
49 | .vscode
50 | 
51 | #Generated files
52 | cufile.log
53 | rmm_log.txt
54 | sanitizer_for_pid_*.log
55 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "thirdparty/cudf"]
2 | 	path = thirdparty/cudf
3 | 	url = https://github.com/rapidsai/cudf.git
4 | 	branch = branch-25.06
5 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022, NVIDIA CORPORATION.
 2 | 
 3 | repos:
 4 |       - repo: https://github.com/pre-commit/mirrors-clang-format
 5 |         rev: v16.0.1
 6 |         hooks:
 7 |               - id: clang-format
 8 |                 files: \.(cu|cuh|h|hpp|cpp|inl)$
 9 |                 types_or: [file]
10 |                 args: ['-fallback-style=none', '-style=file:thirdparty/cudf/.clang-format']
11 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
 1 | RAPIDS Accelerator JNI For Apache Spark
 2 | Copyright (c) 2022-2024, NVIDIA CORPORATION
 3 | 
 4 | --------------------------------------------------------------------------------
 5 | 
 6 | This project includes code from ryu (https://github.com/ulfjack/ryu).
 7 | 
 8 | Copyright (2018) Ulf Adams and contributors.
 9 | 
10 | Licensed under the Apache License, Version 2.0 (the "License");
11 | you may not use this file except in compliance with the License.
12 | You may obtain a copy of the License at
13 | 
14 | http://www.apache.org/licenses/LICENSE-2.0
15 | 
16 | Unless required by applicable law or agreed to in writing, software
17 | distributed under the License is distributed on an "AS IS" BASIS,
18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 | See the License for the specific language governing permissions and
20 | limitations under the License.
21 | 
22 | --------------------------------------------------------------------------------
23 | 
24 | This project includes code from flatbuffers (https://github.com/google/flatbuffers).
25 | 
26 | Copyright 2021 Google Inc. All rights reserved.
27 | 
28 | Licensed under the Apache License, Version 2.0 (the "License");
29 | you may not use this file except in compliance with the License.
30 | You may obtain a copy of the License at
31 | 
32 |      http://www.apache.org/licenses/LICENSE-2.0
33 | 
34 | Unless required by applicable law or agreed to in writing, software
35 | distributed under the License is distributed on an "AS IS" BASIS,
36 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
37 | See the License for the specific language governing permissions and
38 | limitations under the License.
39 | 
40 | --------------------------------------------------------------------------------
41 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # RAPIDS Accelerator JNI For Apache Spark
 2 | 
 3 | This repository contains native support code for the
 4 | [RAPIDS Accelerator for Apache Spark](https://github.com/NVIDIA/spark-rapids).
 5 | 
 6 | [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/NVIDIA/spark-rapids-jni)
 7 | 
 8 | ## Building From Source
 9 | 
10 | See the [build instructions in the contributing guide](CONTRIBUTING.md#building-from-source).
11 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | ## Security
 2 | 
 3 | NVIDIA is dedicated to the security and trust of our software products and services, including all
 4 | source code repositories managed through our organization. 
 5 | 
 6 | If you need to report a security issue, please use the appropriate contact points outlined
 7 | below. **Please do not report security vulnerabilities through GitHub/GitLab.** 
 8 | 
 9 | ## Reporting Potential Security Vulnerability in an NVIDIA Product
10 | 
11 | To report a potential security vulnerability in any NVIDIA product:
12 | - Web: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html)
13 | - E-Mail: psirt@nvidia.com
14 |    - We encourage you to use the following PGP key for secure email communication: [NVIDIA public PGP Key for communication](https://www.nvidia.com/en-us/security/pgp-key)
15 |    - Please include the following information:
16 |       - Product/Driver name and version/branch that contains the vulnerability
17 | 


--------------------------------------------------------------------------------
/build/apply-patches:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #
 4 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | 
19 | set -e
20 | 
21 | BASE_DIR=$( git rev-parse --show-toplevel )
22 | 
23 | PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")}
24 | 
25 | CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")}
26 | 
27 | # Applying patches to CUDF is problematic in a number of ways. But ultimately it comes down to
28 | # making sure that a user can do development work in spark-rapids-jni without the patches
29 | # getting in the way.
30 | # The operations I really want to support no matter what state CUDF is in are
31 | # 1) Build the repo from scratch
32 | # 2) Rebuild the repo without having to clean and start over
33 | # 3) upmerge to a new version of the plugin including updating the cudf submodule
34 | #
35 | # Building from scratch is simple. We want clean to unapply any patches and
36 | # build to apply them. But if we want to rebuild without a clean we need to know what
37 | # state the CUDF repo is in. Did we apply patches to it or not. The fastest way to do this
38 | # is to save some state files about what happened. But a user could mess with CUDF directly
39 | # so we want to have ways to double check that they are indeed correct.
40 | 
41 | FULLY_PATCHED_FILE="$CUDF_DIR/spark-rapids-jni.patch"
42 | 
43 | pushd "$CUDF_DIR"
44 | 
45 | PATCH_FILES=$(find "$PATCH_DIR" -type f -not -empty)
46 | 
47 | if [ -z "$PATCH_FILES" ] ; then
48 |   echo "No patches to apply"
49 |   exit 0
50 | fi
51 | 
52 | CHANGED_FILES=$(git status --porcelain --untracked-files=no)
53 | # A non-empty state file plus tracked changes that reverse cleanly means we already patched.
54 | if [ -s "$FULLY_PATCHED_FILE" ] && [ -n "$CHANGED_FILES" ] ; then
55 |   if git apply -R --check "$FULLY_PATCHED_FILE" ; then
56 |     echo "Patches appear to have been applied already"
57 |     exit 0
58 |   fi
59 | fi
60 | # Any remaining tracked changes did not come from our patches; refuse to patch on top of them.
61 | if [ -n "$CHANGED_FILES" ] ; then
62 |   echo "Error: CUDF repository has uncommitted changes. No patches will be applied. Please clean the repository so we can try and add the needed patches"
63 |   echo "$CHANGED_FILES"
64 |   exit 1
65 | fi
66 | # Apply in version-sorted order, skipping empty placeholder files (same filter as PATCH_FILES).
67 | find "$PATCH_DIR" -maxdepth 1 -type f -not -empty -print0 | sort -zV | while IFS= read -r -d '' file; do
68 |   echo "patching with: $file"
69 |   git apply -v "$file"
70 | done
71 | # Record the combined diff so a later run can detect that the patches are already applied.
72 | git diff > "$FULLY_PATCHED_FILE"
73 | 
74 | popd
75 | 


--------------------------------------------------------------------------------
/build/build-in-docker:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #
 4 | # Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | 
19 | # Build the spark-rapids-jni artifact in a Docker container with devtoolset
20 | 
21 | set -e
22 | 
23 | # Base paths relative to this script's location (quoted so paths with spaces work)
24 | SCRIPTDIR=$(cd "$(dirname "$0")" && pwd)
25 | 
26 | LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"}
27 | USE_GDS=${USE_GDS:-ON}
28 | export CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"}
29 | # Make CUDA_VERSION consistent with the file run-in-docker
30 | export CUDA_VERSION=${CUDA_VERSION:-11.8.0}
31 | CUDA_CLASSIFIER=cuda${CUDA_VERSION%%.*} # keep only the major version, e.g. 11.8.0 -> cuda11
32 | BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON}
33 | 
34 | if (( $# == 0 )); then
35 |   echo "Usage: $0 <Maven build arguments>"
36 |   exit 1
37 | fi
38 | 
39 | # Set env for arm64 build, The possible values of 'uname -m' : [x86_64/i386/aarch64/mips/...]
40 | if [ "$(uname -m)" == "aarch64" ]; then
41 |   USE_GDS="OFF" # The GDS cuFiles RDMA libraries are not included in the arm64 CUDA toolkit.
42 |   BUILD_FAULTINJ="OFF" # libcupti_static.a linked by cufaultinj, does not exist in the arm64 CUDA toolkit.
43 | fi
44 | # Run Maven through run-in-docker, passing the resolved settings and all caller arguments.
45 | "$SCRIPTDIR/run-in-docker" mvn \
46 |     -Dmaven.repo.local="$LOCAL_MAVEN_REPO" \
47 |     -DUSE_GDS="$USE_GDS" \
48 |     -DBUILD_FAULTINJ="${BUILD_FAULTINJ}" \
49 |     -Dcuda.version="$CUDA_CLASSIFIER" \
50 |     "$@"
51 | 


--------------------------------------------------------------------------------
/build/build-info:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | #
 4 | # Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | 
19 | # This script generates the build info as key=value lines on stdout.
20 | # Arguments:
21 | #   version  - The current version of the project
22 | #   git_path - The path to the repository
23 | #   libcudf_path - The path to the libcudf library; any further arguments are echoed unchanged
24 | set -e
25 | set -o pipefail
26 | 
27 | echo_build_properties() {
28 |   version=$1
29 |   git_path=$2
30 |   libcudf_path=$3
31 |   shift 3
32 |   echo "version=$version"
33 |   echo "user=$(whoami)"
34 |   echo "revision=$(cd "$git_path" && git rev-parse HEAD)"
35 |   echo "branch=$(cd "$git_path" && git rev-parse --abbrev-ref HEAD)"
36 |   echo "date=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
37 |   echo "url=$(cd "$git_path" && git config --get remote.origin.url)"
38 |   gpu_architectures=$(
39 |     cuobjdump "$libcudf_path" | gawk '
40 |       match($0, /arch = sm_([0-9]+)/, tmp) {
41 |         arch[tmp[1]] = 1
42 |       }
43 | 
44 |       END {
45 |         n = asorti(arch)
46 |         if (n == 0) {
47 |           print "ERROR: No fatbin ELF / PTX code sections found" > "/dev/stderr"
48 |           exit(1)
49 |         }
50 |         s = arch[1]
51 |         for (i = 2; i <= n; i++) {
52 |           s = s ";" arch[i]
53 |         }
54 |         print s
55 |       }
56 |     '
57 |   )
58 |   echo "gpu_architectures=$gpu_architectures"
59 |   for arg in "$@"; do
60 |     echo "$arg" # quoted so extra properties are not word-split or glob-expanded
61 |   done
62 | }
63 | 
64 | echo_build_properties "$@"
65 | 


--------------------------------------------------------------------------------
/build/sanitizer-java/bin/java:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | # This special Java executable is specified to the "jvm" configuration of
19 | # the surefire plugin to intercept forking the processes for tests. Then
20 | # the tests will run with the compute-sanitizer tool.
21 | exec compute-sanitizer --tool memcheck \
22 |     --launch-timeout 600 \
23 |     --error-exitcode -2 \
24 |     --log-file "./sanitizer_for_pid_%p.log" \
25 |     java "$@"
26 | 


--------------------------------------------------------------------------------
/build/submodule-check:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #
 4 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | 
19 | set -e
20 | # Status lines not starting with a space are treated as out of date; "|| true" tolerates no match.
21 | outdated_modules=$(git submodule status | grep -v "^ " || true)
22 | if [[ -n $outdated_modules ]]; then
23 |   echo >&2 "ERROR: submodules out of date: $outdated_modules. To fix: git submodule update --init --recursive"
24 |   exit 1
25 | fi
26 | 


--------------------------------------------------------------------------------
/build/unapply-patches:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #
 4 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | 
19 | set -e
20 | 
21 | BASE_DIR=$( git rev-parse --show-toplevel )
22 | 
23 | PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")}
24 | 
25 | CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")}
26 | 
27 | # Applying patches to CUDF is problematic in a number of ways. But ultimately it comes down to
28 | # making sure that a user can do development work in spark-rapids-jni without the patches
29 | # getting in the way.
30 | # The operations I really want to support no matter what state CUDF is in are
31 | # 1) Build the repo from scratch
32 | # 2) Rebuild the repo without having to clean and start over
33 | # 3) upmerge to a new version of the plugin including updating the cudf submodule
34 | #
35 | # Building from scratch is simple. We want clean to unapply any patches and
36 | # build to apply them. But if we want to rebuild without a clean we need to know what
37 | # state the CUDF repo is in. Did we apply patches to it or not. The fastest way to do this
38 | # is to save some state files about what happened. But a user could mess with CUDF directly
39 | # so we want to have ways to double check that they are indeed correct.
40 | 
41 | FULLY_PATCHED_FILE="$CUDF_DIR/spark-rapids-jni.patch"
42 | 
43 | pushd "$CUDF_DIR"
44 | 
45 | PATCH_FILES=$(find "$PATCH_DIR" -type f -not -empty)
46 | 
47 | if [ -z "$PATCH_FILES" ] ; then
48 |   echo "No patches to remove"
49 |   exit 0
50 | fi
51 | 
52 | CHANGED_FILES=$(git status --porcelain --untracked-files=no)
53 | # A non-empty state file plus tracked changes means the patches were probably applied by us.
54 | if [ -s "$FULLY_PATCHED_FILE" ] && [ -n "$CHANGED_FILES" ] ; then
55 |   if git apply --check -R "$FULLY_PATCHED_FILE"; then
56 |     echo "Patches appear to have been applied, so going to remove them"
57 |     git apply -R -v "$FULLY_PATCHED_FILE"
58 |     rm -f "$FULLY_PATCHED_FILE"
59 | 
60 |     # Check for modifications again: anything left over did not come from the recorded patch
61 |     if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then
62 |       echo "Error: CUDF repository has uncommitted changes. You might want to clean it manually if you know that is expected"
63 |       git status --porcelain --untracked-files=no
64 |       exit 1
65 |     fi
66 | 
67 |     exit 0
68 |   else
69 |     echo "Files are changed, but in a way where the full patch file does not apply to remove them $FULLY_PATCHED_FILE"
70 |     exit 1
71 |   fi
72 | fi
73 | # No usable state file: tracked changes here are not known to be ours, so do not touch them.
74 | if [ -n "$CHANGED_FILES" ] ; then
75 |   echo "Error: CUDF repository has uncommitted changes, but does not appear to have been patched. Please clean it and try again."
76 |   echo "$CHANGED_FILES"
77 |   exit 1
78 | else
79 |   echo "No changes in CUDF repository to remove"
80 | fi
81 | 
82 | popd
83 | 


--------------------------------------------------------------------------------
/build/win/create-wsl2.ps1:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | 
17 | wsl -d Ubuntu build/win/setup-wsl2.sh
18 | 


--------------------------------------------------------------------------------
/build/win/setup-wsl2.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #
 4 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | 
19 | # Last tested: 
20 | # Edition	Windows 10 Enterprise
21 | # Version	21H2
22 | # OS build	19044.1645
23 | # Experience	Windows Feature Experience Pack 120.2212.4170.0
24 | # NVIDIA Display Driver 473.47
25 | 
26 | # add WSL2 user to passwordless sudoers if desired
27 | # sudo visudo /etc/sudoers.d/wsl2-sudo
28 | 
29 | # Docker 
30 | sudo apt-get -y install apt-transport-https ca-certificates curl software-properties-common
31 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
32 | echo \
33 |   "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \
34 |   $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
35 | sudo apt-get update
36 | sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-compose-plugin
37 | sudo usermod -a -G docker $USER
38 | sudo service docker start
39 | docker run hello-world
40 | # NVIDIA container toolkit: add NVIDIA's apt key and repository for this distribution, then install
41 | distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
42 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
43 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
44 | sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit
45 | sudo service docker restart
46 | 
47 | 
48 | # CUDA
49 | # Initial instructions
50 | # https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&Distribution=WSL-Ubuntu&target_version=2.0&target_type=deb_network
51 | distroArch="wsl-ubuntu/x86_64"
52 | wget https://developer.download.nvidia.com/compute/cuda/repos/${distroArch}/cuda-wsl-ubuntu.pin
53 | sudo mv cuda-wsl-ubuntu.pin /etc/apt/preferences.d/cuda-repository-pin-600
54 | # Instructions for fetching keys modified per
55 | # https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212772#install-new-cuda-keyring-package-3
56 | sudo apt-key del 7fa2af80
57 | wget https://developer.download.nvidia.com/compute/cuda/repos/${distroArch}/cuda-keyring_1.0-1_all.deb
58 | sudo dpkg -i cuda-keyring_1.0-1_all.deb
59 | sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${distroArch}/ /"
60 | sudo apt-get update
61 | sudo apt-get -y install cuda-toolkit-11-7


--------------------------------------------------------------------------------
/ci/check-cuda-dependencies.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | # common script to help check if packaged *.so files have a dynamic link to CUDA Runtime
19 | 
20 | set -exo pipefail
21 | 
22 | jar_path=$1
23 | tmp_path=$(mktemp -d /tmp/jni-XXXXXX) # unique directory; a timestamp name could collide
24 | unzip -j "${jar_path}" "*64/Linux/*.so" -d "${tmp_path}"
25 | 
26 | find "$tmp_path" -type f -name "*.so" | while IFS= read -r so_file; do
27 |     # Check if *.so file has a dynamic link to CUDA Runtime
28 |     if objdump -p "$so_file" | grep NEEDED | grep -qi cudart; then
29 |         echo "Dynamic link to CUDA Runtime found in $so_file..."
30 |         ldd "$so_file"
31 |         exit 1
32 |     else
33 |         echo "No dynamic link to CUDA Runtime found in $so_file"
34 |     fi
35 | done
36 | 


--------------------------------------------------------------------------------
/ci/fuzz-test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | # NOTE:
19 | #     run fuzz test after build
20 | #     required jars: jni, jni-tests, slf4j-api
21 | 
22 | set -ex
23 | 
24 | WORKSPACE=${WORKSPACE:-$PWD}
25 | M2DIR=${M2DIR:-"$HOME/.m2"}
26 | # Ask Maven for the slf4j version so the default classpath names the matching slf4j-api jar
27 | SLF4J_VER=$(mvn help:evaluate -Dexpression=slf4j.version -q -DforceStdout)
28 | CLASSPATH=${CLASSPATH:-"$WORKSPACE/target/*:$M2DIR/repository/org/slf4j/slf4j-api/$SLF4J_VER/slf4j-api-$SLF4J_VER.jar"}
29 | 
30 | java -cp "$CLASSPATH" \
31 |   com.nvidia.spark.rapids.jni.RmmSparkMonteCarlo \
32 |   --taskMaxMiB=2048 --gpuMiB=3072 --skewed --allocMode=ASYNC
33 | 


--------------------------------------------------------------------------------
/ci/nightly-build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | set -ex
19 | 
20 | nvidia-smi
21 | 
22 | git submodule update --init --recursive
23 | 
24 | MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B"
25 | # cuda11 or cuda12
26 | CUDA_VER=${CUDA_VER:-cuda`nvcc --version | sed -n 's/^.*release \([0-9]\+\)\..*$/\1/p'`}
27 | PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
28 | USE_GDS=${USE_GDS:-ON}
29 | USE_SANITIZER=${USE_SANITIZER:-ON}
30 | BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON}
31 | ARM64=${ARM64:-false}
32 | artifact_suffix="${CUDA_VER}"
33 | # Maven profiles to activate; the arm64 case below also overrides feature toggles and the jar suffix
34 | profiles="source-javadoc"
35 | if [ "${ARM64}" == "true" ]; then
36 |   profiles="${profiles},arm64"
37 |   USE_GDS="OFF"
38 |   USE_SANITIZER="ON"
39 |   BUILD_FAULTINJ="OFF"
40 |   artifact_suffix="${artifact_suffix}-arm64"
41 | fi
42 | 
43 | ${MVN} clean package ${MVN_MIRROR}  \
44 |   -P${profiles} \
45 |   -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \
46 |   -Dlibcudf.build.configure=true \
47 |   -DUSE_GDS=${USE_GDS} -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \
48 |   -DBUILD_TESTS=ON -DBUILD_BENCHMARKS=ON -DBUILD_FAULTINJ=${BUILD_FAULTINJ} -Dcuda.version=$CUDA_VER \
49 |   -DUSE_SANITIZER=${USE_SANITIZER}
50 | # Check that the native libraries packaged in the jar have no dynamic link to the CUDA runtime
51 | build_name=$(${MVN} help:evaluate -Dexpression=project.build.finalName -q -DforceStdout)
52 | . ci/check-cuda-dependencies.sh "target/${build_name}-${artifact_suffix}.jar"
53 | 


--------------------------------------------------------------------------------
/ci/premerge-build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | set -ex
19 | 
20 | nvidia-smi
21 | 
22 | git submodule update --init --recursive
23 | 
24 | # MVN and MVN_MIRROR are expanded unquoted on purpose: MVN has to word-split into the command
25 | # plus its options, and MVN_MIRROR (never set here, so it can only come from the environment)
26 | # has to disappear entirely when it is empty or unset.
27 | MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B"
28 | PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
29 | # The -Dtest filter is single-quoted so the shell can never glob-expand '*' or give '!' any
30 | # special meaning; Maven receives the pattern verbatim.
31 | ${MVN} verify ${MVN_MIRROR} \
32 |   "-DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL}" \
33 |   -Dlibcudf.build.configure=true \
34 |   -DUSE_GDS=ON \
35 |   -Dtest='*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest' \
36 |   -DBUILD_TESTS=ON -DBUILD_BENCHMARKS=ON
37 | 
38 | # Read the jar base name and cuda.version from the POM, then source the dependency check with
39 | # the path of the jar built above.
40 | build_name=$(${MVN} help:evaluate -Dexpression=project.build.finalName -q -DforceStdout)
41 | cuda_version=$(${MVN} help:evaluate -Dexpression=cuda.version -q -DforceStdout)
42 | . ci/check-cuda-dependencies.sh "target/${build_name}-${cuda_version}.jar"
43 | 


--------------------------------------------------------------------------------
/docs/img/memory_state_machine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/spark-rapids-jni/d3aca7a5a423fcbaf0834baf5e24f091f05eee6c/docs/img/memory_state_machine.png


--------------------------------------------------------------------------------
/patches/noop.patch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/spark-rapids-jni/d3aca7a5a423fcbaf0834baf5e24f091f05eee6c/patches/noop.patch


--------------------------------------------------------------------------------
/src/main/cpp/benchmarks/bloom_filter.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <benchmarks/common/generate_input.hpp>
18 | 
19 | #include <cudf_test/column_utilities.hpp>
20 | 
21 | #include <bloom_filter.hpp>
22 | #include <hash.hpp>
23 | #include <nvbench/nvbench.cuh>
24 | 
25 | // nvbench benchmark: times spark_rapids_jni::bloom_filter_put for a fixed batch of xxhash64
26 | // values, using a bloom filter whose size is taken from the "bloom_filter_bytes" axis.
27 | static void bloom_filter_put(nvbench::state& state)
28 | {
29 |   constexpr int row_total  = 150'000'000;
30 |   constexpr int hash_count = 3;
31 | 
32 |   // Build the filter: the axis value is in bytes, the factory is given a count of 64-bit longs.
33 |   auto const filter_bytes = static_cast<cudf::size_type>(state.get_int64("bloom_filter_bytes"));
34 |   auto const filter_longs = static_cast<cudf::size_type>(filter_bytes / sizeof(int64_t));
35 |   auto filter             = spark_rapids_jni::bloom_filter_create(hash_count, filter_longs);
36 | 
37 |   // Build the input: one random INT64 column (profile set to no_validity()) hashed by xxhash64.
38 |   data_profile_builder profile;
39 |   profile.no_validity();
40 |   auto const random_tbl =
41 |     create_random_table({{cudf::type_id::INT64}}, row_count{row_total}, profile);
42 |   auto const hashes = spark_rapids_jni::xxhash64(*random_tbl);
43 | 
44 |   // The lambda starts and stops the timer itself, so the stream synchronize is inside the
45 |   // measured span.
46 |   auto const stream = cudf::get_default_stream();
47 |   state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
48 |   state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
49 |              [&](nvbench::launch&, auto& timer) {
50 |                timer.start();
51 |                spark_rapids_jni::bloom_filter_put(*filter, *hashes);
52 |                stream.synchronize();
53 |                timer.stop();
54 |              });
55 | 
56 |   // Derived counters reported next to the timing summary.
57 |   size_t const read_bytes    = row_total * sizeof(int64_t);
58 |   size_t const written_bytes = row_total * sizeof(cudf::bitmask_type) * hash_count;
59 |   auto const mean_gpu_time   = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
60 |   state.add_element_count(std::size_t{row_total}, "Rows Inserted");
61 |   state.add_global_memory_reads(read_bytes, "Bytes read");
62 |   state.add_global_memory_writes(written_bytes, "Bytes written");
63 |   state.add_element_count(static_cast<double>(written_bytes) / mean_gpu_time, "Write bytes/sec");
64 | }
65 | 
66 | // Register the benchmark over bloom filter sizes from 512 KiB to 8 MiB.
67 | NVBENCH_BENCH(bloom_filter_put)
68 |   .set_name("Bloom Filter Put")
69 |   .add_int64_axis("bloom_filter_bytes",
70 |                   {512 * 1024, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024});
71 | 


--------------------------------------------------------------------------------
/src/main/cpp/benchmarks/cast_long_to_binary_string.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <benchmarks/common/generate_input.hpp>
18 | 
19 | #include <cudf_test/column_utilities.hpp>
20 | 
21 | #include <cudf/io/types.hpp>
22 | 
23 | #include <cast_string.hpp>
24 | #include <nvbench/nvbench.cuh>
25 | 
26 | // nvbench benchmark: runs spark_rapids_jni::long_to_binary_string over a random INT64 column
27 | // whose length is taken from the "num_rows" axis.
28 | static void long_to_binary_string(nvbench::state& state)
29 | {
30 |   auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
31 | 
32 |   auto const input_table = create_random_table({cudf::type_id::INT64}, row_count{num_rows});
33 |   // Bind by reference: a plain `auto const` would copy-construct a second cudf::column
34 |   // (duplicating the data held by the table) just to feed the benchmark.
35 |   auto const& long_col = input_table->get_column(0);
36 |   auto const stream    = cudf::get_default_stream();
37 |   state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
38 |   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
39 |     spark_rapids_jni::long_to_binary_string(long_col, stream);
40 |   });
41 | }
42 | 
43 | // Register the benchmark over row counts from 100 thousand to 10 million.
44 | NVBENCH_BENCH(long_to_binary_string)
45 |   .set_name("Long to Binary String Cast")
46 |   .add_int64_axis("num_rows", {100'000, 500'000, 1'000'000, 5'000'000, 10'000'000});
47 | 


--------------------------------------------------------------------------------
/src/main/cpp/benchmarks/cast_string_to_float.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <benchmarks/common/generate_input.hpp>
18 | 
19 | #include <cudf_test/column_utilities.hpp>
20 | 
21 | #include <cudf/strings/convert/convert_floats.hpp>
22 | #include <cudf/strings/strings_column_view.hpp>
23 | 
24 | #include <cast_string.hpp>
25 | #include <nvbench/nvbench.cuh>
26 | 
27 | // nvbench benchmark: converts random FLOAT32 values to strings with cudf::strings::from_floats,
28 | // then runs spark_rapids_jni::string_to_float on that strings column. The row count is taken
29 | // from the "num_rows" axis.
30 | void string_to_float(nvbench::state& state)
31 | {
32 |   auto const n_rows    = static_cast<cudf::size_type>(state.get_int64("num_rows"));
33 |   auto const float_tbl = create_random_table({cudf::type_id::FLOAT32}, row_count{n_rows});
34 |   // Bind by reference: a plain `auto const` would copy-construct a second cudf::column
35 |   // (duplicating the data held by the table) before the conversion to strings.
36 |   auto const& float_col = float_tbl->get_column(0);
37 |   auto const string_col = cudf::strings::from_floats(float_col.view());
38 | 
39 |   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
40 |     auto rows = spark_rapids_jni::string_to_float(cudf::data_type{cudf::type_id::FLOAT32},
41 |                                                   string_col->view(),
42 |                                                   false,
43 |                                                   cudf::get_default_stream());
44 |   });
45 | }
46 | 
47 | // Register the benchmark for 1 Mi and 100 Mi rows.
48 | NVBENCH_BENCH(string_to_float)
49 |   .set_name("Strings to Float Cast")
50 |   .add_int64_axis("num_rows", {1 * 1024 * 1024, 100 * 1024 * 1024});
51 | 


--------------------------------------------------------------------------------
/src/main/cpp/cmake/get_flatbuffers.cmake:
--------------------------------------------------------------------------------
 1 | # =============================================================================
 2 | # Copyright (c) 2024-2025, NVIDIA CORPORATION.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 5 | # in compliance with the License. You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License
10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 | # or implied. See the License for the specific language governing permissions and limitations under
12 | # the License.
13 | # =============================================================================
14 | 
15 | # Find flatbuffers or clone it through CPM (via rapids_cpm_find), then register its binary
16 | # directory as a BUILD find-package root in the profilerjni-exports export set.
17 | #
18 | # VERSION: flatbuffers release number without the leading "v" (the git tag used is v<VERSION>).
19 | function(find_and_configure_flatbuffers VERSION)
20 |   set(fb_git_url https://github.com/google/flatbuffers.git)
21 |   set(fb_git_tag v${VERSION})
22 | 
23 |   rapids_cpm_find(
24 |     flatbuffers ${VERSION}
25 |     GLOBAL_TARGETS flatbuffers
26 |     CPM_ARGS
27 |       GIT_REPOSITORY ${fb_git_url}
28 |       GIT_TAG ${fb_git_tag}
29 |       GIT_SHALLOW TRUE
30 |   )
31 | 
32 |   include("${rapids-cmake-dir}/export/find_package_root.cmake")
33 |   rapids_export_find_package_root(
34 |     BUILD flatbuffers "${flatbuffers_BINARY_DIR}"
35 |     EXPORT_SET profilerjni-exports
36 |   )
37 | endfunction()
38 | 
39 | find_and_configure_flatbuffers(24.3.25)
40 | 


--------------------------------------------------------------------------------
/src/main/cpp/cmake/get_spdlog.cmake:
--------------------------------------------------------------------------------
 1 | # =============================================================================
 2 | # Copyright (c) 2024-2025, NVIDIA CORPORATION.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 5 | # in compliance with the License. You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License
10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 | # or implied. See the License for the specific language governing permissions and limitations under
12 | # the License.
13 | # =============================================================================
14 | 
15 | # Bring in spdlog through the rapids-cmake CPM helper and mark the resulting `spdlog` target
16 | # as position-independent code.
17 | function(find_and_configure_spdlog)
18 |   # NOTE(review): CPM_DOWNLOAD_spdlog presumably forces CPM to fetch spdlog instead of using an
19 |   # installed copy - confirm against the CPM documentation.
20 |   set(CPM_DOWNLOAD_spdlog ON)
21 | 
22 |   include(${rapids-cmake-dir}/cpm/spdlog.cmake)
23 |   rapids_cpm_spdlog(FMT_OPTION "EXTERNAL_FMT_HO")
24 | 
25 |   set_property(TARGET spdlog PROPERTY POSITION_INDEPENDENT_CODE ON)
26 | endfunction()
27 | 
28 | find_and_configure_spdlog()
29 | 


--------------------------------------------------------------------------------
/src/main/cpp/faultinj/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | #=============================================================================
 2 | # Copyright (c) 2022-2023, NVIDIA CORPORATION.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #=============================================================================
16 | 
17 | # Builds the cufaultinj shared library from faultinj.cu.
18 | project(CUDA_FAULT_INJECTION VERSION 22.08.00 LANGUAGES C CXX CUDA)
19 | 
20 | set(CMAKE_CXX_STANDARD 17)
21 | 
22 | add_library(cufaultinj SHARED faultinj.cu)
23 | 
24 | # Both dependencies are PRIVATE: header-only spdlog and the static CUPTI library.
25 | target_link_libraries(
26 |   cufaultinj
27 |   PRIVATE spdlog::spdlog_header_only
28 |           CUDA::cupti_static
29 | )
30 | 


--------------------------------------------------------------------------------
/src/main/cpp/profiler/profiler_debug.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cupti.h>
20 | 
21 | #include <cstdint>
22 | #include <string>
23 | 
24 | namespace spark_rapids_jni::profiler {
25 | 
26 | // Returns a std::string for the given CUPTI activity kind.
27 | // NOTE(review): presumably a human-readable name for logging; the definition is not in this
28 | // header - confirm.
29 | std::string activity_kind_to_string(CUpti_ActivityKind kind);
30 | 
31 | // Debug helper taking a raw byte buffer and the number of valid bytes in it.
32 | // NOTE(review): presumably prints the CUPTI activity records stored in the first `valid_size`
33 | // bytes of `buffer` - confirm against the definition.
34 | void print_cupti_buffer(uint8_t* buffer, size_t valid_size);
35 | 
36 | }  // namespace spark_rapids_jni::profiler
37 | 


--------------------------------------------------------------------------------
/src/main/cpp/profiler/profiler_schema.cpp.in:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | namespace spark_rapids_jni::profiler {
18 | // Template for the profiler schema text as a C string. @SPARK_RAPIDS_JNI_PROFILER_SCHEMA@ is a
19 | // placeholder (NOTE(review): presumably filled in by CMake's configure_file - confirm). The
20 | // text sits inside a raw string literal delimited by `raw`, so the substituted schema must not
21 | // contain the sequence `)raw"`.
22 | char const* Profiler_Schema = R"raw(@SPARK_RAPIDS_JNI_PROFILER_SCHEMA@)raw";
23 | }
24 | 


--------------------------------------------------------------------------------
/src/main/cpp/profiler/profiler_serializer.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include "profiler_generated.h"
20 | 
21 | #include <cupti.h>
22 | #include <flatbuffers/flatbuffers.h>
23 | #include <jni.h>
24 | 
25 | #include <cstdint>
26 | #include <vector>
27 | 
28 | namespace spark_rapids_jni::profiler {
29 | 
30 | // Serializes profile data as flatbuffers
31 | struct profiler_serializer {
32 |   // NOTE(review): parameter roles are inferred from their names and the matching members
33 |   // below (`writer` -> j_writer_, `flush_threshold` -> flush_threshold_, `capture_allocs` ->
34 |   // capture_allocs_); `buffer_size` presumably sizes the flatbuffer builder - confirm against
35 |   // the definition.
36 |   profiler_serializer(
37 |     JNIEnv* env, jobject writer, size_t buffer_size, size_t flush_threshold, bool capture_allocs);
38 |   // Takes a raw buffer and the number of valid bytes in it.
39 |   // NOTE(review): presumably walks the CUPTI activity records in the buffer and dispatches
40 |   // them to the process_* helpers below - confirm.
41 |   void process_cupti_buffer(uint8_t* buffer, size_t valid_size);
42 |   // NOTE(review): presumably writes out whatever has been accumulated so far - confirm.
43 |   void flush();
44 | 
45 |  private:
46 |   void write_profile_header();
47 |   // One handler per CUPTI activity record type; each takes a pointer to a const record.
48 |   void process_api_activity(CUpti_ActivityAPI const*);
49 |   void process_device_activity(CUpti_ActivityDevice4 const*);
50 |   void process_dropped_records(size_t num_dropped);
51 |   void process_marker_activity(CUpti_ActivityMarker2 const*);
52 |   void process_marker_data(CUpti_ActivityMarkerData const*);
53 |   void process_memcpy(CUpti_ActivityMemcpy5 const*);
54 |   void process_memset(CUpti_ActivityMemset4 const*);
55 |   void process_kernel(CUpti_ActivityKernel8 const*);
56 |   void process_overhead(CUpti_ActivityOverhead const*);
57 |   void report_num_dropped_records();
58 |   void write_current_fb();
59 | 
60 |   // JNI state. NOTE(review): presumably j_write_method_ is invoked on j_writer_ through env_
61 |   // to hand serialized data back to Java - confirm against the definition.
62 |   JNIEnv* env_;
63 |   jmethodID j_write_method_;
64 |   jobject j_writer_;
65 |   size_t flush_threshold_;
66 |   bool capture_allocs_;
67 |   // Flatbuffer builder plus one vector of table offsets per record type.
68 |   flatbuffers::FlatBufferBuilder fbb_;
69 |   std::vector<flatbuffers::Offset<ApiActivity>> api_offsets_;
70 |   std::vector<flatbuffers::Offset<DeviceActivity>> device_offsets_;
71 |   std::vector<flatbuffers::Offset<DroppedRecords>> dropped_offsets_;
72 |   std::vector<flatbuffers::Offset<KernelActivity>> kernel_offsets_;
73 |   std::vector<flatbuffers::Offset<MarkerActivity>> marker_offsets_;
74 |   std::vector<flatbuffers::Offset<MarkerData>> marker_data_offsets_;
75 |   std::vector<flatbuffers::Offset<MemcpyActivity>> memcpy_offsets_;
76 |   std::vector<flatbuffers::Offset<MemsetActivity>> memset_offsets_;
77 |   std::vector<flatbuffers::Offset<OverheadActivity>> overhead_offsets_;
78 | };
79 | 
80 | }  // namespace spark_rapids_jni::profiler
81 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/CaseWhenJni.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include "case_when.hpp"
18 | #include "cudf_jni_apis.hpp"
19 | 
20 | extern "C" {
21 | 
22 | // JNI entry point for CaseWhen.selectFirstTrueIndex.
23 | // `bool_cols` is a Java long[] whose elements are dereferenced as cudf::column_view pointers.
24 | // The views are wrapped in a table_view and passed to
25 | // spark_rapids_jni::select_first_true_index; the result is released to the caller as a jlong.
26 | // A null array is reported through JNI_NULL_CHECK, exceptions through CATCH_STD(env, 0).
27 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CaseWhen_selectFirstTrueIndex(
28 |   JNIEnv* env, jclass, jlongArray bool_cols)
29 | {
30 |   JNI_NULL_CHECK(env, bool_cols, "array of column handles is null", 0);
31 |   try {
32 |     cudf::jni::auto_set_device(env);
33 |     cudf::jni::native_jpointerArray<cudf::column_view> handles(env, bool_cols);
34 |     auto const views = handles.get_dereferenced();
35 |     cudf::table_view const bool_table(views);
36 |     return cudf::jni::release_as_jlong(spark_rapids_jni::select_first_true_index(bool_table));
37 |   }
38 |   CATCH_STD(env, 0);
39 | }
40 | 
41 | }  // extern "C"
42 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/DateTimeUtilsJni.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include "cudf_jni_apis.hpp"
18 | #include "datetime_utils.hpp"
19 | 
20 | extern "C" {
21 | 
22 | // JNI entry point for DateTimeUtils.rebaseGregorianToJulian.
23 | // `input` is reinterpreted as a pointer to a cudf::column_view; the column returned by
24 | // spark_rapids_jni::rebase_gregorian_to_julian is released and handed back as a jlong.
25 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_DateTimeUtils_rebaseGregorianToJulian(
26 |   JNIEnv* env, jclass, jlong input)
27 | {
28 |   JNI_NULL_CHECK(env, input, "input column is null", 0);
29 | 
30 |   try {
31 |     cudf::jni::auto_set_device(env);
32 |     auto const input_cv = reinterpret_cast<cudf::column_view const*>(input);
33 |     auto output         = spark_rapids_jni::rebase_gregorian_to_julian(*input_cv);
34 |     return reinterpret_cast<jlong>(output.release());
35 |   }
36 |   CATCH_STD(env, 0);
37 | }
38 | 
39 | // JNI entry point for DateTimeUtils.rebaseJulianToGregorian; same handle conventions as
40 | // rebaseGregorianToJulian, but calls spark_rapids_jni::rebase_julian_to_gregorian.
41 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_DateTimeUtils_rebaseJulianToGregorian(
42 |   JNIEnv* env, jclass, jlong input)
43 | {
44 |   JNI_NULL_CHECK(env, input, "input column is null", 0);
45 | 
46 |   try {
47 |     cudf::jni::auto_set_device(env);
48 |     auto const input_cv = reinterpret_cast<cudf::column_view const*>(input);
49 |     auto output         = spark_rapids_jni::rebase_julian_to_gregorian(*input_cv);
50 |     return reinterpret_cast<jlong>(output.release());
51 |   }
52 |   CATCH_STD(env, 0);
53 | }
54 | 
55 | // JNI entry point for DateTimeUtils.truncateWithColumnFormat.
56 | // Both `datetime` and `format` are reinterpreted as pointers to cudf::column_view and passed
57 | // to spark_rapids_jni::truncate; the result is released and handed back as a jlong.
58 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_DateTimeUtils_truncateWithColumnFormat(
59 |   JNIEnv* env, jclass, jlong datetime, jlong format)
60 | {
61 |   JNI_NULL_CHECK(env, datetime, "input datetime is null", 0);
62 |   JNI_NULL_CHECK(env, format, "input format is null", 0);
63 | 
64 |   try {
65 |     cudf::jni::auto_set_device(env);
66 | 
67 |     auto const datetime_cv = reinterpret_cast<cudf::column_view const*>(datetime);
68 |     auto const format_cv   = reinterpret_cast<cudf::column_view const*>(format);
69 |     return reinterpret_cast<jlong>(spark_rapids_jni::truncate(*datetime_cv, *format_cv).release());
70 |   }
71 |   CATCH_STD(env, 0);
72 | }
73 | 
74 | // JNI entry point for DateTimeUtils.truncateWithScalarFormat.
75 | // `datetime` is reinterpreted as a pointer to a cudf::column_view; `format` is a Java String
76 | // copied into a std::string before calling spark_rapids_jni::truncate. A null `format` is
77 | // rejected up front, matching the column-format overload, instead of reaching the conversion.
78 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_DateTimeUtils_truncateWithScalarFormat(
79 |   JNIEnv* env, jclass, jlong datetime, jstring format)
80 | {
81 |   JNI_NULL_CHECK(env, datetime, "input datetime is null", 0);
82 |   JNI_NULL_CHECK(env, format, "input format is null", 0);
83 | 
84 |   try {
85 |     cudf::jni::auto_set_device(env);
86 | 
87 |     auto const datetime_cv = reinterpret_cast<cudf::column_view const*>(datetime);
88 |     auto const format_jstr = cudf::jni::native_jstring(env, format);
89 |     auto const format_str  = std::string(format_jstr.get(), format_jstr.size_bytes());
90 |     return reinterpret_cast<jlong>(
91 |       spark_rapids_jni::truncate(*datetime_cv, format_str).release());
92 |   }
93 |   CATCH_STD(env, 0);
94 | }
95 | 
96 | }  // extern "C"
97 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/HashJni.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include "cudf_jni_apis.hpp"
18 | #include "dtype_utils.hpp"
19 | #include "hash.hpp"
20 | #include "jni_utils.hpp"
21 | 
22 | extern "C" {
23 | 
24 | // JNI entry point for Hash.getMaxStackDepth: exposes spark_rapids_jni::MAX_STACK_DEPTH to Java.
25 | JNIEXPORT jint JNICALL Java_com_nvidia_spark_rapids_jni_Hash_getMaxStackDepth(JNIEnv*, jclass)
26 | {
27 |   return spark_rapids_jni::MAX_STACK_DEPTH;
28 | }
29 | 
30 | // JNI entry point for Hash.murmurHash32.
31 | // `column_handles` is a Java long[] whose elements are dereferenced as cudf::column_view
32 | // pointers; they are hashed as one table by spark_rapids_jni::murmur_hash3_32 with `seed`, and
33 | // the result is released to the caller as a jlong.
34 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Hash_murmurHash32(
35 |   JNIEnv* env, jclass, jint seed, jlongArray column_handles)
36 | {
37 |   JNI_NULL_CHECK(env, column_handles, "array of column handles is null", 0);
38 | 
39 |   try {
40 |     cudf::jni::auto_set_device(env);
41 |     auto const views =
42 |       cudf::jni::native_jpointerArray<cudf::column_view>{env, column_handles}.get_dereferenced();
43 |     cudf::table_view const input{views};
44 |     return cudf::jni::release_as_jlong(spark_rapids_jni::murmur_hash3_32(input, seed));
45 |   }
46 |   CATCH_STD(env, 0);
47 | }
48 | 
49 | // JNI entry point for Hash.xxhash64; same handle conventions as murmurHash32, but with a
50 | // jlong seed and spark_rapids_jni::xxhash64 as the hash.
51 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Hash_xxhash64(JNIEnv* env,
52 |                                                                        jclass,
53 |                                                                        jlong seed,
54 |                                                                        jlongArray column_handles)
55 | {
56 |   JNI_NULL_CHECK(env, column_handles, "array of column handles is null", 0);
57 | 
58 |   try {
59 |     cudf::jni::auto_set_device(env);
60 |     auto const views =
61 |       cudf::jni::native_jpointerArray<cudf::column_view>{env, column_handles}.get_dereferenced();
62 |     cudf::table_view const input{views};
63 |     return cudf::jni::release_as_jlong(spark_rapids_jni::xxhash64(input, seed));
64 |   }
65 |   CATCH_STD(env, 0);
66 | }
67 | 
68 | // JNI entry point for Hash.hiveHash; same handle conventions as murmurHash32, but takes no
69 | // seed and calls spark_rapids_jni::hive_hash.
70 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Hash_hiveHash(JNIEnv* env,
71 |                                                                        jclass,
72 |                                                                        jlongArray column_handles)
73 | {
74 |   JNI_NULL_CHECK(env, column_handles, "array of column handles is null", 0);
75 | 
76 |   try {
77 |     cudf::jni::auto_set_device(env);
78 |     auto const views =
79 |       cudf::jni::native_jpointerArray<cudf::column_view>{env, column_handles}.get_dereferenced();
80 |     cudf::table_view const input{views};
81 |     return cudf::jni::release_as_jlong(spark_rapids_jni::hive_hash(input));
82 |   }
83 |   CATCH_STD(env, 0);
84 | }
85 | 
86 | }  // extern "C"
87 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/HistogramJni.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include "cudf_jni_apis.hpp"
18 | #include "histogram.hpp"
19 | 
20 | extern "C" {
21 | 
22 | // JNI entry point for Histogram.createHistogramIfValid.
23 | // Both handles are reinterpreted as pointers to cudf::column_view and passed, together with
24 | // `output_as_lists`, to spark_rapids_jni::create_histogram_if_valid; the result is released
25 | // and handed back as a jlong.
26 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Histogram_createHistogramIfValid(
27 |   JNIEnv* env, jclass, jlong values_handle, jlong frequencies_handle, jboolean output_as_lists)
28 | {
29 |   JNI_NULL_CHECK(env, values_handle, "values_handle is null", 0);
30 |   JNI_NULL_CHECK(env, frequencies_handle, "frequencies_handle is null", 0);
31 | 
32 |   try {
33 |     cudf::jni::auto_set_device(env);
34 | 
35 |     auto const& values_cv = *reinterpret_cast<cudf::column_view const*>(values_handle);
36 |     auto const& freqs_cv  = *reinterpret_cast<cudf::column_view const*>(frequencies_handle);
37 |     auto histogram =
38 |       spark_rapids_jni::create_histogram_if_valid(values_cv, freqs_cv, output_as_lists);
39 |     return cudf::jni::ptr_as_jlong(histogram.release());
40 |   }
41 |   CATCH_STD(env, 0);
42 | }
43 | 
44 | // JNI entry point for Histogram.percentileFromHistogram.
45 | // `input_handle` is reinterpreted as a pointer to a cudf::column_view and `jpercentages` is
46 | // copied into a std::vector<double>; both go, together with `output_as_lists`, to
47 | // spark_rapids_jni::percentile_from_histogram. The result is released and handed back as a
48 | // jlong.
49 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Histogram_percentileFromHistogram(
50 |   JNIEnv* env, jclass, jlong input_handle, jdoubleArray jpercentages, jboolean output_as_lists)
51 | {
52 |   JNI_NULL_CHECK(env, input_handle, "input_handle is null", 0);
53 |   JNI_NULL_CHECK(env, jpercentages, "jpercentages is null", 0);
54 | 
55 |   try {
56 |     cudf::jni::auto_set_device(env);
57 | 
58 |     auto const& histogram_cv = *reinterpret_cast<cudf::column_view const*>(input_handle);
59 | 
60 |     // The native array wrapper lives only inside this scope, so it is destroyed as soon as the
61 |     // values have been copied out.
62 |     std::vector<double> percentages;
63 |     {
64 |       auto const native_percentages = cudf::jni::native_jdoubleArray(env, jpercentages);
65 |       percentages.assign(native_percentages.begin(), native_percentages.end());
66 |     }
67 | 
68 |     auto result =
69 |       spark_rapids_jni::percentile_from_histogram(histogram_cv, percentages, output_as_lists);
70 |     return cudf::jni::ptr_as_jlong(result.release());
71 |   }
72 |   CATCH_STD(env, 0);
73 | }
74 | 
75 | }  // extern "C"
76 | 

--------------------------------------------------------------------------------
/src/main/cpp/src/HyperLogLogPlusPlusHostUDFJni.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024-2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include "cudf_jni_apis.hpp"
18 | #include "hyper_log_log_plus_plus.hpp"
19 | #include "hyper_log_log_plus_plus_host_udf.hpp"
20 | 
21 | extern "C" {
22 | 
23 | /**
24 |  * JNI entry point that creates a HyperLogLogPlusPlus (HLLPP) host UDF instance.
25 |  *
26 |  * @param agg_type Selects the UDF flavor: 0 = reduction, 1 = reduction merge, 2 = group-by,
27 |  *                 3 = group-by merge. Any other value fails with "Invalid aggregation type.".
28 |  * @param precision Forwarded unchanged to the selected factory function.
29 |  * @return The address of the created UDF as a jlong; 0 is the value handed to CATCH_STD for
30 |  *         the failure path.
31 |  */
32 | JNIEXPORT jlong JNICALL
33 | Java_com_nvidia_spark_rapids_jni_HyperLogLogPlusPlusHostUDF_createHLLPPHostUDF(JNIEnv* env,
34 |                                                                                jclass,
35 |                                                                                jint agg_type,
36 |                                                                                jint precision)
37 | {
38 |   try {
39 |     auto udf_ptr = [&] {
40 |       // The values of agg_type must be kept in sync with
41 |       // `HyperLogLogPlusPlusHostUDF.java#AggregationType`.
42 |       switch (agg_type) {
43 |         case 0: return spark_rapids_jni::create_hllpp_reduction_host_udf(precision);
44 |         case 1: return spark_rapids_jni::create_hllpp_reduction_merge_host_udf(precision);
45 |         case 2: return spark_rapids_jni::create_hllpp_groupby_host_udf(precision);
46 |         case 3: return spark_rapids_jni::create_hllpp_groupby_merge_host_udf(precision);
47 |         default: CUDF_FAIL("Invalid aggregation type.");
48 |       }
49 |     }();
50 |     // A null pointer must never be handed back to Java as a valid handle.
51 |     CUDF_EXPECTS(udf_ptr != nullptr, "Invalid HyperLogLogPlusPlus(HLLPP) UDF instance.");
52 | 
53 |     return reinterpret_cast<jlong>(udf_ptr);
54 |   }
55 |   CATCH_STD(env, 0);
56 | }
47 | 
48 | // JNI entry point: forwards a column of sketches and the precision to
49 | // `spark_rapids_jni::estimate_from_hll_sketches` and returns the address of the result.
50 | JNIEXPORT jlong JNICALL
51 | Java_com_nvidia_spark_rapids_jni_HyperLogLogPlusPlusHostUDF_estimateDistinctValueFromSketches(
52 |   JNIEnv* env, jclass, jlong sketches, jint precision)
53 | {
54 |   JNI_NULL_CHECK(env, sketches, "Sketch column is null", 0);
55 |   try {
56 |     cudf::jni::auto_set_device(env);
57 |     // `sketches` carries the address of a native column view.
58 |     auto const& sketch_column = *reinterpret_cast<cudf::column_view const*>(sketches);
59 |     auto estimates = spark_rapids_jni::estimate_from_hll_sketches(sketch_column, precision);
60 |     return cudf::jni::ptr_as_jlong(estimates.release());
61 |   }
62 |   CATCH_STD(env, 0);
63 | }
61 | 
62 | }  // extern "C"
63 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/MapJni.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include "cudf_jni_apis.hpp"
18 | #include "jni_utils.hpp"
19 | #include "map.hpp"
20 | 
21 | extern "C" {
22 | 
23 | /**
24 |  * JNI entry point that sorts a map column via `spark_rapids_jni::sort_map_column`.
25 |  *
26 |  * @param map_handle Address of the native column view to sort; must not be 0.
27 |  * @param is_descending Selects cudf::order::DESCENDING when true, ASCENDING otherwise.
28 |  * @return The address of the sorted column as a jlong; 0 is the value handed to CATCH_STD for
29 |  *         the failure path.
30 |  */
31 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Map_sort(
32 |   JNIEnv* env, jclass, jlong map_handle, jboolean is_descending)
33 | {
34 |   JNI_NULL_CHECK(env, map_handle, "column is null", 0);
35 | 
36 |   try {
37 |     cudf::jni::auto_set_device(env);
38 |     auto sort_order = is_descending ? cudf::order::DESCENDING : cudf::order::ASCENDING;
39 |     cudf::column_view const& map_view = *reinterpret_cast<cudf::column_view const*>(map_handle);
40 |     return cudf::jni::release_as_jlong(spark_rapids_jni::sort_map_column(map_view, sort_order));
41 |   }
42 |   CATCH_STD(env, 0);
43 | }
39 | }
40 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/RegexRewriteUtilsJni.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include "cudf_jni_apis.hpp"
18 | #include "dtype_utils.hpp"
19 | #include "jni_utils.hpp"
20 | #include "regex_rewrite_utils.hpp"
21 | 
22 | extern "C" {
23 | 
24 | // JNI entry point forwarding to `spark_rapids_jni::literal_range_pattern`.
25 | // `input` is the address of a native column view, `target` the address of a string scalar.
26 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_RegexRewriteUtils_literalRangePattern(
27 |   JNIEnv* env, jclass, jlong input, jlong target, jint d, jint start, jint end)
28 | {
29 |   JNI_NULL_CHECK(env, input, "input column is null", 0);
30 |   JNI_NULL_CHECK(env, target, "target is null", 0);
31 | 
32 |   try {
33 |     cudf::jni::auto_set_device(env);
34 | 
35 |     auto& literal = *reinterpret_cast<cudf::string_scalar*>(target);
36 |     // Wrap the raw column view as a strings column view.
37 |     cudf::strings_column_view strings(*reinterpret_cast<cudf::column_view*>(input));
38 |     return cudf::jni::release_as_jlong(
39 |       spark_rapids_jni::literal_range_pattern(strings, literal, d, start, end));
40 |   }
41 |   CATCH_STD(env, 0);
42 | }
41 | }
42 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/SubStringIndexJni.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include "cudf_jni_apis.hpp"
18 | #include "substring_index.hpp"
19 | 
20 | extern "C" {
21 | 
22 | // JNI entry point forwarding to `spark_rapids_jni::substring_index`.
23 | // `strings_handle` is the address of a native column view, `delimiter` that of a string scalar.
24 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_GpuSubstringIndexUtils_substringIndex(
25 |   JNIEnv* env, jclass, jlong strings_handle, jlong delimiter, jint count)
26 | {
27 |   JNI_NULL_CHECK(env, strings_handle, "strings column handle is null", 0);
28 |   JNI_NULL_CHECK(env, delimiter, "delimiter scalar handle is null", 0);
29 |   try {
30 |     cudf::jni::auto_set_device(env);
31 |     auto const& raw_column = *reinterpret_cast<cudf::column_view const*>(strings_handle);
32 |     auto& delimiter_scalar = *reinterpret_cast<cudf::string_scalar*>(delimiter);
33 |     // Wrap the raw column view as a strings column view.
34 |     cudf::strings_column_view const strings_view{raw_column};
35 |     return cudf::jni::release_as_jlong(
36 |       spark_rapids_jni::substring_index(strings_view, delimiter_scalar, count));
37 |   }
38 |   CATCH_STD(env, 0);
39 | }
37 | }  // extern "C"
38 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/TaskPriorityJni.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include "cudf_jni_apis.hpp"
18 | #include "jni_utils.hpp"
19 | #include "task_priority.hpp"
20 | 
21 | namespace {
22 | // File-local bookkeeping for task priorities (the unnamed namespace gives internal linkage):
23 | //  - next_task_priority: the value handed to the next attempt_id that has no priority yet
24 | //  - priority_mutex:     the lock taken around accesses to the counter and the map
25 | //  - attempt_priorities: attempt_id -> priority assigned to it
26 | long next_task_priority{std::numeric_limits<long>::max() - 1};
27 | std::mutex priority_mutex{};
28 | std::unordered_map<long, long> attempt_priorities{};
29 | }  // namespace
27 | 
28 | namespace spark_rapids_jni {
29 | 
30 | /**
31 |  * @brief Get the priority of a task attempt, assigning one on first use.
32 |  *
33 |  * New priorities are handed out in decreasing order, so an attempt that asks earlier gets a
34 |  * larger value. The same value is returned for an attempt_id until `task_done` removes it.
35 |  *
36 |  * @param attempt_id The task attempt id; -1 is a special case that always maps to
37 |  *                   `std::numeric_limits<long>::max()` and is never recorded.
38 |  * @return The priority associated with `attempt_id`.
39 |  */
40 | long get_task_priority(long attempt_id)
41 | {
42 |   if (attempt_id == -1) {
43 |     // Special case: -1 always gets highest priority
44 |     return std::numeric_limits<long>::max();
45 |   }
46 | 
47 |   std::lock_guard<std::mutex> lock(priority_mutex);
48 |   // One hash lookup: try_emplace inserts only when attempt_id is absent. The counter is consumed
49 |   // only after the insertion has succeeded, so a throwing insertion does not skip a value.
50 |   auto const [entry, inserted] = attempt_priorities.try_emplace(attempt_id, next_task_priority);
51 |   if (inserted) { --next_task_priority; }
52 |   return entry->second;
53 | }
54 | 
55 | /**
56 |  * @brief Forget the priority recorded for a task attempt.
57 |  *
58 |  * @param attempt_id The task attempt id whose entry is erased; -1 is never recorded, so it is
59 |  *                   ignored. Erasing an id that has no entry is a no-op.
60 |  */
61 | void task_done(long attempt_id)
62 | {
63 |   if (attempt_id == -1) {
64 |     return;  // Nothing to do for special case
65 |   }
66 | 
67 |   std::lock_guard<std::mutex> lock(priority_mutex);
68 |   attempt_priorities.erase(attempt_id);
69 | }
70 | 
71 | }  // namespace spark_rapids_jni
61 | 
62 | extern "C" {
63 | 
64 | // JNI entry point returning the priority of `task_attempt_id` (see get_task_priority).
65 | // The try/CATCH_STD pair keeps a C++ exception (e.g. from locking the mutex or from the map
66 | // insertion) from unwinding through the JNI boundary, like the other exports guarded this way.
67 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_TaskPriority_getTaskPriority(
68 |   JNIEnv* env, jclass, jlong task_attempt_id)
69 | {
70 |   try {
71 |     return spark_rapids_jni::get_task_priority(task_attempt_id);
72 |   }
73 |   CATCH_STD(env, 0);
74 | }
69 | 
70 | // JNI entry point that drops the priority recorded for `task_attempt_id` (see task_done).
71 | // The try/CATCH_STD pair keeps a C++ exception (e.g. from locking the mutex) from unwinding
72 | // through the JNI boundary; the return-value argument is empty because the function is void.
73 | JNIEXPORT void JNICALL Java_com_nvidia_spark_rapids_jni_TaskPriority_taskDone(JNIEnv* env,
74 |                                                                               jclass,
75 |                                                                               jlong task_attempt_id)
76 | {
77 |   try {
78 |     spark_rapids_jni::task_done(task_attempt_id);
79 |   }
80 |   CATCH_STD(env, );
81 | }
76 | }
77 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/ZOrderJni.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include "cudf_jni_apis.hpp"
18 | #include "dtype_utils.hpp"
19 | #include "zorder.hpp"
20 | 
21 | extern "C" {
22 | 
23 | // JNI entry point forwarding to `spark_rapids_jni::interleave_bits`.
24 | // `input_columns` is a Java long[] holding the addresses of native column views.
25 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ZOrder_interleaveBits(
26 |   JNIEnv* env, jclass, jlongArray input_columns)
27 | {
28 |   JNI_NULL_CHECK(env, input_columns, "input is null", 0);
29 | 
30 |   try {
31 |     cudf::jni::auto_set_device(env);
32 |     cudf::jni::native_jpointerArray<cudf::column_view> column_ptrs(env, input_columns);
33 |     // Dereference the pointers and view the resulting columns as one table.
34 |     auto const columns = column_ptrs.get_dereferenced();
35 |     auto interleaved   = spark_rapids_jni::interleave_bits(cudf::table_view(columns));
36 |     return cudf::jni::ptr_as_jlong(interleaved.release());
37 |   }
38 |   CATCH_STD(env, 0);
39 | }
37 | 
38 | // JNI entry point forwarding to `spark_rapids_jni::hilbert_index`.
39 | // `input_columns` is a Java long[] holding the addresses of native column views.
40 | JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ZOrder_hilbertIndex(
41 |   JNIEnv* env, jclass, jint num_bits, jlongArray input_columns)
42 | {
43 |   JNI_NULL_CHECK(env, input_columns, "input is null", 0);
44 | 
45 |   try {
46 |     cudf::jni::auto_set_device(env);
47 |     cudf::jni::native_jpointerArray<cudf::column_view> column_ptrs(env, input_columns);
48 |     // Dereference the pointers and view the resulting columns as one table.
49 |     auto const columns = column_ptrs.get_dereferenced();
50 |     auto hilbert       = spark_rapids_jni::hilbert_index(num_bits, cudf::table_view(columns));
51 |     return cudf::jni::ptr_as_jlong(hilbert.release());
52 |   }
53 |   CATCH_STD(env, 0);
54 | }
52 | }
53 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/case_when.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cudf/table/table_view.hpp>
20 | #include <cudf/utilities/default_stream.hpp>
21 | 
22 | #include <rmm/mr/device/per_device_resource.hpp>
23 | #include <rmm/resource_ref.hpp>
24 | 
25 | #include <memory>
26 | 
27 | namespace spark_rapids_jni {
28 | 
29 | /**
30 |  * @brief Select, for each row, the index of the first column that holds `true`.
31 |  *
32 |  * If a row does not contain `true`, the end index (the number of columns) is used instead.
33 |  *
34 |  * e.g.:
35 |  *   column 0 in table: true,  false, false, false
36 |  *   column 1 in table: false, true,  false, false
37 |  *   column 2 in table: false, false, true,  false
38 |  *
39 |  *   1st row is: true, false, false; first true index is 0
40 |  *   2nd row is: false, true, false; first true index is 1
41 |  *   3rd row is: false, false, true; first true index is 2
42 |  *   4th row is: false, false, false; no true is found, so the index is the end index 3
43 |  *
44 |  *   output column: 0, 1, 2, 3
45 |  *   In the `case when` context, the index 3 here means that the NULL value is used.
46 |  * @param when_bool_columns The table of bool columns to search, as in the example above
47 |  */
48 | std::unique_ptr<cudf::column> select_first_true_index(
49 |   cudf::table_view const& when_bool_columns,
50 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
51 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
52 | 
53 | }  // namespace spark_rapids_jni
54 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/cast_string_to_timestamp_common.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cudf/types.hpp>
20 | 
21 | namespace spark_rapids_jni {
22 | 
23 | /**
24 |  * Timezone type: classifies the timezone part of a string.
25 |  * Used when casting a string with a timezone to a timestamp.
26 |  */
27 | enum class TZ_TYPE : uint8_t {
28 | 
29 |   // No timezone is specified in the string; indicates that the default timezone is used.
30 |   NOT_SPECIFIED = 0,
31 | 
32 |   // Fixed offset timezone
33 |   // The string starts with UT/GMT/UTC/[+-], and it's valid.
34 |   // E.g: +08:00, +08, +1:02:30, -010203, GMT+8, UTC+8:00, UT+8
35 |   // E.g: +01:2:03 -- NOTE(review): also listed under INVALID_TZ below; confirm which applies
36 |   FIXED_TZ = 1,
37 | 
38 |   // Not a FIXED_TZ, but still a valid timezone string.
39 |   // E.g.: java.time.ZoneId.SHORT_IDS: CTT
40 |   // E.g.: Region-based timezone: America/Los_Angeles
41 |   OTHER_TZ = 2,
42 | 
43 |   // Invalid timezone.
44 |   // The string starts with UT/GMT/UTC/[+-], but it's invalid.
45 |   // E.g: UTC+19:00, GMT+19:00; the max offset is 18 hours
46 |   // E.g: GMT+01:2:03, +01:2:03, special case
47 |   // E.g: non-exist-timezone (a name that is not an existing timezone)
48 |   INVALID_TZ = 3
49 | };
50 | 
51 | }  // namespace spark_rapids_jni
52 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/datetime_utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cudf/datetime.hpp>
20 | 
21 | namespace spark_rapids_jni {
22 | std::unique_ptr<cudf::column> rebase_gregorian_to_julian(  // returns a newly owned column
23 |   cudf::column_view const& input,  // NOTE(review): accepted column types not shown here
24 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
25 |   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
26 | 
27 | std::unique_ptr<cudf::column> rebase_julian_to_gregorian(  // returns a newly owned column
28 |   cudf::column_view const& input,  // NOTE(review): accepted column types not shown here
29 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
30 |   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
31 | 
32 | std::unique_ptr<cudf::column> truncate(  // overload: `format` is supplied as a column
33 |   cudf::column_view const& datetime,
34 |   cudf::column_view const& format,
35 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
36 |   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
37 | 
38 | std::unique_ptr<cudf::column> truncate(  // overload: `format` is supplied as a single string
39 |   cudf::column_view const& datetime,
40 |   std::string const& format,
41 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
42 |   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
43 | 
44 | }  // namespace spark_rapids_jni
45 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/decimal_utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cudf/column/column_view.hpp>
20 | #include <cudf/table/table.hpp>
21 | #include <cudf/utilities/default_stream.hpp>
22 | 
23 | #include <rmm/cuda_stream_view.hpp>
24 | 
25 | #include <cstddef>
26 | 
27 | namespace cudf::jni {
28 | 
29 | // Decimal arithmetic helpers. Each takes two operand columns `a` and `b`, the scale requested
30 | // for the result and the CUDA stream to work on, and returns a newly owned table.
31 | // NOTE(review): the layout of the returned tables is not documented in this header; confirm it
32 | // against the implementation.
33 | 
34 | /**
35 |  * @brief Multiply `a` by `b`.
36 |  *
37 |  * @param product_scale Scale requested for the product
38 |  * @param cast_interim_result NOTE(review): presumably controls whether an intermediate result
39 |  *        is cast before the final product is produced - confirm against the implementation
40 |  */
41 | std::unique_ptr<cudf::table> multiply_decimal128(
42 |   cudf::column_view const& a,
43 |   cudf::column_view const& b,
44 |   int32_t product_scale,
45 |   bool const cast_interim_result,
46 |   rmm::cuda_stream_view stream = cudf::get_default_stream());
47 | 
48 | /**
49 |  * @brief Divide `a` by `b`.
50 |  *
51 |  * @param quotient_scale Scale requested for the quotient
52 |  */
53 | std::unique_ptr<cudf::table> divide_decimal128(
54 |   cudf::column_view const& a,
55 |   cudf::column_view const& b,
56 |   int32_t quotient_scale,
57 |   rmm::cuda_stream_view stream = cudf::get_default_stream());
58 | 
59 | /**
60 |  * @brief Integer-divide `a` by `b`.
61 |  *
62 |  * @param quotient_scale Scale requested for the quotient
63 |  */
64 | std::unique_ptr<cudf::table> integer_divide_decimal128(
65 |   cudf::column_view const& a,
66 |   cudf::column_view const& b,
67 |   int32_t quotient_scale,
68 |   rmm::cuda_stream_view stream = cudf::get_default_stream());
69 | 
70 | /**
71 |  * @brief Compute the remainder of dividing `a` by `b`.
72 |  *
73 |  * @param remainder_scale Scale requested for the remainder
74 |  */
75 | std::unique_ptr<cudf::table> remainder_decimal128(
76 |   cudf::column_view const& a,
77 |   cudf::column_view const& b,
78 |   int32_t remainder_scale,
79 |   rmm::cuda_stream_view stream = cudf::get_default_stream());
80 | 
81 | /**
82 |  * @brief Add `a` and `b`.
83 |  *
84 |  * @param target_scale Scale requested for the sum (an addition has no quotient, so the
85 |  *        parameter is not named `quotient_scale`)
86 |  */
87 | std::unique_ptr<cudf::table> add_decimal128(
88 |   cudf::column_view const& a,
89 |   cudf::column_view const& b,
90 |   int32_t target_scale,
91 |   rmm::cuda_stream_view stream = cudf::get_default_stream());
92 | 
93 | /**
94 |  * @brief Subtract `b` from `a`.
95 |  *
96 |  * NOTE(review): the operand order (a - b) is assumed from the parameter order - confirm.
97 |  *
98 |  * @param target_scale Scale requested for the difference (a subtraction has no quotient, so
99 |  *        the parameter is not named `quotient_scale`)
100 |  */
101 | std::unique_ptr<cudf::table> sub_decimal128(
102 |   cudf::column_view const& a,
103 |   cudf::column_view const& b,
104 |   int32_t target_scale,
105 |   rmm::cuda_stream_view stream = cudf::get_default_stream());
106 | 
107 | /**
108 |  * @brief Cast floating point values to decimals, matching the behavior of Spark.
109 |  *
110 |  * @param input The input column, which is either FLOAT32 or FLOAT64 type
111 |  * @param output_type The output decimal type
112 |  * @param precision The maximum number of digits that will be preserved in the output
113 |  * @param stream CUDA stream used for device memory operations and kernel launches
114 |  * @param mr Device memory resource used to allocate the returned column's device memory
115 |  * @return A pair of the cudf column containing the cast result and a boolean value indicating
116 |  *         whether the cast operation has failed for any input rows
117 |  */
118 | std::pair<std::unique_ptr<cudf::column>, bool> floating_point_to_decimal(
119 |   cudf::column_view const& input,
120 |   cudf::data_type output_type,
121 |   int32_t precision,
122 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
123 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
124 | 
125 | }  // namespace cudf::jni
85 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/emptyfile.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | // Intentionally empty
18 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/get_json_object.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cudf/strings/strings_column_view.hpp>
20 | 
21 | #include <rmm/resource_ref.hpp>
22 | 
23 | #include <memory>
24 | #include <vector>
25 | 
26 | namespace spark_rapids_jni {
27 | 
28 | /**
29 |  * @brief The maximum supported depth that a JSON path can reach.
30 |  */
31 | constexpr int MAX_JSON_PATH_DEPTH = 16;
32 | 
33 | /**
34 |  * @brief Type of instruction in a JSON path.
35 |  */
36 | enum class path_instruction_type : int8_t { WILDCARD, INDEX, NAMED };
37 | 
38 | /**
39 |  * @brief Extract a JSON object from a JSON string based on the specified JSON path.
40 |  *
41 |  * If the input JSON string is invalid, or it does not contain the object at the given path, a null
42 |  * will be returned.
43 |  */
44 | std::unique_ptr<cudf::column> get_json_object(
45 |   cudf::strings_column_view const& input,
46 |   std::vector<std::tuple<path_instruction_type, std::string, int32_t>> const& instructions,
47 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
48 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
49 | 
50 | /**
51 |  * @brief Extract multiple JSON objects from a JSON string based on the specified JSON paths.
52 |  *
53 |  * This function processes all the JSON paths in parallel, which may be faster than calling
54 |  * `get_json_object` on the individual JSON paths. However, it may consume much more GPU
55 |  * memory, proportional to the number of JSON paths.
56 |  * @param input the input string column to parse JSON from
57 |  * @param json_paths the JSON paths to extract, each given as a list of path instructions
58 |  * @param memory_budget_bytes a memory budget for temporary memory usage if > 0
59 |  * @param parallel_override if this value is greater than 0 then it specifies the
60 |  *        number of paths to process in parallel (this will cause the
61 |  *        `memory_budget_bytes` parameter to be ignored)
62 |  */
63 | std::vector<std::unique_ptr<cudf::column>> get_json_object_multiple_paths(
64 |   cudf::strings_column_view const& input,
65 |   std::vector<std::vector<std::tuple<path_instruction_type, std::string, int32_t>>> const&
66 |     json_paths,
67 |   int64_t memory_budget_bytes,
68 |   int32_t parallel_override,
69 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
70 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
71 | 
72 | }  // namespace spark_rapids_jni
73 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/hash.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cudf/table/table_view.hpp>
20 | #include <cudf/utilities/default_stream.hpp>
21 | 
22 | #include <rmm/cuda_stream_view.hpp>
23 | #include <rmm/resource_ref.hpp>
24 | 
25 | namespace spark_rapids_jni {
26 | 
27 | constexpr int64_t DEFAULT_XXHASH64_SEED = 42;  // default `seed` argument of xxhash64() below
28 | constexpr int MAX_STACK_DEPTH           = 8;  // NOTE(review): not used in this header
29 | 
30 | /**
31 |  * @brief Computes the murmur32 hash value of each row in the input set of columns.
32 |  *
33 |  * @param input The table of columns to hash
34 |  * @param seed Optional seed value to use for the hash function
35 |  * @param stream CUDA stream used for device memory operations and kernel launches
36 |  * @param mr Device memory resource used to allocate the returned column's device memory
37 |  *
38 |  * @returns A column where each row is the hash of the corresponding row of the input.
39 |  */
40 | std::unique_ptr<cudf::column> murmur_hash3_32(
41 |   cudf::table_view const& input,
42 |   uint32_t seed                     = 0,
43 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
44 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
45 | 
46 | /**
47 |  * @brief Computes the xxhash64 hash value of each row in the input set of columns.
48 |  *
49 |  * @param input The table of columns to hash
50 |  * @param seed Optional seed value to use for the hash function
51 |  * @param stream CUDA stream used for device memory operations and kernel launches
52 |  * @param mr Device memory resource used to allocate the returned column's device memory
53 |  *
54 |  * @returns A column where each row is the hash of the corresponding row of the input.
55 |  */
56 | std::unique_ptr<cudf::column> xxhash64(
57 |   cudf::table_view const& input,
58 |   int64_t seed                      = DEFAULT_XXHASH64_SEED,
59 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
60 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
61 | 
62 | /**
63 |  * @brief Computes the Hive hash value of each row in the input set of columns.
64 |  *
65 |  * @param input The table of columns to hash
66 |  * @param stream CUDA stream used for device memory operations and kernel launches
67 |  * @param mr Device memory resource used to allocate the returned column's device memory
68 |  *
69 |  * @returns A column where each row is the hash of the corresponding row of the input.
70 |  */
71 | std::unique_ptr<cudf::column> hive_hash(
72 |   cudf::table_view const& input,
73 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
74 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
75 | 
76 | }  // namespace spark_rapids_jni
77 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/hyper_log_log_plus_plus_const.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | namespace spark_rapids_jni {
20 | 
21 | /**
22 |  * @brief The number of bits required to store one HLLPP register value.
23 |  *
24 |  * This number is determined by the maximum number of leading binary zeros a
25 |  * hashcode can produce, which is equal to the number of bits the hashcode
26 |  * returns. The current implementation uses a 64-bit hashcode, which means 6 bits
27 |  * are (at most) needed to store the number of leading zeros.
28 |  */
29 | constexpr int REGISTER_VALUE_BITS = 6;
30 | 
31 | /**
32 |  * @brief The number of registers that can be stored in a single 64-bit long.
33 |  * It's 64 / 6 = 10 (integer division: 10 registers occupy 60 of the 64 bits).
34 |  */
35 | constexpr int REGISTERS_PER_LONG = 64 / REGISTER_VALUE_BITS;
36 | 
37 | }  // namespace spark_rapids_jni
38 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/hyper_log_log_plus_plus_host_udf.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024-2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cudf/aggregation/host_udf.hpp>
20 | 
21 | namespace spark_rapids_jni {
22 | // Factory for the HLLPP reduction host UDF. NOTE(review): raw pointer -- confirm ownership.
23 | cudf::host_udf_base* create_hllpp_reduction_host_udf(int precision);
24 | // Factory for the HLLPP reduction-merge host UDF. NOTE(review): raw pointer -- confirm ownership.
25 | cudf::host_udf_base* create_hllpp_reduction_merge_host_udf(int precision);
26 | // Factory for the HLLPP groupby host UDF. NOTE(review): raw pointer -- confirm ownership.
27 | cudf::host_udf_base* create_hllpp_groupby_host_udf(int precision);
28 | // Factory for the HLLPP groupby-merge host UDF. NOTE(review): raw pointer -- confirm ownership.
29 | cudf::host_udf_base* create_hllpp_groupby_merge_host_udf(int precision);
30 | 
31 | }  // namespace spark_rapids_jni
32 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/map.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include "map.hpp"
18 | 
19 | #include <cudf/column/column_factories.hpp>
20 | #include <cudf/lists/lists_column_view.hpp>
21 | #include <cudf/null_mask.hpp>
22 | #include <cudf/sorting.hpp>
23 | #include <cudf/table/table.hpp>
24 | #include <cudf/table/table_view.hpp>
25 | #include <cudf/utilities/error.hpp>
26 | 
27 | namespace spark_rapids_jni {
28 | 
29 | std::unique_ptr<cudf::column> sort_map_column(cudf::column_view const& input,
30 |                                               cudf::order sort_order,
31 |                                               rmm::cuda_stream_view stream,
32 |                                               rmm::device_async_resource_ref mr)
33 | {
34 |   CUDF_EXPECTS(input.type().id() == cudf::type_id::LIST,
35 |                "maps_column_view input must be LIST type");
36 |   // Empty input: return a copy; cudf::make_empty_column() does not accept nested (LIST) types.
37 |   if (input.size() == 0) { return std::make_unique<cudf::column>(input, stream, mr); }
38 |   auto const lists_of_structs = cudf::lists_column_view(input);
39 |   auto const structs          = lists_of_structs.child();
40 |   CUDF_EXPECTS(structs.type().id() == cudf::type_id::STRUCT,
41 |                "maps_column_view input must have exactly 1 child (STRUCT) column.");
42 |   CUDF_EXPECTS(structs.num_children() == 2,
43 |                "maps_column_view key-value struct must have exactly 2 children.");
44 |   // Only the key column (child 0) drives the ordering; values travel along inside `structs`.
45 |   auto const keys = structs.child(0);
46 |   CUDF_EXPECTS(structs.null_count() == 0, "maps_column_view key-value struct must have no null.");
47 |   CUDF_EXPECTS(keys.null_count() == 0, "maps_column_view keys must have no null.");
48 |   auto const segments = lists_of_structs.offsets();
49 |   // NOTE(review): child()/offsets() presumably ignore a sliced view's offset -- confirm callers.
50 |   auto sorted = cudf::segmented_sort_by_key(cudf::table_view{{structs}},
51 |                                             cudf::table_view{{keys}},
52 |                                             segments,
53 |                                             {sort_order},
54 |                                             {},  // Map keys MUST not be null
55 |                                             stream,
56 |                                             mr);
57 |   // Rebuild the LIST column: copied offsets, sorted struct child, and the input's null mask.
58 |   return cudf::make_lists_column(input.size(),
59 |                                  std::make_unique<cudf::column>(segments, stream, mr),  // copy offsets
60 |                                  std::move(sorted->release().front()),      // child column
61 |                                  input.null_count(),
62 |                                  cudf::copy_bitmask(input, stream, mr),
63 |                                  stream,
64 |                                  mr);
65 | }
66 | 
67 | }  // namespace spark_rapids_jni
68 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/map.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <cudf/column/column_view.hpp>
18 | #include <cudf/utilities/default_stream.hpp>
19 | #include <cudf/utilities/memory_resource.hpp>
20 | 
21 | #include <rmm/cuda_stream_view.hpp>
22 | 
23 | namespace spark_rapids_jni {
24 | 
25 | /**
26 |  * @brief Sort the entries of each map in a map column according to the keys of that map.
27 |  * Notes: the keys of a map MUST not be null; maps are assumed to have no duplicate keys;
28 |  * nested maps in the `KEY` column are not normalized/sorted, i.e. only the first level
29 |  * LIST(STRUCT(KEY, VALUE)) is considered as map type.
30 |  *
31 |  * @param input Input map column, should be in LIST(STRUCT(KEY, VALUE)) type.
32 |  * @param sort_order Ascending or descending order
33 |  * @param stream CUDA stream used for device memory operations and kernel launches
34 |  * @param mr Device memory resource used to allocate the returned column's device memory
35 |  * @return Sorted map according to the sort order of the key column in map.
36 |  * @throws cudf::logic_error If the input column is not a LIST(STRUCT(KEY, VALUE)) column or the
37 |  * keys contain nulls.
38 |  */
39 | std::unique_ptr<cudf::column> sort_map_column(
40 |   cudf::column_view const& input,
41 |   cudf::order sort_order,
42 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
43 |   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
44 | 
45 | }  // namespace spark_rapids_jni
46 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/number_converter.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cudf/scalar/scalar.hpp>
20 | #include <cudf/utilities/default_stream.hpp>
21 | #include <cudf/utilities/memory_resource.hpp>
22 | 
23 | #include <memory>
24 | #include <variant>
25 | 
26 | namespace spark_rapids_jni {
27 | // A conversion argument may be given as a column view, a string scalar, or a plain int.
28 | using convert_number_t = std::variant<cudf::column_view, cudf::string_scalar, int>;
29 | 
30 | /**
31 |  * @brief Convert numbers (in a string column) between different number bases.
32 |  *
33 |  * If `to_base` > 0 the result is unsigned, otherwise it is signed. Space characters (ASCII 32)
34 |  * are trimmed first. Returns null if len(trim_ascii_32(str)) == 0. Returns all nulls if
35 |  * `from_base` or `to_base` is not in range [2, 36].
36 |  * e.g.:
37 |  *   convert('11', 2, 10) = '3'
38 |  *   convert('F', 16, 10) = '15'
39 |  *   convert('17', 10, 16) = '11'
40 |  *
41 |  * @param input the input string column or string scalar
42 |  * @param from_base the number base of input, valid range is [2, 36]
43 |  * @param to_base the number base of output, valid range is [2, 36]
44 |  * @param stream CUDA stream used for device memory operations and kernel launches
45 |  * @param mr Device memory resource used to allocate the returned column's device memory
46 |  * @return a string column containing the numbers expressed in base `to_base`
47 |  */
48 | std::unique_ptr<cudf::column> convert(
49 |   convert_number_t const& input,
50 |   convert_number_t const& from_base,
51 |   convert_number_t const& to_base,
52 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
53 |   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
54 | 
55 | /**
56 |  * @brief Check if overflow occurs when converting numbers (in a string column) between
57 |  * different number bases. This check is for ANSI mode. For more details, please refer to the
58 |  * convert function.
59 |  *
60 |  * @param input the input string column or string scalar
61 |  * @param from_base the number base of input, valid range is [2, 36]
62 |  * @param to_base the number base of output, valid range is [2, 36]
63 |  * @param stream CUDA stream used for device memory operations and kernel launches
64 |  * @param mr Device memory resource passed in for any device memory allocations
65 |  * @return If overflow occurs, return true; otherwise, return false.
66 |  */
67 | bool is_convert_overflow(
68 |   convert_number_t const& input,
69 |   convert_number_t const& from_base,
70 |   convert_number_t const& to_base,
71 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
72 |   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
73 | 
74 | }  // namespace spark_rapids_jni
75 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/regex_rewrite_utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cudf/scalar/scalar_factories.hpp>
20 | #include <cudf/strings/strings_column_view.hpp>
21 | #include <cudf/utilities/default_stream.hpp>
22 | 
23 | namespace spark_rapids_jni {
24 | /**
25 |  * @brief Check if each input string contains the regex pattern `literal[start-end]{len,}`, i.e.
26 |  * a literal string followed by at least `len` characters in the range `start` to `end`.
27 |  *
28 |  * @param input Column of strings to check for literal.
29 |  * @param literal UTF-8 encoded string to check in strings column.
30 |  * @param len Minimum number of characters to check after the literal.
31 |  * @param start Minimum UTF-8 codepoint value to check for in the range.
32 |  * @param end Maximum UTF-8 codepoint value to check for in the range.
33 |  * @param stream CUDA stream used for device memory operations and kernel launches.
34 |  * @param mr Device memory resource used to allocate the returned column's device memory.
35 |  * @return A column with the check result for each input row (presumably BOOL8 -- TODO confirm).
36 |  */
37 | std::unique_ptr<cudf::column> literal_range_pattern(
38 |   cudf::strings_column_view const& input,
39 |   cudf::string_scalar const& literal,
40 |   int const len,
41 |   int const start,
42 |   int const end,
43 |   rmm::cuda_stream_view stream      = rmm::cuda_stream_default,
44 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
45 | }  // namespace spark_rapids_jni
46 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/row_conversion.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cudf/lists/lists_column_view.hpp>
20 | #include <cudf/table/table_view.hpp>
21 | #include <cudf/utilities/default_stream.hpp>
22 | 
23 | #include <rmm/cuda_stream_view.hpp>
24 | #include <rmm/resource_ref.hpp>
25 | 
26 | #include <memory>
27 | 
28 | namespace spark_rapids_jni {
29 | // Converts `tbl` to rows via the fixed-width-optimized path; returns the rows as column(s).
30 | std::vector<std::unique_ptr<cudf::column>> convert_to_rows_fixed_width_optimized(
31 |   cudf::table_view const& tbl,
32 |   // TODO need something for validity
33 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
34 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
35 | // Converts the columns of `tbl` to a row-based layout, returned as one or more columns.
36 | std::vector<std::unique_ptr<cudf::column>> convert_to_rows(
37 |   cudf::table_view const& tbl,
38 |   // TODO need something for validity
39 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
40 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
41 | // Builds a table with column types `schema` from the rows in `input` (fixed-width-optimized).
42 | std::unique_ptr<cudf::table> convert_from_rows_fixed_width_optimized(
43 |   cudf::lists_column_view const& input,
44 |   std::vector<cudf::data_type> const& schema,
45 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
46 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
47 | // Builds a table with column types `schema` from the rows held in the lists column `input`.
48 | std::unique_ptr<cudf::table> convert_from_rows(
49 |   cudf::lists_column_view const& input,
50 |   std::vector<cudf::data_type> const& schema,
51 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
52 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
53 | 
54 | }  // namespace spark_rapids_jni
55 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/shuffle_split_detail.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <cudf/utilities/traits.hpp>
18 | 
19 | namespace spark_rapids_jni {
20 | 
21 | namespace detail {
22 | 
23 | /**
24 |  * @brief Functor for computing size of data elements for a given cudf type.
25 |  *
26 |  * Note: fixed-width types return the size of their device storage type, strings return the
27 |  * size of an int8_t element, and all other types (e.g. lists, structs) return 0.
28 |  */
29 | struct size_of_helper {
30 |   template <typename T>
31 |   constexpr std::enable_if_t<!cudf::is_fixed_width<T>() && !std::is_same_v<T, cudf::string_view>,
32 |                              size_t>
33 |   operator()() const
34 |   {
35 |     return 0;  // neither fixed-width nor string: no per-element data size
36 |   }
37 |   // String overload: sized as a single int8_t element.
38 |   template <typename T>
39 |   constexpr std::enable_if_t<!cudf::is_fixed_width<T>() && std::is_same_v<T, cudf::string_view>,
40 |                              size_t>
41 |   operator()() const
42 |   {
43 |     return sizeof(cudf::device_storage_type_t<int8_t>);
44 |   }
45 |   // Fixed-width overload. NOTE(review): the only overload marked `__device__` -- confirm intent.
46 |   template <typename T>
47 |   constexpr std::enable_if_t<cudf::is_fixed_width<T>(), size_t> __device__
48 |   operator()() const noexcept
49 |   {
50 |     return sizeof(cudf::device_storage_type_t<T>);
51 |   }
52 | };
53 | 
54 | /**
55 |  * @brief Header for each partition: seven consecutive uint32_t fields.
56 |  *
57 |  * The values are stored in big-endian format.
58 |  */
59 | struct partition_header {
60 |   uint32_t magic_number;
61 |   uint32_t row_index;  // row index in the source table that this partition started at
62 |   uint32_t num_rows;  // presumably the number of rows in this partition -- TODO confirm
63 |   uint32_t validity_size;  // size of the validity section (units not shown here)
64 |   uint32_t offset_size;  // size of the offset section (units not shown here)
65 |   uint32_t total_size;  // size of validity+offset+data sections
66 |   uint32_t num_flattened_columns;
67 | };
68 | 
69 | // Alignment values for each buffer type, as applied at the end of that buffer type's data
70 | // in each partition. So for example all of the grouped-together validity buffers for
71 | // a given partition will have a final 4 byte alignment applied before the offset buffers begin.
72 | constexpr size_t validity_pad = 4;
73 | constexpr size_t offset_pad   = 4;
74 | constexpr size_t data_pad     = 4;
75 | 
76 | /**
77 |  * @brief Size of the per-partition metadata: header plus one has-validity bit per column.
78 |  */
79 | constexpr size_t compute_per_partition_metadata_size(size_t total_columns)
80 | {
81 |   constexpr size_t bits_per_byte = 8;  // has-validity bits are rounded up to whole bytes
82 |   return sizeof(partition_header) + (total_columns + bits_per_byte - 1) / bits_per_byte;
83 | }
84 | 
85 | // Align all column size allocations to this boundary so that all output column buffers
86 | // start at that alignment (64 here).
87 | static constexpr std::size_t split_align = 64;
88 | 
89 | /**
90 |  * @brief Buffer type enum: VALIDITY, OFFSETS and DATA buffers.
91 |  *
92 |  * Note: the explicit values (0, 1, 2) matter. Don't rearrange them.
93 |  */
94 | enum class buffer_type { VALIDITY = 0, OFFSETS = 1, DATA = 2 };
95 | 
96 | }  // namespace detail
97 | 
98 | }  // namespace spark_rapids_jni
99 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/spark_rapids_jni_version.cpp.in:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include "spark_rapids_jni_version.h"
18 | 
19 | namespace spark_rapids_jni {
20 | // Version string; the placeholders below are replaced when this .in template is configured.
21 | char const Version[] = "@CMAKE_PROJECT_VERSION@ @SPARK_RAPIDS_JNI_COMMIT_DETAILS@";
22 | 
23 | }
24 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/spark_rapids_jni_version.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | namespace spark_rapids_jni {
20 | // Version string; only declared here (extern), the definition lives in another source file.
21 | extern char const Version[];
22 | 
23 | }
24 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/substring_index.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | #pragma once
17 | 
18 | #include <cudf/column/column.hpp>
19 | #include <cudf/scalar/scalar.hpp>
20 | #include <cudf/strings/strings_column_view.hpp>
21 | 
22 | #include <rmm/mr/device/per_device_resource.hpp>
23 | 
24 | namespace spark_rapids_jni {
25 | 
26 | /**
27 |  * @brief Returns the substring of each string before `count` occurrences of the delimiter.
28 |  * @param strings Strings column
29 |  * @param delimiter The delimiter string used to slice string
30 |  * @param count Specify the occurrence of the delimiter
31 |  * @param mr Device memory resource used to allocate the returned column's device memory
32 |  * @return A string column used to store the result
33 |  */
34 | std::unique_ptr<cudf::column> substring_index(
35 |   cudf::strings_column_view const& strings,
36 |   cudf::string_scalar const& delimiter,
37 |   cudf::size_type count,
38 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
39 | 
40 | }  // namespace spark_rapids_jni
41 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/task_priority.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | namespace spark_rapids_jni {
20 | 
21 | /**
22 |  * @brief Get the priority for a task; larger priorities get priority access to resources.
23 |  * @param attempt_id Identifier of the task (attempt) whose priority is requested.
24 |  * @return The task's priority; larger means preferred over tasks with smaller priority numbers.
25 |  */
26 | long get_task_priority(long attempt_id);
27 | 
28 | /**
29 |  * @brief Inform the system that the task identified by `attempt_id` is finished.
30 |  */
31 | void task_done(long attempt_id);
32 | 
33 | }  // namespace spark_rapids_jni
34 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/utilities.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <cudf/detail/utilities/vector_factories.hpp>
18 | #include <cudf/types.hpp>
19 | #include <cudf/utilities/error.hpp>
20 | #include <cudf/utilities/span.hpp>
21 | 
22 | #include <rmm/cuda_stream_view.hpp>
23 | #include <rmm/device_uvector.hpp>
24 | #include <rmm/device_vector.hpp>
25 | #include <rmm/exec_policy.hpp>
26 | 
27 | #include <cuda/functional>
28 | 
29 | namespace spark_rapids_jni {
30 | 
31 | std::unique_ptr<rmm::device_buffer> bitmask_bitwise_or(
32 |   std::vector<cudf::device_span<cudf::bitmask_type const>> const& input,
33 |   rmm::cuda_stream_view stream,
34 |   rmm::device_async_resource_ref mr)
35 | {
36 |   CUDF_EXPECTS(!input.empty(), "Empty input");
37 |   auto const num_words = input.front().size();
38 |   auto const same_size = [num_words](auto mask) { return mask.size() == num_words; };
39 |   CUDF_EXPECTS(std::all_of(input.begin(), input.end(), same_size),
40 |                "Encountered size mismatch in inputs");
41 |   // Nothing to combine when the masks hold no words: hand back an empty buffer.
42 |   if (num_words == 0) {
43 |     return std::make_unique<rmm::device_buffer>(rmm::device_buffer{0, stream, mr});
44 |   }
45 | 
46 |   // Gather the raw mask pointers on the host, then copy that pointer array to the device.
47 |   std::vector<cudf::bitmask_type const*> host_ptrs(input.size());
48 |   std::transform(
49 |     input.begin(), input.end(), host_ptrs.begin(), [](auto mask) { return mask.data(); });
50 |   auto device_ptrs = cudf::detail::make_device_uvector_async(
51 |     host_ptrs, stream, rmm::mr::get_current_device_resource());
52 |   // One output word per input word; allocated from `mr` and returned to the caller.
53 |   auto result =
54 |     std::make_unique<rmm::device_buffer>(num_words * sizeof(cudf::bitmask_type), stream, mr);
55 |   auto const first_word = thrust::make_counting_iterator(0);
56 |   thrust::transform(rmm::exec_policy(stream),
57 |                     first_word,
58 |                     first_word + num_words,
59 |                     static_cast<cudf::bitmask_type*>(result->data()),
60 |                     cuda::proclaim_return_type<cudf::bitmask_type>(
61 |                       [masks     = device_ptrs.data(),
62 |                        num_masks = input.size()] __device__(cudf::size_type word_index) {
63 |                         cudf::bitmask_type merged = masks[0][word_index];
64 |                         for (auto idx = 1; idx < num_masks; idx++) {
65 |                           merged |= masks[idx][word_index];
66 |                         }
67 |                         return merged;
68 |                       }));
69 |   return result;
70 | }
71 | 
72 | }  // namespace spark_rapids_jni
73 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/utilities.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cudf/types.hpp>
20 | #include <cudf/utilities/default_stream.hpp>
21 | #include <cudf/utilities/span.hpp>
22 | 
23 | #include <rmm/cuda_stream_view.hpp>
24 | #include <rmm/device_uvector.hpp>
25 | #include <rmm/resource_ref.hpp>
26 | 
27 | namespace spark_rapids_jni {
28 | 
29 | /**
30 |  * @brief Bitwise-or an array of equally-sized bitmask buffers into a single output buffer
31 |  *
32 |  * @param input The array of input bitmask buffers.
33 |  * @param stream CUDA stream used for device memory operations and kernel launches.
34 |  * @param mr Device memory resource used to allocate the returned buffer's memory.
35 |  * @return A device buffer holding the bitwise-or of all the input bitmask buffers.
36 |  */
37 | std::unique_ptr<rmm::device_buffer> bitmask_bitwise_or(
38 |   std::vector<cudf::device_span<cudf::bitmask_type const>> const& input,
39 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
40 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
41 | 
42 | }  // namespace spark_rapids_jni
43 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/version.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | namespace spark_rapids_jni {
20 | 
21 | /**
22 |  * @brief Enum class representing different Spark platform types.
23 |  * The values must match the ordinal values defined in SparkPlatformType.java.
24 |  * - VANILLA_SPARK: Represents the standard Apache Spark platform (ordinal 0).
25 |  * - DATABRICKS: Represents the Databricks platform (ordinal 1).
26 |  * - CLOUDERA: Represents the Cloudera platform (ordinal 2).
27 |  * - NUM_PLATFORMS: The total number of platforms defined; listed last so it equals that count.
28 |  */
29 | enum class spark_platform_type { VANILLA_SPARK = 0, DATABRICKS, CLOUDERA, NUM_PLATFORMS };
30 | 
31 | class spark_system {
32 |  public:
33 |   /**
34 |    * @brief Build a spark system description from a platform ordinal and a version triple.
35 |    * NOTE: The `platform_ordinal` MUST keep sync with SparkPlatformType.java
36 |    * @param platform_ordinal The platform ordinal value.
37 |    * @param major_ Major version number.
38 |    * @param minor_ Minor version number.
39 |    * @param patch_ Patch version number.
40 |    */
41 |   spark_system(int platform_ordinal, int major_, int minor_, int patch_)
42 |     : platform_type{static_cast<spark_platform_type>(platform_ordinal)},
43 |       major{major_},
44 |       minor{minor_},
45 |       patch{patch_}
46 |   {
47 |   }
48 | 
49 |   bool is_vanilla_spark() const { return spark_platform_type::VANILLA_SPARK == platform_type; }
50 |   bool is_databricks() const { return spark_platform_type::DATABRICKS == platform_type; }
51 |   // True only when all three version components match exactly.
52 |   bool is_version_eq(int major_, int minor_, int patch_) const
53 |   {
54 |     return !(major != major_ || minor != minor_ || patch != patch_);
55 |   }
56 |   // Lexicographic comparison: the first differing component decides, patch breaks ties.
57 |   bool is_version_ge(int major_, int minor_, int patch_) const
58 |   {
59 |     if (major != major_) { return major > major_; }
60 |     return (minor != minor_) ? (minor > minor_) : (patch >= patch_);
61 |   }
62 | 
63 |   bool is_vanilla_320() const { return is_version_eq(3, 2, 0) && is_vanilla_spark(); }
64 | 
65 |   bool is_vanilla_400_or_later() const { return is_version_ge(4, 0, 0) && is_vanilla_spark(); }
66 | 
67 |   bool is_databricks_14_3_or_later() const { return is_version_ge(14, 3, 0) && is_databricks(); }
68 | 
69 |  private:
70 |   spark_platform_type platform_type;
71 |   int major, minor, patch;
72 | };
73 | 
74 | }  // namespace spark_rapids_jni
75 | 


--------------------------------------------------------------------------------
/src/main/cpp/src/zorder.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <cudf/lists/lists_column_view.hpp>
20 | #include <cudf/table/table_view.hpp>
21 | 
22 | #include <rmm/cuda_stream_view.hpp>
23 | #include <rmm/resource_ref.hpp>
24 | 
25 | #include <memory>
26 | 
27 | namespace spark_rapids_jni {
28 | // Returns an owning column pointer; presumably bit-interleaves tbl's columns — TODO confirm.
29 | std::unique_ptr<cudf::column> interleave_bits(
30 |   cudf::table_view const& tbl,
31 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
32 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
33 | // Returns an owning column pointer; presumably a Hilbert index of tbl's rows — TODO confirm.
34 | std::unique_ptr<cudf::column> hilbert_index(
35 |   int32_t const num_bits,
36 |   cudf::table_view const& tbl,
37 |   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
38 |   rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
39 | 
40 | }  // namespace spark_rapids_jni
41 | 


--------------------------------------------------------------------------------
/src/main/cpp/tests/cast_long_to_binary_string.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <cudf_test/base_fixture.hpp>
18 | #include <cudf_test/column_wrapper.hpp>
19 | 
20 | #include <rmm/device_uvector.hpp>
21 | 
22 | #include <cast_string.hpp>
23 | 
24 | #include <limits>
25 | 
26 | using namespace cudf;
27 | 
28 | constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::FIRST_ERROR};
29 | // Fixture shared by the long_to_binary_string tests.
30 | struct LongToBinaryStringTests : public cudf::test::BaseFixture {};
31 | 
32 | TEST_F(LongToBinaryStringTests, FromLongToBinary)
33 | {
34 |   auto const input = cudf::test::fixed_width_column_wrapper<int64_t>{
35 |     0L, 1L, 10L, -1L, std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::min()};
36 | 
37 |   // Non-negative values print without leading zeros; negative values print all 64 bits.
38 |   auto const expected = cudf::test::strings_column_wrapper{
39 |     "0",
40 |     "1",
41 |     "1010",
42 |     "1111111111111111111111111111111111111111111111111111111111111111",
43 |     "111111111111111111111111111111111111111111111111111111111111111",
44 |     "1000000000000000000000000000000000000000000000000000000000000000"};
45 | 
46 |   auto const actual = spark_rapids_jni::long_to_binary_string(input, cudf::get_default_stream());
47 |   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*actual, expected, verbosity);
48 | }
49 | 


--------------------------------------------------------------------------------
/src/main/cpp/tests/list_slice.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <cudf_test/base_fixture.hpp>
18 | #include <cudf_test/column_wrapper.hpp>
19 | 
20 | #include <cudf/lists/lists_column_view.hpp>
21 | 
22 | #include <list_slice.hpp>
23 | 
24 | using namespace cudf;
25 | 
26 | constexpr test::debug_output_level verbosity{test::debug_output_level::FIRST_ERROR};
27 | 
28 | struct ListSliceTests : public test::BaseFixture {};
29 | 
30 | TEST_F(ListSliceTests, ListSliceTest)
31 | {
32 |   auto const list_col = test::lists_column_wrapper<int32_t>{{0, 1}, {2, 3, 7, 8}, {4, 5}};
33 |   {
34 |     // Scalar start and scalar length; start position 1 selects the first element.
35 |     size_type start_pos = 1;
36 |     size_type num_elems = 2;
37 | 
38 |     auto const expected = test::lists_column_wrapper<int32_t>{{0, 1}, {2, 3}, {4, 5}};
39 |     auto actual         = spark_rapids_jni::list_slice(list_col, start_pos, num_elems);
40 | 
41 |     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*actual, expected, verbosity);
42 |   }
43 |   {
44 |     // Scalar start with one length per row.
45 |     size_type start_pos = 1;
46 |     auto const lengths  = test::fixed_width_column_wrapper<int32_t>{0, 1, 2};
47 | 
48 |     auto const expected = test::lists_column_wrapper<int32_t>{{}, {2}, {4, 5}};
49 |     auto actual         = spark_rapids_jni::list_slice(list_col, start_pos, lengths);
50 | 
51 |     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*actual, expected, verbosity);
52 |   }
53 |   {
54 |     // One start per row with a scalar length; the last row has only one element left.
55 |     auto const starts   = test::fixed_width_column_wrapper<int32_t>{1, 2, 2};
56 |     size_type num_elems = 2;
57 | 
58 |     auto const expected = test::lists_column_wrapper<int32_t>{{0, 1}, {3, 7}, {5}};
59 |     auto actual         = spark_rapids_jni::list_slice(list_col, starts, num_elems);
60 | 
61 |     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*actual, expected, verbosity);
62 |   }
63 |   {
64 |     // One start and one length per row.
65 |     auto const starts  = test::fixed_width_column_wrapper<int32_t>{1, 2, 1};
66 |     auto const lengths = test::fixed_width_column_wrapper<int32_t>{0, 1, 2};
67 | 
68 |     auto const expected = test::lists_column_wrapper<int32_t>{{}, {3}, {4, 5}};
69 |     auto actual         = spark_rapids_jni::list_slice(list_col, starts, lengths);
70 | 
71 |     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*actual, expected, verbosity);
72 |   }
73 | }
74 | 


--------------------------------------------------------------------------------
/src/main/cpp/tests/substring_index.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <cudf_test/base_fixture.hpp>
18 | #include <cudf_test/column_utilities.hpp>
19 | #include <cudf_test/column_wrapper.hpp>
20 | 
21 | #include <cudf/column/column_view.hpp>
22 | #include <cudf/scalar/scalar.hpp>
23 | #include <cudf/strings/slice.hpp>
24 | #include <cudf/strings/strings_column_view.hpp>
25 | 
26 | #include <thrust/host_vector.h>
27 | #include <thrust/iterator/transform_iterator.h>
28 | #include <thrust/sequence.h>
29 | 
30 | #include <substring_index.hpp>
31 | 
32 | #include <string>
33 | #include <vector>
34 | 
35 | using namespace cudf;
36 | 
37 | struct SubstringIndexTests : public test::BaseFixture {};
38 | 
39 | TEST_F(SubstringIndexTests, ScalarDelimiter)
40 | {
41 |   auto input = test::strings_column_wrapper({"www.yahoo.com",
42 |                                              "www.apache..org",
43 |                                              "tennis...com",
44 |                                              "nvidia....com",
45 |                                              "google...........com",
46 |                                              "microsoft...c.....co..m"});
47 |   // Each row keeps the text before its third "."; rows with fewer dots are kept whole.
48 |   auto expected = test::strings_column_wrapper(
49 |     {"www.yahoo.com", "www.apache.", "tennis..", "nvidia..", "google..", "microsoft.."});
50 | 
51 |   auto const dot    = string_scalar(".");
52 |   auto const actual = spark_rapids_jni::substring_index(strings_column_view{input}, dot, 3);
53 |   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*actual, expected);
54 | }
55 | 


--------------------------------------------------------------------------------
/src/main/cpp/tests/test_utilities.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #pragma once
18 | 
19 | #include <algorithm>
20 | #include <cstddef>
21 | #include <random>
22 | #include <type_traits>
23 | #include <vector>
24 | 
25 | /**
26 |  * @brief Generate `size` pseudo-random values of type `T`.
27 |  *
28 |  * `bool` values come from a default `std::bernoulli_distribution`, floating-point values from a
29 |  * default `std::uniform_real_distribution<T>` and every other type from a default
30 |  * `std::uniform_int_distribution<T>`.
31 |  *
32 |  * The engine and the distribution are function-local statics, one set per instantiated `T`, and
33 |  * the engine is seeded with a fixed value. Successive calls therefore keep advancing the same
34 |  * engine instead of restarting the sequence.
35 |  *
36 |  * @tparam T Element type to generate.
37 |  * @param size Number of values to generate.
38 |  * @return A vector holding `size` generated values.
39 |  */
40 | template <typename T>
41 | std::vector<T> random_values(std::size_t size)
42 | {
43 |   using distribution_type =
44 |     std::conditional_t<std::is_same_v<T, bool>,
45 |                        std::bernoulli_distribution,
46 |                        std::conditional_t<std::is_floating_point_v<T>,
47 |                                           std::uniform_real_distribution<T>,
48 |                                           std::uniform_int_distribution<T>>>;
49 | 
50 |   static constexpr auto seed = 0xf00d;
51 |   static std::mt19937 engine{seed};
52 |   static distribution_type dist{};
53 | 
54 |   std::vector<T> values(size);
55 |   std::generate(values.begin(), values.end(), [&]() { return T{dist(engine)}; });
56 |   return values;
57 | }
58 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/CaseWhen.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | import ai.rapids.cudf.*;
20 | 
21 | 
22 | /**
23 |  * Execute SQL `case when` semantic.
24 |  * If there are multiple branches and each branch uses a scalar to generate its value,
25 |  * then it's fast to use this class because it does not generate temp string columns.
26 |  *
27 |  * E.g.:
28 |  *   SQL is:
29 |  *     select
30 |  *        case
31 |  *          when bool_1_expr then "value_1"
32 |  *          when bool_2_expr then "value_2"
33 |  *          when bool_3_expr then "value_3"
34 |  *          else "value_else"
35 |  *        end
36 |  *      from tab
37 |  *
38 |  * Execution steps:
39 |  *   Execute bool exprs to get bool columns, e.g., gets:
40 |  *     bool column 1: [true,  false, false, false]  // bool_1_expr result
41 |  *     bool column 2: [false, true,  false, false]  // bool_2_expr result
42 |  *     bool column 3: [false, false, true,  false]  // bool_3_expr result
43 |  *   Execute `selectFirstTrueIndex` to get the column index for the first true in bool columns.
44 |  *   Generate a column to store scalars: "value_1", "value_2", "value_3", "value_else"
45 |  *   Execute `Table.gather` to generate the final output column
46 |  */
47 | public class CaseWhen {
48 |   static {
49 |     // Make sure the native libraries are loaded before the native method below is invoked,
50 |     // in the same way the other JNI wrapper classes of this package do.
51 |     NativeDepsLoader.loadNativeDeps();
52 |   }
53 | 
54 |   /**
55 |    * Select the column index for the first true in bool columns.
56 |    * For a row that does not contain true, the end index (number of columns) is used.
57 |    *
58 |    * e.g.:
59 |    *   column 0: true,  false, false, false
60 |    *   column 1: false, true,  false, false
61 |    *   column 2: false, false, true,  false
62 |    *
63 |    *   1st row is: true, false, false; first true index is 0
64 |    *   2nd row is: false, true, false; first true index is 1
65 |    *   3rd row is: false, false, true; first true index is 2
66 |    *   4th row is: false, false, false; do not find true, set index to the end index 3
67 |    *
68 |    *   output column: 0, 1, 2, 3
69 |    *   In the `case when` context, here 3 index means using NULL value.
70 |    *
71 |    * @param boolColumns the bool columns to scan, in branch order; each must be of type BOOL8
72 |    * @return a new column holding the first-true column index for every row
73 |    */
74 |   public static ColumnVector selectFirstTrueIndex(ColumnVector[] boolColumns) {
75 |     for (ColumnVector cv : boolColumns) {
76 |       assert(cv.getType().equals(DType.BOOL8)) : "Columns must be bools";
77 |     }
78 | 
79 |     long[] boolHandles = new long[boolColumns.length];
80 |     for (int i = 0; i < boolColumns.length; ++i) {
81 |       boolHandles[i] = boolColumns[i].getNativeView();
82 |     }
83 | 
84 |     return new ColumnVector(selectFirstTrueIndex(boolHandles));
85 |   }
86 | 
87 |   private static native long selectFirstTrueIndex(long[] boolHandles);
88 | }
89 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/CastException.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | package com.nvidia.spark.rapids.jni;
17 | 
18 | /**
19 |  * CastException is an exception thrown by the JNI in the even of a casting error.
20 |  */
21 | public class CastException extends RuntimeException {
22 |   private final int rowWithError;
23 |   private final String stringWithError;
24 |   
25 |   CastException(String stringWithError, int rowWithError) {
26 |     super("Error casting data on row " + String.valueOf(rowWithError) + ": " + stringWithError);
27 | 
28 |     this.rowWithError = rowWithError;
29 |     this.stringWithError = stringWithError;
30 |   }
31 | 
32 |   public int getRowWithError() {
33 |     return rowWithError;
34 |   }
35 | 
36 |   public String getStringWithError() {
37 |     return stringWithError;
38 |   }
39 | }


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/CpuRetryOOM.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | /**
20 |  * A special version of an out of memory error that indicates we ran out of off heap memory, but
21 |  * should roll back to a point when all memory for the task is spillable and then retry the
22 |  * operation.
23 |  */
24 | public class CpuRetryOOM extends OffHeapOOM {
25 |   public CpuRetryOOM() {
26 |     super();
27 |   }
28 | 
29 |   public CpuRetryOOM(String message) {
30 |     super(message);
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/CpuSplitAndRetryOOM.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | /**
20 |  * A special version of an out of memory error that indicates we ran out of off heap memory, but
21 |  * should roll back to a point when all memory for the task is spillable and then retry the
22 |  * operation with the input data split to make it ideally use less off heap memory overall.
23 |  */
24 | public class CpuSplitAndRetryOOM extends OffHeapOOM {
25 |   public CpuSplitAndRetryOOM() {
26 |     super();
27 |   }
28 | 
29 |   public CpuSplitAndRetryOOM(String message) {
30 |     super(message);
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/DateTimeRebase.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | import ai.rapids.cudf.*;
20 | 
21 | /**
22 |  * Temporary shim over {@link DateTimeUtils}; to be removed once the plugin picks up that class.
23 |  */
24 | public class DateTimeRebase {
25 |   static { NativeDepsLoader.loadNativeDeps(); }
26 | 
27 |   /** Forwards to {@link DateTimeUtils#rebaseJulianToGregorian} and returns its result. */
28 |   public static ColumnVector rebaseJulianToGregorian(ColumnView input) {
29 |     return DateTimeUtils.rebaseJulianToGregorian(input);
30 |   }
31 | 
32 |   /** Forwards to {@link DateTimeUtils#rebaseGregorianToJulian} and returns its result. */
33 |   public static ColumnVector rebaseGregorianToJulian(ColumnView input) {
34 |     return DateTimeUtils.rebaseGregorianToJulian(input);
35 |   }
36 | }
37 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/GpuOOM.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | /**
20 |  * A special version of an out of memory error that indicates we ran out of GPU memory. This is
21 |  * mostly to avoid a fatal error that would force the worker process to restart. This should be
22 |  * recoverable on the GPU.
23 |  */
24 | public class GpuOOM extends RuntimeException {
25 |   public GpuOOM() {
26 |     super();
27 |   }
28 | 
29 |   public GpuOOM(String message) {
30 |     super(message);
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/GpuRetryOOM.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | /**
20 |  * A special version of an out of memory error that indicates we ran out of GPU memory, but should
21 |  * roll back to a point when all memory for the task is spillable and then retry the operation.
22 |  */
23 | public class GpuRetryOOM extends GpuOOM {
24 |   public GpuRetryOOM() {
25 |     super();
26 |   }
27 | 
28 |   public GpuRetryOOM(String message) {
29 |     super(message);
30 |   }
31 | }
32 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/GpuSplitAndRetryOOM.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | /**
20 |  * A special version of an out of memory error that indicates we ran out of GPU memory, but should
21 |  * roll back to a point when all memory for the task is spillable and then retry the operation
22 |  * with the input data split to make it ideally use less GPU memory overall.
23 |  */
24 | public class GpuSplitAndRetryOOM extends GpuOOM {
25 |   public GpuSplitAndRetryOOM() {
26 |     super();
27 |   }
28 | 
29 |   public GpuSplitAndRetryOOM(String message) {
30 |     super(message);
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/GpuSubstringIndexUtils.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2022-2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | import ai.rapids.cudf.*;
20 | 
21 | public class GpuSubstringIndexUtils {
22 |     static{
23 |         NativeDepsLoader.loadNativeDeps();
24 |     }
25 | 
26 |     public static ColumnVector substringIndex(ColumnView cv, Scalar delimiter, int count){
27 |         return new ColumnVector(substringIndex(cv.getNativeView(), delimiter.getScalarHandle(), count));
28 |     }
29 | 
30 |     private static native long substringIndex(long columnView, long delimiter, int count) throws CudfException;
31 | }
32 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/Map.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | import ai.rapids.cudf.ColumnVector;
20 | import ai.rapids.cudf.ColumnView;
21 | import ai.rapids.cudf.CudfException;
22 | import ai.rapids.cudf.DType;
23 | import ai.rapids.cudf.NativeDepsLoader;
24 | 
25 | /**
26 |  * Operations on map columns, i.e. columns of LIST(STRUCT(KEY, VALUE)) type.
27 |  */
28 | public class Map {
29 |   static {
30 |     NativeDepsLoader.loadNativeDeps();
31 |   }
32 | 
33 |   /**
34 |    * Sort the entries of every map in a map column by that map's keys.
35 |    * Note:
36 |    *   The keys of a map MUST NOT be null.
37 |    *   Maps are assumed to have no duplicate keys.
38 |    *   Maps nested inside the `KEY` column are not normalized/sorted; only the
39 |    *   first level LIST(STRUCT(KEY, VALUE)) is treated as a map.
40 |    *
41 |    * @param cv           Input map column; should be of LIST(STRUCT(KEY, VALUE))
42 |    *                     type.
43 |    * @param isDescending True to sort in descending order, false to sort in
44 |    *                     ascending order.
45 |    * @return The map column with each map's entries ordered by key.
46 |    * @throws CudfException If the input column is not a LIST(STRUCT(KEY, VALUE))
47 |    *                       column or the keys contain nulls.
48 |    */
49 |   public static ColumnVector sort(ColumnView cv, boolean isDescending) {
50 |     // Sanity check on the outer type; it only runs when JVM assertions are enabled.
51 |     assert (cv.getType().equals(DType.LIST));
52 |     return new ColumnVector(sort(cv.getNativeView(), isDescending));
53 |   }
54 | 
55 |   private static native long sort(long handle, boolean isDescending) throws CudfException;
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/OffHeapOOM.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | /**
20 |  * A special version of an out of memory error that indicates we ran out of off heap CPU memory.
21 |  * This is mostly to avoid a fatal error that would force the worker process to restart. This
22 |  * should be recoverable.
23 |  */
24 | public class OffHeapOOM extends RuntimeException {
25 |   public OffHeapOOM() {
26 |     super();
27 |   }
28 | 
29 |   public OffHeapOOM(String message) {
30 |     super(message);
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/Pair.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | /**
20 |  * A utility class for holding a pair of values.
21 |  */
22 | public class Pair<K, V> {
23 |     private final K left;
24 |     private final V right;
25 | 
26 |     public Pair(K left, V right) {
27 |         this.left = left;
28 |         this.right = right;
29 |     }
30 | 
31 |     public K getLeft() {
32 |         return left;
33 |     }
34 | 
35 |     public V getRight() {
36 |         return right;
37 |     }
38 | 
39 |     public static <K, V> Pair<K, V> of(K left, V right) {
40 |         return new Pair<>(left, right);
41 |     }
42 | }
43 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/Preconditions.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024-2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | import java.util.function.Supplier;
20 | 
21 | /**
22 |  * This class contains utility methods for checking preconditions.
23 |  */
24 | public class Preconditions {
25 |     /**
26 |      * Check if the condition is true, otherwise throw an IllegalStateException with the given message.
27 |      */
28 |     public static void ensure(boolean condition, String message) {
29 |         if (!condition) {
30 |             throw new IllegalStateException(message);
31 |         }
32 |     }
33 | 
34 |     /**
35 |      * Check if the condition is true, otherwise throw an IllegalStateException with the given message supplier.
36 |      */
37 |     public static void ensure(boolean condition, Supplier<String> messageSupplier) {
38 |         if (!condition) {
39 |             throw new IllegalStateException(messageSupplier.get());
40 |         }
41 |     }
42 | 
43 |     /**
44 |      * Check if the value is non-negative, otherwise throw an IllegalArgumentException with the given message.
45 |      * @param value the value to check
46 |      * @param name the name of the value
47 |      * @return the value if it is non-negative
48 |      * @throws IllegalArgumentException if the value is negative
49 |      */
50 |     public static int ensureNonNegative(int value, String name) {
51 |         if (value < 0) {
52 |             throw new IllegalArgumentException(name + " must be non-negative, but was " + value);
53 |         }
54 |         return value;
55 |     }
56 | 
57 |     /**
58 |      * Check if the value is non-negative, otherwise throw an IllegalArgumentException with the given message.
59 |      * @param value the value to check
60 |      * @param name the name of the value
61 |      * @return the value if it is non-negative
62 |      * @throws IllegalArgumentException if the value is negative
63 |      */
64 |     public static long ensureNonNegative(long value, String name) {
65 |         if (value < 0) {
66 |             throw new IllegalArgumentException(name + " must be non-negative, but was " + value);
67 |         }
68 |         return value;
69 |     }
70 | }
71 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/RegexRewriteUtils.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | import ai.rapids.cudf.*;
20 | 
21 | public class RegexRewriteUtils {
22 |   static {
23 |     NativeDepsLoader.loadNativeDeps();
24 |   }
25 | 
26 | /**
27 |  * @brief Check if input string contains regex pattern `literal[start-end]{len,}`, which means
28 |  * a literal string followed by a range of characters in the range of start to end, with at least
29 |  * len characters.
30 |  *
31 |  * @param input Column of strings to check for literal.
32 |  * @param literal UTF-8 encoded string to check in strings column.
33 |  * @param len Minimum number of characters to check after the literal.
34 |  * @param start Minimum UTF-8 codepoint value to check for in the range.
35 |  * @param end Maximum UTF-8 codepoint value to check for in the range.
36 |  * @return ColumnVector of booleans where true indicates the string contains the pattern.
37 |  */
38 |   public static ColumnVector literalRangePattern(ColumnVector input, Scalar literal, int len, int start, int end) {
39 |     assert(input.getType().equals(DType.STRING)) : "column must be a String";
40 |     return new ColumnVector(literalRangePattern(input.getNativeView(), literal.getScalarHandle(), len, start, end));
41 |   }
42 | 
43 |   private static native long literalRangePattern(long input, long literal, int len, int start, int end);
44 | }
45 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/RmmSparkThreadState.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | /**
20 |  * The state of a given thread according to RmmSpark. This is intended really for debugging and
21 |  * testing only.
22 |  */
23 | public enum RmmSparkThreadState {
24 |   UNKNOWN(-1), // thread is not associated with anything...
25 |   THREAD_RUNNING(0), // task thread running normally
26 |   THREAD_ALLOC(1), // task thread in the middle of doing an allocation
27 |   THREAD_ALLOC_FREE(2), // task thread in the middle of doing an allocation and a free happened
28 |   THREAD_BLOCKED(3), // task thread that is temporarily blocked
29 |   THREAD_BUFN_THROW(4), // task thread that should throw an exception to roll back before blocking
30 |   THREAD_BUFN_WAIT(5), // task thread that threw an exception to roll back and now should
31 |   // block the next time alloc is called
32 |   THREAD_BUFN(6), // task thread that is blocked until higher priority tasks start to succeed
33 |   THREAD_SPLIT_THROW(7), // task thread that should throw an exception to split input and retry
34 |   THREAD_REMOVE_THROW(8); // task thread that is being removed and needs to throw an exception
35 | 
36 |   private final int nativeId;
37 | 
38 |   RmmSparkThreadState(int nativeId) {
39 |     this.nativeId = nativeId;
40 |   }
41 | 
42 |   static RmmSparkThreadState fromNativeId(int nativeId) {
43 |     for (RmmSparkThreadState ts : RmmSparkThreadState.values()) {
44 |       if (ts.nativeId == nativeId) {
45 |         return ts;
46 |       }
47 |     }
48 |     throw new IllegalArgumentException("Could not find an ID for " + nativeId);
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/SparkPlatformType.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | /**
20 |  * Enum representing the platform.
21 |  * NOTE: MUST be kept in sync with version.hpp
22 |  * The ordinal values are used to represent the platform in JNI calls.
23 |  */
24 | public enum SparkPlatformType {
25 |   // Ordinal 0: vanilla Spark. The JNI and kernel code use 0 to represent it, so keep the declaration order stable.
26 |   VANILLA_SPARK,
27 | 
28 |   // Ordinal 1: Databricks. The JNI and kernel code use 1 to represent it.
29 |   DATABRICKS,
30 | 
31 |   // Ordinal 2: Cloudera. The JNI and kernel code use 2 to represent it.
32 |   CLOUDERA;
33 | }
34 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/TaskPriority.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *
 3 |  *  Copyright (c) 2025, NVIDIA CORPORATION.
 4 |  *
 5 |  *  Licensed under the Apache License, Version 2.0 (the "License");
 6 |  *  you may not use this file except in compliance with the License.
 7 |  *  You may obtain a copy of the License at
 8 |  *
 9 |  *      http://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  *  Unless required by applicable law or agreed to in writing, software
12 |  *  distributed under the License is distributed on an "AS IS" BASIS,
13 |  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  *  See the License for the specific language governing permissions and
15 |  *  limitations under the License.
16 |  *
17 |  */
18 | 
19 | package com.nvidia.spark.rapids.jni;
20 | 
21 | import ai.rapids.cudf.NativeDepsLoader;
22 | 
23 | /**
24 |  * Get the priority for any task. If the priority for one task is larger than the priority for another task,
25 |  * then the first task (the one with the larger number) should get access to resources before the task with
26 |  * the lower priority value.
27 |  */
28 | public class TaskPriority {
29 |   static {
30 |     NativeDepsLoader.loadNativeDeps(); // runs once, when this class is initialized
31 |   }
32 |   // Native lookup of the priority value for the given task attempt id.
33 |   public static native long getTaskPriority(long taskAttemptId);
34 |   // NOTE(review): presumably tells the native side that this task attempt has finished — confirm against the JNI code.
35 |   public static native void taskDone(long taskAttemptId);
36 | }
37 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/ThreadStateRegistry.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | import org.slf4j.Logger;
20 | import org.slf4j.LoggerFactory;
21 | 
22 | import java.util.HashMap;
23 | import java.util.HashSet;
24 | 
25 | /**
26 |  * This is used to allow us to map a native thread id to a java thread so we can look at the
27 |  * state from a java perspective.
28 |  */
29 | class ThreadStateRegistry {
30 |   private static final Logger LOG = LoggerFactory.getLogger(ThreadStateRegistry.class);
31 | 
32 |   private static final HashMap<Long, Thread> knownThreads = new HashMap<>();
33 | 
34 |   public static synchronized void addThread(long nativeId, Thread t) {
35 |     knownThreads.put(nativeId, t);
36 |   }
37 | 
38 |   // Typically called from JNI
39 |   public static synchronized void removeThread(long threadId) {
40 |     knownThreads.remove(threadId);
41 |   }
42 | 
43 |   // This is likely called from JNI
44 |   public static synchronized boolean isThreadBlocked(long nativeId) {
45 |     Thread t = knownThreads.get(nativeId);
46 |     if (t == null || !t.isAlive()) {
47 |       // Dead is as good as blocked. This is mostly for tests, not so much for
48 |       // production
49 |       return true;
50 |     }
51 |     Thread.State state = t.getState();
52 |     switch (state) {
53 |       case BLOCKED:
54 |         // fall through
55 |       case WAITING:
56 |         // fall through
57 |       case TIMED_WAITING:
58 |         return true;
59 |       case TERMINATED:
60 |         // Technically there is a race with `!t.isAlive` check above, and dead is as good as
61 |         // blocked.
62 |         return true;
63 |       default:
64 |         return false;
65 |     }
66 |   }
67 | }
68 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/Version.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | public class Version {
20 |   private final int platformOrdinal;
21 |   private final int major;
22 |   private final int minor;
23 |   private final int patch;
24 | 
25 |   public Version(SparkPlatformType platform, int major, int minor, int patch) {
26 |     this.platformOrdinal = platform.ordinal();
27 |     this.major = major;
28 |     this.minor = minor;
29 |     this.patch = patch;
30 |   }
31 | 
32 |   /**
33 |    * Note: this is used in the JNI code and kernel code, so it must match the
34 |    * enum SparkPlatformType in com.nvidia.spark.rapids.jni.SparkPlatformType.
35 |    */
36 |   public int getPlatformOrdinal() {
37 |     return platformOrdinal;
38 |   }
39 | 
40 |   public int getMajor() {
41 |     return major;
42 |   }
43 | 
44 |   public int getMinor() {
45 |     return minor;
46 |   }
47 | 
48 |   public int getPatch() {
49 |     return patch;
50 |   }
51 | }
52 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/ColumnViewInfo.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni.kudo;
18 | 
19 | import ai.rapids.cudf.*;
20 | 
21 | import java.util.Optional;
22 | 
23 | import static com.nvidia.spark.rapids.jni.Preconditions.ensureNonNegative;
24 | 
25 | class ColumnViewInfo {
26 |   private final DType dtype;
27 |   private final ColumnOffsetInfo offsetInfo;
28 |   private final int nullCount;
29 |   private final int rowCount;
30 | 
31 |   /**
32 |    * Captures the metadata needed to build a view of one column.
33 |    *
34 |    * @param dtype      type of the column
35 |    * @param offsetInfo source of the column's buffers, resolved against a base address in
36 |    *                   {@link #buildColumnView}
37 |    * @param nullCount  number of nulls, must not be negative
38 |    * @param rowCount   number of rows, must not be negative
39 |    * @throws IllegalArgumentException if nullCount or rowCount is negative
40 |    */
41 |   public ColumnViewInfo(DType dtype, ColumnOffsetInfo offsetInfo,
42 |                         int nullCount, int rowCount) {
43 |     this.dtype = dtype;
44 |     this.offsetInfo = offsetInfo;
45 |     // ensureNonNegative returns its argument, so the check and the assignment share a line;
46 |     // nullCount is still checked before rowCount.
47 |     this.nullCount = ensureNonNegative(nullCount, "nullCount");
48 |     this.rowCount = ensureNonNegative(rowCount, "rowCount");
49 |   }
50 | 
51 |   /**
52 |    * Builds a column view whose buffers are resolved against the address of the given buffer.
53 |    * Nested types are built from the validity buffer, the offset buffer and the children;
54 |    * all other types are built from the data, validity and offset buffers.
55 |    *
56 |    * @param buffer       device buffer whose address is the base for all buffer lookups
57 |    * @param childrenView child views, used only when the type is nested
58 |    * @return the new column view
59 |    */
60 |   ColumnView buildColumnView(DeviceMemoryBuffer buffer, ColumnView[] childrenView) {
61 |     final long base = buffer.getAddress();
62 |     final Optional<Long> nulls = Optional.of((long) nullCount);
63 | 
64 |     if (!dtype.isNestedType()) {
65 |       return new ColumnView(dtype, rowCount, nulls,
66 |           offsetInfo.getDataBuffer(base),
67 |           offsetInfo.getValidityBuffer(base),
68 |           offsetInfo.getOffsetBuffer(base));
69 |     }
70 |     return new ColumnView(dtype, rowCount, nulls,
71 |         offsetInfo.getValidityBuffer(base),
72 |         offsetInfo.getOffsetBuffer(base),
73 |         childrenView);
74 |   }
75 | 
76 |   @Override
77 |   public String toString() {
78 |     StringBuilder text = new StringBuilder("ColumnViewInfo{");
79 |     text.append("dtype=").append(dtype);
80 |     text.append(", offsetInfo=").append(offsetInfo);
81 |     text.append(", nullCount=").append(nullCount);
82 |     text.append(", rowCount=").append(rowCount);
83 |     text.append('}');
84 |     return text.toString();
85 |   }
86 | }
87 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/DataOutputStreamWriter.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024-2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni.kudo;
18 | 
19 | import ai.rapids.cudf.HostMemoryBuffer;
20 | 
21 | import java.io.DataOutputStream;
22 | import java.io.IOException;
23 | 
24 | /**
25 |  * Visible for testing
26 |  */
27 | class DataOutputStreamWriter implements DataWriter {
28 |   // Reusable staging array for copyDataFrom.
29 |   private final byte[] arrayBuffer = new byte[1024];
30 |   private final DataOutputStream dout;
31 | 
32 |   /**
33 |    * @param dout the stream that receives everything written through this writer
34 |    */
35 |   public DataOutputStreamWriter(DataOutputStream dout) {
36 |     this.dout = dout;
37 |   }
38 | 
39 |   /** Delegates to {@link DataOutputStream#writeInt(int)}. */
40 |   @Override
41 |   public void writeInt(int i) throws IOException {
42 |     dout.writeInt(i);
43 |   }
44 | 
45 |   /**
46 |    * Copies {@code len} bytes from {@code src}, starting at {@code srcOffset}, to the stream.
47 |    * The data is staged through the internal array in pieces of at most its length. Nothing
48 |    * is copied when {@code len} is not positive.
49 |    */
50 |   @Override
51 |   public void copyDataFrom(HostMemoryBuffer src, long srcOffset, long len) throws IOException {
52 |     long position = srcOffset;
53 |     for (long remaining = len; remaining > 0; ) {
54 |       // The cast is safe: the chunk never exceeds the array length.
55 |       int chunk = (int) Math.min(arrayBuffer.length, remaining);
56 |       src.getBytes(arrayBuffer, 0, position, chunk);
57 |       dout.write(arrayBuffer, 0, chunk);
58 |       position += chunk;
59 |       remaining -= chunk;
60 |     }
61 |   }
62 | 
63 |   /** Flushes the underlying stream. */
64 |   @Override
65 |   public void flush() throws IOException {
66 |     dout.flush();
67 |   }
68 | 
69 |   /** Writes {@code length} bytes of {@code arr}, starting at {@code offset}, to the stream. */
70 |   @Override
71 |   public void write(byte[] arr, int offset, int length) throws IOException {
72 |     dout.write(arr, offset, length);
73 |   }
74 | }
75 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/DataWriter.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024-2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni.kudo;
18 | 
19 | import ai.rapids.cudf.HostMemoryBuffer;
20 | 
21 | import java.io.IOException;
22 | 
23 | /**
24 |  * Output data writer for kudo serializer.
25 |  */
26 | public interface DataWriter {
27 | 
28 |   /**
29 |    * Write a 32-bit int in network (big-endian) byte order.
30 |    */
31 |   void writeInt(int i) throws IOException;
32 | 
33 |   /**
34 |    * Reserve space in the buffer for the given size. This default implementation does nothing.
35 |    */
36 |   default void reserve(int size) throws IOException {}
37 | 
38 |   /**
39 |    * Copy len bytes from src, starting at srcOffset, to this writer.
40 |    *
41 |    * @param src       buffer to copy from.
42 |    * @param srcOffset offset in src to start at.
43 |    * @param len       number of bytes to copy.
44 |    */
45 |   void copyDataFrom(HostMemoryBuffer src, long srcOffset, long len) throws IOException;
46 |   // Flush this writer. NOTE(review): what, if anything, gets buffered is up to the implementation — confirm.
47 |   void flush() throws IOException;
48 | 
49 |   /**
50 |    * Copy length bytes of arr, starting at index offset, to this writer.
51 |    */
52 |   void write(byte[] arr, int offset, int length) throws IOException;
53 | }
54 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/DumpOption.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni.kudo;
18 | 
19 | public enum DumpOption {
20 |   Always, // NOTE(review): presumably dump every time — confirm against the code that reads this option
21 |   OnFailure, // NOTE(review): presumably dump only when the operation fails — confirm
22 |   Never // NOTE(review): presumably dumping is disabled — confirm
23 | }
24 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/MergeMetrics.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni.kudo;
18 | 
19 | public class MergeMetrics {
20 |   /** Nanoseconds spent calculating the combined header. */
21 |   private final long calcHeaderTime;
22 |   /** Nanoseconds spent merging the buffers into the host buffer. */
23 |   private final long mergeIntoHostBufferTime;
24 |   /** Nanoseconds spent converting the host buffer into a contiguous table. */
25 |   private final long convertToTableTime;
26 | 
27 |   /**
28 |    * Creates an immutable set of merge timings, all expressed in nanoseconds.
29 |    */
30 |   public MergeMetrics(long calcHeaderTime, long mergeIntoHostBufferTime,
31 |                       long convertToTableTime) {
32 |     this.convertToTableTime = convertToTableTime;
33 |     this.mergeIntoHostBufferTime = mergeIntoHostBufferTime;
34 |     this.calcHeaderTime = calcHeaderTime;
35 |   }
36 | 
37 |   /** @return a builder whose timings all start at zero */
38 |   public static Builder builder() {
39 |     return new Builder();
40 |   }
41 | 
42 |   /** @return a builder pre-populated with the timings of {@code metrics} */
43 |   public static Builder builder(MergeMetrics metrics) {
44 |     Builder seeded = new Builder();
45 |     seeded.calcHeaderTime = metrics.calcHeaderTime;
46 |     seeded.mergeIntoHostBufferTime = metrics.mergeIntoHostBufferTime;
47 |     seeded.convertToTableTime = metrics.convertToTableTime;
48 |     return seeded;
49 |   }
50 | 
51 |   /** @return the time it took to calculate the combined header, in nanoseconds */
52 |   public long getCalcHeaderTime() {
53 |     return this.calcHeaderTime;
54 |   }
55 | 
56 |   /** @return the time it took to merge the buffers into the host buffer, in nanoseconds */
57 |   public long getMergeIntoHostBufferTime() {
58 |     return this.mergeIntoHostBufferTime;
59 |   }
60 | 
61 |   /** @return the time it took to convert the host buffer into a table, in nanoseconds */
62 |   public long getConvertToTableTime() {
63 |     return this.convertToTableTime;
64 |   }
65 | 
66 |   /**
67 |    * Mutable builder for {@link MergeMetrics}; a timing that is never set stays at zero.
68 |    */
69 |   public static class Builder {
70 |     private long calcHeaderTime;
71 |     private long mergeIntoHostBufferTime;
72 |     private long convertToTableTime;
73 | 
74 |     /** Sets the header calculation time and returns this builder for chaining. */
75 |     public Builder calcHeaderTime(long calcHeaderTime) {
76 |       this.calcHeaderTime = calcHeaderTime;
77 |       return this;
78 |     }
79 | 
80 |     /** Sets the merge-into-host-buffer time and returns this builder for chaining. */
81 |     public Builder mergeIntoHostBufferTime(long mergeIntoHostBufferTime) {
82 |       this.mergeIntoHostBufferTime = mergeIntoHostBufferTime;
83 |       return this;
84 |     }
85 | 
86 |     /** Sets the convert-to-table time and returns this builder for chaining. */
87 |     public Builder convertToTableTime(long convertToTableTime) {
88 |       this.convertToTableTime = convertToTableTime;
89 |       return this;
90 |     }
91 | 
92 |     /** @return a new {@link MergeMetrics} holding the timings currently set on this builder */
93 |     public MergeMetrics build() {
94 |       return new MergeMetrics(calcHeaderTime, mergeIntoHostBufferTime, convertToTableTime);
95 |     }
96 |   }
97 | }
98 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/MergeOptions.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni.kudo;
18 | 
19 | import java.io.OutputStream;
20 | import java.util.function.Supplier;
21 | 
22 | /**
23 |  * Immutable bundle of a {@link DumpOption}, an output stream supplier and a file path.
24 |  * NOTE(review): presumably these control whether and where merge input is dumped — confirm
25 |  * against the code that consumes this class.
26 |  */
27 | public class MergeOptions {
28 |   private final DumpOption dumpOption;
29 |   private final Supplier<OutputStream> outputStreamSupplier;
30 |   private final String filePath;
31 | 
32 |   /**
33 |    * Stores the three values as given; none of them is validated.
34 |    */
35 |   public MergeOptions(DumpOption dumpOption, Supplier<OutputStream> outputStreamSupplier, String filePath) {
36 |     this.filePath = filePath;
37 |     this.outputStreamSupplier = outputStreamSupplier;
38 |     this.dumpOption = dumpOption;
39 |   }
40 | 
41 |   /** @return the dump option passed to the constructor */
42 |   public DumpOption getDumpOption() {
43 |     return this.dumpOption;
44 |   }
45 | 
46 |   /** @return the output stream supplier passed to the constructor */
47 |   public Supplier<OutputStream> getOutputStreamSupplier() {
48 |     return this.outputStreamSupplier;
49 |   }
50 | 
51 |   /** @return the file path passed to the constructor */
52 |   public String getFilePath() {
53 |     return this.filePath;
54 |   }
55 | }
56 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/OpenByteArrayOutputStreamWriter.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni.kudo;
18 | 
19 | import static java.lang.Math.toIntExact;
20 | import static java.util.Objects.requireNonNull;
21 | 
22 | import ai.rapids.cudf.HostMemoryBuffer;
23 | import java.io.IOException;
24 | 
25 | /**
26 |  * Adapter class which helps to save memory copy when shuffle manager uses
27 |  * {@link OpenByteArrayOutputStream} during serialization.
28 |  */
29 | public class OpenByteArrayOutputStreamWriter implements DataWriter {
30 |   private final OpenByteArrayOutputStream out;
31 | 
32 |   public OpenByteArrayOutputStreamWriter(OpenByteArrayOutputStream bout) {
33 |     requireNonNull(bout, "Byte array output stream can't be null");
34 |     this.out = bout;
35 |   }
36 | 
37 |   @Override
38 |   public void reserve(int size) throws IOException {
39 |     out.reserve(size);
40 |   }
41 | 
42 |   @Override
43 |   public void writeInt(int v) throws IOException {
44 |     out.reserve(4 + out.size());
45 |     out.write((v >>> 24) & 0xFF);
46 |     out.write((v >>> 16) & 0xFF);
47 |     out.write((v >>>  8) & 0xFF);
48 |     out.write((v >>>  0) & 0xFF);
49 |   }
50 | 
51 |   @Override
52 |   public void copyDataFrom(HostMemoryBuffer src, long srcOffset, long len) throws IOException {
53 |     out.write(src, srcOffset, toIntExact(len));
54 |   }
55 | 
56 |   @Override
57 |   public void flush() throws IOException {
58 |   }
59 | 
60 |   @Override
61 |   public void write(byte[] arr, int offset, int length) throws IOException {
62 |     out.write(arr, offset, length);
63 |   }
64 | }
65 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/SliceInfo.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni.kudo;
18 | 
19 | public class SliceInfo {
20 |   final int offset;
21 |   final int rowCount;
22 |   private final SlicedValidityBufferInfo validityBufferInfo;
23 | 
24 |   SliceInfo(int offset, int rowCount) {
25 |     this.offset = offset;
26 |     this.rowCount = rowCount;
27 |     this.validityBufferInfo = SlicedValidityBufferInfo.calc(offset, rowCount);
28 |   }
29 | 
30 |   SlicedValidityBufferInfo getValidityBufferInfo() {
31 |     return validityBufferInfo;
32 |   }
33 | 
34 |   public int getOffset() {
35 |     return offset;
36 |   }
37 | 
38 |   public int getRowCount() {
39 |     return rowCount;
40 |   }
41 | 
42 |   @Override
43 |   public String toString() {
44 |     return "SliceInfo{" +
45 |         "offset=" + offset +
46 |         ", rowCount=" + rowCount +
47 |         ", validityBufferInfo=" + validityBufferInfo +
48 |         '}';
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/SlicedValidityBufferInfo.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni.kudo;
18 | 
19 | /**
20 |  * A simple utility class to hold information about serializing/deserializing sliced validity buffer.
21 |  */
22 | class SlicedValidityBufferInfo {
23 |   private final int bufferOffset;
24 |   private final int bufferLength;
25 |   /// The bit offset within the buffer where the slice starts
26 |   private final int beginBit;
27 | 
28 |   SlicedValidityBufferInfo(int bufferOffset, int bufferLength, int beginBit) {
29 |     this.bufferOffset = bufferOffset;
30 |     this.bufferLength = bufferLength;
31 |     this.beginBit = beginBit;
32 |   }
33 | 
34 |   @Override
35 |   public String toString() {
36 |     return "SlicedValidityBufferInfo{" + "bufferOffset=" + bufferOffset + ", bufferLength=" + bufferLength +
37 |         ", beginBit=" + beginBit + '}';
38 |   }
39 | 
40 |   public int getBufferOffset() {
41 |     return bufferOffset;
42 |   }
43 | 
44 |   public int getBufferLength() {
45 |     return bufferLength;
46 |   }
47 | 
48 |   public int getBeginBit() {
49 |     return beginBit;
50 |   }
51 | 
52 |   static SlicedValidityBufferInfo calc(int rowOffset, int numRows) {
53 |     if (rowOffset < 0) {
54 |       throw new IllegalArgumentException("rowOffset must be >= 0, but was " + rowOffset);
55 |     }
56 |     if (numRows < 0) {
57 |       throw new IllegalArgumentException("numRows must be >= 0, but was " + numRows);
58 |     }
59 |     int bufferOffset = rowOffset / 8;
60 |     int beginBit = rowOffset % 8;
61 |     int bufferLength = 0;
62 |     if (numRows > 0) {
63 |       bufferLength = (rowOffset + numRows - 1) / 8 - bufferOffset + 1;
64 |     }
65 |     return new SlicedValidityBufferInfo(bufferOffset, bufferLength, beginBit);
66 |   }
67 | }
68 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/kudo/WriteMetrics.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024-2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni.kudo;
18 | 
/**
 * Accumulates metrics collected while serializing a table using the kudo format: the time spent
 * copying buffers and the number of bytes written. Both counters start at zero and only grow by
 * whatever the caller adds.
 */
public class WriteMetrics {
  /** Running total of the time reported through {@link #addCopyBufferTime(long)}. */
  private long copyBufferTime = 0L;
  /** Running total of the bytes reported through {@link #addWrittenBytes(long)}. */
  private long writtenBytes = 0L;

  /** Creates a metrics object with both counters at zero. */
  public WriteMetrics() {
  }

  /**
   * Adds to the time spent on copying the buffer.
   *
   * @param time amount to add to the accumulated copy time
   */
  public void addCopyBufferTime(long time) {
    this.copyBufferTime = this.copyBufferTime + time;
  }

  /**
   * Get the time spent on copying the buffer.
   *
   * @return the accumulated copy time
   */
  public long getCopyBufferTime() {
    return this.copyBufferTime;
  }

  /**
   * Adds to the number of bytes written.
   *
   * @param bytes amount to add to the accumulated byte count
   */
  public void addWrittenBytes(long bytes) {
    this.writtenBytes = this.writtenBytes + bytes;
  }

  /**
   * Get the number of bytes written.
   *
   * @return the accumulated byte count
   */
  public long getWrittenBytes() {
    return this.writtenBytes;
  }
}
54 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/schema/HostColumnsVisitor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *
 3 |  *  Copyright (c) 2024-2025, NVIDIA CORPORATION.
 4 |  *
 5 |  *  Licensed under the Apache License, Version 2.0 (the "License");
 6 |  *  you may not use this file except in compliance with the License.
 7 |  *  You may obtain a copy of the License at
 8 |  *
 9 |  *      http://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  *  Unless required by applicable law or agreed to in writing, software
12 |  *  distributed under the License is distributed on an "AS IS" BASIS,
13 |  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  *  See the License for the specific language governing permissions and
15 |  *  limitations under the License.
16 |  *
17 |  */
18 | 
19 | package com.nvidia.spark.rapids.jni.schema;
20 | 
21 | import ai.rapids.cudf.HostColumnVectorCore;
22 | 
23 | import java.util.List;
24 | 
/**
 * A visitor for a list of host columns in a schema. Nested (struct and list) columns get a
 * "previsit" callback before their children and a "visit" callback after them; primitive
 * columns get a single "visit" callback.
 *
 * <p>
 * For example, if we have three columns A, B, and C with the following types:
 * </p>
 *
 * <ul>
 *    <li> A: <code>struct { int a1; long a2} </code> </li>
 *    <li> B: <code>list { int b1} </code> </li>
 *    <li> C: <code>string c1 </code> </li>
 * </ul>
 *
 * <p>
 * The order of visiting will be:
 * </p>
 *
 * <ol>
 *     <li> Previsit struct column A</li>
 *     <li> Visit primitive column a1 </li>
 *     <li> Visit primitive column a2</li>
 *     <li> Visit struct column A</li>
 *     <li> Previsit list column B</li>
 *     <li> Visit primitive column b1</li>
 *     <li> Visit list column B</li>
 *     <li> Visit primitive column c1</li>
 * </ol>
 */
public interface HostColumnsVisitor {
    /**
     * Visit a struct column before any of its children.
     * @param col the struct column to visit
     */
    void preVisitStruct(HostColumnVectorCore col);

    // TODO no one uses this, do we even want it???
    /**
     * Visit a struct column after its children have been visited (see the visiting order in the
     * interface documentation).
     * @param col the struct column to visit
     */
    void visitStruct(HostColumnVectorCore col);

    /**
     * Visit a list column before actually visiting its child.
     * @param col the list column to visit
     */
    void preVisitList(HostColumnVectorCore col);

    /**
     * Visit a list column after visiting its child.
     * @param col the list column to visit
     */
    void visitList(HostColumnVectorCore col);

    /**
     * Visit a column that is a primitive type.
     * @param col the column to visit
     */
    void visit(HostColumnVectorCore col);

    /**
     * Signals that the processing is all done.
     */
    void done();
}
90 | 


--------------------------------------------------------------------------------
/src/main/java/com/nvidia/spark/rapids/jni/schema/SimpleSchemaVisitor.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *
 3 |  *  Copyright (c) 2025, NVIDIA CORPORATION.
 4 |  *
 5 |  *  Licensed under the Apache License, Version 2.0 (the "License");
 6 |  *  you may not use this file except in compliance with the License.
 7 |  *  You may obtain a copy of the License at
 8 |  *
 9 |  *      http://www.apache.org/licenses/LICENSE-2.0
10 |  *
11 |  *  Unless required by applicable law or agreed to in writing, software
12 |  *  distributed under the License is distributed on an "AS IS" BASIS,
13 |  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |  *  See the License for the specific language governing permissions and
15 |  *  limitations under the License.
16 |  *
17 |  */
18 | 
19 | package com.nvidia.spark.rapids.jni.schema;
20 | 
21 | import ai.rapids.cudf.Schema;
22 | 
/**
 * A schema visitor similar to {@link SchemaVisitor} but with a simplified interface; please
 * refer to {@link SchemaVisitor} for more details.
 *
 * <p>
 * This interface drops the generic types and return values to keep the interface simple, which
 * can avoid unnecessary allocation while visiting a schema.
 * </p>
 */
public interface SimpleSchemaVisitor {
    /**
     * Visit the top level schema.
     * @param schema the top level schema to visit
     */
    void visitTopSchema(Schema schema);

    /**
     * Visit a struct schema before actually visiting its children.
     * @param structType the struct schema to visit
     */
    void preVisitStruct(Schema structType);

    /**
     * Visit a struct schema. Presumably called after the children have been visited, mirroring
     * {@link #preVisitStruct(Schema)} -- confirm against {@link SchemaVisitor}.
     * @param structType the struct schema to visit
     */
    void visitStruct(Schema structType);

    /**
     * Visit a list schema before actually visiting its child.
     * @param listType the list schema to visit
     */
    void preVisitList(Schema listType);

    /**
     * Visit a list schema after visiting its child.
     * @param listType the list schema to visit
     */
    void visitList(Schema listType);

    /**
     * Visit a primitive type.
     * @param primitiveType the primitive type to visit
     */
    void visit(Schema primitiveType);
}
67 | 


--------------------------------------------------------------------------------
/src/test/cpp/faultinj/test_faultinj.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "logLevel": 1,
 3 |     "seed": 12345,
 4 |     "dynamic": true,
 5 |     "cudaRuntimeFaults": {
 6 |         "cudaLaunchKernel_ptsz": {
 7 |             "percent": 0,
 8 |             "injectionType": 0,
 9 |             "interceptionCount": 1
10 |         }
11 |     },
12 |     "cudaDriverFaults": {
13 |         "*": {
14 |             "percent": 0,
15 |             "injectionType": 2,
16 |             "substituteReturnCode": 2,
17 |             "interceptionCount": 1000
18 |         },
19 |         "cuLaunchKernel_ptsz": {
20 |             "percent": 0,
21 |             "injectionType": 2,
22 |             "substituteReturnCode": 2,
23 |             "interceptionCount": 1000
24 |         }
25 |     }
26 | }
27 | 


--------------------------------------------------------------------------------
/src/test/java/com/nvidia/spark/rapids/jni/CaseWhenTest.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | import ai.rapids.cudf.*;
20 | 
21 | import org.junit.jupiter.api.Test;
22 | 
23 | import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual;
24 | 
25 | public class CaseWhenTest {
26 | 
27 |   @Test
28 |   void selectIndexTest() {
29 |     try (
30 |         ColumnVector b0 = ColumnVector.fromBooleans(
31 |             true, false, false, false);
32 |         ColumnVector b1 = ColumnVector.fromBooleans(
33 |             true, true, false, false);
34 |         ColumnVector b2 = ColumnVector.fromBooleans(
35 |             false, false, true, false);
36 |         ColumnVector b3 = ColumnVector.fromBooleans(
37 |             true, true, true, false);
38 |         ColumnVector expected = ColumnVector.fromInts(0, 1, 2, 4)) {
39 |       ColumnVector[] boolColumns = new ColumnVector[] { b0, b1, b2, b3 };
40 |       try (ColumnVector actual = CaseWhen.selectFirstTrueIndex(boolColumns)) {
41 |         assertColumnsAreEqual(expected, actual);
42 |       }
43 |     }
44 |   }
45 | 
46 |   @Test
47 |   void selectIndexTestWithNull() {
48 |     try (
49 |         ColumnVector b0 = ColumnVector.fromBoxedBooleans(
50 |             null, false, false, null, false);
51 |         ColumnVector b1 = ColumnVector.fromBoxedBooleans(
52 |             null, null, false, true, true);
53 |         ColumnVector b2 = ColumnVector.fromBoxedBooleans(
54 |             null, null, false, true, false);
55 |         ColumnVector b3 = ColumnVector.fromBoxedBooleans(
56 |             null, null, null, true, null);
57 |         ColumnVector expected = ColumnVector.fromInts(4, 4, 4, 1, 1)) {
58 |       ColumnVector[] boolColumns = new ColumnVector[] { b0, b1, b2, b3 };
59 |       try (ColumnVector actual = CaseWhen.selectFirstTrueIndex(boolColumns)) {
60 |         assertColumnsAreEqual(expected, actual);
61 |       }
62 |     }
63 |   }
64 | }
65 | 


--------------------------------------------------------------------------------
/src/test/java/com/nvidia/spark/rapids/jni/GpuSubstringIndexUtilsTest.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | import ai.rapids.cudf.AssertUtils;
20 | import ai.rapids.cudf.ColumnVector;
21 | import ai.rapids.cudf.Scalar;
22 | import ai.rapids.cudf.Table;
23 | import org.junit.jupiter.api.Test;
24 | 
25 | import java.util.ArrayList;
26 | import java.util.List;
27 | 
28 | public class GpuSubstringIndexUtilsTest {
29 |     @Test
30 |     void gpuSubstringIndexTest(){
31 |         Table.TestBuilder tb = new Table.TestBuilder();
32 |         tb.column( "www.apache.org");
33 |         tb.column("www.apache");
34 |         tb.column("www");
35 |         tb.column("");
36 |         tb.column("org");
37 |         tb.column("apache.org");
38 |         tb.column("www.apache.org");
39 |         tb.column("");
40 |         tb.column("大千世界大");
41 |         tb.column("www||apache");
42 | 
43 |         try(Table expected = tb.build()){
44 |             Table.TestBuilder tb2 = new Table.TestBuilder();
45 |             tb2.column("www.apache.org");
46 |             tb2.column("www.apache.org");
47 |             tb2.column("www.apache.org");
48 |             tb2.column("www.apache.org");
49 |             tb2.column("www.apache.org");
50 |             tb2.column("www.apache.org");
51 |             tb2.column("www.apache.org");
52 |             tb2.column("");
53 |             tb2.column("大千世界大千世界");
54 |             tb2.column("www||apache||org");
55 | 
56 |             Scalar dotScalar = Scalar.fromString(".");
57 |             Scalar cnChar = Scalar.fromString("千");
58 |             Scalar verticalBar = Scalar.fromString("||");
59 |             Scalar[] delimiterArray = new Scalar[]{dotScalar, dotScalar, dotScalar, dotScalar,dotScalar, dotScalar, dotScalar, dotScalar, cnChar, verticalBar};
60 |             int[] countArray = new int[]{3, 2, 1, 0, -1, -2, -3, -2, 2, 2};
61 |             List<ColumnVector> result = new ArrayList<>();
62 |             try (Table origTable = tb2.build()){
63 |                 for(int i = 0; i < origTable.getNumberOfColumns(); i++){
64 |                     ColumnVector string_col = origTable.getColumn(i);
65 |                     result.add(GpuSubstringIndexUtils.substringIndex(string_col, delimiterArray[i], countArray[i]));
66 |                 }
67 |                 try (Table result_tbl = new Table(
68 |                         result.toArray(new ColumnVector[result.size()]))){
69 |                     AssertUtils.assertTablesAreEqual(expected, result_tbl);
70 |                 }
71 |             }finally {
72 |                 result.forEach(ColumnVector::close);
73 |             }
74 |         }
75 |     }
76 | }
77 | 


--------------------------------------------------------------------------------
/src/test/java/com/nvidia/spark/rapids/jni/HistogramTest.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | import ai.rapids.cudf.AssertUtils;
20 | import ai.rapids.cudf.ColumnVector;
21 | 
22 | import org.junit.jupiter.api.Test;
23 | 
24 | public class HistogramTest {
25 |   @Test
26 |   void testZeroFrequency() {
27 |     try (ColumnVector values = ColumnVector.fromInts(5, 10, 30);
28 |          ColumnVector freqs = ColumnVector.fromLongs(1, 0, 1);
29 |          ColumnVector histogram = Histogram.createHistogramIfValid(values, freqs, true);
30 |          ColumnVector percentiles = Histogram.percentileFromHistogram(histogram, new double[]{1},
31 |              false);
32 |          ColumnVector expected = ColumnVector.fromBoxedDoubles(5.0, null, 30.0)) {
33 |       AssertUtils.assertColumnsAreEqual(percentiles, expected);
34 |     }
35 |   }
36 | 
37 |   @Test
38 |   void testAllNulls() {
39 |     try (ColumnVector values = ColumnVector.fromBoxedInts(null, null, null);
40 |          ColumnVector freqs = ColumnVector.fromLongs(1, 2, 3);
41 |          ColumnVector histogram = Histogram.createHistogramIfValid(values, freqs, true);
42 |          ColumnVector percentiles = Histogram.percentileFromHistogram(histogram, new double[]{0.5},
43 |              false);
44 |          ColumnVector expected = ColumnVector.fromBoxedDoubles(null, null, null)) {
45 |       AssertUtils.assertColumnsAreEqual(percentiles, expected);
46 |     }
47 |   }
48 | }
49 | 


--------------------------------------------------------------------------------
/src/test/java/com/nvidia/spark/rapids/jni/LimitingOffHeapAllocForTests.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2023, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | import ai.rapids.cudf.HostMemoryBuffer;
20 | 
21 | import java.util.Optional;
22 | 
23 | /**
24 |  * This provides a way to allocate and deallocate off heap buffers using the RmmSpark APIs for
25 |  * retry on allocations.
26 |  */
27 | public class LimitingOffHeapAllocForTests {
28 |   private static long limit;
29 |   private static long amountAllocated = 0;
30 |   public static synchronized void setLimit(long limit) {
31 |     LimitingOffHeapAllocForTests.limit = limit;
32 |     if (amountAllocated > 0) {
33 |       throw new IllegalStateException("PREVIOUS TEST LEAKED MEMORY!!!");
34 |     }
35 |   }
36 | 
37 |   private static Optional<HostMemoryBuffer> allocInternal(long amount, boolean blocking) {
38 |     Optional<HostMemoryBuffer> ret = Optional.empty();
39 |     boolean wasOom = true;
40 |     boolean isRecursive = RmmSpark.preCpuAlloc(amount, blocking);
41 |     try {
42 |       synchronized (LimitingOffHeapAllocForTests.class) {
43 |         if (amountAllocated + amount <= limit) {
44 |           amountAllocated += amount;
45 |           wasOom = false;
46 |           HostMemoryBuffer buff = HostMemoryBuffer.allocate(amount);
47 |           final long ptr = buff.getAddress();
48 |           buff.setEventHandler(refCount -> {
49 |             if (refCount == 0) {
50 |               synchronized (LimitingOffHeapAllocForTests.class) {
51 |                 amountAllocated -= amount;
52 |               }
53 |               RmmSpark.cpuDeallocate(ptr, amount);
54 |             }
55 |           });
56 |           ret = Optional.of(buff);
57 |         }
58 |       }
59 |     } finally {
60 |       if (ret.isPresent()) {
61 |         RmmSpark.postCpuAllocSuccess(ret.get().getAddress(), amount, blocking, isRecursive);
62 |       } else {
63 |         RmmSpark.postCpuAllocFailed(wasOom, blocking, isRecursive);
64 |       }
65 |     }
66 |     return ret;
67 |   }
68 | 
69 |   /**
70 |    * Do a non-blocking allocation
71 |    * @param amount the amount to allocate
72 |    * @return the allocated buffer or not.
73 |    */
74 |   public static Optional<HostMemoryBuffer> tryAlloc(long amount) {
75 |     return allocInternal(amount, false);
76 |   }
77 | 
78 |   /**
79 |    * Do a blocking allocation
80 |    * @param amount the amount to allocate
81 |    * @return the allocated buffer
82 |    */
83 |   public static HostMemoryBuffer alloc(long amount) {
84 |     Optional<HostMemoryBuffer> ret = Optional.empty();
85 |     while (!ret.isPresent()) {
86 |       ret = allocInternal(amount, true);
87 |     }
88 |     return ret.get();
89 |   }
90 | }


--------------------------------------------------------------------------------
/src/test/java/com/nvidia/spark/rapids/jni/MapTest.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | import ai.rapids.cudf.*;
20 | 
21 | import org.junit.jupiter.api.Test;
22 | 
23 | import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual;
24 | 
25 | import java.util.Arrays;
26 | import java.util.List;
27 | 
28 | public class MapTest {
29 | 
30 |   @Test
31 |   void sort() {
32 |     // Map is List<Struct<KEY, VALUE>>
33 |     List<HostColumnVector.StructData> map1 = Arrays.asList(
34 |         new HostColumnVector.StructData(Arrays.asList(5, 2)),
35 |         new HostColumnVector.StructData(Arrays.asList(4, 1)));
36 |     List<HostColumnVector.StructData> map2 = Arrays.asList(
37 |         new HostColumnVector.StructData(Arrays.asList(2, 1)),
38 |         new HostColumnVector.StructData(Arrays.asList(4, 3)));
39 | 
40 |     List<HostColumnVector.StructData> sorted_map1 = Arrays.asList(
41 |         new HostColumnVector.StructData(Arrays.asList(4, 1)),
42 |         new HostColumnVector.StructData(Arrays.asList(5, 2)));
43 |     List<HostColumnVector.StructData> sorted_map2 = map2;
44 | 
45 |     HostColumnVector.StructType structType = new HostColumnVector.StructType(true,
46 |         Arrays.asList(new HostColumnVector.BasicType(true, DType.INT32),
47 |             new HostColumnVector.BasicType(true, DType.INT32)));
48 |     try (ColumnVector cv = ColumnVector.fromLists(
49 |         new HostColumnVector.ListType(true, structType), map1, map2);
50 |         ColumnVector res = Map.sort(cv, false);
51 |         ColumnVector expected = ColumnVector.fromLists(
52 |             new HostColumnVector.ListType(true, structType), sorted_map1, sorted_map2)) {
53 | 
54 |       assertColumnsAreEqual(expected, res);
55 |     }
56 |   }
57 | 
58 | }
59 | 
60 | 


--------------------------------------------------------------------------------
/src/test/java/com/nvidia/spark/rapids/jni/RegexRewriteUtilsTest.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2024, NVIDIA CORPORATION.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.nvidia.spark.rapids.jni;
18 | 
19 | import ai.rapids.cudf.ColumnVector;
20 | import ai.rapids.cudf.Scalar;
21 | import org.junit.jupiter.api.Test;
22 | 
23 | import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual;
24 | 
25 | public class RegexRewriteUtilsTest {
26 | 
27 |   @Test
28 |   void testLiteralRangePattern() {
29 |     int d = 3;
30 |     try (ColumnVector inputCv = ColumnVector.fromStrings(
31 |         "abc123", "aabc123", "aabc12", "abc1232", "aabc1232");
32 |         Scalar pattern = Scalar.fromString("abc");
33 |         ColumnVector expected = ColumnVector.fromBooleans(true, true, false, true, true);
34 |         ColumnVector actual = RegexRewriteUtils.literalRangePattern(inputCv, pattern, d, 48, 57)) {
35 |       assertColumnsAreEqual(expected, actual);
36 |     }
37 |   }
38 | 
39 |   @Test
40 |   void testLiteralRangePatternChinese() {
41 |     int d = 2;
42 |     try (ColumnVector inputCv = ColumnVector.fromStrings(
43 |         "数据砖块", "火花-急流英伟达", "英伟达Nvidia", "火花-急流");
44 |         Scalar pattern = Scalar.fromString("英");
45 |         ColumnVector expected = ColumnVector.fromBooleans(false, true, true, false);
46 |         ColumnVector actual = RegexRewriteUtils.literalRangePattern(inputCv, pattern, d, 19968, 40869)) {
47 |       assertColumnsAreEqual(expected, actual);
48 |     }
49 |   }
50 | 
51 | }
52 | 


--------------------------------------------------------------------------------
/thirdparty/cudf-pins/add_dependency_pins.cmake:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | 
# Load the rapids-cmake configuration from the cudf tree unless rapids-cmake-dir has already
# been defined.
# NOTE(review): the include path is relative, so it depends on the directory CMake resolves it
# against when this file is processed -- confirm it is correct for every way this file is used.
if(NOT DEFINED rapids-cmake-dir)
  include(../cudf/cmake/rapids_config.cmake)
endif()

# Bring in the rapids-cpm module and initialize it before any pin override/generation below.
include(rapids-cpm)
rapids_cpm_init()
23 | 
# add_override_if_requested()
#
# Applies or records the CUDF dependency pins, depending on CUDF_DEPENDENCY_PIN_MODE:
#   pinned        - override the CPM package versions with the ones listed in the versions.json
#                   file that sits next to this script
#   anything else - build with the latest dependencies and have rapids-cmake write the versions
#                   it resolves to that same versions.json
#
# Takes no arguments. Example: add_override_if_requested()
function(add_override_if_requested)
  set(pins_file "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/versions.json")

  # Quote both sides so neither is re-dereferenced as a variable name and an unset mode simply
  # compares as the empty string.
  if("${CUDF_DEPENDENCY_PIN_MODE}" STREQUAL "pinned")
    include("${rapids-cmake-dir}/cpm/package_override.cmake")
    rapids_cpm_package_override("${pins_file}")

    message(STATUS "Pinning CUDF dependencies to values found in ${pins_file}")
  else()
    include("${rapids-cmake-dir}/cpm/generate_pinned_versions.cmake")
    rapids_cpm_generate_pinned_versions(OUTPUT "${pins_file}")

    message(STATUS "Building with latest CUDF dependencies (saving pinned versions to ${pins_file})")
  endif()
endfunction()
add_override_if_requested()
38 | 


--------------------------------------------------------------------------------
/thirdparty/cudf-pins/rapids-cmake.sha:
--------------------------------------------------------------------------------
1 | c0330a832746dabf4c4c518072af6771ce654330
2 | 


--------------------------------------------------------------------------------
/thirdparty/cudf-pins/setup.cmake:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | 
17 | # Lowercase the requested mode so the validation below accepts any capitalization of the two keywords.
18 | string(TOLOWER "${CUDF_DEPENDENCY_PIN_MODE}" CUDF_DEPENDENCY_PIN_MODE)
19 | if(NOT CUDF_DEPENDENCY_PIN_MODE STREQUAL pinned AND NOT CUDF_DEPENDENCY_PIN_MODE STREQUAL latest)
20 |   message(FATAL_ERROR "The CUDF_DEPENDENCY_PIN_MODE variable must be set to either `pinned` or `latest`.")
21 | endif()
22 | 
23 | function(set_rapids_cmake_pin_sha1)
24 |   # Logs the visible rapids-cmake-sha and copies it into the caller's scope. NOTE(review): no caller in this file.
25 |   message(STATUS "Pinning rapids-cmake SHA1 to ${rapids-cmake-sha}")
26 |   set(rapids-cmake-sha "${rapids-cmake-sha}" PARENT_SCOPE)
27 | endfunction()
28 | 
29 | # We need to set the rapids-cmake SHA1 before any CMake code in libcudf is executed when
30 | # we are in pin mode. Otherwise we will use the latest rapids-cmake version since that
31 | # is what cudf does via `fetch_rapids.cmake`.
32 | if(CUDF_DEPENDENCY_PIN_MODE STREQUAL pinned)
33 |   # Read the recorded SHA1 and trim it; string(STRIP) takes the input string first, then the output variable.
34 |   file(READ "${CMAKE_CURRENT_LIST_DIR}/rapids-cmake.sha" rapids-cmake-sha)
35 |   string(STRIP "${rapids-cmake-sha}" rapids-cmake-sha)
36 |   string(REPLACE "\n" "" rapids-cmake-sha "${rapids-cmake-sha}")
37 |   set(rapids-cmake-sha "${rapids-cmake-sha}" CACHE STRING "rapids-cmake sha to use" FORCE)
38 |   message(STATUS "Pinning rapids-cmake SHA1 [${rapids-cmake-sha}]")
39 | else()
40 |   set(rapids-cmake-fetch-via-git "ON" CACHE STRING "Make sure rapids-cmake is cloned so we can get SHA value" FORCE)
41 | endif()
42 | 
43 | # Hook add_dependency_pins.cmake into project(): rapids-cmake cpm_init() can't be called from a `-C` CMake file.
44 | set(CMAKE_PROJECT_TOP_LEVEL_INCLUDES
45 |     "${CMAKE_CURRENT_LIST_DIR}/add_dependency_pins.cmake" CACHE FILEPATH "")
46 | 


--------------------------------------------------------------------------------