├── .azuredevops
    └── rocm-ci.yml
├── .github
    ├── CODEOWNERS
    ├── ISSUE_TEMPLATE
    │   ├── config.yml
    │   └── issue_report.yml
    ├── dependabot.yml
    └── workflows
    │   └── markdownlint.yml
├── .gitignore
├── .mdlrc
├── .readthedocs.yaml
├── CHANGELOG.md
├── CMakeLists.txt
├── LICENSE.txt
├── README.md
├── bin
    ├── findcode.sh
    ├── findcode_custom.sh
    ├── findcode_headers.sh
    ├── findcode_sources.sh
    ├── finduncodep.sh
    ├── hipconvertinplace-perl.sh
    ├── hipconvertinplace.sh
    ├── hipexamine-perl.sh
    ├── hipexamine.sh
    └── hipify-perl
├── docs
    ├── building
    │   ├── build-hipify-perl.rst
    │   └── building-hipify.rst
    ├── conf.py
    ├── data
    │   ├── csv_statistics.png
    │   └── patches
    │   │   ├── patch_for_clang_10.0.0_bug_47332.zip
    │   │   ├── patch_for_clang_10.0.1_bug_47332.zip
    │   │   ├── patch_for_clang_11.0.0_bug_47332.zip
    │   │   ├── patch_for_clang_14.0.0_bug_54609.zip
    │   │   ├── patch_for_clang_14.0.1_bug_54609.zip
    │   │   ├── patch_for_clang_14.0.2_bug_54609.zip
    │   │   ├── patch_for_clang_14.0.3_bug_54609.zip
    │   │   ├── patch_for_clang_14.0.4_bug_54609.zip
    │   │   ├── patch_for_clang_7.0.0_bug_38811.zip
    │   │   ├── patch_for_clang_7.0.1_bug_38811.zip
    │   │   ├── patch_for_clang_7.1.0_bug_38811.zip
    │   │   ├── patch_for_clang_8.0.0_bug_38811.zip
    │   │   └── patch_for_clang_8.0.1_bug_38811.zip
    ├── how-to
    │   ├── hipify-clang.rst
    │   └── hipify-perl.rst
    ├── index.rst
    ├── license.md
    ├── reference
    │   ├── hipify-clang-cmd.rst
    │   ├── hipify-perl-cmd.rst
    │   ├── supported_apis.md
    │   └── tables
    │   │   ├── CUBLAS_API_supported_by_HIP.md
    │   │   ├── CUBLAS_API_supported_by_HIP_and_ROC.md
    │   │   ├── CUBLAS_API_supported_by_ROC.md
    │   │   ├── CUB_API_supported_by_HIP.md
    │   │   ├── CUDA_Device_API_supported_by_HIP.md
    │   │   ├── CUDA_Driver_API_functions_supported_by_HIP.md
    │   │   ├── CUDA_RTC_API_supported_by_HIP.md
    │   │   ├── CUDA_Runtime_API_functions_supported_by_HIP.md
    │   │   ├── CUDNN_API_supported_by_HIP.md
    │   │   ├── CUDNN_API_supported_by_HIP_and_MIOPEN.md
    │   │   ├── CUDNN_API_supported_by_MIOPEN.md
    │   │   ├── CUFFT_API_supported_by_HIP.md
    │   │   ├── CURAND_API_supported_by_HIP.md
    │   │   ├── CURAND_API_supported_by_HIP_and_ROC.md
    │   │   ├── CURAND_API_supported_by_ROC.md
    │   │   ├── CUSOLVER_API_supported_by_HIP.md
    │   │   ├── CUSOLVER_API_supported_by_HIP_and_ROC.md
    │   │   ├── CUSOLVER_API_supported_by_ROC.md
    │   │   ├── CUSPARSE_API_supported_by_HIP.md
    │   │   ├── CUSPARSE_API_supported_by_HIP_and_ROC.md
    │   │   ├── CUSPARSE_API_supported_by_ROC.md
    │   │   ├── CUTENSOR_API_supported_by_HIP.md
    │   │   └── cuComplex_API_supported_by_HIP.md
    └── sphinx
    │   ├── _toc.yml.in
    │   ├── requirements.in
    │   └── requirements.txt
├── hipify-backward-compat.cmake
├── mdlrc-style.rb
├── packaging
    ├── hipify-clang.postinst
    ├── hipify-clang.prerm
    ├── hipify-clang.rpm_post
    ├── hipify-clang.rpm_postun
    └── hipify-clang.txt
├── src
    ├── ArgParse.cpp
    ├── ArgParse.h
    ├── CUDA2HIP.cpp
    ├── CUDA2HIP.h
    ├── CUDA2HIP_BLAS_API_functions.cpp
    ├── CUDA2HIP_BLAS_API_types.cpp
    ├── CUDA2HIP_CAFFE2_API_functions.cpp
    ├── CUDA2HIP_CAFFE2_API_types.cpp
    ├── CUDA2HIP_CUB_API_functions.cpp
    ├── CUDA2HIP_CUB_API_types.cpp
    ├── CUDA2HIP_Complex_API_functions.cpp
    ├── CUDA2HIP_Complex_API_types.cpp
    ├── CUDA2HIP_DNN_API_functions.cpp
    ├── CUDA2HIP_DNN_API_types.cpp
    ├── CUDA2HIP_Device_functions.cpp
    ├── CUDA2HIP_Device_types.cpp
    ├── CUDA2HIP_Doc.cpp
    ├── CUDA2HIP_Driver_API_functions.cpp
    ├── CUDA2HIP_Driver_API_types.cpp
    ├── CUDA2HIP_FFT_API_functions.cpp
    ├── CUDA2HIP_FFT_API_types.cpp
    ├── CUDA2HIP_Perl.cpp
    ├── CUDA2HIP_Python.cpp
    ├── CUDA2HIP_RAND_API_functions.cpp
    ├── CUDA2HIP_RAND_API_types.cpp
    ├── CUDA2HIP_RTC_API_functions.cpp
    ├── CUDA2HIP_RTC_API_types.cpp
    ├── CUDA2HIP_Runtime_API_functions.cpp
    ├── CUDA2HIP_Runtime_API_types.cpp
    ├── CUDA2HIP_SOLVER_API_functions.cpp
    ├── CUDA2HIP_SOLVER_API_types.cpp
    ├── CUDA2HIP_SPARSE_API_functions.cpp
    ├── CUDA2HIP_SPARSE_API_types.cpp
    ├── CUDA2HIP_Scripting.h
    ├── CUDA2HIP_TENSOR_API_functions.cpp
    ├── CUDA2HIP_TENSOR_API_types.cpp
    ├── HipifyAction.cpp
    ├── HipifyAction.h
    ├── LLVMCompat.cpp
    ├── LLVMCompat.h
    ├── ReplacementsFrontendActionFactory.h
    ├── Statistics.cpp
    ├── Statistics.h
    ├── StringUtils.cpp
    ├── StringUtils.h
    └── main.cpp
└── tests
    ├── lit.cfg
    ├── lit.site.cfg.in
    ├── run_test.bat
    ├── run_test.sh
    └── unit_tests
        ├── casts
            └── reinterpret_cast.cu
        ├── compilation_database
            ├── cd_intro.cu
            ├── compile_commands.json.in
            └── include
            │   └── inc.h
        ├── device
            ├── atomics.cu
            ├── device_symbols.cu
            └── math_functions.cu
        ├── graph
            └── simple_mechs.cu
        ├── headers
            ├── headers_test_01.cu
            ├── headers_test_02.cu
            ├── headers_test_03.cu
            ├── headers_test_04.cu
            ├── headers_test_05.cu
            ├── headers_test_06.cu
            ├── headers_test_06_12000.cu
            ├── headers_test_07.cu
            ├── headers_test_07_12000.cu
            ├── headers_test_08.cu
            ├── headers_test_08_12000.cu
            ├── headers_test_09.cu
            ├── headers_test_09_12000.cu
            ├── headers_test_09_fp16_7050.cu
            ├── headers_test_09_rocrand.cu
            ├── headers_test_10.cu
            ├── headers_test_11.cu
            ├── headers_test_12_SOLVER.cu
            ├── headers_test_12_SOLVER_10010.cu
            ├── headers_test_12_SOLVER_7050.cu
            └── headers_test_13.cu
        ├── kernel_launch
            ├── kernel_launch_01.cu
            └── kernel_launch_syntax.cu
        ├── libraries
            ├── CAFFE2
            │   ├── caffe2
            │   │   ├── core
            │   │   │   └── common_cudnn.h
            │   │   └── operators
            │   │   │   └── spatial_batch_norm_op.h
            │   ├── caffe2_01.cu
            │   └── caffe2_02.cu
            ├── CUB
            │   ├── cub_01.cu
            │   ├── cub_02.cu
            │   └── cub_03.cu
            ├── cuBLAS
            │   ├── cublas_0_based_indexing.cu
            │   ├── cublas_0_based_indexing_v2.cu
            │   ├── cublas_1_based_indexing.cu
            │   ├── cublas_sgemm_matrix_multiplication.cu
            │   ├── cublas_v1.cu
            │   └── rocBLAS
            │   │   ├── cublas_0_based_indexing_rocblas.cu
            │   │   ├── cublas_0_based_indexing_rocblas_v2.cu
            │   │   ├── cublas_1_based_indexing_rocblas.cu
            │   │   └── cublas_sgemm_matrix_multiplication_rocblas.cu
            ├── cuComplex
            │   └── cuComplex_Julia.cu
            ├── cuDNN
            │   ├── cudnn_convolution_forward.cu
            │   └── cudnn_softmax.cu
            ├── cuFFT
            │   └── simple_cufft.cu
            ├── cuRAND
            │   ├── benchmark_curand_generate.cpp
            │   ├── benchmark_curand_kernel.cpp
            │   ├── cmdparser.hpp
            │   └── poisson_api_example.cu
            └── cuSPARSE
            │   ├── cuSPARSE_01.cu
            │   ├── cuSPARSE_02.cu
            │   ├── cuSPARSE_03.cu
            │   ├── cuSPARSE_04.cu
            │   ├── cuSPARSE_05.cu
            │   ├── cuSPARSE_06.cu
            │   ├── cuSPARSE_07.cu
            │   ├── cuSPARSE_08.cu
            │   ├── cuSPARSE_09.cu
            │   ├── cuSPARSE_10.cu
            │   ├── cuSPARSE_11.cu
            │   └── cuSPARSE_12.cu
        ├── namespace
            └── ns_kernel_launch.cu
        ├── options
            └── kernel-execution-syntax
            │   ├── both-kernel-execution-syntax.cu
            │   ├── cuda-kernel-execution-syntax.cu
            │   ├── hip-kernel-execution-syntax.cu
            │   └── none-kernel-execution-syntax.cu
        ├── pp
            ├── pp_if_else_conditionals.cu
            ├── pp_if_else_conditionals_01.cu
            ├── pp_if_else_conditionals_01_LLVM_10.cu
            └── pp_if_else_conditionals_LLVM_10.cu
        ├── samples
            ├── 2_Cookbook
            │   ├── 0_MatrixTranspose
            │   │   └── MatrixTranspose.cpp
            │   ├── 11_texture_driver
            │   │   ├── tex2dKernel.cpp
            │   │   └── texture2dDrv.cpp
            │   ├── 13_occupancy
            │   │   └── occupancy.cpp
            │   ├── 1_hipEvent
            │   │   └── hipEvent.cpp
            │   ├── 2_Profiler
            │   │   └── Profiler.cpp
            │   ├── 7_streams
            │   │   └── stream.cpp
            │   └── 8_peer2peer
            │   │   └── peer2peer.cpp
            ├── MallocManaged.cpp
            ├── allocators.cu
            ├── axpy.cu
            ├── coalescing.cu
            ├── cudaRegister.cu
            ├── dynamic_shared_memory.cu
            ├── half2_allocators.cu
            ├── intro.cu
            ├── macro_check.cu
            ├── reduction.cu
            ├── square.cu
            ├── static_shared_memory.cu
            └── vec_add.cu
        └── synthetic
            ├── driver_defines.cu
            ├── driver_enums.cu
            ├── driver_functions.cu
            ├── driver_functions_internal.cu
            ├── driver_structs.cu
            ├── driver_typedefs.cu
            ├── driver_unions.cu
            ├── libraries
                ├── cublas2hipblas.cu
                ├── cublas2hipblas_v2.cu
                ├── cublas2rocblas.cu
                ├── cublas2rocblas_v2.cu
                ├── cublaslt2hipblaslt.cu
                ├── cublaslt2hipblaslt_10010_10020.cu
                ├── cudevice2hipdevice.cu
                ├── cudevice2hipdevice_12080.cu
                ├── cudevice2hipdevice_before_11080_after_12011.cu
                ├── cudnn2miopen.cu
                ├── cudnn2miopen_before_9000.cu
                ├── cufftXt2hipfftXt.cu
                ├── curand2hiprand.cu
                ├── curand2rocrand.cu
                ├── cusolver2hipsolver.cu
                ├── cusolver2rocsolver.cu
                ├── cusparse2hipsparse.cu
                ├── cusparse2hipsparse_11030_12000.cu
                ├── cusparse2hipsparse_12000.cu
                ├── cusparse2rocsparse.cu
                ├── cusparse2rocsparse_10000.cu
                ├── cusparse2rocsparse_10010.cu
                ├── cusparse2rocsparse_10010_12000.cu
                ├── cusparse2rocsparse_11010_12000.cu
                ├── cusparse2rocsparse_11030_12000.cu
                ├── cusparse2rocsparse_12000.cu
                ├── cusparse2rocsparse_7050.cu
                ├── cusparse2rocsparse_9020.cu
                ├── cusparse2rocsparse_9020_12000.cu
                ├── cusparse2rocsparse_before_11000.cu
                ├── cusparse2rocsparse_before_12000.cu
                └── cutensor2hiptensor.cu
            ├── nvrtc2hiprtc.cu
            ├── runtime_defines.cu
            ├── runtime_enums.cu
            ├── runtime_functions.cu
            ├── runtime_functions_11010.cu
            ├── runtime_functions_12000.cu
            ├── runtime_functions_9000.cu
            ├── runtime_structs.cu
            ├── runtime_typedefs.cu
            ├── runtime_unions.cu
            └── transforming_matchers.cu


/.azuredevops/rocm-ci.yml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   repositories:
 3 |   - repository: pipelines_repo
 4 |     type: github
 5 |     endpoint: ROCm
 6 |     name: ROCm/ROCm
 7 | 
 8 | variables:
 9 | - group: common
10 | - template: /.azuredevops/variables-global.yml@pipelines_repo
11 | 
12 | trigger:
13 |   batch: true
14 |   branches:
15 |     include:
16 |     - amd-staging
17 |     - amd-mainline
18 |   paths:
19 |     exclude:
20 |     - .github
21 |     - docs
22 |     - '.*.y*ml'
23 |     - '*.md'
24 |     - LICENSE.txt
25 | 
26 | pr:
27 |   autoCancel: true
28 |   branches:
29 |     include:
30 |     - amd-staging
31 |     - amd-mainline
32 |   paths:
33 |     exclude:
34 |     - .github
35 |     - docs
36 |     - '.*.y*ml'
37 |     - '*.md'
38 |     - LICENSE.txt
39 |   drafts: false
40 | 
41 | jobs:
42 |   - template: ${{ variables.CI_COMPONENT_PATH }}/HIPIFY.yml@pipelines_repo
43 | 


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @searlmc1 @emankov
2 | 
3 | # Documentation files
4 | docs/ @ROCm/rocm-documentation @searlmc1 @emankov
5 | *.md @ROCm/rocm-documentation @searlmc1 @emankov
6 | *.rst @ROCm/rocm-documentation @searlmc1 @emankov
7 | .readthedocs.yaml @ROCm/rocm-documentation @searlmc1 @emankov
8 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: true
2 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 5 | 
 6 | version: 2
 7 | updates:
 8 |   - package-ecosystem: "pip" # See documentation for possible values
 9 |     directory: "/docs/sphinx" # Location of package manifests
10 |     open-pull-requests-limit: 10
11 |     schedule:
12 |       interval: "daily"
13 |     labels:
14 |       - "documentation"
15 |       - "dependencies"
16 |     reviewers:
17 |       - "samjwu"
18 | 


--------------------------------------------------------------------------------
/.github/workflows/markdownlint.yml:
--------------------------------------------------------------------------------
 1 | name: Markdownlint mdl Action
 2 | on: push
 3 | 
 4 | jobs:
 5 |   build:
 6 |     runs-on: ubuntu-latest
 7 |     steps:
 8 |     - name: Check out code
 9 |       uses: actions/checkout@v3
10 |     - name: Use markdownlint
11 |       uses: actionshub/markdownlint@v3.1.3
12 |       with:
13 |         filesToIgnoreRegex: "README.md"
14 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.json
 2 | *.hip
 3 | **/__pycache__/*
 4 | .python-version
 5 | 
 6 | # documentation artifacts
 7 | build/
 8 | _build/
 9 | _images/
10 | _static/
11 | _templates/
12 | _toc.yml
13 | docBin/
14 | 


--------------------------------------------------------------------------------
/.mdlrc:
--------------------------------------------------------------------------------
1 | style "mdlrc-style.rb"
2 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | version: 2
 5 | 
 6 | sphinx:
 7 |    configuration: docs/conf.py
 8 | 
 9 | formats: [htmlzip, pdf, epub]
10 | 
11 | python:
12 |    install:
13 |    - requirements: docs/sphinx/requirements.txt
14 | 
15 | build:
16 |    os: ubuntu-22.04
17 |    tools:
18 |       python: "3.10"
19 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2025 Advanced Micro Devices, Inc.
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 
21 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # HIPIFY
 2 | 
 3 | HIPIFY is a set of tools that you can use to automatically translate CUDA source code into portable
 4 | [HIP](https://github.com/ROCm/HIP) C++.
 5 | 
 6 | ## Documentation
 7 | 
 8 | The published documentation is available at [HIPIFY](https://rocm.docs.amd.com/projects/HIPIFY/en/latest/index.html) in an organized, easy-to-read format, with search and a table of contents. The documentation source files reside in the `HIPIFY/docs` folder of this GitHub repository. As with all ROCm projects, the documentation is open source. For more information on contributing to the documentation, see [Contribute to ROCm documentation](https://rocm.docs.amd.com/en/latest/contribute/contributing.html).
 9 | 
10 | To build our documentation locally, run the following code.
11 | 
12 | ```bash
13 | cd docs
14 | 
 15 | pip3 install -r sphinx/requirements.txt
16 | 
17 | python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html
18 | ```
19 | 
20 | To build `CUDA2HIP` (CUDA APIs supported by HIP) documentation, run the following `hipify-clang`
21 | command. This builds the same content as
22 | [Supported CUDA APIs](./docs/reference/supported_apis.md#supported-cuda-apis).
23 | 
24 | ```bash
25 | hipify-clang --md --doc-format=full --doc-roc=joint
26 | 
27 | # Alternatively, you can use:
28 | 
29 | hipify-clang --md --doc-format=full --doc-roc=separate
30 | ```
31 | 
32 | To generate this documentation in CSV, use the `--csv` option instead of `--md`. Instead of using
33 | the `full` format, you can also build in `strict` or `compact` format.
34 | 
35 | To see all available options, use the `--help` or `--help-hidden` `hipify-clang` option.
36 | 


--------------------------------------------------------------------------------
/bin/findcode.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # usage: findcode.sh [DIR...]  -- prints every CUDA/C/C++ source and header file found under the given directories.
 3 | SEARCH_DIRS=("$@")
 4 | # The directories are kept in an array so that names containing whitespace or glob characters reach find unchanged.
 5 | find "${SEARCH_DIRS[@]}" -name '*.cu' -o -name '*.CU'
 6 | find "${SEARCH_DIRS[@]}" -name '*.cpp' -o -name '*.cxx' -o -name '*.c' -o -name '*.cc'
 7 | find "${SEARCH_DIRS[@]}" -name '*.CPP' -o -name '*.CXX' -o -name '*.C' -o -name '*.CC'
 8 | find "${SEARCH_DIRS[@]}" -name '*.cuh' -o -name '*.CUH'
 9 | find "${SEARCH_DIRS[@]}" -name '*.h' -o -name '*.hpp' -o -name '*.hh' -o -name '*.inc' -o -name '*.inl' -o -name '*.hxx' -o -name '*.hdl'
10 | find "${SEARCH_DIRS[@]}" -name '*.H' -o -name '*.HPP' -o -name '*.HH' -o -name '*.INC' -o -name '*.INL' -o -name '*.HXX' -o -name '*.HDL'
11 | 


--------------------------------------------------------------------------------
/bin/findcode_custom.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # usage: findcode_custom.sh [DIR...]  -- prints only the .cu/.CU files found under the given directories.
3 | SEARCH_DIRS=("$@")
4 | # The directories are kept in an array so that names containing whitespace or glob characters reach find unchanged.
5 | find "${SEARCH_DIRS[@]}" -name '*.cu' -and -not -name '*.cuh'
6 | find "${SEARCH_DIRS[@]}" -name '*.CU' -and -not -name '*.CUH'
7 | 


--------------------------------------------------------------------------------
/bin/findcode_headers.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # usage: findcode_headers.sh [DIR...]  -- prints only the header files (CUDA and C/C++) found under the given directories.
3 | SEARCH_DIRS=("$@")
4 | # The directories are kept in an array so that names containing whitespace or glob characters reach find unchanged.
5 | find "${SEARCH_DIRS[@]}" -name '*.cuh' -o -name '*.CUH'
6 | find "${SEARCH_DIRS[@]}" -name '*.h' -o -name '*.hpp' -o -name '*.hh' -o -name '*.inc' -o -name '*.inl' -o -name '*.hxx' -o -name '*.hdl'
7 | find "${SEARCH_DIRS[@]}" -name '*.H' -o -name '*.HPP' -o -name '*.HH' -o -name '*.INC' -o -name '*.INL' -o -name '*.HXX' -o -name '*.HDL'
8 | 


--------------------------------------------------------------------------------
/bin/findcode_sources.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # usage: findcode_sources.sh [DIR...]  -- prints only the source files (CUDA and C/C++, no headers) found under the given directories.
3 | SEARCH_DIRS=("$@")
4 | # The directories are kept in an array so that names containing whitespace or glob characters reach find unchanged.
5 | find "${SEARCH_DIRS[@]}" -name '*.cu' -and -not -name '*.cuh'
6 | find "${SEARCH_DIRS[@]}" -name '*.CU' -and -not -name '*.CUH'
7 | find "${SEARCH_DIRS[@]}" -name '*.cpp' -o -name '*.cxx' -o -name '*.c' -o -name '*.cc'
8 | find "${SEARCH_DIRS[@]}" -name '*.CPP' -o -name '*.CXX' -o -name '*.C' -o -name '*.CC'
9 | 


--------------------------------------------------------------------------------
/bin/finduncodep.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # usage: finduncodep.sh [DIR]  -- prints every path under DIR whose name does not carry one of the lowercase code extensions below.
3 | SEARCH_DIR=$1
4 | # DIR is quoted and defaults to "." when omitted; '*.hh' is excluded to agree with findcode.sh. NOTE(review): uppercase extensions (e.g. '*.CU') are still reported here although findcode.sh treats them as code.
5 | find "${SEARCH_DIR:-.}" -not -name '*.cu' -and -not -name '*.cpp' -and -not -name '*.cxx' -and -not -name '*.c' -and -not -name '*.cc' -and -not -name '*.cuh' -and -not -name '*.h' -and -not -name '*.hpp' -and -not -name '*.hh' -and -not -name '*.inc' -and -not -name '*.inl' -and -not -name '*.hxx' -and -not -name '*.hdl'
6 | 


--------------------------------------------------------------------------------
/bin/hipconvertinplace-perl.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #usage : hipconvertinplace-perl.sh DIRNAME [-filter=all|headers|sources|custom] [hipify-perl options]
 4 | 
 5 | #hipify "inplace" all code files in specified directory.
 6 | # This can be quite handy when dealing with an existing CUDA code base since the script
 7 | # preserves the existing directory structure.
 8 | 
 9 | #  For each code file, this script will:
10 | #   - If ".prehip" file does not exist, copy the original code to a new file with extension ".prehip". Then hipify the code file.
11 | #   - If ".prehip" file exists, this is used as input to hipify.
12 | # (this is useful for testing improvements to the hipify-perl toolset).
13 | # The optional -filter argument selects which sibling finder script lists the files (default: findcode.sh).
14 | SCRIPT_DIR="$(dirname "$(realpath "$0")")"
15 | SCRIPT_NAME=findcode.sh
16 | SEARCH_DIR=$1
17 | if [ "$2" = "-filter=all" ]
18 | then
19 | shift
20 | elif [ "$2" = "-filter=headers" ]
21 | then
22 | SCRIPT_NAME=findcode_headers.sh
23 | shift
24 | elif [ "$2" = "-filter=sources" ]
25 | then
26 | SCRIPT_NAME=findcode_sources.sh
27 | shift
28 | elif [ "$2" = "-filter=custom" ]
29 | then
30 | SCRIPT_NAME=findcode_custom.sh
31 | shift
32 | fi
33 | shift
34 | # After the shifts above, "$@" holds only the options that are forwarded to hipify-perl.
35 | "$SCRIPT_DIR/hipify-perl" -inplace -print-stats "$@" `"$SCRIPT_DIR/$SCRIPT_NAME" $SEARCH_DIR`
36 | 


--------------------------------------------------------------------------------
/bin/hipconvertinplace.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #usage : hipconvertinplace.sh DIRNAME [hipify options] [--] [clang options]
 4 | 
 5 | #hipify "inplace" all code files in specified directory.
 6 | # This can be quite handy when dealing with an existing CUDA code base since the script
 7 | # preserves the existing directory structure.
 8 | 
 9 | SCRIPT_DIR="$(dirname "$(realpath "$0")")"
10 | BIN_DIR="$SCRIPT_DIR/../../bin"
11 | SEARCH_DIR=$1
12 | 
13 | hipify_args=''
14 | while (( "$#" )); do
15 |   shift
16 |   if [ "$1" != "--" ]; then
17 |     hipify_args="$hipify_args $1"
18 |   else
19 |     shift
20 |     break
21 |   fi
22 | done
23 | clang_args="$@"
24 | 
25 | $BIN_DIR/hipify-clang -inplace -print-stats $hipify_args `$SCRIPT_DIR/findcode.sh $SEARCH_DIR` -- -x cuda $clang_args
26 | 


--------------------------------------------------------------------------------
/bin/hipexamine-perl.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #usage : hipexamine-perl.sh DIRNAME [hipify-perl options]
 4 | 
 5 | # Generate HIP stats (LOC, CUDA->API conversions, missing functionality) for all the code files
 6 | # in the specified directory.
 7 | 
 8 | # Tool paths are quoted so the script still works when it is installed under a path containing whitespace.
 9 | SCRIPT_DIR="$(dirname "$(realpath "$0")")"
10 | SEARCH_DIR=$1
11 | shift
12 | "$SCRIPT_DIR/hipify-perl" -no-output -print-stats "$@" `"$SCRIPT_DIR/findcode.sh" $SEARCH_DIR`
13 | 


--------------------------------------------------------------------------------
/bin/hipexamine.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #usage : hipexamine.sh DIRNAME [hipify options] [--] [clang options]
 4 | 
 5 | # Generate CUDA->HIP conversion statistics for all the code files in the specified directory.
 6 | 
 7 | SCRIPT_DIR="$(dirname "$(realpath "$0")")"
 8 | BIN_DIR="$SCRIPT_DIR/../../bin"
 9 | SEARCH_DIR=$1
10 | 
11 | hipify_args=''
12 | while (( "$#" )); do
13 |   shift
14 |   if [ "$1" != "--" ]; then
15 |     hipify_args="$hipify_args $1"
16 |   else
17 |     shift
18 |     break
19 |   fi
20 | done
21 | clang_args="$@"
22 | 
23 | $BIN_DIR/hipify-clang -examine $hipify_args `$SCRIPT_DIR/findcode.sh $SEARCH_DIR` -- -x cuda $clang_args
24 | 


--------------------------------------------------------------------------------
/docs/building/build-hipify-perl.rst:
--------------------------------------------------------------------------------
 1 | .. meta::
 2 |    :description: Tools to automatically translate CUDA source code into portable HIP C++
 3 |    :keywords: HIPIFY, ROCm, library, tool, CUDA, CUDA2HIP, hipify-clang, hipify-perl
 4 | 
 5 | .. _build-hipify-perl:
 6 | 
 7 | ====================
 8 | Building hipify-perl
 9 | ====================
10 | 
11 | ``hipify-perl`` is a perl-based script that heavily uses regular expressions, which is automatically generated from ``hipify-clang``. To generate ``hipify-perl``, run: 
12 | 
13 | .. code-block:: shell
14 |     
15 |     hipify-clang --perl
16 |     
17 | You can choose to specify the output directory for the generated ``hipify-perl`` file using the ``--o-hipify-perl-dir`` option.
18 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 6 | 
 7 | from rocm_docs import ROCmDocs
 8 | 
 9 | # for PDF output on Read the Docs
10 | project = "HIPIFY Documentation"
11 | author = "Advanced Micro Devices, Inc."
12 | copyright = "Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved."
13 | # Table-of-contents file, relative to this docs directory (presumably produced from sphinx/_toc.yml.in - confirm).
14 | external_toc_path = "./sphinx/_toc.yml"
15 | # Create the ROCmDocs helper for this project and run its setup step before its settings are read below.
16 | docs_core = ROCmDocs("HIPIFY Documentation")
17 | docs_core.setup()
18 | 
19 | external_projects_current_project = "hipify"
20 | # Copy every setting named in ROCmDocs.SPHINX_VARS from docs_core into this module's global namespace.
21 | for sphinx_var in ROCmDocs.SPHINX_VARS:
22 |     globals()[sphinx_var] = getattr(docs_core, sphinx_var)
23 | 


--------------------------------------------------------------------------------
/docs/data/csv_statistics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/csv_statistics.png


--------------------------------------------------------------------------------
/docs/data/patches/patch_for_clang_10.0.0_bug_47332.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_10.0.0_bug_47332.zip


--------------------------------------------------------------------------------
/docs/data/patches/patch_for_clang_10.0.1_bug_47332.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_10.0.1_bug_47332.zip


--------------------------------------------------------------------------------
/docs/data/patches/patch_for_clang_11.0.0_bug_47332.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_11.0.0_bug_47332.zip


--------------------------------------------------------------------------------
/docs/data/patches/patch_for_clang_14.0.0_bug_54609.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_14.0.0_bug_54609.zip


--------------------------------------------------------------------------------
/docs/data/patches/patch_for_clang_14.0.1_bug_54609.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_14.0.1_bug_54609.zip


--------------------------------------------------------------------------------
/docs/data/patches/patch_for_clang_14.0.2_bug_54609.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_14.0.2_bug_54609.zip


--------------------------------------------------------------------------------
/docs/data/patches/patch_for_clang_14.0.3_bug_54609.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_14.0.3_bug_54609.zip


--------------------------------------------------------------------------------
/docs/data/patches/patch_for_clang_14.0.4_bug_54609.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_14.0.4_bug_54609.zip


--------------------------------------------------------------------------------
/docs/data/patches/patch_for_clang_7.0.0_bug_38811.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_7.0.0_bug_38811.zip


--------------------------------------------------------------------------------
/docs/data/patches/patch_for_clang_7.0.1_bug_38811.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_7.0.1_bug_38811.zip


--------------------------------------------------------------------------------
/docs/data/patches/patch_for_clang_7.1.0_bug_38811.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_7.1.0_bug_38811.zip


--------------------------------------------------------------------------------
/docs/data/patches/patch_for_clang_8.0.0_bug_38811.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_8.0.0_bug_38811.zip


--------------------------------------------------------------------------------
/docs/data/patches/patch_for_clang_8.0.1_bug_38811.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_8.0.1_bug_38811.zip


--------------------------------------------------------------------------------
/docs/how-to/hipify-perl.rst:
--------------------------------------------------------------------------------
 1 | .. meta::
 2 |    :description: Tools to automatically translate CUDA source code into portable HIP C++
 3 |    :keywords: HIPIFY, ROCm, library, tool, CUDA, CUDA2HIP, hipify-clang, hipify-perl
 4 | 
 5 | .. _hipify-perl:
 6 | 
 7 | ===================
 8 | Using hipify-perl
 9 | ===================
10 | 
11 | ``hipify-perl`` is a Perl-based script that relies heavily on regular expressions and is automatically generated from ``hipify-clang``.
12 | 
13 | **Advantages:**
14 | 
15 | - Ease of use
16 | - No correctness checks of the input NVIDIA CUDA source code are required
17 | - No dependency on third party tools, including CUDA
18 | 
19 | **Disadvantages:**
20 | 
21 | - Inability or difficulty in implementing the following constructs:
22 | 
23 |   - Macro expansion
24 |   - Namespaces:
25 | 
26 |     - Redefinition of CUDA entities in user namespaces
27 |     - Using directive
28 | 
29 |   - Templates (some cases)
30 |   - Device or host function calls differentiation
31 |   - Correct injection of header files
32 |   - Parsing complicated argument lists
33 | 
34 | Example
35 | =======
36 | 
37 | For additional details on the following ``hipify-perl`` command options, see :ref:`hipify_perl-command`. For more advanced translation needs use ``hipify-clang`` as it is more comprehensive and accurate. 
38 | 
39 | Convert a simple CUDA file (``square.cu``) to HIP using ``hipify-perl``:
40 | 
41 | .. code-block:: shell
42 | 
43 |     hipify-perl square.cu -o square.cu.hip
44 | 
45 | This command translates the input file and writes the result to ``square.cu.hip``.
46 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. meta::
 2 |    :description: Tools to automatically translate CUDA source code into portable HIP C++
 3 |    :keywords: HIPIFY, ROCm, library, tool, CUDA, CUDA2HIP, hipify-clang, hipify-perl
 4 | 
 5 | .. _index:
 6 | 
 7 | =====================
 8 | HIPIFY documentation
 9 | =====================
10 | 
11 | HIPIFY is a ROCm tool to help developers migrate GPU programming from NVIDIA's CUDA language to AMD's HIP C++ programming language for use on AMD GPUs. HIPIFY includes two tools offering different levels of capability: 
12 | 
13 | •	``hipify-clang``: A clang-based tool that parses CUDA code and converts it to HIP code. It handles syntax changes, API calls, and kernel launch differences.
14 | •	``hipify-perl``: A simpler tool generated from ``hipify-clang`` that replaces CUDA API calls with HIP equivalents for basic code translation needs. ``hipify-perl`` is useful for simple CUDA programs, but offers less error detection when running into issues during translation. 
15 | 
16 | .. note::
17 |     
18 |     `hipify_torch <https://github.com/ROCm/hipify_torch>`_ is a related tool that also translates CUDA source code into portable HIP C++. It was developed as part of the PyTorch project to cater to the project's unique requirements, was found to be useful for PyTorch-related projects, and released as an independent utility.
19 | 
20 | HIPIFY does not automatically convert all CUDA code into HIP code seamlessly. While it is a powerful tool for translating CUDA code to HIP, there are some limitations and areas where manual intervention is often required. HIPIFY can automatically convert many CUDA runtime API calls, kernel launch syntax, standard CUDA library functions where there is a HIP library equivalent, and specific keywords like ``__global__`` and ``__device__``. However, HIP is not a complete replacement for CUDA, and HIPIFY cannot automatically translate all code. CUDA libraries or third-party libraries that have no HIP equivalent cannot be translated. In addition, code which is optimized for performance on NVIDIA GPUs might require additional rework to optimize performance on AMD GPUs. 
21 | 
22 | After migrating code through HIPIFY, you should perform a code review to ensure functional correctness and replace any unsupported libraries or constructs with HIP or ROCm features. Then debug and test the new HIP program, and optimize its performance on the target AMD GPUs. 
23 | 
24 | HIPIFY is open-source and freely available as part of the ROCm ecosystem. You can find the HIPIFY code on AMD's `GitHub HIPIFY repository <https://github.com/ROCm/HIPIFY>`_.
25 | 
26 | The documentation is structured as follows:
27 | 
28 | .. grid:: 2
29 |   :gutter: 3
30 | 
31 |   .. grid-item-card:: Building
32 | 
33 |     * :ref:`build-hipify-clang`
34 |     * :ref:`build-hipify-perl`
35 |     
36 |   .. grid-item-card:: How to
37 | 
38 |     * :doc:`Use hipify-clang <./how-to/hipify-clang>`
39 |     * :doc:`Use hipify-perl <./how-to/hipify-perl>`
40 |     
41 |   .. grid-item-card:: API reference
42 | 
43 |     * :ref:`hipify_clang-command`
44 |     * :ref:`hipify_perl-command`
45 |     * :doc:`Supported APIs <./reference/supported_apis>`
46 |      
47 | To contribute to the documentation, refer to
48 | `Contributing to ROCm  <https://rocm.docs.amd.com/en/latest/contribute/contributing.html>`_.
49 | 
50 | You can find licensing information on the `Licensing <https://rocm.docs.amd.com/en/latest/about/license.html>`_ page.
51 | 


--------------------------------------------------------------------------------
/docs/license.md:
--------------------------------------------------------------------------------
1 | # License
2 | 
3 | ```{include} ../LICENSE.txt
4 | ```
5 | 


--------------------------------------------------------------------------------
/docs/reference/hipify-perl-cmd.rst:
--------------------------------------------------------------------------------
 1 | .. meta::
 2 |    :description: Tools to automatically translate CUDA source code into portable HIP C++
 3 |    :keywords: HIPIFY, ROCm, library, tool, CUDA, CUDA2HIP, hipify-clang, hipify-perl
 4 | 
 5 | .. _hipify_perl-command:
 6 | 
 7 | **************************************************************************
 8 | hipify-perl command
 9 | **************************************************************************
10 | 
11 | For a list of ``hipify-perl`` options, run: 
12 | 
13 | .. code-block:: shell
14 | 
15 |   hipify-perl --help
16 | 
17 | Output:
18 | =======
19 | 
20 | Usage
21 | -----
22 | 
23 | .. code-block:: shell
24 | 
25 |   hipify-perl [options] <source0> [... <sourceN>]
26 | 
27 | Options
28 | -------
29 | 
30 | .. # COMMENT: The following lines define a break for use in the table below. 
31 | .. |br| raw:: html 
32 | 
33 |     <br />
34 | 
35 | .. list-table::
36 |     :widths: 2 5
37 | 
38 |     * - **Options**
39 |       - **Description**
40 | 
41 |     * - ``-cuda-kernel-execution-syntax`` 
42 |       - Keep CUDA kernel launch syntax (default)
43 | 
44 |     * - ``-examine``                      
45 |       - Combines ``-no-output`` and ``-print-stats`` options
46 | 
47 |     * - ``-exclude-dirs=<string>``               
48 |       - Exclude directories
49 | 
50 |     * - ``-exclude-files=<string>``              
51 |       - Exclude files
52 | 
53 |     * - ``-experimental``                 
54 |       - HIPIFY experimentally supported APIs
55 | 
56 |     * - ``-help``                         
57 |       - Display available options
58 | 
59 |     * - ``-hip-kernel-execution-syntax``  
60 |       - Transform CUDA kernel launch syntax to a regular HIP function call (overrides ``-cuda-kernel-execution-syntax``)
61 | 
62 |     * - ``-inplace``                      
63 |       - Backs up the input file in ``.prehip`` file, and modifies the input file in-place
64 | 
65 |     * - ``-no-output``                    
66 |       - Don't write any translated output to stdout
67 | 
68 |     * - ``-o=<string>``                          
69 |       - Output filename
70 | 
71 |     * - ``-print-stats``                  
72 |       - Print translation statistics as described in :ref:`hipify-stats`
73 | 
74 |     * - ``-quiet-warnings``                
75 |       - Don't print warnings on unknown CUDA identifiers
76 | 
77 |     * - ``-roc``                          
78 |       - Translate to ``roc`` libraries instead of ``hip`` libraries where possible
79 | 
80 |     * - ``-version``                      
81 |       - The supported HIP version
82 | 
83 |     * - ``-whitelist=<string>``                  
84 |       - Whitelist of identifiers
85 | 
86 | 


--------------------------------------------------------------------------------
/docs/reference/supported_apis.md:
--------------------------------------------------------------------------------
 1 | <head>
 2 |   <meta charset="UTF-8">
 3 |   <meta name="description" content="NVIDIA CUDA APIs supported by HIPIFY">
 4 | 
 5 |   <meta name="keywords" content="HIPIFY, ROCm, NVIDIA, CUDA, CUDA2HIP, hipify-clang, hipify-perl">
 6 | </head>
 7 | 
 8 | # Supported NVIDIA CUDA APIs
 9 | 
10 | |     **CUDA**     | **HIP**                                                           |                            **ROC**                        |                            **HIP & ROC**                                |
11 | |:-----------------|:------------------------------------------------------------------|:----------------------------------------------------------|:------------------------------------------------------------------------|
12 | | CUDA Runtime API | [HIP API](tables/CUDA_Runtime_API_functions_supported_by_HIP.md)  |                                                           |                                                                         |
13 | | CUDA Driver API  | [HIP API](tables/CUDA_Driver_API_functions_supported_by_HIP.md)   |                                                           |                                                                         |
14 | | CUComplex API    | [HIP API](tables/cuComplex_API_supported_by_HIP.md)               |                                                           |                                                                         |
15 | | CUDA Device API  | [HIP Device API](tables/CUDA_Device_API_supported_by_HIP.md)      |                                                           |                                                                         |
16 | | CUDA RTC API     | [HIP RTC API](tables/CUDA_RTC_API_supported_by_HIP.md)            |                                                           |                                                                         |
17 | | CUBLAS API       | [HIP BLAS API](tables/CUBLAS_API_supported_by_HIP.md)             | [ROC BLAS API](tables/CUBLAS_API_supported_by_ROC.md)     | [HIP + ROC BLAS API](tables/CUBLAS_API_supported_by_HIP_and_ROC.md)     |
18 | | CUSPARSE API     | [HIP SPARSE API](tables/CUSPARSE_API_supported_by_HIP.md)         | [ROC SPARSE API](tables/CUSPARSE_API_supported_by_ROC.md) | [HIP + ROC SPARSE API](tables/CUSPARSE_API_supported_by_HIP_and_ROC.md) |
19 | | CUSOLVER API     | [HIP SOLVER API](tables/CUSOLVER_API_supported_by_HIP.md)         |                                                           |                                                                         |
20 | | CURAND API       | [HIP RAND API](tables/CURAND_API_supported_by_HIP.md)             | [ROC RAND API](tables/CURAND_API_supported_by_ROC.md)     | [HIP + ROC RAND API](tables/CURAND_API_supported_by_HIP_and_ROC.md)     |
21 | | CUFFT API        | [HIP FFT API](tables/CUFFT_API_supported_by_HIP.md)               |                                                           |                                                                         |
22 | | CUDNN API        | [HIP DNN API](tables/CUDNN_API_supported_by_HIP.md)               | [MIOPEN API](tables/CUDNN_API_supported_by_MIOPEN.md)     | [HIP + MIOPEN API](tables/CUDNN_API_supported_by_HIP_and_MIOPEN.md)     |
23 | | CUTENSOR API     | [HIP TENSOR API](tables/CUTENSOR_API_supported_by_HIP.md)         |                                                           |                                                                         |
24 | | CUB API          | [HIP CUB API](tables/CUB_API_supported_by_HIP.md)                 |                                                           |                                                                         |
25 | 
26 | To generate the above documentation with the information about all supported CUDA APIs in Markdown format, run `hipify-clang --md --doc-format=full`, with or without specifying the output directory (`-o`). Add `--doc-roc=separate` to generate the HIP and ROC tables separately, or `--doc-roc=joint` to generate them in the joint format (ROC & HIP).
27 | 


--------------------------------------------------------------------------------
/docs/reference/tables/CUB_API_supported_by_HIP.md:
--------------------------------------------------------------------------------
 1 | <head>
 2 |     <meta charset="UTF-8">
 3 |     <meta name="description" content="NVIDIA CUDA APIs supported by HIPIFY">
 4 |     <meta name="keywords" content="HIPIFY, HIP, ROCm, NVIDIA, CUDA, CUDA2HIP, hipification, hipify-clang, hipify-perl, CUB, hipCUB">
 5 | </head>
 6 | 
 7 | # CUB API supported by HIP
 8 | 
 9 | 
10 | **Note\:** In the tables that follow the columns marked `A`, `D`, `C`, `R`, and `E` mean the following:
11 | **A** - Added; **D** - Deprecated; **C** - Changed; **R** - Removed; **E** - Experimental
12 | 
13 | ## **1. CUB Data types**
14 | 
15 | |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**|
16 | |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:|
17 | |`CUB_ALIGN`| | | | | | | | | | |
18 | |`CUB_CAT`| | | | | | | | | | |
19 | |`CUB_CAT_`| | | | | | | | | | |
20 | |`CUB_COMPILER_DEPRECATION`| | | | | | | | | | |
21 | |`CUB_COMPILER_DEPRECATION_SOFT`| | | | | | | | | | |
22 | |`CUB_COMP_DEPR_IMPL`| | | | | | | | | | |
23 | |`CUB_COMP_DEPR_IMPL0`| | | | | | | | | | |
24 | |`CUB_COMP_DEPR_IMPL1`| | | | | | | | | | |
25 | |`CUB_CPLUSPLUS`| | | | | | | | | | |
26 | |`CUB_CPP_DIALECT`| | | | | | | | | | |
27 | |`CUB_DEFINE_DETECT_NESTED_TYPE`| | | | | | | | | | |
28 | |`CUB_DEFINE_VECTOR_TYPE`| | | | | | | | | | |
29 | |`CUB_DEPRECATED`| | | | | | | | | | |
30 | |`CUB_DEVICE_COMPILER`| | | | | | | | | | |
31 | |`CUB_DEVICE_COMPILER_CLANG`| | | | | | | | | | |
32 | |`CUB_DEVICE_COMPILER_GCC`| | | | | | | | | | |
33 | |`CUB_DEVICE_COMPILER_MSVC`| | | | | | | | | | |
34 | |`CUB_DEVICE_COMPILER_NVCC`| | | | | | | | | | |
35 | |`CUB_DEVICE_COMPILER_UNKNOWN`| | | | | | | | | | |
36 | |`CUB_HOST_COMPILER`| | | | | | | | | | |
37 | |`CUB_HOST_COMPILER_CLANG`| | | | | | | | | | |
38 | |`CUB_HOST_COMPILER_GCC`| | | | | | | | | | |
39 | |`CUB_HOST_COMPILER_MSVC`| | | | | | | | | | |
40 | |`CUB_HOST_COMPILER_UNKNOWN`| | | | | | | | | | |
41 | |`CUB_IGNORE_DEPRECATED_API`| | | | | | | | | | |
42 | |`CUB_IGNORE_DEPRECATED_COMPILER`| | | | | | | | | | |
43 | |`CUB_IGNORE_DEPRECATED_CPP_11`| | | | | | | | | | |
44 | |`CUB_IGNORE_DEPRECATED_CPP_DIALECT`| | | | | | | | | | |
45 | |`CUB_IGNORE_DEPRECATED_DIALECT`| | | | | | | | | | |
46 | |`CUB_INCLUDE_DEVICE_CODE`| | | | | | | | | | |
47 | |`CUB_INCLUDE_HOST_CODE`| | | | | | | | | | |
48 | |`CUB_IS_DEVICE_CODE`| | | | | | | | | | |
49 | |`CUB_IS_HOST_CODE`| | | | | | | | | | |
50 | |`CUB_LOG_SMEM_BANKS`| | | | | | | | | | |
51 | |`CUB_LOG_WARP_THREADS`| | | | | | | | | | |
52 | |`CUB_MAX`| | | | |`CUB_MAX`|4.5.0| | | | |
53 | |`CUB_MAX_DEVICES`| | | | | | | | | | |
54 | |`CUB_MIN`| | | | |`CUB_MIN`|4.5.0| | | | |
55 | |`CUB_MSVC_VERSION`| | | | | | | | | | |
56 | |`CUB_MSVC_VERSION_FULL`| | | | | | | | | | |
57 | |`CUB_NAMESPACE_BEGIN`| | | | |`BEGIN_HIPCUB_NAMESPACE`|2.5.0| | | | |
58 | |`CUB_NAMESPACE_END`| | | | |`END_HIPCUB_NAMESPACE`|2.5.0| | | | |
59 | |`CUB_PREFER_CONFLICT_OVER_PADDING`| | | | | | | | | | |
60 | |`CUB_PREVENT_MACRO_SUBSTITUTION`| | | | | | | | | | |
61 | |`CUB_PTX_ARCH`| | | | |`HIPCUB_ARCH`|2.5.0| | | | |
62 | |`CUB_PTX_LOG_SMEM_BANKS`| | | | | | | | | | |
63 | |`CUB_PTX_LOG_WARP_THREADS`| | | | | | | | | | |
64 | |`CUB_PTX_PREFER_CONFLICT_OVER_PADDING`| | | | | | | | | | |
65 | |`CUB_PTX_SMEM_BANKS`| | | | | | | | | | |
66 | |`CUB_PTX_SUBSCRIPTION_FACTOR`| | | | | | | | | | |
67 | |`CUB_PTX_WARP_THREADS`| | | | |`HIPCUB_WARP_THREADS`|2.5.0| | | | |
68 | |`CUB_QUOTIENT_CEILING`| | | | | | | | | | |
69 | |`CUB_QUOTIENT_FLOOR`| | | | | | | | | | |
70 | |`CUB_ROUND_DOWN_NEAREST`| | | | | | | | | | |
71 | |`CUB_ROUND_UP_NEAREST`| | | | | | | | | | |
72 | |`CUB_RUNTIME_ENABLED`| | | | | | | | | | |
73 | |`CUB_RUNTIME_FUNCTION`| | | | |`HIPCUB_RUNTIME_FUNCTION`|2.5.0| | | | |
74 | |`CUB_SMEM_BANKS`| | | | | | | | | | |
75 | |`CUB_STATIC_ASSERT`| | | | | | | | | | |
76 | |`CUB_STDERR`| | | | |`HIPCUB_STDERR`|2.5.0| | | | |
77 | |`CUB_SUBSCRIPTION_FACTOR`| | | | | | | | | | |
78 | |`CUB_USE_COOPERATIVE_GROUPS`| | | | | | | | | | |
79 | |`CubDebug`| | | | |`HipcubDebug`|2.5.0| | | | |
80 | |`CubDebugExit`| | | | | | | | | | |
81 | |`CubVector`| | | | | | | | | | |
82 | |`_CUB_ASM_PTR_`| | | | | | | | | | |
83 | |`_CUB_ASM_PTR_SIZE_`| | | | | | | | | | |
84 | |`_CubLog`| | | | |`_HipcubLog`|2.5.0| | | | |
85 | |`__CUB_ALIGN_BYTES`| | | | |`__HIPCUB_ALIGN_BYTES`|4.5.0| | | | |
86 | |`__CUB_LP64__`| | | | | | | | | | |
87 | 
88 | 


--------------------------------------------------------------------------------
/docs/reference/tables/CUDA_RTC_API_supported_by_HIP.md:
--------------------------------------------------------------------------------
 1 | <head>
 2 |     <meta charset="UTF-8">
 3 |     <meta name="description" content="NVIDIA CUDA APIs supported by HIPIFY">
 4 |     <meta name="keywords" content="HIPIFY, HIP, ROCm, NVIDIA, CUDA, CUDA2HIP, hipification, hipify-clang, hipify-perl, RTC, Runtime Compilation">
 5 | </head>
 6 | 
 7 | # CUDA RTC API supported by HIP
 8 | 
 9 | 
10 | **Note\:** In the tables that follow the columns marked `A`, `D`, `C`, `R`, and `E` mean the following:
11 | **A** - Added; **D** - Deprecated; **C** - Changed; **R** - Removed; **E** - Experimental
12 | 
13 | ## **1. RTC Data types**
14 | 
15 | |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**|
16 | |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:|
17 | |`NVRTC_ERROR_BUILTIN_OPERATION_FAILURE`| | | | |`HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE`|2.6.0| | | | |
18 | |`NVRTC_ERROR_CANCELLED`|12.8| | | | | | | | | |
19 | |`NVRTC_ERROR_COMPILATION`| | | | |`HIPRTC_ERROR_COMPILATION`|2.6.0| | | | |
20 | |`NVRTC_ERROR_INTERNAL_ERROR`|8.0| | | |`HIPRTC_ERROR_INTERNAL_ERROR`|2.6.0| | | | |
21 | |`NVRTC_ERROR_INVALID_INPUT`| | | | |`HIPRTC_ERROR_INVALID_INPUT`|2.6.0| | | | |
22 | |`NVRTC_ERROR_INVALID_OPTION`| | | | |`HIPRTC_ERROR_INVALID_OPTION`|2.6.0| | | | |
23 | |`NVRTC_ERROR_INVALID_PROGRAM`| | | | |`HIPRTC_ERROR_INVALID_PROGRAM`|2.6.0| | | | |
24 | |`NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID`|8.0| | | |`HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID`|2.6.0| | | | |
25 | |`NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION`|8.0| | | |`HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION`|2.6.0| | | | |
26 | |`NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION`|8.0| | | |`HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION`|2.6.0| | | | |
27 | |`NVRTC_ERROR_NO_PCH_CREATE_ATTEMPTED`|12.8| | | | | | | | | |
28 | |`NVRTC_ERROR_OUT_OF_MEMORY`| | | | |`HIPRTC_ERROR_OUT_OF_MEMORY`|2.6.0| | | | |
29 | |`NVRTC_ERROR_PCH_CREATE`|12.8| | | | | | | | | |
30 | |`NVRTC_ERROR_PCH_CREATE_HEAP_EXHAUSTED`|12.8| | | | | | | | | |
31 | |`NVRTC_ERROR_PROGRAM_CREATION_FAILURE`| | | | |`HIPRTC_ERROR_PROGRAM_CREATION_FAILURE`|2.6.0| | | | |
32 | |`NVRTC_ERROR_TIME_FILE_WRITE_FAILED`|12.1| | | | | | | | | |
33 | |`NVRTC_SUCCESS`| | | | |`HIPRTC_SUCCESS`|2.6.0| | | | |
34 | |`_nvrtcProgram`| | | | |`_hiprtcProgram`|2.6.0| | | | |
35 | |`nvrtcProgram`| | | | |`hiprtcProgram`|2.6.0| | | | |
36 | |`nvrtcResult`| | | | |`hiprtcResult`|2.6.0| | | | |
37 | 
38 | ## **2. RTC API functions**
39 | 
40 | |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**|
41 | |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:|
42 | |`nvrtcAddNameExpression`|8.0| | | |`hiprtcAddNameExpression`|2.6.0| | | | |
43 | |`nvrtcCompileProgram`| | |8.0| |`hiprtcCompileProgram`|2.6.0| | | | |
44 | |`nvrtcCreateProgram`| | |8.0| |`hiprtcCreateProgram`|2.6.0| | | | |
45 | |`nvrtcDestroyProgram`| | | | |`hiprtcDestroyProgram`|2.6.0| | | | |
46 | |`nvrtcGetCUBIN`|11.1| | | |`hiprtcGetBitcode`|5.3.0| | | | |
47 | |`nvrtcGetCUBINSize`|11.1| | | |`hiprtcGetBitcodeSize`|5.3.0| | | | |
48 | |`nvrtcGetErrorString`| | | | |`hiprtcGetErrorString`|2.6.0| | | | |
49 | |`nvrtcGetLTOIR`|12.0| | | | | | | | | |
50 | |`nvrtcGetLTOIRSize`|12.0| | | | | | | | | |
51 | |`nvrtcGetLoweredName`|8.0| | | |`hiprtcGetLoweredName`|2.6.0| | | | |
52 | |`nvrtcGetNVVM`|11.4|12.0| | | | | | | | |
53 | |`nvrtcGetNVVMSize`|11.4|12.0| | | | | | | | |
54 | |`nvrtcGetNumSupportedArchs`|11.2| | | | | | | | | |
55 | |`nvrtcGetOptiXIR`|12.0| | | | | | | | | |
56 | |`nvrtcGetOptiXIRSize`|12.0| | | | | | | | | |
57 | |`nvrtcGetPCHCreateStatus`|12.8| | | | | | | | | |
58 | |`nvrtcGetPCHHeapSize`|12.8| | | | | | | | | |
59 | |`nvrtcGetPCHHeapSizeRequired`|12.8| | | | | | | | | |
60 | |`nvrtcGetPTX`| | | | |`hiprtcGetCode`|2.6.0| | | | |
61 | |`nvrtcGetPTXSize`| | | | |`hiprtcGetCodeSize`|2.6.0| | | | |
62 | |`nvrtcGetProgramLog`| | | | |`hiprtcGetProgramLog`|2.6.0| | | | |
63 | |`nvrtcGetProgramLogSize`| | | | |`hiprtcGetProgramLogSize`|2.6.0| | | | |
64 | |`nvrtcGetSupportedArchs`|11.2| | | | | | | | | |
65 | |`nvrtcSetFlowCallback`|12.8| | | | | | | | | |
66 | |`nvrtcSetPCHHeapSize`|12.8| | | | | | | | | |
67 | |`nvrtcVersion`| | | | |`hiprtcVersion`|2.6.0| | | | |
68 | 
69 | 


--------------------------------------------------------------------------------
/docs/reference/tables/cuComplex_API_supported_by_HIP.md:
--------------------------------------------------------------------------------
 1 | <head>
 2 |     <meta charset="UTF-8">
 3 |     <meta name="description" content="NVIDIA CUDA APIs supported by HIPIFY">
 4 |     <meta name="keywords" content="HIPIFY, HIP, ROCm, NVIDIA, CUDA, CUDA2HIP, hipification, hipify-clang, hipify-perl, Runtime API, Complex">
 5 | </head>
 6 | 
 7 | # CUCOMPLEX API supported by HIP
 8 | 
 9 | 
10 | **Note\:** In the tables that follow the columns marked `A`, `D`, `C`, `R`, and `E` mean the following:
11 | **A** - Added; **D** - Deprecated; **C** - Changed; **R** - Removed; **E** - Experimental
12 | 
13 | ## **1. cuComplex Data types**
14 | 
15 | |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**|
16 | |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:|
17 | |`cuComplex`| | | | |`hipComplex`|1.6.0| | | | |
18 | |`cuDoubleComplex`| | | | |`hipDoubleComplex`|1.6.0| | | | |
19 | |`cuFloatComplex`| | | | |`hipFloatComplex`|1.6.0| | | | |
20 | 
21 | ## **2. cuComplex API functions**
22 | 
23 | |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**|
24 | |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:|
25 | |`cuCabs`| | | | |`hipCabs`|1.6.0| | | | |
26 | |`cuCabsf`| | | | |`hipCabsf`|1.6.0| | | | |
27 | |`cuCadd`| | | | |`hipCadd`|1.6.0| | | | |
28 | |`cuCaddf`| | | | |`hipCaddf`|1.6.0| | | | |
29 | |`cuCdiv`| | | | |`hipCdiv`|1.6.0| | | | |
30 | |`cuCdivf`| | | | |`hipCdivf`|1.6.0| | | | |
31 | |`cuCfma`| | | | |`hipCfma`|1.6.0| | | | |
32 | |`cuCfmaf`| | | | |`hipCfmaf`|1.6.0| | | | |
33 | |`cuCimag`| | | | |`hipCimag`|1.6.0| | | | |
34 | |`cuCimagf`| | | | |`hipCimagf`|1.6.0| | | | |
35 | |`cuCmul`| | | | |`hipCmul`|1.6.0| | | | |
36 | |`cuCmulf`| | | | |`hipCmulf`|1.6.0| | | | |
37 | |`cuComplexDoubleToFloat`| | | | |`hipComplexDoubleToFloat`|1.6.0| | | | |
38 | |`cuComplexFloatToDouble`| | | | |`hipComplexFloatToDouble`|1.6.0| | | | |
39 | |`cuConj`| | | | |`hipConj`|1.6.0| | | | |
40 | |`cuConjf`| | | | |`hipConjf`|1.6.0| | | | |
41 | |`cuCreal`| | | | |`hipCreal`|1.6.0| | | | |
42 | |`cuCrealf`| | | | |`hipCrealf`|1.6.0| | | | |
43 | |`cuCsub`| | | | |`hipCsub`|1.6.0| | | | |
44 | |`cuCsubf`| | | | |`hipCsubf`|1.6.0| | | | |
45 | |`make_cuComplex`| | | | |`make_hipComplex`|1.6.0| | | | |
46 | |`make_cuDoubleComplex`| | | | |`make_hipDoubleComplex`|1.6.0| | | | |
47 | |`make_cuFloatComplex`| | | | |`make_hipFloatComplex`|1.6.0| | | | |
48 | 
49 | 


--------------------------------------------------------------------------------
/docs/sphinx/_toc.yml.in:
--------------------------------------------------------------------------------
 1 | # Anywhere {branch} is used, the branch name will be substituted.
 2 | # These comments will also be removed.
 3 | defaults:
 4 |   numbered: False
 5 | root: index
 6 | subtrees:
 7 | - caption: Building
 8 |   entries:
 9 |   - file: building/building-hipify
10 |   - file: building/build-hipify-perl
11 | - caption: How to
12 |   entries:
13 |   - file: how-to/hipify-clang
14 |     title: Use hipify-clang
15 |   - file: how-to/hipify-perl
16 |     title: Use hipify-perl
17 | - caption: API reference
18 |   entries:
19 |   - file: reference/hipify-clang-cmd
20 |   - file: reference/hipify-perl-cmd
21 |   - file: reference/supported_apis
22 |     subtrees:
23 |     - entries:
24 |       - file: reference/tables/CUDA_Runtime_API_functions_supported_by_HIP     
25 |         title: CUDA Runtime functions
26 |       - file: reference/tables/CUDA_Driver_API_functions_supported_by_HIP
27 |         title: CUDA Driver functions
28 |       - file: reference/tables/cuComplex_API_supported_by_HIP
29 |         title: cuComplex API
30 |       - file: reference/tables/CUDA_Device_API_supported_by_HIP
31 |         title: CUDA Device API
32 |       - file: reference/tables/CUDA_RTC_API_supported_by_HIP
33 |         title: CUDA RTC API
34 |       - file: reference/tables/CUBLAS_API_supported_by_HIP
35 |         title: cuBLAS API
36 |       - file: reference/tables/CUSPARSE_API_supported_by_HIP
37 |         title: cuSPARSE
38 |       - file: reference/tables/CUSOLVER_API_supported_by_HIP
39 |         title: cuSOLVER API
40 |       - file: reference/tables/CURAND_API_supported_by_HIP      
41 |         title: cuRAND API
42 |       - file: reference/tables/CUFFT_API_supported_by_HIP
43 |         title: cuFFT API
44 |       - file: reference/tables/CUDNN_API_supported_by_HIP     
45 |         title: cuDNN API
46 |       - file: reference/tables/CUTENSOR_API_supported_by_HIP     
47 |         title: cuTENSOR API
48 |       - file: reference/tables/CUB_API_supported_by_HIP
49 |         title: CUB API
50 | - caption: About
51 |   entries:
52 |   - file: license
53 | 


--------------------------------------------------------------------------------
/docs/sphinx/requirements.in:
--------------------------------------------------------------------------------
1 | rocm-docs-core==1.20.0
2 | 


--------------------------------------------------------------------------------
/hipify-backward-compat.cmake:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
 2 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 3 | # of this software and associated documentation files (the "Software"), to deal
 4 | # in the Software without restriction, including without limitation the rights
 5 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 6 | # copies of the Software, and to permit persons to whom the Software is
 7 | # furnished to do so, subject to the following conditions:
 8 | #
 9 | # The above copyright notice and this permission notice shall be included in
10 | # all copies or substantial portions of the Software.
11 | #
12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
18 | # THE SOFTWARE.
19 | 
20 | cmake_minimum_required(VERSION 3.16.8)
21 | 
22 | set(HIPIFY_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR})
23 | set(HIPIFY_WRAPPER_DIR ${HIPIFY_BUILD_DIR}/wrapper_dir)
24 | set(HIPIFY_WRAPPER_BIN_DIR ${HIPIFY_WRAPPER_DIR}/bin)
25 | 
26 | # With the file reorganization, hipify (and hip) will be installed in /opt/rocm-ver
27 | # instead of /opt/rocm-ver/hip/. To maintain backward compatibility,
28 | # the previous location (/opt/rocm-ver/hip/) will have soft links.
29 | # This file creates soft links to the binary files and installs them in the previous location.
30 | # Note: soft links are added for binary files.
31 | 
32 | # Function that adds one always-built (ALL) custom target per script; each target symlinks the script into ${HIPIFY_WRAPPER_BIN_DIR}.
33 | function(create_binary_symlink)
34 |   file(MAKE_DIRECTORY ${HIPIFY_WRAPPER_BIN_DIR})
35 |   # Public scripts (bin/hip* in the source tree): each link points to the relative path ../../${CMAKE_INSTALL_BINDIR}/<name>
36 |   file(GLOB binary_files ${CMAKE_SOURCE_DIR}/bin/hip*)
37 |   foreach(binary_file ${binary_files})
38 |     get_filename_component(file_name ${binary_file} NAME)
39 |     add_custom_target(link_${file_name} ALL
40 |                   WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
41 |                   COMMAND ${CMAKE_COMMAND} -E create_symlink
42 |                   ../../${CMAKE_INSTALL_BINDIR}/${file_name} ${HIPIFY_WRAPPER_BIN_DIR}/${file_name})
43 |   endforeach()
44 |   # Private scripts (bin/find* in the source tree): each link points to the relative path ../../${CMAKE_INSTALL_LIBEXECDIR}/hipify/<name>
45 |   file(GLOB binary_files ${CMAKE_SOURCE_DIR}/bin/find*)
46 |   foreach(binary_file ${binary_files})
47 |     get_filename_component(file_name ${binary_file} NAME)
48 |     add_custom_target(link_${file_name} ALL
49 |                   WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
50 |                   COMMAND ${CMAKE_COMMAND} -E create_symlink
51 |                   ../../${CMAKE_INSTALL_LIBEXECDIR}/hipify/${file_name} ${HIPIFY_WRAPPER_BIN_DIR}/${file_name})
52 |   endforeach()
53 | 
54 |   # Symlink for hipify-clang, added by explicit name rather than through the globs above
55 |   set(file_name "hipify-clang")
56 |   add_custom_target(link_${file_name} ALL
57 |                   WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
58 |                   COMMAND ${CMAKE_COMMAND} -E create_symlink
59 |                   ../../${CMAKE_INSTALL_BINDIR}/${file_name} ${HIPIFY_WRAPPER_BIN_DIR}/${file_name})
60 | endfunction()
61 | # Create symlink to binaries
62 | create_binary_symlink()
63 | #install symlink in hip folder
64 | install(DIRECTORY ${HIPIFY_WRAPPER_BIN_DIR}  DESTINATION hip)
65 | 


--------------------------------------------------------------------------------
/mdlrc-style.rb:
--------------------------------------------------------------------------------
 1 | # Ref: https://github.com/DavidAnson/markdownlint/blob/main/doc/Rules.md
 2 | all
 3 | 
 4 | # First header should be a top level header
 5 | exclude_rule 'MD002'
 6 | # Unordered list style
 7 | exclude_rule 'MD004'
 8 | # Unordered list indentation
 9 | exclude_rule 'MD007'
10 | # Trailing spaces
11 | exclude_rule 'MD009'
12 | # Multiple consecutive blank lines
13 | exclude_rule 'MD012'
14 | # Line length
15 | exclude_rule 'MD013'
16 | # No space after hash on atx style header
17 | exclude_rule 'MD018'
18 | # Headers should be surrounded by blank lines
19 | exclude_rule 'MD022'
20 | # Headers must start at the beginning of the line
21 | exclude_rule 'MD023'
22 | # Multiple headers with the same content
23 | exclude_rule 'MD024'
24 | # Multiple top level headers in the same document
25 | exclude_rule 'MD025'
26 | # Trailing punctuation in header
27 | exclude_rule 'MD026'
28 | # Ordered list item prefix
29 | exclude_rule 'MD029'
30 | # Fenced code blocks should be surrounded by blank lines
31 | exclude_rule 'MD031'
32 | # Lists should be surrounded by blank lines
33 | exclude_rule 'MD032'
34 | # Inline HTML
35 | exclude_rule 'MD033'
36 | # Bare URLs
37 | exclude_rule 'MD034'
38 | # Spaces inside emphasis markers
39 | exclude_rule 'MD037'
40 | # Fenced code blocks should have a language specified
41 | exclude_rule 'MD040'
42 | # First line in file should be a top level header
43 | exclude_rule 'MD041'
44 | # Code block style
45 | exclude_rule 'MD046'
46 | # File should end with a single newline character
47 | exclude_rule 'MD047'
48 | 


--------------------------------------------------------------------------------
/packaging/hipify-clang.postinst:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Debian postinst script. On "configure" it (re)creates, inside @ROCMBINDIR@,
 4 | # relative symlinks to the tools found under @HIPBINDIR@.
 5 | # The at-sign-delimited tokens look like build-time template placeholders
 6 | # (confirm against the packaging CMake). Shell variables are written in the
 7 | # plain $name form, never with braces, so that a template pass which also
 8 | # expands brace-style references cannot swallow them.
 9 | 
10 | set -e
11 | 
12 | case "$1" in
13 |   abort-deconfigure|abort-remove|abort-upgrade)
14 |     echo "$1"
15 |   ;;
16 |   configure)
17 |     mkdir -p "@ROCMBINDIR@"
18 |     # Everything is quoted: a working directory containing whitespace would
19 |     # otherwise make the final cd fail and, because of set -e, abort the
20 |     # package configuration.
21 |     CURRENTDIR=$(pwd)
22 |     cd "@ROCMBINDIR@"
23 |     for tool in hipify-perl hipify-clang \
24 |                 hipconvertinplace-perl.sh hipconvertinplace.sh \
25 |                 hipexamine-perl.sh hipexamine.sh \
26 |                 findcode.sh findcode_headers.sh findcode_sources.sh \
27 |                 findcode_custom.sh finduncodep.sh; do
28 |       # -r: relative link target, -s: symbolic, -f: replace an existing link
29 |       ln -r -s -f "@HIPBINDIR@/$tool" "@ROCMBINDIR@/$tool"
30 |     done
31 |     cd "$CURRENTDIR"
32 |   ;;
33 |   *)
34 |     exit 0
35 |   ;;
36 | esac
37 | 


--------------------------------------------------------------------------------
/packaging/hipify-clang.prerm:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Debian prerm script: on "remove" or "upgrade" delete the tool symlinks from
 4 | # @ROCMBINDIR@ and then drop that directory if nothing else is left in it.
 5 | 
 6 | set -e
 7 | 
 8 | # Remove soft-link to bin files
 9 | case "$1" in
10 |   purge)
11 |   ;;
12 |   remove | upgrade)
13 |     for tool in hipify-perl hipify-clang \
14 |                 hipconvertinplace-perl.sh hipconvertinplace.sh \
15 |                 hipexamine-perl.sh hipexamine.sh \
16 |                 findcode.sh findcode_headers.sh findcode_sources.sh \
17 |                 findcode_custom.sh finduncodep.sh; do
18 |       rm -f "@ROCMBINDIR@/$tool"
19 |     done
20 |     # --ignore-fail-on-non-empty does not cover a directory that is already gone;
21 |     # under set -e that failure would abort the removal, so test for it first.
22 |     if [ -d "@ROCMBINDIR@" ]; then
23 |       rmdir --ignore-fail-on-non-empty "@ROCMBINDIR@"
24 |     fi
25 |   ;;
26 |   *)
27 |     exit 0
28 |   ;;
29 | esac
30 | 


--------------------------------------------------------------------------------
/packaging/hipify-clang.rpm_post:
--------------------------------------------------------------------------------
 1 | # Soft-link to bin files
 2 | 
 3 | mkdir -p @ROCMBINDIR@
 4 | CURRENTDIR=`pwd`
 5 | cd @ROCMBINDIR@
 6 | ln -r -s -f @HIPBINDIR@/hipify-perl @ROCMBINDIR@/hipify-perl
 7 | ln -r -s -f @HIPBINDIR@/hipify-clang @ROCMBINDIR@/hipify-clang
 8 | ln -r -s -f @HIPBINDIR@/hipconvertinplace-perl.sh @ROCMBINDIR@/hipconvertinplace-perl.sh
 9 | ln -r -s -f @HIPBINDIR@/hipconvertinplace.sh @ROCMBINDIR@/hipconvertinplace.sh
10 | ln -r -s -f @HIPBINDIR@/hipexamine-perl.sh @ROCMBINDIR@/hipexamine-perl.sh
11 | ln -r -s -f @HIPBINDIR@/hipexamine.sh @ROCMBINDIR@/hipexamine.sh
12 | ln -r -s -f @HIPBINDIR@/findcode.sh @ROCMBINDIR@/findcode.sh
13 | ln -r -s -f @HIPBINDIR@/findcode_headers.sh @ROCMBINDIR@/findcode_headers.sh
14 | ln -r -s -f @HIPBINDIR@/findcode_sources.sh @ROCMBINDIR@/findcode_sources.sh
15 | ln -r -s -f @HIPBINDIR@/findcode_custom.sh @ROCMBINDIR@/findcode_custom.sh
16 | ln -r -s -f @HIPBINDIR@/finduncodep.sh @ROCMBINDIR@/finduncodep.sh
17 | cd $CURRENTDIR
18 | 


--------------------------------------------------------------------------------
/packaging/hipify-clang.rpm_postun:
--------------------------------------------------------------------------------
 1 | if [ $1 -le 1 ]; then
 2 |   # perform the below actions for rpm remove($1=0) or upgrade($1=1) operations. NOTE(review): assuming this runs as the RPM %postun scriptlet, an upgrade runs it after the new package's %post, so with an unchanged install prefix these rm calls would presumably delete links the new package has just created -- confirm this is intended
 3 |   rm -f @ROCMBINDIR@/hipify-perl
 4 |   rm -f @ROCMBINDIR@/hipify-clang
 5 |   rm -f @ROCMBINDIR@/hipconvertinplace-perl.sh
 6 |   rm -f @ROCMBINDIR@/hipconvertinplace.sh
 7 |   rm -f @ROCMBINDIR@/hipexamine-perl.sh
 8 |   rm -f @ROCMBINDIR@/hipexamine.sh
 9 |   rm -f @ROCMBINDIR@/findcode.sh
10 |   rm -f @ROCMBINDIR@/findcode_headers.sh
11 |   rm -f @ROCMBINDIR@/findcode_sources.sh
12 |   rm -f @ROCMBINDIR@/findcode_custom.sh
13 |   rm -f @ROCMBINDIR@/finduncodep.sh
14 |   rmdir --ignore-fail-on-non-empty @ROCMBINDIR@
15 |   rmdir --ignore-fail-on-non-empty @HIPBINDIR@
16 |   rmdir --ignore-fail-on-non-empty @CPACK_PACKAGING_INSTALL_PREFIX@
17 | fi
18 | 


--------------------------------------------------------------------------------
/src/ArgParse.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 | 
23 | #pragma once
24 | 
25 | #include "clang/Tooling/CommonOptionsParser.h"
26 | #include "llvm/Support/CommandLine.h"
27 | 
28 | namespace cl = llvm::cl;
29 | namespace ct = clang::tooling;
30 | 
31 | extern cl::OptionCategory ToolTemplateCategory;
32 | extern cl::opt<std::string> OutputFilename;
33 | extern cl::opt<std::string> OutputHipifyPerlDir;
34 | extern cl::opt<std::string> OutputPythonMapDir;
35 | extern cl::opt<std::string> OutputDir;
36 | extern cl::opt<std::string> TemporaryDir;
37 | extern cl::opt<std::string> CudaPath;
38 | extern cl::opt<std::string> ClangResourceDir;
39 | extern cl::list<std::string> IncludeDirs;
40 | extern cl::list<std::string> MacroNames;
41 | extern cl::opt<bool> Inplace;
42 | extern cl::opt<bool> SaveTemps;
43 | extern cl::opt<bool> GeneratePerl;
44 | extern cl::opt<bool> GeneratePython;
45 | extern cl::opt<bool> Verbose;
46 | extern cl::opt<bool> NoBackup;
47 | extern cl::opt<bool> NoOutput;
48 | extern cl::opt<bool> PrintStats;
49 | extern cl::opt<bool> PrintStatsCSV;
50 | extern cl::opt<std::string> OutputStatsFilename;
51 | extern cl::opt<bool> Examine;
52 | extern cl::extrahelp CommonHelp;
53 | extern cl::opt<bool> TranslateToRoc;
54 | extern cl::opt<bool> TranslateToMIOpen;
55 | extern cl::opt<bool> DashDash;
56 | extern cl::opt<bool> SkipExcludedPPConditionalBlocks;
57 | extern cl::opt<bool> DefaultPreprocessor;
58 | extern cl::opt<std::string> CudaGpuArch;
59 | extern cl::opt<bool> GenerateMarkdown;
60 | extern cl::opt<bool> GenerateCSV;
61 | extern cl::opt<std::string> DocFormat;
62 | extern cl::opt<std::string> DocRoc;
63 | extern cl::opt<bool> Experimental;
64 | extern cl::opt<bool> CudaKernelExecutionSyntax;
65 | extern cl::opt<bool> HipKernelExecutionSyntax;
66 | extern const std::vector<std::string> hipifyOptions;
67 | extern const std::vector<std::string> hipifyOptionsWithTwoArgs;
68 | extern cl::opt<bool> Versions;
69 | extern cl::opt<bool> NoUndocumented;
70 | extern cl::opt<bool> NoWarningsUndocumented;
71 | extern cl::opt<bool> HipifyAMAP;
72 | 


--------------------------------------------------------------------------------
/src/CUDA2HIP_CAFFE2_API_functions.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 | 
23 | #include "CUDA2HIP.h"
24 | 
25 | // Maps the names of CUDA CAFFE2 API functions to the corresponding HIP functions
26 | const std::map<llvm::StringRef, hipCounter> CUDA_CAFFE2_FUNCTION_MAP {
27 |   {"cuda_stream",                                   {"hip_stream",                               "", CONV_LIB_FUNC, API_CAFFE2, 2}},
28 | };
29 | 
30 | const std::map<llvm::StringRef, cudaAPIversions> CUDA_CAFFE2_FUNCTION_VER_MAP {
31 | };
32 | 
33 | const std::map<llvm::StringRef, hipAPIversions> HIP_CAFFE2_FUNCTION_VER_MAP {
34 | };
35 | 
36 | const std::map<unsigned int, llvm::StringRef> CUDA_CAFFE2_API_SECTION_MAP {
37 |   {1, "CAFFE2 Data types"},
38 |   {2, "CAFFE2 Functions"},
39 | };
40 | 


--------------------------------------------------------------------------------
/src/CUDA2HIP_CAFFE2_API_types.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 | 
23 | #include "CUDA2HIP.h"
24 | 
25 | // Map of all types
26 | const std::map<llvm::StringRef, hipCounter> CUDA_CAFFE2_TYPE_NAME_MAP {
27 | 
28 |   // 5. Defines
29 |   {"REGISTER_CUDA_OPERATOR",                                  {"REGISTER_HIP_OPERATOR",                                        "", CONV_DEFINE, API_CAFFE2, 1}},
30 |   {"REGISTER_CUDA_OPERATOR_CREATOR",                          {"REGISTER_HIP_OPERATOR_CREATOR",                                "", CONV_DEFINE, API_CAFFE2, 1}},
31 | 
32 |   // 6. Classes
33 |   {"CUDAContext",                                             {"HIPContext",                                                   "", CONV_TYPE, API_CAFFE2, 1}},
34 | };
35 | 
36 | const std::map<llvm::StringRef, cudaAPIversions> CUDA_CAFFE2_TYPE_NAME_VER_MAP {
37 | };
38 | 
39 | const std::map<llvm::StringRef, hipAPIversions> HIP_CAFFE2_TYPE_NAME_VER_MAP {
40 | };
41 | 


--------------------------------------------------------------------------------
/src/CUDA2HIP_CUB_API_functions.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 | 
23 | #include "CUDA2HIP.h"
24 | 
25 | // Maps the names of CUDA CUB API functions to the corresponding HIP functions
26 | const std::map<llvm::StringRef, hipCounter> CUDA_CUB_FUNCTION_MAP {
27 | };
28 | 
29 | const std::map<llvm::StringRef, cudaAPIversions> CUDA_CUB_FUNCTION_VER_MAP {
30 | };
31 | 
32 | const std::map<llvm::StringRef, hipAPIversions> HIP_CUB_FUNCTION_VER_MAP {
33 | };
34 | 


--------------------------------------------------------------------------------
/src/CUDA2HIP_Complex_API_types.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 | 
23 | #include "CUDA2HIP.h"
24 | 
25 | // Maps the names of CUDA Complex API types to the corresponding HIP types
26 | const std::map<llvm::StringRef, hipCounter> CUDA_COMPLEX_TYPE_NAME_MAP {
27 |   {"cuFloatComplex",  {"hipFloatComplex",  "rocblas_float_complex",  CONV_TYPE, API_COMPLEX, 1}},
28 |   {"cuDoubleComplex", {"hipDoubleComplex", "rocblas_double_complex", CONV_TYPE, API_COMPLEX, 1}},
29 |   {"cuComplex",       {"hipComplex",       "rocblas_float_complex",  CONV_TYPE, API_COMPLEX, 1}},
30 | };
31 | 
32 | const std::map<llvm::StringRef, cudaAPIversions> CUDA_COMPLEX_TYPE_NAME_VER_MAP {
33 | };
34 | 
35 | const std::map<llvm::StringRef, hipAPIversions> HIP_COMPLEX_TYPE_NAME_VER_MAP {
36 |   {"hipFloatComplex", {HIP_1060, HIP_0,    HIP_0   }},
37 |   {"hipDoubleComplex",{HIP_1060, HIP_0,    HIP_0   }},
38 |   {"hipComplex",      {HIP_1060, HIP_0,    HIP_0   }},
39 | };
40 | 


--------------------------------------------------------------------------------
/src/CUDA2HIP_Scripting.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
  3 | 
  4 | Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | of this software and associated documentation files (the "Software"), to deal
  6 | in the Software without restriction, including without limitation the rights
  7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | copies of the Software, and to permit persons to whom the Software is
  9 | furnished to do so, subject to the following conditions:
 10 | 
 11 | The above copyright notice and this permission notice shall be included in
 12 | all copies or substantial portions of the Software.
 13 | 
 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 | THE SOFTWARE.
 21 | */
 22 | 
 23 | #pragma once
 24 | 
 25 | #include <map>
 26 | #include <vector>
 27 | #include <string>
 28 | 
 29 | namespace hipify {
 30 | 
 31 |   enum CastTypes {  // value stored in CastInfo::castType
 32 |     e_HIP_SYMBOL,
 33 |     e_reinterpret_cast,
 34 |     e_reinterpret_cast_size_t,
 35 |     e_const_cast_char_pp,
 36 |     e_static_cast_uint64_t,
 37 |     e_int32_t,
 38 |     e_int64_t,
 39 |     e_remove_argument,
 40 |     e_add_const_argument,
 41 |     e_add_var_argument,
 42 |     e_move_argument,
 43 |     e_replace_argument_with_const,
 44 |   };
 45 | 
 46 |   enum OverloadTypes {  // value stored in OverloadInfo::overloadType
 47 |     ot_arguments_number,
 48 |   };
 49 | 
 50 |   enum CastWarning {  // value stored in CastInfo::castWarn
 51 |     cw_None,
 52 |     cw_DataLoss,
 53 |   };
 54 | 
 55 |   enum OverloadWarning {  // value stored in OverloadInfo::overloadWarn
 56 |     ow_None,
 57 |   };
 58 | 
 59 |   struct CastInfo {  // mapped value type of ArgCastMap
 60 |     CastTypes castType;
 61 |     CastWarning castWarn;
 62 |     std::string constValToAddOrReplace = "";  // empty by default; presumably the text used by the e_add_*/e_replace_* cast types - TODO confirm
 63 |     unsigned moveOrCopyTo = 0;  // 0 by default; NOTE(review): looks like a destination argument position - confirm
 64 |     unsigned numberToMoveOrCopy = 1;  // 1 by default; NOTE(review): looks like a count of arguments - confirm
 65 |   };
 66 | 
 67 |   typedef std::map<unsigned, CastInfo> ArgCastMap;  // key is presumably the argument index - TODO confirm
 68 | 
 69 |   struct ArgCastStruct {  // an ArgCastMap plus two flags that both default to false
 70 |     ArgCastMap castMap;
 71 |     bool isToRoc = false;
 72 |     bool isToMIOpen = false;
 73 |   };
 74 | 
 75 |   struct OverloadInfo {  // mapped value type of OverloadMap
 76 |     hipCounter counter;
 77 |     OverloadTypes overloadType;
 78 |     OverloadWarning overloadWarn;
 79 |   };
 80 | 
 81 |   typedef std::map<unsigned, OverloadInfo> OverloadMap;  // key is presumably the argument count (cf. ot_arguments_number) - TODO confirm
 82 | 
 83 |   struct FuncOverloadsStruct {  // an OverloadMap plus two flags that both default to false
 84 |     OverloadMap overloadMap;
 85 |     bool isToRoc = false;
 86 |     bool isToMIOpen = false;
 87 |   };
 88 | }  // namespace hipify
 89 | 
 90 | extern std::string getCastType(hipify::CastTypes c);
 91 | extern std::map<std::string, std::vector<hipify::ArgCastStruct>> FuncArgCasts;
 92 | 
 93 | extern std::map<std::string, hipify::FuncOverloadsStruct> FuncOverloads;
 94 | 
 95 | extern std::map<std::string, std::string> TypeOverloads;
 96 | 
 97 | namespace perl {
 98 | 
 99 |   bool generate(bool Generate = true);
100 | }
101 | 
102 | namespace python {
103 | 
104 |   bool generate(bool Generate = true);
105 | }
106 | 
107 | namespace doc {
108 | 
109 |   bool generate(bool GenerateMD = true, bool GenerateCSV = true);
110 | }
111 | 


--------------------------------------------------------------------------------
/src/LLVMCompat.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
  3 | 
  4 | Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | of this software and associated documentation files (the "Software"), to deal
  6 | in the Software without restriction, including without limitation the rights
  7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | copies of the Software, and to permit persons to whom the Software is
  9 | furnished to do so, subject to the following conditions:
 10 | 
 11 | The above copyright notice and this permission notice shall be included in
 12 | all copies or substantial portions of the Software.
 13 | 
 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 | THE SOFTWARE.
 21 | */
 22 | 
 23 | #pragma once
 24 | 
 25 | #include <clang/Tooling/Core/Replacement.h>
 26 | #include <clang/Tooling/Refactoring.h>
 27 | #include <llvm/Support/Signals.h>
 28 | #include <clang/Lex/Token.h>
 29 | #include <clang/Lex/Preprocessor.h>
 30 | 
 31 | namespace ct = clang::tooling;
 32 | 
 33 | extern const std::string sHipify, sConflict, sError, sWarning;
 34 | 
 35 | // Things for papering over the differences between different LLVM versions.
 36 | 
 37 | namespace llcompat {
 38 | /**
 39 |   * The getNumArgs function on macros was rather unhelpfully renamed to getNumParams in clang 5.0. Its semantics
 40 |   * remain unchanged, so let's be slightly ugly about it here. :D
 41 |   */
 42 | #if LLVM_VERSION_MAJOR > 4
 43 |   #define GET_NUM_ARGS() getNumParams()
 44 | #else
 45 |   #define GET_NUM_ARGS() getNumArgs()
 46 | #endif
 47 | 
 48 | #if LLVM_VERSION_MAJOR < 7
 49 |   #define LLVM_DEBUG(X) DEBUG(X)
 50 | #endif
 51 | 
 52 | clang::SourceLocation getBeginLoc(const clang::Stmt *stmt);
 53 | clang::SourceLocation getBeginLoc(const clang::TypeLoc &typeLoc);
 54 | 
 55 | clang::SourceLocation getEndLoc(const clang::Stmt *stmt);
 56 | clang::SourceLocation getEndLoc(const clang::TypeLoc &typeLoc);
 57 | 
 58 | void PrintStackTraceOnErrorSignal();
 59 | 
 60 | using namespace llvm;
 61 | 
 62 | /**
 63 |   * Get the replacement map for a given filename in a RefactoringTool.
 64 |   *
 65 |   * Older LLVM versions don't actually support multiple filenames, so everything all gets
 66 |   * smushed together. It is the caller's responsibility to cope with this.
 67 |   */
 68 | ct::Replacements &getReplacements(ct::RefactoringTool &Tool, StringRef file);
 69 | 
 70 | /**
 71 |   * Add a Replacement to a Replacements.
 72 |   */
 73 | void insertReplacement(ct::Replacements &replacements, const ct::Replacement &rep);
 74 | 
 75 | /**
 76 |   * Version-agnostic version of Preprocessor::EnterTokenStream().
 77 |   */
 78 | void EnterPreprocessorTokenStream(clang::Preprocessor &_pp,
 79 |                                   const clang::Token *start,
 80 |                                   size_t len,
 81 |                                   bool DisableMacroExpansion);
 82 | 
 83 | std::error_code real_path(const Twine &path, SmallVectorImpl<char> &output,
 84 |                           bool expand_tilde = false);
 85 | 
 86 | bool pragma_once_outside_header();
 87 | 
 88 | void RetainExcludedConditionalBlocks(clang::CompilerInstance &CI);
 89 | 
 90 | bool CheckCompatibility();
 91 | 
 92 | clang::SourceLocation getEndOfExpansionRangeForLoc(const clang::SourceManager &SM, const clang::SourceLocation &loc);
 93 | 
 94 | #if LLVM_VERSION_MAJOR >= 12
 95 |   typedef MemoryBufferRef Memory_Buffer;
 96 | #else
 97 |   typedef const MemoryBuffer *Memory_Buffer;
 98 | #endif
 99 | 
100 | Memory_Buffer getMemoryBuffer(const clang::SourceManager &SM);
101 | 
102 | void addTargetIfNeeded(ct::RefactoringTool &Tool);
103 | 
104 | const clang::IdentifierInfo *getControllingMacro(clang::CompilerInstance &CI);
105 | 
106 | } // namespace llcompat
107 | 


--------------------------------------------------------------------------------
/src/ReplacementsFrontendActionFactory.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 | 
23 | #pragma once
24 | 
25 | #include "clang/Tooling/Tooling.h"
26 | #include "clang/Frontend/FrontendAction.h"
27 | #include "clang/Tooling/Core/Replacement.h"
28 | 
29 | namespace ct = clang::tooling;
30 | 
31 | /**
32 |   * A FrontendActionFactory that propagates a set of Replacements into the FrontendAction.
33 |   * This is necessary boilerplate for using a custom FrontendAction with a RefactoringTool.
34 |   *
35 |   * @tparam T The FrontendAction to create.
36 |   */
37 | template <typename T>
38 | class ReplacementsFrontendActionFactory : public ct::FrontendActionFactory {
39 | public:
40 |   // r is kept as a plain pointer and handed to every T built by create(); this class never deletes it.
41 |   explicit ReplacementsFrontendActionFactory(ct::Replacements *r) : sink(r) {}
42 | 
43 |   // create() comes in two shapes, selected below by the LLVM major version:
44 |   // a std::unique_ptr from LLVM 10 onwards, a raw pointer before that.
45 | #if LLVM_VERSION_MAJOR >= 10
46 |   std::unique_ptr<clang::FrontendAction> create() override {
47 |     std::unique_ptr<clang::FrontendAction> action(new T(sink));
48 |     return action;
49 |   }
50 | #else
51 |   clang::FrontendAction *create() override { return new T(sink); }
52 | #endif
53 | private:
54 |   ct::Replacements *sink;
55 | };
56 | 


--------------------------------------------------------------------------------
/src/StringUtils.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 | 
23 | #include "StringUtils.h"
24 | #include "LLVMCompat.h"
25 | #include "llvm/ADT/SmallString.h"
26 | 
27 | using namespace llvm;
28 | 
29 | llvm::StringRef unquoteStr(llvm::StringRef s) {
30 |   // Peel one pair of enclosing double quotes; any other input is returned as-is.
31 |   const bool quoted = s.size() >= 2 && s.front() == '"' && s.back() == '"';
32 |   return quoted ? s.drop_front().drop_back() : s;
33 | }
34 | 
35 | void removePrefixIfPresent(std::string &s, const std::string &prefix) {
36 |   // Erase prefix from the front of s when s starts with it; compare() looks only at the leading prefix.size() characters instead of searching all of s.
37 |   if (s.compare(0, prefix.size(), prefix) == 0) s.erase(0, prefix.size());
38 | }
39 | 
40 | std::string getAbsoluteFilePath(const std::string &sFile, std::error_code &EC) {
41 |   // Resolves sFile to its real path. Failures are logged to stderr, stored in EC and yield "".
42 |   if (sFile.empty()) return sFile;  // empty input is handed back without touching EC
43 |   if (sys::fs::exists(sFile)) {
44 |     SmallString<256> resolved;
45 |     EC = llcompat::real_path(sFile, resolved, true);
46 |     if (!EC) {
47 |       EC = std::error_code();
48 |       return resolved.c_str();
49 |     }
50 |     llvm::errs() << "\n" << sHipify << sError << EC.message() << ": source file: " << sFile << "\n";
51 |     return "";
52 |   }
53 |   llvm::errs() << "\n" << sHipify << sError << "source file: " << sFile << " doesn't exist\n";
54 |   EC = std::make_error_code(std::errc::no_such_file_or_directory);
55 |   return "";
56 | }
57 | 
58 | std::string getAbsoluteDirectoryPath(const std::string &sDir, std::error_code &EC,
59 |   const std::string &sDirType, bool bCreateDir) {
60 |   // Returns the real path of directory sDir, creating it first when it is missing and bCreateDir is set.
61 |   // Every failure is reported on stderr, recorded in EC, and yields an empty string.
62 |   if (sDir.empty()) return sDir;  // empty input is handed back without touching EC
63 |   EC = std::error_code();
64 |   const bool present = sys::fs::exists(sDir);
65 |   if (present && sys::fs::is_regular_file(sDir)) {
66 |     llvm::errs() << "\n" << sHipify << sError << sDir << " is not a directory\n";
67 |     EC = std::make_error_code(std::errc::not_a_directory);
68 |     return "";
69 |   }
70 |   if (!present && !bCreateDir) {
71 |     llvm::errs() << "\n" << sHipify << sError << sDirType << " directory: " << sDir << " doesn't exist\n";
72 |     EC = std::make_error_code(std::errc::no_such_file_or_directory);
73 |     return "";
74 |   }
75 |   if (!present) {
76 |     // The path is absent and creation was requested; a creation error is reported like any other failure.
77 |     EC = sys::fs::create_directory(sDir);
78 |     if (EC) {
79 |       llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << sDirType << " directory: " << sDir << "\n";
80 |       return "";
81 |     }
82 |   }
83 |   SmallString<256> resolved;
84 |   EC = llcompat::real_path(sDir, resolved, true);
85 |   if (EC) {
86 |     llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << sDirType << " directory: " << sDir << "\n";
87 |     return "";
88 |   }
89 |   return resolved.c_str();
90 | }
91 | 


--------------------------------------------------------------------------------
/src/StringUtils.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 | 
23 | #pragma once
24 | 
25 | #include <string>
26 | #include <system_error>
27 | #include "llvm/ADT/StringRef.h"
28 | 
29 | /**
30 |   * Remove the enclosing double-quotes from a string when it both starts and ends with one.
31 |   */
32 | llvm::StringRef unquoteStr(llvm::StringRef s);
33 | 
34 | /**
35 |   * If `s` starts with `prefix`, remove it. Otherwise, does nothing.
36 |   */
37 | void removePrefixIfPresent(std::string &s, const std::string &prefix);
38 | 
39 | /**
40 |   * Returns the absolute path of the file `sFile`; on failure returns an empty string and sets `EC`.
41 |   */
42 | std::string getAbsoluteFilePath(const std::string &sFile, std::error_code &EC);
43 | 
44 | /**
45 |   * Returns the absolute path of the directory `sDir`; on failure returns an empty string and sets `EC`.
46 |   * By default the directory is called "temporary" in error messages and is created if it does not exist.
47 |   */
48 | std::string getAbsoluteDirectoryPath(const std::string &sDir, std::error_code &EC,
49 |   const std::string &sDirType = "temporary", bool bCreateDir = true);
50 | 


--------------------------------------------------------------------------------
/tests/lit.site.cfg.in:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | 
 4 | config.hipify_install_path = "@CMAKE_INSTALL_PREFIX@"
 5 | config.hipify_clang_tests_only = "@HIPIFY_CLANG_TESTS_ONLY@"
 6 | config.pointer_size = @CMAKE_SIZEOF_VOID_P@
 7 | config.llvm_version = "@LLVM_PACKAGE_VERSION@"
 8 | config.llvm_version_major = int("@LLVM_VERSION_MAJOR@")
 9 | config.llvm_tools_dir = "@LLVM_TOOLS_BINARY_DIR@"
10 | config.obj_root = "@CMAKE_CURRENT_BINARY_DIR@"
11 | config.cuda_root = "@CUDA_TOOLKIT_ROOT_DIR@"
12 | config.cuda_sdk_root = "@CUDA_SDK_ROOT_DIR@"
13 | config.cuda_dnn_root = "@CUDA_DNN_ROOT_DIR@"
14 | config.cuda_cub_root = "@CUDA_CUB_ROOT_DIR@"
15 | config.cuda_tensor_root = "@CUDA_TENSOR_ROOT_DIR@"
16 | config.cuda_version_major = int("@CUDA_VERSION_MAJOR@")
17 | config.cuda_version_minor = int("@CUDA_VERSION_MINOR@")
18 | config.cuda_version_full = "@CUDA_VERSION_FULL@"
19 | config.cuda_version = "@CUDA_VERSION@"
20 | config.clang_resource_dir = "@HIPIFY_CLANG_RES@"
21 | if sys.platform in ['win32']:
22 |     config.build_type = "@CMAKE_BUILD_TYPE@"
23 |     if not config.build_type:
24 |         config.build_type = "Debug"
25 | 
# cuDNN major version: stays 0 unless a cuDNN root is configured (non-empty and
# not "OFF") and its include/cudnn_version.h contains the text 'CUDNN_MAJOR 9'.
config.cudnn_version_major = 0
dnn_root = config.cuda_dnn_root
if dnn_root and dnn_root != "OFF":
    version_header = dnn_root + '/include/cudnn_version.h'
    if os.path.isfile(version_header):
        with open(version_header) as header:
            header_text = header.read()
        if 'CUDNN_MAJOR 9' in header_text:
            config.cudnn_version_major = 9
33 | 
# Let user-supplied lit parameters fill "%(name)s" references in the tools and
# object directories. This covers the case where the tool directory cannot be
# determined at configuration time. A reference to a parameter that was not
# supplied is reported through lit_config.fatal().
try:
    config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params
    config.obj_root = config.obj_root % lit_config.params
except KeyError as missing:
    key, = missing.args
    lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key))
43 | 


--------------------------------------------------------------------------------
/tests/run_test.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | setlocal
 3 | 
 4 | for %%i in (FileCheck.exe) do set FILE_CHECK=%%~$PATH:i
 5 | if not defined FILE_CHECK (echo      Error: FileCheck.exe not found in PATH. && exit /b 1)
 6 | 
 7 | set HIPIFY=%1
 8 | set IN_FILE=%2
 9 | set TMP_FILE=%3
10 | set CUDA_ROOT=%4
11 | set CLANG_RES=%5
12 | set NUM=%6
13 | set all_args=%*
14 | 
15 | if %NUM% EQU 1 (
16 |   set HIPIFY_OPTS=%7
17 |   call set clang_args=%%all_args:*%7=%%
18 | ) else if %NUM% EQU 2 (
19 |   set HIPIFY_OPTS=%7 %8
20 |   call set clang_args=%%all_args:*%8=%%
21 | ) else if %NUM% EQU 3 (
22 |   set HIPIFY_OPTS=%7 %8 %9
23 |   shift
24 |   call set clang_args=%%all_args:*%9=%%
25 | ) else if %NUM% EQU 4 (
26 |   shift
27 |   call set HIPIFY_OPTS=%%6 %%7 %%8 %%9
28 | ) else if %NUM% EQU 5 (
29 |   shift
30 |   shift
31 |   call set HIPIFY_OPTS=%%5 %%6 %%7 %%8 %%9
32 | ) else (
33 |   set clang_args=%%all_args:*%6=%%
34 |   set NUM=0
35 | )
36 | if %NUM% EQU 4 (
37 |   shift
38 |   call set clang_args=%%all_args:*%9=%%
39 | )
40 | if %NUM% EQU 5 (
41 |   shift
42 |   call set clang_args=%%all_args:*%9=%%
43 | )
44 | 
45 | set test_dir=%~dp2
46 | set "test_dir=%test_dir:\=/%"
47 | 
48 | set compile_commands=compile_commands.json
49 | set json_in=%test_dir%%compile_commands%.in
50 | set json_out=%test_dir%%compile_commands%
51 | 
52 | if exist %json_in% (
53 |   powershell -Command "(gc %json_in%) -replace '<test dir>', '%test_dir%' -replace '<CUDA dir>', '%CUDA_ROOT%' | Out-File -encoding ASCII %json_out%"
54 |   set hipify_cmd=%HIPIFY% -o=%TMP_FILE% %IN_FILE% %CUDA_ROOT% --clang-resource-directory=%CLANG_RES% %HIPIFY_OPTS% -p=%test_dir% %HIPIFY_OPTS% -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH=1
55 | ) else (
56 |   set hipify_cmd=%HIPIFY% -o=%TMP_FILE% %IN_FILE% %CUDA_ROOT% --clang-resource-directory=%CLANG_RES% %HIPIFY_OPTS% -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH=1 -- %clang_args%
57 | )
58 | 
59 | SET CUDA=%CUDA_ROOT:~13,-1%
60 | 
61 | echo [HIPIFY] CUDA directory      : %CUDA%
62 | echo [HIPIFY] clang res directory : %CLANG_RES%
63 | echo [HIPIFY] hipify options count: %NUM%
64 | echo [HIPIFY] hipify options      : %HIPIFY_OPTS%
65 | echo [HIPIFY] clang  options      : %clang_args%
66 | echo [HIPIFY] hipify-clang command: %hipify_cmd%
67 | 
68 | call %hipify_cmd%
69 | 
70 | if errorlevel 1 (echo      Error: hipify-clang.exe failed with exit code: %errorlevel% && exit /b %errorlevel%)
71 | 
72 | findstr /v /r /c:"[ ]*//[ ]*[CHECK*|RUN]" %TMP_FILE% | %FILE_CHECK% %IN_FILE%
73 | if errorlevel 1 (echo      Error: FileCheck.exe failed with exit code: %errorlevel% && exit /b %errorlevel%)
74 | 


--------------------------------------------------------------------------------
/tests/run_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -o errexit
 4 | 
 5 | # Run a single LIT test file in a magical way that preserves colour output, to work around
 6 | # a known flaw in lit.
 7 | 
 8 | # Capture lit substitutions
 9 | HIPIFY=$1
10 | IN_FILE=$2
11 | TMP_FILE=$3
12 | CUDA_ROOT=$4
13 | CLANG_RES=$5
14 | NUM=$6
15 | HIPIFY_OPTS=""
16 | 
17 | if [ $NUM = "1" ]
18 | then
19 | HIPIFY_OPTS=$7
20 | shift 7
21 | elif [ $NUM = "2" ]
22 | then
23 | HIPIFY_OPTS="$7 $8"
24 | shift 8
25 | elif [ $NUM = "3" ]
26 | then
27 | HIPIFY_OPTS="$7 $8 $9"
28 | shift 9
29 | elif [ $NUM = "4" ]
30 | then
31 | HIPIFY_OPTS="$7 $8 $9 ${10}"
32 | shift 10
33 | elif [ $NUM = "5" ]
34 | then
35 | HIPIFY_OPTS="$7 $8 $9 ${10} ${11}"
36 | shift 11
37 | fi
38 | 
39 | test_dir=${IN_FILE%/*}
40 | 
41 | compile_commands=compile_commands.json
42 | json_out=${test_dir}/${compile_commands}
43 | json_in=${json_out}.in
44 | 
45 | # Remaining args are the ones to forward to clang proper.
46 | 
47 | if [ -e $json_in ]
48 | then
49 | cp $json_in $json_out
50 | sed -i -e "s|<test dir>|${test_dir}|g; s|<CUDA dir>|${CUDA_ROOT}|g" $json_out
51 | $HIPIFY -o=$TMP_FILE $IN_FILE $CUDA_ROOT --clang-resource-directory=$CLANG_RES $HIPIFY_OPTS -p=$test_dir $HIPIFY_OPTS && cat $TMP_FILE | sed -Ee 's|//.+|// |g' | FileCheck $IN_FILE
52 | else
53 | $HIPIFY -o=$TMP_FILE $IN_FILE $CUDA_ROOT --clang-resource-directory=$CLANG_RES $HIPIFY_OPTS -- $@ && cat $TMP_FILE | sed -Ee 's|//.+|// |g' | FileCheck $IN_FILE
54 | fi
55 | 


--------------------------------------------------------------------------------
/tests/unit_tests/compilation_database/compile_commands.json.in:
--------------------------------------------------------------------------------
1 | [
2 |   {
3 |     "directory": "<test dir>",
4 |     "command": "hipify-clang \"<CUDA dir>\" -I./include -v",
5 |     "file": "cd_intro.cu"
6 |   }
7 | ]
8 | 


--------------------------------------------------------------------------------
/tests/unit_tests/compilation_database/include/inc.h:
--------------------------------------------------------------------------------
1 | #define K_THREADS 64
2 | #define K_INDEX() ((gridDim.x * blockIdx.y + blockIdx.x) * blockDim.x + threadIdx.x)
3 | #define RND() ((rand() & 0x7FFF) / float(0x8000))
4 | #define ERRORCHECK() cErrorCheck(__FILE__, __LINE__)
5 | 


--------------------------------------------------------------------------------
/tests/unit_tests/device/math_functions.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | // Synthetic test to warn only on device functions umin and umax as unsupported, but not on user defined ones.
 3 | // ToDo: change lit testing in order to parse the output.
 4 | 
 5 | #define LEN 1024
 6 | #define SIZE LEN * sizeof(float)
 7 | // CHECK: #include <hip/hip_runtime.h>
 8 | #include <algorithm>
 9 | 
10 | namespace my {
11 |   // user defined function
12 |   unsigned int umin(unsigned int arg1, unsigned int arg2) {
13 |     return (arg1 < arg2) ? arg1 : arg2;
14 |   }
15 |   // user defined function
16 |   unsigned int umax(unsigned int arg1, unsigned int arg2) {
17 |     return (arg1 > arg2) ? arg1 : arg2;
18 |   }
19 | }
20 | 
21 | __global__ void uint_arithm(float* A, float* B, float* C, unsigned int u1, unsigned int u2)
22 | {
23 |   // device function call (warn if unsupported)
24 |   unsigned int _umin = umin ( u1, u2 );
25 |   // device function call (warn if unsupported)
26 |   unsigned int _umax = umax ( u1, u2 );
27 |   // device function call (warn if unsupported)
28 |   unsigned int _umin_global = ::umin ( u1, u2 );
29 |   // device function call (warn if unsupported)
30 |   unsigned int _umax_global = ::umax(u1, u2);
31 |   if (_umin != _umin_global) return;
32 |   if (_umax != _umax_global) return;
33 |   int i = threadIdx.x;
34 |   A[i] = i + _umin;
35 |   B[i] = i + _umax;
36 |   C[i] = A[i] + B[i];
37 | }
38 | 
39 | int main() {
40 |   unsigned int u1 = 33;
41 |   unsigned int u2 = 34;
42 |   // user defined function call
43 |   unsigned int _min = my::umin(u1, u2);
44 |   // user defined function call
45 |   unsigned int _max = my::umax(u1, u2);
46 |   float *A, *B, *C;
47 |   // CHECK: hipMalloc((void**)&A, SIZE);
48 |   cudaMalloc((void**)&A, SIZE);
49 |   // CHECK: hipMalloc((void**)&B, SIZE);
50 |   cudaMalloc((void**)&B, SIZE);
51 |   // CHECK: hipMalloc((void**)&C, SIZE);
52 |   cudaMalloc((void**)&C, SIZE);
53 |   dim3 dimGrid(LEN / 512, 1, 1);
54 |   dim3 dimBlock(512, 1, 1);
55 |   // CHECK: hipLaunchKernelGGL(uint_arithm, dim3(dimGrid), dim3(dimBlock), 0, 0, A, B, C, u1, u2);
56 |   uint_arithm<<<dimGrid, dimBlock>>>(A, B, C, u1, u2);
57 |   return _min < _max;
58 | }
59 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_01.cu:
--------------------------------------------------------------------------------
1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
2 | 
3 | // CHECK: #include <hip/hip_runtime.h>
4 | // CHECK-NOT: #include <cuda_runtime.h>
5 | // CHECK: #include <stdio.h>
6 | #include <cuda.h>
7 | #include <cuda_runtime.h>
8 | #include <stdio.h>
9 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_02.cu:
--------------------------------------------------------------------------------
1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
2 | 
3 | // CHECK: #include "hip/hip_runtime.h"
4 | // CHECK-NOT: #include "cuda_runtime.h"
5 | // CHECK: #include <stdio.h>
6 | #include "cuda.h"
7 | #include "cuda_runtime.h"
8 | #include <stdio.h>
9 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_03.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | // CHECK: #pragma once
 4 | // CHECK-NEXT: #include <hip/hip_runtime.h>
 5 | #pragma once
 6 | // CHECK-NOT: #include <hip/hip_runtime.h>
 7 | int main(int argc, char* argv[]) {
 8 |   return 0;
 9 | }
10 | 
11 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_04.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | // CHECK-NEXT: #include <stdio.h>
 5 | // CHECK-NEXT: #include <iostream>
 6 | #include <stdio.h>
 7 | #include <iostream>
 8 | // CHECK-NOT: #include <hip/hip_runtime.h>
 9 | int main(int argc, char* argv[]) {
10 |   return 0;
11 | }
12 | 
13 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_05.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | // CHECK: #pragma once
 4 | // CHECK-NEXT: #include <hip/hip_runtime.h>
 5 | #pragma once
 6 | // CHECK-NOT: #include <hip/hip_runtime.h>
 7 | #include <stdio.h>
 8 | 
 9 | int main(int argc, char* argv[]) {
10 |   return 0;
11 | }
12 | 
13 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_06.cu:
--------------------------------------------------------------------------------
1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
2 | 
3 | // CHECK: #include <hipblas.h>
4 | // CHECK-NOT: #include <cublas_v2.h>
5 | // CHECK: #include <stdio.h>
6 | #include <cublas.h>
7 | #include <cublas_v2.h>
8 | #include <stdio.h>
9 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_06_12000.cu:
--------------------------------------------------------------------------------
1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --skip-excluded-preprocessor-conditional-blocks %clang_args
2 | 
3 | // CHECK: #include <hip/hip_runtime.h>
4 | // CHECK: #include <hipblas.h>
5 | // CHECK: #include <stdio.h>
6 | #include <cuda.h>
7 | #include <cublas.h>
8 | #include <stdio.h>
9 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_07.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | // CHECK: #include "hipblas.h"
 5 | // CHECK-NOT: #include "cublas.h"
 6 | // CHECK-NOT: #include "cublas_v2.h"
 7 | // CHECK: #include <stdio.h>
 8 | #include "cublas.h"
 9 | #include "cublas_v2.h"
10 | // CHECK-NOT: #include "hipblas.h"
11 | #include <stdio.h>
12 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_07_12000.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --skip-excluded-preprocessor-conditional-blocks %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | // CHECK: #include "hipblas.h"
 5 | // CHECK-NOT: #include "cublas_v2.h"
 6 | // CHECK: #include <stdio.h>
 7 | #include <cuda.h>
 8 | #include "cublas_v2.h"
 9 | // CHECK-NOT: #include "hipblas.h"
10 | #include <stdio.h>
11 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_08.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | // CHECK: #include <iostream>
 5 | // CHECK: #include "hipblas.h"
 6 | // CHECK-NOT: #include "cublas.h"
 7 | // CHECK-NOT: #include "cublas_v2.h"
 8 | // CHECK: #include <stdio.h>
 9 | // CHECK-NOT: #include <cuda.h>
10 | #include <cuda.h>
11 | // CHECK-NOT: #include <cuda_runtime.h>
12 | #include <cuda_runtime.h>
13 | // CHECK-NOT: #include <hip/hip_runtime.h>
14 | #include <iostream>
15 | #include "cublas.h"
16 | #include "cublas_v2.h"
17 | // CHECK-NOT: #include "hipblas.h"
18 | #include <stdio.h>
19 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_08_12000.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | // CHECK: #include <iostream>
 5 | // CHECK: #include "hipblas.h"
 6 | // CHECK-NOT: #include "cublas.h"
 7 | // CHECK: #include <stdio.h>
 8 | // CHECK-NOT: #include <cuda.h>
 9 | #include <cuda.h>
10 | // CHECK-NOT: #include <cuda_runtime.h>
11 | #include <cuda_runtime.h>
12 | // CHECK-NOT: #include <hip/hip_runtime.h>
13 | #include <iostream>
14 | #include "cublas.h"
15 | // CHECK-NOT: #include "hipblas.h"
16 | #include <stdio.h>
17 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_09.cu:
--------------------------------------------------------------------------------
  1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
  2 | 
  3 | // CHECK: #include <hip/hip_runtime.h>
  4 | // CHECK: #include <memory>
  5 | 
  6 | // CHECK-NOT: #include <cuda_runtime.h>
  7 | // CHECK-NOT: #include <hip/hip_runtime.h>
  8 | 
  9 | // CHECK: #include "hip/hip_runtime_api.h"
 10 | // CHECK: #include "hip/channel_descriptor.h"
 11 | // CHECK: #include "hip/device_functions.h"
 12 | // CHECK: #include "hip/driver_types.h"
 13 | // CHECK: #include "hip/hip_complex.h"
 14 | // CHECK: #include "hip/hip_fp16.h"
 15 | // CHECK: #include "hip/hip_texture_types.h"
 16 | // CHECK: #include "hip/hip_vector_types.h"
 17 | 
 18 | // CHECK: #include <iostream>
 19 | 
 20 | // CHECK: #include "hipblas.h"
 21 | // CHECK-NOT: #include "cublas.h"
 22 | // CHECK-NOT: #include "cublas_v2.h"
 23 | 
 24 | // CHECK: #include <stdio.h>
 25 | 
 26 | // CHECK: #include "hiprand/hiprand.h"
 27 | // CHECK: #include "hiprand/hiprand_kernel.h"
 28 | 
 29 | // CHECK: #include <algorithm>
 30 | 
 31 | // CHECK-NOT: #include "hiprand/hiprand.h"
 32 | // CHECK-NOT: #include "hiprand/hiprand_kernel.h"
 33 | // CHECK-NOT: #include "curand_discrete.h"
 34 | // CHECK-NOT: #include "curand_discrete2.h"
 35 | // CHECK-NOT: #include "curand_globals.h"
 36 | // CHECK-NOT: #include "curand_lognormal.h"
 37 | // CHECK-NOT: #include "curand_mrg32k3a.h"
 38 | // CHECK-NOT: #include "curand_mtgp32.h"
 39 | // CHECK-NOT: #include "curand_mtgp32_host.h"
 40 | // CHECK-NOT: #include "curand_mtgp32_kernel.h"
 41 | // CHECK-NOT: #include "curand_mtgp32dc_p_11213.h"
 42 | // CHECK-NOT: #include "curand_normal.h"
 43 | // CHECK-NOT: #include "curand_normal_static.h"
 44 | // CHECK-NOT: #include "curand_philox4x32_x.h"
 45 | // CHECK-NOT: #include "curand_poisson.h"
 46 | // CHECK-NOT: #include "curand_precalc.h"
 47 | // CHECK-NOT: #include "curand_uniform.h"
 48 | 
 49 | // CHECK: #include "hiprand/hiprand_mtgp32_host.h"
 50 | // CHECK: #include "rocrand/rocrand_mtgp32_11213.h"
 51 | 
 52 | // CHECK: #include <string>
 53 | 
 54 | // CHECK: #include "hipfft/hipfft.h"
 55 | // CHECK: #include "hipsparse.h"
 56 | 
 57 | #include <cuda.h>
 58 | // CHECK-NOT: #include <hip/hip_runtime.h>
 59 | 
 60 | #include <memory>
 61 | 
 62 | #include <cuda_runtime.h>
 63 | // CHECK-NOT: #include <hip/hip_runtime.h>
 64 | 
 65 | #include "cuda_runtime_api.h"
 66 | #include "channel_descriptor.h"
 67 | #include "device_functions.h"
 68 | #include "driver_types.h"
 69 | #include "cuComplex.h"
 70 | #include "cuda_fp16.h"
 71 | #include "cuda_texture_types.h"
 72 | #include "vector_types.h"
 73 | 
 74 | #include <iostream>
 75 | 
 76 | #include "cublas.h"
 77 | #include "cublas_v2.h"
 78 | // CHECK-NOT: #include "hipblas.h"
 79 | 
 80 | #include <stdio.h>
 81 | 
 82 | #include "curand.h"
 83 | #include "curand_kernel.h"
 84 | 
 85 | #include <algorithm>
 86 | 
 87 | #include "curand_discrete.h"
 88 | #include "curand_discrete2.h"
 89 | #include "curand_globals.h"
 90 | #include "curand_lognormal.h"
 91 | #include "curand_mrg32k3a.h"
 92 | #include "curand_mtgp32.h"
 93 | #include "curand_mtgp32_host.h"
 94 | #include "curand_mtgp32_kernel.h"
 95 | #include "curand_mtgp32dc_p_11213.h"
 96 | #include "curand_normal.h"
 97 | #include "curand_normal_static.h"
 98 | #include "curand_philox4x32_x.h"
 99 | #include "curand_poisson.h"
100 | #include "curand_precalc.h"
101 | #include "curand_uniform.h"
102 | 
103 | #include <string>
104 | 
105 | #include "cufft.h"
106 | 
107 | #include "cusparse.h"
108 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_09_12000.cu:
--------------------------------------------------------------------------------
  1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
  2 | 
  3 | // CHECK: #include <hip/hip_runtime.h>
  4 | // CHECK: #include <memory>
  5 | 
  6 | // CHECK-NOT: #include <cuda_runtime.h>
  7 | // CHECK-NOT: #include <hip/hip_runtime.h>
  8 | 
  9 | // CHECK: #include "hip/hip_runtime_api.h"
 10 | // CHECK: #include "hip/channel_descriptor.h"
 11 | // CHECK: #include "hip/device_functions.h"
 12 | // CHECK: #include "hip/driver_types.h"
 13 | // CHECK: #include "hip/hip_complex.h"
 14 | // CHECK: #include "hip/hip_fp16.h"
 15 | // CHECK: #include "hip/hip_texture_types.h"
 16 | // CHECK: #include "hip/hip_vector_types.h"
 17 | 
 18 | // CHECK: #include <iostream>
 19 | 
 20 | // CHECK: #include "hipblas.h"
 21 | // CHECK-NOT: #include "cublas_v2.h"
 22 | 
 23 | // CHECK: #include <stdio.h>
 24 | 
 25 | // CHECK: #include "hiprand/hiprand.h"
 26 | // CHECK: #include "hiprand/hiprand_kernel.h"
 27 | 
 28 | // CHECK: #include <algorithm>
 29 | 
 30 | // CHECK-NOT: #include "hiprand/hiprand.h"
 31 | // CHECK-NOT: #include "hiprand/hiprand_kernel.h"
 32 | // CHECK-NOT: #include "curand_discrete.h"
 33 | // CHECK-NOT: #include "curand_discrete2.h"
 34 | // CHECK-NOT: #include "curand_globals.h"
 35 | // CHECK-NOT: #include "curand_lognormal.h"
 36 | // CHECK-NOT: #include "curand_mrg32k3a.h"
 37 | // CHECK-NOT: #include "curand_mtgp32.h"
 38 | // CHECK-NOT: #include "curand_mtgp32_host.h"
 39 | // CHECK-NOT: #include "curand_mtgp32_kernel.h"
 40 | // CHECK-NOT: #include "curand_mtgp32dc_p_11213.h"
 41 | // CHECK-NOT: #include "curand_normal.h"
 42 | // CHECK-NOT: #include "curand_normal_static.h"
 43 | // CHECK-NOT: #include "curand_philox4x32_x.h"
 44 | // CHECK-NOT: #include "curand_poisson.h"
 45 | // CHECK-NOT: #include "curand_precalc.h"
 46 | // CHECK-NOT: #include "curand_uniform.h"
 47 | 
 48 | // CHECK: #include "hiprand/hiprand_mtgp32_host.h"
 49 | // CHECK: #include "rocrand/rocrand_mtgp32_11213.h"
 50 | 
 51 | // CHECK: #include <string>
 52 | 
 53 | // CHECK: #include "hipfft/hipfft.h"
 54 | // CHECK: #include "hipsparse.h"
 55 | 
 56 | #include <cuda.h>
 57 | // CHECK-NOT: #include <hip/hip_runtime.h>
 58 | 
 59 | #include <memory>
 60 | 
 61 | #include <cuda_runtime.h>
 62 | // CHECK-NOT: #include <hip/hip_runtime.h>
 63 | 
 64 | #include "cuda_runtime_api.h"
 65 | #include "channel_descriptor.h"
 66 | #include "device_functions.h"
 67 | #include "driver_types.h"
 68 | #include "cuComplex.h"
 69 | #include "cuda_fp16.h"
 70 | #include "cuda_texture_types.h"
 71 | #include "vector_types.h"
 72 | 
 73 | #include <iostream>
 74 | 
 75 | #include "cublas_v2.h"
 76 | 
 77 | // CHECK-NOT: #include "hipblas.h"
 78 | 
 79 | #include <stdio.h>
 80 | 
 81 | #include "curand.h"
 82 | #include "curand_kernel.h"
 83 | 
 84 | #include <algorithm>
 85 | 
 86 | #include "curand_discrete.h"
 87 | #include "curand_discrete2.h"
 88 | #include "curand_globals.h"
 89 | #include "curand_lognormal.h"
 90 | #include "curand_mrg32k3a.h"
 91 | #include "curand_mtgp32.h"
 92 | #include "curand_mtgp32_host.h"
 93 | #include "curand_mtgp32_kernel.h"
 94 | #include "curand_mtgp32dc_p_11213.h"
 95 | #include "curand_normal.h"
 96 | #include "curand_normal_static.h"
 97 | #include "curand_philox4x32_x.h"
 98 | #include "curand_poisson.h"
 99 | #include "curand_precalc.h"
100 | #include "curand_uniform.h"
101 | 
102 | #include <string>
103 | 
104 | #include "cufft.h"
105 | 
106 | #include "cusparse.h"
107 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_09_fp16_7050.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --default-preprocessor --roc %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <cuda.h>
 5 | // CHECK-NOT: #include <hip/hip_runtime.h>
 6 | #include <memory>
 7 | 
 8 | #include <cuda_runtime.h>
 9 | // CHECK-NOT: #include <hip/hip_runtime.h>
10 | 
11 | #if CUDA_VERSION >= 7050
12 | // CHECK: #include "hip/hip_fp16.h"
13 | #include "cuda_fp16.h"
14 | #endif
15 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_09_rocrand.cu:
--------------------------------------------------------------------------------
  1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --roc %clang_args
  2 | 
  3 | // CHECK: #include <hip/hip_runtime.h>
  4 | // CHECK: #include <memory>
  5 | 
  6 | // CHECK-NOT: #include <cuda_runtime.h>
  7 | // CHECK-NOT: #include <hip/hip_runtime.h>
  8 | 
  9 | // CHECK: #include "hip/hip_runtime_api.h"
 10 | // CHECK: #include "hip/channel_descriptor.h"
 11 | // CHECK: #include "hip/device_functions.h"
 12 | // CHECK: #include "hip/driver_types.h"
 13 | // CHECK: #include "hip/hip_complex.h"
 14 | // CHECK: #include "hip/hip_texture_types.h"
 15 | // CHECK: #include "hip/hip_vector_types.h"
 16 | 
 17 | // CHECK: #include <iostream>
 18 | 
 19 | // CHECK: #include <stdio.h>
 20 | 
 21 | // CHECK: #include "rocrand/rocrand.h"
 22 | // CHECK: #include "rocrand/rocrand_kernel.h"
 23 | 
 24 | // CHECK: #include <algorithm>
 25 | 
 26 | // CHECK: #include "rocrand/rocrand_discrete.h"
 27 | // CHECK: #include "rocrand/rocrand_common.h"
 28 | // CHECK: #include "rocrand/rocrand_log_normal.h"
 29 | // CHECK: #include "rocrand/rocrand_mrg32k3a.h"
 30 | // CHECK: #include "rocrand/rocrand_mtgp32.h"
 31 | // CHECK: #include "rocrand/rocrand_mtgp32_11213.h"
 32 | // CHECK: #include "rocrand/rocrand_normal.h"
 33 | // CHECK: #include "rocrand/rocrand_philox4x32_10.h"
 34 | // CHECK: #include "rocrand/rocrand_poisson.h"
 35 | // CHECK: #include "rocrand/rocrand_xorwow_precomputed.h"
 36 | // CHECK: #include "rocrand/rocrand_uniform.h"
 37 | 
 38 | // CHECK-NOT: #include "rocrand/rocrand.h"
 39 | // CHECK-NOT: #include "rocrand/rocrand_kernel.h"
 40 | // CHECK-NOT: #include "rocrand/rocrand_discrete.h"
 41 | // CHECK-NOT: #include "rocrand/rocrand_mtgp32.h"
 42 | // CHECK-NOT: #include "rocrand/rocrand_normal.h"
 43 | 
 44 | // CHECK-NOT: #include "curand_discrete.h"
 45 | // CHECK-NOT: #include "curand_discrete2.h"
 46 | // CHECK-NOT: #include "curand_globals.h"
 47 | // CHECK-NOT: #include "curand_lognormal.h"
 48 | // CHECK-NOT: #include "curand_mrg32k3a.h"
 49 | // CHECK-NOT: #include "curand_mtgp32.h"
 50 | // CHECK-NOT: #include "curand_mtgp32_host.h"
 51 | // CHECK-NOT: #include "curand_mtgp32_kernel.h"
 52 | // CHECK-NOT: #include "curand_mtgp32dc_p_11213.h"
 53 | // CHECK-NOT: #include "curand_normal.h"
 54 | // CHECK-NOT: #include "curand_normal_static.h"
 55 | // CHECK-NOT: #include "curand_philox4x32_x.h"
 56 | // CHECK-NOT: #include "curand_poisson.h"
 57 | // CHECK-NOT: #include "curand_precalc.h"
 58 | // CHECK-NOT: #include "curand_uniform.h"
 59 | 
 60 | // CHECK: #include <string>
 61 | 
 62 | // CHECK: #include "hipfft/hipfft.h"
 63 | // CHECK: #include "rocsparse.h"
 64 | 
 65 | #include <cuda.h>
 66 | // CHECK-NOT: #include <hip/hip_runtime.h>
 67 | 
 68 | #include <memory>
 69 | 
 70 | #include <cuda_runtime.h>
 71 | // CHECK-NOT: #include <hip/hip_runtime.h>
 72 | 
 73 | #include "cuda_runtime_api.h"
 74 | #include "channel_descriptor.h"
 75 | #include "device_functions.h"
 76 | #include "driver_types.h"
 77 | #include "cuComplex.h"
 78 | #include "cuda_texture_types.h"
 79 | #include "vector_types.h"
 80 | 
 81 | #include <iostream>
 82 | 
 83 | #include <stdio.h>
 84 | 
 85 | #include "curand.h"
 86 | #include "curand_kernel.h"
 87 | 
 88 | #include <algorithm>
 89 | 
 90 | #include "curand_discrete.h"
 91 | #include "curand_discrete2.h"
 92 | #include "curand_globals.h"
 93 | #include "curand_lognormal.h"
 94 | #include "curand_mrg32k3a.h"
 95 | #include "curand_mtgp32.h"
 96 | #include "curand_mtgp32_host.h"
 97 | #include "curand_mtgp32_kernel.h"
 98 | #include "curand_mtgp32dc_p_11213.h"
 99 | #include "curand_normal.h"
100 | #include "curand_normal_static.h"
101 | #include "curand_philox4x32_x.h"
102 | #include "curand_poisson.h"
103 | #include "curand_precalc.h"
104 | #include "curand_uniform.h"
105 | 
106 | #include <string>
107 | 
108 | #include "cufft.h"
109 | 
110 | #include "cusparse.h"
111 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_10.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | // Checks that HIP header file is included after #pragma once,
 4 | // which goes before include guard controlling macro.
 5 | // CHECK: #pragma once
 6 | // CHECK-NEXT: #include <hip/hip_runtime.h>
 7 | #pragma once
 8 | #ifndef HEADERS_TEST_10_H
 9 | // CHECK: #ifndef HEADERS_TEST_10_H
10 | // CHECK-NOT: #include <hip/hip_runtime.h>
11 | #define HEADERS_TEST_10_H
12 | #include <stdio.h>
13 | static int counter = 0;
14 | #endif // HEADERS_TEST_10_H
15 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_11.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | // Checks that HIP header file is included after include guard controlling macro,
 4 | // which goes before #pragma once.
 5 | // CHECK: #ifndef HEADERS_TEST_11_H
 6 | // CHECK-NEXT: #include <hip/hip_runtime.h>
 7 | #ifndef HEADERS_TEST_11_H
 8 | // CHECK: #pragma once
 9 | #pragma once
10 | // CHECK-NOT: #include <hip/hip_runtime.h>
11 | #define HEADERS_TEST_11_H
12 | #include <stdio.h>
13 | static int counter = 0;
14 | #endif // HEADERS_TEST_11_H
15 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_12_SOLVER.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --skip-excluded-preprocessor-conditional-blocks %clang_args
 2 | 
 3 | #ifndef HEADERS_TEST_12_SOLVER_H
 4 | // CHECK: #pragma once
 5 | #pragma once
 6 | #define HEADERS_TEST_12_SOLVER_H
 7 | // CHECK: #include <hip/hip_runtime.h>
 8 | // CHECK-NOT: #include <cuda_runtime.h>
 9 | #include <cuda_runtime.h>
10 | static int counter = 0;
11 | 
12 | // CHECK: #include "hipsolver.h"
13 | // CHECK-NOT: #include "hipsolver.h"
14 | // CHECK-NOT: #include "cusolver_common.h"
15 | // CHECK-NOT: #include "cusolverDn.h"
16 | // CHECK-NOT: #include "cusolverRf.h"
17 | // CHECK-NOT: #include "cusolverSp.h"
18 | #include "cusolver_common.h"
19 | #include "cusolverDn.h"
20 | #include "cusolverRf.h"
21 | #include "cusolverSp.h"
22 | #endif // HEADERS_TEST_12_SOLVER_H
23 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_12_SOLVER_10010.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | // Checks that HIP header file is included after include guard controlling macro,
 4 | // which goes before #pragma once.
 5 | // CHECK: #ifndef HEADERS_TEST_12_SOLVER_H
 6 | // CHECK-NEXT: #include <hip/hip_runtime.h>
 7 | #ifndef HEADERS_TEST_12_SOLVER_H
 8 | // CHECK: #pragma once
 9 | #pragma once
10 | // CHECK-NOT: #include <hip/hip_runtime.h>
11 | #define HEADERS_TEST_12_SOLVER_H
12 | #include <stdio.h>
13 | static int counter = 0;
14 | 
15 | // CHECK: #include "hipsolver.h"
16 | // CHECK-NOT: #include "hipsolver.h"
17 | // CHECK-NOT: #include "cusolver_common.h"
18 | // CHECK-NOT: #include "cusolverDn.h"
19 | // CHECK-NOT: #include "cusolverRf.h"
20 | // CHECK-NOT: #include "cusolverMg.h"
21 | // CHECK-NOT: #include "cusolverSp.h"
22 | // CHECK-NOT: #include "cusolverSp_LOWLEVEL_PREVIEW.h"
23 | #include "cusolver_common.h"
24 | #include "cusolverDn.h"
25 | #include "cusolverRf.h"
26 | #include "cusolverMg.h"
27 | #include "cusolverSp.h"
28 | #include "cusolverSp_LOWLEVEL_PREVIEW.h"
29 | 
30 | #endif // HEADERS_TEST_12_SOLVER_H


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_12_SOLVER_7050.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --skip-excluded-preprocessor-conditional-blocks %clang_args
 2 | 
 3 | #ifndef HEADERS_TEST_12_SOLVER_H
 4 | // CHECK: #pragma once
 5 | #pragma once
 6 | #define HEADERS_TEST_12_SOLVER_H
 7 | // CHECK: #include <hip/hip_runtime.h>
 8 | // CHECK-NOT: #include <cuda_runtime.h>
 9 | #include <cuda_runtime.h>
10 | static int counter = 0;
11 | 
12 | // CHECK: #include "hipsolver.h"
13 | // CHECK-NOT: #include "hipsolver.h"
14 | // CHECK-NOT: #include "cusolver_common.h"
15 | // CHECK-NOT: #include "cusolverDn.h"
16 | // CHECK-NOT: #include "cusolverRf.h"
17 | // CHECK-NOT: #include "cusolverSp.h"
18 | // CHECK-NOT: #include "cusolverSp_LOWLEVEL_PREVIEW.h"
19 | #include "cusolver_common.h"
20 | #include "cusolverDn.h"
21 | #include "cusolverRf.h"
22 | #include "cusolverSp.h"
23 | #include "cusolverSp_LOWLEVEL_PREVIEW.h"
24 | #endif // HEADERS_TEST_12_SOLVER_H
25 | 


--------------------------------------------------------------------------------
/tests/unit_tests/headers/headers_test_13.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | // CHECK: #include <hip/hip_math_constants.h>
 5 | // CHECK-NOT: #include <cuda.h>
 6 | // CHECK: #include <stdio.h>
 7 | #include <cuda.h>
 8 | #include <math_constants.h>
 9 | #include <stdio.h>
10 | 


--------------------------------------------------------------------------------
/tests/unit_tests/kernel_launch/kernel_launch_01.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | // Checks that a kernel launch wrapped in a user-defined macro (CUDA_LAUNCH) is converted to hipLaunchKernelGGL.
 3 | // ToDo: change lit testing in order to parse the output.
 4 | 
 5 | #define LEN 1024
 6 | #define SIZE LEN * sizeof(float)
 7 | #define ITER 1024*1024
 8 | 
 9 | // CHECK: #include <hip/hip_runtime.h>
10 | #include <algorithm>
11 | 
12 | #define CUDA_LAUNCH(cuda_call,dimGrid,dimBlock, ...) \
13 |     cuda_call<<<dimGrid,dimBlock>>>(__VA_ARGS__);
14 | 
15 | __global__ void Inc1(float *Ad, float *Bd) {
16 |   int tx = threadIdx.x + blockIdx.x * blockDim.x;  // flat global thread index
17 |   if (tx < 1) {  // only the thread with global index 0 does any work
18 |     for (int i = 0; i < ITER; ++i) {
19 |       Ad[tx] = Ad[tx] + 1.0f;  // increment the element once per outer iteration
20 |       for (int j = 0; j < 256; ++j) {  // repeats the same store 256 times
21 |         Bd[tx] = Ad[tx];
22 |       }
23 |     }
24 |   }
25 | }
26 | 
27 | int main() {
28 |   float *A, *Ad, *Bd;
29 |   A = new float[LEN];
30 |   for (int i = 0; i < LEN; ++i) {
31 |     A[i] = 0.0f;
32 |   }
33 |   // CHECK: hipError_t status;
34 |   cudaError_t status;
35 |   // CHECK: status = hipHostRegister(A, SIZE, hipHostRegisterMapped);
36 |   status = cudaHostRegister(A, SIZE, cudaHostRegisterMapped);
37 |   // CHECK: hipHostGetDevicePointer(&Ad, A, 0);
38 |   cudaHostGetDevicePointer(&Ad, A, 0);
39 |   // CHECK: hipMalloc((void**)&Bd, SIZE);
40 |   cudaMalloc((void**)&Bd, SIZE);
41 |   dim3 dimGrid(LEN / 512, 1, 1);
42 |   dim3 dimBlock(512, 1, 1);
43 | 
44 |   // CHECK: hipLaunchKernelGGL(Inc1, dim3(dimGrid), dim3(dimBlock), 0, 0, Ad, Bd);
45 |   CUDA_LAUNCH(Inc1, dimGrid, dimBlock, Ad, Bd);
46 | }
47 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/CAFFE2/caffe2/core/common_cudnn.h:
--------------------------------------------------------------------------------
1 | #ifndef CAFFE2_CORE_COMMON_CUDNN_H_
2 | #define CAFFE2_CORE_COMMON_CUDNN_H_
3 | 
4 | #include <array>
5 | #include <mutex>
6 | 
7 | #endif  // CAFFE2_CORE_COMMON_CUDNN_H_
8 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/CAFFE2/caffe2/operators/spatial_batch_norm_op.h:
--------------------------------------------------------------------------------
 1 | #ifndef CAFFE2_OPERATORS_SPATIAL_BATCH_NORM_OP_H_
 2 | #define CAFFE2_OPERATORS_SPATIAL_BATCH_NORM_OP_H_
 3 | 
 4 | #include <algorithm>
 5 | #include <array>
 6 | #include <functional>
 7 | #include <string>
 8 | #include <vector>
 9 | 
10 | namespace caffe2 {
11 | 
12 | } // namespace caffe2
13 | 
14 | #endif // CAFFE2_OPERATORS_SPATIAL_BATCH_NORM_OP_H_
15 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/CAFFE2/caffe2_01.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args
 2 | 
 3 | // NOTE: Nonworking code just for conversion testing
 4 | 
 5 | // CHECK: #include <hip/hip_runtime.h>
 6 | #include <stdio.h>
 7 | #include <stdlib.h>
 8 | #include <math.h>
 9 | // CHECK: #include "caffe2/operators/hip/spatial_batch_norm_op_miopen.hip"
10 | #include "caffe2/operators/spatial_batch_norm_op.h"
11 | // CHECK: #include "caffe2/core/hip/common_miopen.h"
12 | #include "caffe2/core/common_cudnn.h"
13 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/CAFFE2/caffe2_02.cu:
--------------------------------------------------------------------------------
  1 | // RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args
  2 | 
  3 | // NOTE: Nonworking code just for conversion testing
  4 | 
  5 | // CHECK: #include <hip/hip_runtime.h>
  6 | #include <stdio.h>
  7 | #include <stdlib.h>
  8 | #include <math.h>
  9 | #include <string>
 10 | 
 11 | namespace caffe2 {
 12 | 
 13 | // Operator Definition.
 14 | struct OperatorDef {
 15 |   int input = 1;
 16 |   int output = 2;
 17 |   int name = 3;
 18 | };
 19 | 
 20 | class OperatorBase;
 21 | class Workspace;
 22 | 
 23 | template <class T>
 24 | class Observable {
 25 |  public:
 26 |   Observable() = default;
 27 | 
 28 |   Observable(Observable&&) = default;
 29 |   Observable& operator =(Observable&&) = default;
 30 | 
 31 |   virtual ~Observable() = default;
 32 | };
 33 | 
 34 | template <class T>
 35 | class ObserverBase {
 36 |  public:
 37 |   explicit ObserverBase(T* subject) : subject_(subject) {}
 38 | 
 39 |   virtual void Start() {}
 40 |   virtual void Stop() {}
 41 | 
 42 |   virtual std::string debugInfo() {
 43 |     return "Not implemented.";
 44 |   }
 45 | 
 46 |   virtual ~ObserverBase() noexcept {};
 47 | 
 48 |   T* subject() const {
 49 |     return subject_;
 50 |   }
 51 | 
 52 |  protected:
 53 |   T* subject_;
 54 | };
 55 | 
 56 | typedef ObserverBase<OperatorBase> OperatorObserver;
 57 | 
 58 | class OperatorBase : public Observable<OperatorBase> {
 59 |  public:
 60 |   explicit OperatorBase(const OperatorDef& operator_def, Workspace* ws);
 61 |   virtual ~OperatorBase() noexcept {}
 62 | };
 63 | 
 64 | template <class Context>
 65 | class Operator : public OperatorBase {
 66 |  public:
 67 |   explicit Operator(const OperatorDef& operator_def, Workspace* ws)
 68 |       : OperatorBase(operator_def, ws) {
 69 |   }
 70 |   ~Operator() noexcept override {}
 71 | };
 72 | 
 73 | template <class Context>
 74 | class DummyEmptyOp : public Operator<Context> {
 75 |  public:
 76 |   DummyEmptyOp(const OperatorDef& def, Workspace* ws)
 77 |       : Operator<Context>(def, ws) {}
 78 | 
 79 |   bool RunOnDevice() final { return true; }
 80 | };
 81 | 
 82 | 
 83 | class CUDAContext {
 84 | public:
 85 |   CUDAContext();
 86 |   virtual ~CUDAContext() noexcept {}
 87 | };
 88 | 
 89 | #define REGISTER_CUDA_OPERATOR(name, ...)                           \
 90 |   void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name();              \
 91 |   static void CAFFE_ANONYMOUS_VARIABLE_CUDA##name() { \
 92 |     CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name();                 \
 93 |   }
 94 | 
 95 | #define REGISTER_CUDA_OPERATOR_CREATOR(key, ...)
 96 | 
 97 | // CHECK: REGISTER_HIP_OPERATOR(Operator, DummyEmptyOp<HIPContext>);
 98 | REGISTER_CUDA_OPERATOR(Operator, DummyEmptyOp<CUDAContext>);
 99 | // CHECK: REGISTER_HIP_OPERATOR_CREATOR(Operator, DummyEmptyOp<HIPContext>);
100 | REGISTER_CUDA_OPERATOR_CREATOR(Operator, DummyEmptyOp<CUDAContext>);
101 | 
102 | }
103 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/CUB/cub_01.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | // CHECK: #include <hip/hip_runtime.h>
 3 | #include <iostream>
 4 | // CHECK: #include <hiprand/hiprand.h>
 5 | #include <curand.h>
 6 | #define THRUST_NS_QUALIFIER ::thrust
 7 | // CHECK: #include <hipcub/hipcub.hpp>
 8 | #include <cub/cub.cuh>
 9 | 
10 | #include <iostream>
11 | 
12 | // TODO:
13 | // using namespace cub;
14 | 
15 | template <typename T>
16 | __global__ void sort(const T* data_in, T* data_out){  // block-wide radix sort, 4 keys per thread
17 |      // CHECK: typedef ::hipcub::BlockRadixSort<T, 1024, 4> BlockRadixSortT;
18 |      typedef ::cub::BlockRadixSort<T, 1024, 4> BlockRadixSortT;
19 |     __shared__ typename BlockRadixSortT::TempStorage tmp_sort;
20 |     T items[4];  // key type must be T so Sort() accepts the array for any instantiation
21 |     int i0 = 4 * (blockIdx.x * blockDim.x + threadIdx.x);  // first of this thread's 4 consecutive elements
22 |     for (int i = 0; i < 4; ++i){
23 |         items[i] = data_in[i0 + i];
24 |     }
25 |     BlockRadixSortT(tmp_sort).Sort(items);
26 |     for (int i = 0; i < 4; ++i){
27 |         data_out[i0 + i] = items[i];
28 |     }
29 | }
30 | 
31 | int main(){
32 |     double* d_gpu = NULL;
33 |     double* result_gpu = NULL;
34 |     double* data_sorted = new double[4096];
35 |     // Allocate memory on the GPU
36 |     // CHECK: hipMalloc(&d_gpu, 4096 * sizeof(double));
37 |     cudaMalloc(&d_gpu, 4096 * sizeof(double));
38 |     // CHECK: hipMalloc(&result_gpu, 4096 * sizeof(double));
39 |     cudaMalloc(&result_gpu, 4096 * sizeof(double));
40 |     // CHECK: hiprandGenerator_t gen;
41 |     curandGenerator_t gen;
42 |     // Create generator
43 |     // CHECK: hiprandCreateGenerator(&gen, HIPRAND_RNG_PSEUDO_DEFAULT);
44 |     curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
45 |     // Fill array with random numbers
46 |     // CHECK: hiprandGenerateNormalDouble(gen, d_gpu, 4096, 0.0, 1.0);
47 |     curandGenerateNormalDouble(gen, d_gpu, 4096, 0.0, 1.0);
48 |     // Destroy generator
49 |     // CHECK: hiprandDestroyGenerator(gen);
50 |     curandDestroyGenerator(gen);
51 |     // Sort data
52 |     // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort), dim3(1), dim3(1024), 0, 0, d_gpu, result_gpu);
53 |     sort<<<1, 1024>>>(d_gpu, result_gpu);
54 |     // CHECK: hipMemcpy(data_sorted, result_gpu, 4096 * sizeof(double), hipMemcpyDeviceToHost);
55 |     cudaMemcpy(data_sorted, result_gpu, 4096 * sizeof(double), cudaMemcpyDeviceToHost);
56 |     // Write the sorted data to standard out
57 |     for (int i = 0; i < 4096; ++i){
58 |         std::cout << data_sorted[i] << ", ";
59 |     }
60 |     std::cout << std::endl;
61 | }
62 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/CUB/cub_02.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | // CHECK: #include <hip/hip_runtime.h>
 3 | #include <iostream>
 4 | // CHECK: #include <hiprand/hiprand.h>
 5 | #include <curand.h>
 6 | #define THRUST_NS_QUALIFIER ::thrust
 7 | // CHECK: #include <hipcub/hipcub.hpp>
 8 | #include <cub/cub.cuh>
 9 | 
10 | #include <iostream>
11 | 
12 | template <int BLOCK_WIDTH, int ITEMS_PER_THREAD,
13 |           // CHECK: hipcub::BlockLoadAlgorithm BLOCK_LOAD_ALGO,
14 |           cub::BlockLoadAlgorithm BLOCK_LOAD_ALGO,
15 |           // CHECK: hipcub::BlockStoreAlgorithm BLOCK_STORE_ALGO,
16 |           cub::BlockStoreAlgorithm BLOCK_STORE_ALGO,
17 |           typename T>
18 | __global__ void sort(const T* data_in, T* data_out){
19 |     // CHECK: typedef hipcub::BlockLoad<T, BLOCK_WIDTH, ITEMS_PER_THREAD, BLOCK_LOAD_ALGO> BlockLoadT;
20 |     typedef cub::BlockLoad<T, BLOCK_WIDTH, ITEMS_PER_THREAD, BLOCK_LOAD_ALGO> BlockLoadT;
21 |     // CHECK: typedef hipcub::BlockRadixSort<T, BLOCK_WIDTH, ITEMS_PER_THREAD> BlockRadixSortT;
22 |     typedef cub::BlockRadixSort<T, BLOCK_WIDTH, ITEMS_PER_THREAD> BlockRadixSortT;
23 |     // CHECK: typedef hipcub::BlockStore<T, BLOCK_WIDTH, ITEMS_PER_THREAD, BLOCK_STORE_ALGO> BlockStoreT;
24 |     typedef cub::BlockStore<T, BLOCK_WIDTH, ITEMS_PER_THREAD, BLOCK_STORE_ALGO> BlockStoreT;
25 |     __shared__ union {
26 |         typename BlockLoadT::TempStorage load;
27 |         typename BlockRadixSortT::TempStorage sort;
28 |         typename BlockStoreT::TempStorage store;
29 |     } tmp_storage;
30 |     T items[ITEMS_PER_THREAD];
31 |     BlockLoadT(tmp_storage.load).Load(data_in + blockIdx.x * BLOCK_WIDTH * ITEMS_PER_THREAD, items);
32 |     __syncthreads();
33 |     BlockRadixSortT(tmp_storage.sort).Sort(items);
34 |     __syncthreads();
35 |     BlockStoreT(tmp_storage.store).Store(data_out + blockIdx.x * BLOCK_WIDTH * ITEMS_PER_THREAD, items);
36 | }
37 | 
38 | int main() {
39 |     double* d_gpu = NULL;
40 |     double* result_gpu = NULL;
41 |     double* data_sorted = new double[1000*4096];
42 |     // Allocate memory on the GPU
43 |     // CHECK: hipMalloc(&d_gpu, 1000*4096 * sizeof(double));
44 |     cudaMalloc(&d_gpu, 1000*4096 * sizeof(double));
45 |     // CHECK: hipMalloc(&result_gpu, 1000*4096 * sizeof(double));
46 |     cudaMalloc(&result_gpu, 1000*4096 * sizeof(double));
47 |     // CHECK: hiprandGenerator_t gen;
48 |     curandGenerator_t gen;
49 |     // Create generator
50 |     // CHECK: hiprandCreateGenerator(&gen, HIPRAND_RNG_PSEUDO_DEFAULT);
51 |     curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
52 |     // Fill array with random numbers
53 |     // CHECK: hiprandGenerateNormalDouble(gen, d_gpu, 1000*4096, 0.0, 1.0);
54 |     curandGenerateNormalDouble(gen, d_gpu, 1000*4096, 0.0, 1.0);
55 |     // Destroy generator
56 |     // CHECK: hiprandDestroyGenerator(gen);
57 |     curandDestroyGenerator(gen);
58 |     // Sort data
59 |     // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<512, 8, hipcub::BLOCK_LOAD_TRANSPOSE, hipcub::BLOCK_STORE_TRANSPOSE>), dim3(1000), dim3(512), 0, 0, d_gpu, result_gpu);
60 |     sort<512, 8, cub::BLOCK_LOAD_TRANSPOSE, cub::BLOCK_STORE_TRANSPOSE><<<1000, 512>>>(d_gpu, result_gpu);
61 |     // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<256, 16, hipcub::BLOCK_LOAD_DIRECT, hipcub::BLOCK_STORE_DIRECT>), dim3(1000), dim3(256), 0, 0, d_gpu, result_gpu);
62 |     sort<256, 16, cub::BLOCK_LOAD_DIRECT, cub::BLOCK_STORE_DIRECT><<<1000, 256>>>(d_gpu, result_gpu);
63 |     // CHECK: hipMemcpy(data_sorted, result_gpu, 1000*4096*sizeof(double), hipMemcpyDeviceToHost);
64 |     cudaMemcpy(data_sorted, result_gpu, 1000*4096*sizeof(double), cudaMemcpyDeviceToHost);
65 |     // Write the sorted data to standard out
66 |     for (int i = 0; i < 4095; ++i) {
67 |         std::cout << data_sorted[i] << ", ";
68 |     }
69 |     std::cout << data_sorted[4095] << std::endl;
70 | }
71 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/CUB/cub_03.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | // CHECK: #include <hip/hip_runtime.h>
 3 | #include <iostream>
 4 | #define THRUST_NS_QUALIFIER ::thrust
 5 | // CHECK: #include <hipcub/hipcub.hpp>
 6 | #include <cub/cub.cuh>
 7 | 
 8 | // using namespace hipcub;
 9 | using namespace cub;
10 | 
11 | // Simple CUDA kernel for computing tiled prefix sums (conversion fixture: results stay in registers, d_out is not written)
12 | template <int BLOCK_THREADS, int ITEMS_PER_THREAD,
13 |           // CHECK: hipcub::BlockLoadAlgorithm LOAD_ALGO,
14 |           cub::BlockLoadAlgorithm LOAD_ALGO,
15 |           // CHECK: hipcub::BlockScanAlgorithm SCAN_ALGO>
16 |           cub::BlockScanAlgorithm SCAN_ALGO>
17 | __global__ void ScanTilesKernel(int *d_in, int *d_out) {
18 |   // Specialize collective types for problem context (first BlockLoad argument is the element type)
19 |   // CHECK: typedef ::hipcub::BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD, LOAD_ALGO> BlockLoadT;
20 |   typedef ::cub::BlockLoad<int, BLOCK_THREADS, ITEMS_PER_THREAD, LOAD_ALGO> BlockLoadT;
21 |   typedef BlockScan<int, BLOCK_THREADS, SCAN_ALGO> BlockScanT;
22 |   // Allocate on-chip temporary storage; load and reduce alias the same shared bytes
23 |   __shared__ union {
24 |     typename BlockLoadT::TempStorage load;
25 |     typename BlockScanT::TempStorage reduce;
26 |   } temp_storage;
27 |   // Load this block's tile, ITEMS_PER_THREAD items per thread, into thread_data
28 |   int thread_data[ITEMS_PER_THREAD];
29 |   int offset = blockIdx.x * (BLOCK_THREADS * ITEMS_PER_THREAD);
30 |   BlockLoadT(temp_storage.load).Load(d_in + offset, thread_data);
31 |   __syncthreads();  // required before the union is reused as scan storage
32 |   // Compute the block-wide inclusive prefix sum in place
33 |   BlockScanT(temp_storage.reduce).InclusiveSum(thread_data, thread_data);
34 | }
35 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | #include <math.h>
 7 | // CHECK: #include "hipblas.h"
 8 | // CHECK-NOT: #include "cublas_v2.h"
 9 | #include "cublas.h"
10 | #include "cublas_v2.h"
11 | // CHECK-NOT: #include "hipblas.h"
12 | #define M 6
13 | #define N 5
14 | #define IDX2C(i,j,ld) (((j)*(ld))+(i))
15 | static __inline__ void modify(float *m, int ldm, int n, int p, int q, float
16 |   alpha, float beta) {
17 |   // CHECK: hipblasHandle_t blasHandle;
18 |   cublasHandle_t blasHandle;
19 |   // CHECK: hipblasStatus_t blasStatus = hipblasCreate(&blasHandle);
20 |   cublasStatus blasStatus = cublasCreate(&blasHandle);  // result is stored but never inspected
21 |   // CHECK: hipblasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm);
22 |   cublasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm);  // scale by alpha with stride ldm
23 |   // CHECK: hipblasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1);
24 |   cublasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1);  // scale by beta with stride 1
25 |   // CHECK: hipblasDestroy(blasHandle);
26 |   cublasDestroy(blasHandle);  // handle is created and destroyed on every call
27 | }
28 | int main(void) {
29 |   int i, j;
30 |   // CHECK: hipblasStatus_t stat;
31 |   cublasStatus stat;
32 |   float* devPtrA;
33 |   float* a = 0;
34 |   a = (float *)malloc(M * N * sizeof(*a));
35 |   if (!a) {
36 |     printf("host memory allocation failed");
37 |     return EXIT_FAILURE;
38 |   }
39 |   for (j = 0; j < N; j++) {
40 |     for (i = 0; i < M; i++) {
41 |       a[IDX2C(i, j, M)] = (float)(i * M + j + 1);
42 |     }
43 |   }
44 |   // cublasInit is not supported yet
45 |   cublasInit();
46 |   // cublasAlloc is not supported yet
47 |   stat = cublasAlloc(M*N, sizeof(*a), (void**)&devPtrA);
48 |   // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
49 |   if (stat != CUBLAS_STATUS_SUCCESS) {
50 |     printf("device memory allocation failed");
51 |     // cublasShutdown is not supported yet
52 |     cublasShutdown();
53 |     return EXIT_FAILURE;
54 |   }
55 |   // CHECK: stat = hipblasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
56 |   stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
57 |   // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
58 |   if (stat != CUBLAS_STATUS_SUCCESS) {
59 |     printf("data download failed");
60 |     // cublasFree is not supported yet
61 |     cublasFree(devPtrA);
62 |     // cublasShutdown is not supported yet
63 |     cublasShutdown();
64 |     return EXIT_FAILURE;
65 |   }
66 |   modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f);
67 |   // CHECK: stat = hipblasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
68 |   stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
69 |   // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
70 |   if (stat != CUBLAS_STATUS_SUCCESS) {
71 |     printf("data upload failed");
72 |     // cublasFree is not supported yet
73 |     cublasFree(devPtrA);
74 |     // cublasShutdown is not supported yet
75 |     cublasShutdown();
76 |     return EXIT_FAILURE;
77 |   }
78 |   // cublasFree is not supported yet
79 |   cublasFree(devPtrA);
80 |   // cublasShutdown is not supported yet
81 |   cublasShutdown();
82 |   for (j = 0; j < N; j++) {
83 |     for (i = 0; i < M; i++) {
84 |       printf("%7.0f", a[IDX2C(i, j, M)]);
85 |     }
86 |     printf("\n");
87 |   }
88 |   free(a);
89 |   return EXIT_SUCCESS;
90 | }
91 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/cuBLAS/cublas_0_based_indexing_v2.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include <math.h>
 6 | // CHECK: #include "hip/hip_runtime.h"
 7 | #include "cuda.h"
 8 | // CHECK: #include "hipblas.h"
 9 | // CHECK-NOT: #include "cublas_v2.h"
10 | #include "cublas_v2.h"
11 | // CHECK-NOT: #include "hipblas.h"
12 | #define M 6
13 | #define N 5
14 | #define IDX2C(i,j,ld) (((j)*(ld))+(i))
15 | static __inline__ void modify(float *m, int ldm, int n, int p, int q, float
16 |   alpha, float beta) {
17 |   // CHECK: hipblasHandle_t blasHandle;
18 |   cublasHandle_t blasHandle;
19 |   // CHECK: hipblasStatus_t blasStatus = hipblasCreate(&blasHandle);
20 |   cublasStatus_t blasStatus = cublasCreate(&blasHandle);
21 |   // CHECK: hipblasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm);
22 |   cublasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm);
23 |   // CHECK: hipblasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1);
24 |   cublasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1);
25 |   // CHECK: hipblasDestroy(blasHandle);
26 |   cublasDestroy(blasHandle);
27 | }
28 | int main(void) {
29 |   int i, j;
30 |   // CHECK: hipblasStatus_t stat;
31 |   cublasStatus_t stat;
32 |   // CHECK: hipError_t result = hipSuccess;
33 |   cudaError_t result = cudaSuccess;
34 |   float* devPtrA;
35 |   float* a = 0;
36 |   a = (float *)malloc(M * N * sizeof(*a));
37 |   if (!a) {
38 |     printf("host memory allocation failed");
39 |     return EXIT_FAILURE;
40 |   }
41 |   for (j = 0; j < N; j++) {
42 |     for (i = 0; i < M; i++) {
43 |       a[IDX2C(i, j, M)] = (float)(i * M + j + 1);
44 |     }
45 |   }
46 |   // CHECK: result = hipMalloc((void**)&devPtrA, M*N*sizeof(*a));
47 |   result = cudaMalloc((void**)&devPtrA, M*N*sizeof(*a));
48 |   // CHECK: if (result != hipSuccess) {
49 |   if (result != cudaSuccess) {
50 |     printf("device memory allocation failed");
51 |     return EXIT_FAILURE;
52 |   }
53 |   // CHECK: stat = hipblasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
54 |   stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
55 |   // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
56 |   if (stat != CUBLAS_STATUS_SUCCESS) {
57 |     printf("data download failed");
58 |     // CHECK: hipFree(devPtrA);
59 |     cudaFree(devPtrA);
60 |     return EXIT_FAILURE;
61 |   }
62 |   modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f);
63 |   // CHECK: stat = hipblasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
64 |   stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
65 |   // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
66 |   if (stat != CUBLAS_STATUS_SUCCESS) {
67 |     printf("data upload failed");
68 |     // CHECK: hipFree(devPtrA);
69 |     cudaFree(devPtrA);
70 |     return EXIT_FAILURE;
71 |   }
72 |   // CHECK: hipFree(devPtrA);
73 |   cudaFree(devPtrA);
74 |   for (j = 0; j < N; j++) {
75 |     for (i = 0; i < M; i++) {
76 |       printf("%7.0f", a[IDX2C(i, j, M)]);
77 |     }
78 |     printf("\n");
79 |   }
80 |   free(a);
81 |   return EXIT_SUCCESS;
82 | }
83 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include <math.h>
 6 | // CHECK: #include <hip/hip_runtime.h>
 7 | #include <cuda_runtime.h>
 8 | // CHECK: #include "hipblas.h"
 9 | #include "cublas_v2.h"
10 | #define M 6
11 | #define N 5
12 | #define IDX2F(i,j,ld) ((((j)-1)*(ld))+((i)-1))
13 | // CHECK: static __inline__ void modify(hipblasHandle_t handle, float *m, int ldm, int
14 | static __inline__ void modify(cublasHandle_t handle, float *m, int ldm, int
15 |   n, int p, int q, float alpha, float beta) {
16 |   // CHECK: hipblasSscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm);
17 |   // CHECK: hipblasSscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1);
18 |   cublasSscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm);
19 |   cublasSscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1);
20 | }
21 | int main(void) {
22 |   // CHECK: hipError_t cudaStat;
23 |   // CHECK: hipblasStatus_t stat;
24 |   // CHECK: hipblasHandle_t handle;
25 |   cudaError_t cudaStat;
26 |   cublasStatus_t stat;
27 |   cublasHandle_t handle;
28 |   int i, j;
29 |   float* devPtrA;
30 |   float* a = 0;
31 |   a = (float *)malloc(M * N * sizeof(*a));
32 |   if (!a) {
33 |     printf("host memory allocation failed");
34 |     return EXIT_FAILURE;
35 |   }
36 |   for (j = 1; j <= N; j++) {
37 |     for (i = 1; i <= M; i++) {
38 |       a[IDX2F(i, j, M)] = (float)((i - 1) * M + j);
39 |     }
40 |   }
41 |   // CHECK: cudaStat = hipMalloc((void**)&devPtrA, M*N * sizeof(*a));
42 |   cudaStat = cudaMalloc((void**)&devPtrA, M*N * sizeof(*a));
43 |   // CHECK: if (cudaStat != hipSuccess) {
44 |   if (cudaStat != cudaSuccess) {
45 |     printf("device memory allocation failed");
46 |     return EXIT_FAILURE;
47 |   }
48 |   // CHECK: stat = hipblasCreate(&handle);
49 |   stat = cublasCreate(&handle);
50 |   // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
51 |   if (stat != CUBLAS_STATUS_SUCCESS) {
52 |     printf("CUBLAS initialization failed\n");
53 |     return EXIT_FAILURE;
54 |   }
55 |   // CHECK: stat = hipblasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
56 |   stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
57 |   // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
58 |   if (stat != CUBLAS_STATUS_SUCCESS) {
59 |     printf("data download failed");
60 |     // CHECK: hipFree(devPtrA);
61 |     // CHECK: hipblasDestroy(handle);
62 |     cudaFree(devPtrA);
63 |     cublasDestroy(handle);
64 |     return EXIT_FAILURE;
65 |   }
66 |   modify(handle, devPtrA, M, N, 2, 3, 16.0f, 12.0f);
67 |   // CHECK: stat = hipblasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
68 |   stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
69 |   // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
70 |   if (stat != CUBLAS_STATUS_SUCCESS) {
71 |     printf("data upload failed");
72 |     // CHECK: hipFree(devPtrA);
73 |     // CHECK: hipblasDestroy(handle);
74 |     cudaFree(devPtrA);
75 |     cublasDestroy(handle);
76 |     return EXIT_FAILURE;
77 |   }
78 |   // CHECK: hipFree(devPtrA);
79 |   // CHECK: hipblasDestroy(handle);
80 |   cudaFree(devPtrA);
81 |   cublasDestroy(handle);
82 |   for (j = 1; j <= N; j++) {
83 |     for (i = 1; i <= M; i++) {
84 |       printf("%7.0f", a[IDX2F(i, j, M)]);
85 |     }
86 |     printf("\n");
87 |   }
88 |   free(a);
89 |   return EXIT_SUCCESS;
90 | }
91 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu:
--------------------------------------------------------------------------------
  1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
  2 | 
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | // CHECK: #include <hip/hip_runtime.h>
  6 | #include <cuda_runtime.h>
  7 | // CHECK: #include "hipblas.h"
  8 | #include "cublas_v2.h"
  9 | #define IDX2C(i,j,ld) (((j)*(ld))+(i))
 10 | #define m 6
 11 | #define n 4
 12 | #define k 5
 13 | int main(void) {  // Fixture: builds host matrices a, b, c, runs one Sgemm on device copies, prints c.
 14 |   // CHECK: hipError_t cudaStat;
 15 |   // CHECK: hipblasStatus_t stat;
 16 |   // CHECK: hipblasHandle_t handle;
 17 |   cudaError_t cudaStat;
 18 |   cublasStatus_t stat;
 19 |   cublasHandle_t handle;
 20 |   int i, j;
 21 |   float * a;
 22 |   float * b;
 23 |   float * c;
 24 |   a = (float *)malloc(m*k * sizeof(float));  // the three host allocations are used without a null check
 25 |   b = (float *)malloc(k*n * sizeof(float));
 26 |   c = (float *)malloc(m*n * sizeof(float));
 27 |   int ind = 11;  // fill value; restarted at 11 before each matrix
 28 |   for (j = 0; j<k; j++) {
 29 |     for (i = 0; i<m; i++) {
 30 |       a[IDX2C(i, j, m)] = (float)ind++;  // IDX2C(i, j, ld) expands to j*ld + i
 31 |     }
 32 |   }
 33 |   printf("a:\n");
 34 |   for (i = 0; i<m; i++) {
 35 |     for (j = 0; j<k; j++) {
 36 |       printf(" %5.0f", a[IDX2C(i, j, m)]);
 37 |     }
 38 |     printf("\n");
 39 |   }
 40 |   ind = 11;
 41 |   for (j = 0; j<n; j++) {
 42 |     for (i = 0; i<k; i++) {
 43 |       b[IDX2C(i, j, k)] = (float)ind++;
 44 |     }
 45 |   }
 46 |   printf("b:\n");
 47 |   for (i = 0; i<k; i++) {
 48 |     for (j = 0; j<n; j++) {
 49 |       printf(" %5.0f", b[IDX2C(i, j, k)]);
 50 |     }
 51 |     printf("\n");
 52 |   }
 53 |   ind = 11;
 54 |   for (j = 0; j<n; j++) {
 55 |     for (i = 0; i<m; i++) {
 56 |       c[IDX2C(i, j, m)] = (float)ind++;
 57 |     }
 58 |   }
 59 |   printf("c:\n");
 60 |   for (i = 0; i<m; i++) {
 61 |     for (j = 0; j<n; j++) {
 62 |       printf(" %5.0f", c[IDX2C(i, j, m)]);
 63 |     }
 64 |     printf("\n");
 65 |   }
 66 |   float * d_a;
 67 |   float * d_b;
 68 |   float * d_c;
 69 |   // CHECK: cudaStat = hipMalloc((void **)& d_a, m*k * sizeof(*a));
 70 |   // CHECK: cudaStat = hipMalloc((void **)& d_b, k*n * sizeof(*b));
 71 |   // CHECK: cudaStat = hipMalloc((void **)& d_c, m*n * sizeof(*c));
 72 |   cudaStat = cudaMalloc((void **)& d_a, m*k * sizeof(*a));  // cudaStat is overwritten by each allocation and never inspected
 73 |   cudaStat = cudaMalloc((void **)& d_b, k*n * sizeof(*b));
 74 |   cudaStat = cudaMalloc((void **)& d_c, m*n * sizeof(*c));
 75 |   // CHECK: stat = hipblasCreate(&handle);
 76 |   stat = cublasCreate(&handle);  // stat is reassigned by every library call below and never inspected
 77 |   // CHECK: stat = hipblasSetMatrix(m, k, sizeof(*a), a, m, d_a, m);
 78 |   // CHECK: stat = hipblasSetMatrix(k, n, sizeof(*b), b, k, d_b, k);
 79 |   // CHECK: stat = hipblasSetMatrix(m, n, sizeof(*c), c, m, d_c, m);
 80 |   stat = cublasSetMatrix(m, k, sizeof(*a), a, m, d_a, m);
 81 |   stat = cublasSetMatrix(k, n, sizeof(*b), b, k, d_b, k);
 82 |   stat = cublasSetMatrix(m, n, sizeof(*c), c, m, d_c, m);
 83 |   float al = 1.0f;  // scalar arguments, handed to Sgemm by address
 84 |   float bet = 1.0f;
 85 |   // CHECK: stat = hipblasSgemm(handle, HIPBLAS_OP_N, HIPBLAS_OP_N, m, n, k, &al, d_a, m, d_b, k, &bet, d_c, m);
 86 |   stat = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &al, d_a, m, d_b, k, &bet, d_c, m);
 87 |   // CHECK: stat = hipblasGetMatrix(m, n, sizeof(*c), d_c, m, c, m);
 88 |   stat = cublasGetMatrix(m, n, sizeof(*c), d_c, m, c, m);  // copies d_c back into the host buffer c
 89 |   printf("c after Sgemm :\n");
 90 |   for (i = 0; i<m; i++) {
 91 |     for (j = 0; j<n; j++) {
 92 |       printf(" %7.0f", c[IDX2C(i, j, m)]);
 93 |     }
 94 |     printf("\n");
 95 |   }
 96 |   // CHECK: hipFree(d_a);
 97 |   // CHECK: hipFree(d_b);
 98 |   // CHECK: hipFree(d_c);
 99 |   // CHECK: hipblasDestroy(handle);
100 |   cudaFree(d_a);
101 |   cudaFree(d_b);
102 |   cudaFree(d_c);
103 |   cublasDestroy(handle);
104 |   free(a);
105 |   free(b);
106 |   free(c);
107 |   return EXIT_SUCCESS;
108 | }
109 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/cuBLAS/cublas_v1.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | #include <math.h>
 7 | // CHECK: #include "hipblas.h"
 8 | #include "cublas.h"
 9 | #define M 6
10 | #define N 5
11 | #define IDX2C(i,j,ld) (((j)*(ld))+(i))
12 | static __inline__ void modify(float* m, int ldm, int n, int p, int q, float  // Applies two Sscal calls that both start at element IDX2C(p, q, ldm) of m.
13 |   alpha, float beta) {
14 |   // CHECK-NOT: hipblasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm);
15 |   // CHECK-NOT: hipblasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);
16 |   // CHECK: cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm);
17 |   // CHECK: cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);
18 |   cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm);  // handle-less call form; the expectations above require it to stay untranslated (stride ldm)
19 |   cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);  // same start element, stride 1
20 | }
21 | int main(void) {  // Fixture: fills an M x N host matrix, round-trips it through the device with modify() in between, prints it.
22 |   int i, j;
23 |   // CHECK: hipblasStatus_t stat;
24 |   cublasStatus stat;
25 |   float* devPtrA;
26 |   float* a = 0;
27 |   a = (float*)malloc(M * N * sizeof(*a));
28 |   if (!a) {
29 |     printf("host memory allocation failed");
30 |     return EXIT_FAILURE;
31 |   }
32 |   for (j = 0; j < N; j++) {
33 |     for (i = 0; i < M; i++) {
34 |       a[IDX2C(i, j, M)] = (float)(i * M + j + 1);  // IDX2C(i, j, ld) expands to j*ld + i
35 |     }
36 |   }
37 |   // cublasInit is not supported yet
38 |   cublasInit();
39 |   // cublasAlloc is not supported yet
40 |   stat = cublasAlloc(M * N, sizeof(*a), (void**)&devPtrA);
41 |   // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
42 |   if (stat != CUBLAS_STATUS_SUCCESS) {
43 |     printf("device memory allocation failed");
44 |     // cublasShutdown is not supported yet
45 |     cublasShutdown();
46 |     return EXIT_FAILURE;  // the host buffer a is not freed on any of the failure returns in this function
47 |   }
48 |   // CHECK: stat = hipblasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
49 |   stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
50 |   // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
51 |   if (stat != CUBLAS_STATUS_SUCCESS) {
52 |     printf("data download failed");
53 |     // cublasFree is not supported yet
54 |     cublasFree(devPtrA);
55 |     // cublasShutdown is not supported yet
56 |     cublasShutdown();
57 |     return EXIT_FAILURE;
58 |   }
59 |   modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f);  // p = 1, q = 2, alpha = 16, beta = 12
60 |   // CHECK: stat = hipblasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
61 |   stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
62 |   // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) {
63 |   if (stat != CUBLAS_STATUS_SUCCESS) {
64 |     printf("data upload failed");
65 |     // cublasFree is not supported yet
66 |     cublasFree(devPtrA);
67 |     // cublasShutdown is not supported yet
68 |     cublasShutdown();
69 |     return EXIT_FAILURE;
70 |   }
71 |   // cublasFree is not supported yet
72 |   cublasFree(devPtrA);
73 |   // cublasShutdown is not supported yet
74 |   cublasShutdown();
75 |   for (j = 0; j < N; j++) {  // one output line per j, M values per line
76 |     for (i = 0; i < M; i++) {
77 |       printf("%7.0f", a[IDX2C(i, j, M)]);
78 |     }
79 |     printf("\n");
80 |   }
81 |   free(a);
82 |   return EXIT_SUCCESS;
83 | }
84 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 -roc %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | #include <math.h>
 7 | // CHECK: #include "rocblas.h"
 8 | // CHECK-NOT: #include "cublas_v2.h"
 9 | #include "cublas.h"
10 | #include "cublas_v2.h"
11 | // CHECK-NOT: #include "rocblas.h"
12 | #define M 6
13 | #define N 5
14 | #define IDX2C(i,j,ld) (((j)*(ld))+(i))
15 | static __inline__ void modify(float *m, int ldm, int n, int p, int q, float  // Creates a temporary handle, applies two Sscal calls starting at IDX2C(p, q, ldm), destroys the handle.
16 |   alpha, float beta) {
17 |   // CHECK: rocblas_handle blasHandle;
18 |   cublasHandle_t blasHandle;
19 |   // CHECK: rocblas_status blasStatus = rocblas_create_handle(&blasHandle);
20 |   cublasStatus blasStatus = cublasCreate(&blasHandle);  // blasStatus is never read afterwards
21 |   // CHECK: rocblas_sscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm);
22 |   // CHECK: rocblas_sscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1);
23 |   cublasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm);  // stride ldm
24 |   cublasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1);  // same start element, stride 1
25 |   // CHECK: rocblas_destroy_handle(blasHandle);
26 |   cublasDestroy(blasHandle);
27 | }
28 | int main(void) {  // Fixture: fills an M x N host matrix, round-trips it through the device with modify() in between, prints it.
29 |   int i, j;
30 |   // CHECK: rocblas_status stat;
31 |   cublasStatus stat;
32 |   float* devPtrA;
33 |   float* a = 0;
34 |   a = (float *)malloc(M * N * sizeof(*a));
35 |   if (!a) {
36 |     printf("host memory allocation failed");
37 |     return EXIT_FAILURE;
38 |   }
39 |   for (j = 0; j < N; j++) {
40 |     for (i = 0; i < M; i++) {
41 |       a[IDX2C(i, j, M)] = (float)(i * M + j + 1);  // IDX2C(i, j, ld) expands to j*ld + i
42 |     }
43 |   }
44 |   // cublasInit is not supported yet
45 |   cublasInit();
46 |   // cublasAlloc is not supported yet
47 |   stat = cublasAlloc(M*N, sizeof(*a), (void**)&devPtrA);
48 |   // CHECK: if (stat != rocblas_status_success) {
49 |   if (stat != CUBLAS_STATUS_SUCCESS) {
50 |     printf("device memory allocation failed");
51 |     // cublasShutdown is not supported yet
52 |     cublasShutdown();
53 |     return EXIT_FAILURE;  // the host buffer a is not freed on any of the failure returns in this function
54 |   }
55 |   // CHECK: stat = rocblas_set_matrix(M, N, sizeof(*a), a, M, devPtrA, M);
56 |   stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
57 |   // CHECK: if (stat != rocblas_status_success) {
58 |   if (stat != CUBLAS_STATUS_SUCCESS) {
59 |     printf("data download failed");
60 |     // cublasFree is not supported yet
61 |     cublasFree(devPtrA);
62 |     // cublasShutdown is not supported yet
63 |     cublasShutdown();
64 |     return EXIT_FAILURE;
65 |   }
66 |   modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f);  // p = 1, q = 2, alpha = 16, beta = 12
67 |   // CHECK: stat = rocblas_get_matrix(M, N, sizeof(*a), devPtrA, M, a, M);
68 |   stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
69 |   // CHECK: if (stat != rocblas_status_success) {
70 |   if (stat != CUBLAS_STATUS_SUCCESS) {
71 |     printf("data upload failed");
72 |     // cublasFree is not supported yet
73 |     cublasFree(devPtrA);
74 |     // cublasShutdown is not supported yet
75 |     cublasShutdown();
76 |     return EXIT_FAILURE;
77 |   }
78 |   // cublasFree is not supported yet
79 |   cublasFree(devPtrA);
80 |   // cublasShutdown is not supported yet
81 |   cublasShutdown();
82 |   for (j = 0; j < N; j++) {  // one output line per j, M values per line
83 |     for (i = 0; i < M; i++) {
84 |       printf("%7.0f", a[IDX2C(i, j, M)]);
85 |     }
86 |     printf("\n");
87 |   }
88 |   free(a);
89 |   return EXIT_SUCCESS;
90 | }
91 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas_v2.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 -roc %clang_args
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include <math.h>
 6 | // CHECK: #include "hip/hip_runtime.h"
 7 | #include "cuda.h"
 8 | // CHECK: #include "rocblas.h"
 9 | // CHECK-NOT: #include "cublas_v2.h"
10 | #include "cublas_v2.h"
11 | // CHECK-NOT: #include "rocblas.h"
12 | #define M 6
13 | #define N 5
14 | #define IDX2C(i,j,ld) (((j)*(ld))+(i))
15 | static __inline__ void modify(float *m, int ldm, int n, int p, int q, float  // Creates a temporary handle, applies two Sscal calls starting at IDX2C(p, q, ldm), destroys the handle.
16 |   alpha, float beta) {
17 |   // CHECK: rocblas_handle blasHandle;
18 |   cublasHandle_t blasHandle;
19 |   // CHECK: rocblas_status blasStatus = rocblas_create_handle(&blasHandle);
20 |   cublasStatus_t blasStatus = cublasCreate(&blasHandle);  // blasStatus is never read afterwards
21 |   // CHECK: rocblas_sscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm);
22 |   cublasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm);  // stride ldm
23 |   // CHECK: rocblas_sscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1);
24 |   cublasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1);  // same start element, stride 1
25 |   // CHECK: rocblas_destroy_handle(blasHandle);
26 |   cublasDestroy(blasHandle);
27 | }
28 | int main(void) {  // Fixture: fills an M x N host matrix, round-trips it through the device with modify() in between, prints it.
29 |   int i, j;
30 |   // CHECK: rocblas_status stat;
31 |   cublasStatus_t stat;
32 |   // CHECK: hipError_t result = hipSuccess;
33 |   cudaError_t result = cudaSuccess;
34 |   float* devPtrA;
35 |   float* a = 0;
36 |   a = (float *)malloc(M * N * sizeof(*a));
37 |   if (!a) {
38 |     printf("host memory allocation failed");
39 |     return EXIT_FAILURE;
40 |   }
41 |   for (j = 0; j < N; j++) {
42 |     for (i = 0; i < M; i++) {
43 |       a[IDX2C(i, j, M)] = (float)(i * M + j + 1);  // IDX2C(i, j, ld) expands to j*ld + i
44 |     }
45 |   }
46 |   // CHECK: hipMalloc((void**)&devPtrA, M*N*sizeof(*a));
47 |   result = cudaMalloc((void**)&devPtrA, M*N*sizeof(*a));
48 |   // CHECK: if (result != hipSuccess) {
49 |   if (result != cudaSuccess) {
50 |     printf("device memory allocation failed");
51 |     return EXIT_FAILURE;  // the host buffer a is not freed on any of the failure returns in this function
52 |   }
53 |   // CHECK: stat = rocblas_set_matrix(M, N, sizeof(*a), a, M, devPtrA, M);
54 |   stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
55 |   // CHECK: if (stat != rocblas_status_success) {
56 |   if (stat != CUBLAS_STATUS_SUCCESS) {
57 |     printf("data download failed");
58 |     // CHECK: hipFree(devPtrA);
59 |     cudaFree(devPtrA);
60 |     return EXIT_FAILURE;
61 |   }
62 |   modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f);  // p = 1, q = 2, alpha = 16, beta = 12
63 |   // CHECK: stat = rocblas_get_matrix(M, N, sizeof(*a), devPtrA, M, a, M);
64 |   stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
65 |   // CHECK: if (stat != rocblas_status_success) {
66 |   if (stat != CUBLAS_STATUS_SUCCESS) {
67 |     printf("data upload failed");
68 |     // CHECK: hipFree(devPtrA);
69 |     cudaFree(devPtrA);
70 |     return EXIT_FAILURE;
71 |   }
72 |   // CHECK: hipFree(devPtrA);
73 |   cudaFree(devPtrA);
74 |   for (j = 0; j < N; j++) {  // one output line per j, M values per line
75 |     for (i = 0; i < M; i++) {
76 |       printf("%7.0f", a[IDX2C(i, j, M)]);
77 |     }
78 |     printf("\n");
79 |   }
80 |   free(a);
81 |   return EXIT_SUCCESS;
82 | }
83 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 -roc %clang_args
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include <math.h>
 6 | // CHECK: #include <hip/hip_runtime.h>
 7 | #include <cuda_runtime.h>
 8 | // CHECK: #include "rocblas.h"
 9 | #include "cublas_v2.h"
10 | #define M 6
11 | #define N 5
12 | #define IDX2F(i,j,ld) ((((j)-1)*(ld))+((i)-1))
13 | // CHECK: static __inline__ void modify(rocblas_handle handle, float *m, int ldm, int
14 | static __inline__ void modify(cublasHandle_t handle, float *m, int ldm, int  // Applies two Sscal calls on the caller's handle, both starting at IDX2F(p, q, ldm).
15 |   n, int p, int q, float alpha, float beta) {
16 |   // CHECK: rocblas_sscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm);
17 |   // CHECK: rocblas_sscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1);
18 |   cublasSscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm);  // IDX2F(i, j, ld) expands to (j-1)*ld + (i-1); stride ldm
19 |   cublasSscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1);  // same start element, stride 1
20 | }
21 | int main(void) {  // Fixture: fills an M x N host matrix with 1-based indexing, round-trips it through the device with modify() in between, prints it.
22 |   // CHECK: hipError_t cudaStat;
23 |   // CHECK: rocblas_status stat;
24 |   // CHECK: rocblas_handle handle;
25 |   cudaError_t cudaStat;
26 |   cublasStatus_t stat;
27 |   cublasHandle_t handle;
28 |   int i, j;
29 |   float* devPtrA;
30 |   float* a = 0;
31 |   a = (float *)malloc(M * N * sizeof(*a));
32 |   if (!a) {
33 |     printf("host memory allocation failed");
34 |     return EXIT_FAILURE;
35 |   }
36 |   for (j = 1; j <= N; j++) {  // indices run from 1 because IDX2F subtracts 1 from both
37 |     for (i = 1; i <= M; i++) {
38 |       a[IDX2F(i, j, M)] = (float)((i - 1) * M + j);
39 |     }
40 |   }
41 |   // CHECK: cudaStat = hipMalloc((void**)&devPtrA, M*N * sizeof(*a));
42 |   cudaStat = cudaMalloc((void**)&devPtrA, M*N * sizeof(*a));
43 |   // CHECK: if (cudaStat != hipSuccess) {
44 |   if (cudaStat != cudaSuccess) {
45 |     printf("device memory allocation failed");
46 |     return EXIT_FAILURE;  // the host buffer a is not freed on any of the failure returns in this function
47 |   }
48 |   // CHECK: stat = rocblas_create_handle(&handle);
49 |   stat = cublasCreate(&handle);
50 |   // CHECK: if (stat != rocblas_status_success) {
51 |   if (stat != CUBLAS_STATUS_SUCCESS) {
52 |     printf("CUBLAS initialization failed\n");
53 |     return EXIT_FAILURE;  // NOTE(review): unlike the later failure paths, devPtrA is not released before this return
54 |   }
55 |   // CHECK: stat = rocblas_set_matrix(M, N, sizeof(*a), a, M, devPtrA, M);
56 |   stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
57 |   // CHECK: if (stat != rocblas_status_success) {
58 |   if (stat != CUBLAS_STATUS_SUCCESS) {
59 |     printf("data download failed");
60 |     // CHECK: hipFree(devPtrA);
61 |     // CHECK: rocblas_destroy_handle(handle);
62 |     cudaFree(devPtrA);
63 |     cublasDestroy(handle);
64 |     return EXIT_FAILURE;
65 |   }
66 |   modify(handle, devPtrA, M, N, 2, 3, 16.0f, 12.0f);  // p = 2, q = 3, alpha = 16, beta = 12
67 |   // CHECK: stat = rocblas_get_matrix(M, N, sizeof(*a), devPtrA, M, a, M);
68 |   stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
69 |   // CHECK: if (stat != rocblas_status_success) {
70 |   if (stat != CUBLAS_STATUS_SUCCESS) {
71 |     printf("data upload failed");
72 |     // CHECK: hipFree(devPtrA);
73 |     // CHECK: rocblas_destroy_handle(handle);
74 |     cudaFree(devPtrA);
75 |     cublasDestroy(handle);
76 |     return EXIT_FAILURE;
77 |   }
78 |   // CHECK: hipFree(devPtrA);
79 |   // CHECK: rocblas_destroy_handle(handle);
80 |   cudaFree(devPtrA);
81 |   cublasDestroy(handle);
82 |   for (j = 1; j <= N; j++) {  // one output line per j, M values per line
83 |     for (i = 1; i <= M; i++) {
84 |       printf("%7.0f", a[IDX2F(i, j, M)]);
85 |     }
86 |     printf("\n");
87 |   }
88 |   free(a);
89 |   return EXIT_SUCCESS;
90 | }
91 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu:
--------------------------------------------------------------------------------
  1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 -roc %clang_args
  2 | 
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | // CHECK: #include <hip/hip_runtime.h>
  6 | #include <cuda_runtime.h>
  7 | // CHECK: #include "rocblas.h"
  8 | #include "cublas_v2.h"
  9 | #define IDX2C(i,j,ld) (((j)*(ld))+(i))
 10 | #define m 6
 11 | #define n 4
 12 | #define k 5
 13 | int main(void) {  // Fixture: builds host matrices a, b, c, runs one Sgemm on device copies, prints c.
 14 |   // CHECK: hipError_t cudaStat;
 15 |   // CHECK: rocblas_status stat;
 16 |   // CHECK: rocblas_handle handle;
 17 |   cudaError_t cudaStat;
 18 |   cublasStatus_t stat;
 19 |   cublasHandle_t handle;
 20 |   int i, j;
 21 |   float * a;
 22 |   float * b;
 23 |   float * c;
 24 |   a = (float *)malloc(m*k * sizeof(float));  // the three host allocations are used without a null check
 25 |   b = (float *)malloc(k*n * sizeof(float));
 26 |   c = (float *)malloc(m*n * sizeof(float));
 27 |   int ind = 11;  // fill value; restarted at 11 before each matrix
 28 |   for (j = 0; j<k; j++) {
 29 |     for (i = 0; i<m; i++) {
 30 |       a[IDX2C(i, j, m)] = (float)ind++;  // IDX2C(i, j, ld) expands to j*ld + i
 31 |     }
 32 |   }
 33 |   printf("a:\n");
 34 |   for (i = 0; i<m; i++) {
 35 |     for (j = 0; j<k; j++) {
 36 |       printf(" %5.0f", a[IDX2C(i, j, m)]);
 37 |     }
 38 |     printf("\n");
 39 |   }
 40 |   ind = 11;
 41 |   for (j = 0; j<n; j++) {
 42 |     for (i = 0; i<k; i++) {
 43 |       b[IDX2C(i, j, k)] = (float)ind++;
 44 |     }
 45 |   }
 46 |   printf("b:\n");
 47 |   for (i = 0; i<k; i++) {
 48 |     for (j = 0; j<n; j++) {
 49 |       printf(" %5.0f", b[IDX2C(i, j, k)]);
 50 |     }
 51 |     printf("\n");
 52 |   }
 53 |   ind = 11;
 54 |   for (j = 0; j<n; j++) {
 55 |     for (i = 0; i<m; i++) {
 56 |       c[IDX2C(i, j, m)] = (float)ind++;
 57 |     }
 58 |   }
 59 |   printf("c:\n");
 60 |   for (i = 0; i<m; i++) {
 61 |     for (j = 0; j<n; j++) {
 62 |       printf(" %5.0f", c[IDX2C(i, j, m)]);
 63 |     }
 64 |     printf("\n");
 65 |   }
 66 |   float * d_a;
 67 |   float * d_b;
 68 |   float * d_c;
 69 |   // CHECK: cudaStat = hipMalloc((void **)& d_a, m*k * sizeof(*a));
 70 |   // CHECK: cudaStat = hipMalloc((void **)& d_b, k*n * sizeof(*b));
 71 |   // CHECK: cudaStat = hipMalloc((void **)& d_c, m*n * sizeof(*c));
 72 |   cudaStat = cudaMalloc((void **)& d_a, m*k * sizeof(*a));  // cudaStat is overwritten by each allocation and never inspected
 73 |   cudaStat = cudaMalloc((void **)& d_b, k*n * sizeof(*b));
 74 |   cudaStat = cudaMalloc((void **)& d_c, m*n * sizeof(*c));
 75 |   // CHECK: stat = rocblas_create_handle(&handle);
 76 |   stat = cublasCreate(&handle);  // stat is reassigned by every library call below and never inspected
 77 |   // CHECK: stat = rocblas_set_matrix(m, k, sizeof(*a), a, m, d_a, m);
 78 |   // CHECK: stat = rocblas_set_matrix(k, n, sizeof(*b), b, k, d_b, k);
 79 |   // CHECK: stat = rocblas_set_matrix(m, n, sizeof(*c), c, m, d_c, m);
 80 |   stat = cublasSetMatrix(m, k, sizeof(*a), a, m, d_a, m);
 81 |   stat = cublasSetMatrix(k, n, sizeof(*b), b, k, d_b, k);
 82 |   stat = cublasSetMatrix(m, n, sizeof(*c), c, m, d_c, m);
 83 |   float al = 1.0f;  // scalar arguments, handed to Sgemm by address
 84 |   float bet = 1.0f;
 85 |   // CHECK: stat = rocblas_sgemm(handle, rocblas_operation_none, rocblas_operation_none, m, n, k, &al, d_a, m, d_b, k, &bet, d_c, m);
 86 |   stat = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &al, d_a, m, d_b, k, &bet, d_c, m);
 87 |   // CHECK: stat = rocblas_get_matrix(m, n, sizeof(*c), d_c, m, c, m);
 88 |   stat = cublasGetMatrix(m, n, sizeof(*c), d_c, m, c, m);  // copies d_c back into the host buffer c
 89 |   printf("c after Sgemm :\n");
 90 |   for (i = 0; i<m; i++) {
 91 |     for (j = 0; j<n; j++) {
 92 |       printf(" %7.0f", c[IDX2C(i, j, m)]);
 93 |     }
 94 |     printf("\n");
 95 |   }
 96 |   // CHECK: hipFree(d_a);
 97 |   // CHECK: hipFree(d_b);
 98 |   // CHECK: hipFree(d_c);
 99 |   // CHECK: rocblas_destroy_handle(handle);
100 |   cudaFree(d_a);
101 |   cudaFree(d_b);
102 |   cudaFree(d_c);
103 |   cublasDestroy(handle);
104 |   free(a);
105 |   free(b);
106 |   free(c);
107 |   return EXIT_SUCCESS;
108 | }
109 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/cuComplex/cuComplex_Julia.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --amap --skip-excluded-preprocessor-conditional-blocks %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | // CHECK: #include "hip/hip_complex.h"
 5 | #include "cuComplex.h"
 6 | 
 7 | #define TYPEFLOAT
 8 | #define DIMX 100
 9 | #define DIMY 40
10 | #define moveX 2
11 | #define moveY 1
12 | 
13 | #define MAXITERATIONS 10
14 | 
15 | #ifdef TYPEFLOAT
16 | #define TYPE float
17 | // CHECK: #define cTYPE hipFloatComplex
18 | #define cTYPE cuFloatComplex
19 | // CHECK: #define cMakecuComplex(re,i) make_hipFloatComplex(re,i)
20 | #define cMakecuComplex(re,i) make_cuFloatComplex(re,i)
21 | #endif
22 | #ifdef TYPEDOUBLE
23 | // CHECK: #define TYPE hipDoubleComplex
24 | #define TYPE cuDoubleComplex
25 | // CHECK: #define cMakecuComplex(re,i) make_hipDoubleComplex(re,i)
26 | #define cMakecuComplex(re,i) make_cuDoubleComplex(re,i)
27 | #endif
28 | 
29 | __device__ cTYPE juliaFunctor(cTYPE p, cTYPE c) {  // One iteration step: returns p*p + c using the complex multiply and add helpers.
30 |   // CHECK: return hipCaddf(hipCmulf(p, p), c);
31 |   return cuCaddf(cuCmulf(p, p), c);
32 | }
33 | 
34 | __device__ cTYPE convertToComplex(int x, int y, float zoom) {  // Maps integer coordinates (x, y) to a complex value, scaled by zoom and offset by moveX/moveY.
35 |   TYPE jx = 1.5 * (x - DIMX / 2) / (0.5 * zoom * DIMX) + moveX;  // DIMX / 2 is integer division; the 1.5 and 0.5 literals are double, so the expression is narrowed to TYPE on assignment
36 |   TYPE jy = (y - DIMY / 2) / (0.5 * zoom * DIMY) + moveY;
37 |   return cMakecuComplex(jx, jy);  // jx becomes the real part, jy the imaginary part
38 | }
39 | 
40 | __device__ int evolveComplexPoint(cTYPE p, cTYPE c) {  // Iterates juliaFunctor on p and returns the counter value at which the loop stopped.
41 |   int it = 1;  // the counter starts at 1, so the result lies in 1 .. MAXITERATIONS + 1
42 |   // CHECK: while (it <= MAXITERATIONS && hipCabsf(p) <= 4) {
43 |   while (it <= MAXITERATIONS && cuCabsf(p) <= 4) {
44 |     p = juliaFunctor(p, c);
45 |     it++;
46 |   }
47 |   return it;
48 | }
49 | 
50 | __global__ void computeJulia(int* data, cTYPE c, float zoom) {
51 |   // Global 2D coordinates of this thread.
52 |   const int px = threadIdx.x + blockDim.x * blockIdx.x;
53 |   const int py = threadIdx.y + blockDim.y * blockIdx.y;
54 |   // Threads that fall outside the DIMX x DIMY domain write nothing.
55 |   if (px >= DIMX || py >= DIMY) return;
56 |   // One iteration count per point, stored at px*DIMY + py.
57 |   data[px*DIMY + py] = evolveComplexPoint(convertToComplex(px, py, zoom), c);
58 | }
59 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/cuFFT/simple_cufft.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <cuda.h>
 5 | // CHECK: #include <hipfft/hipfft.h>
 6 | #include <cufft.h>
 7 | #include <stdio.h>
 8 | #include <math.h>
 9 | 
10 | #define DATASIZE 8
11 | #define BATCH 2
12 | 
13 | #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
14 | // CHECK: inline void gpuAssert(hipError_t code, const char *file, int line, bool abort = true)
15 | inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)  // Reports a failed runtime call on stderr; exits with the error code unless abort is false.
16 | {
17 |   // CHECK: if (code != hipSuccess)
18 |   if (code != cudaSuccess)
19 |   {
20 |     // CHECK: fprintf(stderr, "GPUassert: %s %s %d\n", hipGetErrorString(code), file, line);
21 |     fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);  // error text, file, line, then a newline
22 |     if (abort) exit(code);
23 |   }
24 | }
25 | 
26 | int main()  // Fixture: runs BATCH real-to-complex 1D transforms of length DATASIZE and prints the outputs.
27 | {
28 |   // --- Host side input data allocation and initialization
29 |   // CHECK: hipfftReal *hostInputData = (hipfftReal*)malloc(DATASIZE*BATCH * sizeof(hipfftReal));
30 |   cufftReal *hostInputData = (cufftReal*)malloc(DATASIZE*BATCH * sizeof(cufftReal));
31 |   for (int i = 0; i<BATCH; i++)
32 |     for (int j = 0; j<DATASIZE; j++) hostInputData[i*DATASIZE + j] = (cufftReal)(i + 1);  // batch i is filled with the constant i + 1
33 | 
34 |   // --- Device side input data allocation and initialization
35 |   cufftReal *deviceInputData; gpuErrchk(cudaMalloc((void**)&deviceInputData, DATASIZE * BATCH * sizeof(cufftReal)));
36 |   // CHECK: gpuErrchk(hipMemcpy(deviceInputData, hostInputData, DATASIZE * BATCH * sizeof(hipfftReal), hipMemcpyHostToDevice));
37 |   gpuErrchk(cudaMemcpy(deviceInputData, hostInputData, DATASIZE * BATCH * sizeof(cufftReal), cudaMemcpyHostToDevice));  // checked like the other runtime calls in this function
38 | 
39 |   // --- Host side output data allocation
40 |   cufftComplex *hostOutputData = (cufftComplex*)malloc((DATASIZE / 2 + 1) * BATCH * sizeof(cufftComplex));
41 | 
42 |   // --- Device side output data allocation
43 |   cufftComplex *deviceOutputData; gpuErrchk(cudaMalloc((void**)&deviceOutputData, (DATASIZE / 2 + 1) * BATCH * sizeof(cufftComplex)));
44 | 
45 |   // --- Batched 1D FFTs
46 |   // CHECK: hipfftHandle handle;
47 |   cufftHandle handle;
48 |   int rank = 1;                           // --- 1D FFTs
49 |   int n[] = { DATASIZE };                 // --- Size of the Fourier transform
50 |   int istride = 1, ostride = 1;           // --- Distance between two successive input/output elements
51 |   int idist = DATASIZE, odist = (DATASIZE / 2 + 1); // --- Distance between batches
52 |   int inembed[] = { 0 };                  // --- Input size with pitch (ignored for 1D transforms)
53 |   int onembed[] = { 0 };                  // --- Output size with pitch (ignored for 1D transforms)
54 |   int batch = BATCH;                      // --- Number of batched executions
55 |   // CHECK: hipfftPlanMany(&handle, rank, n,
56 |   cufftPlanMany(&handle, rank, n,
57 |     inembed, istride, idist,
58 |     // CHECK: onembed, ostride, odist, HIPFFT_R2C, batch);
59 |     onembed, ostride, odist, CUFFT_R2C, batch);
60 | 
61 |   // CHECK: hipfftExecR2C(handle, deviceInputData, deviceOutputData);
62 |   cufftExecR2C(handle, deviceInputData, deviceOutputData);
63 | 
64 |   // --- Device->Host copy of the results
65 |   // CHECK: gpuErrchk(hipMemcpy(hostOutputData, deviceOutputData, (DATASIZE / 2 + 1) * BATCH * sizeof(hipfftComplex), hipMemcpyDeviceToHost));
66 |   gpuErrchk(cudaMemcpy(hostOutputData, deviceOutputData, (DATASIZE / 2 + 1) * BATCH * sizeof(cufftComplex), cudaMemcpyDeviceToHost));
67 | 
68 |   for (int i = 0; i<BATCH; i++)
69 |     for (int j = 0; j<(DATASIZE / 2 + 1); j++)
70 |       printf("%i %i %f %f\n", i, j, hostOutputData[i*(DATASIZE / 2 + 1) + j].x, hostOutputData[i*(DATASIZE / 2 + 1) + j].y);  // one output line per (batch, bin) pair
71 | 
72 |   // CHECK: hipfftDestroy(handle);
73 |   cufftDestroy(handle);
74 |   // CHECK: gpuErrchk(hipFree(deviceOutputData));
75 |   // CHECK: gpuErrchk(hipFree(deviceInputData));
76 |   gpuErrchk(cudaFree(deviceOutputData));
77 |   gpuErrchk(cudaFree(deviceInputData));
78 | }
79 | 


--------------------------------------------------------------------------------
/tests/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/tests/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu


--------------------------------------------------------------------------------
/tests/unit_tests/namespace/ns_kernel_launch.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | // CHECK: #include <hip/hip_runtime.h>
 3 | #include <cuda.h>
 4 | 
 5 | __global__ void test_0() {  // Kernel at global scope; it exists only as a launch target for main.
 6 |   int a = 10;  // unused local
 7 | }
 8 | 
 9 | namespace first {  // Supplies kernels at one and two levels of namespace nesting for the launches in main.
10 |   __global__ void test_1() {
11 |     int b = 20;  // unused local
12 |   }
13 |   namespace second {
14 |     __global__ void test_2() {
15 |       int c = 30;  // unused local
16 |     }
17 |   }
18 | }
19 | 
20 | int main() {  // Launches each kernel by qualified name; every expectation keeps the qualifier in the translated launch.
21 |   // CHECK: hipLaunchKernelGGL(::test_0, dim3(1), dim3(1), 0, 0);
22 |   ::test_0<<<1, 1>>>();  // explicit global-scope qualifier
23 |   // CHECK: hipLaunchKernelGGL(first::test_1, dim3(1), dim3(1), 0, 0);
24 |   first::test_1<<<1, 1>>>();  // single namespace qualifier
25 |   // CHECK: hipLaunchKernelGGL(first::second::test_2, dim3(1), dim3(1), 0, 0);
26 |   first::second::test_2<<<1, 1>>>();  // nested namespace qualifier
27 |   return 0;
28 | }
29 | 


--------------------------------------------------------------------------------
/tests/unit_tests/options/kernel-execution-syntax/both-kernel-execution-syntax.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --cuda-kernel-execution-syntax --hip-kernel-execution-syntax %clang_args
 2 | // CHECK: #include <hip/hip_runtime.h>
 3 | #include <math.h>
 4 | 
 5 | __global__
 6 | void add(int n, float *x, float *y)
 7 | {
 8 |     // Every thread begins at its global index and steps by the total number of launched threads.
 9 |     const int step = gridDim.x * blockDim.x;
10 |     int pos = threadIdx.x + blockDim.x * blockIdx.x;
11 |     for (; pos < n; pos += step) y[pos] += x[pos];
12 | }
13 | 
14 | int main(int argc, char *argv[])  // Fixture: adds two managed arrays on the device; returns 0 when every B[i] equals 3.0f, otherwise -1.
15 | {
16 |     int numElements = 10;
17 |     bool testResult = true;  // never read
18 |     float *A, *B;
19 |     // CHECK: hipMallocManaged(&A, numElements * sizeof(float));
20 |     cudaMallocManaged(&A, numElements * sizeof(float));
21 |     // CHECK: hipMallocManaged(&B, numElements * sizeof(float));
22 |     cudaMallocManaged(&B, numElements * sizeof(float));
23 |     for (int i = 0; i < numElements; i++) {
24 |         A[i] = 1.0f;
25 |         B[i] = 2.0f;
26 |     }
27 |     int blockSize = 256;
28 |     int numBlocks = (numElements + blockSize - 1) / blockSize;  // rounds up so that numBlocks * blockSize >= numElements
29 |     dim3 dimGrid(numBlocks, 1, 1);
30 |     dim3 dimBlock(blockSize, 1, 1);
31 |     // CHECK: hipLaunchKernelGGL(add, dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B);
32 |     add<<<dimGrid, dimBlock>>>(numElements, A, B);  // with both syntax options on the command line, the expectation is the macro launch form
33 |     // CHECK: hipDeviceSynchronize();
34 |     cudaDeviceSynchronize();
35 |     float maxError = 0.0f;
36 |     for (int i = 0; i < numElements; i++)
37 |         maxError = fmax(maxError, fabs(B[i]-3.0f));  // largest absolute deviation from 1.0f + 2.0f
38 |     // CHECK: hipFree(A);
39 |     cudaFree(A);
40 |     // CHECK: hipFree(B);
41 |     cudaFree(B);
42 |     if(maxError == 0.0f)
43 |         return 0;
44 |     return -1;
45 | }
46 | 


--------------------------------------------------------------------------------
/tests/unit_tests/options/kernel-execution-syntax/cuda-kernel-execution-syntax.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --cuda-kernel-execution-syntax %clang_args
 2 | // CHECK: #include <hip/hip_runtime.h>
 3 | #include <math.h>
 4 | 
 5 | __global__
 6 | void add(int n, float *x, float *y)
 7 | {
 8 |     // Every thread begins at its global index and steps by the total number of launched threads.
 9 |     const int step = gridDim.x * blockDim.x;
10 |     int pos = threadIdx.x + blockDim.x * blockIdx.x;
11 |     for (; pos < n; pos += step) y[pos] += x[pos];
12 | }
13 | 
14 | int main(int argc, char *argv[])  // Fixture: adds two managed arrays on the device; returns 0 when every B[i] equals 3.0f, otherwise -1.
15 | {
16 |     int numElements = 10;
17 |     bool testResult = true;  // never read
18 |     float *A, *B;
19 |     // CHECK: hipMallocManaged(&A, numElements * sizeof(float));
20 |     cudaMallocManaged(&A, numElements * sizeof(float));
21 |     // CHECK: hipMallocManaged(&B, numElements * sizeof(float));
22 |     cudaMallocManaged(&B, numElements * sizeof(float));
23 |     for (int i = 0; i < numElements; i++) {
24 |         A[i] = 1.0f;
25 |         B[i] = 2.0f;
26 |     }
27 |     int blockSize = 256;
28 |     int numBlocks = (numElements + blockSize - 1) / blockSize;  // rounds up so that numBlocks * blockSize >= numElements
29 |     dim3 dimGrid(numBlocks, 1, 1);
30 |     dim3 dimBlock(blockSize, 1, 1);
31 |     // CHECK: add<<<dimGrid, dimBlock>>>(numElements, A, B);
32 |     add<<<dimGrid, dimBlock>>>(numElements, A, B);  // the expectation is that the triple-chevron launch is left as written
33 |     // CHECK: hipDeviceSynchronize();
34 |     cudaDeviceSynchronize();
35 |     float maxError = 0.0f;
36 |     for (int i = 0; i < numElements; i++)
37 |         maxError = fmax(maxError, fabs(B[i]-3.0f));  // largest absolute deviation from 1.0f + 2.0f
38 |     // CHECK: hipFree(A);
39 |     cudaFree(A);
40 |     // CHECK: hipFree(B);
41 |     cudaFree(B);
42 |     if(maxError == 0.0f)
43 |         return 0;
44 |     return -1;
45 | }
46 | 


--------------------------------------------------------------------------------
/tests/unit_tests/options/kernel-execution-syntax/hip-kernel-execution-syntax.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | // CHECK: #include <hip/hip_runtime.h>
 3 | #include <math.h>
 4 | 
 5 | __global__
 6 | void add(int n, float *x, float *y)
 7 | {
 8 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
 9 |     int stride = blockDim.x * gridDim.x;
10 |     for (int i = index; i < n; i += stride)
11 |         y[i] = x[i] + y[i];
12 | }
13 | 
14 | int main(int argc, char *argv[])
15 | {
16 |     int numElements = 10;
17 |     bool testResult = true;
18 |     float *A, *B;
19 |     // CHECK: hipMallocManaged(&A, numElements * sizeof(float));
20 |     cudaMallocManaged(&A, numElements * sizeof(float));
21 |     // CHECK: hipMallocManaged(&B, numElements * sizeof(float));
22 |     cudaMallocManaged(&B, numElements * sizeof(float));
23 |     for (int i = 0; i < numElements; i++) {
24 |         A[i] = 1.0f;
25 |         B[i] = 2.0f;
26 |     }
27 |     int blockSize = 256;
28 |     int numBlocks = (numElements + blockSize - 1) / blockSize;
29 |     dim3 dimGrid(numBlocks, 1, 1);
30 |     dim3 dimBlock(blockSize, 1, 1);
31 |     // CHECK: hipLaunchKernelGGL(add, dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B);
32 |     add<<<dimGrid, dimBlock>>>(numElements, A, B);
33 |     // CHECK: hipDeviceSynchronize();
34 |     cudaDeviceSynchronize();
35 |     float maxError = 0.0f;
36 |     for (int i = 0; i < numElements; i++)
37 |         maxError = fmax(maxError, fabs(B[i]-3.0f));
38 |     // CHECK: hipFree(A);
39 |     cudaFree(A);
40 |     // CHECK: hipFree(B);
41 |     cudaFree(B);
42 |     if(maxError == 0.0f)
43 |         return 0;
44 |     return -1;
45 | }
46 | 


--------------------------------------------------------------------------------
/tests/unit_tests/options/kernel-execution-syntax/none-kernel-execution-syntax.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | // CHECK: #include <hip/hip_runtime.h>
 3 | #include <math.h>
 4 | 
 5 | __global__
 6 | void add(int n, float *x, float *y)
 7 | {
 8 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
 9 |     int stride = blockDim.x * gridDim.x;
10 |     for (int i = index; i < n; i += stride)
11 |         y[i] = x[i] + y[i];
12 | }
13 | 
14 | int main(int argc, char *argv[])
15 | {
16 |     int numElements = 10;
17 |     bool testResult = true;
18 |     float *A, *B;
19 |     // CHECK: hipMallocManaged(&A, numElements * sizeof(float));
20 |     cudaMallocManaged(&A, numElements * sizeof(float));
21 |     // CHECK: hipMallocManaged(&B, numElements * sizeof(float));
22 |     cudaMallocManaged(&B, numElements * sizeof(float));
23 |     for (int i = 0; i < numElements; i++) {
24 |         A[i] = 1.0f;
25 |         B[i] = 2.0f;
26 |     }
27 |     int blockSize = 256;
28 |     int numBlocks = (numElements + blockSize - 1) / blockSize;
29 |     dim3 dimGrid(numBlocks, 1, 1);
30 |     dim3 dimBlock(blockSize, 1, 1);
31 |     // CHECK: add<<<dimGrid, dimBlock>>>(numElements, A, B);
32 |     add<<<dimGrid, dimBlock>>>(numElements, A, B);
33 |     // CHECK: hipDeviceSynchronize();
34 |     cudaDeviceSynchronize();
35 |     float maxError = 0.0f;
36 |     for (int i = 0; i < numElements; i++)
37 |         maxError = fmax(maxError, fabs(B[i]-3.0f));
38 |     // CHECK: hipFree(A);
39 |     cudaFree(A);
40 |     // CHECK: hipFree(B);
41 |     cudaFree(B);
42 |     if(maxError == 0.0f)
43 |         return 0;
44 |     return -1;
45 | }
46 | 


--------------------------------------------------------------------------------
/tests/unit_tests/pp/pp_if_else_conditionals.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --skip-excluded-preprocessor-conditional-blocks --hip-kernel-execution-syntax %clang_args
 2 | // CHECK: #include <hip/hip_runtime.h>
 3 | 
 4 | #include <cuda.h>
 5 | 
 6 | __global__ void axpy_kernel(float a, float* x, float* y) {
 7 |   y[threadIdx.x] = a * x[threadIdx.x];
 8 | }
 9 | 
10 | void axpy(float a, float* x, float* y) {
11 | 
12 | #ifdef SOME_MACRO
13 |   // CHECK: axpy_kernel <<<1, 1>>> (a, y, x);
14 |   axpy_kernel <<<1, 1>>> (a, y, x);
15 | #endif
16 | 
17 | #ifndef SOME_MACRO
18 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y, x);
19 |   axpy_kernel <<<1, 2>>> (a, y, x);
20 | #endif
21 | 
22 | #ifdef SOME_MACRO
23 |   // CHECK: axpy_kernel <<<1, 3>>> (a, y, x);
24 |   axpy_kernel <<<1, 3>>> (a, y, x);
25 | #else
26 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y);
27 |   axpy_kernel <<<1, 4>>> (a, x, y);
28 | #endif
29 | 
30 | }


--------------------------------------------------------------------------------
/tests/unit_tests/pp/pp_if_else_conditionals_01.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --skip-excluded-preprocessor-conditional-blocks --hip-kernel-execution-syntax %clang_args
 2 | // CHECK: #include <hip/hip_runtime.h>
 3 | 
 4 | __global__ void axpy_kernel(float a, float* x, float* y) {
 5 |   y[threadIdx.x] = a * x[threadIdx.x];
 6 | }
 7 | 
 8 | void axpy(float a, float* x, float* y) {
 9 | float* y_new = nullptr;
10 | #ifdef SOME_MACRO
11 |   y_new = x;
12 |   // CHECK: axpy_kernel <<<1, 1>>> (a, y_new, x);
13 |   axpy_kernel <<<1, 1>>> (a, y_new, x);
14 | #endif
15 | 
16 | #ifndef SOME_MACRO
17 |   y_new = y;
18 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y_new, x);
19 |   axpy_kernel <<<1, 2>>> (a, y_new, x);
20 | #endif
21 | 
22 | #ifdef SOME_MACRO
23 |   // CHECK: axpy_kernel <<<1, 3>>> (a, y, x);
24 |   axpy_kernel <<<1, 3>>> (a, y, x);
25 | #else
26 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y);
27 |   axpy_kernel <<<1, 4>>> (a, x, y);
28 | #endif
29 | 
30 | #ifdef SOME_MACRO
31 |   // CHECK: axpy_kernel <<<1, 5>>> (a, y, x);
32 |   axpy_kernel <<<1, 5>>> (a, y, x);
33 | #elif defined SOME_MACRO_1
34 |   // CHECK: axpy_kernel <<<1, 6>>> (a, x, y);
35 |   axpy_kernel <<<1, 6>>> (a, x, y);
36 | #else
37 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(7), 0, 0, a, x, y);
38 |   axpy_kernel <<<1, 7>>> (a, x, y);
39 | #endif
40 | 
41 | #ifndef SOME_MACRO
42 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(8), 0, 0, a, y, x);
43 |   axpy_kernel <<<1, 8>>> (a, y, x);
44 | #elif !defined(SOME_MACRO_1)
45 |   // CHECK: axpy_kernel <<<1, 9>>> (a, x, y);
46 |   axpy_kernel <<<1, 9>>> (a, x, y);
47 | #else
48 |   // CHECK: axpy_kernel <<<1, 10>>> (a, x, y);
49 |   axpy_kernel <<<1, 10>>> (a, x, y);
50 | #endif
51 | 
52 | }


--------------------------------------------------------------------------------
/tests/unit_tests/pp/pp_if_else_conditionals_01_LLVM_10.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | // CHECK: #include <hip/hip_runtime.h>
 3 | 
 4 | __global__ void axpy_kernel(float a, float* x, float* y) {
 5 |   y[threadIdx.x] = a * x[threadIdx.x];
 6 | }
 7 | 
 8 | void axpy(float a, float* x, float* y) {
 9 | float* y_new = nullptr;
10 | #ifdef SOME_MACRO
11 |   y_new = x;
12 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(1), 0, 0, a, y_new, x);
13 |   axpy_kernel <<<1, 1>>> (a, y_new, x);
14 | #endif
15 | 
16 | #ifndef SOME_MACRO
17 |   y_new = y;
18 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y_new, x);
19 |   axpy_kernel <<<1, 2>>> (a, y_new, x);
20 | #endif
21 | 
22 | #ifdef SOME_MACRO
23 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(3), 0, 0, a, y, x);
24 |   axpy_kernel <<<1, 3>>> (a, y, x);
25 | #else
26 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y);
27 |   axpy_kernel <<<1, 4>>> (a, x, y);
28 | #endif
29 | 
30 | #ifdef SOME_MACRO
31 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(5), 0, 0, a, y, x);
32 |   axpy_kernel <<<1, 5>>> (a, y, x);
33 | #elif defined SOME_MACRO_1
34 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(6), 0, 0, a, x, y);
35 |   axpy_kernel <<<1, 6>>> (a, x, y);
36 | #else
37 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(7), 0, 0, a, x, y);
38 |   axpy_kernel <<<1, 7>>> (a, x, y);
39 | #endif
40 | 
41 | #ifndef SOME_MACRO
42 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(8), 0, 0, a, y, x);
43 |   axpy_kernel <<<1, 8>>> (a, y, x);
44 | #elif !defined(SOME_MACRO_1)
45 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(9), 0, 0, a, x, y);
46 |   axpy_kernel <<<1, 9>>> (a, x, y);
47 | #else
48 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(10), 0, 0, a, x, y);
49 |   axpy_kernel <<<1, 10>>> (a, x, y);
50 | #endif
51 | 
52 | }


--------------------------------------------------------------------------------
/tests/unit_tests/pp/pp_if_else_conditionals_LLVM_10.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | // CHECK: #include <hip/hip_runtime.h>
 3 | 
 4 | #include <cuda.h>
 5 | 
 6 | __global__ void axpy_kernel(float a, float* x, float* y) {
 7 |   y[threadIdx.x] = a * x[threadIdx.x];
 8 | }
 9 | 
10 | void axpy(float a, float* x, float* y) {
11 | 
12 | #ifdef SOME_MACRO
13 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(1), 0, 0, a, y, x);
14 |   axpy_kernel <<<1, 1>>> (a, y, x);
15 | #endif
16 | 
17 | #ifndef SOME_MACRO
18 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y, x);
19 |   axpy_kernel <<<1, 2>>> (a, y, x);
20 | #endif
21 | 
22 | #ifdef SOME_MACRO
23 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(3), 0, 0, a, y, x);
24 |   axpy_kernel <<<1, 3>>> (a, y, x);
25 | #else
26 |   // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y);
27 |   axpy_kernel <<<1, 4>>> (a, x, y);
28 | #endif
29 | 
30 | }


--------------------------------------------------------------------------------
/tests/unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | /*
 3 | Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | */
23 | 
24 | // CHECK: #include <hip/hip_runtime.h>
25 | #include <cuda.h>
26 | // CHECK-NOT: #include <texture_fetch_functions.h>
27 | #include <texture_fetch_functions.h>
28 | 
29 | // CHECK: extern texture<float, 2, hipReadModeElementType> tex;
30 | extern texture<float, 2, cudaReadModeElementType> tex;
31 | 
32 | extern "C" __global__ void tex2dKernel(float* outputData, int width, int height) {
33 |   int x = blockDim.x * blockIdx.x + threadIdx.x;
34 |   int y = blockDim.y * blockIdx.y + threadIdx.y;
35 |   outputData[y * width + x] = tex2D(tex, x, y);
36 | }
37 | 
38 | int main(int argc, char** argv) {
39 |   float out = 0.f;
40 |   // CHECK: tex2dKernel<<<512, 512>>>(&out, 128, 128);
41 |   tex2dKernel<<<512, 512>>>(&out, 128, 128);
42 |   return 0;
43 | }
44 | 


--------------------------------------------------------------------------------
/tests/unit_tests/samples/MallocManaged.cpp:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | // CHECK: #include <hip/hip_runtime.h>
 3 | #include <math.h>
 4 | 
 5 | __global__
 6 | void add(int n, float *x, float *y)
 7 | {
 8 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
 9 |     int stride = blockDim.x * gridDim.x;
10 |     for (int i = index; i < n; i += stride)
11 |         y[i] = x[i] + y[i];
12 | }
13 | 
14 | int main(int argc, char *argv[])
15 | {
16 |     int numElements = 10;
17 |     bool testResult = true;
18 |     float *A, *B;
19 |     // CHECK: hipMallocManaged(&A, numElements * sizeof(float));
20 |     cudaMallocManaged(&A, numElements * sizeof(float));
21 |     // CHECK: hipMallocManaged(&B, numElements * sizeof(float));
22 |     cudaMallocManaged(&B, numElements * sizeof(float));
23 |     for (int i = 0; i < numElements; i++) {
24 |         A[i] = 1.0f;
25 |         B[i] = 2.0f;
26 |     }
27 |     int blockSize = 256;
28 |     int numBlocks = (numElements + blockSize - 1) / blockSize;
29 |     dim3 dimGrid(numBlocks, 1, 1);
30 |     dim3 dimBlock(blockSize, 1, 1);
31 |     // CHECK: hipLaunchKernelGGL(add, dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B);
32 |     add<<<dimGrid, dimBlock>>>(numElements, A, B);
33 |     // CHECK: hipDeviceSynchronize();
34 |     cudaDeviceSynchronize();
35 |     float maxError = 0.0f;
36 |     for (int i = 0; i < numElements; i++)
37 |         maxError = fmax(maxError, fabs(B[i]-3.0f));
38 |     // CHECK: hipFree(A);
39 |     cudaFree(A);
40 |     // CHECK: hipFree(B);
41 |     cudaFree(B);
42 |     if(maxError == 0.0f)
43 |         return 0;
44 |     return -1;
45 | }
46 | 


--------------------------------------------------------------------------------
/tests/unit_tests/samples/allocators.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | 
 3 | #pragma once
 4 | // CHECK: #include <hip/hip_runtime.h>
 5 | #include <cuda_runtime.h>
 6 | #include <math.h>
 7 | 
 8 | /**
 9 |  * Allocate GPU memory for `count` elements of type `T`.
10 |  */
11 | template<typename T>
12 | static T* gpuMalloc(size_t count) {
13 |     T* ret = nullptr;
14 |     // CHECK: hipMalloc(&ret, count * sizeof(T));
15 |     cudaMalloc(&ret, count * sizeof(T));
16 |     return ret;
17 | }
18 | 
19 | template<typename T>
20 | __global__ void add(int n, T* x, T* y) {
21 |   int index = blockIdx.x * blockDim.x + threadIdx.x;
22 |   int stride = blockDim.x * gridDim.x;
23 |   for (int i = index; i < n; i += stride)
24 |     y[i] = x[i] + y[i];
25 | }
26 | 
27 | int main(int argc, char* argv[]) {  // Exit status: 0 when maxError stays 0.0f, -1 otherwise.
28 |     size_t numElements = 50;
29 |     float *A = gpuMalloc<float>(numElements);
30 |     float* B = gpuMalloc<float>(numElements);
31 |     for (int i = 0; i < numElements; ++i) {  // NOTE(review): A and B come from gpuMalloc (a device allocation) but are written from host code here; harmless if this fixture is only translated, never executed - confirm
32 |         A[i] = 1.0f;
33 |         B[i] = 2.0f;
34 |     }
35 |     int blockSize = 512;
36 |     int numBlocks = (numElements + blockSize - 1) / blockSize;  // ceiling division
37 |     dim3 dimGrid(numBlocks, 1, 1);
38 |     dim3 dimBlock(blockSize, 1, 1);
39 |     // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(add<float>), dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B);
40 |     add<float><<<dimGrid, dimBlock>>>(numElements, A, B);
41 |     // CHECK: hipDeviceSynchronize();
42 |     cudaDeviceSynchronize();
43 |     float maxError = 0.0f;
44 |     for (int i = 0; i < numElements; ++i)  // largest absolute deviation of B[i] from 3.0f
45 |       maxError = fmax(maxError, fabs(B[i] - 3.0f));
46 |     // CHECK: hipFree(A);
47 |     cudaFree(A);
48 |     // CHECK: hipFree(B);
49 |     cudaFree(B);
50 |     if (maxError == 0.0f)
51 |       return 0;
52 |     return -1;
53 |   }
54 | 


--------------------------------------------------------------------------------
/tests/unit_tests/samples/axpy.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | 
 3 | #include <iostream>
 4 | 
 5 | // CHECK: #include <hip/hip_runtime.h>
 6 | #include <cuda.h>
 7 | 
 8 | #define TOKEN_PASTE(X, Y) X ## Y
 9 | #define ARG_LIST_AS_MACRO a, device_x, device_y
10 | #define KERNEL_CALL_AS_MACRO axpy<float><<<1, kDataLen>>>
11 | #define KERNEL_NAME_MACRO axpy<float>
12 | 
13 | // CHECK: #define COMPLETE_LAUNCH hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y)
14 | #define COMPLETE_LAUNCH axpy<<<1, kDataLen>>>(a, device_x, device_y)
15 | 
16 | 
17 | template<typename T>
18 | __global__ void axpy(T a, T *x, T *y) {
19 |   y[threadIdx.x] = a * x[threadIdx.x];
20 | }
21 | 
22 | __global__ void empty() {
23 | }
24 | 
25 | int main(int argc, char* argv[]) {
26 |   const int kDataLen = 4;
27 | 
28 |   float a = 2.0f;
29 |   float host_x[kDataLen] = {1.0f, 2.0f, 3.0f, 4.0f};
30 |   float host_y[kDataLen];
31 | 
32 |   // Copy input data to device.
33 |   float* device_x;
34 |   float* device_y;
35 | 
36 |   // CHECK: hipMalloc(&device_x, kDataLen * sizeof(float));
37 |   cudaMalloc(&device_x, kDataLen * sizeof(float));
38 | 
39 | #ifdef HERRING
40 |   // CHECK: hipMalloc(&device_y, kDataLen * sizeof(float));
41 |   cudaMalloc(&device_y, kDataLen * sizeof(float));
42 | #else
43 |   // CHECK: hipMalloc(&device_y, kDataLen * sizeof(double));
44 |   cudaMalloc(&device_y, kDataLen * sizeof(double));
45 | #endif
46 | 
47 |   // CHECK: hipMemcpy(device_x, host_x, kDataLen * sizeof(float), hipMemcpyHostToDevice);
48 |   cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), cudaMemcpyHostToDevice);
49 | 
50 |   // Launch the kernel in numerous different strange ways to exercise the preprocessor.
51 |   // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y);
52 |   axpy<<<1, kDataLen>>>(a, device_x, device_y);
53 | 
54 |   // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy<float>), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y);
55 |   axpy<float><<<1, kDataLen>>>(a, device_x, device_y);
56 | 
57 |   // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy<float>), dim3(1), dim3(kDataLen), 0, 0, a, TOKEN_PASTE(device, _x), device_y);
58 |   axpy<float><<<1, kDataLen>>>(a, TOKEN_PASTE(device, _x), device_y);
59 | 
60 |   // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy<float>), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
61 |   axpy<float><<<1, kDataLen>>>(ARG_LIST_AS_MACRO);
62 | 
63 |   // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(KERNEL_NAME_MACRO), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
64 |   KERNEL_NAME_MACRO<<<1, kDataLen>>>(ARG_LIST_AS_MACRO);
65 | 
66 |   // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy<float>), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
67 |   KERNEL_CALL_AS_MACRO(ARG_LIST_AS_MACRO);
68 | 
69 |   // CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0);
70 |   empty<<<1, kDataLen>>> ( );
71 | 
72 |   // CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0);
73 |   empty<<<1, kDataLen, 0>>>();
74 | 
75 |   // CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0);
76 |   empty<<<1, kDataLen, 0, 0>>>();
77 | 
78 |   // CHECK: COMPLETE_LAUNCH;
79 |   COMPLETE_LAUNCH;
80 | 
81 | 
82 |   // Copy output data to host.
83 |   // CHECK: hipDeviceSynchronize();
84 |   cudaDeviceSynchronize();
85 | 
86 |   // CHECK: hipMemcpy(host_y, device_y, kDataLen * sizeof(float), hipMemcpyDeviceToHost);
87 |   cudaMemcpy(host_y, device_y, kDataLen * sizeof(float), cudaMemcpyDeviceToHost);
88 | 
89 |   // Print the results.
90 |   for (int i = 0; i < kDataLen; ++i) {
91 |     std::cout << "y[" << i << "] = " << host_y[i] << "\n";
92 |   }
93 | 
94 |   // CHECK: hipDeviceReset();
95 |   cudaDeviceReset();
96 |   return 0;
97 | }
98 | 


--------------------------------------------------------------------------------
/tests/unit_tests/samples/cudaRegister.cu:
--------------------------------------------------------------------------------
  1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
  2 | 
  3 | /*
  4 | Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | of this software and associated documentation files (the "Software"), to deal
  7 | in the Software without restriction, including without limitation the rights
  8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | copies of the Software, and to permit persons to whom the Software is
 10 | furnished to do so, subject to the following conditions:
 11 | The above copyright notice and this permission notice shall be included in
 12 | all copies or substantial portions of the Software.
 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 19 | THE SOFTWARE.
 20 | */
 21 | 
 22 | #include<cuda.h>
 23 | #include<cuda_runtime.h>
 24 | #include<iostream>
 25 | #include<stdio.h>
 26 | #include<malloc.h>
 27 | 
 28 | #define LEN 1024
 29 | #define SIZE LEN * sizeof(float)
 30 | #define ITER 1024*1024
 31 | 
 32 | // CHECK: if(status != hipSuccess) {
 33 | #define check(msg, status){ \
 34 | if(status != cudaSuccess) { \
 35 |   printf("%s failed. \n", #msg); \
 36 | } \
 37 | }
 38 | 
 39 | __global__ void Inc1(float *Ad, float *Bd){
 40 | 	int tx = threadIdx.x + blockIdx.x * blockDim.x;
 41 | 	if(tx < 1 ){
 42 | 		for(int i=0;i<ITER;i++){
 43 | 			Ad[tx] = Ad[tx] + 1.0f;
 44 | 			for(int j=0;j<256;j++){
 45 | 				Bd[tx] = Ad[tx];
 46 | 			}
 47 | 		}
 48 | 	}
 49 | }
 50 | 
 51 | __global__ void Inc2(float *Ad, float *Bd){
 52 | 	int tx = threadIdx.x + blockIdx.x * blockDim.x;
 53 | 	if(tx < 1024){
 54 | 		for(int i=0;i<ITER;i++){
 55 | 			Ad[tx] = Ad[tx] + 1.0f;
 56 | 			for(int j=0;j<256;j++){
 57 | 				Bd[tx] = Ad[tx];
 58 | 			}
 59 | 		}
 60 | 	}
 61 | }
 62 | 
 63 | int main(){
 64 | 	float *A, *Ad, *Bd;
 65 | 	A = new float[LEN];
 66 | 	for(int i=0;i<LEN;i++){
 67 | 		A[i] = 0.0f;
 68 | 	}
 69 | 
 70 | 	// CHECK: hipError_t status;
 71 | 	cudaError_t status;
 72 | 
 73 |   // CHECK: status = hipHostRegister(A, SIZE, hipHostRegisterMapped);
 74 | 	status = cudaHostRegister(A, SIZE, cudaHostRegisterMapped);
 75 | 	check("Registering A",status);
 76 | 
 77 |   // CHECK: hipHostGetDevicePointer(&Ad, A, 0);
 78 | 	cudaHostGetDevicePointer(&Ad, A, 0);
 79 | 
 80 |   // CHECK: hipMalloc((void**) &Bd, SIZE);
 81 | 	cudaMalloc((void**) &Bd, SIZE);
 82 | 	dim3 dimGrid(LEN/512,1,1);
 83 | 	dim3 dimBlock(512,1,1);
 84 | 
 85 |   // CHECK: hipLaunchKernelGGL(Inc1, dim3(dimGrid), dim3(dimBlock), 0, 0, Ad, Bd);
 86 | 	Inc1<<<dimGrid, dimBlock>>>(Ad, Bd);
 87 | 	A[0] = -(ITER*1.0f);
 88 | 	std::cout<<"Same cache line before completion: \t"<< A[0]<<std::endl;
 89 | 
 90 |   // CHECK: hipDeviceSynchronize();
 91 | 	cudaDeviceSynchronize();
 92 | 	std::cout<<"Same cache line after completion: \t"<< A[0]<<std::endl;
 93 | 
 94 | 	for(int i=0;i<LEN;i++){
 95 | 		A[i] = 0.0f;
 96 | 	}
 97 | 
 98 |   // CHECK: hipLaunchKernelGGL(Inc2, dim3(dimGrid), dim3(dimBlock), 0, 0, Ad, Bd);
 99 | 	Inc2<<<dimGrid, dimBlock>>>(Ad, Bd);
100 | 	A[0] = -(ITER*1.0f);
101 | 	std::cout<<"Diff cache line before completion: \t"<<A[0]<<std::endl;
102 | 
103 |   // CHECK: hipDeviceSynchronize();
104 | 	cudaDeviceSynchronize();
105 | 	std::cout<<"Diff cache line after completion: \t"<<A[0]<<std::endl;
106 | }
107 | 


--------------------------------------------------------------------------------
/tests/unit_tests/samples/dynamic_shared_memory.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | 
 3 | // Taken from Jonathan Hui blog https://jhui.github.io/2017/03/06/CUDA
 4 | 
 5 | #include <stdio.h>
 6 | // CHECK: #include <hip/hip_runtime.h>
 7 | #include <cuda.h>
 8 | 
 9 | __global__ void dynamicReverse(int *d, int n)
10 | {
11 |   // Dynamic shared memory
12 |   // CHECK: __shared__ int s[];
13 |   extern __shared__ int s[];
14 |   int t = threadIdx.x;
15 |   int tr = n-t-1;
16 |   s[t] = d[t];
17 |   __syncthreads();
18 |   d[t] = s[tr];
19 | }
20 | 
21 | int main(void)  // Host driver: runs dynamicReverse on a 64-int array and prints every element that was not reversed.
22 | {
23 |   const int n = 64;
24 |   int a[n], r[n], d[n];  // a: input, r: expected reversed values, d: result read back from the device
25 | 
26 |   for (int i = 0; i < n; i++) {
27 |     a[i] = i;
28 |     r[i] = n-i-1;
29 |     d[i] = 0;
30 |   }
31 | 
32 |   int *d_d;
33 |   // CHECK: hipMalloc(&d_d, n * sizeof(int));
34 |   cudaMalloc(&d_d, n * sizeof(int));
35 |   // run version with dynamic shared memory
36 |   // CHECK: hipMemcpy(d_d, a, n*sizeof(int), hipMemcpyHostToDevice);
37 |   cudaMemcpy(d_d, a, n*sizeof(int), cudaMemcpyHostToDevice);
38 |   // CHECK: hipLaunchKernelGGL(dynamicReverse, dim3(1), dim3(n), n*sizeof(int), 0, d_d, n);
39 |   dynamicReverse<<<1,n,n*sizeof(int)>>>(d_d, n);
40 |   // CHECK: hipMemcpy(d, d_d, n*sizeof(int), hipMemcpyDeviceToHost);
41 |   cudaMemcpy(d, d_d, n*sizeof(int), cudaMemcpyDeviceToHost);
42 |   for (int i = 0; i < n; i++)  // one diagnostic line per mismatching element
43 |     if (d[i] != r[i]) printf("Error: d[%d]!=r[%d] (%d, %d)\n", i, i, d[i], r[i]);
44 | }
45 | 


--------------------------------------------------------------------------------
/tests/unit_tests/samples/half2_allocators.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | 
 3 | #pragma once
 4 | // CHECK: #include <hip/hip_runtime.h>
 5 | #include <cuda_runtime.h>
 6 | #include <math.h>
 7 | // CHECK: #include "hip/hip_fp16.h"
 8 | #include "cuda_fp16.h"
 9 | /**
10 |  * Allocate GPU memory for `count` elements of type `T`.
11 |  */
12 | template<typename T>
13 | static T *gpuMalloc(size_t count) {
14 |     T *ret = nullptr;
15 |     // CHECK: hipMalloc(&ret, count * sizeof(T));
16 |     cudaMalloc(&ret, count * sizeof(T));
17 |     return ret;
18 | }
19 | 
20 | template<typename T>
21 | __global__ void add(int n, T *x, T *y) {
22 |   int index = blockIdx.x * blockDim.x + threadIdx.x;
23 |   int stride = blockDim.x * gridDim.x;
24 |   // CHECK: half2 tmp = __float2half2_rn(0.0f);
25 |   half2 tmp = __float2half2_rn(0.0f);
26 |   // CHECK: float max_val = fmax((float)reinterpret_cast<half&>(tmp.x), (float)reinterpret_cast<half&>(tmp.y));
27 |   float max_val = fmax((float)tmp.x, (float)tmp.y);
28 |   // CHECK: float min_val = fmin((float)reinterpret_cast<half&>(tmp.x), (float)reinterpret_cast<half&>(tmp.y));
29 |   float min_val = fmin((float)reinterpret_cast<half&>(tmp.x), (float)reinterpret_cast<half&>(tmp.y));
30 |   for (int i = index; i < n; i += stride)
31 |       y[i] = max_val - min_val + x[i] + y[i];
32 | }
33 | 
34 | int main(int argc, char* argv[]) {  // Exit status: 0 when maxError stays 0.0f, -1 otherwise.
35 |     size_t numElements = 50;
36 |     float *A = gpuMalloc<float>(numElements);
37 |     float* B = gpuMalloc<float>(numElements);
38 |     for (int i = 0; i < numElements; ++i) {  // NOTE(review): A and B come from gpuMalloc (a device allocation) but are written from host code here; harmless if this fixture is only translated, never executed - confirm
39 |         A[i] = 1.0f;
40 |         B[i] = 2.0f;
41 |     }
42 |     int blockSize = 512;
43 |     int numBlocks = (numElements + blockSize - 1) / blockSize;  // ceiling division
44 |     dim3 dimGrid(numBlocks, 1, 1);
45 |     dim3 dimBlock(blockSize, 1, 1);
46 |     // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(add<float>), dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B);
47 |     add<float><<<dimGrid, dimBlock>>>(numElements, A, B);
48 |     // CHECK: hipDeviceSynchronize();
49 |     cudaDeviceSynchronize();
50 |     float maxError = 0.0f;
51 |     for (int i = 0; i < numElements; ++i)  // largest absolute deviation of B[i] from 3.0f
52 |         maxError = fmax(maxError, fabs(B[i] - 3.0f));
53 |     // CHECK: hipFree(A);
54 |     cudaFree(A);
55 |     // CHECK: hipFree(B);
56 |     cudaFree(B);
57 |     if (maxError == 0.0f)
58 |         return 0;
59 |     return -1;
60 | }
61 | 


--------------------------------------------------------------------------------
/tests/unit_tests/samples/macro_check.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | // CHECK: #include <hip/hip_runtime.h>
 3 | #include <stdio.h>
 4 | 
 5 | #define a0     -3.0124472f
 6 | #define a1      1.7383092f
 7 | #define a2     -0.2796695f
 8 | #define a3      0.0547837f
 9 | #define a4     -0.0073118f
10 | 
11 | #define CHECK(call)                                                            \
12 | {                                                                              \
13 |     const cudaError_t error = call;                                            \
14 |     if (error != cudaSuccess)                                                  \
15 |     {                                                                          \
16 |         fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
17 |         fprintf(stderr, "code: %d, reason: %s\n", error,                       \
18 |                 cudaGetErrorString(error));                                    \
19 |     }                                                                          \
20 | }
21 | 
22 | __device__ __constant__ float coef[5];
23 | 
24 | int main() {
25 |     const float h_coef[] = {a0, a1, a2, a3, a4};
26 |     // CHECK: CHECK( hipMemcpyToSymbol( HIP_SYMBOL(coef), h_coef, 5 * sizeof(float) ));
27 |     CHECK( cudaMemcpyToSymbol( coef, h_coef, 5 * sizeof(float) ));
28 | }


--------------------------------------------------------------------------------
/tests/unit_tests/samples/static_shared_memory.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | 
 3 | // Taken from Jonathan Hui blog https://jhui.github.io/2017/03/06/CUDA
 4 | 
 5 | #include <stdio.h>
 6 | // CHECK: #include <hip/hip_runtime.h>
 7 | #include <cuda.h>
 8 | 
 9 | __global__ void staticReverse(int *d, int n)  // reverses d[0..n-1] in place through a shared staging array
10 | {  // NOTE(review): assumes one block of n threads with n <= 64, as launched by main — confirm for other callers
11 |   // CHECK: __shared__ int s[64];
12 |   __shared__ int s[64];
13 |   int t = threadIdx.x;  // this thread's element index
14 |   int tr = n-t-1;  // mirrored index read back after the barrier
15 |   s[t] = d[t];
16 |   // Barrier: no thread continues until every thread in the block has stored its element in s.
17 |   __syncthreads();
18 |   d[t] = s[tr];
19 | }
20 | // Host driver: reverses a 64-element array on the device and reports every element that differs from the expected value.
21 | int main(void)
22 | {
23 |   const int n = 64;
24 |   int a[n], r[n], d[n];  // a: input, r: expected reversed values, d: result copied back from the device
25 | 
26 |   for (int i = 0; i < n; i++) {
27 |     a[i] = i;
28 |     r[i] = n-i-1;
29 |     d[i] = 0;
30 |   }
31 | 
32 |   int *d_d;
33 |   // CHECK: hipMalloc(&d_d, n * sizeof(int));
34 |   cudaMalloc(&d_d, n * sizeof(int));
35 |   // run version with static shared memory
36 |   // CHECK: hipMemcpy(d_d, a, n*sizeof(int), hipMemcpyHostToDevice);
37 |   cudaMemcpy(d_d, a, n*sizeof(int), cudaMemcpyHostToDevice);
38 |   // CHECK: hipLaunchKernelGGL(staticReverse, dim3(1), dim3(n), 0, 0, d_d, n);
39 |   staticReverse<<<1,n>>>(d_d, n);
40 |   // CHECK: hipMemcpy(d, d_d, n*sizeof(int), hipMemcpyDeviceToHost);
41 |   cudaMemcpy(d, d_d, n*sizeof(int), cudaMemcpyDeviceToHost);
42 |   for (int i = 0; i < n; i++)
43 |     if (d[i] != r[i]) printf("Error: d[%d]!=r[%d] (%d, %d)\n", i, i, d[i], r[i]);
44 | }
45 | 


--------------------------------------------------------------------------------
/tests/unit_tests/samples/vec_add.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args
 2 | 
 3 | // Kernel: thread i sets A[i] = 0 and B[i] = i, then stores A[i] + B[i] in C[i].
 4 | __global__ void  vecAdd(float* A, float* B, float* C)
 5 | {
 6 |   int i = threadIdx.x;  // no blockIdx term, so every block would touch the same elements
 7 |   A[i] = 0;
 8 |   B[i] = i;
 9 |   C[i] = A[i] + B[i];
10 | }
11 | // CHECK: #include <hip/hip_runtime.h>
12 | #include <stdio.h>
13 | #define SIZE 10
14 | #define KERNELINVOKES  5000000
15 | int vecadd(int  gpudevice, int rank)  // runs vecAdd KERNELINVOKES times on SIZE floats, prints C, returns 0
16 | {
17 |   int devcheck(int, int);  // block-scope declaration; devcheck is defined later in this file
18 |   devcheck(gpudevice, rank);
19 |   float A[SIZE], B[SIZE], C[SIZE];
20 |   // Device buffers passed to the kernel as A, B and C
21 |   float *devPtrA;
22 |   float *devPtrB;
23 |   float *devPtrC;
24 |   int memsize = SIZE * sizeof(float);
25 |   // CHECK: hipMalloc((void**)&devPtrA, memsize);
26 |   // CHECK: hipMalloc((void**)&devPtrB, memsize);
27 |   // CHECK: hipMalloc((void**)&devPtrC, memsize);
28 |   cudaMalloc((void**)&devPtrA, memsize);
29 |   cudaMalloc((void**)&devPtrB, memsize);
30 |   cudaMalloc((void**)&devPtrC, memsize);
31 |   // CHECK: hipMemcpy(devPtrA, A, memsize, hipMemcpyHostToDevice);
32 |   // CHECK: hipMemcpy(devPtrB, B, memsize, hipMemcpyHostToDevice);
33 |   cudaMemcpy(devPtrA, A, memsize, cudaMemcpyHostToDevice);
34 |   cudaMemcpy(devPtrB, B, memsize, cudaMemcpyHostToDevice);
35 |   for (int i = 0; i<KERNELINVOKES; i++)
36 |   {
37 |     // CHECK: hipLaunchKernelGGL(vecAdd, dim3(1), dim3(gpudevice), 0, 0, devPtrA, devPtrB, devPtrC);
38 |     vecAdd <<< 1, gpudevice >>>(devPtrA, devPtrB, devPtrC);
39 |   }
40 |   // CHECK: hipMemcpy(C, devPtrC, memsize, hipMemcpyDeviceToHost);
41 |   cudaMemcpy(C, devPtrC, memsize, cudaMemcpyDeviceToHost);
42 |   // print only the first gpudevice elements: the unique output of each rank's kernel launch
43 |   for (int i = 0; i<gpudevice; i++)
44 |     printf("rank %d: C[%d]=%f\n", rank, i, C[i]);
45 |   // CHECK: hipFree(devPtrA);
46 |   // CHECK: hipFree(devPtrB);
47 |   // CHECK: hipFree(devPtrC);
48 |   cudaFree(devPtrA);
49 |   cudaFree(devPtrB);
50 |   cudaFree(devPtrC);
51 |   return 0;
52 | }
53 | // Verifies that gpudevice is a usable device index and makes it the current device.
54 | // Exits with status 1 if the index is out of range or the device reports warpSize <= 1.
55 | // Returns 0 if cudaSetDevice succeeded, 1 otherwise.
56 | int devcheck(int  gpudevice, int rank)
57 | {
58 |   int device_count = 0;
59 |   int device;   // used with cudaGetDevice() to verify cudaSetDevice()
60 |   // CHECK: hipGetDeviceCount(&device_count);
61 |   cudaGetDeviceCount(&device_count);
62 |   if (gpudevice >= device_count)
63 |   {
64 |     printf("gpudevice >=  device_count ... exiting\n");
65 |     exit(1);
66 |   }
67 |   // CHECK: hipError_t cudareturn;
68 |   // CHECK: hipDeviceProp_t deviceProp;
69 |   // CHECK: hipGetDeviceProperties(&deviceProp, gpudevice);
70 |   cudaError_t cudareturn;
71 |   cudaDeviceProp deviceProp;
72 |   cudaGetDeviceProperties(&deviceProp, gpudevice);
73 |   if (deviceProp.warpSize <= 1)
74 |   {
75 |     printf("rank %d: warning, CUDA Device Emulation (CPU) detected, exiting\n", rank);
76 |     exit(1);
77 |   }
78 |   // CHECK: cudareturn = hipSetDevice(gpudevice);
79 |   cudareturn = cudaSetDevice(gpudevice);
80 |   // CHECK: if (cudareturn == hipErrorInvalidDevice)
81 |   if (cudareturn == cudaErrorInvalidDevice)
82 |   {
83 |     // CHECK: perror("hipSetDevice returned hipErrorInvalidDevice");
84 |     perror("cudaSetDevice returned cudaErrorInvalidDevice");
85 |   }
86 |   else
87 |   {
88 |     // CHECK: hipGetDevice(&device);
89 |     cudaGetDevice(&device);
90 |     printf("rank %d: cudaGetDevice()=%d\n", rank, device);
91 |   }
92 |   // A value-returning function must not flow off its end; report whether the device was selected.
93 |   return (cudareturn == cudaSuccess) ? 0 : 1;
94 | }
95 | 


--------------------------------------------------------------------------------
/tests/unit_tests/synthetic/driver_functions_internal.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -D__CUDA_API_VERSION_INTERNAL
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <cuda.h>
 5 | #include <string>
 6 | #include <stdio.h>
 7 | #if defined(_WIN32)
 8 |   #include "windows.h"
 9 |   #include <GL/glew.h>
10 | #elif CUDA_VERSION <= 10000
11 |   #include <GL/glew.h>
12 | #endif
13 | #include "cudaGL.h"
14 | 
15 | int main() {  // declares one texref / array descriptor / device pointer and passes them to cuTexRefSetAddress2D_v2
16 |   printf("13. CUDA Driver API Internal Functions synthetic test\n");
17 | 
18 |   size_t bytes = 0;  // passed as the Pitch argument below
19 | 
20 |   // CHECK: hipTexRef texref;
21 |   // CHECK-NEXT: HIP_ARRAY_DESCRIPTOR ARRAY_DESCRIPTOR;
22 |   // CHECK-NEXT: hipDeviceptr_t deviceptr;
23 |   CUtexref texref;
24 |   CUDA_ARRAY_DESCRIPTOR ARRAY_DESCRIPTOR;
25 |   CUdeviceptr deviceptr;
26 | 
27 |   // CUDA: CUresult CUDAAPI cuTexRefSetAddress2D_v2(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
28 |   // HIP: DEPRECATED(DEPRECATED_MSG) hipError_t hipTexRefSetAddress2D(textureReference* texRef, const HIP_ARRAY_DESCRIPTOR* desc, hipDeviceptr_t dptr, size_t Pitch);
29 |   // CHECK: hipError_t result = hipTexRefSetAddress2D(texref, &ARRAY_DESCRIPTOR, deviceptr, bytes);
30 |   CUresult result = cuTexRefSetAddress2D_v2(texref, &ARRAY_DESCRIPTOR, deviceptr, bytes);
31 | 
32 |   return 0;
33 | }
34 | 


--------------------------------------------------------------------------------
/tests/unit_tests/synthetic/driver_typedefs.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --amap --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -D__CUDA_API_VERSION_INTERNAL
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <cuda.h>
 5 | #include <stdio.h>
 6 | 
 7 | int main() {  // declares one variable per driver-API typedef; each is paired with the type name expected after hipify
 8 |   printf("03. CUDA Driver API Typedefs synthetic test\n");
 9 | 
10 |   // CHECK: hipDevice_t device;
11 |   CUdevice device;
12 | 
13 |   // CHECK: hipDeviceptr_t deviceptr;
14 |   // CHECK-NEXT: hipDeviceptr_t deviceptr_v1;
15 |   CUdeviceptr deviceptr;
16 |   CUdeviceptr_v1 deviceptr_v1;
17 | 
18 |   // CHECK: hipStreamCallback_t streamCallback;
19 |   CUstreamCallback streamCallback;
20 | 
21 |   // CHECK: hipSurfaceObject_t surfObject;
22 |   CUsurfObject surfObject;
23 | 
24 |   // CHECK: hipTextureObject_t texObject;
25 |   CUtexObject texObject;
26 | 
27 |   // CHECK: hipUUID uuid;
28 |   CUuuid uuid;
29 | 
30 | #if CUDA_VERSION >= 10000
31 |   // CHECK: hipHostFn_t hostFn;
32 |   CUhostFn hostFn;
33 | 
34 |   // CHECK: HIP_MEMSET_NODE_PARAMS MEMSET_NODE_PARAMS_st;
35 |   // CHECK-NEXT: HIP_MEMSET_NODE_PARAMS MEMSET_NODE_PARAMS;
36 |   CUDA_MEMSET_NODE_PARAMS_st MEMSET_NODE_PARAMS_st;
37 |   CUDA_MEMSET_NODE_PARAMS MEMSET_NODE_PARAMS;
38 | #endif
39 | 
40 | #if CUDA_VERSION >= 11000
41 |   // CHECK: hipMemGenericAllocationHandle_t memGenericAllocationHandle_t;
42 |   CUmemGenericAllocationHandle memGenericAllocationHandle_t;
43 | #endif
44 | 
45 | #if CUDA_VERSION >= 11030  // the versioned _v1 / _v2 spellings are only declared inside this guard
46 |   // CHECK: hipDevice_t device_v1;
47 |   CUdevice_v1 device_v1;
48 | 
49 |   // CHECK: hipDeviceptr_t deviceptr_v2;
50 |   CUdeviceptr_v2 deviceptr_v2;
51 | 
52 |   // CHECK: hipSurfaceObject_t surfObject_v1;
53 |   CUsurfObject_v1 surfObject_v1;
54 | 
55 |   // CHECK: hipTextureObject_t texObject_v1;
56 |   CUtexObject_v1 texObject_v1;
57 | 
58 |   // CHECK: hipMemGenericAllocationHandle_t memGenericAllocationHandle_v1;
59 |   CUmemGenericAllocationHandle_v1 memGenericAllocationHandle_v1;
60 | 
61 |   // CHECK: HIP_MEMSET_NODE_PARAMS MEMSET_NODE_PARAMS_v1;
62 |   CUDA_MEMSET_NODE_PARAMS_v1 MEMSET_NODE_PARAMS_v1;
63 | 
64 |   // CHECK: hipStreamBatchMemOpParams streamBatchMemOpParams_v1;
65 |   CUstreamBatchMemOpParams_v1 streamBatchMemOpParams_v1;
66 | #endif
67 | 
68 |   return 0;
69 | }
70 | 


--------------------------------------------------------------------------------
/tests/unit_tests/synthetic/driver_unions.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --amap --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -D__CUDA_API_VERSION_INTERNAL
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <cuda.h>
 5 | #include <stdio.h>
 6 | 
 7 | int main() {  // declares one variable per driver-API union type, each behind the CUDA_VERSION guard it needs
 8 |   printf("10. CUDA Driver API Unions synthetic test\n");
 9 | 
10 | #if CUDA_VERSION >= 8000
11 |   // CHECK: hipStreamBatchMemOpParams streamBatchMemOpParams;
12 |   // CHECK-NEXT: hipStreamBatchMemOpParams_union streamBatchMemOpParams_union;
13 |   CUstreamBatchMemOpParams streamBatchMemOpParams;
14 |   CUstreamBatchMemOpParams_union streamBatchMemOpParams_union;
15 | #endif
16 | 
17 | #if CUDA_VERSION >= 11000
18 |   // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue;
19 |   CUkernelNodeAttrValue kernelNodeAttrValue;
20 | #endif
21 | 
22 | #if CUDA_VERSION >= 11000 && CUDA_VERSION < 11080  // the _union spelling is only exercised below 11080
23 |   // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue_union;
24 |   CUkernelNodeAttrValue_union kernelNodeAttrValue_union;
25 | #endif
26 | 
27 | #if CUDA_VERSION >= 11030
28 |   // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue_v1;
29 |   CUkernelNodeAttrValue_v1 kernelNodeAttrValue_v1;
30 | #endif
31 | 
32 | #if CUDA_VERSION >= 11080  // NOTE(review): the type below has a cuda prefix while every other type in this driver-API test uses CU — confirm intended
33 |   // CHECK: hipLaunchAttributeValue LaunchAttributeValue;
34 |   cudaLaunchAttributeValue LaunchAttributeValue;
35 | #endif
36 | 
37 |   return 0;
38 | }
39 | 


--------------------------------------------------------------------------------
/tests/unit_tests/synthetic/libraries/cublaslt2hipblaslt_10010_10020.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --amap --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -D__CUDA_API_VERSION_INTERNAL -ferror-limit=500
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <cuda_runtime.h>
 5 | #include <stdio.h>
 6 | // CHECK: #include "hipblaslt.h"
 7 | #include "cublasLt.h"
 8 | // CHECK-NOT: #include "hipblaslt.h"
 9 | 
10 | int main() {  // single declaration test, compiled only when CUDA_VERSION is within 10010..10020
11 |   printf("21.10010.10020. cuBLASLt API to hipBLASLt API synthetic test\n");
12 | 
13 | #if CUDA_VERSION >= 10010 && CUDA_VERSION <= 10020
14 |   // CHECK: hipblasLtMatrixLayoutOpaque_t blasLtMatrixLayoutOpaque;
15 |   cublasLtMatrixLayoutStruct blasLtMatrixLayoutOpaque;
16 | #endif
17 | 
18 |   return 0;
19 | }
20 | 


--------------------------------------------------------------------------------
/tests/unit_tests/synthetic/libraries/cusparse2rocsparse_10010.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --skip-excluded-preprocessor-conditional-blocks --experimental --roc %clang_args -ferror-limit=500
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <cuda_runtime.h>
 5 | // CHECK: #include "hip/hip_complex.h"
 6 | #include "cuComplex.h"
 7 | #include <stdio.h>
 8 | // CHECK: #include "rocsparse.h"
 9 | #include "cusparse.h"
10 | // CHECK-NOT: #include "rocsparse.h"
11 | 
12 | int main() {  // declares cuSPARSE handles and arguments, then calls cusparseCsr2cscEx2_bufferSize when CUDA_VERSION >= 10010
13 |   printf("18.10010. cuSPARSE API to rocSPARSE API synthetic test\n");
14 | 
15 |   // CHECK: _rocsparse_handle *handle = nullptr;
16 |   // CHECK-NEXT: rocsparse_handle handle_t;
17 |   cusparseContext *handle = nullptr;
18 |   cusparseHandle_t handle_t;
19 | 
20 |   // CHECK: rocsparse_status status_t;
21 |   cusparseStatus_t status_t;
22 | 
23 |   // CHECK: _rocsparse_mat_descr *matDescr = nullptr;
24 |   // CHECK-NEXT: rocsparse_mat_descr matDescr_t, matDescr_t_2, matDescr_A, matDescr_B, matDescr_C, matDescr_D;
25 |   cusparseMatDescr *matDescr = nullptr;
26 |   cusparseMatDescr_t matDescr_t, matDescr_t_2, matDescr_A, matDescr_B, matDescr_C, matDescr_D;
27 | 
28 |   // CHECK: rocsparse_action action_t;
29 |   cusparseAction_t action_t;
30 | 
31 |   // CHECK: rocsparse_index_base indexBase_t;
32 |   cusparseIndexBase_t indexBase_t;
33 | 
34 |   int m = 0;
35 |   int n = 0;
36 |   int innz = 0;
37 |   int csrRowPtrA = 0;
38 |   int csrRowPtrB = 0;
39 |   int csrRowPtrC = 0;
40 |   int cscRowIndA = 0;
41 |   int csrColIndA = 0;
42 |   int csrColIndB = 0;
43 |   int csrColIndC = 0;
44 |   int cscColPtrA = 0;
45 |   size_t bufferSize = 0;
46 |   void *pcsrVal = nullptr;
47 |   void *pcscVal = nullptr;
48 | 
49 | #if CUDA_VERSION >= 8000
50 |   // TODO: [#899] There should be rocsparse_datatype instead of hipDataType
51 |   cudaDataType_t dataType_t;
52 |   cudaDataType dataType;
53 | #endif
54 | 
55 | #if CUDA_VERSION >= 10010
56 |   // TODO: cusparseCsr2CscAlg_t has no analogue in rocSPARSE; removing its declaration and usages still needs to be implemented
57 |   cusparseCsr2CscAlg_t Csr2CscAlg_t;
58 | 
59 |   // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx2_bufferSize(cusparseHandle_t handle, int m, int n, int nnz, const void* csrVal, const int* csrRowPtr, const int* csrColInd, void* cscVal, int* cscColPtr, int* cscRowInd, cudaDataType valType, cusparseAction_t copyValues, cusparseIndexBase_t idxBase, cusparseCsr2CscAlg_t alg, size_t* bufferSize);
60 |   // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_csr2csc_buffer_size(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_int* csr_row_ptr, const rocsparse_int* csr_col_ind, rocsparse_action copy_values, size_t* buffer_size);
61 |   // CHECK: status_t = rocsparse_csr2csc_buffer_size(handle_t, m, n, innz, &csrRowPtrA, &csrColIndA, action_t, &bufferSize);
62 |   status_t = cusparseCsr2cscEx2_bufferSize(handle_t, m, n, innz, pcsrVal, &csrRowPtrA, &csrColIndA, pcscVal, &cscColPtrA, &cscRowIndA, dataType, action_t, indexBase_t, Csr2CscAlg_t, &bufferSize);
63 | #endif
64 | 
65 |   return 0;
66 | }
67 | 


--------------------------------------------------------------------------------
/tests/unit_tests/synthetic/libraries/cusparse2rocsparse_10010_12000.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 4 --skip-excluded-preprocessor-conditional-blocks --experimental --roc --amap %clang_args -ferror-limit=500
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <cuda_runtime.h>
 5 | // CHECK: #include "hip/hip_complex.h"
 6 | #include "cuComplex.h"
 7 | #include <stdio.h>
 8 | // CHECK: #include "rocsparse.h"
 9 | #include "cusparse.h"
10 | // CHECK-NOT: #include "rocsparse.h"
11 | 
12 | int main() {  // declaration-only test: no cuSPARSE function is called in this file
13 |   printf("18.10010.12000. cuSPARSE API to rocSPARSE API synthetic test\n");
14 | 
15 |   // CHECK: _rocsparse_handle *handle = nullptr;
16 |   // CHECK-NEXT: rocsparse_handle handle_t;
17 |   cusparseContext *handle = nullptr;
18 |   cusparseHandle_t handle_t;
19 | 
20 |   // CHECK: rocsparse_status status_t;
21 |   cusparseStatus_t status_t;
22 | 
23 |   // CHECK: _rocsparse_mat_descr *matDescr = nullptr;
24 |   // CHECK-NEXT: rocsparse_mat_descr matDescr_t, matDescr_t_2, matDescr_A, matDescr_B, matDescr_C, matDescr_D;
25 |   cusparseMatDescr *matDescr = nullptr;
26 |   cusparseMatDescr_t matDescr_t, matDescr_t_2, matDescr_A, matDescr_B, matDescr_C, matDescr_D;
27 | 
28 |   // CHECK: rocsparse_action action_t;
29 |   cusparseAction_t action_t;
30 | 
31 |   // CHECK: rocsparse_index_base indexBase_t;
32 |   cusparseIndexBase_t indexBase_t;
33 | 
34 |   int m = 0;
35 |   int n = 0;
36 |   int innz = 0;
37 |   int csrRowPtrA = 0;
38 |   int csrRowPtrB = 0;
39 |   int csrRowPtrC = 0;
40 |   int cscRowIndA = 0;
41 |   int csrColIndA = 0;
42 |   int csrColIndB = 0;
43 |   int csrColIndC = 0;
44 |   int cscColPtrA = 0;
45 |   size_t bufferSize = 0;
46 |   void *pcsrVal = nullptr;
47 |   void *pcscVal = nullptr;
48 |   void *alpha = nullptr;
49 |   void *beta = nullptr;
50 |   void *tempBuffer = nullptr;
51 |   void* result = nullptr;
52 | 
53 |   // CHECK: rocsparse_operation opA, opB, opX;
54 |   cusparseOperation_t opA, opB, opX;
55 | 
56 | #if CUDA_VERSION >= 8000
57 |   // TODO: [#899] There should be rocsparse_datatype instead of hipDataType
58 |   cudaDataType_t dataType_t;
59 |   cudaDataType dataType;
60 | #endif
61 | 
62 | #if CUDA_VERSION >= 10010
63 | 
64 | #if (CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR == 2) || CUSPARSE_VERSION >= 10300
65 |   // TODO: cusparseCsr2CscAlg_t has no analogue in rocSPARSE; removing its declaration and usages still needs to be implemented
66 |   cusparseCsr2CscAlg_t Csr2CscAlg_t;
67 | 
68 |   // CHECK: _rocsparse_spvec_descr *spVecDescr = nullptr;
69 |   // CHECK-NEXT: rocsparse_spvec_descr spVecDescr_t;
70 |   cusparseSpVecDescr *spVecDescr = nullptr;
71 |   cusparseSpVecDescr_t spVecDescr_t;
72 | 
73 |   // CHECK: _rocsparse_dnvec_descr *dnVecDescr = nullptr;
74 |   // CHECK-NEXT: rocsparse_dnvec_descr dnVecDescr_t, vecX, vecY;
75 |   cusparseDnVecDescr *dnVecDescr = nullptr;
76 |   cusparseDnVecDescr_t dnVecDescr_t, vecX, vecY;
77 | #endif
78 | 
79 | #if (CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000  // i.e. skipped only on Windows with CUDA_VERSION below 11000
80 |   // CHECK: rocsparse_spmat_descr spMatDescr_t, spmatA, spmatB, spmatC;
81 |   cusparseSpMatDescr_t spMatDescr_t, spmatA, spmatB, spmatC;
82 | 
83 |   // CHECK: rocsparse_dnmat_descr dnMatDescr_t, dnmatA, dnmatB, dnmatC;
84 |   cusparseDnMatDescr_t dnMatDescr_t, dnmatA, dnmatB, dnmatC;
85 | 
86 |   // CHECK: rocsparse_spmm_alg spMMAlg_t;
87 |   cusparseSpMMAlg_t spMMAlg_t;
88 | #endif
89 | 
90 | #endif
91 | 
92 |   return 0;
93 | }
94 | 


--------------------------------------------------------------------------------
/tests/unit_tests/synthetic/libraries/cusparse2rocsparse_7050.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --skip-excluded-preprocessor-conditional-blocks --experimental --roc %clang_args -ferror-limit=500
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <cuda_runtime.h>
 5 | // CHECK: #include "hip/hip_complex.h"
 6 | #include "cuComplex.h"
 7 | #include <stdio.h>
 8 | // CHECK: #include "rocsparse.h"
 9 | #include "cusparse.h"
10 | // CHECK-NOT: #include "rocsparse.h"
11 | 
12 | int main() {  // calls the Z, C, D and S variants of cusparse?gemvi_bufferSize when CUDA_VERSION >= 7050
13 |   printf("18.7050. cuSPARSE API to rocSPARSE API synthetic test\n");
14 | 
15 |   // CHECK: _rocsparse_handle *handle = nullptr;
16 |   // CHECK-NEXT: rocsparse_handle handle_t;
17 |   cusparseContext *handle = nullptr;
18 |   cusparseHandle_t handle_t;
19 | 
20 |   // CHECK: rocsparse_status status_t;
21 |   cusparseStatus_t status_t;
22 | 
23 |   // CHECK: rocsparse_operation opA, opB, opX;
24 |   cusparseOperation_t opA, opB, opX;
25 | 
26 |   int batchCount = 0;
27 |   int m = 0;
28 |   int n = 0;
29 |   int innz = 0;
30 |   int algo = 0;
31 |   int bufferSizeInBytes = 0;  // NOTE(review): an int, yet the expected output casts its address to size_t* — confirm this is meant as a purely syntactic check
32 |   double dds = 0.f;
33 |   double ddl = 0.f;
34 |   double dd = 0.f;
35 |   double ddu = 0.f;
36 |   double ddw = 0.f;
37 |   double dx = 0.f;
38 |   float fds = 0.f;
39 |   float fdl = 0.f;
40 |   float fd = 0.f;
41 |   float fdu = 0.f;
42 |   float fdw = 0.f;
43 |   float fx = 0.f;
44 |   size_t bufferSize = 0;
45 |   void *pBuffer = nullptr;
46 | 
47 | #if CUDA_VERSION >= 7050
48 |   // CUDA: cusparseStatus_t CUSPARSEAPI cusparseZgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, int* pBufferSize);
49 |   // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_zgemvi_buffer_size(rocsparse_handle handle, rocsparse_operation trans, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, size_t* buffer_size);
50 |   // CHECK: status_t = rocsparse_zgemvi_buffer_size(handle_t, opA, m, n, innz, reinterpret_cast<size_t*>(&bufferSizeInBytes));
51 |   status_t = cusparseZgemvi_bufferSize(handle_t, opA, m, n, innz, &bufferSizeInBytes);
52 | 
53 |   // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, int* pBufferSize);
54 |   // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_cgemvi_buffer_size(rocsparse_handle handle, rocsparse_operation trans, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, size_t* buffer_size);
55 |   // CHECK: status_t = rocsparse_cgemvi_buffer_size(handle_t, opA, m, n, innz, reinterpret_cast<size_t*>(&bufferSizeInBytes));
56 |   status_t = cusparseCgemvi_bufferSize(handle_t, opA, m, n, innz, &bufferSizeInBytes);
57 | 
58 |   // CUDA: cusparseStatus_t CUSPARSEAPI cusparseDgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, int* pBufferSize);
59 |   // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_dgemvi_buffer_size(rocsparse_handle handle, rocsparse_operation trans, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, size_t* buffer_size);
60 |   // CHECK: status_t = rocsparse_dgemvi_buffer_size(handle_t, opA, m, n, innz, reinterpret_cast<size_t*>(&bufferSizeInBytes));
61 |   status_t = cusparseDgemvi_bufferSize(handle_t, opA, m, n, innz, &bufferSizeInBytes);
62 | 
63 |   // CUDA: cusparseStatus_t CUSPARSEAPI cusparseSgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, int* pBufferSize);
64 |   // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_sgemvi_buffer_size(rocsparse_handle handle, rocsparse_operation trans, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, size_t* buffer_size);
65 |   // CHECK: status_t = rocsparse_sgemvi_buffer_size(handle_t, opA, m, n, innz, reinterpret_cast<size_t*>(&bufferSizeInBytes));
66 |   status_t = cusparseSgemvi_bufferSize(handle_t, opA, m, n, innz, &bufferSizeInBytes);
67 | #endif
68 | 
69 |   return 0;
70 | }
71 | 


--------------------------------------------------------------------------------
/tests/unit_tests/synthetic/nvrtc2hiprtc.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --amap --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -ferror-limit=500
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <cuda_runtime.h>
 5 | #include <stdio.h>
 6 | // CHECK: #include "hip/hiprtc.h"
 7 | #include "nvrtc.h"
 8 | // CHECK-NOT: #include "hip/hiprtc.h"
 9 | // CHECK-NOT: #include "nvrtc.h"
10 | 
11 | int main() {  // calls nvrtcCompileProgram and nvrtcCreateProgram with placeholder arguments
12 |   printf("25. CUDA nvRTC API to HIP hipRTC API synthetic test\n");
13 | 
14 |   int numOptions = 0;
15 |   int numHeaders = 0;
16 |   const char* pOptions = nullptr;
17 |   const char* pHeaders = nullptr;  // its address is passed as the headers argument below
18 |   const char* pIncludeNames = nullptr;
19 |   char* pchSrc = nullptr;
20 |   char* pchName = nullptr;
21 | 
22 |   // CHECK: hiprtcProgram rtcProgram = nullptr;
23 |   nvrtcProgram rtcProgram = nullptr;
24 | 
25 |   // CHECK: hiprtcResult rtcResult;
26 |   nvrtcResult rtcResult;
27 | 
28 |   // CUDA: nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char* const* options);
29 |   // HIP: hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options);
30 |   // CHECK: rtcResult = hiprtcCompileProgram(rtcProgram, numOptions, const_cast<const char**>(&pOptions));
31 |   rtcResult = nvrtcCompileProgram(rtcProgram, numOptions, &pOptions);
32 | 
33 |   // CUDA: nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char* src, const char* name, int numHeaders, const char* const* headers, const char* const* includeNames);
34 |   // HIP: hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, int numHeaders, const char** headers, const char** includeNames);
35 |   // CHECK: rtcResult = hiprtcCreateProgram(&rtcProgram, pchSrc, pchName, numHeaders, const_cast<const char**>(&pHeaders), const_cast<const char**>(&pIncludeNames));
36 |   rtcResult = nvrtcCreateProgram(&rtcProgram, pchSrc, pchName, numHeaders, &pHeaders, &pIncludeNames);
37 | 
38 |   return 0;
39 | }
40 | 


--------------------------------------------------------------------------------
/tests/unit_tests/synthetic/runtime_functions_12000.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <cuda_runtime.h>
 5 | #include <string>
 6 | #include <stdio.h>
 7 | 
 8 | int main() {  // calls cudaGraphInstantiate and cudaGetDriverEntryPoint when CUDA_VERSION >= 12000
 9 |   printf("12.12000. CUDA Runtime API Functions synthetic test for CUDA >= 12000\n");
10 | 
11 |   // CHECK: hipError_t result = hipSuccess;
12 |   cudaError result = cudaSuccess;
13 | 
14 |   // CHECK: hipGraphExec_t GraphExec_t;
15 |   cudaGraphExec_t GraphExec_t;
16 | 
17 |   // CHECK: hipGraph_t Graph_t;
18 |   cudaGraph_t Graph_t;
19 | 
20 |   void *pfn = nullptr;
21 |   std::string symbol = "symbol";
22 | 
23 | #if defined(_WIN32) && CUDA_VERSION < 9000
24 |   typedef signed   __int64 int64_t;
25 |   typedef unsigned __int64 uint64_t;
26 | #endif
27 | 
28 | #if defined(_WIN32)  // ull is the flags argument of both calls below; its declared type differs per platform
29 |   unsigned long long ull = 0;
30 | #else
31 |   unsigned long ull = 0;
32 | #endif
33 | 
34 | #if CUDA_VERSION >= 12000
35 |   // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, unsigned long long flags __dv(0));
36 |   // HIP: hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t graph, unsigned long long flags);
37 |   // CHECK: result = hipGraphInstantiateWithFlags(&GraphExec_t, Graph_t, ull);
38 |   result = cudaGraphInstantiate(&GraphExec_t, Graph_t, ull);
39 | 
40 |   // CHECK: hipDriverProcAddressQueryResult driverProcAddressQueryResult;
41 |   cudaDriverEntryPointQueryResult driverProcAddressQueryResult;
42 | 
43 |   // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGetDriverEntryPoint(const char *symbol, void **funcPtr, unsigned long long flags, enum cudaDriverEntryPointQueryResult *driverStatus = NULL);
44 |   // CUDA < 12000: extern __host__ cudaError_t CUDARTAPI cudaGetDriverEntryPoint(const char *symbol, void **funcPtr, unsigned long long flags);
45 |   // NOTE: cudaGetDriverEntryPoint for CUDA < 12000 is not supported by HIP
46 |   // TODO: detect cudaGetDriverEntryPoint signature and report warning/error for old (before CUDA 12.0) signature
47 |   // HIP: hipError_t hipGetProcAddress(const char* symbol, void** pfn, int hipVersion, uint64_t flags, hipDriverProcAddressQueryResult* symbolStatus);
48 |   // TODO: add an explicit static_cast<uint64_t> for ull
49 |   // CHECK: result = hipGetProcAddress(symbol.c_str(), &pfn, 700, ull, &driverProcAddressQueryResult);
50 |   result = cudaGetDriverEntryPoint(symbol.c_str(), &pfn, ull, &driverProcAddressQueryResult);
51 | #endif
52 | 
53 |   return 0;
54 | }
55 | 


--------------------------------------------------------------------------------
/tests/unit_tests/synthetic/runtime_functions_9000.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <cuda_runtime.h>
 5 | #include <string>
 6 | #include <stdio.h>
 7 | 
 8 | int main() {  // calls cudaFuncSetAttribute and cudaLaunchCooperativeKernel when CUDA_VERSION >= 9000
 9 |   printf("12.9000. CUDA Runtime API Functions synthetic test for CUDA >= 9000\n");
10 | 
11 |   // CHECK: hipError_t result = hipSuccess;
12 |   cudaError result = cudaSuccess;
13 | 
14 |   // CHECK: hipStream_t stream;
15 |   cudaStream_t stream;
16 | 
17 |   int intVal = 0;
18 |   unsigned int flags = 0;  // despite the name, passed below as the shared-memory size argument
19 |   dim3 gridDim;
20 |   dim3 blockDim;
21 |   void* func = nullptr;
22 |   void* image = nullptr;  // its address stands in for the kernel argument array below
23 | 
24 | #if CUDA_VERSION >= 9000
25 |   // CHECK: hipFuncAttribute FuncAttribute;
26 |   cudaFuncAttribute FuncAttribute;
27 | 
28 |   // CUDA: extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value);
29 |   // HIP: hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value);
30 |   // CHECK: result = hipFuncSetAttribute(reinterpret_cast<const void*>(func), FuncAttribute, intVal);
31 |   result = cudaFuncSetAttribute(func, FuncAttribute, intVal);
32 | 
33 |   // CUDA: extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream);
34 |   // HIP: hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX, void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream);
35 |   // CHECK: result = hipLaunchCooperativeKernel(reinterpret_cast<const void*>(func), gridDim, blockDim, &image, flags, stream);
36 |   result = cudaLaunchCooperativeKernel(func, gridDim, blockDim, &image, flags, stream);
37 | #endif
38 | 
39 |   return 0;
40 | }
41 | 


--------------------------------------------------------------------------------
/tests/unit_tests/synthetic/runtime_typedefs.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --amap --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -D__CUDA_API_VERSION_INTERNAL
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <cuda_runtime_api.h>
 5 | #include <stdio.h>
 6 | 
 7 | int main() {  // declares one variable per runtime-API typedef; each is paired with the type name expected after hipify
 8 |   printf("07. CUDA Runtime API Typedefs synthetic test\n");
 9 | 
10 |   // CHECK: hipStreamCallback_t StreamCallback_t;
11 |   // CHECK-NEXT: hipSurfaceObject_t SurfaceObject_t;
12 |   // CHECK-NEXT: hipTextureObject_t TextureObject_t;
13 |   cudaStreamCallback_t StreamCallback_t;
14 |   cudaSurfaceObject_t SurfaceObject_t;
15 |   cudaTextureObject_t TextureObject_t;
16 | 
17 |   // CHECK: hipUUID uuid;
18 |   cudaUUID_t uuid;
19 | 
20 | #if CUDA_VERSION >= 10000  // cudaHostFn_t is only declared inside this guard
21 |   // CHECK: hipHostFn_t HostFn_t;
22 |   cudaHostFn_t HostFn_t;
23 | #endif
24 | 
25 |   return 0;
26 | }
27 | 


--------------------------------------------------------------------------------
/tests/unit_tests/synthetic/runtime_unions.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --amap --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -D__CUDA_API_VERSION_INTERNAL
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <cuda.h>
 5 | #include <stdio.h>
 6 | 
 7 | int main() {  // declares one variable per union type, each behind the CUDA_VERSION guard it needs
 8 |   printf("11. CUDA Runtime API Unions synthetic test\n");
 9 | 
10 | #if CUDA_VERSION >= 11000
11 |   // CHECK: hipKernelNodeAttrValue KernelNodeAttrValue;
12 |   cudaKernelNodeAttrValue KernelNodeAttrValue;
13 | #endif
14 | 
15 | #if CUDA_VERSION >= 11080  // NOTE(review): the two types below use the CU prefix although this is the runtime-API test — confirm intended
16 |   // CHECK: hipLaunchAttributeValue LaunchAttributeValue;
17 |   // CHECK-NEXT: hipLaunchAttributeValue launchAttributeValue_union;
18 |   CUlaunchAttributeValue LaunchAttributeValue;
19 |   CUlaunchAttributeValue_union launchAttributeValue_union;
20 | #endif
21 | 
22 |   return 0;
23 | }
24 | 


--------------------------------------------------------------------------------
/tests/unit_tests/synthetic/transforming_matchers.cu:
--------------------------------------------------------------------------------
 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
 2 | 
 3 | // CHECK: #include <hip/hip_runtime.h>
 4 | #include <stdio.h>
 5 | 
 6 | template <typename T>  // T: any status type usable as an `if` condition and castable to unsigned int
 7 | void check(T result, char const *const func, const char *const file, int const line) {  // Reports a failing call on stderr and terminates the process; returns normally when `result` is falsy.
 8 |   if (result) {  // any non-zero status is treated as a failure
 9 |     fprintf(stderr, "Error at %s:%d code=%u(%s) \" \n", file, line, static_cast<unsigned int>(result), func);  // %u matches the unsigned int cast; `func` is the stringified call expression supplied by the caller
10 |     exit(EXIT_FAILURE);
11 |   }
12 | }
13 | 
14 | #define checkErrors(val) check((val), #val, __FILE__, __LINE__)  // forwards the expression's value, its source text, and the call site to check()
15 | #define num 1024  // element-count base; main() sizes its host buffers as num * 2 ints
16 | 
17 | int main(int argc, const char *argv[]) {  // Issues CUDA runtime and driver calls wrapped in checkErrors(); each expectation comment gives the HIP text expected after translation.
18 |   int *input = nullptr;
19 |   void *input_ptr = nullptr;  // second destination so the same allocation call is exercised with a void* as well as an int*
20 |   int deviceCount = 0;
21 |   // CHECK: checkErrors(hipGetDeviceCount(&deviceCount));
22 |   checkErrors(cudaGetDeviceCount(&deviceCount));
23 |   printf("Device Count: %d\n", deviceCount);
24 |   // CHECK: hipDeviceProp_t deviceProp;
25 |   cudaDeviceProp deviceProp;
26 |   deviceProp.major = 0;
27 |   deviceProp.minor = 0;
28 |   int deviceID = 0;
29 |   // CHECK: checkErrors(hipGetDeviceProperties(&deviceProp, deviceID));
30 |   checkErrors(cudaGetDeviceProperties(&deviceProp, deviceID));
31 |   // CHECK: checkErrors(hipSetDevice(deviceID));
32 |   checkErrors(cudaSetDevice(deviceID));
33 |   // CHECK: checkErrors(hipHostMalloc(&input, sizeof(int) * num * 2, hipHostMallocDefault));
34 |   checkErrors(cudaMallocHost(&input, sizeof(int) * num * 2));  // the expected HIP call carries an extra trailing hipHostMallocDefault argument
35 |   // CHECK: checkErrors(hipHostMalloc(&input_ptr, sizeof(int) * num * 2, hipHostMallocDefault));
36 |   checkErrors(cudaMallocHost(&input_ptr, sizeof(int) * num * 2));
37 |   for (int i = 0; i < num * 2; ++i) {  // writes every one of the num * 2 ints requested for `input`
38 |     input[i] = i;
39 |   }
40 | 
41 |   int *value = 0;  // NOTE(review): value and value_2 stay null when passed to the occupancy call below; presumably this file is only translated, not executed - confirm
42 |   int *value_2 = 0;
43 |   int iBlockSize = 0;
44 |   int iBlockSize_2 = 0;
45 |   size_t bytes = 0;
46 |   // CHECK: hipFunction_t function;
47 |   CUfunction function;
48 |   // CHECK: void* occupancyB2DSize;
49 |   CUoccupancyB2DSize occupancyB2DSize;  // expected to be rewritten to a plain void* declaration
50 | 
51 |   // CHECK: checkErrors(hipModuleOccupancyMaxPotentialBlockSizeWithFlags(value, value_2, function, bytes, iBlockSize, iBlockSize_2));
52 |   checkErrors(cuOccupancyMaxPotentialBlockSizeWithFlags(value, value_2, function, occupancyB2DSize, bytes, iBlockSize, iBlockSize_2));  // seven arguments here, six expected after translation: occupancyB2DSize is dropped
53 | 
54 |   // CHECK: checkErrors(hipHostFree(input));
55 |   checkErrors(cudaFreeHost(input));  // NOTE(review): input_ptr is never passed to cudaFreeHost anywhere in this function
56 |   // CHECK: checkErrors(hipDeviceSynchronize());
57 |   checkErrors(cudaDeviceSynchronize());
58 |   return 0;
59 | }
60 | 


--------------------------------------------------------------------------------