├── .azuredevops └── rocm-ci.yml ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── config.yml │ └── issue_report.yml ├── dependabot.yml └── workflows │ └── markdownlint.yml ├── .gitignore ├── .mdlrc ├── .readthedocs.yaml ├── CHANGELOG.md ├── CMakeLists.txt ├── LICENSE.txt ├── README.md ├── bin ├── findcode.sh ├── findcode_custom.sh ├── findcode_headers.sh ├── findcode_sources.sh ├── finduncodep.sh ├── hipconvertinplace-perl.sh ├── hipconvertinplace.sh ├── hipexamine-perl.sh ├── hipexamine.sh └── hipify-perl ├── docs ├── building │ ├── build-hipify-perl.rst │ └── building-hipify.rst ├── conf.py ├── data │ ├── csv_statistics.png │ └── patches │ │ ├── patch_for_clang_10.0.0_bug_47332.zip │ │ ├── patch_for_clang_10.0.1_bug_47332.zip │ │ ├── patch_for_clang_11.0.0_bug_47332.zip │ │ ├── patch_for_clang_14.0.0_bug_54609.zip │ │ ├── patch_for_clang_14.0.1_bug_54609.zip │ │ ├── patch_for_clang_14.0.2_bug_54609.zip │ │ ├── patch_for_clang_14.0.3_bug_54609.zip │ │ ├── patch_for_clang_14.0.4_bug_54609.zip │ │ ├── patch_for_clang_7.0.0_bug_38811.zip │ │ ├── patch_for_clang_7.0.1_bug_38811.zip │ │ ├── patch_for_clang_7.1.0_bug_38811.zip │ │ ├── patch_for_clang_8.0.0_bug_38811.zip │ │ └── patch_for_clang_8.0.1_bug_38811.zip ├── how-to │ ├── hipify-clang.rst │ └── hipify-perl.rst ├── index.rst ├── license.md ├── reference │ ├── hipify-clang-cmd.rst │ ├── hipify-perl-cmd.rst │ ├── supported_apis.md │ └── tables │ │ ├── CUBLAS_API_supported_by_HIP.md │ │ ├── CUBLAS_API_supported_by_HIP_and_ROC.md │ │ ├── CUBLAS_API_supported_by_ROC.md │ │ ├── CUB_API_supported_by_HIP.md │ │ ├── CUDA_Device_API_supported_by_HIP.md │ │ ├── CUDA_Driver_API_functions_supported_by_HIP.md │ │ ├── CUDA_RTC_API_supported_by_HIP.md │ │ ├── CUDA_Runtime_API_functions_supported_by_HIP.md │ │ ├── CUDNN_API_supported_by_HIP.md │ │ ├── CUDNN_API_supported_by_HIP_and_MIOPEN.md │ │ ├── CUDNN_API_supported_by_MIOPEN.md │ │ ├── CUFFT_API_supported_by_HIP.md │ │ ├── CURAND_API_supported_by_HIP.md │ │ ├── 
CURAND_API_supported_by_HIP_and_ROC.md │ │ ├── CURAND_API_supported_by_ROC.md │ │ ├── CUSOLVER_API_supported_by_HIP.md │ │ ├── CUSOLVER_API_supported_by_HIP_and_ROC.md │ │ ├── CUSOLVER_API_supported_by_ROC.md │ │ ├── CUSPARSE_API_supported_by_HIP.md │ │ ├── CUSPARSE_API_supported_by_HIP_and_ROC.md │ │ ├── CUSPARSE_API_supported_by_ROC.md │ │ ├── CUTENSOR_API_supported_by_HIP.md │ │ └── cuComplex_API_supported_by_HIP.md └── sphinx │ ├── _toc.yml.in │ ├── requirements.in │ └── requirements.txt ├── hipify-backward-compat.cmake ├── mdlrc-style.rb ├── packaging ├── hipify-clang.postinst ├── hipify-clang.prerm ├── hipify-clang.rpm_post ├── hipify-clang.rpm_postun └── hipify-clang.txt ├── src ├── ArgParse.cpp ├── ArgParse.h ├── CUDA2HIP.cpp ├── CUDA2HIP.h ├── CUDA2HIP_BLAS_API_functions.cpp ├── CUDA2HIP_BLAS_API_types.cpp ├── CUDA2HIP_CAFFE2_API_functions.cpp ├── CUDA2HIP_CAFFE2_API_types.cpp ├── CUDA2HIP_CUB_API_functions.cpp ├── CUDA2HIP_CUB_API_types.cpp ├── CUDA2HIP_Complex_API_functions.cpp ├── CUDA2HIP_Complex_API_types.cpp ├── CUDA2HIP_DNN_API_functions.cpp ├── CUDA2HIP_DNN_API_types.cpp ├── CUDA2HIP_Device_functions.cpp ├── CUDA2HIP_Device_types.cpp ├── CUDA2HIP_Doc.cpp ├── CUDA2HIP_Driver_API_functions.cpp ├── CUDA2HIP_Driver_API_types.cpp ├── CUDA2HIP_FFT_API_functions.cpp ├── CUDA2HIP_FFT_API_types.cpp ├── CUDA2HIP_Perl.cpp ├── CUDA2HIP_Python.cpp ├── CUDA2HIP_RAND_API_functions.cpp ├── CUDA2HIP_RAND_API_types.cpp ├── CUDA2HIP_RTC_API_functions.cpp ├── CUDA2HIP_RTC_API_types.cpp ├── CUDA2HIP_Runtime_API_functions.cpp ├── CUDA2HIP_Runtime_API_types.cpp ├── CUDA2HIP_SOLVER_API_functions.cpp ├── CUDA2HIP_SOLVER_API_types.cpp ├── CUDA2HIP_SPARSE_API_functions.cpp ├── CUDA2HIP_SPARSE_API_types.cpp ├── CUDA2HIP_Scripting.h ├── CUDA2HIP_TENSOR_API_functions.cpp ├── CUDA2HIP_TENSOR_API_types.cpp ├── HipifyAction.cpp ├── HipifyAction.h ├── LLVMCompat.cpp ├── LLVMCompat.h ├── ReplacementsFrontendActionFactory.h ├── Statistics.cpp ├── Statistics.h ├── StringUtils.cpp ├── 
StringUtils.h └── main.cpp └── tests ├── lit.cfg ├── lit.site.cfg.in ├── run_test.bat ├── run_test.sh └── unit_tests ├── casts └── reinterpret_cast.cu ├── compilation_database ├── cd_intro.cu ├── compile_commands.json.in └── include │ └── inc.h ├── device ├── atomics.cu ├── device_symbols.cu └── math_functions.cu ├── graph └── simple_mechs.cu ├── headers ├── headers_test_01.cu ├── headers_test_02.cu ├── headers_test_03.cu ├── headers_test_04.cu ├── headers_test_05.cu ├── headers_test_06.cu ├── headers_test_06_12000.cu ├── headers_test_07.cu ├── headers_test_07_12000.cu ├── headers_test_08.cu ├── headers_test_08_12000.cu ├── headers_test_09.cu ├── headers_test_09_12000.cu ├── headers_test_09_fp16_7050.cu ├── headers_test_09_rocrand.cu ├── headers_test_10.cu ├── headers_test_11.cu ├── headers_test_12_SOLVER.cu ├── headers_test_12_SOLVER_10010.cu ├── headers_test_12_SOLVER_7050.cu └── headers_test_13.cu ├── kernel_launch ├── kernel_launch_01.cu └── kernel_launch_syntax.cu ├── libraries ├── CAFFE2 │ ├── caffe2 │ │ ├── core │ │ │ └── common_cudnn.h │ │ └── operators │ │ │ └── spatial_batch_norm_op.h │ ├── caffe2_01.cu │ └── caffe2_02.cu ├── CUB │ ├── cub_01.cu │ ├── cub_02.cu │ └── cub_03.cu ├── cuBLAS │ ├── cublas_0_based_indexing.cu │ ├── cublas_0_based_indexing_v2.cu │ ├── cublas_1_based_indexing.cu │ ├── cublas_sgemm_matrix_multiplication.cu │ ├── cublas_v1.cu │ └── rocBLAS │ │ ├── cublas_0_based_indexing_rocblas.cu │ │ ├── cublas_0_based_indexing_rocblas_v2.cu │ │ ├── cublas_1_based_indexing_rocblas.cu │ │ └── cublas_sgemm_matrix_multiplication_rocblas.cu ├── cuComplex │ └── cuComplex_Julia.cu ├── cuDNN │ ├── cudnn_convolution_forward.cu │ └── cudnn_softmax.cu ├── cuFFT │ └── simple_cufft.cu ├── cuRAND │ ├── benchmark_curand_generate.cpp │ ├── benchmark_curand_kernel.cpp │ ├── cmdparser.hpp │ └── poisson_api_example.cu └── cuSPARSE │ ├── cuSPARSE_01.cu │ ├── cuSPARSE_02.cu │ ├── cuSPARSE_03.cu │ ├── cuSPARSE_04.cu │ ├── cuSPARSE_05.cu │ ├── cuSPARSE_06.cu │ ├── 
cuSPARSE_07.cu │ ├── cuSPARSE_08.cu │ ├── cuSPARSE_09.cu │ ├── cuSPARSE_10.cu │ ├── cuSPARSE_11.cu │ └── cuSPARSE_12.cu ├── namespace └── ns_kernel_launch.cu ├── options └── kernel-execution-syntax │ ├── both-kernel-execution-syntax.cu │ ├── cuda-kernel-execution-syntax.cu │ ├── hip-kernel-execution-syntax.cu │ └── none-kernel-execution-syntax.cu ├── pp ├── pp_if_else_conditionals.cu ├── pp_if_else_conditionals_01.cu ├── pp_if_else_conditionals_01_LLVM_10.cu └── pp_if_else_conditionals_LLVM_10.cu ├── samples ├── 2_Cookbook │ ├── 0_MatrixTranspose │ │ └── MatrixTranspose.cpp │ ├── 11_texture_driver │ │ ├── tex2dKernel.cpp │ │ └── texture2dDrv.cpp │ ├── 13_occupancy │ │ └── occupancy.cpp │ ├── 1_hipEvent │ │ └── hipEvent.cpp │ ├── 2_Profiler │ │ └── Profiler.cpp │ ├── 7_streams │ │ └── stream.cpp │ └── 8_peer2peer │ │ └── peer2peer.cpp ├── MallocManaged.cpp ├── allocators.cu ├── axpy.cu ├── coalescing.cu ├── cudaRegister.cu ├── dynamic_shared_memory.cu ├── half2_allocators.cu ├── intro.cu ├── macro_check.cu ├── reduction.cu ├── square.cu ├── static_shared_memory.cu └── vec_add.cu └── synthetic ├── driver_defines.cu ├── driver_enums.cu ├── driver_functions.cu ├── driver_functions_internal.cu ├── driver_structs.cu ├── driver_typedefs.cu ├── driver_unions.cu ├── libraries ├── cublas2hipblas.cu ├── cublas2hipblas_v2.cu ├── cublas2rocblas.cu ├── cublas2rocblas_v2.cu ├── cublaslt2hipblaslt.cu ├── cublaslt2hipblaslt_10010_10020.cu ├── cudevice2hipdevice.cu ├── cudevice2hipdevice_12080.cu ├── cudevice2hipdevice_before_11080_after_12011.cu ├── cudnn2miopen.cu ├── cudnn2miopen_before_9000.cu ├── cufftXt2hipfftXt.cu ├── curand2hiprand.cu ├── curand2rocrand.cu ├── cusolver2hipsolver.cu ├── cusolver2rocsolver.cu ├── cusparse2hipsparse.cu ├── cusparse2hipsparse_11030_12000.cu ├── cusparse2hipsparse_12000.cu ├── cusparse2rocsparse.cu ├── cusparse2rocsparse_10000.cu ├── cusparse2rocsparse_10010.cu ├── cusparse2rocsparse_10010_12000.cu ├── cusparse2rocsparse_11010_12000.cu ├── 
cusparse2rocsparse_11030_12000.cu ├── cusparse2rocsparse_12000.cu ├── cusparse2rocsparse_7050.cu ├── cusparse2rocsparse_9020.cu ├── cusparse2rocsparse_9020_12000.cu ├── cusparse2rocsparse_before_11000.cu ├── cusparse2rocsparse_before_12000.cu └── cutensor2hiptensor.cu ├── nvrtc2hiprtc.cu ├── runtime_defines.cu ├── runtime_enums.cu ├── runtime_functions.cu ├── runtime_functions_11010.cu ├── runtime_functions_12000.cu ├── runtime_functions_9000.cu ├── runtime_structs.cu ├── runtime_typedefs.cu ├── runtime_unions.cu └── transforming_matchers.cu /.azuredevops/rocm-ci.yml: -------------------------------------------------------------------------------- 1 | resources: 2 | repositories: 3 | - repository: pipelines_repo 4 | type: github 5 | endpoint: ROCm 6 | name: ROCm/ROCm 7 | 8 | variables: 9 | - group: common 10 | - template: /.azuredevops/variables-global.yml@pipelines_repo 11 | 12 | trigger: 13 | batch: true 14 | branches: 15 | include: 16 | - amd-staging 17 | - amd-mainline 18 | paths: 19 | exclude: 20 | - .github 21 | - docs 22 | - '.*.y*ml' 23 | - '*.md' 24 | - LICENSE.txt 25 | 26 | pr: 27 | autoCancel: true 28 | branches: 29 | include: 30 | - amd-staging 31 | - amd-mainline 32 | paths: 33 | exclude: 34 | - .github 35 | - docs 36 | - '.*.y*ml' 37 | - '*.md' 38 | - LICENSE.txt 39 | drafts: false 40 | 41 | jobs: 42 | - template: ${{ variables.CI_COMPONENT_PATH }}/HIPIFY.yml@pipelines_repo 43 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @searlmc1 @emankov 2 | 3 | # Documentation files 4 | docs/ @ROCm/rocm-documentation @searlmc1 @emankov 5 | *.md @ROCm/rocm-documentation @searlmc1 @emankov 6 | *.rst @ROCm/rocm-documentation @searlmc1 @emankov 7 | .readthedocs.yaml @ROCm/rocm-documentation @searlmc1 @emankov 8 | -------------------------------------------------------------------------------- 
/.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/docs/sphinx" # Location of package manifests 10 | open-pull-requests-limit: 10 11 | schedule: 12 | interval: "daily" 13 | labels: 14 | - "documentation" 15 | - "dependencies" 16 | reviewers: 17 | - "samjwu" 18 | -------------------------------------------------------------------------------- /.github/workflows/markdownlint.yml: -------------------------------------------------------------------------------- 1 | name: Markdownlint mdl Action 2 | on: push 3 | 4 | jobs: 5 | build: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - name: Check out code 9 | uses: actions/checkout@v3 10 | - name: Use markdownlint 11 | uses: actionshub/markdownlint@v3.1.3 12 | with: 13 | filesToIgnoreRegex: "README.md" 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.json 2 | *.hip 3 | **/__pycache__/* 4 | .python-version 5 | 6 | # documentation artifacts 7 | build/ 8 | _build/ 9 | _images/ 10 | _static/ 11 | _templates/ 12 | _toc.yml 13 | docBin/ 14 | -------------------------------------------------------------------------------- /.mdlrc: 
-------------------------------------------------------------------------------- 1 | style "mdlrc-style.rb" 2 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | sphinx: 7 | configuration: docs/conf.py 8 | 9 | formats: [htmlzip, pdf, epub] 10 | 11 | python: 12 | install: 13 | - requirements: docs/sphinx/requirements.txt 14 | 15 | build: 16 | os: ubuntu-22.04 17 | tools: 18 | python: "3.10" 19 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2025 Advanced Micro Devices, Inc. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HIPIFY 2 | 3 | HIPIFY is a set of tools that you can use to automatically translate CUDA source code into portable 4 | [HIP](https://github.com/ROCm/HIP) C++. 5 | 6 | ## Documentation 7 | 8 | The published documentation is available at [HIPIFY](https://rocm.docs.amd.com/projects/HIPIFY/en/latest/index.html) in an organized, easy-to-read format, with search and a table of contents. The documentation source files reside in the `HIPIFY/docs` folder of this GitHub repository. As with all ROCm projects, the documentation is open source. For more information on contributing to the documentation, see [Contribute to ROCm documentation](https://rocm.docs.amd.com/en/latest/contribute/contributing.html). 9 | 10 | To build our documentation locally, run the following code. 11 | 12 | ```bash 13 | cd docs 14 | 15 | pip3 install -r sphinx/requirements.txt 16 | 17 | python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html 18 | ``` 19 | 20 | To build `CUDA2HIP` (CUDA APIs supported by HIP) documentation, run the following `hipify-clang` 21 | command. This builds the same content as 22 | [Supported CUDA APIs](./docs/reference/supported_apis.md#supported-cuda-apis). 23 | 24 | ```bash 25 | hipify-clang --md --doc-format=full --doc-roc=joint 26 | 27 | # Alternatively, you can use: 28 | 29 | hipify-clang --md --doc-format=full --doc-roc=separate 30 | ``` 31 | 32 | To generate this documentation in CSV, use the `--csv` option instead of `--md`. 
Instead of using 33 | the `full` format, you can also build in `strict` or `compact` format. 34 | 35 | To see all available options, use the `--help` or `--help-hidden` `hipify-clang` option. 36 | -------------------------------------------------------------------------------- /bin/findcode.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SEARCH_DIRS=$@ 4 | 5 | find $SEARCH_DIRS -name '*.cu' -o -name '*.CU' 6 | find $SEARCH_DIRS -name '*.cpp' -o -name '*.cxx' -o -name '*.c' -o -name '*.cc' 7 | find $SEARCH_DIRS -name '*.CPP' -o -name '*.CXX' -o -name '*.C' -o -name '*.CC' 8 | find $SEARCH_DIRS -name '*.cuh' -o -name '*.CUH' 9 | find $SEARCH_DIRS -name '*.h' -o -name '*.hpp' -o -name '*.hh' -o -name '*.inc' -o -name '*.inl' -o -name '*.hxx' -o -name '*.hdl' 10 | find $SEARCH_DIRS -name '*.H' -o -name '*.HPP' -o -name '*.HH' -o -name '*.INC' -o -name '*.INL' -o -name '*.HXX' -o -name '*.HDL' 11 | -------------------------------------------------------------------------------- /bin/findcode_custom.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SEARCH_DIRS=$@ 4 | 5 | find $SEARCH_DIRS -name '*.cu' -and -not -name '*.cuh' 6 | find $SEARCH_DIRS -name '*.CU' -and -not -name '*.CUH' 7 | -------------------------------------------------------------------------------- /bin/findcode_headers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SEARCH_DIRS=$@ 4 | 5 | find $SEARCH_DIRS -name '*.cuh' -o -name '*.CUH' 6 | find $SEARCH_DIRS -name '*.h' -o -name '*.hpp' -o -name '*.hh' -o -name '*.inc' -o -name '*.inl' -o -name '*.hxx' -o -name '*.hdl' 7 | find $SEARCH_DIRS -name '*.H' -o -name '*.HPP' -o -name '*.HH' -o -name '*.INC' -o -name '*.INL' -o -name '*.HXX' -o -name '*.HDL' 8 | -------------------------------------------------------------------------------- /bin/findcode_sources.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SEARCH_DIRS=$@ 4 | 5 | find $SEARCH_DIRS -name '*.cu' -and -not -name '*.cuh' 6 | find $SEARCH_DIRS -name '*.CU' -and -not -name '*.CUH' 7 | find $SEARCH_DIRS -name '*.cpp' -o -name '*.cxx' -o -name '*.c' -o -name '*.cc' 8 | find $SEARCH_DIRS -name '*.CPP' -o -name '*.CXX' -o -name '*.C' -o -name '*.CC' 9 | -------------------------------------------------------------------------------- /bin/finduncodep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SEARCH_DIR=$1 4 | 5 | find $SEARCH_DIR -not -name '*.cu' -and -not -name '*.cpp' -and -not -name '*.cxx' -and -not -name '*.c' -and -not -name '*.cc' -and -not -name '*.cuh' -and -not -name '*.h' -and -not -name '*.hpp' -and -not -name '*.inc' -and -not -name '*.inl' -and -not -name '*.hxx' -and -not -name '*.hdl' 6 | -------------------------------------------------------------------------------- /bin/hipconvertinplace-perl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #usage : hipconvertinplace-perl.sh DIRNAME [-filter=all|headers|sources|custom] [hipify-perl options] 4 | 5 | #hipify "inplace" all code files in specified directory. 6 | # This can be quite handy when dealing with an existing CUDA code base since the script 7 | # preserves the existing directory structure. 8 | 9 | # For each code file, this script will: 10 | # - If ".prehip" file does not exist, copy the original code to a new file with extension ".prehip". Then hipify the code file. 11 | # - If ".prehip" file exists, this is used as input to hipify. 12 | # (this is useful for testing improvements to the hipify-perl toolset). 
13 | 14 | SCRIPT_DIR="$(dirname "$(realpath "$0")")" 15 | SCRIPT_NAME=findcode.sh 16 | SEARCH_DIR=$1 17 | if [ "$2" = "-filter=all" ] 18 | then 19 | shift 20 | elif [ "$2" = "-filter=headers" ] 21 | then 22 | SCRIPT_NAME=findcode_headers.sh 23 | shift 24 | elif [ "$2" = "-filter=sources" ] 25 | then 26 | SCRIPT_NAME=findcode_sources.sh 27 | shift 28 | elif [ "$2" = "-filter=custom" ] 29 | then 30 | SCRIPT_NAME=findcode_custom.sh 31 | shift 32 | fi 33 | shift 34 | 35 | $SCRIPT_DIR/hipify-perl -inplace -print-stats "$@" `$SCRIPT_DIR/$SCRIPT_NAME $SEARCH_DIR` 36 | -------------------------------------------------------------------------------- /bin/hipconvertinplace.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #usage : hipconvertinplace.sh DIRNAME [hipify options] [--] [clang options] 4 | 5 | #hipify "inplace" all code files in specified directory. 6 | # This can be quite handy when dealing with an existing CUDA code base since the script 7 | # preserves the existing directory structure. 8 | 9 | SCRIPT_DIR="$(dirname "$(realpath "$0")")" 10 | BIN_DIR="$SCRIPT_DIR/../../bin" 11 | SEARCH_DIR=$1 12 | 13 | hipify_args='' 14 | while (( "$#" )); do 15 | shift 16 | if [ "$1" != "--" ]; then 17 | hipify_args="$hipify_args $1" 18 | else 19 | shift 20 | break 21 | fi 22 | done 23 | clang_args="$@" 24 | 25 | $BIN_DIR/hipify-clang -inplace -print-stats $hipify_args `$SCRIPT_DIR/findcode.sh $SEARCH_DIR` -- -x cuda $clang_args 26 | -------------------------------------------------------------------------------- /bin/hipexamine-perl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #usage : hipexamine-perl.sh DIRNAME [hipify-perl options] 4 | 5 | # Generate HIP stats (LOC, CUDA->API conversions, missing functionality) for all the code files 6 | # in the specified directory. 
7 | 8 | 9 | SCRIPT_DIR="$(dirname "$(realpath "$0")")" 10 | SEARCH_DIR=$1 11 | shift 12 | $SCRIPT_DIR/hipify-perl -no-output -print-stats "$@" `$SCRIPT_DIR/findcode.sh $SEARCH_DIR` 13 | -------------------------------------------------------------------------------- /bin/hipexamine.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #usage : hipexamine.sh DIRNAME [hipify options] [--] [clang options] 4 | 5 | # Generate CUDA->HIP conversion statistics for all the code files in the specified directory. 6 | 7 | SCRIPT_DIR="$(dirname "$(realpath "$0")")" 8 | BIN_DIR="$SCRIPT_DIR/../../bin" 9 | SEARCH_DIR=$1 10 | 11 | hipify_args='' 12 | while (( "$#" )); do 13 | shift 14 | if [ "$1" != "--" ]; then 15 | hipify_args="$hipify_args $1" 16 | else 17 | shift 18 | break 19 | fi 20 | done 21 | clang_args="$@" 22 | 23 | $BIN_DIR/hipify-clang -examine $hipify_args `$SCRIPT_DIR/findcode.sh $SEARCH_DIR` -- -x cuda $clang_args 24 | -------------------------------------------------------------------------------- /docs/building/build-hipify-perl.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: Tools to automatically translate CUDA source code into portable HIP C++ 3 | :keywords: HIPIFY, ROCm, library, tool, CUDA, CUDA2HIP, hipify-clang, hipify-perl 4 | 5 | .. _build-hipify-perl: 6 | 7 | ==================== 8 | Building hipify-perl 9 | ==================== 10 | 11 | ``hipify-perl`` is a Perl-based script that relies heavily on regular expressions and is automatically generated from ``hipify-clang``. To generate ``hipify-perl``, run: 12 | 13 | .. code-block:: shell 14 | 15 | hipify-clang --perl 16 | 17 | You can choose to specify the output directory for the generated ``hipify-perl`` file using the ``--o-hipify-perl-dir`` option. 
18 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | from rocm_docs import ROCmDocs 8 | 9 | # for PDF output on Read the Docs 10 | project = "HIPIFY Documentation" 11 | author = "Advanced Micro Devices, Inc." 12 | copyright = "Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved." 13 | 14 | external_toc_path = "./sphinx/_toc.yml" 15 | 16 | docs_core = ROCmDocs("HIPIFY Documentation") 17 | docs_core.setup() 18 | 19 | external_projects_current_project = "hipify" 20 | 21 | for sphinx_var in ROCmDocs.SPHINX_VARS: 22 | globals()[sphinx_var] = getattr(docs_core, sphinx_var) 23 | -------------------------------------------------------------------------------- /docs/data/csv_statistics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/csv_statistics.png -------------------------------------------------------------------------------- /docs/data/patches/patch_for_clang_10.0.0_bug_47332.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_10.0.0_bug_47332.zip -------------------------------------------------------------------------------- /docs/data/patches/patch_for_clang_10.0.1_bug_47332.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_10.0.1_bug_47332.zip -------------------------------------------------------------------------------- /docs/data/patches/patch_for_clang_11.0.0_bug_47332.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_11.0.0_bug_47332.zip -------------------------------------------------------------------------------- /docs/data/patches/patch_for_clang_14.0.0_bug_54609.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_14.0.0_bug_54609.zip -------------------------------------------------------------------------------- /docs/data/patches/patch_for_clang_14.0.1_bug_54609.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_14.0.1_bug_54609.zip -------------------------------------------------------------------------------- /docs/data/patches/patch_for_clang_14.0.2_bug_54609.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_14.0.2_bug_54609.zip -------------------------------------------------------------------------------- /docs/data/patches/patch_for_clang_14.0.3_bug_54609.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_14.0.3_bug_54609.zip 
-------------------------------------------------------------------------------- /docs/data/patches/patch_for_clang_14.0.4_bug_54609.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_14.0.4_bug_54609.zip -------------------------------------------------------------------------------- /docs/data/patches/patch_for_clang_7.0.0_bug_38811.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_7.0.0_bug_38811.zip -------------------------------------------------------------------------------- /docs/data/patches/patch_for_clang_7.0.1_bug_38811.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_7.0.1_bug_38811.zip -------------------------------------------------------------------------------- /docs/data/patches/patch_for_clang_7.1.0_bug_38811.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_7.1.0_bug_38811.zip -------------------------------------------------------------------------------- /docs/data/patches/patch_for_clang_8.0.0_bug_38811.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_8.0.0_bug_38811.zip -------------------------------------------------------------------------------- /docs/data/patches/patch_for_clang_8.0.1_bug_38811.zip: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/HIPIFY/f69736cb9873bf48cd0bf39d965334f1c3a32211/docs/data/patches/patch_for_clang_8.0.1_bug_38811.zip -------------------------------------------------------------------------------- /docs/how-to/hipify-perl.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: Tools to automatically translate CUDA source code into portable HIP C++ 3 | :keywords: HIPIFY, ROCm, library, tool, CUDA, CUDA2HIP, hipify-clang, hipify-perl 4 | 5 | .. _hipify-perl: 6 | 7 | =================== 8 | Using hipify-perl 9 | =================== 10 | 11 | ``hipify-perl`` is a Perl-based script that relies heavily on regular expressions and is automatically generated from ``hipify-clang``. 12 | 13 | **Advantages:** 14 | 15 | - Ease of use 16 | - Does not require checking the input NVIDIA CUDA source code for correctness 17 | - No dependency on third-party tools, including CUDA 18 | 19 | **Disadvantages:** 20 | 21 | - Inability or difficulty in implementing the following constructs: 22 | 23 | - Macro expansion 24 | - Namespaces: 25 | 26 | - Redefinition of CUDA entities in user namespaces 27 | - Using directive 28 | 29 | - Templates (some cases) 30 | - Device or host function calls differentiation 31 | - Correct injection of header files 32 | - Parsing complicated argument lists 33 | 34 | Example 35 | ======= 36 | 37 | For additional details on the following ``hipify-perl`` command options, see :ref:`hipify_perl-command`. For more advanced translation needs, use ``hipify-clang`` as it is more comprehensive and accurate. 38 | 39 | Convert a simple CUDA file (``square.cu``) to HIP using ``hipify-perl``: 40 | 41 | .. code-block:: shell 42 | 43 | hipify-perl square.cu -o square.cu.hip 44 | 45 | This command translates the input file and writes the result to ``square.cu.hip``. 
46 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: Tools to automatically translate CUDA source code into portable HIP C++ 3 | :keywords: HIPIFY, ROCm, library, tool, CUDA, CUDA2HIP, hipify-clang, hipify-perl 4 | 5 | .. _index: 6 | 7 | ===================== 8 | HIPIFY documentation 9 | ===================== 10 | 11 | HIPIFY is a ROCm tool to help developers migrate GPU programming from NVIDIA's CUDA language to AMD's HIP C++ programming language for use on AMD GPUs. HIPIFY includes two tools offering different levels of capability: 12 | 13 | • ``hipify-clang``: A clang-based tool that parses CUDA code and converts it to HIP code. It handles syntax changes, API calls, and kernel launch differences. 14 | • ``hipify-perl``: A simpler tool generated from ``hipify-clang`` that replaces CUDA API calls with HIP equivalents for basic code translation needs. ``hipify-perl`` is useful for simple CUDA programs, but offers less error detection when running into issues during translation. 15 | 16 | .. note:: 17 | 18 | `hipify_torch <https://github.com/ROCm/hipify_torch>`_ is a related tool that also translates CUDA source code into portable HIP C++. It was developed as part of the PyTorch project to cater to the project's unique requirements, was found to be useful for PyTorch-related projects, and released as an independent utility. 19 | 20 | HIPIFY does not automatically convert all CUDA code into HIP code seamlessly. While it is a powerful tool for translating CUDA code to HIP, there are some limitations and areas where manual intervention is often required. HIPIFY can automatically convert many CUDA runtime API calls, kernel launch syntax, standard CUDA library functions where there is a HIP library equivalent, and specific keywords like ``__global__`` and ``__device__``. 
However, HIP is not a complete replacement for CUDA, and HIPIFY cannot automatically translate all code. CUDA libraries or third-party libraries that have no HIP equivalent cannot be translated. In addition, code that is optimized for performance on NVIDIA GPUs might require additional rework to optimize performance on AMD GPUs. 21 | 22 | After migrating code through HIPIFY, you should perform a code review to ensure functional correctness, replace any unsupported libraries or constructs with HIP or ROCm features, debug and test the new HIP program, and optimize its performance on the target AMD GPUs. 23 | 24 | HIPIFY is open-source and freely available as part of the ROCm ecosystem. You can find the HIPIFY code on AMD's `GitHub HIPIFY repository `_. 25 | 26 | The documentation is structured as follows: 27 | 28 | .. grid:: 2 29 | :gutter: 3 30 | 31 | .. grid-item-card:: Building 32 | 33 | * :ref:`build-hipify-clang` 34 | * :ref:`build-hipify-perl` 35 | 36 | .. grid-item-card:: How to 37 | 38 | * :doc:`Use hipify-clang <./how-to/hipify-clang>` 39 | * :doc:`Use hipify-perl <./how-to/hipify-perl>` 40 | 41 | .. grid-item-card:: API reference 42 | 43 | * :ref:`hipify_clang-command` 44 | * :ref:`hipify_perl-command` 45 | * :doc:`Supported APIs <./reference/supported_apis>` 46 | 47 | To contribute to the documentation, refer to 48 | `Contributing to ROCm `_. 49 | 50 | You can find licensing information on the `Licensing `_ page. 51 | -------------------------------------------------------------------------------- /docs/license.md: -------------------------------------------------------------------------------- 1 | # License 2 | 3 | ```{include} ../LICENSE.txt 4 | ``` 5 | -------------------------------------------------------------------------------- /docs/reference/hipify-perl-cmd.rst: -------------------------------------------------------------------------------- 1 | .. 
meta:: 2 | :description: Tools to automatically translate CUDA source code into portable HIP C++ 3 | :keywords: HIPIFY, ROCm, library, tool, CUDA, CUDA2HIP, hipify-clang, hipify-perl 4 | 5 | .. _hipify_perl-command: 6 | 7 | ************************************************************************** 8 | hipify-perl command 9 | ************************************************************************** 10 | 11 | For a list of ``hipify-perl`` options, run: 12 | 13 | .. code-block:: shell 14 | 15 | hipify-perl --help 16 | 17 | Output: 18 | ======= 19 | 20 | Usage 21 | ----- 22 | 23 | .. code-block:: shell 24 | 25 | hipify-perl [options] [... ] 26 | 27 | Options 28 | ------- 29 | 30 | .. # COMMENT: The following lines define a break for use in the table below. 31 | .. |br| raw:: html 32 | 33 |
34 | 35 | .. list-table:: 36 | :widths: 2 5 37 | 38 | * - **Options** 39 | - **Description** 40 | 41 | * - ``-cuda-kernel-execution-syntax`` 42 | - Keep CUDA kernel launch syntax (default) 43 | 44 | * - ``-examine`` 45 | - Combines ``-no-output`` and ``-print-stats`` options 46 | 47 | * - ``-exclude-dirs=`` 48 | - Exclude directories 49 | 50 | * - ``-exclude-files=`` 51 | - Exclude files 52 | 53 | * - ``-experimental`` 54 | - HIPIFY experimentally supported APIs 55 | 56 | * - ``-help`` 57 | - Display available options 58 | 59 | * - ``-hip-kernel-execution-syntax`` 60 | - Transform CUDA kernel launch syntax to a regular HIP function call (overrides ``-cuda-kernel-execution-syntax``) 61 | 62 | * - ``-inplace`` 63 | - Backs up the input file to a ``.prehip`` file and modifies the input file in place 64 | 65 | * - ``-no-output`` 66 | - Don't write any translated output to stdout 67 | 68 | * - ``-o=`` 69 | - Output filename 70 | 71 | * - ``-print-stats`` 72 | - Print translation statistics as described in :ref:`hipify-stats` 73 | 74 | * - ``-quiet-warnings`` 75 | - Don't print warnings on unknown CUDA identifiers 76 | 77 | * - ``-roc`` 78 | - Translate to ``roc`` libraries instead of ``hip`` libraries where possible 79 | 80 | * - ``-version`` 81 | - The supported HIP version 82 | 83 | * - ``-whitelist=`` 84 | - Whitelist of identifiers 85 | 86 | -------------------------------------------------------------------------------- /docs/reference/supported_apis.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | # Supported NVIDIA CUDA APIs 9 | 10 | | **CUDA** | **HIP** | **ROC** | **HIP & ROC** | 11 | |:-----------------|:------------------------------------------------------------------|:----------------------------------------------------------|:------------------------------------------------------------------------| 12 | | CUDA Runtime API | [HIP API](tables/CUDA_Runtime_API_functions_supported_by_HIP.md) | 
| | 13 | | CUDA Driver API | [HIP API](tables/CUDA_Driver_API_functions_supported_by_HIP.md) | | | 14 | | CUComplex API | [HIP API](tables/cuComplex_API_supported_by_HIP.md) | | | 15 | | CUDA Device API | [HIP Device API](tables/CUDA_Device_API_supported_by_HIP.md) | | | 16 | | CUDA RTC API | [HIP RTC API](tables/CUDA_RTC_API_supported_by_HIP.md) | | | 17 | | CUBLAS API | [HIP BLAS API](tables/CUBLAS_API_supported_by_HIP.md) | [ROC BLAS API](tables/CUBLAS_API_supported_by_ROC.md) | [HIP + ROC BLAS API](tables/CUBLAS_API_supported_by_HIP_and_ROC.md) | 18 | | CUSPARSE API | [HIP SPARSE API](tables/CUSPARSE_API_supported_by_HIP.md) | [ROC SPARSE API](tables/CUSPARSE_API_supported_by_ROC.md) | [HIP + ROC SPARSE API](tables/CUSPARSE_API_supported_by_HIP_and_ROC.md) | 19 | | CUSOLVER API | [HIP SOLVER API](tables/CUSOLVER_API_supported_by_HIP.md) | | | 20 | | CURAND API | [HIP RAND API](tables/CURAND_API_supported_by_HIP.md) | [ROC RAND API](tables/CURAND_API_supported_by_ROC.md) | [HIP + ROC RAND API](tables/CURAND_API_supported_by_HIP_and_ROC.md) | 21 | | CUFFT API | [HIP FFT API](tables/CUFFT_API_supported_by_HIP.md) | | | 22 | | CUDNN API | [HIP DNN API](tables/CUDNN_API_supported_by_HIP.md) | [MIOPEN API](tables/CUDNN_API_supported_by_MIOPEN.md) | [HIP + MIOPEN API](tables/CUDNN_API_supported_by_HIP_and_MIOPEN.md) | 23 | | CUTENSOR API | [HIP TENSOR API](tables/CUTENSOR_API_supported_by_HIP.md) | | | 24 | | CUB API | [HIP CUB API](tables/CUB_API_supported_by_HIP.md) | | | 25 | 26 | To generate the above documentation with the information about all supported CUDA APIs in Markdown format, run `hipify-clang --md --doc-format=full` with or without specifying the output directory (`-o`), for HIP and ROC separately `--doc-roc=separate` or in the joint format (ROC & HIP) `--doc-roc=joint`. 
27 | -------------------------------------------------------------------------------- /docs/reference/tables/CUB_API_supported_by_HIP.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | # CUB API supported by HIP 8 | 9 | 10 | **Note\:** In the tables that follow the columns marked `A`, `D`, `C`, `R`, and `E` mean the following: 11 | **A** - Added; **D** - Deprecated; **C** - Changed; **R** - Removed; **E** - Experimental 12 | 13 | ## **1. CUB Data types** 14 | 15 | |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| 16 | |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| 17 | |`CUB_ALIGN`| | | | | | | | | | | 18 | |`CUB_CAT`| | | | | | | | | | | 19 | |`CUB_CAT_`| | | | | | | | | | | 20 | |`CUB_COMPILER_DEPRECATION`| | | | | | | | | | | 21 | |`CUB_COMPILER_DEPRECATION_SOFT`| | | | | | | | | | | 22 | |`CUB_COMP_DEPR_IMPL`| | | | | | | | | | | 23 | |`CUB_COMP_DEPR_IMPL0`| | | | | | | | | | | 24 | |`CUB_COMP_DEPR_IMPL1`| | | | | | | | | | | 25 | |`CUB_CPLUSPLUS`| | | | | | | | | | | 26 | |`CUB_CPP_DIALECT`| | | | | | | | | | | 27 | |`CUB_DEFINE_DETECT_NESTED_TYPE`| | | | | | | | | | | 28 | |`CUB_DEFINE_VECTOR_TYPE`| | | | | | | | | | | 29 | |`CUB_DEPRECATED`| | | | | | | | | | | 30 | |`CUB_DEVICE_COMPILER`| | | | | | | | | | | 31 | |`CUB_DEVICE_COMPILER_CLANG`| | | | | | | | | | | 32 | |`CUB_DEVICE_COMPILER_GCC`| | | | | | | | | | | 33 | |`CUB_DEVICE_COMPILER_MSVC`| | | | | | | | | | | 34 | |`CUB_DEVICE_COMPILER_NVCC`| | | | | | | | | | | 35 | |`CUB_DEVICE_COMPILER_UNKNOWN`| | | | | | | | | | | 36 | |`CUB_HOST_COMPILER`| | | | | | | | | | | 37 | |`CUB_HOST_COMPILER_CLANG`| | | | | | | | | | | 38 | |`CUB_HOST_COMPILER_GCC`| | | | | | | | | | | 39 | |`CUB_HOST_COMPILER_MSVC`| | | | | | | | | | | 40 | |`CUB_HOST_COMPILER_UNKNOWN`| | | | | | | | | | | 41 | |`CUB_IGNORE_DEPRECATED_API`| | | | | | | | | | | 42 | |`CUB_IGNORE_DEPRECATED_COMPILER`| | | | | | | | | | | 43 | 
|`CUB_IGNORE_DEPRECATED_CPP_11`| | | | | | | | | | | 44 | |`CUB_IGNORE_DEPRECATED_CPP_DIALECT`| | | | | | | | | | | 45 | |`CUB_IGNORE_DEPRECATED_DIALECT`| | | | | | | | | | | 46 | |`CUB_INCLUDE_DEVICE_CODE`| | | | | | | | | | | 47 | |`CUB_INCLUDE_HOST_CODE`| | | | | | | | | | | 48 | |`CUB_IS_DEVICE_CODE`| | | | | | | | | | | 49 | |`CUB_IS_HOST_CODE`| | | | | | | | | | | 50 | |`CUB_LOG_SMEM_BANKS`| | | | | | | | | | | 51 | |`CUB_LOG_WARP_THREADS`| | | | | | | | | | | 52 | |`CUB_MAX`| | | | |`CUB_MAX`|4.5.0| | | | | 53 | |`CUB_MAX_DEVICES`| | | | | | | | | | | 54 | |`CUB_MIN`| | | | |`CUB_MIN`|4.5.0| | | | | 55 | |`CUB_MSVC_VERSION`| | | | | | | | | | | 56 | |`CUB_MSVC_VERSION_FULL`| | | | | | | | | | | 57 | |`CUB_NAMESPACE_BEGIN`| | | | |`BEGIN_HIPCUB_NAMESPACE`|2.5.0| | | | | 58 | |`CUB_NAMESPACE_END`| | | | |`END_HIPCUB_NAMESPACE`|2.5.0| | | | | 59 | |`CUB_PREFER_CONFLICT_OVER_PADDING`| | | | | | | | | | | 60 | |`CUB_PREVENT_MACRO_SUBSTITUTION`| | | | | | | | | | | 61 | |`CUB_PTX_ARCH`| | | | |`HIPCUB_ARCH`|2.5.0| | | | | 62 | |`CUB_PTX_LOG_SMEM_BANKS`| | | | | | | | | | | 63 | |`CUB_PTX_LOG_WARP_THREADS`| | | | | | | | | | | 64 | |`CUB_PTX_PREFER_CONFLICT_OVER_PADDING`| | | | | | | | | | | 65 | |`CUB_PTX_SMEM_BANKS`| | | | | | | | | | | 66 | |`CUB_PTX_SUBSCRIPTION_FACTOR`| | | | | | | | | | | 67 | |`CUB_PTX_WARP_THREADS`| | | | |`HIPCUB_WARP_THREADS`|2.5.0| | | | | 68 | |`CUB_QUOTIENT_CEILING`| | | | | | | | | | | 69 | |`CUB_QUOTIENT_FLOOR`| | | | | | | | | | | 70 | |`CUB_ROUND_DOWN_NEAREST`| | | | | | | | | | | 71 | |`CUB_ROUND_UP_NEAREST`| | | | | | | | | | | 72 | |`CUB_RUNTIME_ENABLED`| | | | | | | | | | | 73 | |`CUB_RUNTIME_FUNCTION`| | | | |`HIPCUB_RUNTIME_FUNCTION`|2.5.0| | | | | 74 | |`CUB_SMEM_BANKS`| | | | | | | | | | | 75 | |`CUB_STATIC_ASSERT`| | | | | | | | | | | 76 | |`CUB_STDERR`| | | | |`HIPCUB_STDERR`|2.5.0| | | | | 77 | |`CUB_SUBSCRIPTION_FACTOR`| | | | | | | | | | | 78 | |`CUB_USE_COOPERATIVE_GROUPS`| | | | | | | | | | | 79 | |`CubDebug`| | | | 
|`HipcubDebug`|2.5.0| | | | | 80 | |`CubDebugExit`| | | | | | | | | | | 81 | |`CubVector`| | | | | | | | | | | 82 | |`_CUB_ASM_PTR_`| | | | | | | | | | | 83 | |`_CUB_ASM_PTR_SIZE_`| | | | | | | | | | | 84 | |`_CubLog`| | | | |`_HipcubLog`|2.5.0| | | | | 85 | |`__CUB_ALIGN_BYTES`| | | | |`__HIPCUB_ALIGN_BYTES`|4.5.0| | | | | 86 | |`__CUB_LP64__`| | | | | | | | | | | 87 | 88 | -------------------------------------------------------------------------------- /docs/reference/tables/CUDA_RTC_API_supported_by_HIP.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | # CUDA RTC API supported by HIP 8 | 9 | 10 | **Note\:** In the tables that follow the columns marked `A`, `D`, `C`, `R`, and `E` mean the following: 11 | **A** - Added; **D** - Deprecated; **C** - Changed; **R** - Removed; **E** - Experimental 12 | 13 | ## **1. RTC Data types** 14 | 15 | |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| 16 | |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| 17 | |`NVRTC_ERROR_BUILTIN_OPERATION_FAILURE`| | | | |`HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE`|2.6.0| | | | | 18 | |`NVRTC_ERROR_CANCELLED`|12.8| | | | | | | | | | 19 | |`NVRTC_ERROR_COMPILATION`| | | | |`HIPRTC_ERROR_COMPILATION`|2.6.0| | | | | 20 | |`NVRTC_ERROR_INTERNAL_ERROR`|8.0| | | |`HIPRTC_ERROR_INTERNAL_ERROR`|2.6.0| | | | | 21 | |`NVRTC_ERROR_INVALID_INPUT`| | | | |`HIPRTC_ERROR_INVALID_INPUT`|2.6.0| | | | | 22 | |`NVRTC_ERROR_INVALID_OPTION`| | | | |`HIPRTC_ERROR_INVALID_OPTION`|2.6.0| | | | | 23 | |`NVRTC_ERROR_INVALID_PROGRAM`| | | | |`HIPRTC_ERROR_INVALID_PROGRAM`|2.6.0| | | | | 24 | |`NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID`|8.0| | | |`HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID`|2.6.0| | | | | 25 | |`NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION`|8.0| | | |`HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION`|2.6.0| | | | | 26 | |`NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION`|8.0| | | 
|`HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION`|2.6.0| | | | | 27 | |`NVRTC_ERROR_NO_PCH_CREATE_ATTEMPTED`|12.8| | | | | | | | | | 28 | |`NVRTC_ERROR_OUT_OF_MEMORY`| | | | |`HIPRTC_ERROR_OUT_OF_MEMORY`|2.6.0| | | | | 29 | |`NVRTC_ERROR_PCH_CREATE`|12.8| | | | | | | | | | 30 | |`NVRTC_ERROR_PCH_CREATE_HEAP_EXHAUSTED`|12.8| | | | | | | | | | 31 | |`NVRTC_ERROR_PROGRAM_CREATION_FAILURE`| | | | |`HIPRTC_ERROR_PROGRAM_CREATION_FAILURE`|2.6.0| | | | | 32 | |`NVRTC_ERROR_TIME_FILE_WRITE_FAILED`|12.1| | | | | | | | | | 33 | |`NVRTC_SUCCESS`| | | | |`HIPRTC_SUCCESS`|2.6.0| | | | | 34 | |`_nvrtcProgram`| | | | |`_hiprtcProgram`|2.6.0| | | | | 35 | |`nvrtcProgram`| | | | |`hiprtcProgram`|2.6.0| | | | | 36 | |`nvrtcResult`| | | | |`hiprtcResult`|2.6.0| | | | | 37 | 38 | ## **2. RTC API functions** 39 | 40 | |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| 41 | |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| 42 | |`nvrtcAddNameExpression`|8.0| | | |`hiprtcAddNameExpression`|2.6.0| | | | | 43 | |`nvrtcCompileProgram`| | |8.0| |`hiprtcCompileProgram`|2.6.0| | | | | 44 | |`nvrtcCreateProgram`| | |8.0| |`hiprtcCreateProgram`|2.6.0| | | | | 45 | |`nvrtcDestroyProgram`| | | | |`hiprtcDestroyProgram`|2.6.0| | | | | 46 | |`nvrtcGetCUBIN`|11.1| | | |`hiprtcGetBitcode`|5.3.0| | | | | 47 | |`nvrtcGetCUBINSize`|11.1| | | |`hiprtcGetBitcodeSize`|5.3.0| | | | | 48 | |`nvrtcGetErrorString`| | | | |`hiprtcGetErrorString`|2.6.0| | | | | 49 | |`nvrtcGetLTOIR`|12.0| | | | | | | | | | 50 | |`nvrtcGetLTOIRSize`|12.0| | | | | | | | | | 51 | |`nvrtcGetLoweredName`|8.0| | | |`hiprtcGetLoweredName`|2.6.0| | | | | 52 | |`nvrtcGetNVVM`|11.4|12.0| | | | | | | | | 53 | |`nvrtcGetNVVMSize`|11.4|12.0| | | | | | | | | 54 | |`nvrtcGetNumSupportedArchs`|11.2| | | | | | | | | | 55 | |`nvrtcGetOptiXIR`|12.0| | | | | | | | | | 56 | |`nvrtcGetOptiXIRSize`|12.0| | | | | | | | | | 57 | |`nvrtcGetPCHCreateStatus`|12.8| | | | | | | | | | 58 | |`nvrtcGetPCHHeapSize`|12.8| | | | | | | | | | 59 
| |`nvrtcGetPCHHeapSizeRequired`|12.8| | | | | | | | | | 60 | |`nvrtcGetPTX`| | | | |`hiprtcGetCode`|2.6.0| | | | | 61 | |`nvrtcGetPTXSize`| | | | |`hiprtcGetCodeSize`|2.6.0| | | | | 62 | |`nvrtcGetProgramLog`| | | | |`hiprtcGetProgramLog`|2.6.0| | | | | 63 | |`nvrtcGetProgramLogSize`| | | | |`hiprtcGetProgramLogSize`|2.6.0| | | | | 64 | |`nvrtcGetSupportedArchs`|11.2| | | | | | | | | | 65 | |`nvrtcSetFlowCallback`|12.8| | | | | | | | | | 66 | |`nvrtcSetPCHHeapSize`|12.8| | | | | | | | | | 67 | |`nvrtcVersion`| | | | |`hiprtcVersion`|2.6.0| | | | | 68 | 69 | -------------------------------------------------------------------------------- /docs/reference/tables/cuComplex_API_supported_by_HIP.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | # CUCOMPLEX API supported by HIP 8 | 9 | 10 | **Note\:** In the tables that follow the columns marked `A`, `D`, `C`, `R`, and `E` mean the following: 11 | **A** - Added; **D** - Deprecated; **C** - Changed; **R** - Removed; **E** - Experimental 12 | 13 | ## **1. cuComplex Data types** 14 | 15 | |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| 16 | |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| 17 | |`cuComplex`| | | | |`hipComplex`|1.6.0| | | | | 18 | |`cuDoubleComplex`| | | | |`hipDoubleComplex`|1.6.0| | | | | 19 | |`cuFloatComplex`| | | | |`hipFloatComplex`|1.6.0| | | | | 20 | 21 | ## **2. 
cuComplex API functions** 22 | 23 | |**CUDA**|**A**|**D**|**C**|**R**|**HIP**|**A**|**D**|**C**|**R**|**E**| 24 | |:--|:-:|:-:|:-:|:-:|:--|:-:|:-:|:-:|:-:|:-:| 25 | |`cuCabs`| | | | |`hipCabs`|1.6.0| | | | | 26 | |`cuCabsf`| | | | |`hipCabsf`|1.6.0| | | | | 27 | |`cuCadd`| | | | |`hipCadd`|1.6.0| | | | | 28 | |`cuCaddf`| | | | |`hipCaddf`|1.6.0| | | | | 29 | |`cuCdiv`| | | | |`hipCdiv`|1.6.0| | | | | 30 | |`cuCdivf`| | | | |`hipCdivf`|1.6.0| | | | | 31 | |`cuCfma`| | | | |`hipCfma`|1.6.0| | | | | 32 | |`cuCfmaf`| | | | |`hipCfmaf`|1.6.0| | | | | 33 | |`cuCimag`| | | | |`hipCimag`|1.6.0| | | | | 34 | |`cuCimagf`| | | | |`hipCimagf`|1.6.0| | | | | 35 | |`cuCmul`| | | | |`hipCmul`|1.6.0| | | | | 36 | |`cuCmulf`| | | | |`hipCmulf`|1.6.0| | | | | 37 | |`cuComplexDoubleToFloat`| | | | |`hipComplexDoubleToFloat`|1.6.0| | | | | 38 | |`cuComplexFloatToDouble`| | | | |`hipComplexFloatToDouble`|1.6.0| | | | | 39 | |`cuConj`| | | | |`hipConj`|1.6.0| | | | | 40 | |`cuConjf`| | | | |`hipConjf`|1.6.0| | | | | 41 | |`cuCreal`| | | | |`hipCreal`|1.6.0| | | | | 42 | |`cuCrealf`| | | | |`hipCrealf`|1.6.0| | | | | 43 | |`cuCsub`| | | | |`hipCsub`|1.6.0| | | | | 44 | |`cuCsubf`| | | | |`hipCsubf`|1.6.0| | | | | 45 | |`make_cuComplex`| | | | |`make_hipComplex`|1.6.0| | | | | 46 | |`make_cuDoubleComplex`| | | | |`make_hipDoubleComplex`|1.6.0| | | | | 47 | |`make_cuFloatComplex`| | | | |`make_hipFloatComplex`|1.6.0| | | | | 48 | 49 | -------------------------------------------------------------------------------- /docs/sphinx/_toc.yml.in: -------------------------------------------------------------------------------- 1 | # Anywhere {branch} is used, the branch name will be substituted. 2 | # These comments will also be removed. 
3 | defaults: 4 | numbered: False 5 | root: index 6 | subtrees: 7 | - caption: Building 8 | entries: 9 | - file: building/building-hipify 10 | - file: building/build-hipify-perl 11 | - caption: How to 12 | entries: 13 | - file: how-to/hipify-clang 14 | title: Use hipify-clang 15 | - file: how-to/hipify-perl 16 | title: Use hipify-perl 17 | - caption: API reference 18 | entries: 19 | - file: reference/hipify-clang-cmd 20 | - file: reference/hipify-perl-cmd 21 | - file: reference/supported_apis 22 | subtrees: 23 | - entries: 24 | - file: reference/tables/CUDA_Runtime_API_functions_supported_by_HIP 25 | title: CUDA Runtime functions 26 | - file: reference/tables/CUDA_Driver_API_functions_supported_by_HIP 27 | title: CUDA Driver functions 28 | - file: reference/tables/cuComplex_API_supported_by_HIP 29 | title: cuComplex API 30 | - file: reference/tables/CUDA_Device_API_supported_by_HIP 31 | title: CUDA Device API 32 | - file: reference/tables/CUDA_RTC_API_supported_by_HIP 33 | title: CUDA RTC API 34 | - file: reference/tables/CUBLAS_API_supported_by_HIP 35 | title: cuBLAS API 36 | - file: reference/tables/CUSPARSE_API_supported_by_HIP 37 | title: cuSPARSE API 38 | - file: reference/tables/CUSOLVER_API_supported_by_HIP 39 | title: cuSOLVER API 40 | - file: reference/tables/CURAND_API_supported_by_HIP 41 | title: cuRAND API 42 | - file: reference/tables/CUFFT_API_supported_by_HIP 43 | title: cuFFT API 44 | - file: reference/tables/CUDNN_API_supported_by_HIP 45 | title: cuDNN API 46 | - file: reference/tables/CUTENSOR_API_supported_by_HIP 47 | title: cuTENSOR API 48 | - file: reference/tables/CUB_API_supported_by_HIP 49 | title: CUB API 50 | - caption: About 51 | entries: 52 | - file: license 53 | -------------------------------------------------------------------------------- /docs/sphinx/requirements.in: -------------------------------------------------------------------------------- 1 | rocm-docs-core==1.20.0 2 | 
-------------------------------------------------------------------------------- /hipify-backward-compat.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 2 | # Permission is hereby granted, free of charge, to any person obtaining a copy 3 | # of this software and associated documentation files (the "Software"), to deal 4 | # in the Software without restriction, including without limitation the rights 5 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 6 | # copies of the Software, and to permit persons to whom the Software is 7 | # furnished to do so, subject to the following conditions: 8 | # 9 | # The above copyright notice and this permission notice shall be included in 10 | # all copies or substantial portions of the Software. 11 | # 12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | # THE SOFTWARE. 19 | 20 | cmake_minimum_required(VERSION 3.16.8) 21 | 22 | set(HIPIFY_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) 23 | set(HIPIFY_WRAPPER_DIR ${HIPIFY_BUILD_DIR}/wrapper_dir) 24 | set(HIPIFY_WRAPPER_BIN_DIR ${HIPIFY_WRAPPER_DIR}/bin) 25 | 26 | # With the file reorganization, hipify (and hip) is installed in /opt/rocm-ver 27 | # instead of /opt/rocm-ver/hip/. To maintain backward compatibility, 28 | # the previous location (/opt/rocm-ver/hip/) gets soft links.
29 | # This file creates soft links to the binary files and installs them in the previous location. 30 | # Note: soft links are added for binary files. 31 | 32 | # Defines one ALL target per file that symlinks ${HIPIFY_WRAPPER_BIN_DIR}/<name> to the relative install path of <name>. 33 | function(create_binary_symlink) 34 | file(MAKE_DIRECTORY ${HIPIFY_WRAPPER_BIN_DIR}) 35 | # Public scripts (bin/hip*): each link points to ../../${CMAKE_INSTALL_BINDIR}/<name> 36 | file(GLOB binary_files ${CMAKE_SOURCE_DIR}/bin/hip*) 37 | foreach(binary_file ${binary_files}) 38 | get_filename_component(file_name ${binary_file} NAME) 39 | add_custom_target(link_${file_name} ALL 40 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 41 | COMMAND ${CMAKE_COMMAND} -E create_symlink 42 | ../../${CMAKE_INSTALL_BINDIR}/${file_name} ${HIPIFY_WRAPPER_BIN_DIR}/${file_name}) 43 | endforeach() 44 | # Private scripts (bin/find*): each link points to ../../${CMAKE_INSTALL_LIBEXECDIR}/hipify/<name> 45 | file(GLOB binary_files ${CMAKE_SOURCE_DIR}/bin/find*) 46 | foreach(binary_file ${binary_files}) 47 | get_filename_component(file_name ${binary_file} NAME) 48 | add_custom_target(link_${file_name} ALL 49 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 50 | COMMAND ${CMAKE_COMMAND} -E create_symlink 51 | ../../${CMAKE_INSTALL_LIBEXECDIR}/hipify/${file_name} ${HIPIFY_WRAPPER_BIN_DIR}/${file_name}) 52 | endforeach() 53 | 54 | # hipify-clang is linked explicitly by name rather than through the globs above 55 | set(file_name "hipify-clang") 56 | add_custom_target(link_${file_name} ALL 57 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 58 | COMMAND ${CMAKE_COMMAND} -E create_symlink 59 | ../../${CMAKE_INSTALL_BINDIR}/${file_name} ${HIPIFY_WRAPPER_BIN_DIR}/${file_name}) 60 | endfunction() 61 | # Create the symlinks to the binaries 62 | create_binary_symlink() 63 | # Install the wrapper bin directory (containing the symlinks) under the hip destination 64 | install(DIRECTORY ${HIPIFY_WRAPPER_BIN_DIR} DESTINATION hip) 65 | -------------------------------------------------------------------------------- /mdlrc-style.rb: -------------------------------------------------------------------------------- 1 | # Ref: https://github.com/DavidAnson/markdownlint/blob/main/doc/Rules.md 2 | all 3 | 4 | # First header should be a top level
header 5 | exclude_rule 'MD002' 6 | # Unordered list style 7 | exclude_rule 'MD004' 8 | # Unordered list indentation 9 | exclude_rule 'MD007' 10 | # Trailing spaces 11 | exclude_rule 'MD009' 12 | # Multiple consecutive blank lines 13 | exclude_rule 'MD012' 14 | # Line length 15 | exclude_rule 'MD013' 16 | # No space after hash on atx style header 17 | exclude_rule 'MD018' 18 | # Headers should be surrounded by blank lines 19 | exclude_rule 'MD022' 20 | # Headers must start at the beginning of the line 21 | exclude_rule 'MD023' 22 | # Multiple headers with the same content 23 | exclude_rule 'MD024' 24 | # Multiple top level headers in the same document 25 | exclude_rule 'MD025' 26 | # Trailing punctuation in header 27 | exclude_rule 'MD026' 28 | # Ordered list item prefix 29 | exclude_rule 'MD029' 30 | # Fenced code blocks should be surrounded by blank lines 31 | exclude_rule 'MD031' 32 | # Lists should be surrounded by blank lines 33 | exclude_rule 'MD032' 34 | # Inline HTML 35 | exclude_rule 'MD033' 36 | # Bare URLs 37 | exclude_rule 'MD034' 38 | # Spaces inside emphasis markers 39 | exclude_rule 'MD037' 40 | # Fenced code blocks should have a language specified 41 | exclude_rule 'MD040' 42 | # First line in file should be a top level header 43 | exclude_rule 'MD041' 44 | # Code block style 45 | exclude_rule 'MD046' 46 | # File should end with a single newline character 47 | exclude_rule 'MD047' 48 | -------------------------------------------------------------------------------- /packaging/hipify-clang.postinst: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # On "configure", create relative soft-links in @ROCMBINDIR@ that point at the hipify tools in @HIPBINDIR@ 6 | 7 | case "$1" in 8 | abort-deconfigure|abort-remove|abort-upgrade) 9 | echo "$1" 10 | ;; 11 | configure) 12 | mkdir -p @ROCMBINDIR@ 13 | CURRENTDIR=$(pwd) 14 | cd @ROCMBINDIR@ 15 | ln -r -s -f @HIPBINDIR@/hipify-perl @ROCMBINDIR@/hipify-perl 16 | ln -r -s -f @HIPBINDIR@/hipify-clang @ROCMBINDIR@/hipify-clang 
17 | ln -r -s -f @HIPBINDIR@/hipconvertinplace-perl.sh @ROCMBINDIR@/hipconvertinplace-perl.sh 18 | ln -r -s -f @HIPBINDIR@/hipconvertinplace.sh @ROCMBINDIR@/hipconvertinplace.sh 19 | ln -r -s -f @HIPBINDIR@/hipexamine-perl.sh @ROCMBINDIR@/hipexamine-perl.sh 20 | ln -r -s -f @HIPBINDIR@/hipexamine.sh @ROCMBINDIR@/hipexamine.sh 21 | ln -r -s -f @HIPBINDIR@/findcode.sh @ROCMBINDIR@/findcode.sh 22 | ln -r -s -f @HIPBINDIR@/findcode_headers.sh @ROCMBINDIR@/findcode_headers.sh 23 | ln -r -s -f @HIPBINDIR@/findcode_sources.sh @ROCMBINDIR@/findcode_sources.sh 24 | ln -r -s -f @HIPBINDIR@/findcode_custom.sh @ROCMBINDIR@/findcode_custom.sh 25 | ln -r -s -f @HIPBINDIR@/finduncodep.sh @ROCMBINDIR@/finduncodep.sh 26 | cd "$CURRENTDIR" 27 | ;; 28 | *) 29 | exit 0 30 | ;; 31 | esac 32 | -------------------------------------------------------------------------------- /packaging/hipify-clang.prerm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | # NOTE(review): the two shell variables below are never read in this script; the commands use the @ROCMBINDIR@ placeholder instead - confirm whether they are still needed 5 | HIPBINDIR=@CPACK_PACKAGING_INSTALL_PREFIX@/bin 6 | ROCMBINDIR=@CPACK_PACKAGING_INSTALL_PREFIX@/../bin 7 | 8 | # On "remove" or "upgrade", delete the soft-links from @ROCMBINDIR@ and remove the directory if it is left empty 9 | case "$1" in 10 | purge) 11 | ;; 12 | remove | upgrade) 13 | rm -f @ROCMBINDIR@/hipify-perl 14 | rm -f @ROCMBINDIR@/hipify-clang 15 | rm -f @ROCMBINDIR@/hipconvertinplace-perl.sh 16 | rm -f @ROCMBINDIR@/hipconvertinplace.sh 17 | rm -f @ROCMBINDIR@/hipexamine-perl.sh 18 | rm -f @ROCMBINDIR@/hipexamine.sh 19 | rm -f @ROCMBINDIR@/findcode.sh 20 | rm -f @ROCMBINDIR@/findcode_headers.sh 21 | rm -f @ROCMBINDIR@/findcode_sources.sh 22 | rm -f @ROCMBINDIR@/findcode_custom.sh 23 | rm -f @ROCMBINDIR@/finduncodep.sh 24 | rmdir --ignore-fail-on-non-empty @ROCMBINDIR@ 25 | ;; 26 | *) 27 | exit 0 28 | ;; 29 | esac 30 | -------------------------------------------------------------------------------- /packaging/hipify-clang.rpm_post: -------------------------------------------------------------------------------- 1 | # Create relative soft-links in @ROCMBINDIR@ that point at the hipify tools in @HIPBINDIR@ 2 
| 3 | mkdir -p @ROCMBINDIR@ 4 | CURRENTDIR=$(pwd) 5 | cd @ROCMBINDIR@ 6 | ln -r -s -f @HIPBINDIR@/hipify-perl @ROCMBINDIR@/hipify-perl 7 | ln -r -s -f @HIPBINDIR@/hipify-clang @ROCMBINDIR@/hipify-clang 8 | ln -r -s -f @HIPBINDIR@/hipconvertinplace-perl.sh @ROCMBINDIR@/hipconvertinplace-perl.sh 9 | ln -r -s -f @HIPBINDIR@/hipconvertinplace.sh @ROCMBINDIR@/hipconvertinplace.sh 10 | ln -r -s -f @HIPBINDIR@/hipexamine-perl.sh @ROCMBINDIR@/hipexamine-perl.sh 11 | ln -r -s -f @HIPBINDIR@/hipexamine.sh @ROCMBINDIR@/hipexamine.sh 12 | ln -r -s -f @HIPBINDIR@/findcode.sh @ROCMBINDIR@/findcode.sh 13 | ln -r -s -f @HIPBINDIR@/findcode_headers.sh @ROCMBINDIR@/findcode_headers.sh 14 | ln -r -s -f @HIPBINDIR@/findcode_sources.sh @ROCMBINDIR@/findcode_sources.sh 15 | ln -r -s -f @HIPBINDIR@/findcode_custom.sh @ROCMBINDIR@/findcode_custom.sh 16 | ln -r -s -f @HIPBINDIR@/finduncodep.sh @ROCMBINDIR@/finduncodep.sh 17 | cd "$CURRENTDIR" 18 | -------------------------------------------------------------------------------- /packaging/hipify-clang.rpm_postun: -------------------------------------------------------------------------------- 1 | if [ "$1" -le 1 ]; then 2 | # perform the below actions for rpm remove($1=0) or upgrade($1=1) operations: delete the soft-links, then remove the directories that are left empty 3 | rm -f @ROCMBINDIR@/hipify-perl 4 | rm -f @ROCMBINDIR@/hipify-clang 5 | rm -f @ROCMBINDIR@/hipconvertinplace-perl.sh 6 | rm -f @ROCMBINDIR@/hipconvertinplace.sh 7 | rm -f @ROCMBINDIR@/hipexamine-perl.sh 8 | rm -f @ROCMBINDIR@/hipexamine.sh 9 | rm -f @ROCMBINDIR@/findcode.sh 10 | rm -f @ROCMBINDIR@/findcode_headers.sh 11 | rm -f @ROCMBINDIR@/findcode_sources.sh 12 | rm -f @ROCMBINDIR@/findcode_custom.sh 13 | rm -f @ROCMBINDIR@/finduncodep.sh 14 | rmdir --ignore-fail-on-non-empty @ROCMBINDIR@ 15 | rmdir --ignore-fail-on-non-empty @HIPBINDIR@ 16 | rmdir --ignore-fail-on-non-empty @CPACK_PACKAGING_INSTALL_PREFIX@ 17 | fi 18 | -------------------------------------------------------------------------------- /src/ArgParse.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | 23 | #pragma once 24 | 25 | #include "clang/Tooling/CommonOptionsParser.h" 26 | #include "llvm/Support/CommandLine.h" 27 | /* Short aliases for the LLVM command-line and Clang tooling namespaces. */ 28 | namespace cl = llvm::cl; 29 | namespace ct = clang::tooling; 30 | /* Command-line option objects: only declared (extern) here; the definitions live in another translation unit. */ 31 | extern cl::OptionCategory ToolTemplateCategory; 32 | extern cl::opt OutputFilename; 33 | extern cl::opt OutputHipifyPerlDir; 34 | extern cl::opt OutputPythonMapDir; 35 | extern cl::opt OutputDir; 36 | extern cl::opt TemporaryDir; 37 | extern cl::opt CudaPath; 38 | extern cl::opt ClangResourceDir; 39 | extern cl::list IncludeDirs; 40 | extern cl::list MacroNames; 41 | extern cl::opt Inplace; 42 | extern cl::opt SaveTemps; 43 | extern cl::opt GeneratePerl; 44 | extern cl::opt GeneratePython; 45 | extern cl::opt Verbose; 46 | extern cl::opt NoBackup; 47 | extern cl::opt NoOutput; 48 | extern cl::opt PrintStats; 49 | extern cl::opt PrintStatsCSV; 50 | extern cl::opt OutputStatsFilename; 51 | extern cl::opt Examine; 52 | extern cl::extrahelp CommonHelp; 53 | extern cl::opt TranslateToRoc; 54 | extern cl::opt TranslateToMIOpen; 55 | extern cl::opt DashDash; 56 | extern cl::opt SkipExcludedPPConditionalBlocks; 57 | extern cl::opt DefaultPreprocessor; 58 | extern cl::opt CudaGpuArch; 59 | extern cl::opt GenerateMarkdown; 60 | extern cl::opt GenerateCSV; 61 | extern cl::opt DocFormat; 62 | extern cl::opt DocRoc; 63 | extern cl::opt Experimental; 64 | extern cl::opt CudaKernelExecutionSyntax; 65 | extern cl::opt HipKernelExecutionSyntax; 66 | extern const std::vector hipifyOptions; 67 | extern const std::vector hipifyOptionsWithTwoArgs; 68 | extern cl::opt Versions; 69 | extern cl::opt NoUndocumented; 70 | extern cl::opt NoWarningsUndocumented; 71 | extern cl::opt HipifyAMAP; 72 | -------------------------------------------------------------------------------- /src/CUDA2HIP_CAFFE2_API_functions.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #include "CUDA2HIP.h" 24 | 25 | // Maps the names of CUDA CAFFE2 API functions to the corresponding HIP functions 26 | const std::map CUDA_CAFFE2_FUNCTION_MAP { 27 | {"cuda_stream", {"hip_stream", "", CONV_LIB_FUNC, API_CAFFE2, 2}}, 28 | }; 29 | 30 | const std::map CUDA_CAFFE2_FUNCTION_VER_MAP { 31 | }; 32 | 33 | const std::map HIP_CAFFE2_FUNCTION_VER_MAP { 34 | }; 35 | 36 | const std::map CUDA_CAFFE2_API_SECTION_MAP { 37 | {1, "CAFFE2 Data types"}, 38 | {2, "CAFFE2 Functions"}, 39 | }; 40 | -------------------------------------------------------------------------------- /src/CUDA2HIP_CAFFE2_API_types.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #include "CUDA2HIP.h" 24 | 25 | // Map of all types 26 | const std::map CUDA_CAFFE2_TYPE_NAME_MAP { 27 | 28 | // 5. Defines 29 | {"REGISTER_CUDA_OPERATOR", {"REGISTER_HIP_OPERATOR", "", CONV_DEFINE, API_CAFFE2, 1}}, 30 | {"REGISTER_CUDA_OPERATOR_CREATOR", {"REGISTER_HIP_OPERATOR_CREATOR", "", CONV_DEFINE, API_CAFFE2, 1}}, 31 | 32 | // 6. Classes 33 | {"CUDAContext", {"HIPContext", "", CONV_TYPE, API_CAFFE2, 1}}, 34 | }; 35 | 36 | const std::map CUDA_CAFFE2_TYPE_NAME_VER_MAP { 37 | }; 38 | 39 | const std::map HIP_CAFFE2_TYPE_NAME_VER_MAP { 40 | }; 41 | -------------------------------------------------------------------------------- /src/CUDA2HIP_CUB_API_functions.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #include "CUDA2HIP.h" 24 | 25 | // Maps the names of CUDA CUB API functions to the corresponding HIP functions 26 | const std::map CUDA_CUB_FUNCTION_MAP { 27 | }; 28 | 29 | const std::map CUDA_CUB_FUNCTION_VER_MAP { 30 | }; 31 | 32 | const std::map HIP_CUB_FUNCTION_VER_MAP { 33 | }; 34 | -------------------------------------------------------------------------------- /src/CUDA2HIP_Complex_API_types.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | 23 | #include "CUDA2HIP.h" 24 | 25 | // Maps the names of CUDA Complex API types to the corresponding HIP types 26 | const std::map CUDA_COMPLEX_TYPE_NAME_MAP { 27 | {"cuFloatComplex", {"hipFloatComplex", "rocblas_float_complex", CONV_TYPE, API_COMPLEX, 1}}, 28 | {"cuDoubleComplex", {"hipDoubleComplex", "rocblas_double_complex", CONV_TYPE, API_COMPLEX, 1}}, 29 | {"cuComplex", {"hipComplex", "rocblas_float_complex", CONV_TYPE, API_COMPLEX, 1}}, 30 | }; 31 | 32 | const std::map CUDA_COMPLEX_TYPE_NAME_VER_MAP { 33 | }; 34 | 35 | const std::map HIP_COMPLEX_TYPE_NAME_VER_MAP { 36 | {"hipFloatComplex", {HIP_1060, HIP_0, HIP_0 }}, 37 | {"hipDoubleComplex",{HIP_1060, HIP_0, HIP_0 }}, 38 | {"hipComplex", {HIP_1060, HIP_0, HIP_0 }}, 39 | }; 40 | -------------------------------------------------------------------------------- /src/CUDA2HIP_Scripting.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #pragma once 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | namespace hipify { 30 | 31 | enum CastTypes { 32 | e_HIP_SYMBOL, 33 | e_reinterpret_cast, 34 | e_reinterpret_cast_size_t, 35 | e_const_cast_char_pp, 36 | e_static_cast_uint64_t, 37 | e_int32_t, 38 | e_int64_t, 39 | e_remove_argument, 40 | e_add_const_argument, 41 | e_add_var_argument, 42 | e_move_argument, 43 | e_replace_argument_with_const, 44 | }; 45 | 46 | enum OverloadTypes { 47 | ot_arguments_number, 48 | }; 49 | 50 | enum CastWarning { 51 | cw_None, 52 | cw_DataLoss, 53 | }; 54 | 55 | enum OverloadWarning { 56 | ow_None, 57 | }; 58 | 59 | struct CastInfo { 60 | CastTypes castType; 61 | CastWarning castWarn; 62 | std::string constValToAddOrReplace = ""; 63 | unsigned moveOrCopyTo = 0; 64 | unsigned numberToMoveOrCopy = 1; 65 | }; 66 | 67 | typedef std::map ArgCastMap; 68 | 69 | struct ArgCastStruct { 70 | ArgCastMap castMap; 71 | bool isToRoc = false; 72 | bool isToMIOpen = false; 73 | }; 74 | 75 | struct OverloadInfo { 76 | hipCounter counter; 77 | OverloadTypes overloadType; 78 | OverloadWarning overloadWarn; 79 | }; 80 | 81 | typedef std::map OverloadMap; 82 | 83 | struct FuncOverloadsStruct { 84 | OverloadMap overloadMap; 85 | bool isToRoc = false; 86 | bool isToMIOpen = false; 87 | }; 88 | } 89 | 90 | extern std::string getCastType(hipify::CastTypes c); 91 | extern std::map> FuncArgCasts; 92 | 93 | extern std::map FuncOverloads; 94 | 95 | extern std::map TypeOverloads; 96 | 97 | namespace perl { 98 | 99 | bool generate(bool Generate = true); 100 | } 101 | 102 | namespace python { 103 | 104 | bool generate(bool Generate = true); 105 | } 106 | 107 | namespace doc { 108 | 109 | bool generate(bool 
GenerateMD = true, bool GenerateCSV = true); 110 | } 111 | -------------------------------------------------------------------------------- /src/LLVMCompat.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #pragma once 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | namespace ct = clang::tooling; 32 | 33 | extern const std::string sHipify, sConflict, sError, sWarning; 34 | 35 | // Things for papering over the differences between different LLVM versions. 36 | 37 | namespace llcompat { 38 | /** 39 | * The getNumArgs function on macros was rather unhelpfully renamed to getNumParams after clang 4.x 40 | * (see the LLVM_VERSION_MAJOR > 4 guard below). Its semantics remain unchanged, so let's be slightly ugly about it here.
:D 41 | */ 42 | #if LLVM_VERSION_MAJOR > 4 43 | #define GET_NUM_ARGS() getNumParams() 44 | #else 45 | #define GET_NUM_ARGS() getNumArgs() 46 | #endif 47 | 48 | #if LLVM_VERSION_MAJOR < 7 49 | #define LLVM_DEBUG(X) DEBUG(X) 50 | #endif 51 | 52 | clang::SourceLocation getBeginLoc(const clang::Stmt *stmt); 53 | clang::SourceLocation getBeginLoc(const clang::TypeLoc &typeLoc); 54 | 55 | clang::SourceLocation getEndLoc(const clang::Stmt *stmt); 56 | clang::SourceLocation getEndLoc(const clang::TypeLoc &typeLoc); 57 | 58 | void PrintStackTraceOnErrorSignal(); 59 | 60 | using namespace llvm; 61 | 62 | /** 63 | * Get the replacement map for a given filename in a RefactoringTool. 64 | * 65 | * Older LLVM versions don't actually support multiple filenames, so everything all gets 66 | * smushed together. It is the caller's responsibility to cope with this. 67 | */ 68 | ct::Replacements &getReplacements(ct::RefactoringTool &Tool, StringRef file); 69 | 70 | /** 71 | * Add a Replacement to a Replacements. 72 | */ 73 | void insertReplacement(ct::Replacements &replacements, const ct::Replacement &rep); 74 | 75 | /** 76 | * Version-agnostic version of Preprocessor::EnterTokenStream(). 
77 | */ 78 | void EnterPreprocessorTokenStream(clang::Preprocessor &_pp, 79 | const clang::Token *start, 80 | size_t len, 81 | bool DisableMacroExpansion); 82 | 83 | std::error_code real_path(const Twine &path, SmallVectorImpl &output, 84 | bool expand_tilde = false); 85 | 86 | bool pragma_once_outside_header(); 87 | 88 | void RetainExcludedConditionalBlocks(clang::CompilerInstance &CI); 89 | 90 | bool CheckCompatibility(); 91 | 92 | clang::SourceLocation getEndOfExpansionRangeForLoc(const clang::SourceManager &SM, const clang::SourceLocation &loc); 93 | 94 | #if LLVM_VERSION_MAJOR >= 12 95 | typedef MemoryBufferRef Memory_Buffer; 96 | #else 97 | typedef const MemoryBuffer *Memory_Buffer; 98 | #endif 99 | 100 | Memory_Buffer getMemoryBuffer(const clang::SourceManager &SM); 101 | 102 | void addTargetIfNeeded(ct::RefactoringTool &Tool); 103 | 104 | const clang::IdentifierInfo *getControllingMacro(clang::CompilerInstance &CI); 105 | 106 | } // namespace llcompat 107 | -------------------------------------------------------------------------------- /src/ReplacementsFrontendActionFactory.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #pragma once 24 | 25 | #include "clang/Tooling/Tooling.h" 26 | #include "clang/Frontend/FrontendAction.h" 27 | #include "clang/Tooling/Core/Replacement.h" 28 | 29 | namespace ct = clang::tooling; 30 | 31 | /** 32 | * A FrontendActionFactory that propagates a set of Replacements into the FrontendAction. 33 | * This is necessary boilerplate for using a custom FrontendAction with a RefactoringTool. 34 | * 35 | * @tparam T The FrontendAction to create. 36 | */ 37 | template 38 | class ReplacementsFrontendActionFactory : public ct::FrontendActionFactory { 39 | ct::Replacements *replacements; 40 | 41 | public: 42 | explicit ReplacementsFrontendActionFactory(ct::Replacements *r): 43 | ct::FrontendActionFactory(), 44 | replacements(r) {} 45 | 46 | #if LLVM_VERSION_MAJOR < 10 47 | clang::FrontendAction *create() override { 48 | return new T(replacements); 49 | } 50 | #else 51 | std::unique_ptr create() override { 52 | return std::unique_ptr(new T(replacements)); 53 | } 54 | #endif 55 | }; 56 | -------------------------------------------------------------------------------- /src/StringUtils.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | 23 | #include "StringUtils.h" 24 | #include "LLVMCompat.h" 25 | #include "llvm/ADT/SmallString.h" 26 | 27 | using namespace llvm; 28 | 29 | llvm::StringRef unquoteStr(llvm::StringRef s) { 30 | if (s.size() > 1 && s.front() == '"' && s.back() == '"') 31 | return s.substr(1, s.size() - 2); 32 | return s; 33 | } 34 | 35 | void removePrefixIfPresent(std::string &s, const std::string &prefix) { 36 | if (s.find(prefix) != 0) return; 37 | s.erase(0, prefix.size()); 38 | } 39 | 40 | std::string getAbsoluteFilePath(const std::string &sFile, std::error_code &EC) { 41 | if (sFile.empty()) 42 | return sFile; 43 | if (!sys::fs::exists(sFile)) { 44 | llvm::errs() << "\n" << sHipify << sError << "source file: " << sFile << " doesn't exist\n"; 45 | EC = std::error_code(static_cast(std::errc::no_such_file_or_directory), std::generic_category()); 46 | return ""; 47 | } 48 | SmallString<256> fileAbsPath; 49 | EC = llcompat::real_path(sFile, fileAbsPath, true); 50 | if (EC) { 51 | llvm::errs() << "\n" << sHipify << sError << EC.message() << ": source file: " << sFile << "\n"; 52 | return ""; 53 | } 54 | EC = std::error_code(); 55 | return fileAbsPath.c_str(); 56 | } 57 | 58 | std::string getAbsoluteDirectoryPath(const std::string &sDir, std::error_code &EC, 59 | const std::string &sDirType, bool bCreateDir) { 60 | if (sDir.empty()) 61 | return sDir; 62 | EC = std::error_code(); 63 | SmallString<256> dirAbsPath; 64 | if (sys::fs::exists(sDir)) { 65 | if (sys::fs::is_regular_file(sDir)) { 66 | llvm::errs() << "\n" << sHipify << sError << sDir << " is not a directory\n"; 67 | EC = std::error_code(static_cast(std::errc::not_a_directory), std::generic_category()); 68 | return ""; 69 | } 70 | } else { 71 | if (bCreateDir) { 72 | EC = sys::fs::create_directory(sDir); 73 | if (EC) { 74 | llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << sDirType << " directory: " << sDir << "\n"; 75 | return ""; 76 | } 77 | } 78 | else { 79 | llvm::errs() << "\n" << sHipify << 
sError << sDirType << " directory: " << sDir << " doesn't exist\n"; 80 | EC = std::error_code(static_cast(std::errc::no_such_file_or_directory), std::generic_category()); 81 | return ""; 82 | } 83 | } 84 | EC = llcompat::real_path(sDir, dirAbsPath, true); 85 | if (EC) { 86 | llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << sDirType << " directory: " << sDir << "\n"; 87 | return ""; 88 | } 89 | return dirAbsPath.c_str(); 90 | } 91 | -------------------------------------------------------------------------------- /src/StringUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | 23 | #pragma once 24 | 25 | #include 26 | #include 27 | #include "llvm/ADT/StringRef.h" 28 | 29 | /** 30 | * Remove double-quotes from the start/end of a string, if present. 31 | */ 32 | llvm::StringRef unquoteStr(llvm::StringRef s); 33 | 34 | /** 35 | * If `s` starts with `prefix`, remove it. Otherwise, does nothing. 36 | */ 37 | void removePrefixIfPresent(std::string &s, const std::string &prefix); 38 | 39 | /** 40 | * Returns Absolute File Path based on filename, otherwise - error. 41 | */ 42 | std::string getAbsoluteFilePath(const std::string &sFile, std::error_code &EC); 43 | 44 | /** 45 | * Returns Absolute Directory Path based on directory name, otherwise - error; 46 | * by default the directory is temporary and created. 47 | */ 48 | std::string getAbsoluteDirectoryPath(const std::string &sDir, std::error_code &EC, 49 | const std::string &sDirType = "temporary", bool bCreateDir = true); 50 | -------------------------------------------------------------------------------- /tests/lit.site.cfg.in: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | config.hipify_install_path = "@CMAKE_INSTALL_PREFIX@" 5 | config.hipify_clang_tests_only = "@HIPIFY_CLANG_TESTS_ONLY@" 6 | config.pointer_size = @CMAKE_SIZEOF_VOID_P@ 7 | config.llvm_version = "@LLVM_PACKAGE_VERSION@" 8 | config.llvm_version_major = int("@LLVM_VERSION_MAJOR@") 9 | config.llvm_tools_dir = "@LLVM_TOOLS_BINARY_DIR@" 10 | config.obj_root = "@CMAKE_CURRENT_BINARY_DIR@" 11 | config.cuda_root = "@CUDA_TOOLKIT_ROOT_DIR@" 12 | config.cuda_sdk_root = "@CUDA_SDK_ROOT_DIR@" 13 | config.cuda_dnn_root = "@CUDA_DNN_ROOT_DIR@" 14 | config.cuda_cub_root = "@CUDA_CUB_ROOT_DIR@" 15 | config.cuda_tensor_root = "@CUDA_TENSOR_ROOT_DIR@" 16 | config.cuda_version_major = int("@CUDA_VERSION_MAJOR@") 17 | config.cuda_version_minor = int("@CUDA_VERSION_MINOR@") 18 | config.cuda_version_full = "@CUDA_VERSION_FULL@" 19 | config.cuda_version = 
"@CUDA_VERSION@" 20 | config.clang_resource_dir = "@HIPIFY_CLANG_RES@" 21 | if sys.platform in ['win32']: 22 | config.build_type = "@CMAKE_BUILD_TYPE@" 23 | if not config.build_type: 24 | config.build_type = "Debug" 25 | 26 | config.cudnn_version_major = int("0") 27 | if config.cuda_dnn_root and config.cuda_dnn_root != "OFF": 28 | cudnn_ver_file = config.cuda_dnn_root + '/include/cudnn_version.h'; 29 | if os.path.isfile(cudnn_ver_file): 30 | with open(cudnn_ver_file) as f: 31 | if 'CUDNN_MAJOR 9' in f.read(): 32 | config.cudnn_version_major = int("9") 33 | 34 | # Support substitution of the tools and libs dirs with user parameters. This is 35 | # used when we can't determine the tool dir at configuration time. 36 | try: 37 | config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params 38 | config.obj_root = config.obj_root % lit_config.params 39 | except KeyError: 40 | e = sys.exc_info()[1] 41 | key, = e.args 42 | lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) 43 | -------------------------------------------------------------------------------- /tests/run_test.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | setlocal 3 | 4 | for %%i in (FileCheck.exe) do set FILE_CHECK=%%~$PATH:i 5 | if not defined FILE_CHECK (echo Error: FileCheck.exe not found in PATH. 
&& exit /b 1) 6 | 7 | set HIPIFY=%1 8 | set IN_FILE=%2 9 | set TMP_FILE=%3 10 | set CUDA_ROOT=%4 11 | set CLANG_RES=%5 12 | set NUM=%6 13 | set all_args=%* 14 | 15 | if %NUM% EQU 1 ( 16 | set HIPIFY_OPTS=%7 17 | call set clang_args=%%all_args:*%7=%% 18 | ) else if %NUM% EQU 2 ( 19 | set HIPIFY_OPTS=%7 %8 20 | call set clang_args=%%all_args:*%8=%% 21 | ) else if %NUM% EQU 3 ( 22 | set HIPIFY_OPTS=%7 %8 %9 23 | shift 24 | call set clang_args=%%all_args:*%9=%% 25 | ) else if %NUM% EQU 4 ( 26 | shift 27 | call set HIPIFY_OPTS=%%6 %%7 %%8 %%9 28 | ) else if %NUM% EQU 5 ( 29 | shift 30 | shift 31 | call set HIPIFY_OPTS=%%5 %%6 %%7 %%8 %%9 32 | ) else ( 33 | set clang_args=%%all_args:*%6=%% 34 | set NUM=0 35 | ) 36 | if %NUM% EQU 4 ( 37 | shift 38 | call set clang_args=%%all_args:*%9=%% 39 | ) 40 | if %NUM% EQU 5 ( 41 | shift 42 | call set clang_args=%%all_args:*%9=%% 43 | ) 44 | 45 | set test_dir=%~dp2 46 | set "test_dir=%test_dir:\=/%" 47 | 48 | set compile_commands=compile_commands.json 49 | set json_in=%test_dir%%compile_commands%.in 50 | set json_out=%test_dir%%compile_commands% 51 | 52 | if exist %json_in% ( 53 | powershell -Command "(gc %json_in%) -replace '', '%test_dir%' -replace '', '%CUDA_ROOT%' | Out-File -encoding ASCII %json_out%" 54 | set hipify_cmd=%HIPIFY% -o=%TMP_FILE% %IN_FILE% %CUDA_ROOT% --clang-resource-directory=%CLANG_RES% %HIPIFY_OPTS% -p=%test_dir% %HIPIFY_OPTS% -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH=1 55 | ) else ( 56 | set hipify_cmd=%HIPIFY% -o=%TMP_FILE% %IN_FILE% %CUDA_ROOT% --clang-resource-directory=%CLANG_RES% %HIPIFY_OPTS% -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH=1 -- %clang_args% 57 | ) 58 | 59 | SET CUDA=%CUDA_ROOT:~13,-1% 60 | 61 | echo [HIPIFY] CUDA directory : %CUDA% 62 | echo [HIPIFY] clang res directory : %CLANG_RES% 63 | echo [HIPIFY] hipify options count: %NUM% 64 | echo [HIPIFY] hipify options : %HIPIFY_OPTS% 65 | echo [HIPIFY] clang options : %clang_args% 66 | echo [HIPIFY] hipify-clang command: %hipify_cmd% 67 | 68 | 
call %hipify_cmd% 69 | 70 | if errorlevel 1 (echo Error: hipify-clang.exe failed with exit code: %errorlevel% && exit /b %errorlevel%) 71 | 72 | findstr /v /r /c:"[ ]*//[ ]*[CHECK*|RUN]" %TMP_FILE% | %FILE_CHECK% %IN_FILE% 73 | if errorlevel 1 (echo Error: FileCheck.exe failed with exit code: %errorlevel% && exit /b %errorlevel%) 74 | -------------------------------------------------------------------------------- /tests/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | 5 | # Run a single LIT test file in a magical way that preserves colour output, to work around 6 | # a known flaw in lit. 7 | 8 | # Capture lit substitutions 9 | HIPIFY=$1 10 | IN_FILE=$2 11 | TMP_FILE=$3 12 | CUDA_ROOT=$4 13 | CLANG_RES=$5 14 | NUM=$6 15 | HIPIFY_OPTS="" 16 | 17 | if [ $NUM = "1" ] 18 | then 19 | HIPIFY_OPTS=$7 20 | shift 7 21 | elif [ $NUM = "2" ] 22 | then 23 | HIPIFY_OPTS="$7 $8" 24 | shift 8 25 | elif [ $NUM = "3" ] 26 | then 27 | HIPIFY_OPTS="$7 $8 $9" 28 | shift 9 29 | elif [ $NUM = "4" ] 30 | then 31 | HIPIFY_OPTS="$7 $8 $9 ${10}" 32 | shift 10 33 | elif [ $NUM = "5" ] 34 | then 35 | HIPIFY_OPTS="$7 $8 $9 ${10} ${11}" 36 | shift 11 37 | fi 38 | 39 | test_dir=${IN_FILE%/*} 40 | 41 | compile_commands=compile_commands.json 42 | json_out=${test_dir}/${compile_commands} 43 | json_in=${json_out}.in 44 | 45 | # Remaining args are the ones to forward to clang proper. 
46 | 47 | if [ -e $json_in ] 48 | then 49 | cp $json_in $json_out 50 | sed -i -e "s||${test_dir}|g; s||${CUDA_ROOT}|g" $json_out 51 | $HIPIFY -o=$TMP_FILE $IN_FILE $CUDA_ROOT --clang-resource-directory=$CLANG_RES $HIPIFY_OPTS -p=$test_dir $HIPIFY_OPTS && cat $TMP_FILE | sed -Ee 's|//.+|// |g' | FileCheck $IN_FILE 52 | else 53 | $HIPIFY -o=$TMP_FILE $IN_FILE $CUDA_ROOT --clang-resource-directory=$CLANG_RES $HIPIFY_OPTS -- $@ && cat $TMP_FILE | sed -Ee 's|//.+|// |g' | FileCheck $IN_FILE 54 | fi 55 | -------------------------------------------------------------------------------- /tests/unit_tests/compilation_database/compile_commands.json.in: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "directory": "", 4 | "command": "hipify-clang \"\" -I./include -v", 5 | "file": "cd_intro.cu" 6 | } 7 | ] 8 | -------------------------------------------------------------------------------- /tests/unit_tests/compilation_database/include/inc.h: -------------------------------------------------------------------------------- 1 | #define K_THREADS 64 2 | #define K_INDEX() ((gridDim.x * blockIdx.y + blockIdx.x) * blockDim.x + threadIdx.x) 3 | #define RND() ((rand() & 0x7FFF) / float(0x8000)) 4 | #define ERRORCHECK() cErrorCheck(__FILE__, __LINE__) 5 | -------------------------------------------------------------------------------- /tests/unit_tests/device/math_functions.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | // Synthetic test to warn only on device functions umin and umax as unsupported, but not on user defined ones. 3 | // ToDo: change lit testing in order to parse the output. 
4 | 5 | #define LEN 1024 6 | #define SIZE LEN * sizeof(float) 7 | // CHECK: #include 8 | #include 9 | 10 | namespace my { 11 | // user defined function 12 | unsigned int umin(unsigned int arg1, unsigned int arg2) { 13 | return (arg1 < arg2) ? arg1 : arg2; 14 | } 15 | // user defined function 16 | unsigned int umax(unsigned int arg1, unsigned int arg2) { 17 | return (arg1 > arg2) ? arg1 : arg2; 18 | } 19 | } 20 | 21 | __global__ void uint_arithm(float* A, float* B, float* C, unsigned int u1, unsigned int u2) 22 | { 23 | // device function call (warn if unsupported) 24 | unsigned int _umin = umin ( u1, u2 ); 25 | // device function call (warn if unsupported) 26 | unsigned int _umax = umax ( u1, u2 ); 27 | // device function call (warn if unsupported) 28 | unsigned int _umin_global = ::umin ( u1, u2 ); 29 | // device function call (warn if unsupported) 30 | unsigned int _umax_global = ::umax(u1, u2); 31 | if (_umin != _umin_global) return; 32 | if (_umax != _umax_global) return; 33 | int i = threadIdx.x; 34 | A[i] = i + _umin; 35 | B[i] = i + _umax; 36 | C[i] = A[i] + B[i]; 37 | } 38 | 39 | int main() { 40 | unsigned int u1 = 33; 41 | unsigned int u2 = 34; 42 | // user defined function call 43 | unsigned int _min = my::umin(u1, u2); 44 | // user defined function call 45 | unsigned int _max = my::umax(u1, u2); 46 | float *A, *B, *C; 47 | // CHECK: hipMalloc((void**)&A, SIZE); 48 | cudaMalloc((void**)&A, SIZE); 49 | // CHECK: hipMalloc((void**)&B, SIZE); 50 | cudaMalloc((void**)&B, SIZE); 51 | // CHECK: hipMalloc((void**)&C, SIZE); 52 | cudaMalloc((void**)&C, SIZE); 53 | dim3 dimGrid(LEN / 512, 1, 1); 54 | dim3 dimBlock(512, 1, 1); 55 | // CHECK: hipLaunchKernelGGL(uint_arithm, dim3(dimGrid), dim3(dimBlock), 0, 0, A, B, C, u1, u2); 56 | uint_arithm<<>>(A, B, C, u1, u2); 57 | return _min < _max; 58 | } 59 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_01.cu: 
-------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // CHECK: #include 4 | // CHECK-NOT: #include 5 | // CHECK: #include 6 | #include 7 | #include 8 | #include 9 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_02.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // CHECK: #include "hip/hip_runtime.h" 4 | // CHECK-NOT: #include "cuda_runtime.h" 5 | // CHECK: #include 6 | #include "cuda.h" 7 | #include "cuda_runtime.h" 8 | #include 9 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_03.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // CHECK: #pragma once 4 | // CHECK-NEXT: #include 5 | #pragma once 6 | // CHECK-NOT: #include 7 | int main(int argc, char* argv[]) { 8 | return 0; 9 | } 10 | 11 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_04.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // CHECK: #include 4 | // CHECK-NEXT: #include 5 | // CHECK-NEXT: #include 6 | #include 7 | #include 8 | // CHECK-NOT: #include 9 | int main(int argc, char* argv[]) { 10 | return 0; 11 | } 12 | 13 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_05.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // CHECK: #pragma once 4 | // CHECK-NEXT: #include 5 | #pragma once 6 | // 
CHECK-NOT: #include 7 | #include 8 | 9 | int main(int argc, char* argv[]) { 10 | return 0; 11 | } 12 | 13 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_06.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // CHECK: #include 4 | // CHECK-NOT: #include 5 | // CHECK: #include 6 | #include 7 | #include 8 | #include 9 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_06_12000.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --skip-excluded-preprocessor-conditional-blocks %clang_args 2 | 3 | // CHECK: #include 4 | // CHECK: #include 5 | // CHECK: #include 6 | #include 7 | #include 8 | #include 9 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_07.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // CHECK: #include 4 | // CHECK: #include "hipblas.h" 5 | // CHECK-NOT: #include "cublas.h" 6 | // CHECK-NOT: #include "cublas_v2.h" 7 | // CHECK: #include 8 | #include "cublas.h" 9 | #include "cublas_v2.h" 10 | // CHECK-NOT: #include "hipblas.h" 11 | #include 12 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_07_12000.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --skip-excluded-preprocessor-conditional-blocks %clang_args 2 | 3 | // CHECK: #include 4 | // CHECK: #include "hipblas.h" 5 | // CHECK-NOT: #include "cublas_v2.h" 6 | // CHECK: #include 7 | #include 8 | #include "cublas_v2.h" 9 | // 
CHECK-NOT: #include "hipblas.h" 10 | #include 11 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_08.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // CHECK: #include 4 | // CHECK: #include 5 | // CHECK: #include "hipblas.h" 6 | // CHECK-NOT: #include "cublas.h" 7 | // CHECK-NOT: #include "cublas_v2.h" 8 | // CHECK: #include 9 | // CHECK-NOT: #include 10 | #include 11 | // CHECK-NOT: #include 12 | #include 13 | // CHECK-NOT: #include 14 | #include 15 | #include "cublas.h" 16 | #include "cublas_v2.h" 17 | // CHECK-NOT: #include "hipblas.h" 18 | #include 19 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_08_12000.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // CHECK: #include 4 | // CHECK: #include 5 | // CHECK: #include "hipblas.h" 6 | // CHECK-NOT: #include "cublas.h" 7 | // CHECK: #include 8 | // CHECK-NOT: #include 9 | #include 10 | // CHECK-NOT: #include 11 | #include 12 | // CHECK-NOT: #include 13 | #include 14 | #include "cublas.h" 15 | // CHECK-NOT: #include "hipblas.h" 16 | #include 17 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_09.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // CHECK: #include 4 | // CHECK: #include 5 | 6 | // CHECK-NOT: #include 7 | // CHECK-NOT: #include 8 | 9 | // CHECK: #include "hip/hip_runtime_api.h" 10 | // CHECK: #include "hip/channel_descriptor.h" 11 | // CHECK: #include "hip/device_functions.h" 12 | // CHECK: #include "hip/driver_types.h" 13 | // CHECK: #include "hip/hip_complex.h" 
14 | // CHECK: #include "hip/hip_fp16.h" 15 | // CHECK: #include "hip/hip_texture_types.h" 16 | // CHECK: #include "hip/hip_vector_types.h" 17 | 18 | // CHECK: #include 19 | 20 | // CHECK: #include "hipblas.h" 21 | // CHECK-NOT: #include "cublas.h" 22 | // CHECK-NOT: #include "cublas_v2.h" 23 | 24 | // CHECK: #include 25 | 26 | // CHECK: #include "hiprand/hiprand.h" 27 | // CHECK: #include "hiprand/hiprand_kernel.h" 28 | 29 | // CHECK: #include 30 | 31 | // CHECK-NOT: #include "hiprand/hiprand.h" 32 | // CHECK-NOT: #include "hiprand/hiprand_kernel.h" 33 | // CHECK-NOT: #include "curand_discrete.h" 34 | // CHECK-NOT: #include "curand_discrete2.h" 35 | // CHECK-NOT: #include "curand_globals.h" 36 | // CHECK-NOT: #include "curand_lognormal.h" 37 | // CHECK-NOT: #include "curand_mrg32k3a.h" 38 | // CHECK-NOT: #include "curand_mtgp32.h" 39 | // CHECK-NOT: #include "curand_mtgp32_host.h" 40 | // CHECK-NOT: #include "curand_mtgp32_kernel.h" 41 | // CHECK-NOT: #include "curand_mtgp32dc_p_11213.h" 42 | // CHECK-NOT: #include "curand_normal.h" 43 | // CHECK-NOT: #include "curand_normal_static.h" 44 | // CHECK-NOT: #include "curand_philox4x32_x.h" 45 | // CHECK-NOT: #include "curand_poisson.h" 46 | // CHECK-NOT: #include "curand_precalc.h" 47 | // CHECK-NOT: #include "curand_uniform.h" 48 | 49 | // CHECK: #include "hiprand/hiprand_mtgp32_host.h" 50 | // CHECK: #include "rocrand/rocrand_mtgp32_11213.h" 51 | 52 | // CHECK: #include 53 | 54 | // CHECK: #include "hipfft/hipfft.h" 55 | // CHECK: #include "hipsparse.h" 56 | 57 | #include 58 | // CHECK-NOT: #include 59 | 60 | #include 61 | 62 | #include 63 | // CHECK-NOT: #include 64 | 65 | #include "cuda_runtime_api.h" 66 | #include "channel_descriptor.h" 67 | #include "device_functions.h" 68 | #include "driver_types.h" 69 | #include "cuComplex.h" 70 | #include "cuda_fp16.h" 71 | #include "cuda_texture_types.h" 72 | #include "vector_types.h" 73 | 74 | #include 75 | 76 | #include "cublas.h" 77 | #include "cublas_v2.h" 78 | // 
CHECK-NOT: #include "hipblas.h" 79 | 80 | #include 81 | 82 | #include "curand.h" 83 | #include "curand_kernel.h" 84 | 85 | #include 86 | 87 | #include "curand_discrete.h" 88 | #include "curand_discrete2.h" 89 | #include "curand_globals.h" 90 | #include "curand_lognormal.h" 91 | #include "curand_mrg32k3a.h" 92 | #include "curand_mtgp32.h" 93 | #include "curand_mtgp32_host.h" 94 | #include "curand_mtgp32_kernel.h" 95 | #include "curand_mtgp32dc_p_11213.h" 96 | #include "curand_normal.h" 97 | #include "curand_normal_static.h" 98 | #include "curand_philox4x32_x.h" 99 | #include "curand_poisson.h" 100 | #include "curand_precalc.h" 101 | #include "curand_uniform.h" 102 | 103 | #include 104 | 105 | #include "cufft.h" 106 | 107 | #include "cusparse.h" 108 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_09_12000.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // CHECK: #include 4 | // CHECK: #include 5 | 6 | // CHECK-NOT: #include 7 | // CHECK-NOT: #include 8 | 9 | // CHECK: #include "hip/hip_runtime_api.h" 10 | // CHECK: #include "hip/channel_descriptor.h" 11 | // CHECK: #include "hip/device_functions.h" 12 | // CHECK: #include "hip/driver_types.h" 13 | // CHECK: #include "hip/hip_complex.h" 14 | // CHECK: #include "hip/hip_fp16.h" 15 | // CHECK: #include "hip/hip_texture_types.h" 16 | // CHECK: #include "hip/hip_vector_types.h" 17 | 18 | // CHECK: #include 19 | 20 | // CHECK: #include "hipblas.h" 21 | // CHECK-NOT: #include "cublas_v2.h" 22 | 23 | // CHECK: #include 24 | 25 | // CHECK: #include "hiprand/hiprand.h" 26 | // CHECK: #include "hiprand/hiprand_kernel.h" 27 | 28 | // CHECK: #include 29 | 30 | // CHECK-NOT: #include "hiprand/hiprand.h" 31 | // CHECK-NOT: #include "hiprand/hiprand_kernel.h" 32 | // CHECK-NOT: #include "curand_discrete.h" 33 | // CHECK-NOT: #include 
"curand_discrete2.h" 34 | // CHECK-NOT: #include "curand_globals.h" 35 | // CHECK-NOT: #include "curand_lognormal.h" 36 | // CHECK-NOT: #include "curand_mrg32k3a.h" 37 | // CHECK-NOT: #include "curand_mtgp32.h" 38 | // CHECK-NOT: #include "curand_mtgp32_host.h" 39 | // CHECK-NOT: #include "curand_mtgp32_kernel.h" 40 | // CHECK-NOT: #include "curand_mtgp32dc_p_11213.h" 41 | // CHECK-NOT: #include "curand_normal.h" 42 | // CHECK-NOT: #include "curand_normal_static.h" 43 | // CHECK-NOT: #include "curand_philox4x32_x.h" 44 | // CHECK-NOT: #include "curand_poisson.h" 45 | // CHECK-NOT: #include "curand_precalc.h" 46 | // CHECK-NOT: #include "curand_uniform.h" 47 | 48 | // CHECK: #include "hiprand/hiprand_mtgp32_host.h" 49 | // CHECK: #include "rocrand/rocrand_mtgp32_11213.h" 50 | 51 | // CHECK: #include 52 | 53 | // CHECK: #include "hipfft/hipfft.h" 54 | // CHECK: #include "hipsparse.h" 55 | 56 | #include 57 | // CHECK-NOT: #include 58 | 59 | #include 60 | 61 | #include 62 | // CHECK-NOT: #include 63 | 64 | #include "cuda_runtime_api.h" 65 | #include "channel_descriptor.h" 66 | #include "device_functions.h" 67 | #include "driver_types.h" 68 | #include "cuComplex.h" 69 | #include "cuda_fp16.h" 70 | #include "cuda_texture_types.h" 71 | #include "vector_types.h" 72 | 73 | #include 74 | 75 | #include "cublas_v2.h" 76 | 77 | // CHECK-NOT: #include "hipblas.h" 78 | 79 | #include 80 | 81 | #include "curand.h" 82 | #include "curand_kernel.h" 83 | 84 | #include 85 | 86 | #include "curand_discrete.h" 87 | #include "curand_discrete2.h" 88 | #include "curand_globals.h" 89 | #include "curand_lognormal.h" 90 | #include "curand_mrg32k3a.h" 91 | #include "curand_mtgp32.h" 92 | #include "curand_mtgp32_host.h" 93 | #include "curand_mtgp32_kernel.h" 94 | #include "curand_mtgp32dc_p_11213.h" 95 | #include "curand_normal.h" 96 | #include "curand_normal_static.h" 97 | #include "curand_philox4x32_x.h" 98 | #include "curand_poisson.h" 99 | #include "curand_precalc.h" 100 | #include 
"curand_uniform.h" 101 | 102 | #include 103 | 104 | #include "cufft.h" 105 | 106 | #include "cusparse.h" 107 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_09_fp16_7050.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --default-preprocessor --roc %clang_args 2 | 3 | // CHECK: #include 4 | #include 5 | // CHECK-NOT: #include 6 | #include 7 | 8 | #include 9 | // CHECK-NOT: #include 10 | 11 | #if CUDA_VERSION >= 7050 12 | // CHECK: #include "hip/hip_fp16.h" 13 | #include "cuda_fp16.h" 14 | #endif 15 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_09_rocrand.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --roc %clang_args 2 | 3 | // CHECK: #include 4 | // CHECK: #include 5 | 6 | // CHECK-NOT: #include 7 | // CHECK-NOT: #include 8 | 9 | // CHECK: #include "hip/hip_runtime_api.h" 10 | // CHECK: #include "hip/channel_descriptor.h" 11 | // CHECK: #include "hip/device_functions.h" 12 | // CHECK: #include "hip/driver_types.h" 13 | // CHECK: #include "hip/hip_complex.h" 14 | // CHECK: #include "hip/hip_texture_types.h" 15 | // CHECK: #include "hip/hip_vector_types.h" 16 | 17 | // CHECK: #include 18 | 19 | // CHECK: #include 20 | 21 | // CHECK: #include "rocrand/rocrand.h" 22 | // CHECK: #include "rocrand/rocrand_kernel.h" 23 | 24 | // CHECK: #include 25 | 26 | // CHECK: #include "rocrand/rocrand_discrete.h" 27 | // CHECK: #include "rocrand/rocrand_common.h" 28 | // CHECK: #include "rocrand/rocrand_log_normal.h" 29 | // CHECK: #include "rocrand/rocrand_mrg32k3a.h" 30 | // CHECK: #include "rocrand/rocrand_mtgp32.h" 31 | // CHECK: #include "rocrand/rocrand_mtgp32_11213.h" 32 | // CHECK: #include "rocrand/rocrand_normal.h" 33 | // CHECK: #include 
"rocrand/rocrand_philox4x32_10.h" 34 | // CHECK: #include "rocrand/rocrand_poisson.h" 35 | // CHECK: #include "rocrand/rocrand_xorwow_precomputed.h" 36 | // CHECK: #include "rocrand/rocrand_uniform.h" 37 | 38 | // CHECK-NOT: #include "rocrand/rocrand.h" 39 | // CHECK-NOT: #include "rocrand/rocrand_kernel.h" 40 | // CHECK-NOT: #include "rocrand/rocrand_discrete.h" 41 | // CHECK-NOT: #include "rocrand/rocrand_mtgp32.h" 42 | // CHECK-NOT: #include "rocrand/rocrand_normal.h" 43 | 44 | // CHECK-NOT: #include "curand_discrete.h" 45 | // CHECK-NOT: #include "curand_discrete2.h" 46 | // CHECK-NOT: #include "curand_globals.h" 47 | // CHECK-NOT: #include "curand_lognormal.h" 48 | // CHECK-NOT: #include "curand_mrg32k3a.h" 49 | // CHECK-NOT: #include "curand_mtgp32.h" 50 | // CHECK-NOT: #include "curand_mtgp32_host.h" 51 | // CHECK-NOT: #include "curand_mtgp32_kernel.h" 52 | // CHECK-NOT: #include "curand_mtgp32dc_p_11213.h" 53 | // CHECK-NOT: #include "curand_normal.h" 54 | // CHECK-NOT: #include "curand_normal_static.h" 55 | // CHECK-NOT: #include "curand_philox4x32_x.h" 56 | // CHECK-NOT: #include "curand_poisson.h" 57 | // CHECK-NOT: #include "curand_precalc.h" 58 | // CHECK-NOT: #include "curand_uniform.h" 59 | 60 | // CHECK: #include 61 | 62 | // CHECK: #include "hipfft/hipfft.h" 63 | // CHECK: #include "rocsparse.h" 64 | 65 | #include 66 | // CHECK-NOT: #include 67 | 68 | #include 69 | 70 | #include 71 | // CHECK-NOT: #include 72 | 73 | #include "cuda_runtime_api.h" 74 | #include "channel_descriptor.h" 75 | #include "device_functions.h" 76 | #include "driver_types.h" 77 | #include "cuComplex.h" 78 | #include "cuda_texture_types.h" 79 | #include "vector_types.h" 80 | 81 | #include 82 | 83 | #include 84 | 85 | #include "curand.h" 86 | #include "curand_kernel.h" 87 | 88 | #include 89 | 90 | #include "curand_discrete.h" 91 | #include "curand_discrete2.h" 92 | #include "curand_globals.h" 93 | #include "curand_lognormal.h" 94 | #include "curand_mrg32k3a.h" 95 | #include 
"curand_mtgp32.h" 96 | #include "curand_mtgp32_host.h" 97 | #include "curand_mtgp32_kernel.h" 98 | #include "curand_mtgp32dc_p_11213.h" 99 | #include "curand_normal.h" 100 | #include "curand_normal_static.h" 101 | #include "curand_philox4x32_x.h" 102 | #include "curand_poisson.h" 103 | #include "curand_precalc.h" 104 | #include "curand_uniform.h" 105 | 106 | #include 107 | 108 | #include "cufft.h" 109 | 110 | #include "cusparse.h" 111 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_10.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // Checks that HIP header file is included after #pragma once, 4 | // which goes before include guard controlling macro. 5 | // CHECK: #pragma once 6 | // CHECK-NEXT: #include 7 | #pragma once 8 | #ifndef HEADERS_TEST_10_H 9 | // CHECK: #ifndef HEADERS_TEST_10_H 10 | // CHECK-NOT: #include 11 | #define HEADERS_TEST_10_H 12 | #include 13 | static int counter = 0; 14 | #endif // HEADERS_TEST_10_H 15 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_11.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // Checks that HIP header file is included after include guard controlling macro, 4 | // which goes before #pragma once. 
5 | // CHECK: #ifndef HEADERS_TEST_10_H 6 | // CHECK-NEXT: #include 7 | #ifndef HEADERS_TEST_10_H 8 | // CHECK: #pragma once 9 | #pragma once 10 | // CHECK-NOT: #include 11 | #define HEADERS_TEST_10_H 12 | #include 13 | static int counter = 0; 14 | #endif // HEADERS_TEST_10_H 15 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_12_SOLVER.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --skip-excluded-preprocessor-conditional-blocks %clang_args 2 | 3 | #ifndef HEADERS_TEST_12_SOLVER_H 4 | // CHECK: #pragma once 5 | #pragma once 6 | #define HEADERS_TEST_12_SOLVER_H 7 | // CHECK: #include 8 | // CHECK-NOT: #include 9 | #include 10 | static int counter = 0; 11 | 12 | // CHECK: #include "hipsolver.h" 13 | // CHECK-NOT: #include "hipsolver.h" 14 | // CHECK-NOT: #include "cusolver_common.h" 15 | // CHECK-NOT: #include "cusolverDn.h" 16 | // CHECK-NOT: #include "cusolverRf.h" 17 | // CHECK-NOT: #include "cusolverSp.h" 18 | #include "cusolver_common.h" 19 | #include "cusolverDn.h" 20 | #include "cusolverRf.h" 21 | #include "cusolverSp.h" 22 | #endif // HEADERS_TEST_12_SOLVER_H 23 | -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_12_SOLVER_10010.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // Checks that HIP header file is included after include guard controlling macro, 4 | // which goes before #pragma once. 
5 | // CHECK: #ifndef HEADERS_TEST_12_SOLVER_H 6 | // CHECK-NEXT: #include 7 | #ifndef HEADERS_TEST_12_SOLVER_H 8 | // CHECK: #pragma once 9 | #pragma once 10 | // CHECK-NOT: #include 11 | #define HEADERS_TEST_12_SOLVER_H 12 | #include 13 | static int counter = 0; 14 | 15 | // CHECK: #include "hipsolver.h" 16 | // CHECK-NOT: #include "hipsolver.h" 17 | // CHECK-NOT: #include "cusolver_common.h" 18 | // CHECK-NOT: #include "cusolverDn.h" 19 | // CHECK-NOT: #include "cusolverRf.h" 20 | // CHECK-NOT: #include "cusolverMg.h" 21 | // CHECK-NOT: #include "cusolverSp.h" 22 | // CHECK-NOT: #include "cusolverSp_LOWLEVEL_PREVIEW.h" 23 | #include "cusolver_common.h" 24 | #include "cusolverDn.h" 25 | #include "cusolverRf.h" 26 | #include "cusolverMg.h" 27 | #include "cusolverSp.h" 28 | #include "cusolverSp_LOWLEVEL_PREVIEW.h" 29 | 30 | #endif // HEADERS_TEST_12_SOLVER_H -------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_12_SOLVER_7050.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --skip-excluded-preprocessor-conditional-blocks %clang_args 2 | 3 | #ifndef HEADERS_TEST_12_SOLVER_H 4 | // CHECK: #pragma once 5 | #pragma once 6 | #define HEADERS_TEST_12_SOLVER_H 7 | // CHECK: #include 8 | // CHECK-NOT: #include 9 | #include 10 | static int counter = 0; 11 | 12 | // CHECK: #include "hipsolver.h" 13 | // CHECK-NOT: #include "hipsolver.h" 14 | // CHECK-NOT: #include "cusolver_common.h" 15 | // CHECK-NOT: #include "cusolverDn.h" 16 | // CHECK-NOT: #include "cusolverRf.h" 17 | // CHECK-NOT: #include "cusolverSp.h" 18 | // CHECK-NOT: #include "cusolverSp_LOWLEVEL_PREVIEW.h" 19 | #include "cusolver_common.h" 20 | #include "cusolverDn.h" 21 | #include "cusolverRf.h" 22 | #include "cusolverSp.h" 23 | #include "cusolverSp_LOWLEVEL_PREVIEW.h" 24 | #endif // HEADERS_TEST_12_SOLVER_H 25 | 
-------------------------------------------------------------------------------- /tests/unit_tests/headers/headers_test_13.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // CHECK: #include 4 | // CHECK: #include 5 | // CHECK-NOT: #include 6 | // CHECK: #include 7 | #include 8 | #include 9 | #include 10 | -------------------------------------------------------------------------------- /tests/unit_tests/kernel_launch/kernel_launch_01.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | // Synthetic test to check that a kernel launch wrapped in the variadic CUDA_LAUNCH macro is converted to hipLaunchKernelGGL. 3 | // ToDo: change lit testing in order to parse the output. 4 | 5 | #define LEN 1024 6 | #define SIZE LEN * sizeof(float) 7 | #define ITER 1024*1024 8 | 9 | // CHECK: #include 10 | #include 11 | 12 | #define CUDA_LAUNCH(cuda_call,dimGrid,dimBlock, ...) 
\ 13 | cuda_call<<>>(__VA_ARGS__); 14 | 15 | __global__ void Inc1(float *Ad, float *Bd) { 16 | int tx = threadIdx.x + blockIdx.x * blockDim.x; 17 | if (tx < 1) { // every thread whose global index is 1 or more skips the body and touches neither Ad nor Bd 18 | for (int i = 0; i < ITER; ++i) { // ITER times: add 1.0f to Ad[tx], then store it into Bd[tx] 256 times 19 | Ad[tx] = Ad[tx] + 1.0f; 20 | for (int j = 0; j < 256; ++j) { 21 | Bd[tx] = Ad[tx]; 22 | } 23 | } 24 | } 25 | } 26 | 27 | int main() { 28 | float *A, *Ad, *Bd; 29 | A = new float[LEN]; 30 | for (int i = 0; i < LEN; ++i) { 31 | A[i] = 0.0f; 32 | } 33 | // CHECK: hipError_t status; 34 | cudaError_t status; 35 | // CHECK: status = hipHostRegister(A, SIZE, hipHostRegisterMapped); 36 | status = cudaHostRegister(A, SIZE, cudaHostRegisterMapped); // status is assigned here but never inspected afterwards 37 | // CHECK: hipHostGetDevicePointer(&Ad, A, 0); 38 | cudaHostGetDevicePointer(&Ad, A, 0); 39 | // CHECK: hipMalloc((void**)&Bd, SIZE); 40 | cudaMalloc((void**)&Bd, SIZE); 41 | dim3 dimGrid(LEN / 512, 1, 1); 42 | dim3 dimBlock(512, 1, 1); 43 | 44 | // CHECK: hipLaunchKernelGGL(Inc1, dim3(dimGrid), dim3(dimBlock), 0, 0, Ad, Bd); 45 | CUDA_LAUNCH(Inc1, dimGrid, dimBlock, Ad, Bd); 46 | } 47 | -------------------------------------------------------------------------------- /tests/unit_tests/libraries/CAFFE2/caffe2/core/common_cudnn.h: -------------------------------------------------------------------------------- 1 | #ifndef CAFFE2_CORE_COMMON_CUDNN_H_ 2 | #define CAFFE2_CORE_COMMON_CUDNN_H_ 3 | 4 | #include 5 | #include 6 | 7 | #endif // CAFFE2_CORE_COMMON_CUDNN_H_ 8 | -------------------------------------------------------------------------------- /tests/unit_tests/libraries/CAFFE2/caffe2/operators/spatial_batch_norm_op.h: -------------------------------------------------------------------------------- 1 | #ifndef CAFFE2_OPERATORS_SPATIAL_BATCH_NORM_OP_H_ 2 | #define CAFFE2_OPERATORS_SPATIAL_BATCH_NORM_OP_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace caffe2 { 11 | 12 | } // namespace caffe2 13 | 14 | #endif // CAFFE2_OPERATORS_SPATIAL_BATCH_NORM_OP_H_ 15 | 
-------------------------------------------------------------------------------- /tests/unit_tests/libraries/CAFFE2/caffe2_01.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args 2 | 3 | // NOTE: Nonworking code just for conversion testing 4 | 5 | // CHECK: #include 6 | #include 7 | #include 8 | #include 9 | // CHECK: #include "caffe2/operators/hip/spatial_batch_norm_op_miopen.hip" 10 | #include "caffe2/operators/spatial_batch_norm_op.h" 11 | // CHECK: #include "caffe2/core/hip/common_miopen.h" 12 | #include "caffe2/core/common_cudnn.h" 13 | -------------------------------------------------------------------------------- /tests/unit_tests/libraries/CAFFE2/caffe2_02.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args 2 | 3 | // NOTE: Nonworking code just for conversion testing 4 | 5 | // CHECK: #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace caffe2 { 12 | 13 | // Operator Definition. 
14 | struct OperatorDef { // minimal stand-in: three int fields with default values 1, 2 and 3 15 | int input = 1; 16 | int output = 2; 17 | int name = 3; 18 | }; 19 | 20 | class OperatorBase; 21 | class Workspace; 22 | 23 | template 24 | class Observable { // default-constructible, movable base with a virtual destructor 25 | public: 26 | Observable() = default; 27 | 28 | Observable(Observable&&) = default; 29 | Observable& operator =(Observable&&) = default; 30 | 31 | virtual ~Observable() = default; 32 | }; 33 | 34 | template 35 | class ObserverBase { // stores the subject pointer it is given; Start/Stop do nothing unless overridden 36 | public: 37 | explicit ObserverBase(T* subject) : subject_(subject) {} 38 | 39 | virtual void Start() {} 40 | virtual void Stop() {} 41 | 42 | virtual std::string debugInfo() { 43 | return "Not implemented."; 44 | } 45 | 46 | virtual ~ObserverBase() noexcept {}; 47 | 48 | T* subject() const { 49 | return subject_; 50 | } 51 | 52 | protected: 53 | T* subject_; 54 | }; 55 | 56 | typedef ObserverBase OperatorObserver; 57 | 58 | class OperatorBase : public Observable { 59 | public: 60 | explicit OperatorBase(const OperatorDef& operator_def, Workspace* ws); 61 | virtual ~OperatorBase() noexcept {} 62 | }; 63 | 64 | template 65 | class Operator : public OperatorBase { 66 | public: 67 | explicit Operator(const OperatorDef& operator_def, Workspace* ws) 68 | : OperatorBase(operator_def, ws) { 69 | } 70 | ~Operator() noexcept override {} 71 | }; 72 | 73 | template 74 | class DummyEmptyOp : public Operator { 75 | public: 76 | DummyEmptyOp(const OperatorDef& def, Workspace* ws) 77 | : Operator(def, ws) {} 78 | 79 | bool RunOnDevice() final { return true; } // always reports success 80 | }; 81 | 82 | 83 | class CUDAContext { 84 | public: 85 | CUDAContext(); 86 | virtual ~CUDAContext() noexcept {} 87 | }; 88 | 89 | #define REGISTER_CUDA_OPERATOR(name, ...) \ 90 | void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ 91 | static void CAFFE_ANONYMOUS_VARIABLE_CUDA##name() { \ 92 | CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ 93 | } 94 | 95 | #define REGISTER_CUDA_OPERATOR_CREATOR(key, ...) 
96 | 97 | // CHECK: REGISTER_HIP_OPERATOR(Operator, DummyEmptyOp); 98 | REGISTER_CUDA_OPERATOR(Operator, DummyEmptyOp); 99 | // CHECK: REGISTER_HIP_OPERATOR_CREATOR(Operator, DummyEmptyOp); 100 | REGISTER_CUDA_OPERATOR_CREATOR(Operator, DummyEmptyOp); 101 | 102 | } // namespace caffe2 103 | -------------------------------------------------------------------------------- /tests/unit_tests/libraries/CUB/cub_01.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | // CHECK: #include 3 | #include 4 | // CHECK: #include 5 | #include 6 | #define THRUST_NS_QUALIFIER ::thrust 7 | // CHECK: #include 8 | #include 9 | 10 | #include 11 | 12 | // TODO: 13 | // using namespace cub; 14 | 15 | template 16 | __global__ void sort(const T* data_in, T* data_out){ 17 | // CHECK: typedef ::hipcub::BlockRadixSort BlockRadixSortT; 18 | typedef ::cub::BlockRadixSort BlockRadixSortT; 19 | __shared__ typename BlockRadixSortT::TempStorage tmp_sort; 20 | double items[4]; // per-thread working set: 4 values, held as double regardless of T 21 | int i0 = 4 * (blockIdx.x * blockDim.x + threadIdx.x); // index of this thread's first element 22 | for (int i = 0; i < 4; ++i){ 23 | items[i] = data_in[i0 + i]; 24 | } 25 | BlockRadixSortT(tmp_sort).Sort(items); // hands the per-thread items to BlockRadixSortT, with tmp_sort as its temporary storage 26 | for (int i = 0; i < 4; ++i){ 27 | data_out[i0 + i] = items[i]; 28 | } 29 | } 30 | 31 | int main(){ 32 | double* d_gpu = NULL; 33 | double* result_gpu = NULL; 34 | double* data_sorted = new double[4096]; 35 | // Allocate memory on the GPU 36 | // CHECK: hipMalloc(&d_gpu, 4096 * sizeof(double)); 37 | cudaMalloc(&d_gpu, 4096 * sizeof(double)); 38 | // CHECK: hipMalloc(&result_gpu, 4096 * sizeof(double)); 39 | cudaMalloc(&result_gpu, 4096 * sizeof(double)); 40 | // CHECK: hiprandGenerator_t gen; 41 | curandGenerator_t gen; 42 | // Create generator 43 | // CHECK: hiprandCreateGenerator(&gen, HIPRAND_RNG_PSEUDO_DEFAULT); 44 | curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT); 45 | // Fill array with random numbers 46 | // CHECK: 
hiprandGenerateNormalDouble(gen, d_gpu, 4096, 0.0, 1.0); 47 | curandGenerateNormalDouble(gen, d_gpu, 4096, 0.0, 1.0); 48 | // Destroy generator 49 | // CHECK: hiprandDestroyGenerator(gen); 50 | curandDestroyGenerator(gen); 51 | // Sort data 52 | // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort), dim3(1), dim3(1024), 0, 0, d_gpu, result_gpu); 53 | sort<<<1, 1024>>>(d_gpu, result_gpu); // 1 block x 1024 threads, 4 items per thread in the kernel = 4096 values 54 | // CHECK: hipMemcpy(data_sorted, result_gpu, 4096 * sizeof(double), hipMemcpyDeviceToHost); 55 | cudaMemcpy(data_sorted, result_gpu, 4096 * sizeof(double), cudaMemcpyDeviceToHost); 56 | // Write the sorted data to standard out 57 | for (int i = 0; i < 4096; ++i){ 58 | std::cout << data_sorted[i] << ", "; 59 | } 60 | std::cout << std::endl; 61 | } 62 | -------------------------------------------------------------------------------- /tests/unit_tests/libraries/CUB/cub_02.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | // CHECK: #include 3 | #include 4 | // CHECK: #include 5 | #include 6 | #define THRUST_NS_QUALIFIER ::thrust 7 | // CHECK: #include 8 | #include 9 | 10 | #include 11 | 12 | template 18 | __global__ void sort(const T* data_in, T* data_out){ 19 | // CHECK: typedef hipcub::BlockLoad BlockLoadT; 20 | typedef cub::BlockLoad BlockLoadT; 21 | // CHECK: typedef hipcub::BlockRadixSort BlockRadixSortT; 22 | typedef cub::BlockRadixSort BlockRadixSortT; 23 | // CHECK: typedef hipcub::BlockStore BlockStoreT; 24 | typedef cub::BlockStore BlockStoreT; 25 | __shared__ union { // one shared allocation, reused in turn by the load, sort and store stages 26 | typename BlockLoadT::TempStorage load; 27 | typename BlockRadixSortT::TempStorage sort; 28 | typename BlockStoreT::TempStorage store; 29 | } tmp_storage; 30 | T items[ITEMS_PER_THREAD]; 31 | BlockLoadT(tmp_storage.load).Load(data_in + blockIdx.x * BLOCK_WIDTH * ITEMS_PER_THREAD, items); 32 | __syncthreads(); // barrier before the union is reused by the sort stage 33 | BlockRadixSortT(tmp_storage.sort).Sort(items); 34 | 
__syncthreads(); // barrier before the union is reused by the store stage 35 | BlockStoreT(tmp_storage.store).Store(data_out + blockIdx.x * BLOCK_WIDTH * ITEMS_PER_THREAD, items); 36 | } 37 | 38 | int main() { 39 | double* d_gpu = NULL; 40 | double* result_gpu = NULL; 41 | double* data_sorted = new double[1000*4096]; 42 | // Allocate memory on the GPU 43 | // CHECK: hipMalloc(&d_gpu, 1000*4096 * sizeof(double)); 44 | cudaMalloc(&d_gpu, 1000*4096 * sizeof(double)); 45 | // CHECK: hipMalloc(&result_gpu, 1000*4096 * sizeof(double)); 46 | cudaMalloc(&result_gpu, 1000*4096 * sizeof(double)); 47 | // CHECK: hiprandGenerator_t gen; 48 | curandGenerator_t gen; 49 | // Create generator 50 | // CHECK: hiprandCreateGenerator(&gen, HIPRAND_RNG_PSEUDO_DEFAULT); 51 | curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT); 52 | // Fill array with random numbers 53 | // CHECK: hiprandGenerateNormalDouble(gen, d_gpu, 1000*4096, 0.0, 1.0); 54 | curandGenerateNormalDouble(gen, d_gpu, 1000*4096, 0.0, 1.0); 55 | // Destroy generator 56 | // CHECK: hiprandDestroyGenerator(gen); 57 | curandDestroyGenerator(gen); 58 | // Sort data 59 | // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<512, 8, hipcub::BLOCK_LOAD_TRANSPOSE, hipcub::BLOCK_STORE_TRANSPOSE>), dim3(1000), dim3(512), 0, 0, d_gpu, result_gpu); 60 | sort<512, 8, cub::BLOCK_LOAD_TRANSPOSE, cub::BLOCK_STORE_TRANSPOSE><<<1000, 512>>>(d_gpu, result_gpu); 61 | // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<256, 16, hipcub::BLOCK_LOAD_DIRECT, hipcub::BLOCK_STORE_DIRECT>), dim3(1000), dim3(256), 0, 0, d_gpu, result_gpu); 62 | sort<256, 16, cub::BLOCK_LOAD_DIRECT, cub::BLOCK_STORE_DIRECT><<<1000, 256>>>(d_gpu, result_gpu); 63 | // CHECK: hipMemcpy(data_sorted, result_gpu, 1000*4096*sizeof(double), hipMemcpyDeviceToHost); 64 | cudaMemcpy(data_sorted, result_gpu, 1000*4096*sizeof(double), cudaMemcpyDeviceToHost); 65 | // Write the first 4096 sorted values to standard out, comma separated 66 | for (int i = 0; i < 4095; ++i) { 67 | std::cout << data_sorted[i] << ", "; 68 | } 69 | std::cout << data_sorted[4095] << std::endl; 
70 | } 71 | -------------------------------------------------------------------------------- /tests/unit_tests/libraries/CUB/cub_03.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | // CHECK: #include 3 | #include 4 | #define THRUST_NS_QUALIFIER ::thrust 5 | // CHECK: #include 6 | #include 7 | 8 | // using namespace hipcub; 9 | using namespace cub; 10 | 11 | // Simple CUDA kernel for computing tiled partial sums 12 | template 16 | cub::BlockScanAlgorithm SCAN_ALGO> 17 | __global__ void ScanTilesKernel(int *d_in, int *d_out) { 18 | // Specialize collective types for problem context 19 | // CHECK: typedef ::hipcub::BlockLoad BlockLoadT; 20 | typedef ::cub::BlockLoad BlockLoadT; 21 | typedef BlockScan BlockScanT; 22 | // Allocate on-chip temporary storage 23 | __shared__ union { 24 | typename BlockLoadT::TempStorage load; 25 | typename BlockScanT::TempStorage reduce; 26 | } temp_storage; 27 | // Load data per thread 28 | int thread_data[ITEMS_PER_THREAD]; 29 | int offset = blockIdx.x * (BLOCK_THREADS * ITEMS_PER_THREAD); 30 | BlockLoadT(temp_storage.load).Load(d_in + offset, offset); 31 | __syncthreads(); 32 | // Compute the block-wide prefix sum 33 | BlockScanT(temp_storage).Sum(thread_data); 34 | } 35 | -------------------------------------------------------------------------------- /tests/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // CHECK: #include 4 | #include 5 | #include 6 | #include 7 | // CHECK: #include "hipblas.h" 8 | // CHECK-NOT: #include "cublas_v2.h" 9 | #include "cublas.h" 10 | #include "cublas_v2.h" 11 | // CHECK-NOT: #include "hipblas.h" 12 | #define M 6 13 | #define N 5 14 | #define IDX2C(i,j,ld) (((j)*(ld))+(i)) 15 | static __inline__ void modify(float *m, int ldm, int n, int p, int 
q, float 16 | alpha, float beta) { 17 | // CHECK: hipblasHandle_t blasHandle; 18 | cublasHandle_t blasHandle; 19 | // CHECK: hipblasStatus_t blasStatus = hipblasCreate(&blasHandle); 20 | cublasStatus blasStatus = cublasCreate(&blasHandle); 21 | // CHECK: hipblasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm); 22 | cublasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm); 23 | // CHECK: hipblasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1); 24 | cublasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1); 25 | // CHECK: hipblasDestroy(blasHandle); 26 | cublasDestroy(blasHandle); 27 | } 28 | int main(void) { 29 | int i, j; 30 | // CHECK: hipblasStatus_t stat; 31 | cublasStatus stat; 32 | float* devPtrA; 33 | float* a = 0; 34 | a = (float *)malloc(M * N * sizeof(*a)); 35 | if (!a) { 36 | printf("host memory allocation failed"); 37 | return EXIT_FAILURE; 38 | } 39 | for (j = 0; j < N; j++) { 40 | for (i = 0; i < M; i++) { 41 | a[IDX2C(i, j, M)] = (float)(i * M + j + 1); 42 | } 43 | } 44 | // cublasInit is not supported yet 45 | cublasInit(); 46 | // cublasAlloc is not supported yet 47 | stat = cublasAlloc(M*N, sizeof(*a), (void**)&devPtrA); 48 | // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { 49 | if (stat != CUBLAS_STATUS_SUCCESS) { 50 | printf("device memory allocation failed"); 51 | // cublasShutdown is not supported yet 52 | cublasShutdown(); 53 | return EXIT_FAILURE; 54 | } 55 | // CHECK: stat = hipblasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); 56 | stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); 57 | // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { 58 | if (stat != CUBLAS_STATUS_SUCCESS) { 59 | printf("data download failed"); 60 | // cublasFree is not supported yet 61 | cublasFree(devPtrA); 62 | // cublasShutdown is not supported yet 63 | cublasShutdown(); 64 | return EXIT_FAILURE; 65 | } 66 | modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f); 67 | // CHECK: stat = hipblasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); 
68 | stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); 69 | // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { 70 | if (stat != CUBLAS_STATUS_SUCCESS) { 71 | printf("data upload failed"); 72 | // cublasFree is not supported yet 73 | cublasFree(devPtrA); 74 | // cublasShutdown is not supported yet 75 | cublasShutdown(); 76 | return EXIT_FAILURE; 77 | } 78 | // cublasFree is not supported yet 79 | cublasFree(devPtrA); 80 | // cublasShutdown is not supported yet 81 | cublasShutdown(); 82 | for (j = 0; j < N; j++) { 83 | for (i = 0; i < M; i++) { 84 | printf("%7.0f", a[IDX2C(i, j, M)]); 85 | } 86 | printf("\n"); 87 | } 88 | free(a); 89 | return EXIT_SUCCESS; 90 | } 91 | -------------------------------------------------------------------------------- /tests/unit_tests/libraries/cuBLAS/cublas_0_based_indexing_v2.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | #include 4 | #include 5 | #include 6 | // CHECK: #include "hip/hip_runtime.h" 7 | #include "cuda.h" 8 | // CHECK: #include "hipblas.h" 9 | // CHECK-NOT: #include "cublas_v2.h" 10 | #include "cublas_v2.h" 11 | // CHECK-NOT: #include "hipblas.h" 12 | #define M 6 13 | #define N 5 14 | #define IDX2C(i,j,ld) (((j)*(ld))+(i)) 15 | static __inline__ void modify(float *m, int ldm, int n, int p, int q, float 16 | alpha, float beta) { 17 | // CHECK: hipblasHandle_t blasHandle; 18 | cublasHandle_t blasHandle; 19 | // CHECK: hipblasStatus_t blasStatus = hipblasCreate(&blasHandle); 20 | cublasStatus_t blasStatus = cublasCreate(&blasHandle); 21 | // CHECK: hipblasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm); 22 | cublasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm); 23 | // CHECK: hipblasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1); 24 | cublasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1); 25 | // CHECK: hipblasDestroy(blasHandle); 26 | 
cublasDestroy(blasHandle); 27 | } 28 | int main(void) { 29 | int i, j; 30 | // CHECK: hipblasStatus_t stat; 31 | cublasStatus_t stat; 32 | // CHECK: hipError_t result = hipSuccess; 33 | cudaError_t result = cudaSuccess; 34 | float* devPtrA; 35 | float* a = 0; 36 | a = (float *)malloc(M * N * sizeof(*a)); 37 | if (!a) { 38 | printf("host memory allocation failed"); 39 | return EXIT_FAILURE; 40 | } 41 | for (j = 0; j < N; j++) { 42 | for (i = 0; i < M; i++) { 43 | a[IDX2C(i, j, M)] = (float)(i * M + j + 1); 44 | } 45 | } 46 | // CHECK: hipMalloc((void**)&devPtrA, M*N*sizeof(*a)); 47 | result = cudaMalloc((void**)&devPtrA, M*N*sizeof(*a)); 48 | // CHECK: if (result != hipSuccess) { 49 | if (result != cudaSuccess) { 50 | printf("device memory allocation failed"); 51 | return EXIT_FAILURE; 52 | } 53 | // CHECK: stat = hipblasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); 54 | stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); 55 | // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { 56 | if (stat != CUBLAS_STATUS_SUCCESS) { 57 | printf("data download failed"); 58 | // CHECK: hipFree(devPtrA); 59 | cudaFree(devPtrA); 60 | return EXIT_FAILURE; 61 | } 62 | modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f); 63 | // CHECK: stat = hipblasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); 64 | stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); 65 | // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { 66 | if (stat != CUBLAS_STATUS_SUCCESS) { 67 | printf("data upload failed"); 68 | // CHECK: hipFree(devPtrA); 69 | cudaFree(devPtrA); 70 | return EXIT_FAILURE; 71 | } 72 | // CHECK: hipFree(devPtrA); 73 | cudaFree(devPtrA); 74 | for (j = 0; j < N; j++) { 75 | for (i = 0; i < M; i++) { 76 | printf("%7.0f", a[IDX2C(i, j, M)]); 77 | } 78 | printf("\n"); 79 | } 80 | free(a); 81 | return EXIT_SUCCESS; 82 | } 83 | -------------------------------------------------------------------------------- /tests/unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu: 
-------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | #include 4 | #include 5 | #include 6 | // CHECK: #include 7 | #include 8 | // CHECK: #include "hipblas.h" 9 | #include "cublas_v2.h" 10 | #define M 6 11 | #define N 5 12 | #define IDX2F(i,j,ld) ((((j)-1)*(ld))+((i)-1)) 13 | // CHECK: static __inline__ void modify(hipblasHandle_t handle, float *m, int ldm, int 14 | static __inline__ void modify(cublasHandle_t handle, float *m, int ldm, int 15 | n, int p, int q, float alpha, float beta) { 16 | // CHECK: hipblasSscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm); 17 | // CHECK: hipblasSscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1); 18 | cublasSscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm); 19 | cublasSscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1); 20 | } 21 | int main(void) { 22 | // CHECK: hipError_t cudaStat; 23 | // CHECK: hipblasStatus_t stat; 24 | // CHECK: hipblasHandle_t handle; 25 | cudaError_t cudaStat; 26 | cublasStatus_t stat; 27 | cublasHandle_t handle; 28 | int i, j; 29 | float* devPtrA; 30 | float* a = 0; 31 | a = (float *)malloc(M * N * sizeof(*a)); 32 | if (!a) { 33 | printf("host memory allocation failed"); 34 | return EXIT_FAILURE; 35 | } 36 | for (j = 1; j <= N; j++) { 37 | for (i = 1; i <= M; i++) { 38 | a[IDX2F(i, j, M)] = (float)((i - 1) * M + j); 39 | } 40 | } 41 | // CHECK: cudaStat = hipMalloc((void**)&devPtrA, M*N * sizeof(*a)); 42 | cudaStat = cudaMalloc((void**)&devPtrA, M*N * sizeof(*a)); 43 | // CHECK: if (cudaStat != hipSuccess) { 44 | if (cudaStat != cudaSuccess) { 45 | printf("device memory allocation failed"); 46 | return EXIT_FAILURE; 47 | } 48 | // CHECK: stat = hipblasCreate(&handle); 49 | stat = cublasCreate(&handle); 50 | // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { 51 | if (stat != CUBLAS_STATUS_SUCCESS) { 52 | printf("CUBLAS initialization failed\n"); 53 | return EXIT_FAILURE; 54 
| } 55 | // CHECK: stat = hipblasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); 56 | stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); 57 | // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { 58 | if (stat != CUBLAS_STATUS_SUCCESS) { 59 | printf("data download failed"); 60 | // CHECK: hipFree(devPtrA); 61 | // CHECK: hipblasDestroy(handle); 62 | cudaFree(devPtrA); 63 | cublasDestroy(handle); 64 | return EXIT_FAILURE; 65 | } 66 | modify(handle, devPtrA, M, N, 2, 3, 16.0f, 12.0f); 67 | // CHECK: stat = hipblasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); 68 | stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); 69 | // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { 70 | if (stat != CUBLAS_STATUS_SUCCESS) { 71 | printf("data upload failed"); 72 | // CHECK: hipFree(devPtrA); 73 | // CHECK: hipblasDestroy(handle); 74 | cudaFree(devPtrA); 75 | cublasDestroy(handle); 76 | return EXIT_FAILURE; 77 | } 78 | // CHECK: hipFree(devPtrA); 79 | // CHECK: hipblasDestroy(handle); 80 | cudaFree(devPtrA); 81 | cublasDestroy(handle); 82 | for (j = 1; j <= N; j++) { 83 | for (i = 1; i <= M; i++) { 84 | printf("%7.0f", a[IDX2F(i, j, M)]); 85 | } 86 | printf("\n"); 87 | } 88 | free(a); 89 | return EXIT_SUCCESS; 90 | } 91 | -------------------------------------------------------------------------------- /tests/unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | #include 4 | #include 5 | // CHECK: #include 6 | #include 7 | // CHECK: #include "hipblas.h" 8 | #include "cublas_v2.h" 9 | #define IDX2C(i,j,ld) (((j)*(ld))+(i)) 10 | #define m 6 11 | #define n 4 12 | #define k 5 13 | int main(void) { 14 | // CHECK: hipError_t cudaStat; 15 | // CHECK: hipblasStatus_t stat; 16 | // CHECK: hipblasHandle_t handle; 17 | cudaError_t cudaStat; 18 | cublasStatus_t stat; 19 | cublasHandle_t handle; 20 | int i, j; 21 | float * a; 
22 | float * b; 23 | float * c; 24 | a = (float *)malloc(m*k * sizeof(float)); 25 | b = (float *)malloc(k*n * sizeof(float)); 26 | c = (float *)malloc(m*n * sizeof(float)); 27 | int ind = 11; 28 | for (j = 0; j 4 | #include 5 | #include 6 | #include 7 | // CHECK: #include "hipblas.h" 8 | #include "cublas.h" 9 | #define M 6 10 | #define N 5 11 | #define IDX2C(i,j,ld) (((j)*(ld))+(i)) 12 | static __inline__ void modify(float* m, int ldm, int n, int p, int q, float 13 | alpha, float beta) { 14 | // CHECK-NOT: hipblasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); 15 | // CHECK-NOT: hipblasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); 16 | // CHECK: cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); 17 | // CHECK: cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); 18 | cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); 19 | cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); 20 | } 21 | int main(void) { 22 | int i, j; 23 | // CHECK: hipblasStatus_t stat; 24 | cublasStatus stat; 25 | float* devPtrA; 26 | float* a = 0; 27 | a = (float*)malloc(M * N * sizeof(*a)); 28 | if (!a) { 29 | printf("host memory allocation failed"); 30 | return EXIT_FAILURE; 31 | } 32 | for (j = 0; j < N; j++) { 33 | for (i = 0; i < M; i++) { 34 | a[IDX2C(i, j, M)] = (float)(i * M + j + 1); 35 | } 36 | } 37 | // cublasInit is not supported yet 38 | cublasInit(); 39 | // cublasAlloc is not supported yet 40 | stat = cublasAlloc(M * N, sizeof(*a), (void**)&devPtrA); 41 | // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { 42 | if (stat != CUBLAS_STATUS_SUCCESS) { 43 | printf("device memory allocation failed"); 44 | // cublasShutdown is not supported yet 45 | cublasShutdown(); 46 | return EXIT_FAILURE; 47 | } 48 | // CHECK: stat = hipblasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); 49 | stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); 50 | // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { 51 | if (stat != CUBLAS_STATUS_SUCCESS) { 52 | printf("data download failed"); 53 | // cublasFree 
is not supported yet 54 | cublasFree(devPtrA); 55 | // cublasShutdown is not supported yet 56 | cublasShutdown(); 57 | return EXIT_FAILURE; 58 | } 59 | modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f); 60 | // CHECK: stat = hipblasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); 61 | stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); 62 | // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { 63 | if (stat != CUBLAS_STATUS_SUCCESS) { 64 | printf("data upload failed"); 65 | // cublasFree is not supported yet 66 | cublasFree(devPtrA); 67 | // cublasShutdown is not supported yet 68 | cublasShutdown(); 69 | return EXIT_FAILURE; 70 | } 71 | // cublasFree is not supported yet 72 | cublasFree(devPtrA); 73 | // cublasShutdown is not supported yet 74 | cublasShutdown(); 75 | for (j = 0; j < N; j++) { 76 | for (i = 0; i < M; i++) { 77 | printf("%7.0f", a[IDX2C(i, j, M)]); 78 | } 79 | printf("\n"); 80 | } 81 | free(a); 82 | return EXIT_SUCCESS; 83 | } 84 | -------------------------------------------------------------------------------- /tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 -roc %clang_args 2 | 3 | // CHECK: #include 4 | #include 5 | #include 6 | #include 7 | // CHECK: #include "rocblas.h" 8 | // CHECK-NOT: #include "cublas_v2.h" 9 | #include "cublas.h" 10 | #include "cublas_v2.h" 11 | // CHECK-NOT: #include "rocblas.h" 12 | #define M 6 13 | #define N 5 14 | #define IDX2C(i,j,ld) (((j)*(ld))+(i)) 15 | static __inline__ void modify(float *m, int ldm, int n, int p, int q, float 16 | alpha, float beta) { 17 | // CHECK: rocblas_handle blasHandle; 18 | cublasHandle_t blasHandle; 19 | // CHECK: rocblas_status blasStatus = rocblas_create_handle(&blasHandle); 20 | cublasStatus blasStatus = cublasCreate(&blasHandle); 21 | // CHECK: rocblas_sscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm); 22 | // CHECK: 
rocblas_sscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1); 23 | cublasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm); 24 | cublasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1); 25 | // CHECK: rocblas_destroy_handle(blasHandle); 26 | cublasDestroy(blasHandle); 27 | } 28 | int main(void) { 29 | int i, j; 30 | // CHECK: rocblas_status stat; 31 | cublasStatus stat; 32 | float* devPtrA; 33 | float* a = 0; 34 | a = (float *)malloc(M * N * sizeof(*a)); 35 | if (!a) { 36 | printf("host memory allocation failed"); 37 | return EXIT_FAILURE; 38 | } 39 | for (j = 0; j < N; j++) { 40 | for (i = 0; i < M; i++) { 41 | a[IDX2C(i, j, M)] = (float)(i * M + j + 1); 42 | } 43 | } 44 | // cublasInit is not supported yet 45 | cublasInit(); 46 | // cublasAlloc is not supported yet 47 | stat = cublasAlloc(M*N, sizeof(*a), (void**)&devPtrA); 48 | // CHECK: if (stat != rocblas_status_success) { 49 | if (stat != CUBLAS_STATUS_SUCCESS) { 50 | printf("device memory allocation failed"); 51 | // cublasShutdown is not supported yet 52 | cublasShutdown(); 53 | return EXIT_FAILURE; 54 | } 55 | // CHECK: stat = rocblas_set_matrix(M, N, sizeof(*a), a, M, devPtrA, M); 56 | stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); 57 | // CHECK: if (stat != rocblas_status_success) { 58 | if (stat != CUBLAS_STATUS_SUCCESS) { 59 | printf("data download failed"); 60 | // cublasFree is not supported yet 61 | cublasFree(devPtrA); 62 | // cublasShutdown is not supported yet 63 | cublasShutdown(); 64 | return EXIT_FAILURE; 65 | } 66 | modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f); 67 | // CHECK: stat = rocblas_get_matrix(M, N, sizeof(*a), devPtrA, M, a, M); 68 | stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); 69 | // CHECK: if (stat != rocblas_status_success) { 70 | if (stat != CUBLAS_STATUS_SUCCESS) { 71 | printf("data upload failed"); 72 | // cublasFree is not supported yet 73 | cublasFree(devPtrA); 74 | // cublasShutdown is not supported yet 75 | 
cublasShutdown(); 76 | return EXIT_FAILURE; 77 | } 78 | // cublasFree is not supported yet 79 | cublasFree(devPtrA); 80 | // cublasShutdown is not supported yet 81 | cublasShutdown(); 82 | for (j = 0; j < N; j++) { 83 | for (i = 0; i < M; i++) { 84 | printf("%7.0f", a[IDX2C(i, j, M)]); 85 | } 86 | printf("\n"); 87 | } 88 | free(a); 89 | return EXIT_SUCCESS; 90 | } 91 | -------------------------------------------------------------------------------- /tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas_v2.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 -roc %clang_args 2 | 3 | #include 4 | #include 5 | #include 6 | // CHECK: #include "hip/hip_runtime.h" 7 | #include "cuda.h" 8 | // CHECK: #include "rocblas.h" 9 | // CHECK-NOT: #include "cublas_v2.h" 10 | #include "cublas_v2.h" 11 | // CHECK-NOT: #include "rocblas.h" 12 | #define M 6 13 | #define N 5 14 | #define IDX2C(i,j,ld) (((j)*(ld))+(i)) 15 | static __inline__ void modify(float *m, int ldm, int n, int p, int q, float 16 | alpha, float beta) { 17 | // CHECK: rocblas_handle blasHandle; 18 | cublasHandle_t blasHandle; 19 | // CHECK: rocblas_status blasStatus = rocblas_create_handle(&blasHandle); 20 | cublasStatus_t blasStatus = cublasCreate(&blasHandle); 21 | // CHECK: rocblas_sscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm); 22 | cublasSscal(blasHandle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm); 23 | // CHECK: rocblas_sscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1); 24 | cublasSscal(blasHandle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1); 25 | // CHECK: rocblas_destroy_handle(blasHandle); 26 | cublasDestroy(blasHandle); 27 | } 28 | int main(void) { 29 | int i, j; 30 | // CHECK: rocblas_status stat; 31 | cublasStatus_t stat; 32 | // CHECK: hipError_t result = hipSuccess; 33 | cudaError_t result = cudaSuccess; 34 | float* devPtrA; 35 | float* a = 0; 36 | a = (float *)malloc(M * N * 
sizeof(*a)); 37 | if (!a) { 38 | printf("host memory allocation failed"); 39 | return EXIT_FAILURE; 40 | } 41 | for (j = 0; j < N; j++) { 42 | for (i = 0; i < M; i++) { 43 | a[IDX2C(i, j, M)] = (float)(i * M + j + 1); 44 | } 45 | } 46 | // CHECK: hipMalloc((void**)&devPtrA, M*N*sizeof(*a)); 47 | result = cudaMalloc((void**)&devPtrA, M*N*sizeof(*a)); 48 | // CHECK: if (result != hipSuccess) { 49 | if (result != cudaSuccess) { 50 | printf("device memory allocation failed"); 51 | return EXIT_FAILURE; 52 | } 53 | // CHECK: stat = rocblas_set_matrix(M, N, sizeof(*a), a, M, devPtrA, M); 54 | stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); 55 | // CHECK: if (stat != rocblas_status_success) { 56 | if (stat != CUBLAS_STATUS_SUCCESS) { 57 | printf("data download failed"); 58 | // CHECK: hipFree(devPtrA); 59 | cudaFree(devPtrA); 60 | return EXIT_FAILURE; 61 | } 62 | modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f); 63 | // CHECK: stat = rocblas_get_matrix(M, N, sizeof(*a), devPtrA, M, a, M); 64 | stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); 65 | // CHECK: if (stat != rocblas_status_success) { 66 | if (stat != CUBLAS_STATUS_SUCCESS) { 67 | printf("data upload failed"); 68 | // CHECK: hipFree(devPtrA); 69 | cudaFree(devPtrA); 70 | return EXIT_FAILURE; 71 | } 72 | // CHECK: hipFree(devPtrA); 73 | cudaFree(devPtrA); 74 | for (j = 0; j < N; j++) { 75 | for (i = 0; i < M; i++) { 76 | printf("%7.0f", a[IDX2C(i, j, M)]); 77 | } 78 | printf("\n"); 79 | } 80 | free(a); 81 | return EXIT_SUCCESS; 82 | } 83 | -------------------------------------------------------------------------------- /tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 -roc %clang_args 2 | 3 | #include 4 | #include 5 | #include 6 | // CHECK: #include 7 | #include 8 | // CHECK: #include "rocblas.h" 9 | #include "cublas_v2.h" 10 | #define M 
6 11 | #define N 5 12 | #define IDX2F(i,j,ld) ((((j)-1)*(ld))+((i)-1)) 13 | // CHECK: static __inline__ void modify(rocblas_handle handle, float *m, int ldm, int 14 | static __inline__ void modify(cublasHandle_t handle, float *m, int ldm, int 15 | n, int p, int q, float alpha, float beta) { 16 | // CHECK: rocblas_sscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm); 17 | // CHECK: rocblas_sscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1); 18 | cublasSscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm); 19 | cublasSscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1); 20 | } 21 | int main(void) { 22 | // CHECK: hipError_t cudaStat; 23 | // CHECK: rocblas_status stat; 24 | // CHECK: rocblas_handle handle; 25 | cudaError_t cudaStat; 26 | cublasStatus_t stat; 27 | cublasHandle_t handle; 28 | int i, j; 29 | float* devPtrA; 30 | float* a = 0; 31 | a = (float *)malloc(M * N * sizeof(*a)); 32 | if (!a) { 33 | printf("host memory allocation failed"); 34 | return EXIT_FAILURE; 35 | } 36 | for (j = 1; j <= N; j++) { 37 | for (i = 1; i <= M; i++) { 38 | a[IDX2F(i, j, M)] = (float)((i - 1) * M + j); 39 | } 40 | } 41 | // CHECK: cudaStat = hipMalloc((void**)&devPtrA, M*N * sizeof(*a)); 42 | cudaStat = cudaMalloc((void**)&devPtrA, M*N * sizeof(*a)); 43 | // CHECK: if (cudaStat != hipSuccess) { 44 | if (cudaStat != cudaSuccess) { 45 | printf("device memory allocation failed"); 46 | return EXIT_FAILURE; 47 | } 48 | // CHECK: stat = rocblas_create_handle(&handle); 49 | stat = cublasCreate(&handle); 50 | // CHECK: if (stat != rocblas_status_success) { 51 | if (stat != CUBLAS_STATUS_SUCCESS) { 52 | printf("CUBLAS initialization failed\n"); 53 | return EXIT_FAILURE; 54 | } 55 | // CHECK: stat = rocblas_set_matrix(M, N, sizeof(*a), a, M, devPtrA, M); 56 | stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); 57 | // CHECK: if (stat != rocblas_status_success) { 58 | if (stat != CUBLAS_STATUS_SUCCESS) { 59 | printf("data download failed"); 60 | // CHECK: 
hipFree(devPtrA); 61 | // CHECK: rocblas_destroy_handle(handle); 62 | cudaFree(devPtrA); 63 | cublasDestroy(handle); 64 | return EXIT_FAILURE; 65 | } 66 | modify(handle, devPtrA, M, N, 2, 3, 16.0f, 12.0f); 67 | // CHECK: stat = rocblas_get_matrix(M, N, sizeof(*a), devPtrA, M, a, M); 68 | stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); 69 | // CHECK: if (stat != rocblas_status_success) { 70 | if (stat != CUBLAS_STATUS_SUCCESS) { 71 | printf("data upload failed"); 72 | // CHECK: hipFree(devPtrA); 73 | // CHECK: rocblas_destroy_handle(handle); 74 | cudaFree(devPtrA); 75 | cublasDestroy(handle); 76 | return EXIT_FAILURE; 77 | } 78 | // CHECK: hipFree(devPtrA); 79 | // CHECK: rocblas_destroy_handle(handle); 80 | cudaFree(devPtrA); 81 | cublasDestroy(handle); 82 | for (j = 1; j <= N; j++) { 83 | for (i = 1; i <= M; i++) { 84 | printf("%7.0f", a[IDX2F(i, j, M)]); 85 | } 86 | printf("\n"); 87 | } 88 | free(a); 89 | return EXIT_SUCCESS; 90 | } 91 | -------------------------------------------------------------------------------- /tests/unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 -roc %clang_args 2 | 3 | #include 4 | #include 5 | // CHECK: #include 6 | #include 7 | // CHECK: #include "rocblas.h" 8 | #include "cublas_v2.h" 9 | #define IDX2C(i,j,ld) (((j)*(ld))+(i)) 10 | #define m 6 11 | #define n 4 12 | #define k 5 13 | int main(void) { 14 | // CHECK: hipError_t cudaStat; 15 | // CHECK: rocblas_status stat; 16 | // CHECK: rocblas_handle handle; 17 | cudaError_t cudaStat; 18 | cublasStatus_t stat; 19 | cublasHandle_t handle; 20 | int i, j; 21 | float * a; 22 | float * b; 23 | float * c; 24 | a = (float *)malloc(m*k * sizeof(float)); 25 | b = (float *)malloc(k*n * sizeof(float)); 26 | c = (float *)malloc(m*n * sizeof(float)); 27 | int ind = 11; 28 | for (j = 0; j 4 | // CHECK: #include 
"hip/hip_complex.h" 5 | #include "cuComplex.h" 6 | 7 | #define TYPEFLOAT 8 | #define DIMX 100 9 | #define DIMY 40 10 | #define moveX 2 11 | #define moveY 1 12 | 13 | #define MAXITERATIONS 10 14 | 15 | #ifdef TYPEFLOAT 16 | #define TYPE float 17 | // CHECK: #define cTYPE hipFloatComplex 18 | #define cTYPE cuFloatComplex 19 | // CHECK: #define cMakecuComplex(re,i) make_hipFloatComplex(re,i) 20 | #define cMakecuComplex(re,i) make_cuFloatComplex(re,i) 21 | #endif 22 | #ifdef TYPEDOUBLE 23 | // CHECK: #define TYPE hipDoubleComplex 24 | #define TYPE cuDoubleComplex 25 | // CHECK: #define cMakecuComplex(re,i) make_hipDoubleComplex(re,i) 26 | #define cMakecuComplex(re,i) make_cuDoubleComplex(re,i) 27 | #endif 28 | 29 | __device__ cTYPE juliaFunctor(cTYPE p, cTYPE c) { 30 | // CHECK: return hipCaddf(hipCmulf(p, p), c); 31 | return cuCaddf(cuCmulf(p, p), c); 32 | } 33 | 34 | __device__ cTYPE convertToComplex(int x, int y, float zoom) { 35 | TYPE jx = 1.5 * (x - DIMX / 2) / (0.5 * zoom * DIMX) + moveX; 36 | TYPE jy = (y - DIMY / 2) / (0.5 * zoom * DIMY) + moveY; 37 | return cMakecuComplex(jx, jy); 38 | } 39 | 40 | __device__ int evolveComplexPoint(cTYPE p, cTYPE c) { 41 | int it = 1; 42 | // CHECK: while (it <= MAXITERATIONS && hipCabsf(p) <= 4) { 43 | while (it <= MAXITERATIONS && cuCabsf(p) <= 4) { 44 | p = juliaFunctor(p, c); 45 | it++; 46 | } 47 | return it; 48 | } 49 | 50 | __global__ void computeJulia(int* data, cTYPE c, float zoom) { 51 | int i = blockIdx.x * blockDim.x + threadIdx.x; 52 | int j = blockIdx.y * blockDim.y + threadIdx.y; 53 | 54 | if (i 4 | #include 5 | // CHECK: #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define DATASIZE 8 11 | #define BATCH 2 12 | 13 | #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } 14 | // CHECK: inline void gpuAssert(hipError_t code, const char *file, int line, bool abort = true) 15 | inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true) 16 | { 17 | // CHECK: if (code != 
hipSuccess) 18 | if (code != cudaSuccess) 19 | { 20 | // CHECK: fprintf(stderr, "GPUassert: %s %s %dn", hipGetErrorString(code), file, line); 21 | fprintf(stderr, "GPUassert: %s %s %dn", cudaGetErrorString(code), file, line); 22 | if (abort) exit(code); 23 | } 24 | } 25 | 26 | int main() 27 | { 28 | // --- Host side input data allocation and initialization 29 | // CHECK: hipfftReal *hostInputData = (hipfftReal*)malloc(DATASIZE*BATCH * sizeof(hipfftReal)); 30 | cufftReal *hostInputData = (cufftReal*)malloc(DATASIZE*BATCH * sizeof(cufftReal)); 31 | for (int i = 0; iHost copy of the results 65 | // CHECK: gpuErrchk(hipMemcpy(hostOutputData, deviceOutputData, (DATASIZE / 2 + 1) * BATCH * sizeof(hipfftComplex), hipMemcpyDeviceToHost)); 66 | gpuErrchk(cudaMemcpy(hostOutputData, deviceOutputData, (DATASIZE / 2 + 1) * BATCH * sizeof(cufftComplex), cudaMemcpyDeviceToHost)); 67 | 68 | for (int i = 0; i 3 | #include 4 | 5 | __global__ void test_0() { 6 | int a = 10; 7 | } 8 | 9 | namespace first { 10 | __global__ void test_1() { 11 | int b = 20; 12 | } 13 | namespace second { 14 | __global__ void test_2() { 15 | int c = 30; 16 | } 17 | } 18 | } 19 | 20 | int main() { 21 | // CHECK: hipLaunchKernelGGL(::test_0, dim3(1), dim3(1), 0, 0); 22 | ::test_0<<<1, 1>>>(); 23 | // CHECK: hipLaunchKernelGGL(first::test_1, dim3(1), dim3(1), 0, 0); 24 | first::test_1<<<1, 1>>>(); 25 | // CHECK: hipLaunchKernelGGL(first::second::test_2, dim3(1), dim3(1), 0, 0); 26 | first::second::test_2<<<1, 1>>>(); 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /tests/unit_tests/options/kernel-execution-syntax/both-kernel-execution-syntax.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --cuda-kernel-execution-syntax --hip-kernel-execution-syntax %clang_args 2 | // CHECK: #include 3 | #include 4 | 5 | __global__ 6 | void add(int n, float *x, float *y) 7 | { 
8 | int index = blockIdx.x * blockDim.x + threadIdx.x; 9 | int stride = blockDim.x * gridDim.x; 10 | for (int i = index; i < n; i += stride) 11 | y[i] = x[i] + y[i]; 12 | } 13 | 14 | int main(int argc, char *argv[]) 15 | { 16 | int numElements = 10; 17 | bool testResult = true; 18 | float *A, *B; 19 | // CHECK: hipMallocManaged(&A, numElements * sizeof(float)); 20 | cudaMallocManaged(&A, numElements * sizeof(float)); 21 | // CHECK: hipMallocManaged(&B, numElements * sizeof(float)); 22 | cudaMallocManaged(&B, numElements * sizeof(float)); 23 | for (int i = 0; i < numElements; i++) { 24 | A[i] = 1.0f; 25 | B[i] = 2.0f; 26 | } 27 | int blockSize = 256; 28 | int numBlocks = (numElements + blockSize - 1) / blockSize; 29 | dim3 dimGrid(numBlocks, 1, 1); 30 | dim3 dimBlock(blockSize, 1, 1); 31 | // CHECK: hipLaunchKernelGGL(add, dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B); 32 | add<<>>(numElements, A, B); 33 | // CHECK: hipDeviceSynchronize(); 34 | cudaDeviceSynchronize(); 35 | float maxError = 0.0f; 36 | for (int i = 0; i < numElements; i++) 37 | maxError = fmax(maxError, fabs(B[i]-3.0f)); 38 | // CHECK: hipFree(A); 39 | cudaFree(A); 40 | // CHECK: hipFree(B); 41 | cudaFree(B); 42 | if(maxError == 0.0f) 43 | return 0; 44 | return -1; 45 | } 46 | -------------------------------------------------------------------------------- /tests/unit_tests/options/kernel-execution-syntax/cuda-kernel-execution-syntax.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --cuda-kernel-execution-syntax %clang_args 2 | // CHECK: #include 3 | #include 4 | 5 | __global__ 6 | void add(int n, float *x, float *y) 7 | { 8 | int index = blockIdx.x * blockDim.x + threadIdx.x; 9 | int stride = blockDim.x * gridDim.x; 10 | for (int i = index; i < n; i += stride) 11 | y[i] = x[i] + y[i]; 12 | } 13 | 14 | int main(int argc, char *argv[]) 15 | { 16 | int numElements = 10; 17 | bool testResult = true; 18 | 
float *A, *B; 19 | // CHECK: hipMallocManaged(&A, numElements * sizeof(float)); 20 | cudaMallocManaged(&A, numElements * sizeof(float)); 21 | // CHECK: hipMallocManaged(&B, numElements * sizeof(float)); 22 | cudaMallocManaged(&B, numElements * sizeof(float)); 23 | for (int i = 0; i < numElements; i++) { 24 | A[i] = 1.0f; 25 | B[i] = 2.0f; 26 | } 27 | int blockSize = 256; 28 | int numBlocks = (numElements + blockSize - 1) / blockSize; 29 | dim3 dimGrid(numBlocks, 1, 1); 30 | dim3 dimBlock(blockSize, 1, 1); 31 | // CHECK: add<<>>(numElements, A, B); 32 | add<<>>(numElements, A, B); 33 | // CHECK: hipDeviceSynchronize(); 34 | cudaDeviceSynchronize(); 35 | float maxError = 0.0f; 36 | for (int i = 0; i < numElements; i++) 37 | maxError = fmax(maxError, fabs(B[i]-3.0f)); 38 | // CHECK: hipFree(A); 39 | cudaFree(A); 40 | // CHECK: hipFree(B); 41 | cudaFree(B); 42 | if(maxError == 0.0f) 43 | return 0; 44 | return -1; 45 | } 46 | -------------------------------------------------------------------------------- /tests/unit_tests/options/kernel-execution-syntax/hip-kernel-execution-syntax.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | // CHECK: #include 3 | #include 4 | 5 | __global__ 6 | void add(int n, float *x, float *y) 7 | { 8 | int index = blockIdx.x * blockDim.x + threadIdx.x; 9 | int stride = blockDim.x * gridDim.x; 10 | for (int i = index; i < n; i += stride) 11 | y[i] = x[i] + y[i]; 12 | } 13 | 14 | int main(int argc, char *argv[]) 15 | { 16 | int numElements = 10; 17 | bool testResult = true; 18 | float *A, *B; 19 | // CHECK: hipMallocManaged(&A, numElements * sizeof(float)); 20 | cudaMallocManaged(&A, numElements * sizeof(float)); 21 | // CHECK: hipMallocManaged(&B, numElements * sizeof(float)); 22 | cudaMallocManaged(&B, numElements * sizeof(float)); 23 | for (int i = 0; i < numElements; i++) { 24 | A[i] = 1.0f; 25 | B[i] = 
2.0f; 26 | } 27 | int blockSize = 256; 28 | int numBlocks = (numElements + blockSize - 1) / blockSize; 29 | dim3 dimGrid(numBlocks, 1, 1); 30 | dim3 dimBlock(blockSize, 1, 1); 31 | // CHECK: hipLaunchKernelGGL(add, dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B); 32 | add<<>>(numElements, A, B); 33 | // CHECK: hipDeviceSynchronize(); 34 | cudaDeviceSynchronize(); 35 | float maxError = 0.0f; 36 | for (int i = 0; i < numElements; i++) 37 | maxError = fmax(maxError, fabs(B[i]-3.0f)); 38 | // CHECK: hipFree(A); 39 | cudaFree(A); 40 | // CHECK: hipFree(B); 41 | cudaFree(B); 42 | if(maxError == 0.0f) 43 | return 0; 44 | return -1; 45 | } 46 | -------------------------------------------------------------------------------- /tests/unit_tests/options/kernel-execution-syntax/none-kernel-execution-syntax.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | // CHECK: #include 3 | #include 4 | 5 | __global__ 6 | void add(int n, float *x, float *y) 7 | { 8 | int index = blockIdx.x * blockDim.x + threadIdx.x; 9 | int stride = blockDim.x * gridDim.x; 10 | for (int i = index; i < n; i += stride) 11 | y[i] = x[i] + y[i]; 12 | } 13 | 14 | int main(int argc, char *argv[]) 15 | { 16 | int numElements = 10; 17 | bool testResult = true; 18 | float *A, *B; 19 | // CHECK: hipMallocManaged(&A, numElements * sizeof(float)); 20 | cudaMallocManaged(&A, numElements * sizeof(float)); 21 | // CHECK: hipMallocManaged(&B, numElements * sizeof(float)); 22 | cudaMallocManaged(&B, numElements * sizeof(float)); 23 | for (int i = 0; i < numElements; i++) { 24 | A[i] = 1.0f; 25 | B[i] = 2.0f; 26 | } 27 | int blockSize = 256; 28 | int numBlocks = (numElements + blockSize - 1) / blockSize; 29 | dim3 dimGrid(numBlocks, 1, 1); 30 | dim3 dimBlock(blockSize, 1, 1); 31 | // CHECK: add<<>>(numElements, A, B); 32 | add<<>>(numElements, A, B); 33 | // CHECK: hipDeviceSynchronize(); 34 | 
cudaDeviceSynchronize(); 35 | float maxError = 0.0f; 36 | for (int i = 0; i < numElements; i++) 37 | maxError = fmax(maxError, fabs(B[i]-3.0f)); 38 | // CHECK: hipFree(A); 39 | cudaFree(A); 40 | // CHECK: hipFree(B); 41 | cudaFree(B); 42 | if(maxError == 0.0f) 43 | return 0; 44 | return -1; 45 | } 46 | -------------------------------------------------------------------------------- /tests/unit_tests/pp/pp_if_else_conditionals.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --skip-excluded-preprocessor-conditional-blocks --hip-kernel-execution-syntax %clang_args 2 | // CHECK: #include 3 | 4 | #include 5 | 6 | __global__ void axpy_kernel(float a, float* x, float* y) { 7 | y[threadIdx.x] = a * x[threadIdx.x]; 8 | } 9 | 10 | void axpy(float a, float* x, float* y) { 11 | 12 | #ifdef SOME_MACRO 13 | // CHECK: axpy_kernel <<<1, 1>>> (a, y, x); 14 | axpy_kernel <<<1, 1>>> (a, y, x); 15 | #endif 16 | 17 | #ifndef SOME_MACRO 18 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y, x); 19 | axpy_kernel <<<1, 2>>> (a, y, x); 20 | #endif 21 | 22 | #ifdef SOME_MACRO 23 | // CHECK: axpy_kernel <<<1, 3>>> (a, y, x); 24 | axpy_kernel <<<1, 3>>> (a, y, x); 25 | #else 26 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y); 27 | axpy_kernel <<<1, 4>>> (a, x, y); 28 | #endif 29 | 30 | } -------------------------------------------------------------------------------- /tests/unit_tests/pp/pp_if_else_conditionals_01.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --skip-excluded-preprocessor-conditional-blocks --hip-kernel-execution-syntax %clang_args 2 | // CHECK: #include 3 | 4 | __global__ void axpy_kernel(float a, float* x, float* y) { 5 | y[threadIdx.x] = a * x[threadIdx.x]; 6 | } 7 | 8 | void axpy(float a, float* x, float* y) { 9 | float* y_new = nullptr; 10 | 
#ifdef SOME_MACRO 11 | y_new = x; 12 | // CHECK: axpy_kernel <<<1, 1>>> (a, y_new, x); 13 | axpy_kernel <<<1, 1>>> (a, y_new, x); 14 | #endif 15 | 16 | #ifndef SOME_MACRO 17 | y_new = y; 18 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y_new, x); 19 | axpy_kernel <<<1, 2>>> (a, y_new, x); 20 | #endif 21 | 22 | #ifdef SOME_MACRO 23 | // CHECK: axpy_kernel <<<1, 3>>> (a, y, x); 24 | axpy_kernel <<<1, 3>>> (a, y, x); 25 | #else 26 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y); 27 | axpy_kernel <<<1, 4>>> (a, x, y); 28 | #endif 29 | 30 | #ifdef SOME_MACRO 31 | // CHECK: axpy_kernel <<<1, 5>>> (a, y, x); 32 | axpy_kernel <<<1, 5>>> (a, y, x); 33 | #elif defined SOME_MACRO_1 34 | // CHECK: axpy_kernel <<<1, 6>>> (a, x, y); 35 | axpy_kernel <<<1, 6>>> (a, x, y); 36 | #else 37 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(7), 0, 0, a, x, y); 38 | axpy_kernel <<<1, 7>>> (a, x, y); 39 | #endif 40 | 41 | #ifndef SOME_MACRO 42 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(8), 0, 0, a, y, x); 43 | axpy_kernel <<<1, 8>>> (a, y, x); 44 | #elif !defined(SOME_MACRO_1) 45 | // CHECK: axpy_kernel <<<1, 9>>> (a, x, y); 46 | axpy_kernel <<<1, 9>>> (a, x, y); 47 | #else 48 | // CHECK: axpy_kernel <<<1, 10>>> (a, x, y); 49 | axpy_kernel <<<1, 10>>> (a, x, y); 50 | #endif 51 | 52 | } -------------------------------------------------------------------------------- /tests/unit_tests/pp/pp_if_else_conditionals_01_LLVM_10.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | // CHECK: #include 3 | 4 | __global__ void axpy_kernel(float a, float* x, float* y) { 5 | y[threadIdx.x] = a * x[threadIdx.x]; 6 | } 7 | 8 | void axpy(float a, float* x, float* y) { 9 | float* y_new = nullptr; 10 | #ifdef SOME_MACRO 11 | y_new = x; 12 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(1), 0, 
0, a, y_new, x); 13 | axpy_kernel <<<1, 1>>> (a, y_new, x); 14 | #endif 15 | 16 | #ifndef SOME_MACRO 17 | y_new = y; 18 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y_new, x); 19 | axpy_kernel <<<1, 2>>> (a, y_new, x); 20 | #endif 21 | 22 | #ifdef SOME_MACRO 23 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(3), 0, 0, a, y, x); 24 | axpy_kernel <<<1, 3>>> (a, y, x); 25 | #else 26 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y); 27 | axpy_kernel <<<1, 4>>> (a, x, y); 28 | #endif 29 | 30 | #ifdef SOME_MACRO 31 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(5), 0, 0, a, y, x); 32 | axpy_kernel <<<1, 5>>> (a, y, x); 33 | #elif defined SOME_MACRO_1 34 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(6), 0, 0, a, x, y); 35 | axpy_kernel <<<1, 6>>> (a, x, y); 36 | #else 37 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(7), 0, 0, a, x, y); 38 | axpy_kernel <<<1, 7>>> (a, x, y); 39 | #endif 40 | 41 | #ifndef SOME_MACRO 42 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(8), 0, 0, a, y, x); 43 | axpy_kernel <<<1, 8>>> (a, y, x); 44 | #elif !defined(SOME_MACRO_1) 45 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(9), 0, 0, a, x, y); 46 | axpy_kernel <<<1, 9>>> (a, x, y); 47 | #else 48 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(10), 0, 0, a, x, y); 49 | axpy_kernel <<<1, 10>>> (a, x, y); 50 | #endif 51 | 52 | } -------------------------------------------------------------------------------- /tests/unit_tests/pp/pp_if_else_conditionals_LLVM_10.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | // CHECK: #include 3 | 4 | #include 5 | 6 | __global__ void axpy_kernel(float a, float* x, float* y) { 7 | y[threadIdx.x] = a * x[threadIdx.x]; 8 | } 9 | 10 | void axpy(float a, float* x, float* y) { 11 | 12 | #ifdef SOME_MACRO 
13 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(1), 0, 0, a, y, x); 14 | axpy_kernel <<<1, 1>>> (a, y, x); 15 | #endif 16 | 17 | #ifndef SOME_MACRO 18 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y, x); 19 | axpy_kernel <<<1, 2>>> (a, y, x); 20 | #endif 21 | 22 | #ifdef SOME_MACRO 23 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(3), 0, 0, a, y, x); 24 | axpy_kernel <<<1, 3>>> (a, y, x); 25 | #else 26 | // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y); 27 | axpy_kernel <<<1, 4>>> (a, x, y); 28 | #endif 29 | 30 | } -------------------------------------------------------------------------------- /tests/unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | /* 3 | Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | */ 23 | 24 | // CHECK: #include 25 | #include 26 | // CHECK-NOT: #include 27 | #include 28 | 29 | // CHECK: extern texture tex; 30 | extern texture tex; 31 | 32 | extern "C" __global__ void tex2dKernel(float* outputData, int width, int height) { 33 | int x = blockDim.x * blockIdx.x + threadIdx.x; 34 | int y = blockDim.y * blockIdx.y + threadIdx.y; 35 | outputData[y * width + x] = tex2D(tex, x, y); 36 | } 37 | 38 | int main(int argc, char** argv) { 39 | float out = 0.f; 40 | // CHECK: tex2dKernel<<<512, 512>>>(&out, 128, 128); 41 | tex2dKernel<<<512, 512>>>(&out, 128, 128); 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /tests/unit_tests/samples/MallocManaged.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | // CHECK: #include 3 | #include 4 | 5 | __global__ 6 | void add(int n, float *x, float *y) 7 | { 8 | int index = blockIdx.x * blockDim.x + threadIdx.x; 9 | int stride = blockDim.x * gridDim.x; 10 | for (int i = index; i < n; i += stride) 11 | y[i] = x[i] + y[i]; 12 | } 13 | 14 | int main(int argc, char *argv[]) 15 | { 16 | int numElements = 10; 17 | bool testResult = true; 18 | float *A, *B; 19 | // CHECK: hipMallocManaged(&A, numElements * sizeof(float)); 20 | cudaMallocManaged(&A, numElements * sizeof(float)); 21 | // CHECK: hipMallocManaged(&B, numElements * sizeof(float)); 22 | cudaMallocManaged(&B, numElements * sizeof(float)); 23 | for (int i = 0; i < numElements; i++) { 24 | A[i] = 1.0f; 25 | B[i] = 2.0f; 26 | } 27 | int blockSize = 256; 28 | int numBlocks = (numElements + blockSize 
- 1) / blockSize; 29 | dim3 dimGrid(numBlocks, 1, 1); 30 | dim3 dimBlock(blockSize, 1, 1); 31 | // CHECK: hipLaunchKernelGGL(add, dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B); 32 | add<<>>(numElements, A, B); 33 | // CHECK: hipDeviceSynchronize(); 34 | cudaDeviceSynchronize(); 35 | float maxError = 0.0f; 36 | for (int i = 0; i < numElements; i++) 37 | maxError = fmax(maxError, fabs(B[i]-3.0f)); 38 | // CHECK: hipFree(A); 39 | cudaFree(A); 40 | // CHECK: hipFree(B); 41 | cudaFree(B); 42 | if(maxError == 0.0f) 43 | return 0; 44 | return -1; 45 | } 46 | -------------------------------------------------------------------------------- /tests/unit_tests/samples/allocators.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | 3 | #pragma once 4 | // CHECK: #include 5 | #include 6 | #include 7 | 8 | /** 9 | * Allocate GPU memory for `count` elements of type `T`. 
10 | */ 11 | template 12 | static T* gpuMalloc(size_t count) { 13 | T* ret = nullptr; 14 | // CHECK: hipMalloc(&ret, count * sizeof(T)); 15 | cudaMalloc(&ret, count * sizeof(T)); 16 | return ret; 17 | } 18 | 19 | template 20 | __global__ void add(int n, T* x, T* y) { 21 | int index = blockIdx.x * blockDim.x + threadIdx.x; 22 | int stride = blockDim.x * gridDim.x; 23 | for (int i = index; i < n; i += stride) 24 | y[i] = x[i] + y[i]; 25 | } 26 | 27 | int main(int argc, char* argv[]) { 28 | size_t numElements = 50; 29 | float *A = gpuMalloc(numElements); 30 | float* B = gpuMalloc(numElements); 31 | for (int i = 0; i < numElements; ++i) { 32 | A[i] = 1.0f; 33 | B[i] = 2.0f; 34 | } 35 | int blockSize = 512; 36 | int numBlocks = (numElements + blockSize - 1) / blockSize; 37 | dim3 dimGrid(numBlocks, 1, 1); 38 | dim3 dimBlock(blockSize, 1, 1); 39 | // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(add), dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B); 40 | add<<>>(numElements, A, B); 41 | // CHECK: hipDeviceSynchronize(); 42 | cudaDeviceSynchronize(); 43 | float maxError = 0.0f; 44 | for (int i = 0; i < numElements; ++i) 45 | maxError = fmax(maxError, fabs(B[i] - 3.0f)); 46 | // CHECK: hipFree(A); 47 | cudaFree(A); 48 | // CHECK: hipFree(B); 49 | cudaFree(B); 50 | if (maxError == 0.0f) 51 | return 0; 52 | return -1; 53 | } 54 | -------------------------------------------------------------------------------- /tests/unit_tests/samples/axpy.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | 3 | #include 4 | 5 | // CHECK: #include 6 | #include 7 | 8 | #define TOKEN_PASTE(X, Y) X ## Y 9 | #define ARG_LIST_AS_MACRO a, device_x, device_y 10 | #define KERNEL_CALL_AS_MACRO axpy<<<1, kDataLen>>> 11 | #define KERNEL_NAME_MACRO axpy 12 | 13 | // CHECK: #define COMPLETE_LAUNCH hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, 
a, device_x, device_y) 14 | #define COMPLETE_LAUNCH axpy<<<1, kDataLen>>>(a, device_x, device_y) 15 | 16 | 17 | template 18 | __global__ void axpy(T a, T *x, T *y) { 19 | y[threadIdx.x] = a * x[threadIdx.x]; 20 | } 21 | 22 | __global__ void empty() { 23 | } 24 | 25 | int main(int argc, char* argv[]) { 26 | const int kDataLen = 4; 27 | 28 | float a = 2.0f; 29 | float host_x[kDataLen] = {1.0f, 2.0f, 3.0f, 4.0f}; 30 | float host_y[kDataLen]; 31 | 32 | // Copy input data to device. 33 | float* device_x; 34 | float* device_y; 35 | 36 | // CHECK: hipMalloc(&device_x, kDataLen * sizeof(float)); 37 | cudaMalloc(&device_x, kDataLen * sizeof(float)); 38 | 39 | #ifdef HERRING 40 | // CHECK: hipMalloc(&device_y, kDataLen * sizeof(float)); 41 | cudaMalloc(&device_y, kDataLen * sizeof(float)); 42 | #else 43 | // CHECK: hipMalloc(&device_y, kDataLen * sizeof(double)); 44 | cudaMalloc(&device_y, kDataLen * sizeof(double)); 45 | #endif 46 | 47 | // CHECK: hipMemcpy(device_x, host_x, kDataLen * sizeof(float), hipMemcpyHostToDevice); 48 | cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), cudaMemcpyHostToDevice); 49 | 50 | // Launch the kernel in numerous different strange ways to exercise the preprocessor. 
51 | // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y); 52 | axpy<<<1, kDataLen>>>(a, device_x, device_y); 53 | 54 | // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y); 55 | axpy<<<1, kDataLen>>>(a, device_x, device_y); 56 | 57 | // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, TOKEN_PASTE(device, _x), device_y); 58 | axpy<<<1, kDataLen>>>(a, TOKEN_PASTE(device, _x), device_y); 59 | 60 | // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); 61 | axpy<<<1, kDataLen>>>(ARG_LIST_AS_MACRO); 62 | 63 | // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(KERNEL_NAME_MACRO), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); 64 | KERNEL_NAME_MACRO<<<1, kDataLen>>>(ARG_LIST_AS_MACRO); 65 | 66 | // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); 67 | KERNEL_CALL_AS_MACRO(ARG_LIST_AS_MACRO); 68 | 69 | // CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0); 70 | empty<<<1, kDataLen>>> ( ); 71 | 72 | // CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0); 73 | empty<<<1, kDataLen, 0>>>(); 74 | 75 | // CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0); 76 | empty<<<1, kDataLen, 0, 0>>>(); 77 | 78 | // CHECK: COMPLETE_LAUNCH; 79 | COMPLETE_LAUNCH; 80 | 81 | 82 | // Copy output data to host. 83 | // CHECK: hipDeviceSynchronize(); 84 | cudaDeviceSynchronize(); 85 | 86 | // CHECK: hipMemcpy(host_y, device_y, kDataLen * sizeof(float), hipMemcpyDeviceToHost); 87 | cudaMemcpy(host_y, device_y, kDataLen * sizeof(float), cudaMemcpyDeviceToHost); 88 | 89 | // Print the results. 
90 | for (int i = 0; i < kDataLen; ++i) { 91 | std::cout << "y[" << i << "] = " << host_y[i] << "\n"; 92 | } 93 | 94 | // CHECK: hipDeviceReset(); 95 | cudaDeviceReset(); 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /tests/unit_tests/samples/cudaRegister.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | 3 | /* 4 | Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | */ 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #define LEN 1024 29 | #define SIZE LEN * sizeof(float) 30 | #define ITER 1024*1024 31 | 32 | // CHECK: if(status != hipSuccess) { 33 | #define check(msg, status){ \ 34 | if(status != cudaSuccess) { \ 35 | printf("%s failed. 
\n", #msg); \ 36 | } \ 37 | } 38 | 39 | __global__ void Inc1(float *Ad, float *Bd){ 40 | int tx = threadIdx.x + blockIdx.x * blockDim.x; 41 | if(tx < 1 ){ 42 | for(int i=0;i>>(Ad, Bd); 87 | A[0] = -(ITER*1.0f); 88 | std::cout<<"Same cache line before completion: \t"<< A[0]<>>(Ad, Bd); 100 | A[0] = -(ITER*1.0f); 101 | std::cout<<"Diff cache line before completion: \t"< 6 | // CHECK: #include 7 | #include 8 | 9 | __global__ void dynamicReverse(int *d, int n) 10 | { 11 | // Dynamic shared memory 12 | // CHECK: __shared__ int s[]; 13 | extern __shared__ int s[]; 14 | int t = threadIdx.x; 15 | int tr = n-t-1; 16 | s[t] = d[t]; 17 | __syncthreads(); 18 | d[t] = s[tr]; 19 | } 20 | 21 | int main(void) 22 | { 23 | const int n = 64; 24 | int a[n], r[n], d[n]; 25 | 26 | for (int i = 0; i < n; i++) { 27 | a[i] = i; 28 | r[i] = n-i-1; 29 | d[i] = 0; 30 | } 31 | 32 | int *d_d; 33 | // CHECK: hipMalloc(&d_d, n * sizeof(int)); 34 | cudaMalloc(&d_d, n * sizeof(int)); 35 | // run version with dynamic shared memory 36 | // CHECK: hipMemcpy(d_d, a, n*sizeof(int), hipMemcpyHostToDevice); 37 | cudaMemcpy(d_d, a, n*sizeof(int), cudaMemcpyHostToDevice); 38 | // CHECK: hipLaunchKernelGGL(dynamicReverse, dim3(1), dim3(n), n*sizeof(int), 0, d_d, n); 39 | dynamicReverse<<<1,n,n*sizeof(int)>>>(d_d, n); 40 | // CHECK: hipMemcpy(d, d_d, n*sizeof(int), hipMemcpyDeviceToHost); 41 | cudaMemcpy(d, d_d, n*sizeof(int), cudaMemcpyDeviceToHost); 42 | for (int i = 0; i < n; i++) 43 | if (d[i] != r[i]) printf("Error: d[%d]!=r[%d] (%d, %d)n", i, i, d[i], r[i]); 44 | } 45 | -------------------------------------------------------------------------------- /tests/unit_tests/samples/half2_allocators.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | 3 | #pragma once 4 | // CHECK: #include 5 | #include 6 | #include 7 | // CHECK: #include "hip/hip_fp16.h" 8 | #include "cuda_fp16.h" 
9 | /** 10 | * Allocate GPU memory for `count` elements of type `T`. 11 | */ 12 | template 13 | static T *gpuMalloc(size_t count) { 14 | T *ret = nullptr; 15 | // CHECK: hipMalloc(&ret, count * sizeof(T)); 16 | cudaMalloc(&ret, count * sizeof(T)); 17 | return ret; 18 | } 19 | 20 | template 21 | __global__ void add(int n, T *x, T *y) { 22 | int index = blockIdx.x * blockDim.x + threadIdx.x; 23 | int stride = blockDim.x * gridDim.x; 24 | // CHECK: half2 tmp = __float2half2_rn(0.0f); 25 | half2 tmp = __float2half2_rn(0.0f); 26 | // CHECK: float max_val = fmax((float)reinterpret_cast(tmp.x), (float)reinterpret_cast(tmp.y)); 27 | float max_val = fmax((float)tmp.x, (float)tmp.y); 28 | // CHECK: float min_val = fmin((float)reinterpret_cast(tmp.x), (float)reinterpret_cast(tmp.y)); 29 | float min_val = fmin((float)reinterpret_cast(tmp.x), (float)reinterpret_cast(tmp.y)); 30 | for (int i = index; i < n; i += stride) 31 | y[i] = max_val - min_val + x[i] + y[i]; 32 | } 33 | 34 | int main(int argc, char* argv[]) { 35 | size_t numElements = 50; 36 | float *A = gpuMalloc(numElements); 37 | float* B = gpuMalloc(numElements); 38 | for (int i = 0; i < numElements; ++i) { 39 | A[i] = 1.0f; 40 | B[i] = 2.0f; 41 | } 42 | int blockSize = 512; 43 | int numBlocks = (numElements + blockSize - 1) / blockSize; 44 | dim3 dimGrid(numBlocks, 1, 1); 45 | dim3 dimBlock(blockSize, 1, 1); 46 | // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(add), dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B); 47 | add<<>>(numElements, A, B); 48 | // CHECK: hipDeviceSynchronize(); 49 | cudaDeviceSynchronize(); 50 | float maxError = 0.0f; 51 | for (int i = 0; i < numElements; ++i) 52 | maxError = fmax(maxError, fabs(B[i] - 3.0f)); 53 | // CHECK: hipFree(A); 54 | cudaFree(A); 55 | // CHECK: hipFree(B); 56 | cudaFree(B); 57 | if (maxError == 0.0f) 58 | return 0; 59 | return -1; 60 | } 61 | -------------------------------------------------------------------------------- /tests/unit_tests/samples/macro_check.cu: 
-------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | // CHECK: #include 3 | #include 4 | 5 | #define a0 -3.0124472f 6 | #define a1 1.7383092f 7 | #define a2 -0.2796695f 8 | #define a3 0.0547837f 9 | #define a4 -0.0073118f 10 | 11 | #define CHECK(call) \ 12 | { \ 13 | const cudaError_t error = call; \ 14 | if (error != cudaSuccess) \ 15 | { \ 16 | fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ 17 | fprintf(stderr, "code: %d, reason: %s\n", error, \ 18 | cudaGetErrorString(error)); \ 19 | } \ 20 | } 21 | 22 | __device__ __constant__ float coef[5]; 23 | 24 | int main() { 25 | const float h_coef[] = {a0, a1, a2, a3, a4}; 26 | // CHECK: CHECK( hipMemcpyToSymbol( HIP_SYMBOL(coef), h_coef, 5 * sizeof(float) )); 27 | CHECK( cudaMemcpyToSymbol( coef, h_coef, 5 * sizeof(float) )); 28 | } -------------------------------------------------------------------------------- /tests/unit_tests/samples/static_shared_memory.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | 3 | // Taken from Jonathan Hui blog https://jhui.github.io/2017/03/06/CUDA 4 | 5 | #include 6 | // CHECK: #include 7 | #include 8 | 9 | __global__ void staticReverse(int *d, int n) 10 | { 11 | // CHECK: __shared__ int s[64]; 12 | __shared__ int s[64]; 13 | int t = threadIdx.x; 14 | int tr = n-t-1; 15 | s[t] = d[t]; 16 | // Will not continue until all threads have completed. 
17 | __syncthreads(); 18 | d[t] = s[tr]; 19 | } 20 | 21 | int main(void) 22 | { 23 | const int n = 64; 24 | int a[n], r[n], d[n]; 25 | 26 | for (int i = 0; i < n; i++) { 27 | a[i] = i; 28 | r[i] = n-i-1; 29 | d[i] = 0; 30 | } 31 | 32 | int *d_d; 33 | // CHECK: hipMalloc(&d_d, n * sizeof(int)); 34 | cudaMalloc(&d_d, n * sizeof(int)); 35 | // run version with static shared memory 36 | // CHECK: hipMemcpy(d_d, a, n*sizeof(int), hipMemcpyHostToDevice); 37 | cudaMemcpy(d_d, a, n*sizeof(int), cudaMemcpyHostToDevice); 38 | // CHECK: hipLaunchKernelGGL(staticReverse, dim3(1), dim3(n), 0, 0, d_d, n); 39 | staticReverse<<<1,n>>>(d_d, n); 40 | // CHECK: hipMemcpy(d, d_d, n*sizeof(int), hipMemcpyDeviceToHost); 41 | cudaMemcpy(d, d_d, n*sizeof(int), cudaMemcpyDeviceToHost); 42 | for (int i = 0; i < n; i++) 43 | if (d[i] != r[i]) printf("Error: d[%d]!=r[%d] (%d, %d)n", i, i, d[i], r[i]); 44 | } 45 | -------------------------------------------------------------------------------- /tests/unit_tests/samples/vec_add.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 1 --hip-kernel-execution-syntax %clang_args 2 | 3 | // Kernel definition 4 | __global__ void vecAdd(float* A, float* B, float* C) 5 | { 6 | int i = threadIdx.x; 7 | A[i] = 0; 8 | B[i] = i; 9 | C[i] = A[i] + B[i]; 10 | } 11 | // CHECK: #include 12 | #include 13 | #define SIZE 10 14 | #define KERNELINVOKES 5000000 15 | int vecadd(int gpudevice, int rank) 16 | { 17 | int devcheck(int, int); 18 | devcheck(gpudevice, rank); 19 | float A[SIZE], B[SIZE], C[SIZE]; 20 | // Kernel invocation 21 | float *devPtrA; 22 | float *devPtrB; 23 | float *devPtrC; 24 | int memsize = SIZE * sizeof(float); 25 | // CHECK: hipMalloc((void**)&devPtrA, memsize); 26 | // CHECK: hipMalloc((void**)&devPtrB, memsize); 27 | // CHECK: hipMalloc((void**)&devPtrC, memsize); 28 | cudaMalloc((void**)&devPtrA, memsize); 29 | cudaMalloc((void**)&devPtrB, memsize); 30 | 
cudaMalloc((void**)&devPtrC, memsize); 31 | // CHECK: hipMemcpy(devPtrA, A, memsize, hipMemcpyHostToDevice); 32 | // CHECK: hipMemcpy(devPtrB, B, memsize, hipMemcpyHostToDevice); 33 | cudaMemcpy(devPtrA, A, memsize, cudaMemcpyHostToDevice); 34 | cudaMemcpy(devPtrB, B, memsize, cudaMemcpyHostToDevice); 35 | for (int i = 0; i>>(devPtrA, devPtrB, devPtrC); 39 | } 40 | // CHECK: hipMemcpy(C, devPtrC, memsize, hipMemcpyDeviceToHost); 41 | cudaMemcpy(C, devPtrC, memsize, cudaMemcpyDeviceToHost); 42 | // calculate only up to gpudevice to show the unique output 43 | // of each rank's kernel launch 44 | for (int i = 0; i= device_count) 60 | { 61 | printf("gpudevice >= device_count ... exiting\n"); 62 | exit(1); 63 | } 64 | // CHECK: hipError_t cudareturn; 65 | // CHECK: hipDeviceProp_t deviceProp; 66 | // CHECK: hipGetDeviceProperties(&deviceProp, gpudevice); 67 | cudaError_t cudareturn; 68 | cudaDeviceProp deviceProp; 69 | cudaGetDeviceProperties(&deviceProp, gpudevice); 70 | if (deviceProp.warpSize <= 1) 71 | { 72 | printf("rank %d: warning, CUDA Device Emulation (CPU) detected, exiting\n", rank); 73 | exit(1); 74 | } 75 | // CHECK: cudareturn = hipSetDevice(gpudevice); 76 | cudareturn = cudaSetDevice(gpudevice); 77 | // CHECK: if (cudareturn == hipErrorInvalidDevice) 78 | if (cudareturn == cudaErrorInvalidDevice) 79 | { 80 | // CHECK: perror("hipSetDevice returned hipErrorInvalidDevice"); 81 | perror("cudaSetDevice returned cudaErrorInvalidDevice"); 82 | } 83 | else 84 | { 85 | // CHECK: hipGetDevice(&device); 86 | cudaGetDevice(&device); 87 | printf("rank %d: cudaGetDevice()=%d\n", rank, device); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /tests/unit_tests/synthetic/driver_functions_internal.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args 
-D__CUDA_API_VERSION_INTERNAL 2 | 3 | // CHECK: #include 4 | #include 5 | #include 6 | #include 7 | #if defined(_WIN32) 8 | #include "windows.h" 9 | #include 10 | #elif CUDA_VERSION <= 10000 11 | #include 12 | #endif 13 | #include "cudaGL.h" 14 | 15 | int main() { 16 | printf("13. CUDA Driver API Internal Functions synthetic test\n"); 17 | 18 | size_t bytes = 0; 19 | 20 | // CHECK: hipTexRef texref; 21 | // CHECK-NEXT: HIP_ARRAY_DESCRIPTOR ARRAY_DESCRIPTOR; 22 | // CHECK-NEXT: hipDeviceptr_t deviceptr; 23 | CUtexref texref; 24 | CUDA_ARRAY_DESCRIPTOR ARRAY_DESCRIPTOR; 25 | CUdeviceptr deviceptr; 26 | 27 | // CUDA: CUresult CUDAAPI cuTexRefSetAddress2D_v2(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch); 28 | // HIP: DEPRECATED(DEPRECATED_MSG) hipError_t hipTexRefSetAddress2D(textureReference* texRef, const HIP_ARRAY_DESCRIPTOR* desc, hipDeviceptr_t dptr, size_t Pitch); 29 | // CHECK: hipError_t result = hipTexRefSetAddress2D(texref, &ARRAY_DESCRIPTOR, deviceptr, bytes); 30 | CUresult result = cuTexRefSetAddress2D_v2(texref, &ARRAY_DESCRIPTOR, deviceptr, bytes); 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /tests/unit_tests/synthetic/driver_typedefs.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --amap --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -D__CUDA_API_VERSION_INTERNAL 2 | 3 | // CHECK: #include 4 | #include 5 | #include 6 | 7 | int main() { 8 | printf("03. 
CUDA Driver API Typedefs synthetic test\n"); 9 | 10 | // CHECK: hipDevice_t device; 11 | CUdevice device; 12 | 13 | // CHECK: hipDeviceptr_t deviceptr; 14 | // CHECK-NEXT: hipDeviceptr_t deviceptr_v1; 15 | CUdeviceptr deviceptr; 16 | CUdeviceptr_v1 deviceptr_v1; 17 | 18 | // CHECK: hipStreamCallback_t streamCallback; 19 | CUstreamCallback streamCallback; 20 | 21 | // CHECK: hipSurfaceObject_t surfObject; 22 | CUsurfObject surfObject; 23 | 24 | // CHECK: hipTextureObject_t texObject; 25 | CUtexObject texObject; 26 | 27 | // CHECK: hipUUID uuid; 28 | CUuuid uuid; 29 | 30 | #if CUDA_VERSION >= 10000 31 | // CHECK: hipHostFn_t hostFn; 32 | CUhostFn hostFn; 33 | 34 | // CHECK: HIP_MEMSET_NODE_PARAMS MEMSET_NODE_PARAMS_st; 35 | // CHECK-NEXT: HIP_MEMSET_NODE_PARAMS MEMSET_NODE_PARAMS; 36 | CUDA_MEMSET_NODE_PARAMS_st MEMSET_NODE_PARAMS_st; 37 | CUDA_MEMSET_NODE_PARAMS MEMSET_NODE_PARAMS; 38 | #endif 39 | 40 | #if CUDA_VERSION >= 11000 41 | // CHECK: hipMemGenericAllocationHandle_t memGenericAllocationHandle_t; 42 | CUmemGenericAllocationHandle memGenericAllocationHandle_t; 43 | #endif 44 | 45 | #if CUDA_VERSION >= 11030 46 | // CHECK: hipDevice_t device_v1; 47 | CUdevice_v1 device_v1; 48 | 49 | // CHECK: hipDeviceptr_t deviceptr_v2; 50 | CUdeviceptr_v2 deviceptr_v2; 51 | 52 | // CHECK: hipSurfaceObject_t surfObject_v1; 53 | CUsurfObject_v1 surfObject_v1; 54 | 55 | // CHECK: hipTextureObject_t texObject_v1; 56 | CUtexObject_v1 texObject_v1; 57 | 58 | // CHECK: hipMemGenericAllocationHandle_t memGenericAllocationHandle_v1; 59 | CUmemGenericAllocationHandle_v1 memGenericAllocationHandle_v1; 60 | 61 | // CHECK: HIP_MEMSET_NODE_PARAMS MEMSET_NODE_PARAMS_v1; 62 | CUDA_MEMSET_NODE_PARAMS_v1 MEMSET_NODE_PARAMS_v1; 63 | 64 | // CHECK: hipStreamBatchMemOpParams streamBatchMemOpParams_v1; 65 | CUstreamBatchMemOpParams_v1 streamBatchMemOpParams_v1; 66 | #endif 67 | 68 | return 0; 69 | } 70 | -------------------------------------------------------------------------------- 
/tests/unit_tests/synthetic/driver_unions.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --amap --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -D__CUDA_API_VERSION_INTERNAL 2 | 3 | // CHECK: #include 4 | #include 5 | #include 6 | 7 | int main() { 8 | printf("10. CUDA Driver API Unions synthetic test\n"); 9 | 10 | #if CUDA_VERSION >= 8000 11 | // CHECK: hipStreamBatchMemOpParams streamBatchMemOpParams; 12 | // CHECK-NEXT: hipStreamBatchMemOpParams_union streamBatchMemOpParams_union; 13 | CUstreamBatchMemOpParams streamBatchMemOpParams; 14 | CUstreamBatchMemOpParams_union streamBatchMemOpParams_union; 15 | #endif 16 | 17 | #if CUDA_VERSION >= 11000 18 | // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue; 19 | CUkernelNodeAttrValue kernelNodeAttrValue; 20 | #endif 21 | 22 | #if CUDA_VERSION >= 11000 && CUDA_VERSION < 11080 23 | // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue_union; 24 | CUkernelNodeAttrValue_union kernelNodeAttrValue_union; 25 | #endif 26 | 27 | #if CUDA_VERSION >= 11030 28 | // CHECK: hipKernelNodeAttrValue kernelNodeAttrValue_v1; 29 | CUkernelNodeAttrValue_v1 kernelNodeAttrValue_v1; 30 | #endif 31 | 32 | #if CUDA_VERSION >= 11080 33 | // CHECK: hipLaunchAttributeValue LaunchAttributeValue; 34 | cudaLaunchAttributeValue LaunchAttributeValue; 35 | #endif 36 | 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /tests/unit_tests/synthetic/libraries/cublaslt2hipblaslt_10010_10020.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --amap --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -D__CUDA_API_VERSION_INTERNAL -ferror-limit=500 2 | 3 | // CHECK: #include 4 | #include 5 | #include 6 | // CHECK: #include "hipblaslt.h" 7 | #include "cublasLt.h" 8 | // 
CHECK-NOT: #include "hipblaslt.h" 9 | 10 | int main() { 11 | printf("21.10010.10020. cuBLASLt API to hipBLASLt API synthetic test\n"); 12 | 13 | #if CUDA_VERSION >= 10010 && CUDA_VERSION <= 10020 14 | // CHECK: hipblasLtMatrixLayoutOpaque_t blasLtMatrixLayoutOpaque; 15 | cublasLtMatrixLayoutStruct blasLtMatrixLayoutOpaque; 16 | #endif 17 | 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /tests/unit_tests/synthetic/libraries/cusparse2rocsparse_10010.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --skip-excluded-preprocessor-conditional-blocks --experimental --roc %clang_args -ferror-limit=500 2 | 3 | // CHECK: #include 4 | #include 5 | // CHECK: #include "hip/hip_complex.h" 6 | #include "cuComplex.h" 7 | #include 8 | // CHECK: #include "rocsparse.h" 9 | #include "cusparse.h" 10 | // CHECK-NOT: #include "rocsparse.h" 11 | 12 | int main() { 13 | printf("18.10010. 
cuSPARSE API to rocSPARSE API synthetic test\n"); 14 | 15 | // CHECK: _rocsparse_handle *handle = nullptr; 16 | // CHECK-NEXT: rocsparse_handle handle_t; 17 | cusparseContext *handle = nullptr; 18 | cusparseHandle_t handle_t; 19 | 20 | // CHECK: rocsparse_status status_t; 21 | cusparseStatus_t status_t; 22 | 23 | // CHECK: _rocsparse_mat_descr *matDescr = nullptr; 24 | // CHECK-NEXT: rocsparse_mat_descr matDescr_t, matDescr_t_2, matDescr_A, matDescr_B, matDescr_C, matDescr_D; 25 | cusparseMatDescr *matDescr = nullptr; 26 | cusparseMatDescr_t matDescr_t, matDescr_t_2, matDescr_A, matDescr_B, matDescr_C, matDescr_D; 27 | 28 | // CHECK: rocsparse_action action_t; 29 | cusparseAction_t action_t; 30 | 31 | // CHECK: rocsparse_index_base indexBase_t; 32 | cusparseIndexBase_t indexBase_t; 33 | 34 | int m = 0; 35 | int n = 0; 36 | int innz = 0; 37 | int csrRowPtrA = 0; 38 | int csrRowPtrB = 0; 39 | int csrRowPtrC = 0; 40 | int cscRowIndA = 0; 41 | int csrColIndA = 0; 42 | int csrColIndB = 0; 43 | int csrColIndC = 0; 44 | int cscColPtrA = 0; 45 | size_t bufferSize = 0; 46 | void *pcsrVal = nullptr; 47 | void *pcscVal = nullptr; 48 | 49 | #if CUDA_VERSION >= 8000 50 | // TODO: [#899] There should be rocsparse_datatype instead of hipDataType 51 | cudaDataType_t dataType_t; 52 | cudaDataType dataType; 53 | #endif 54 | 55 | #if CUDA_VERSION >= 10010 56 | // TODO: cusparseCsr2CscAlg_t has no analogue in rocSPARSE. 
The deletion of declaration and usage is needed to be implemented 57 | cusparseCsr2CscAlg_t Csr2CscAlg_t; 58 | 59 | // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx2_bufferSize(cusparseHandle_t handle, int m, int n, int nnz, const void* csrVal, const int* csrRowPtr, const int* csrColInd, void* cscVal, int* cscColPtr, int* cscRowInd, cudaDataType valType, cusparseAction_t copyValues, cusparseIndexBase_t idxBase, cusparseCsr2CscAlg_t alg, size_t* bufferSize); 60 | // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_csr2csc_buffer_size(rocsparse_handle handle, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, const rocsparse_int* csr_row_ptr, const rocsparse_int* csr_col_ind, rocsparse_action copy_values, size_t* buffer_size); 61 | // CHECK: status_t = rocsparse_csr2csc_buffer_size(handle_t, m, n, innz, &csrRowPtrA, &csrColIndA, action_t, &bufferSize); 62 | status_t = cusparseCsr2cscEx2_bufferSize(handle_t, m, n, innz, pcsrVal, &csrRowPtrA, &csrColIndA, pcscVal, &cscColPtrA, &cscRowIndA, dataType, action_t, indexBase_t, Csr2CscAlg_t, &bufferSize); 63 | #endif 64 | 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /tests/unit_tests/synthetic/libraries/cusparse2rocsparse_10010_12000.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 4 --skip-excluded-preprocessor-conditional-blocks --experimental --roc --amap %clang_args -ferror-limit=500 2 | 3 | // CHECK: #include 4 | #include 5 | // CHECK: #include "hip/hip_complex.h" 6 | #include "cuComplex.h" 7 | #include 8 | // CHECK: #include "rocsparse.h" 9 | #include "cusparse.h" 10 | // CHECK-NOT: #include "rocsparse.h" 11 | 12 | int main() { 13 | printf("18.10010.12000. 
cuSPARSE API to rocSPARSE API synthetic test\n"); 14 | 15 | // CHECK: _rocsparse_handle *handle = nullptr; 16 | // CHECK-NEXT: rocsparse_handle handle_t; 17 | cusparseContext *handle = nullptr; 18 | cusparseHandle_t handle_t; 19 | 20 | // CHECK: rocsparse_status status_t; 21 | cusparseStatus_t status_t; 22 | 23 | // CHECK: _rocsparse_mat_descr *matDescr = nullptr; 24 | // CHECK-NEXT: rocsparse_mat_descr matDescr_t, matDescr_t_2, matDescr_A, matDescr_B, matDescr_C, matDescr_D; 25 | cusparseMatDescr *matDescr = nullptr; 26 | cusparseMatDescr_t matDescr_t, matDescr_t_2, matDescr_A, matDescr_B, matDescr_C, matDescr_D; 27 | 28 | // CHECK: rocsparse_action action_t; 29 | cusparseAction_t action_t; 30 | 31 | // CHECK: rocsparse_index_base indexBase_t; 32 | cusparseIndexBase_t indexBase_t; 33 | 34 | int m = 0; 35 | int n = 0; 36 | int innz = 0; 37 | int csrRowPtrA = 0; 38 | int csrRowPtrB = 0; 39 | int csrRowPtrC = 0; 40 | int cscRowIndA = 0; 41 | int csrColIndA = 0; 42 | int csrColIndB = 0; 43 | int csrColIndC = 0; 44 | int cscColPtrA = 0; 45 | size_t bufferSize = 0; 46 | void *pcsrVal = nullptr; 47 | void *pcscVal = nullptr; 48 | void *alpha = nullptr; 49 | void *beta = nullptr; 50 | void *tempBuffer = nullptr; 51 | void* result = nullptr; 52 | 53 | // CHECK: rocsparse_operation opA, opB, opX; 54 | cusparseOperation_t opA, opB, opX; 55 | 56 | #if CUDA_VERSION >= 8000 57 | // TODO: [#899] There should be rocsparse_datatype instead of hipDataType 58 | cudaDataType_t dataType_t; 59 | cudaDataType dataType; 60 | #endif 61 | 62 | #if CUDA_VERSION >= 10010 63 | 64 | #if (CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR == 2) || CUSPARSE_VERSION >= 10300 65 | // TODO: cusparseCsr2CscAlg_t has no analogue in rocSPARSE. 
The deletion of declaration and usage is needed to be implemented 66 | cusparseCsr2CscAlg_t Csr2CscAlg_t; 67 | 68 | // CHECK: _rocsparse_spvec_descr *spVecDescr = nullptr; 69 | // CHECK-NEXT: rocsparse_spvec_descr spVecDescr_t; 70 | cusparseSpVecDescr *spVecDescr = nullptr; 71 | cusparseSpVecDescr_t spVecDescr_t; 72 | 73 | // CHECK: _rocsparse_dnvec_descr *dnVecDescr = nullptr; 74 | // CHECK-NEXT: rocsparse_dnvec_descr dnVecDescr_t, vecX, vecY; 75 | cusparseDnVecDescr *dnVecDescr = nullptr; 76 | cusparseDnVecDescr_t dnVecDescr_t, vecX, vecY; 77 | #endif 78 | 79 | #if (CUDA_VERSION < 11000 && !defined(_WIN32)) || CUDA_VERSION >= 11000 80 | // CHECK: rocsparse_spmat_descr spMatDescr_t, spmatA, spmatB, spmatC; 81 | cusparseSpMatDescr_t spMatDescr_t, spmatA, spmatB, spmatC; 82 | 83 | // CHECK: rocsparse_dnmat_descr dnMatDescr_t, dnmatA, dnmatB, dnmatC; 84 | cusparseDnMatDescr_t dnMatDescr_t, dnmatA, dnmatB, dnmatC; 85 | 86 | // CHECK: rocsparse_spmm_alg spMMAlg_t; 87 | cusparseSpMMAlg_t spMMAlg_t; 88 | #endif 89 | 90 | #endif 91 | 92 | return 0; 93 | } 94 | -------------------------------------------------------------------------------- /tests/unit_tests/synthetic/libraries/cusparse2rocsparse_7050.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --skip-excluded-preprocessor-conditional-blocks --experimental --roc %clang_args -ferror-limit=500 2 | 3 | // CHECK: #include 4 | #include 5 | // CHECK: #include "hip/hip_complex.h" 6 | #include "cuComplex.h" 7 | #include 8 | // CHECK: #include "rocsparse.h" 9 | #include "cusparse.h" 10 | // CHECK-NOT: #include "rocsparse.h" 11 | 12 | int main() { 13 | printf("18.7050. 
cuSPARSE API to rocSPARSE API synthetic test\n"); 14 | 15 | // CHECK: _rocsparse_handle *handle = nullptr; 16 | // CHECK-NEXT: rocsparse_handle handle_t; 17 | cusparseContext *handle = nullptr; 18 | cusparseHandle_t handle_t; 19 | 20 | // CHECK: rocsparse_status status_t; 21 | cusparseStatus_t status_t; 22 | 23 | // CHECK: rocsparse_operation opA, opB, opX; 24 | cusparseOperation_t opA, opB, opX; 25 | 26 | int batchCount = 0; 27 | int m = 0; 28 | int n = 0; 29 | int innz = 0; 30 | int algo = 0; 31 | int bufferSizeInBytes = 0; 32 | double dds = 0.f; 33 | double ddl = 0.f; 34 | double dd = 0.f; 35 | double ddu = 0.f; 36 | double ddw = 0.f; 37 | double dx = 0.f; 38 | float fds = 0.f; 39 | float fdl = 0.f; 40 | float fd = 0.f; 41 | float fdu = 0.f; 42 | float fdw = 0.f; 43 | float fx = 0.f; 44 | size_t bufferSize = 0; 45 | void *pBuffer = nullptr; 46 | 47 | #if CUDA_VERSION >= 7050 48 | // CUDA: cusparseStatus_t CUSPARSEAPI cusparseZgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, int* pBufferSize); 49 | // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_zgemvi_buffer_size(rocsparse_handle handle, rocsparse_operation trans, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, size_t* buffer_size); 50 | // CHECK: status_t = rocsparse_zgemvi_buffer_size(handle_t, opA, m, n, innz, reinterpret_cast(&bufferSizeInBytes)); 51 | status_t = cusparseZgemvi_bufferSize(handle_t, opA, m, n, innz, &bufferSizeInBytes); 52 | 53 | // CUDA: cusparseStatus_t CUSPARSEAPI cusparseCgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, int* pBufferSize); 54 | // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_cgemvi_buffer_size(rocsparse_handle handle, rocsparse_operation trans, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, size_t* buffer_size); 55 | // CHECK: status_t = rocsparse_cgemvi_buffer_size(handle_t, opA, m, n, innz, reinterpret_cast(&bufferSizeInBytes)); 56 | status_t = 
cusparseCgemvi_bufferSize(handle_t, opA, m, n, innz, &bufferSizeInBytes); 57 | 58 | // CUDA: cusparseStatus_t CUSPARSEAPI cusparseDgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, int* pBufferSize); 59 | // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_dgemvi_buffer_size(rocsparse_handle handle, rocsparse_operation trans, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, size_t* buffer_size); 60 | // CHECK: status_t = rocsparse_dgemvi_buffer_size(handle_t, opA, m, n, innz, reinterpret_cast(&bufferSizeInBytes)); 61 | status_t = cusparseDgemvi_bufferSize(handle_t, opA, m, n, innz, &bufferSizeInBytes); 62 | 63 | // CUDA: cusparseStatus_t CUSPARSEAPI cusparseSgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, int nnz, int* pBufferSize); 64 | // ROC: ROCSPARSE_EXPORT rocsparse_status rocsparse_sgemvi_buffer_size(rocsparse_handle handle, rocsparse_operation trans, rocsparse_int m, rocsparse_int n, rocsparse_int nnz, size_t* buffer_size); 65 | // CHECK: status_t = rocsparse_sgemvi_buffer_size(handle_t, opA, m, n, innz, reinterpret_cast(&bufferSizeInBytes)); 66 | status_t = cusparseSgemvi_bufferSize(handle_t, opA, m, n, innz, &bufferSizeInBytes); 67 | #endif 68 | 69 | return 0; 70 | } 71 | -------------------------------------------------------------------------------- /tests/unit_tests/synthetic/nvrtc2hiprtc.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --amap --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -ferror-limit=500 2 | 3 | // CHECK: #include 4 | #include 5 | #include 6 | // CHECK: #include "hip/hiprtc.h" 7 | #include "nvrtc.h" 8 | // CHECK-NOT: #include "hip/hiprtc.h" 9 | // CHECK-NOT: #include "nvrtc.h" 10 | 11 | int main() { 12 | printf("25. 
CUDA nvRTC API to HIP hipRTC API synthetic test\n"); 13 | 14 | int numOptions = 0; 15 | int numHeaders = 0; 16 | const char* pOptions = nullptr; 17 | const char* pHeadedrs = nullptr; 18 | const char* pIncludeNames = nullptr; 19 | char* pchSrc = nullptr; 20 | char* pchName = nullptr; 21 | 22 | // CHECK: hiprtcProgram rtcProgram = nullptr; 23 | nvrtcProgram rtcProgram = nullptr; 24 | 25 | // CHECK: hiprtcResult rtcResult; 26 | nvrtcResult rtcResult; 27 | 28 | // CUDA: nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char* const* options); 29 | // HIP: hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options); 30 | // CHECK: rtcResult = hiprtcCompileProgram(rtcProgram, numOptions, const_cast(&pOptions)); 31 | rtcResult = nvrtcCompileProgram(rtcProgram, numOptions, &pOptions); 32 | 33 | // CUDA: nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char* src, const char* name, int numHeaders, const char* const* headers, const char* const* includeNames); 34 | // HIP: hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, int numHeaders, const char** headers, const char** includeNames); 35 | // CHECK: rtcResult = hiprtcCreateProgram(&rtcProgram, pchSrc, pchName, numHeaders, const_cast(&pHeadedrs), const_cast(&pIncludeNames)); 36 | rtcResult = nvrtcCreateProgram(&rtcProgram, pchSrc, pchName, numHeaders, &pHeadedrs, &pIncludeNames); 37 | 38 | return 0; 39 | } 40 | -------------------------------------------------------------------------------- /tests/unit_tests/synthetic/runtime_functions_12000.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args 2 | 3 | // CHECK: #include 4 | #include 5 | #include 6 | #include 7 | 8 | int main() { 9 | printf("12.12000. 
CUDA Runtime API Functions synthetic test for CUDA >= 12000\n"); 10 | 11 | // CHECK: hipError_t result = hipSuccess; 12 | cudaError result = cudaSuccess; 13 | 14 | // CHECK: hipGraphExec_t GraphExec_t; 15 | cudaGraphExec_t GraphExec_t; 16 | 17 | // CHECK: hipGraph_t Graph_t; 18 | cudaGraph_t Graph_t; 19 | 20 | void *pfn = nullptr; 21 | std::string symbol = "symbol"; 22 | 23 | #if defined(_WIN32) && CUDA_VERSION < 9000 24 | typedef signed __int64 int64_t; 25 | typedef unsigned __int64 uint64_t; 26 | #endif 27 | 28 | #if defined(_WIN32) 29 | unsigned long long ull = 0; 30 | #else 31 | unsigned long ull = 0; 32 | #endif 33 | 34 | #if CUDA_VERSION >= 12000 35 | // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, unsigned long long flags __dv(0)); 36 | // HIP: hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t graph, unsigned long long flags); 37 | // CHECK: result = hipGraphInstantiateWithFlags(&GraphExec_t, Graph_t, ull); 38 | result = cudaGraphInstantiate(&GraphExec_t, Graph_t, ull); 39 | 40 | // CHECK: hipDriverProcAddressQueryResult driverProcAddressQueryResult; 41 | cudaDriverEntryPointQueryResult driverProcAddressQueryResult; 42 | 43 | // CUDA: extern __host__ cudaError_t CUDARTAPI cudaGetDriverEntryPoint(const char *symbol, void **funcPtr, unsigned long long flags, enum cudaDriverEntryPointQueryResult *driverStatus = NULL); 44 | // CUDA < 12000: extern __host__ cudaError_t CUDARTAPI cudaGetDriverEntryPoint(const char *symbol, void **funcPtr, unsigned long long flags); 45 | // NOTE: cudaGetDriverEntryPoint for CUDA < 12000 is not supported by HIP 46 | // TODO: detect cudaGetDriverEntryPoint signature and report warning/error for old (before CUDA 12.0) signature 47 | // HIP: hipError_t hipGetProcAddress(const char* symbol, void** pfn, int hipVersion, uint64_t flags, hipDriverProcAddressQueryResult* symbolStatus); 48 | // TODO: add an explicit static_cast for ull 49 | // 
CHECK: result = hipGetProcAddress(symbol.c_str(), &pfn, 700, ull, &driverProcAddressQueryResult); 50 | result = cudaGetDriverEntryPoint(symbol.c_str(), &pfn, ull, &driverProcAddressQueryResult); 51 | #endif 52 | 53 | return 0; 54 | } 55 | -------------------------------------------------------------------------------- /tests/unit_tests/synthetic/runtime_functions_9000.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 2 --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args 2 | 3 | // CHECK: #include 4 | #include 5 | #include 6 | #include 7 | 8 | int main() { 9 | printf("12.9000. CUDA Runtime API Functions synthetic test for CUDA >= 9000\n"); 10 | 11 | // CHECK: hipError_t result = hipSuccess; 12 | cudaError result = cudaSuccess; 13 | 14 | // CHECK: hipStream_t stream; 15 | cudaStream_t stream; 16 | 17 | int intVal = 0; 18 | unsigned int flags = 0; 19 | dim3 gridDim; 20 | dim3 blockDim; 21 | void* func = nullptr; 22 | void* image = nullptr; 23 | 24 | #if CUDA_VERSION >= 9000 25 | // CHECK: hipFuncAttribute FuncAttribute; 26 | cudaFuncAttribute FuncAttribute; 27 | 28 | // CUDA: extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value); 29 | // HIP: hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value); 30 | // CHECK: result = hipFuncSetAttribute(reinterpret_cast(func), FuncAttribute, intVal); 31 | result = cudaFuncSetAttribute(func, FuncAttribute, intVal); 32 | 33 | // CUDA: extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream); 34 | // HIP: hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX, void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream); 35 | // CHECK: result = 
hipLaunchCooperativeKernel(reinterpret_cast(func), gridDim, blockDim, &image, flags, stream); 36 | result = cudaLaunchCooperativeKernel(func, gridDim, blockDim, &image, flags, stream); 37 | #endif 38 | 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /tests/unit_tests/synthetic/runtime_typedefs.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --amap --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -D__CUDA_API_VERSION_INTERNAL 2 | 3 | // CHECK: #include 4 | #include 5 | #include 6 | 7 | int main() { 8 | printf("07. CUDA Runtime API Typedefs synthetic test\n"); 9 | 10 | // CHECK: hipStreamCallback_t StreamCallback_t; 11 | // CHECK-NEXT: hipSurfaceObject_t SurfaceObject_t; 12 | // CHECK-NEXT: hipTextureObject_t TextureObject_t; 13 | cudaStreamCallback_t StreamCallback_t; 14 | cudaSurfaceObject_t SurfaceObject_t; 15 | cudaTextureObject_t TextureObject_t; 16 | 17 | // CHECK: hipUUID uuid; 18 | cudaUUID_t uuid; 19 | 20 | #if CUDA_VERSION >= 10000 21 | // CHECK: hipHostFn_t HostFn_t; 22 | cudaHostFn_t HostFn_t; 23 | #endif 24 | 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /tests/unit_tests/synthetic/runtime_unions.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args 3 --amap --skip-excluded-preprocessor-conditional-blocks --experimental %clang_args -D__CUDA_API_VERSION_INTERNAL 2 | 3 | // CHECK: #include 4 | #include 5 | #include 6 | 7 | int main() { 8 | printf("11. 
CUDA Runtime API Unions synthetic test\n"); 9 | 10 | #if CUDA_VERSION >= 11000 11 | // CHECK: hipKernelNodeAttrValue KernelNodeAttrValue; 12 | cudaKernelNodeAttrValue KernelNodeAttrValue; 13 | #endif 14 | 15 | #if CUDA_VERSION >= 11080 16 | // CHECK: hipLaunchAttributeValue LaunchAttributeValue; 17 | // CHECK-NEXT: hipLaunchAttributeValue launchAttributeValue_union; 18 | CUlaunchAttributeValue LaunchAttributeValue; 19 | CUlaunchAttributeValue_union launchAttributeValue_union; 20 | #endif 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /tests/unit_tests/synthetic/transforming_matchers.cu: -------------------------------------------------------------------------------- 1 | // RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args 2 | 3 | // CHECK: #include 4 | #include 5 | 6 | template 7 | void check(T result, char const *const func, const char *const file, int const line) { 8 | if (result) { 9 | fprintf(stderr, "Error at %s:%d code=%d(%s) \" \n", file, line, static_cast(result), func); 10 | exit(EXIT_FAILURE); 11 | } 12 | } 13 | 14 | #define checkErrors(val) check((val), #val, __FILE__, __LINE__) 15 | #define num 1024 16 | 17 | int main(int argc, const char *argv[]) { 18 | int *input = nullptr; 19 | void *input_ptr = nullptr; 20 | int deviceCount = 0; 21 | // CHECK: checkErrors(hipGetDeviceCount(&deviceCount)); 22 | checkErrors(cudaGetDeviceCount(&deviceCount)); 23 | printf("Device Count: %d\n", deviceCount); 24 | // CHECK: hipDeviceProp_t deviceProp; 25 | cudaDeviceProp deviceProp; 26 | deviceProp.major = 0; 27 | deviceProp.minor = 0; 28 | int deviceID = 0; 29 | // CHECK: checkErrors(hipGetDeviceProperties(&deviceProp, deviceID)); 30 | checkErrors(cudaGetDeviceProperties(&deviceProp, deviceID)); 31 | // CHECK: checkErrors(hipSetDevice(deviceID)); 32 | checkErrors(cudaSetDevice(deviceID)); 33 | // CHECK: checkErrors(hipHostMalloc(&input, sizeof(int) * num * 2, hipHostMallocDefault)); 34 | 
checkErrors(cudaMallocHost(&input, sizeof(int) * num * 2)); 35 | // CHECK: checkErrors(hipHostMalloc(&input_ptr, sizeof(int) * num * 2, hipHostMallocDefault)); 36 | checkErrors(cudaMallocHost(&input_ptr, sizeof(int) * num * 2)); 37 | for (int i = 0; i < num * 2; ++i) { 38 | input[i] = i; 39 | } 40 | 41 | int *value = 0; 42 | int *value_2 = 0; 43 | int iBlockSize = 0; 44 | int iBlockSize_2 = 0; 45 | size_t bytes = 0; 46 | // CHECK: hipFunction_t function; 47 | CUfunction function; 48 | // CHECK: void* occupancyB2DSize; 49 | CUoccupancyB2DSize occupancyB2DSize; 50 | 51 | // CHECK: checkErrors(hipModuleOccupancyMaxPotentialBlockSizeWithFlags(value, value_2, function, bytes, iBlockSize, iBlockSize_2)); 52 | checkErrors(cuOccupancyMaxPotentialBlockSizeWithFlags(value, value_2, function, occupancyB2DSize, bytes, iBlockSize, iBlockSize_2)); 53 | 54 | // CHECK: checkErrors(hipHostFree(input)); 55 | checkErrors(cudaFreeHost(input)); 56 | // CHECK: checkErrors(hipDeviceSynchronize()); 57 | checkErrors(cudaDeviceSynchronize()); 58 | return 0; 59 | } 60 | --------------------------------------------------------------------------------