├── .azuredevops └── rocm-ci.yml ├── .clang-format ├── .gitattributes ├── .github ├── dependabot.yml ├── palamida.yml ├── pull_request_template.md ├── scripts │ └── validate_pr_description.py └── workflows │ ├── keyword-check.yml │ ├── kws-caller.yml │ ├── linting.yml │ ├── pr-title-validate.yml │ ├── rocm-ci-caller.yml │ └── validate-pr-description.yml ├── .gitignore ├── .jenkins └── Jenkinsfile ├── .markdownlint-cli2.yaml ├── .readthedocs.yaml ├── .spellcheck.local.yaml ├── .wordlist.txt ├── CODEOWNERS ├── CONTRIBUTING.md ├── Jenkinsfile ├── LICENSE.txt ├── README.md ├── RELEASE.md ├── VERSION ├── bin ├── hipcc_cmake_linker_helper └── hipdemangleatp ├── cmake ├── FindHIP.cmake └── FindHIP │ ├── run_hipcc.cmake │ └── run_make2cmake.cmake ├── configure ├── docker ├── dockerfile-build-ubuntu-16.04 └── dockerfile-hip-ubuntu-16.04 ├── docs ├── .gitignore ├── conf.py ├── data │ ├── env_variables_hip.rst │ ├── how-to │ │ └── hip_runtime_api │ │ │ ├── asynchronous │ │ │ ├── sequential_async_event.drawio │ │ │ └── sequential_async_event.svg │ │ │ ├── cooperative_groups │ │ │ ├── thread_hierarchy_coop_bottom.drawio │ │ │ ├── thread_hierarchy_coop_bottom.svg │ │ │ ├── thread_hierarchy_coop_top.drawio │ │ │ └── thread_hierarchy_coop_top.svg │ │ │ ├── hipgraph │ │ │ ├── hip_graph.drawio │ │ │ ├── hip_graph.svg │ │ │ ├── hip_graph_speedup.drawio │ │ │ └── hip_graph_speedup.svg │ │ │ ├── memory_management │ │ │ ├── pageable_pinned.drawio │ │ │ ├── pageable_pinned.svg │ │ │ ├── textures │ │ │ │ ├── border.png │ │ │ │ ├── clamp.png │ │ │ │ ├── linear.png │ │ │ │ ├── mirror.png │ │ │ │ ├── nearest.png │ │ │ │ ├── original.png │ │ │ │ └── wrap.png │ │ │ └── unified_memory │ │ │ │ ├── um.drawio │ │ │ │ └── um.svg │ │ │ ├── runtimes.drawio │ │ │ ├── runtimes.svg │ │ │ ├── stream_management.drawio │ │ │ └── stream_management.svg │ ├── tutorial │ │ └── reduction │ │ │ ├── conflict_free_reduction.drawio │ │ │ ├── conflict_free_reduction.svg │ │ │ ├── foldl.drawio │ │ │ ├── foldl.svg │ │ 
│ ├── naive_reduction.drawio │ │ │ ├── naive_reduction.svg │ │ │ ├── parallel_foldl.drawio │ │ │ ├── parallel_foldl.svg │ │ │ ├── reduced_divergence_reduction.drawio │ │ │ ├── reduced_divergence_reduction.svg │ │ │ ├── warp_reduction.drawio │ │ │ ├── warp_reduction.svg │ │ │ ├── warp_reduction_with_shared.drawio │ │ │ └── warp_reduction_with_shared.svg │ ├── understand │ │ ├── hardware_implementation │ │ │ ├── cdna2_gcd.png │ │ │ ├── cdna3_cu.png │ │ │ ├── compute_unit.drawio │ │ │ ├── compute_unit.svg │ │ │ └── rdna3_cu.png │ │ └── programming_model │ │ │ ├── cdna2_gcd.png │ │ │ ├── cdna3_cu.png │ │ │ ├── memory_hierarchy.drawio │ │ │ ├── memory_hierarchy.svg │ │ │ ├── rdna3_cu.png │ │ │ ├── simt.drawio │ │ │ ├── simt.svg │ │ │ ├── thread_hierarchy.drawio │ │ │ └── thread_hierarchy.svg │ └── what_is_hip │ │ ├── hip.drawio │ │ └── hip.svg ├── device_md_gen.py ├── doxygen-input │ ├── doxy.cfg │ ├── mainpage.txt │ └── sync.txt ├── doxygen │ └── Doxyfile ├── environment.yml ├── extension │ ├── __init__.py │ └── custom_directive.py ├── faq.rst ├── gen_clang_option_doc.sh ├── how-to │ ├── debugging.rst │ ├── hip_cpp_language_extensions.rst │ ├── hip_porting_driver_api.rst │ ├── hip_porting_guide.md │ ├── hip_rtc.md │ ├── hip_runtime_api.rst │ ├── hip_runtime_api │ │ ├── asynchronous.rst │ │ ├── call_stack.rst │ │ ├── cooperative_groups.rst │ │ ├── error_handling.rst │ │ ├── external_interop.rst │ │ ├── hipgraph.rst │ │ ├── initialization.rst │ │ ├── memory_management.rst │ │ ├── memory_management │ │ │ ├── coherence_control.rst │ │ │ ├── device_memory.rst │ │ │ ├── device_memory │ │ │ │ └── texture_fetching.rst │ │ │ ├── host_memory.rst │ │ │ ├── stream_ordered_allocator.rst │ │ │ ├── unified_memory.rst │ │ │ └── virtual_memory.rst │ │ ├── multi_device.rst │ │ └── opengl_interop.rst │ ├── kernel_language_cpp_support.rst │ ├── logging.rst │ └── performance_guidelines.rst ├── index.md ├── install │ ├── build.rst │ └── install.rst ├── license.md ├── programming_guide.rst 
├── reference │ ├── api_syntax.rst │ ├── deprecated_api_list.rst │ ├── env_variables.rst │ ├── fp8_numbers.rst │ ├── hardware_features.rst │ ├── hip_runtime_api │ │ ├── global_defines_enums_structs_files.rst │ │ ├── global_defines_enums_structs_files │ │ │ ├── driver_types.rst │ │ │ └── global_enum_and_defines.rst │ │ ├── modules.rst │ │ └── modules │ │ │ ├── callback_activity_apis.rst │ │ │ ├── context_management.rst │ │ │ ├── cooperative_groups_reference.rst │ │ │ ├── device_management.rst │ │ │ ├── error_handling.rst │ │ │ ├── event_management.rst │ │ │ ├── execution_control.rst │ │ │ ├── graph_management.rst │ │ │ ├── graphics_interoperability.rst │ │ │ ├── initialization_and_version.rst │ │ │ ├── launch_api.rst │ │ │ ├── memory_management.rst │ │ │ ├── memory_management │ │ │ ├── external_resource_interoperability.rst │ │ │ ├── memory_management_deprecated.rst │ │ │ ├── stream_ordered_memory_allocator.rst │ │ │ ├── surface_object.rst │ │ │ ├── texture_management.rst │ │ │ ├── texture_management_deprecated.rst │ │ │ ├── unified_memory_reference.rst │ │ │ └── virtual_memory_reference.rst │ │ │ ├── module_management.rst │ │ │ ├── occupancy.rst │ │ │ ├── opengl_interoperability.rst │ │ │ ├── peer_to_peer_device_memory_access.rst │ │ │ ├── profiler_control.rst │ │ │ ├── runtime_compilation.rst │ │ │ ├── stream_management.rst │ │ │ └── stream_memory_operations.rst │ ├── hip_runtime_api_reference.rst │ ├── math_api.rst │ └── virtual_rocr.rst ├── sphinx │ ├── _toc.yml.in │ ├── requirements.in │ └── requirements.txt ├── tools │ ├── example_codes │ │ ├── external_interop.hip │ │ └── opengl_interop.hip │ └── update_example_codes.py ├── tutorial │ ├── cooperative_groups_tutorial.rst │ ├── reduction.rst │ └── saxpy.rst ├── understand │ ├── amd_clr.rst │ ├── compilers.rst │ ├── glossary.md │ ├── hardware_implementation.rst │ └── programming_model.rst └── what_is_hip.rst ├── hip-lang-config.cmake.in ├── include └── hip │ ├── channel_descriptor.h │ ├── device_functions.h │ 
├── driver_types.h │ ├── hip_bf16.h │ ├── hip_bfloat16.h │ ├── hip_common.h │ ├── hip_complex.h │ ├── hip_cooperative_groups.h │ ├── hip_deprecated.h │ ├── hip_ext.h │ ├── hip_ext_ocp.h │ ├── hip_fp16.h │ ├── hip_fp4.h │ ├── hip_fp6.h │ ├── hip_fp8.h │ ├── hip_gl_interop.h │ ├── hip_hcc.h │ ├── hip_math_constants.h │ ├── hip_profile.h │ ├── hip_runtime.h │ ├── hip_runtime_api.h │ ├── hip_texture_types.h │ ├── hip_vector_types.h │ ├── hiprtc.h │ ├── library_types.h │ ├── linker_types.h │ ├── math_functions.h │ ├── surface_types.h │ └── texture_types.h ├── install.sh └── util ├── gedit ├── README.md ├── hip.lang └── install.sh └── vim ├── README.md └── hip.vim /.azuredevops/rocm-ci.yml: -------------------------------------------------------------------------------- 1 | resources: 2 | repositories: 3 | - repository: pipelines_repo 4 | type: github 5 | endpoint: ROCm 6 | name: ROCm/ROCm 7 | - repository: matching_repo 8 | type: github 9 | endpoint: ROCm 10 | name: ROCm/clr 11 | ref: $(Build.SourceBranch) 12 | - repository: hipother_repo 13 | type: github 14 | endpoint: ROCm 15 | name: ROCm/hipother 16 | ref: $(Build.SourceBranch) 17 | pipelines: 18 | - pipeline: rocr-runtime_pipeline 19 | source: \ROCR-Runtime 20 | trigger: 21 | branches: 22 | include: 23 | - amd-staging 24 | - amd-mainline 25 | - pipeline: rocprofiler-register_pipeline 26 | source: \rocprofiler-register 27 | trigger: 28 | branches: 29 | include: 30 | - amd-staging 31 | - amd-mainline 32 | 33 | variables: 34 | - group: common 35 | - template: /.azuredevops/variables-global.yml@pipelines_repo 36 | 37 | trigger: 38 | batch: true 39 | branches: 40 | include: 41 | - amd-staging 42 | - amd-mainline 43 | paths: 44 | exclude: 45 | - docs 46 | - '.github' 47 | - '.jenkins' 48 | - '.*.yaml' 49 | - CODEOWNERS 50 | - Jenkinsfile 51 | - LICENSE.txt 52 | - '**/*.md' 53 | - VERSION 54 | 55 | pr: 56 | autoCancel: true 57 | branches: 58 | include: 59 | - amd-staging 60 | - amd-mainline 61 | paths: 62 | exclude: 63 | 
- docs 64 | - '.github' 65 | - '.jenkins' 66 | - '.*.yaml' 67 | - CODEOWNERS 68 | - Jenkinsfile 69 | - LICENSE.txt 70 | - '**/*.md' 71 | - VERSION 72 | drafts: false 73 | 74 | jobs: 75 | - template: ${{ variables.CI_COMPONENT_PATH }}/HIP.yml@pipelines_repo 76 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | Language: Cpp 2 | BasedOnStyle: Google 3 | AlignEscapedNewlinesLeft: false 4 | AlignOperands: false 5 | ColumnLimit: 100 6 | AlwaysBreakTemplateDeclarations: false 7 | DerivePointerAlignment: false 8 | IndentFunctionDeclarationAfterType: false 9 | MaxEmptyLinesToKeep: 2 10 | SortIncludes: false 11 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Explicitly declare text files you want to always be normalized and converted 5 | # to have LF line endings on checkout. 6 | *.c text eol=lf 7 | *.cpp text eol=lf 8 | *.cc text eol=lf 9 | *.h text eol=lf 10 | *.hpp text eol=lf 11 | *.txt text eol=lf 12 | 13 | # Define files to support auto-remove trailing white space 14 | # Need to run the command below, before add modified file(s) to the staging area 15 | # git config filter.trimspace.clean 'sed -e "s/[[:space:]]*$//g"' 16 | *.cpp filter=trimspace 17 | *.c filter=trimspace 18 | *.h filter=trimspace 19 | *.hpp filter=trimspace 20 | *.md filter=trimspace 21 | 22 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/docs/sphinx" # Location of package manifests 10 | open-pull-requests-limit: 10 11 | schedule: 12 | interval: "daily" 13 | labels: 14 | - "documentation" 15 | - "dependencies" 16 | - "ci:docs-only" 17 | target-branch: "docs/develop" 18 | reviewers: 19 | - "samjwu" 20 | -------------------------------------------------------------------------------- /.github/palamida.yml: -------------------------------------------------------------------------------- 1 | disabled: false 2 | scmId: gh-emu-rocm 3 | branchesToScan: 4 | - amd-staging 5 | - amd-mainline 6 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Associated JIRA ticket number/Github issue number 2 | 3 | 4 | ## What type of PR is this? (check all applicable) 5 | 6 | - [ ] Refactor 7 | - [ ] Feature 8 | - [ ] Bug Fix 9 | - [ ] Optimization 10 | - [ ] Documentation Update 11 | - [ ] Continuous Integration 12 | 13 | ## What were the changes? 14 | 15 | 16 | 17 | ## Why are these changes needed? 18 | 19 | 20 | 21 | ## Updated CHANGELOG? 22 | 23 | 24 | 25 | - [ ] Yes 26 | - [ ] No, Does not apply to this PR. 27 | 28 | ## Added/Updated documentation? 29 | 30 | - [ ] Yes 31 | - [ ] No, Does not apply to this PR. 32 | 33 | ## Additional Checks 34 | 35 | - [ ] I have added tests relevant to the introduced functionality, and the unit tests are passing locally. 36 | - [ ] Any dependent changes have been merged. 
"""Validate that a pull-request description fills in its template sections.

The description is read from the ``PR_DESCRIPTION`` environment variable.
It is split into sections at ``## `` headers; every section must either
contain at least one ticked checkbox (when it is a checklist) or some real
text (when it is a free-text section).  The script prints the problems it
finds and exits with status 1, or prints ``All good`` and exits with 0.
"""
import os
import re
import sys
from typing import List, Optional, Tuple

# A markdown task-list item, ticked or not: "- [ ] text" / "- [x] text".
_CHECKBOX_RE = re.compile(r"^\s*-\s*\[[ xX]\]\s*.+")
# A ticked markdown task-list item: "- [x] text" / "- [X] text".
_CHECKED_RE = re.compile(r"^\s*-\s*\[[xX]\]\s*.+")
# A line made only of whitespace and/or complete single-line HTML comments.
# The tempered ``(?:(?!-->).)*`` keeps one comment from swallowing real text
# that sits between two comments on the same line.
_COMMENT_RE = re.compile(r"^\s*(?:<!--(?:(?!-->).)*-->\s*)*$")
# A section header; group 1 is the section title.
_HEADER_RE = re.compile(r"^\s*##\s*(.+?)\s*$")


def is_checkbox(line: str) -> bool:
    """Return True if *line* is a markdown checkbox item, ticked or not."""
    return bool(_CHECKBOX_RE.match(line))


def is_checked(line: str) -> bool:
    """Return True if *line* is a ticked markdown checkbox item."""
    return bool(_CHECKED_RE.match(line))


def is_comment(line: str) -> bool:
    """Return True if *line* carries no author-written content.

    That is the case for blank/whitespace-only lines and for lines that
    consist solely of HTML comments (``<!-- ... -->``), which markdown
    templates use as placeholder hints.  Comments spanning several lines
    are not recognised.
    """
    return bool(_COMMENT_RE.match(line))


def text_clean(lines: List[str]) -> str:
    """Return the content of *lines* with comments and all whitespace removed.

    An empty result means the section holds no real text.
    """
    text = [line for line in lines if not is_comment(line)]
    return "".join("".join(text).strip().split())


def validate_section(section_name: str, lines: List[str]) -> Optional[str]:
    """Check a single section and return an error message, or None if valid.

    A section containing any checkbox is treated as a checklist and needs at
    least one ticked box; any other section needs non-empty text.
    """
    has_checkboxes = any(is_checkbox(line) for line in lines)
    if has_checkboxes:
        if not any(is_checked(line) for line in lines):
            return f"Section {section_name} is a checklist without selections"
        return None
    if not text_clean(lines):
        return f"Section {section_name} is empty text section"
    return None


def check_description(description: str) -> List[str]:
    """Validate a whole PR description and return the list of error messages.

    An empty description is accepted on purpose: the pull request template
    is not merged yet, so requiring it would fail every PR.  Lines that
    appear before the first ``##`` header are ignored.
    """
    if not description:
        return []

    sections: List[Tuple[str, List[str]]] = []
    current_section: Optional[str] = None
    current_lines: List[str] = []

    for line in description.splitlines():
        header_match = _HEADER_RE.match(line)
        if header_match:
            # A new header closes the section collected so far.
            if current_section:
                sections.append((current_section, current_lines))
            current_section = header_match.group(1)
            current_lines = []
        elif current_section:
            current_lines.append(line)

    if current_section:
        sections.append((current_section, current_lines))

    if not sections:
        return ["No sections available, template is empty"]

    errors: List[str] = []
    for section_name, section_lines in sections:
        error = validate_section(section_name, section_lines)
        if error:
            errors.append(error)

    return errors


def main() -> int:
    """Validate ``$PR_DESCRIPTION`` and return the process exit status."""
    errors = check_description(os.getenv("PR_DESCRIPTION", ""))
    if not errors:
        print("All good")
        return 0
    print("\n".join(errors))
    return 1


if __name__ == "__main__":
    # sys.exit rather than the interactive-only ``exit`` helper, which is
    # injected by the ``site`` module and is absent under ``python -S``.
    sys.exit(main())
Skipping check" 28 | exit 0 29 | fi 30 | 31 | IFS=',' read -ra KEYWORDS_ARRAY <<< "$KEYWORDS" 32 | echo "Checking against list of keywords: ${KEYWORDS_ARRAY[*]}" 33 | 34 | MATCHED=0 35 | BASE_BRANCH=${{github.event.pull_request.base.ref}} 36 | HEAD_BRANCH=${{github.event.pull_request.head.ref}} 37 | PR_TITLE="${{ github.event.pull_request.title }}" 38 | 39 | for file in $(git diff --name-only origin/$BASE_BRANCH..origin/$HEAD_BRANCH); do 40 | if [ -f "$file" ]; then 41 | for keyword in "${KEYWORDS_ARRAY[@]}"; do 42 | while IFS= read -r line; do 43 | echo "Matched in '$file': $line" 44 | MATCHED=1 45 | done < <(grep -in -E "${keyword}" "$file") 46 | done 47 | fi 48 | done 49 | 50 | for commit in $(git log --format=%H origin/$BASE_BRANCH..origin/$HEAD_BRANCH); do 51 | msg=$(git log -1 --format=%B "$commit") 52 | for keyword in "${KEYWORDS_ARRAY[@]}"; do 53 | if echo "$msg" | grep -i -q "$keyword"; then 54 | echo "Match in commit $commit: $msg" 55 | MATCHED=1 56 | fi 57 | done 58 | done 59 | 60 | for keyword in "${KEYWORDS_ARRAY[@]}"; do 61 | if echo "$PR_TITLE" | grep -i -q "$keyword"; then 62 | echo "Match in PR title" 63 | MATCHED=1 64 | fi 65 | done 66 | 67 | if [ "$MATCHED" -eq 1 ]; then 68 | echo "Keywords found, please see diagnostics higher" 69 | exit 1 70 | else 71 | echo "No keywords found" 72 | exit 0 73 | fi 74 | -------------------------------------------------------------------------------- /.github/workflows/kws-caller.yml: -------------------------------------------------------------------------------- 1 | name: Rocm Validation Suite KWS 2 | on: 3 | push: 4 | branches: [amd-staging, amd-mainline] 5 | pull_request: 6 | types: [opened, synchronize, reopened] 7 | workflow_dispatch: 8 | jobs: 9 | kws: 10 | if: ${{ github.event_name == 'pull_request' }} 11 | uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/kws.yml@mainline 12 | secrets: inherit 13 | with: 14 | pr_number: ${{github.event.pull_request.number}} 15 | base_branch: ${{github.base_ref}} 16 | 
-------------------------------------------------------------------------------- /.github/workflows/linting.yml: -------------------------------------------------------------------------------- 1 | name: Linting 2 | 3 | on: 4 | push: 5 | branches: 6 | - develop 7 | - main 8 | - 'docs/*' 9 | - 'roc**' 10 | pull_request: 11 | branches: 12 | - develop 13 | - main 14 | - 'docs/*' 15 | - 'roc**' 16 | 17 | jobs: 18 | call-workflow-passing-data: 19 | name: Documentation 20 | uses: ROCm/rocm-docs-core/.github/workflows/linting.yml@develop 21 | -------------------------------------------------------------------------------- /.github/workflows/pr-title-validate.yml: -------------------------------------------------------------------------------- 1 | name: Validate PR Title 2 | 3 | on: 4 | pull_request: 5 | types: [opened, edited, synchronize, reopened] 6 | 7 | jobs: 8 | validate-pr-title: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Check PR Title 12 | id: check-pr-title 13 | run: | 14 | PR_TITLE="${{ github.event.pull_request.title }}" 15 | 16 | if [[ ! "$PR_TITLE" =~ ^SWDEV-[0-9]+ ]]; then 17 | echo "::error::PR title must start with a Jira ticket ID, SWDEV-" 18 | exit 1 19 | else 20 | echo "PR title is valid" 21 | fi 22 | 23 | validate-commit-messages: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - name: Checkout code 27 | uses: actions/checkout@v3 28 | with: 29 | fetch-depth: 0 30 | 31 | - name: Check all commit messages 32 | id: validate-commit-messags 33 | run: | 34 | COMMITS=$(git log --format="%H %s" origin/${{ github.event.pull_request.base.ref }}..origin/${{ github.event.pull_request.head.ref }}) 35 | echo "$COMMITS" 36 | echo "$COMMITS" | while read -r hash message; do 37 | echo -e "$hash $message\n " 38 | if [[ ! 
"$message" =~ ^SWDEV-[0-9]+ ]]; then 39 | echo "::error:: $hash commit should start with Jira ticket ID, SWDEV-" 40 | exit 1 41 | fi 42 | done 43 | -------------------------------------------------------------------------------- /.github/workflows/rocm-ci-caller.yml: -------------------------------------------------------------------------------- 1 | name: ROCm CI Caller 2 | on: 3 | pull_request: 4 | branches: [amd-staging, amd-npi-next, release/rocm-rel-*, amd-mainline] 5 | types: [opened, reopened, synchronize] 6 | push: 7 | branches: [amd-mainline] 8 | workflow_dispatch: 9 | issue_comment: 10 | types: [created] 11 | 12 | jobs: 13 | call-workflow: 14 | if: github.event_name != 'issue_comment' ||(github.event_name == 'issue_comment' && github.event.issue.pull_request && (startsWith(github.event.comment.body, '!verify') || startsWith(github.event.comment.body, '!linux-hip-psdb') || startsWith(github.event.comment.body, '!verify release') || startsWith(github.event.comment.body, '!verify retest'))) 15 | uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/rocm_ci.yml@mainline 16 | secrets: inherit 17 | with: 18 | input_sha: ${{github.event_name == 'pull_request' && github.event.pull_request.head.sha || (github.event_name == 'push' && github.sha) || (github.event_name == 'issue_comment' && github.event.issue.pull_request.head.sha) || github.sha}} 19 | input_pr_num: ${{github.event_name == 'pull_request' && github.event.pull_request.number || (github.event_name == 'issue_comment' && github.event.issue.number) || 0}} 20 | input_pr_url: ${{github.event_name == 'pull_request' && github.event.pull_request.html_url || (github.event_name == 'issue_comment' && github.event.issue.pull_request.html_url) || ''}} 21 | input_pr_title: ${{github.event_name == 'pull_request' && github.event.pull_request.title || (github.event_name == 'issue_comment' && github.event.issue.pull_request.title) || ''}} 22 | repository_name: ${{ github.repository }} 23 | base_ref: 
${{github.event_name == 'pull_request' && github.event.pull_request.base.ref || (github.event_name == 'issue_comment' && github.event.issue.pull_request.base.ref) || github.ref}} 24 | trigger_event_type: ${{ github.event_name }} 25 | comment_text: ${{ github.event_name == 'issue_comment' && github.event.comment.body || '' }} 26 | -------------------------------------------------------------------------------- /.github/workflows/validate-pr-description.yml: -------------------------------------------------------------------------------- 1 | name: Validate PR description 2 | 3 | on: 4 | pull_request: 5 | types: [opened, edited, synchronize] 6 | 7 | jobs: 8 | validate-pr-description: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout repository 12 | uses: actions/checkout@v4 13 | 14 | - name: Set up Python 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: "3.13" 18 | 19 | - name: Validate PR description 20 | env: 21 | PR_DESCRIPTION: ${{ github.event.pull_request.body }} 22 | run: python .github/scripts/validate_pr_description.py 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .* 2 | !.gitignore 3 | !.spellcheck.local.yaml 4 | *.o 5 | *.exe 6 | *.swp 7 | lib 8 | packages 9 | build 10 | bin/hipInfo 11 | bin/hipBusBandwidth 12 | bin/hipDispatchLatency 13 | bin/hipify-clang 14 | tags 15 | samples/0_Intro/module_api/runKernel.hip.out 16 | samples/0_Intro/module_api/vcpy_isa.code 17 | samples/0_Intro/module_api/vcpy_isa.hsaco 18 | samples/0_Intro/module_api/vcpy_kernel.co 19 | samples/0_Intro/module_api/vcpy_kernel.code 20 | samples/1_Utils/hipInfo/hipInfo 21 | samples/1_Utils/hipBusBandwidth/hipBusBandwidth 22 | samples/1_Utils/hipDispatchLatency/hipDispatchLatency 23 | -------------------------------------------------------------------------------- /.jenkins/Jenkinsfile: 
// Run the full sync / build / test sequence for one backend.
//
// backendLabel is used both as the Jenkins node label to run on and, inside
// the shell scripts, to pick the AMD or NVIDIA code path (the scripts test
// whether the label contains the substring "amd").
//
// Inside the sh """...""" blocks, $backendLabel and $HIP_PATH are
// interpolated by Groovy before the script is handed to bash, while the
// backslash-escaped variables (\$PWD, \$HIP_DIR, \$CLR_DIR, ...) are left for
// bash to expand at run time.
def hipBuildTest(String backendLabel) {
    node(backendLabel) {
        stage("SYNC - ${backendLabel}") {

            // Checkout hip repository with the PR patch
            dir("${WORKSPACE}/hip") {
                checkout scm
                env.HIP_DIR = "${WORKSPACE}" + "/hip"
            }

            // Clone hip-tests repository
            dir("${WORKSPACE}/hip-tests") {
                git branch: 'develop',
                url: 'https://github.com/ROCm-Developer-Tools/hip-tests'
                env.HIP_TESTS_DIR = "${WORKSPACE}" + "/hip-tests"
            }

            // Clone clr repository
            dir("${WORKSPACE}/clr") {
                git branch: 'develop',
                credentialsId: 'branch-credentials',
                url: 'https://github.com/ROCm-Developer-Tools/clr'
                env.CLR_DIR = "${WORKSPACE}" + "/clr"
            }

            // Clone hipcc repository
            dir("${WORKSPACE}/hipcc") {
                git branch: 'develop',
                credentialsId: 'branch-credentials',
                url: 'https://github.com/ROCm-Developer-Tools/hipcc'
                env.HIPCC_DIR = "${WORKSPACE}" + "/hipcc"
            }
        }

        stage("BUILD HIP - ${backendLabel}") {
            // Running the build on clr workspace
            // Configures clr with CLR_BUILD_HIP=ON against the hip and hipcc
            // checkouts from the SYNC stage and installs into clr/build/install.
            dir("${WORKSPACE}/clr") {
                sh """#!/usr/bin/env bash
                    set -x
                    rm -rf build
                    mkdir -p build
                    cd build
                    # Check if backend label contains string "amd" or backend host is a server with amd gpu
                    if [[ $backendLabel =~ amd ]]; then
                        cmake -DCLR_BUILD_HIP=ON -DHIP_PATH=\$PWD/install -DHIPCC_BIN_DIR=\$HIPCC_DIR/bin -DHIP_COMMON_DIR=\$HIP_DIR -DCMAKE_PREFIX_PATH="/opt/rocm/" -DCMAKE_INSTALL_PREFIX=\$PWD/install ..
                    else
                        cmake -DCLR_BUILD_HIP=ON -DHIP_PLATFORM=nvidia -DHIPCC_BIN_DIR=\$HIPCC_DIR/bin -DHIP_COMMON_DIR=\$HIP_DIR -DCMAKE_INSTALL_PREFIX=\$PWD/install ..
                    fi
                    make -j\$(nproc)
                    make install -j\$(nproc)
                """
            }
        }

        stage("BUILD HIP TESTS - ${backendLabel}") {
            // Running the build on HIP TESTS workspace
            dir("${WORKSPACE}/hip-tests") {
                // Point HIP_PATH at the install tree produced by the BUILD HIP stage.
                env.HIP_PATH = "${CLR_DIR}" + "/build/install"
                sh """#!/usr/bin/env bash
                    set -x
                    rm -rf build
                    mkdir -p build
                    cd build
                    echo "testing $HIP_PATH"
                    # Check if backend label contains string "amd" or backend host is a server with amd gpu
                    if [[ $backendLabel =~ amd ]]; then
                        cmake -DHIP_PLATFORM=amd -DHIP_PATH=\$CLR_DIR/build/install ../catch
                    else
                        export HIP_PLATFORM=nvidia
                        cmake -DHIP_PLATFORM=nvidia -DHIP_PATH=\$CLR_DIR/build/install ../catch
                    fi
                    make -j\$(nproc) build_tests
                """
            }
        }

        // The test stage is aborted if it runs longer than one hour.
        timeout(time: 1, unit: 'HOURS') {
            stage("TEST - ${backendLabel}") {
                dir("${WORKSPACE}/hip-tests") {
                    // Results are written as JUnit XML, one file name per backend.
                    // On the non-AMD path three tests are excluded through ctest -E.
                    sh """#!/usr/bin/env bash
                        set -x
                        cd build
                        if [[ $backendLabel =~ amd ]]; then
                            ctest --overwrite BuildDirectory=. --output-junit hiptest_output_catch_amd.xml
                        else
                            ctest --overwrite BuildDirectory=. --output-junit hiptest_output_catch_nvidia.xml -E 'Unit_hipMemcpyHtoD_Positive_Synchronization_Behavior|Unit_hipMemcpy_Positive_Synchronization_Behavior|Unit_hipFreeNegativeHost'
                        fi
                    """
                }
            }
        }
    }
}

// Pipeline entry point: build one closure per backend label and run them in
// parallel from the bootstrap node.
timestamps {
    node('external-bootstrap') {
        skipDefaultCheckout()

        // labels belonging to each backend - AMD, NVIDIA
        String[] labels = ['hip-amd-gfx908-ubu2004', 'hip-nvidia-rtx5000-ubu2004']
        buildMap = [:]

        labels.each { backendLabel ->
            echo "backendLabel: ${backendLabel}"
            buildMap[backendLabel] = { hipBuildTest(backendLabel) }
        }
        // failFast disabled: a failure on one backend does not abort the other.
        buildMap['failFast'] = false
        parallel buildMap
    }
}
https://github.com/ROCm/clr.git ../clr 30 | - git clone --depth=1 --single-branch --branch master https://github.com/ROCm/ROCR-Runtime.git ../ROCR-Runtime 31 | post_build: 32 | - rm -rf ../clr 33 | - rm -rf ../ROCR-Runtime 34 | -------------------------------------------------------------------------------- /.spellcheck.local.yaml: -------------------------------------------------------------------------------- 1 | matrix: 2 | - name: Markdown 3 | sources: 4 | - ['!docs/doxygen/mainpage.md'] 5 | - name: reST 6 | sources: 7 | - [] 8 | - name: Cpp 9 | sources: 10 | - ['include/hip/*'] 11 | -------------------------------------------------------------------------------- /.wordlist.txt: -------------------------------------------------------------------------------- 1 | .hip_fatbin 2 | ALU 3 | ALUs 4 | AmgX 5 | APU 6 | APUs 7 | AQL 8 | AXPY 9 | asm 10 | asynchrony 11 | backtrace 12 | Bitcode 13 | bitcode 14 | bitcodes 15 | blockDim 16 | blockIdx 17 | builtins 18 | Builtins 19 | CAS 20 | clr 21 | compilable 22 | constexpr 23 | coroutines 24 | Ctx 25 | cuBLASLt 26 | cuCtx 27 | CUDA's 28 | cuDNN 29 | cuModule 30 | dataflow 31 | deallocate 32 | decompositions 33 | denormal 34 | Dereferencing 35 | dll 36 | DirectX 37 | EIGEN 38 | EIGEN's 39 | enqueue 40 | enqueues 41 | entrypoint 42 | entrypoints 43 | enum 44 | enums 45 | embeded 46 | extern 47 | fatbin 48 | fatbinary 49 | foundationally 50 | framebuffer 51 | frontends 52 | fnuz 53 | FNUZ 54 | fp 55 | gedit 56 | GPGPU 57 | gridDim 58 | GROMACS 59 | GWS 60 | hardcoded 61 | HC 62 | hcBLAS 63 | HIP-Clang 64 | HIP's 65 | hipcc 66 | hipCtx 67 | hipexamine 68 | hipified 69 | HIPify 70 | hipModule 71 | hipModuleLaunchKernel 72 | hipother 73 | HIPRTC 74 | icc 75 | IILE 76 | iGPU 77 | inlined 78 | inplace 79 | interop 80 | interoperation 81 | interoperate 82 | interoperation 83 | Interprocess 84 | interprocess 85 | Intrinsics 86 | intrinsics 87 | IPC 88 | IPs 89 | isa 90 | iteratively 91 | Lapack 92 | latencies 93 | libc 94 | 
libhipcxx 95 | libstdc 96 | lifecycle 97 | linearizing 98 | LOC 99 | LUID 100 | ltrace 101 | makefile 102 | Malloc 103 | malloc 104 | MALU 105 | maxregcount 106 | MiB 107 | memset 108 | multicore 109 | multigrid 110 | multithreading 111 | multitenant 112 | MALU 113 | NaN 114 | NCCL 115 | NDRange 116 | nonnegative 117 | NOP 118 | Numa 119 | Nsight 120 | ocp 121 | omnitrace 122 | overindex 123 | overindexing 124 | oversubscription 125 | overutilized 126 | parallelizable 127 | parallelized 128 | pixelated 129 | pragmas 130 | preallocated 131 | preconditioners 132 | predefining 133 | prefetched 134 | preprocessor 135 | printf 136 | profilers 137 | PTX 138 | PyHIP 139 | queryable 140 | prefetching 141 | quad 142 | representable 143 | RMW 144 | rocgdb 145 | ROCm's 146 | rocTX 147 | roundtrip 148 | rst 149 | RTC 150 | RTTI 151 | rvalue 152 | SAXPY 153 | scalarizing 154 | sceneries 155 | shaders 156 | SIMT 157 | sinewave 158 | SOMA 159 | SPMV 160 | structs 161 | struct's 162 | SYCL 163 | syntaxes 164 | texel 165 | texels 166 | threadIdx 167 | tradeoffs 168 | templated 169 | toolkits 170 | transfering 171 | typedefs 172 | unintuitive 173 | UMM 174 | unmap 175 | unmapped 176 | unmapping 177 | unregister 178 | upscaled 179 | variadic 180 | vulkan 181 | warpSize 182 | WinGDB 183 | zc 184 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @cpaquot_amdeng @gandryey_amdeng @skudchad_amdeng @lmoriche_amdeng 2 | 3 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2008 - 2025 Advanced Micro Devices, Inc. 
#!/bin/bash
# Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# Linker wrapper: forwards its arguments to hipcc.
#
# Usage: hipcc_cmake_linker_helper <compiler-home> [hipcc arguments...]
#
# When hipconfig reports the "hcc" or "clang" compiler, the first argument is
# exported to hipcc as HCC_HOME or HIP_CLANG_PATH respectively and the
# remaining arguments are passed through.  For any other compiler value all
# arguments, including the first, are passed to hipcc unchanged.

SOURCE="${BASH_SOURCE[0]}"
# HIP_PATH is the physical parent directory of the directory holding this script.
HIP_PATH="$( command cd -P "$( dirname "$SOURCE" )/.." && pwd )"
# Invoke hipconfig directly (no eval) and keep every path quoted so that an
# install prefix containing whitespace or glob characters still works.
HIP_COMPILER=$("$HIP_PATH/bin/hipconfig" --compiler)
if [ "$HIP_COMPILER" = "hcc" ]; then
    HCC_HOME="$1" "$HIP_PATH/bin/hipcc" "${@:2}"
elif [ "$HIP_COMPILER" = "clang" ]; then
    HIP_CLANG_PATH="$1" "$HIP_PATH/bin/hipcc" "${@:2}"
else
    "$HIP_PATH/bin/hipcc" "${@:1}"
fi
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | # THE SOFTWARE. 
21 | 22 | # usage: hipdemangleatp.sh ATP_FILE 23 | 24 | # HIP kernels 25 | kernels=$(grep grid_launch_parm $1 | cut -d" " -f1 | sort | uniq) 26 | for mangled_sym in $kernels; do 27 | real_sym=$(c++filt -p $(c++filt _$mangled_sym | cut -d: -f3 | sed 's/_functor//g' | sed 's/ /\\\ /g')) 28 | #echo "$mangled_sym => $real_sym" >> $1.log 29 | sed -i "s/$mangled_sym/$real_sym/g" $1 30 | done 31 | 32 | # HC kernels 33 | kernels=$(grep cxxamp_trampoline $1 | cut -d" " -f1 | sort | uniq) 34 | for mangled_sym in $kernels; do 35 | real_sym=$(echo $mangled_sym | sed "s/^/_/g; s/_EC_/$/g" | c++filt -p | cut -d\( -f1 | cut -d" " -f1 --complement | sed 's/ /\\\ /g') 36 | #echo "$mangled_sym => $real_sym" >> $1.log 37 | sed -i "s/$mangled_sym/$real_sym/g" $1 38 | done 39 | -------------------------------------------------------------------------------- /cmake/FindHIP/run_make2cmake.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | # THE SOFTWARE. 20 | 21 | ############################################################################### 22 | # Computes dependencies using HIPCC 23 | ############################################################################### 24 | 25 | ############################################################################### 26 | # This file converts dependency files generated using hipcc to a format that 27 | # cmake can understand. 28 | 29 | # Input variables: 30 | # 31 | # input_file:STRING=<> Dependency file to parse. Required argument 32 | # output_file:STRING=<> Output file to generate. Required argument 33 | 34 | if(NOT input_file OR NOT output_file) 35 | message(FATAL_ERROR "You must specify input_file and output_file on the command line") 36 | endif() 37 | 38 | file(READ ${input_file} depend_text) 39 | 40 | if (NOT "${depend_text}" STREQUAL "") 41 | string(REPLACE " /" "\n/" depend_text ${depend_text}) 42 | string(REGEX REPLACE "^.*:" "" depend_text ${depend_text}) 43 | string(REGEX REPLACE "[ \\\\]*\n" ";" depend_text ${depend_text}) 44 | 45 | set(dependency_list "") 46 | 47 | foreach(file ${depend_text}) 48 | string(REGEX REPLACE "^ +" "" file ${file}) 49 | if(NOT EXISTS "${file}") 50 | message(WARNING " Removing non-existent dependency file: ${file}") 51 | set(file "") 52 | endif() 53 | 54 | if(NOT IS_DIRECTORY "${file}") 55 | get_filename_component(file_absolute "${file}" ABSOLUTE) 56 | list(APPEND dependency_list "${file_absolute}") 57 | endif() 58 | endforeach() 59 | endif() 60 | 61 | # Remove the duplicate entries and sort them. 
62 | list(REMOVE_DUPLICATES dependency_list) 63 | list(SORT dependency_list) 64 | 65 | foreach(file ${dependency_list}) 66 | set(hip_hipcc_depend "${hip_hipcc_depend} \"${file}\"\n") 67 | endforeach() 68 | 69 | file(WRITE ${output_file} "# Generated by: FindHIP.cmake. Do not edit.\nSET(HIP_HIPCC_DEPEND\n ${hip_hipcc_depend})\n\n") 70 | # vim: ts=4:sw=4:expandtab:smartindent 71 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/configure -------------------------------------------------------------------------------- /docker/dockerfile-build-ubuntu-16.04: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | # THE SOFTWARE. 20 | 21 | # Parameters related to building hip 22 | ARG base_image 23 | 24 | FROM ${base_image} 25 | MAINTAINER Maneesh Gupta 26 | 27 | ARG user_uid 28 | 29 | # docker pipeline runs containers with particular uid 30 | # create a jenkins user with this specific uid so it can use sudo privileges 31 | # Grant any member of sudo group password-less sudo privileges 32 | RUN useradd --create-home -u ${user_uid} -G sudo,video --shell /bin/bash jenkins && \ 33 | mkdir -p /etc/sudoers.d/ && \ 34 | echo '%sudo ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd 35 | -------------------------------------------------------------------------------- /docker/dockerfile-hip-ubuntu-16.04: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | # THE SOFTWARE. 20 | 21 | # Parameters related to building hip 22 | ARG base_image 23 | 24 | FROM ${base_image} 25 | MAINTAINER Kent Knox 26 | 27 | # Copy the debian package of hip into the container from host 28 | COPY *.deb /tmp/ 29 | 30 | # Install the debian package 31 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y curl \ 32 | && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends --allow-unauthenticated -y \ 33 | /tmp/hip-devel-*.deb \ 34 | /tmp/hip-runtime-amd-*.deb \ 35 | /tmp/hip-doc-*.deb \ 36 | /tmp/hip-samples-* \ 37 | && rm -f /tmp/*.deb \ 38 | && apt-get clean \ 39 | && rm -rf /var/lib/apt/lists/* 40 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | /_build 2 | /_doxygen 3 | /_images 4 | /_static 5 | /_templates 6 | /doxygen/html 7 | /doxygen/xml 8 | /sphinx/_toc.yml 9 | __pycache__ 10 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. 
For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | import re 8 | import sys 9 | from pathlib import Path 10 | from typing import Any, Dict, List 11 | 12 | from rocm_docs import ROCmDocs 13 | 14 | version_numbers = [] 15 | version_file = open("../VERSION", "r") 16 | lines = version_file.readlines() 17 | for line in lines: 18 | if line[0] == '#': 19 | continue 20 | version_numbers.append(line.strip()) 21 | version_number = ".".join(version_numbers) 22 | left_nav_title = f"HIP {version_number} Documentation" 23 | 24 | # for PDF output on Read the Docs 25 | project = "HIP Documentation" 26 | author = "Advanced Micro Devices, Inc." 27 | copyright = "Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved." 28 | version = version_number 29 | release = version_number 30 | 31 | external_toc_path = "./sphinx/_toc.yml" 32 | 33 | docs_core = ROCmDocs(left_nav_title) 34 | docs_core.run_doxygen(doxygen_root="doxygen", doxygen_path="doxygen/xml") 35 | docs_core.enable_api_reference() 36 | docs_core.setup() 37 | 38 | external_projects_current_project = "hip" 39 | 40 | for sphinx_var in ROCmDocs.SPHINX_VARS: 41 | globals()[sphinx_var] = getattr(docs_core, sphinx_var) 42 | 43 | # Add the _extensions directory to Python's search path 44 | sys.path.append(str(Path(__file__).parent / 'extension')) 45 | 46 | extensions += ["sphinxcontrib.doxylink", "custom_directive"] 47 | 48 | cpp_id_attributes = ["__global__", "__device__", "__host__", "__forceinline__", "static"] 49 | cpp_paren_attributes = ["__declspec"] 50 | 51 | suppress_warnings = ["etoc.toctree"] 52 | 53 | numfig = False 54 | 55 | exclude_patterns = [ 56 | "doxygen/mainpage.md", 57 | "understand/glossary.md", 58 | 'how-to/debugging_env.rst', 59 | "data/env_variables_hip.rst" 60 | ] -------------------------------------------------------------------------------- /docs/data/how-to/hip_runtime_api/memory_management/textures/border.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/docs/data/how-to/hip_runtime_api/memory_management/textures/border.png -------------------------------------------------------------------------------- /docs/data/how-to/hip_runtime_api/memory_management/textures/clamp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/docs/data/how-to/hip_runtime_api/memory_management/textures/clamp.png -------------------------------------------------------------------------------- /docs/data/how-to/hip_runtime_api/memory_management/textures/linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/docs/data/how-to/hip_runtime_api/memory_management/textures/linear.png -------------------------------------------------------------------------------- /docs/data/how-to/hip_runtime_api/memory_management/textures/mirror.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/docs/data/how-to/hip_runtime_api/memory_management/textures/mirror.png -------------------------------------------------------------------------------- /docs/data/how-to/hip_runtime_api/memory_management/textures/nearest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/docs/data/how-to/hip_runtime_api/memory_management/textures/nearest.png -------------------------------------------------------------------------------- /docs/data/how-to/hip_runtime_api/memory_management/textures/original.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/docs/data/how-to/hip_runtime_api/memory_management/textures/original.png -------------------------------------------------------------------------------- /docs/data/how-to/hip_runtime_api/memory_management/textures/wrap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/docs/data/how-to/hip_runtime_api/memory_management/textures/wrap.png -------------------------------------------------------------------------------- /docs/data/how-to/hip_runtime_api/stream_management.drawio: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /docs/data/understand/hardware_implementation/cdna2_gcd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/docs/data/understand/hardware_implementation/cdna2_gcd.png -------------------------------------------------------------------------------- /docs/data/understand/hardware_implementation/cdna3_cu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/docs/data/understand/hardware_implementation/cdna3_cu.png -------------------------------------------------------------------------------- /docs/data/understand/hardware_implementation/rdna3_cu.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/docs/data/understand/hardware_implementation/rdna3_cu.png -------------------------------------------------------------------------------- /docs/data/understand/programming_model/cdna2_gcd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/docs/data/understand/programming_model/cdna2_gcd.png -------------------------------------------------------------------------------- /docs/data/understand/programming_model/cdna3_cu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/docs/data/understand/programming_model/cdna3_cu.png -------------------------------------------------------------------------------- /docs/data/understand/programming_model/rdna3_cu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/docs/data/understand/programming_model/rdna3_cu.png -------------------------------------------------------------------------------- /docs/doxygen-input/mainpage.txt: -------------------------------------------------------------------------------- 1 | /** 2 | * @file mainpage.cpp 3 | * @brief : DoxyGen Main Page. 4 | * @mainpage Heterogeneous-computing Interface for Portability (HIP) 5 | * @tableofcontents 6 | * 7 | * The HIP interface makes it very easy to port existing CUDA apps to run on AMD GPUs, 8 | * or to develop new apps that can run on either CUDA or AMD GPUs from a common source base. 9 | * 10 | * - HIP is very thin and has little or no performance impact over coding directly in CUDA NVCC mode. 
11 | * - HIP allows developers to use the "best" development environment and tools on each target platform. 12 | * - HIP allows coding in a single-source C++ programming language including features such as templates, C++11 lambdas, and more. 13 | * - HIPIFY tools automatically convert CUDA sources to HIP. 14 | * - Developers can specialize for CUDA or HIP to tune for performance or handle tricky cases with #ifdef. 15 | 16 | * - See the @ref API. 17 | 18 | 19 | */ 20 | -------------------------------------------------------------------------------- /docs/doxygen-input/sync.txt: -------------------------------------------------------------------------------- 1 | /** @page Synchonization 2 | * @tableofcontents 3 | 4 | * # Host-synchronous behavior: 5 | The following commands are "host-asynchronous" - meaning they do not wait for any preceding commands to complete, and may return control to the host thread before the requested operation completes: 6 | 7 | - Kernel launches (hipLaunchKernel() ) 8 | - Asynchronous memory copies (any memory copy API which contains "Async", such as hipMemcpyAsync()) 9 | - Any memory set (for example, hipMemset()); 10 | - TODO 11 | 12 | "Host-synchronous" commands have the following properties: 13 | - wait for all previous commands to complete. 14 | - will not return control back to host until the command completes. 15 | 16 | 17 | The following commands are "host-synchronous". 18 | 19 | - hipMemcpy waits for preceding work in the same stream to complete. 20 | 21 | 22 | * # Stream synchronization 23 | 24 | 25 | ### Blocking 26 | 27 | The term "blocking" has two meanings in HIP. 28 | 29 | The first refers to synchronization commands (i.e., hipStreamSynchronize, hipEventSynchronize) that cause the host CPU to wait for GPU activity to complete. 30 | These can either use an active wait where the host CPU spin-waits on the synchronization variable, or can use an interrupt-based scheme where the core is interrupted 31 | when the wait completes.
The second technique is referred to as "blocking" (i.e., hipDeviceBlockingSync, hipEventBlockingSync) while the first is referred 32 | to as "active". Active can be appropriate for short tasks where latency is critical, but comes at the expense of a CPU core dedicated to monitoring the event. 33 | 34 | ### HIP_LAUNCH_BLOCKING (also can use CUDA_LAUNCH_BLOCKING) 35 | 36 | - The following commands become host-synchronous and will not return until the requested command has completed: 37 | 38 | - Kernel launches (hipLaunchKernel). 39 | - Memory set commands (hipMemset, hipMemsetAsync). 40 | - Memory copy commands (hipMemcpy, hipMemcpyAsync). 41 | 42 | Note CUDA_LAUNCH_BLOCKING does not add any pre-serialization to the commands and does not affect the concurrent stream behavior. For example, 43 | even when CUDA_LAUNCH_BLOCKING is set, kernels or data copy commands launched to separate streams can execute concurrently. Use the NULL 44 | stream if additional stream synchronization is desired. 45 | 46 | 47 | 48 | 49 | */ 50 | -------------------------------------------------------------------------------- /docs/environment.yml: -------------------------------------------------------------------------------- 1 | name: RTD 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.10 7 | - pip 8 | - doxygen=1.9.8 9 | - pip: 10 | - -r ./sphinx/requirements.txt 11 | -------------------------------------------------------------------------------- /docs/extension/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/hip/b301ef228278172593156524f9e8aa3fde6e5229/docs/extension/__init__.py -------------------------------------------------------------------------------- /docs/extension/custom_directive.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from docutils.parsers.rst import Directive 4 | from docutils.statemachine
import StringList 5 | 6 | class TableInclude(Directive): 7 | required_arguments = 1 8 | optional_arguments = 0 9 | final_argument_whitespace = True 10 | option_spec = { 11 | 'table': str 12 | } 13 | 14 | def run(self): 15 | # Get the file path from the first argument 16 | file_path = self.arguments[0] 17 | 18 | # Get the environment to resolve the full path 19 | env = self.state.document.settings.env 20 | src_dir = os.path.abspath(env.srcdir) 21 | full_file_path = os.path.join(src_dir, file_path) 22 | 23 | # Check if the file exists 24 | if not os.path.exists(full_file_path): 25 | raise self.error(f"RST file {full_file_path} does not exist.") 26 | 27 | # Read the entire file content 28 | with open(full_file_path, 'r', encoding='utf-8') as f: 29 | content = f.read() 30 | 31 | # Find all tables with named targets 32 | table_pattern = r'(?:^\.\.\ _(.+?):\n)(.. list-table::.*?(?:\n\s*\*\s*-.*?)+)(?=\n\n|\Z)' 33 | table_matches = list(re.finditer(table_pattern, content, re.MULTILINE | re.DOTALL)) 34 | 35 | # Get the specific table name from options 36 | table_name = self.options.get('table') 37 | 38 | # If no table specified, merge compatible tables 39 | if not table_name: 40 | raise self.error("The ':table:' option is required to specify which table to include.") 41 | 42 | # Find the specific table 43 | matching_tables = [ 44 | match for match in table_matches 45 | if match.group(1).strip() == table_name 46 | ] 47 | 48 | if not matching_tables: 49 | raise self.error(f"Table '{table_name}' not found in {full_file_path}") 50 | 51 | # Extract the matched table content 52 | table_content = matching_tables[0].group(2) 53 | 54 | # Insert the table content into the current document 55 | self.state_machine.insert_input(table_content.splitlines(), full_file_path) 56 | return [] 57 | 58 | def setup(app): 59 | app.add_directive('include-table', TableInclude) 60 | -------------------------------------------------------------------------------- /docs/gen_clang_option_doc.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (C) 2019-2021 Advanced Micro Devices, Inc. All Rights Reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | # THE SOFTWARE. 21 | 22 | ## generates documentation about clang options. 
23 | 24 | clang=/opt/rocm*/llvm/bin/clang 25 | 26 | exec > clang_options.md 27 | 28 | echo "# Support of Clang options" 29 | echo " Clang version: $($clang --version | head -1|sed 's:\(.*\) (.* \(.*\)).*:\1 \2:')" 30 | echo 31 | echo "|Option|Support|Description|" 32 | echo "|-------|------|-------|" 33 | 34 | declare -A db 35 | while read a b; do 36 | if [[ "$a" != "" && "$b" != "" ]]; then 37 | db[$a]="$b" 38 | #echo "db[$a]=${db[$a]}" 39 | fi 40 | done '* ]]; then 51 | opt=$(echo $a $b| sed -e 's:\(^-[^ ]*[= ]*<[^<>]*>\) *\(.*\):\1:') 52 | desc=$(echo $a $b| sed -e 's:\(^-[^ ]*[= ]*<[^<>]*>\) *\(.*\):\2:') 53 | if [[ "$opt" == "$desc" ]]; then 54 | opt="$a" 55 | desc="$b" 56 | fi 57 | else 58 | opt="$a" 59 | desc="$b" 60 | fi 61 | supp= 62 | key=$(printf "%s" "$opt" |sed 's:\([^ =<]*\).*:\1:') 63 | if [[ "$key" != "" ]]; then 64 | supp="${db[$key]}" 65 | #echo "opt=$opt supp=${db[$opt]}" 66 | fi 67 | if [[ "$supp" == "" ]]; then 68 | if [[ "$desc" = *AArch* ||\ 69 | "$desc" = *MIPS* || \ 70 | "$desc" = *ARM* || \ 71 | "$desc" = *Arm* || \ 72 | "$desc" = *SYCL* || \ 73 | "$desc" = *PPC* || \ 74 | "$desc" = *RISC-V* || \ 75 | "$desc" = *WebAssembly* || \ 76 | "$desc" = *Objective-C* || \ 77 | "$opt" = *xray* \ 78 | ]]; then 79 | supp="n" 80 | elif [[ "$opt" = *sanity* ]]; then 81 | supp="h" 82 | else 83 | supp="s" 84 | fi 85 | fi 86 | s=$supp 87 | case $supp in 88 | s) supp="Supported";; 89 | n) supp="Unsupported";; 90 | h) supp="Supported on Host only";; 91 | esac 92 | 93 | desc=$(echo "$desc"| sed -e 's:^ *::' -e 's:|:\\|:g') 94 | #echo a=$a 95 | #echo b=$b 96 | #echo opt=$opt 97 | #echo desc=$desc 98 | if [[ "$desc" != "" ]]; then 99 | printf "%s %s\n" "$key" "$s" >>$tmpf 100 | echo '|`'$opt'`|'$supp'|`'$desc'`|' 101 | fi 102 | done 103 | -------------------------------------------------------------------------------- /docs/how-to/hip_runtime_api.rst: -------------------------------------------------------------------------------- 1 | .. 
meta:: 2 | :description: HIP runtime API usage 3 | :keywords: AMD, ROCm, HIP, CUDA, HIP runtime API How to, 4 | 5 | .. _hip_runtime_api_how-to: 6 | 7 | ******************************************************************************** 8 | Using HIP runtime API 9 | ******************************************************************************** 10 | 11 | The HIP runtime API provides C and C++ functionalities to manage event, stream, 12 | and memory on GPUs. On the AMD platform, the HIP runtime uses 13 | :doc:`Compute Language Runtime (CLR) <../understand/amd_clr>`, while on NVIDIA 14 | CUDA platform, it is only a thin layer over the CUDA runtime or Driver API. 15 | 16 | - **CLR** contains source code for AMD's compute language runtimes: ``HIP`` and 17 | ``OpenCL™``. CLR includes the ``HIP`` implementation on the AMD 18 | platform: `hipamd `_ and the 19 | ROCm Compute Language Runtime (``rocclr``). ``rocclr`` is a 20 | virtual device interface that enables the HIP runtime to interact with 21 | different backends such as :doc:`ROCr ` on Linux or PAL on 22 | Windows. CLR also includes the `OpenCL runtime `_ 23 | implementation. 24 | - The **CUDA runtime** is built on top of the CUDA driver API, which is a C API 25 | with lower-level access to NVIDIA GPUs. For details about the CUDA driver and 26 | runtime API with reference to HIP, see :doc:`CUDA driver API porting guide <../how-to/hip_porting_driver_api>`. 27 | 28 | The backends of HIP runtime API under AMD and NVIDIA platform are summarized in 29 | the following figure: 30 | 31 | .. figure:: ../data/how-to/hip_runtime_api/runtimes.svg 32 | 33 | .. note:: 34 | 35 | On NVIDIA platform HIP runtime API calls CUDA runtime or CUDA driver via 36 | hipother interface. For more information, see the `hipother repository `_. 
37 | 38 | Here are the various HIP Runtime API high level functions: 39 | 40 | * :doc:`./hip_runtime_api/initialization` 41 | * :doc:`./hip_runtime_api/memory_management` 42 | * :doc:`./hip_runtime_api/error_handling` 43 | * :doc:`./hip_runtime_api/asynchronous` 44 | * :doc:`./hip_runtime_api/cooperative_groups` 45 | * :doc:`./hip_runtime_api/hipgraph` 46 | * :doc:`./hip_runtime_api/call_stack` 47 | * :doc:`./hip_runtime_api/multi_device` 48 | * :doc:`./hip_runtime_api/opengl_interop` 49 | * :doc:`./hip_runtime_api/external_interop` -------------------------------------------------------------------------------- /docs/how-to/hip_runtime_api/call_stack.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: This page describes call stack concept in HIP 3 | :keywords: AMD, ROCm, HIP, call stack 4 | 5 | ******************************************************************************* 6 | Call stack 7 | ******************************************************************************* 8 | 9 | The call stack is a data structure for managing function calls, by saving the 10 | state of the current function. Each time a function is called, a new call frame 11 | is added to the top of the stack, containing information such as local 12 | variables, return addresses and function parameters. When the function 13 | execution completes, the frame is removed from the stack and loaded back into 14 | the corresponding registers. This concept allows the program to return to the 15 | calling function and continue execution from where it left off. 16 | 17 | The call stack for each thread must track its function calls, local variables, 18 | and return addresses. However, in GPU programming, the memory required to store 19 | the call stack increases due to the parallelism inherent to the GPUs. NVIDIA 20 | and AMD GPUs use different approaches. 
NVIDIA GPUs have the independent thread 21 | scheduling feature where each thread has its own call stack and effective 22 | program counter. On AMD GPUs threads are grouped; each warp has its own call 23 | stack and program counter. Warps are described and explained in the 24 | :ref:`inherent_thread_hierarchy` 25 | 26 | If a thread or warp exceeds its stack size, a stack overflow occurs, causing 27 | kernel failure. This can be detected using debuggers. 28 | 29 | Call stack management with HIP 30 | =============================================================================== 31 | 32 | You can adjust the call stack size as shown in the following example, allowing 33 | fine-tuning based on specific kernel requirements. This helps prevent stack 34 | overflow errors by ensuring sufficient stack memory is allocated. 35 | 36 | .. code-block:: cpp 37 | 38 | #include 39 | #include 40 | 41 | #define HIP_CHECK(expression) \ 42 | { \ 43 | const hipError_t status = expression; \ 44 | if(status != hipSuccess){ \ 45 | std::cerr << "HIP error " \ 46 | << status << ": " \ 47 | << hipGetErrorString(status) \ 48 | << " at " << __FILE__ << ":" \ 49 | << __LINE__ << std::endl; \ 50 | } \ 51 | } 52 | 53 | int main() 54 | { 55 | size_t stackSize; 56 | HIP_CHECK(hipDeviceGetLimit(&stackSize, hipLimitStackSize)); 57 | std::cout << "Default stack size: " << stackSize << " bytes" << std::endl; 58 | 59 | // Set a new stack size 60 | size_t newStackSize = 1024 * 8; // 8 KiB 61 | HIP_CHECK(hipDeviceSetLimit(hipLimitStackSize, newStackSize)); 62 | 63 | HIP_CHECK(hipDeviceGetLimit(&stackSize, hipLimitStackSize)); 64 | std::cout << "Updated stack size: " << stackSize << " bytes" << std::endl; 65 | 66 | return 0; 67 | } 68 | 69 | Depending on the GPU model, at full occupancy, it can consume a significant 70 | amount of memory. For instance, an MI300X with 304 compute units (CU) and up to 71 | 2048 threads per CU could use 304 · 2048 · 1024 bytes = 608 MiB for the call 72 | stack by default. 
73 | 74 | Handling recursion and deep function calls 75 | ------------------------------------------------------------------------------- 76 | 77 | Similar to CPU programming, recursive functions and deeply nested function 78 | calls are supported. However, developers must ensure that these functions do 79 | not exceed the available stack memory, considering the huge amount of memory 80 | needed for the call stack due to the GPU's inherent parallelism. This can be 81 | achieved by increasing stack size or optimizing code to reduce stack usage. To 82 | detect stack overflow, add proper error handling or use debugging tools. 83 | 84 | .. code-block:: cpp 85 | 86 | #include <hip/hip_runtime.h> 87 | #include <iostream> 88 | 89 | #define HIP_CHECK(expression) \ 90 | { \ 91 | const hipError_t status = expression; \ 92 | if(status != hipSuccess){ \ 93 | std::cerr << "HIP error " \ 94 | << status << ": " \ 95 | << hipGetErrorString(status) \ 96 | << " at " << __FILE__ << ":" \ 97 | << __LINE__ << std::endl; \ 98 | } \ 99 | } 100 | 101 | __device__ unsigned long long fibonacci(unsigned long long n) 102 | { 103 | if (n == 0 || n == 1) 104 | { 105 | return n; 106 | } 107 | return fibonacci(n - 1) + fibonacci(n - 2); 108 | } 109 | 110 | __global__ void kernel(unsigned long long n) 111 | { 112 | unsigned long long result = fibonacci(n); 113 | const size_t x = threadIdx.x + blockDim.x * blockIdx.x; 114 | 115 | if (x == 0) 116 | printf("fibonacci(%llu) = %llu \n", n, result); 117 | } 118 | 119 | int main() 120 | { 121 | kernel<<<1, 1>>>(10); 122 | HIP_CHECK(hipDeviceSynchronize()); 123 | 124 | // With the -O0 optimization option, the following launch hits the stack limit 125 | // kernel<<<1, 256>>>(2048); 126 | // HIP_CHECK(hipDeviceSynchronize()); 127 | 128 | return 0; 129 | } 130 | -------------------------------------------------------------------------------- /docs/how-to/hip_runtime_api/error_handling.rst: -------------------------------------------------------------------------------- 1 | .. 
meta:: 2 | :description: Error Handling 3 | :keywords: AMD, ROCm, HIP, error handling, error 4 | 5 | .. _error_handling: 6 | 7 | ******************************************************************************** 8 | Error handling 9 | ******************************************************************************** 10 | 11 | HIP provides functionality to detect, report, and manage errors that occur 12 | during the execution of HIP runtime functions or when launching kernels. Every 13 | HIP runtime function, apart from launching kernels, has :cpp:type:`hipError_t` 14 | as return type. :cpp:func:`hipGetLastError` and :cpp:func:`hipPeekAtLastError` 15 | can be used for catching errors from kernel launches, as kernel launches don't 16 | return an error directly. HIP maintains an internal state, that includes the 17 | last error code. :cpp:func:`hipGetLastError` returns and resets that error to 18 | ``hipSuccess``, while :cpp:func:`hipPeekAtLastError` just returns the error 19 | without changing it. To get a human readable version of the errors, 20 | :cpp:func:`hipGetErrorString` and :cpp:func:`hipGetErrorName` can be used. 21 | 22 | .. note:: 23 | 24 | :cpp:func:`hipGetLastError` returns the returned error code of the last HIP 25 | runtime API call even if it's ``hipSuccess``, while ``cudaGetLastError`` 26 | returns the error returned by any of the preceding CUDA APIs in the same 27 | host thread. :cpp:func:`hipGetLastError` behavior will be matched with 28 | ``cudaGetLastError`` in ROCm release 7.0. 29 | 30 | Best practices of HIP error handling: 31 | 32 | 1. Check errors after each API call - Avoid error propagation. 33 | 2. Use macros for error checking - Check :ref:`hip_check_macros`. 34 | 3. Handle errors gracefully - Free resources and provide meaningful error 35 | messages to the user. 36 | 37 | For more details on the error handling functions, see :ref:`error handling 38 | functions reference page `. 39 | 40 | .. 
_hip_check_macros: 41 | 42 | HIP check macros 43 | ================================================================================ 44 | 45 | HIP uses check macros to simplify error checking and reduce code duplication. 46 | The ``HIP_CHECK`` macro is mainly used to detect and report errors. It can 47 | also exit from the application with an ``exit(1);`` function call after 48 | printing the error. The ``HIP_CHECK`` macro example: 49 | 50 | .. code-block:: cpp 51 | 52 | #define HIP_CHECK(expression) \ 53 | { \ 54 | const hipError_t status = expression; \ 55 | if(status != hipSuccess){ \ 56 | std::cerr << "HIP error " \ 57 | << status << ": " \ 58 | << hipGetErrorString(status) \ 59 | << " at " << __FILE__ << ":" \ 60 | << __LINE__ << std::endl; \ 61 | } \ 62 | } 63 | 64 | Complete example 65 | ================================================================================ 66 | 67 | A complete example that demonstrates error handling with a simple kernel that 68 | adds two values: 69 | 70 | .. code-block:: cpp 71 | 72 | #include <hip/hip_runtime.h> 73 | #include <vector> 74 | #include <iostream> 75 | 76 | #define HIP_CHECK(expression) \ 77 | { \ 78 | const hipError_t status = expression; \ 79 | if(status != hipSuccess){ \ 80 | std::cerr << "HIP error " \ 81 | << status << ": " \ 82 | << hipGetErrorString(status) \ 83 | << " at " << __FILE__ << ":" \ 84 | << __LINE__ << std::endl; \ 85 | } \ 86 | } 87 | 88 | // Addition of two values. 89 | __global__ void add(int *a, int *b, int *c, size_t size) { 90 | const size_t index = threadIdx.x + blockDim.x * blockIdx.x; 91 | if(index < size) { 92 | c[index] = a[index] + b[index]; 93 | } 94 | } 95 | 96 | int main() { 97 | constexpr int numOfBlocks = 256; 98 | constexpr int threadsPerBlock = 256; 99 | constexpr size_t arraySize = 1U << 16; 100 | 101 | std::vector<int> a(arraySize), b(arraySize), c(arraySize); 102 | int *d_a, *d_b, *d_c; 103 | 104 | // Setup input values. 
105 | std::fill(a.begin(), a.end(), 1); 106 | std::fill(b.begin(), b.end(), 2); 107 | 108 | // Allocate device copies of a, b and c. 109 | HIP_CHECK(hipMalloc(&d_a, arraySize * sizeof(*d_a))); 110 | HIP_CHECK(hipMalloc(&d_b, arraySize * sizeof(*d_b))); 111 | HIP_CHECK(hipMalloc(&d_c, arraySize * sizeof(*d_c))); 112 | 113 | // Copy input values to device. 114 | HIP_CHECK(hipMemcpy(d_a, a.data(), arraySize * sizeof(*d_a), hipMemcpyHostToDevice)); 115 | HIP_CHECK(hipMemcpy(d_b, b.data(), arraySize * sizeof(*d_b), hipMemcpyHostToDevice)); 116 | 117 | // Launch add() kernel on GPU. 118 | hipLaunchKernelGGL(add, dim3(numOfBlocks), dim3(threadsPerBlock), 0, 0, d_a, d_b, d_c, arraySize); 119 | // Check the kernel launch 120 | HIP_CHECK(hipGetLastError()); 121 | // Check for kernel execution error 122 | HIP_CHECK(hipDeviceSynchronize()); 123 | 124 | // Copy the result back to the host. 125 | HIP_CHECK(hipMemcpy(c.data(), d_c, arraySize * sizeof(*d_c), hipMemcpyDeviceToHost)); 126 | 127 | // Cleanup allocated memory. 128 | HIP_CHECK(hipFree(d_a)); 129 | HIP_CHECK(hipFree(d_b)); 130 | HIP_CHECK(hipFree(d_c)); 131 | 132 | // Print the result. 133 | std::cout << a[0] << " + " << b[0] << " = " << c[0] << std::endl; 134 | 135 | return 0; 136 | } 137 | -------------------------------------------------------------------------------- /docs/how-to/hip_runtime_api/external_interop.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: HIP provides an external resource interoperability API that 3 | allows efficient data sharing between HIP's computing power and 4 | resources created by other APIs, such as Vulkan. 
5 | :keywords: AMD, ROCm, HIP, external, interop, interoperability 6 | 7 | ******************************************************************************* 8 | External resource interoperability 9 | ******************************************************************************* 10 | 11 | This feature allows HIP to work with resources -- like memory and semaphores -- 12 | created by other APIs. This means resources can be used from APIs like CUDA, 13 | OpenCL and Vulkan within HIP, making it easier to integrate HIP into existing 14 | projects. 15 | 16 | To use external resources in HIP, you typically follow these steps: 17 | 18 | - Import resources from other APIs using HIP provided functions 19 | - Use external resources as if they were created in HIP 20 | - Destroy the HIP resource object to clean up 21 | 22 | Semaphore Functions 23 | =============================================================================== 24 | 25 | Semaphore functions are essential for synchronization in parallel computing. 26 | These functions facilitate communication and coordination between different 27 | parts of a program or between different programs. By managing semaphores, tasks 28 | are executed in the correct order, and resources are utilized effectively. 29 | Semaphore functions ensure smooth operation, preventing conflicts and 30 | maintaining the integrity of processes; upholding the integrity and performance 31 | of concurrent processes. 32 | 33 | External semaphore functions can be used in HIP as described in :ref:`external_resource_interoperability_reference`. 34 | 35 | Memory Functions 36 | =============================================================================== 37 | 38 | HIP external memory functions focus on the efficient sharing and management of 39 | memory resources. These functions enable importing memory created by external 40 | systems, enabling the HIP program to use this memory seamlessly. 
Memory 41 | functions include mapping memory for effective use and ensuring proper cleanup 42 | to prevent resource leaks. This is critical for performance, particularly in 43 | applications handling large datasets or complex structures such as textures in 44 | graphics. Proper memory management ensures stability and efficient resource 45 | utilization. 46 | 47 | Example 48 | =============================================================================== 49 | 50 | ROCm examples include a 51 | `HIP--Vulkan interoperation example `_ 52 | that demonstrates how to perform interoperation between HIP and Vulkan. 53 | 54 | In this example, a simple HIP kernel is used to compute a sine wave, which is 55 | then rendered to a window as a graphical output using Vulkan. The process 56 | requires several initialization steps, such as setting up a HIP context, 57 | creating a Vulkan instance, and configuring the GPU device and queue. After 58 | these initial steps, the kernel executes the sine wave computation, and Vulkan 59 | continuously updates the window framebuffer to display the computed data until 60 | the window is closed. 61 | 62 | The following code converts a Vulkan memory handle to its equivalent HIP 63 | handle. The input ``VkDeviceMemory`` and the created HIP memory represent the 64 | same physical area of GPU memory, through the handles of each respective API. 65 | Writing to the buffer in one API will allow us to read the results through the 66 | other. Note that access to the buffer should be synchronized between the APIs, 67 | for example using queue syncs or semaphores. 68 | 69 | .. 70 | 71 | .. literalinclude:: ../../tools/example_codes/external_interop.hip 72 | :start-after: // [Sphinx vulkan memory to hip start] 73 | :end-before: // [Sphinx vulkan memory to hip end] 74 | :language: cpp 75 | 76 | .. 77 | 78 | The Vulkan semaphore is converted to a HIP semaphore as shown in the following 79 | example. 
Signaling on the semaphore in one API will allow the other API to wait 80 | on it, which is how we can guarantee synchronized access to resources in a 81 | cross-API manner. 82 | 83 | .. 84 | 85 | .. literalinclude:: ../../tools/example_codes/external_interop.hip 86 | :start-after: // [Sphinx semaphore import start] 87 | :end-before: // [Sphinx semaphore import end] 88 | :language: cpp 89 | 90 | .. 91 | 92 | When the HIP external memory is exported from Vulkan and imported to HIP, it is 93 | not yet ready for use. The Vulkan handle is shared, allowing for memory sharing 94 | rather than copying during the export process. To actually use the memory, we 95 | need to map it to a pointer so that we may pass it to the kernel so that it can 96 | be read from and written to. The external memory is mapped to HIP in the following 97 | example: 98 | 99 | .. 100 | 101 | .. literalinclude:: ../../tools/example_codes/external_interop.hip 102 | :start-after: // [Sphinx map external memory start] 103 | :end-before: // [Sphinx map external memory end] 104 | :language: cpp 105 | 106 | .. 107 | 108 | Wait until the buffer is ready and not under modification on the Vulkan side: 109 | 110 | .. 111 | 112 | .. literalinclude:: ../../tools/example_codes/external_interop.hip 113 | :start-after: // [Sphinx wait semaphore start] 114 | :end-before: // [Sphinx wait semaphore end] 115 | :language: cpp 116 | 117 | .. 118 | 119 | The sinewave kernel implementation: 120 | 121 | .. 122 | 123 | .. literalinclude:: ../../tools/example_codes/external_interop.hip 124 | :start-after: [Sphinx sinewave kernel start] 125 | :end-before: // [Sphinx sinewave kernel end] 126 | :language: cpp 127 | 128 | .. 129 | 130 | Signal to Vulkan that we are done with the buffer and that it can proceed with 131 | rendering: 132 | 133 | .. 134 | 135 | .. 
literalinclude:: ../../tools/example_codes/external_interop.hip 136 | :start-after: // [Sphinx signal semaphore start] 137 | :end-before: // [Sphinx signal semaphore end] 138 | :language: cpp 139 | 140 | .. -------------------------------------------------------------------------------- /docs/how-to/hip_runtime_api/initialization.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: Initialization. 3 | :keywords: AMD, ROCm, HIP, initialization 4 | 5 | .. _initialization: 6 | 7 | ******************************************************************************** 8 | Initialization 9 | ******************************************************************************** 10 | 11 | The initialization involves setting up the environment and resources needed for 12 | using GPUs. The following steps are covered with the initialization: 13 | 14 | - Setting up the HIP runtime 15 | 16 | This includes reading the environment variables set during init, setting up 17 | the active or visible devices, loading necessary libraries, setting up 18 | internal buffers for memory copies or cooperative launches, initialize the 19 | compiler as well as HSA runtime and checks any errors due to lack of resources 20 | or no active devices. 21 | 22 | - Querying and setting GPUs 23 | 24 | Identifying and querying the available GPU devices on the system. 25 | 26 | - Setting up contexts 27 | 28 | Creating contexts for each GPU device, which are essential for managing 29 | resources and executing kernels. For further details, check the :ref:`context 30 | section `. 31 | 32 | Initialize the HIP runtime 33 | ================================================================================ 34 | 35 | The HIP runtime is initialized automatically when the first HIP API call is 36 | made. However, you can explicitly initialize it using :cpp:func:`hipInit`, 37 | to be able to control the timing of the initialization. 
The manual 38 | initialization can be useful to ensure that the GPU is initialized and 39 | ready, or to isolate GPU initialization time from other parts of 40 | your program. 41 | 42 | .. note:: 43 | 44 | You can use :cpp:func:`hipDeviceReset` to delete all streams created, memory 45 | allocated, kernels running and events created by the current process. Any new 46 | HIP API call initializes the HIP runtime again. 47 | 48 | Querying and setting GPUs 49 | ================================================================================ 50 | 51 | If multiple GPUs are available in the system, you can query and select the 52 | desired GPU(s) to use based on device properties, such as size of global memory, 53 | size shared memory per block, support of cooperative launch and support of 54 | managed memory. 55 | 56 | Querying GPUs 57 | -------------------------------------------------------------------------------- 58 | 59 | The properties of a GPU can be queried using :cpp:func:`hipGetDeviceProperties`, 60 | which returns a struct of :cpp:struct:`hipDeviceProp_t`. The properties in the 61 | struct can be used to identify a device or give an overview of hardware 62 | characteristics, that might make one GPU better suited for the task than others. 63 | 64 | The :cpp:func:`hipGetDeviceCount` function returns the number of available GPUs, 65 | which can be used to loop over the available GPUs. 66 | 67 | Example code of querying GPUs: 68 | 69 | .. 
code-block:: cpp 70 | 71 | #include <hip/hip_runtime.h> 72 | #include <iostream> 73 | 74 | int main() { 75 | 76 | int deviceCount; 77 | if (hipGetDeviceCount(&deviceCount) == hipSuccess){ 78 | for (int i = 0; i < deviceCount; ++i){ 79 | hipDeviceProp_t prop; 80 | if ( hipGetDeviceProperties(&prop, i) == hipSuccess) 81 | std::cout << "Device " << i << ": " << prop.name << std::endl; 82 | } 83 | } 84 | 85 | return 0; 86 | } 87 | 88 | Setting the GPU 89 | -------------------------------------------------------------------------------- 90 | 91 | The :cpp:func:`hipSetDevice` function selects the GPU to be used for subsequent HIP 92 | operations. This function performs several key tasks: 93 | 94 | - Context Binding 95 | 96 | Binds the current thread to the context of the specified GPU device. This 97 | ensures that all subsequent operations are executed on the selected device. 98 | 99 | - Resource Allocation 100 | 101 | Prepares the device for resource allocation, such as memory allocation and 102 | stream creation. 103 | 104 | - Check device availability 105 | 106 | Checks for errors in device selection and returns an error if the specified 107 | device is not available or not capable of executing HIP operations. 108 | -------------------------------------------------------------------------------- /docs/how-to/hip_runtime_api/memory_management.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: Memory management and its usage 3 | :keywords: AMD, ROCm, HIP, CUDA, memory management 4 | 5 | .. _memory_management: 6 | 7 | ******************************************************************************** 8 | Memory management 9 | ******************************************************************************** 10 | 11 | Memory management is an important part of the HIP runtime API, when creating 12 | high-performance applications. Both allocating and copying memory can result in 13 | bottlenecks, which can significantly impact performance. 
14 | 15 | The programming model is based on a system with a host and a device, each having 16 | its own distinct memory. Kernels operate on :ref:`device_memory`, while host functions 17 | operate on :ref:`host_memory`. 18 | 19 | The runtime offers functions for allocating, freeing, and copying device memory, 20 | along with transferring data between host and device memory. 21 | 22 | Here are the various memory management techniques: 23 | 24 | * :ref:`coherence_control` 25 | * :ref:`unified_memory` 26 | * :ref:`virtual_memory` 27 | * :ref:`stream_ordered_memory_allocator_how-to` 28 | 29 | Memory allocation 30 | ================================================================================ 31 | 32 | The API calls and the resulting allocations are listed here: 33 | 34 | .. list-table:: Memory coherence control 35 | :header-rows: 1 36 | :align: center 37 | 38 | * - API 39 | - Data location 40 | - Allocation 41 | * - System allocated 42 | - Host 43 | - :ref:`Pageable ` 44 | * - :cpp:func:`hipMallocManaged` 45 | - Host 46 | - :ref:`Managed ` 47 | * - :cpp:func:`hipHostMalloc` 48 | - Host 49 | - :ref:`Pinned ` 50 | * - :cpp:func:`hipMalloc` 51 | - Device 52 | - Pinned 53 | -------------------------------------------------------------------------------- /docs/how-to/hip_runtime_api/opengl_interop.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: HIP provides an OpenGL interoperability API that allows 3 | efficient data sharing between HIP's computing power and 4 | OpenGL's graphics rendering. 5 | :keywords: AMD, ROCm, HIP, OpenGL, interop, interoperability 6 | 7 | ******************************************************************************* 8 | OpenGL interoperability 9 | ******************************************************************************* 10 | 11 | The HIP--OpenGL interoperation involves mapping OpenGL resources, such as 12 | buffers and textures, for HIP to interact with OpenGL. 
This mapping process 13 | enables HIP to utilize these resources directly, bypassing the need for costly 14 | data transfers between the CPU and GPU. This capability is useful in 15 | applications that require both intensive GPU computation and real-time 16 | visualization. 17 | 18 | The graphics resources must be registered using functions like 19 | :cpp:func:`hipGraphicsGLRegisterBuffer` or :cpp:func:`hipGraphicsGLRegisterImage` 20 | then they can be mapped to HIP with :cpp:func:`hipGraphicsMapResources` 21 | function. 22 | 23 | After mapping, the :cpp:func:`hipGraphicsResourceGetMappedPointer` or 24 | :cpp:func:`hipGraphicsSubResourceGetMappedArray` functions used to retrieve a 25 | device pointer to the mapped resource, which can then be used in HIP kernels. 26 | 27 | Unmapping resources with :cpp:func:`hipGraphicsUnmapResources` after 28 | computations ensure proper resource management. 29 | 30 | Example 31 | =============================================================================== 32 | 33 | ROCm examples have a `HIP--OpenGL interoperation example `_, 34 | where a simple HIP kernel is used to simulate a sine wave and rendered to a 35 | window as a grid of triangles using OpenGL. For a working example, there are 36 | multiple initialization steps needed like creating and opening a window, 37 | initializing OpenGL or selecting the OpenGL-capable device. After the 38 | initialization in the example, the kernel simulates the sinewave and updates 39 | the window's framebuffer in a cycle until the window is closed. 40 | 41 | .. note:: 42 | 43 | The more recent OpenGL functions are loaded with `OpenGL loader `_, 44 | as these are not loaded by default on all platforms. The use of a custom 45 | loader is shown in the following example 46 | 47 | .. 48 | 49 | .. 
literalinclude:: ../../tools/example_codes/opengl_interop.hip 50 | :start-after: // [Sphinx opengl functions load start] 51 | :end-before: // [Sphinx opengl functions load end] 52 | :language: cpp 53 | 54 | .. 55 | 56 | The OpenGL buffer is imported to HIP in the following way: 57 | 58 | .. 59 | 60 | .. literalinclude:: ../../tools/example_codes/opengl_interop.hip 61 | :start-after: // [Sphinx buffer register and get start] 62 | :end-before: // [Sphinx buffer register and get end] 63 | :language: cpp 64 | 65 | .. 66 | 67 | The imported pointer is manipulated in the sinewave kernel as shown in the 68 | following example: 69 | 70 | .. 71 | 72 | .. literalinclude:: ../../tools/example_codes/opengl_interop.hip 73 | :start-after: /// [Sphinx sinewave kernel start] 74 | :end-before: /// [Sphinx sinewave kernel end] 75 | :language: cpp 76 | 77 | .. literalinclude:: ../../tools/example_codes/opengl_interop.hip 78 | :start-after: // [Sphinx buffer use in kernel start] 79 | :end-before: // [Sphinx buffer use in kernel end] 80 | :language: cpp 81 | 82 | .. 83 | 84 | The HIP graphics resource that is imported from the OpenGL buffer and is not 85 | needed anymore should be unmapped and unregistered as shown in the following way: 86 | 87 | .. 88 | 89 | .. literalinclude:: ../../tools/example_codes/opengl_interop.hip 90 | :start-after: // [Sphinx unregister start] 91 | :end-before: // [Sphinx unregister end] 92 | :language: cpp 93 | 94 | .. 95 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | # HIP documentation 8 | 9 | The Heterogeneous-computing Interface for Portability (HIP) is a C++ runtime API 10 | and kernel language that lets you create portable applications for AMD and 11 | NVIDIA GPUs from a single source code. 
For more information, see [What is HIP?](./what_is_hip) 12 | 13 | Installation instructions are available from: 14 | 15 | * [Installing HIP](./install/install) 16 | * [Building HIP from source](./install/build) 17 | 18 | The HIP documentation is organized into the following categories: 19 | 20 | ::::{grid} 1 2 2 2 21 | :gutter: 3 22 | 23 | :::{grid-item-card} Programming guide 24 | 25 | * [Introduction](./programming_guide) 26 | * {doc}`./understand/programming_model` 27 | * {doc}`./understand/hardware_implementation` 28 | * {doc}`./understand/compilers` 29 | * {doc}`./how-to/performance_guidelines` 30 | * [Debugging with HIP](./how-to/debugging) 31 | * {doc}`./how-to/logging` 32 | * {doc}`./how-to/hip_runtime_api` 33 | * {doc}`./how-to/hip_cpp_language_extensions` 34 | * {doc}`./how-to/kernel_language_cpp_support` 35 | * [HIP porting guide](./how-to/hip_porting_guide) 36 | * [HIP porting: driver API guide](./how-to/hip_porting_driver_api) 37 | * {doc}`./how-to/hip_rtc` 38 | * {doc}`./understand/amd_clr` 39 | 40 | ::: 41 | 42 | :::{grid-item-card} Reference 43 | 44 | * [HIP runtime API](./reference/hip_runtime_api_reference) 45 | * [HSA runtime API for ROCm](./reference/virtual_rocr) 46 | * [HIP math API](./reference/math_api) 47 | * [HIP environment variables](./reference/env_variables) 48 | * [CUDA to HIP API Function Comparison](./reference/api_syntax) 49 | * [List of deprecated APIs](./reference/deprecated_api_list) 50 | * [FP8 numbers in HIP](./reference/fp8_numbers) 51 | * {doc}`./reference/hardware_features` 52 | 53 | ::: 54 | 55 | :::{grid-item-card} Tutorial 56 | 57 | * [HIP basic examples](https://github.com/ROCm/rocm-examples/tree/develop/HIP-Basic) 58 | * [HIP examples](https://github.com/ROCm/rocm-examples) 59 | * [SAXPY tutorial](./tutorial/saxpy) 60 | * [Reduction tutorial](./tutorial/reduction) 61 | * [Cooperative groups tutorial](./tutorial/cooperative_groups_tutorial) 62 | 63 | ::: 64 | 65 | :::: 66 | 67 | Known issues are listed on the [HIP 
GitHub repository](https://github.com/ROCm/HIP/issues). 68 | 69 | To contribute features or functions to the HIP project, refer to [Contributing to HIP](https://github.com/ROCm/HIP/blob/develop/CONTRIBUTING.md). 70 | To contribute to the documentation, refer to {doc}`Contributing to ROCm docs ` page. 71 | 72 | You can find licensing information on the [Licensing](https://rocm.docs.amd.com/en/latest/about/license.html) page. 73 | -------------------------------------------------------------------------------- /docs/install/install.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: This page explains how to install HIP 3 | :keywords: AMD, ROCm, HIP, install, installation 4 | 5 | ******************************************* 6 | Install HIP 7 | ******************************************* 8 | 9 | HIP can be installed on AMD (ROCm with HIP-Clang) and NVIDIA (CUDA with NVCC) platforms. 10 | 11 | .. note:: 12 | 13 | The version definition for the HIP runtime is different from CUDA. On AMD 14 | platforms, the :cpp:func:`hipRuntimeGetVersion` function returns the HIP 15 | runtime version. On NVIDIA platforms, this function returns the CUDA runtime 16 | version. 17 | 18 | .. _install_prerequisites: 19 | 20 | Prerequisites 21 | ======================================= 22 | 23 | .. tab-set:: 24 | 25 | .. tab-item:: AMD 26 | :sync: amd 27 | 28 | Refer to the Prerequisites section in the ROCm install guides: 29 | 30 | * :doc:`rocm-install-on-linux:reference/system-requirements` 31 | * :doc:`rocm-install-on-windows:reference/system-requirements` 32 | 33 | .. tab-item:: NVIDIA 34 | :sync: nvidia 35 | 36 | With NVIDIA GPUs, HIP requires unified memory. All CUDA-enabled NVIDIA 37 | GPUs with compute capability 5.0 or later should be supported. For more 38 | information, see `NVIDIA's list of CUDA enabled GPUs `_. 39 | 40 | Installation 41 | ======================================= 42 | 43 | .. tab-set:: 44 | 45 | .. 
tab-item:: AMD 46 | :sync: amd 47 | 48 | HIP is automatically installed during the ROCm installation. If you haven't yet installed ROCm, you 49 | can find installation instructions here: 50 | 51 | * :doc:`rocm-install-on-linux:index` 52 | * :doc:`rocm-install-on-windows:index` 53 | 54 | By default, HIP is installed into ``/opt/rocm``. 55 | 56 | .. note:: 57 | There is no autodetection for the HIP installation. If you choose to install it somewhere other than the default location, you must set the ``HIP_PATH`` environment variable as explained in `Build HIP from source <./build.html>`_. 58 | 59 | .. tab-item:: NVIDIA 60 | :sync: nvidia 61 | 62 | #. Install the NVIDIA toolkit. 63 | 64 | The latest release can be found here: 65 | `CUDA Toolkit `_. 66 | 67 | #. Set up the Radeon repo. 68 | 69 | .. code-block:: shell 70 | 71 | # Replace the URL with the appropriate link from the table below 72 | wget https://repo.radeon.com/amdgpu-install/6.2/distro/version_name/amdgpu-install_6.2.60200-1_all.deb 73 | sudo apt install ./amdgpu-install_6.2.60200-1_all.deb 74 | sudo apt update 75 | 76 | .. list-table:: amdgpu-install links 77 | :widths: 25 100 78 | :header-rows: 1 79 | 80 | * - Ubuntu version 81 | - URL 82 | * - 24.04 83 | - https://repo.radeon.com/amdgpu-install/6.2.4/ubuntu/noble/amdgpu-install_6.2.60204-1_all.deb 84 | * - 22.04 85 | - https://repo.radeon.com/amdgpu-install/6.2.4/ubuntu/jammy/amdgpu-install_6.2.60204-1_all.deb 86 | 87 | #. Install the ``hip-runtime-nvidia`` and ``hip-dev`` packages. This installs the CUDA SDK and HIP 88 | porting layer. 89 | 90 | .. code-block:: shell 91 | 92 | apt-get install hip-runtime-nvidia hip-dev 93 | 94 | The default paths are: 95 | * CUDA SDK: ``/usr/local/cuda`` 96 | * HIP: ``/opt/rocm`` 97 | 98 | #. Set the HIP_PLATFORM to nvidia. 99 | 100 | .. 
code-block:: shell 101 | 102 | export HIP_PLATFORM="nvidia" 103 | 104 | Verify your installation 105 | ========================================================== 106 | 107 | Run ``hipconfig`` in your installation path. 108 | 109 | .. code-block:: shell 110 | 111 | /opt/rocm/bin/hipconfig --full 112 | -------------------------------------------------------------------------------- /docs/license.md: -------------------------------------------------------------------------------- 1 | # License 2 | 3 | ```{include} ../LICENSE.txt 4 | ``` 5 | -------------------------------------------------------------------------------- /docs/programming_guide.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: HIP programming guide introduction 3 | :keywords: HIP programming guide introduction, HIP programming guide 4 | 5 | .. _hip-programming-guide: 6 | 7 | ******************************************************************************** 8 | HIP programming guide introduction 9 | ******************************************************************************** 10 | 11 | This topic provides key HIP programming concepts and links to more detailed 12 | information. 13 | 14 | Write GPU Kernels for Parallel Execution 15 | ================================================================================ 16 | 17 | To make the most of the parallelism inherent to GPUs, a thorough understanding 18 | of the :ref:`programming model ` is helpful. The HIP 19 | programming model is designed to make it easy to map data-parallel algorithms to 20 | the architecture of the GPUs. HIP employs the SIMT-model (Single Instruction 21 | Multiple Threads) with a multi-layered thread hierarchy for efficient execution. 22 | 23 | Understand the Target Architecture (CPU and GPU) 24 | ================================================================================ 25 | 26 | The :ref:`hardware implementation ` topic outlines the 27 | GPUs supported by HIP. 
In general, GPUs are made up of Compute Units that excel 28 | at executing parallelizable, computationally intensive workloads without complex 29 | control-flow. 30 | 31 | Increase parallelism on multiple levels 32 | ================================================================================ 33 | 34 | To maximize performance and keep all system components fully utilized, the 35 | application should expose and efficiently manage as much parallelism as possible. 36 | :ref:`Parallel execution ` can be achieved at the 37 | application, device, and multiprocessor levels. 38 | 39 | The application’s host and device operations can achieve parallel execution 40 | through asynchronous calls, streams, or HIP graphs. On the device level, 41 | multiple kernels can execute concurrently when resources are available, and at 42 | the multiprocessor level, developers can overlap data transfers with 43 | computations to further optimize performance. 44 | 45 | Memory management 46 | ================================================================================ 47 | 48 | GPUs generally have their own distinct memory, also called :ref:`device 49 | memory `, separate from the :ref:`host memory `. 50 | Device memory needs to be managed separately from the host memory. This includes 51 | allocating the memory and transferring it between the host and the device. These 52 | operations can be performance critical, so it's important to know how to use 53 | them effectively. For more information, see :ref:`Memory management `. 54 | 55 | Synchronize CPU and GPU Workloads 56 | ================================================================================ 57 | 58 | Tasks on the host and devices run asynchronously, so proper synchronization is 59 | needed when dependencies between those tasks exist. The asynchronous execution 60 | of tasks is useful for fully utilizing the available resources. 
Even when only a 61 | single device is available, memory transfers and the execution of tasks can be 62 | overlapped with asynchronous execution. 63 | 64 | Error Handling 65 | ================================================================================ 66 | 67 | All functions in the HIP runtime API return an error value of type 68 | :cpp:enum:`hipError_t` that can be used to verify whether the function was 69 | successfully executed. It's important to check these returned values, in order 70 | to catch and handle those errors, if possible. An exception is kernel launches, 71 | which don't return any value. These errors can be caught with specific functions 72 | like :cpp:func:`hipGetLastError()`. 73 | 74 | For more information, see :ref:`error_handling`. 75 | 76 | Multi-GPU and Load Balancing 77 | ================================================================================ 78 | 79 | Large-scale applications that need more compute power can use multiple GPUs in 80 | the system. This requires distributing workloads across multiple GPUs to balance 81 | the load and prevent some GPUs from being overutilized while others are idle. 82 | 83 | For more information, see :ref:`multi-device`. -------------------------------------------------------------------------------- /docs/reference/api_syntax.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: Maps CUDA API syntax to HIP API syntax with an example 3 | :keywords: AMD, ROCm, HIP, CUDA, syntax, HIP syntax 4 | 5 | ******************************************************************************** 6 | CUDA to HIP API Function Comparison 7 | ******************************************************************************** 8 | 9 | This page introduces key syntax differences between CUDA and HIP APIs with a focused code 10 | example and comparison table. For a complete list of mappings, visit :ref:`HIPIFY `. 
11 | 12 | The following CUDA code example illustrates several CUDA API syntaxes. 13 | 14 | .. code-block:: cpp 15 | 16 | #include <cuda_runtime.h> 17 | #include <iostream> 18 | #include <vector> 19 | 20 | __global__ void block_reduction(const float* input, float* output, int num_elements) 21 | { 22 | extern __shared__ float s_data[]; 23 | 24 | int tid = threadIdx.x; 25 | int global_id = blockDim.x * blockIdx.x + tid; 26 | 27 | if (global_id < num_elements) 28 | { 29 | s_data[tid] = input[global_id]; 30 | } 31 | else 32 | { 33 | s_data[tid] = 0.0f; 34 | } 35 | __syncthreads(); 36 | 37 | for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) 38 | { 39 | if (tid < stride) 40 | { 41 | s_data[tid] += s_data[tid + stride]; 42 | } 43 | __syncthreads(); 44 | } 45 | 46 | if (tid == 0) 47 | { 48 | output[blockIdx.x] = s_data[0]; 49 | } 50 | } 51 | 52 | int main() 53 | { 54 | int threads = 256; 55 | const int num_elements = 50000; 56 | 57 | std::vector<float> h_a(num_elements); 58 | std::vector<float> h_b((num_elements + threads - 1) / threads); 59 | 60 | for (int i = 0; i < num_elements; ++i) 61 | { 62 | h_a[i] = rand() / static_cast<float>(RAND_MAX); 63 | } 64 | 65 | float *d_a, *d_b; 66 | cudaMalloc(&d_a, h_a.size() * sizeof(float)); 67 | cudaMalloc(&d_b, h_b.size() * sizeof(float)); 68 | 69 | cudaStream_t stream; 70 | cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); 71 | 72 | cudaEvent_t start_event, stop_event; 73 | cudaEventCreate(&start_event); 74 | cudaEventCreate(&stop_event); 75 | 76 | cudaMemcpyAsync(d_a, h_a.data(), h_a.size() * sizeof(float), cudaMemcpyHostToDevice, stream); 77 | 78 | cudaEventRecord(start_event, stream); 79 | 80 | int blocks = (num_elements + threads - 1) / threads; 81 | block_reduction<<<blocks, threads, threads * sizeof(float), stream>>>(d_a, d_b, num_elements); 82 | 83 | cudaMemcpyAsync(h_b.data(), d_b, h_b.size() * sizeof(float), cudaMemcpyDeviceToHost, stream); 84 | 85 | cudaEventRecord(stop_event, stream); 86 | cudaEventSynchronize(stop_event); 87 | float milliseconds = 0.0f; 88 | cudaEventElapsedTime(&milliseconds, start_event, stop_event); 89 | std::cout 
<< "Kernel execution time: " << milliseconds << " ms\n"; 90 | 91 | cudaFree(d_a); 92 | cudaFree(d_b); 93 | 94 | cudaEventDestroy(start_event); 95 | cudaEventDestroy(stop_event); 96 | cudaStreamDestroy(stream); 97 | 98 | return 0; 99 | } 100 | 101 | The following table maps CUDA API functions to corresponding HIP API functions, as demonstrated in the 102 | preceding code example. 103 | 104 | .. list-table:: 105 | :header-rows: 1 106 | :name: syntax-mapping-table 107 | 108 | * 109 | - CUDA 110 | - HIP 111 | 112 | * 113 | - ``#include <cuda_runtime.h>`` 114 | - ``#include <hip/hip_runtime.h>`` 115 | 116 | * 117 | - ``cudaError_t`` 118 | - ``hipError_t`` 119 | 120 | * 121 | - ``cudaEvent_t`` 122 | - ``hipEvent_t`` 123 | 124 | * 125 | - ``cudaStream_t`` 126 | - ``hipStream_t`` 127 | 128 | * 129 | - ``cudaMalloc`` 130 | - ``hipMalloc`` 131 | 132 | * 133 | - ``cudaStreamCreateWithFlags`` 134 | - ``hipStreamCreateWithFlags`` 135 | 136 | * 137 | - ``cudaStreamNonBlocking`` 138 | - ``hipStreamNonBlocking`` 139 | 140 | * 141 | - ``cudaEventCreate`` 142 | - ``hipEventCreate`` 143 | 144 | * 145 | - ``cudaMemcpyAsync`` 146 | - ``hipMemcpyAsync`` 147 | 148 | * 149 | - ``cudaMemcpyHostToDevice`` 150 | - ``hipMemcpyHostToDevice`` 151 | 152 | * 153 | - ``cudaEventRecord`` 154 | - ``hipEventRecord`` 155 | 156 | * 157 | - ``cudaEventSynchronize`` 158 | - ``hipEventSynchronize`` 159 | 160 | * 161 | - ``cudaEventElapsedTime`` 162 | - ``hipEventElapsedTime`` 163 | 164 | * 165 | - ``cudaFree`` 166 | - ``hipFree`` 167 | 168 | * 169 | - ``cudaEventDestroy`` 170 | - ``hipEventDestroy`` 171 | 172 | * 173 | - ``cudaStreamDestroy`` 174 | - ``hipStreamDestroy`` 175 | 176 | In summary, this comparison highlights the primary differences between CUDA and HIP APIs. 177 | -------------------------------------------------------------------------------- /docs/reference/deprecated_api_list.rst: -------------------------------------------------------------------------------- 1 | .. 
meta:: 2 | :description: HIP deprecated runtime API functions. 3 | :keywords: AMD, ROCm, HIP, deprecated, API 4 | 5 | ********************************************************************************************** 6 | HIP deprecated runtime API functions 7 | ********************************************************************************************** 8 | 9 | Several of our API functions have been flagged for deprecation. Using the 10 | following functions results in errors and unexpected results, so we encourage 11 | you to update your code accordingly. 12 | 13 | Deprecated since ROCm 6.1.0 14 | ============================================================ 15 | 16 | Deprecated texture management functions. 17 | 18 | .. list-table:: 19 | :widths: 40 20 | :header-rows: 1 21 | :align: left 22 | 23 | * - function 24 | * - :cpp:func:`hipTexRefGetBorderColor` 25 | * - :cpp:func:`hipTexRefGetArray` 26 | 27 | Deprecated since ROCm 5.7.0 28 | ============================================================ 29 | 30 | Deprecated texture management functions. 31 | 32 | .. list-table:: 33 | :widths: 40 34 | :header-rows: 1 35 | :align: left 36 | 37 | * - function 38 | * - :cpp:func:`hipBindTextureToMipmappedArray` 39 | 40 | Deprecated since ROCm 5.3.0 41 | ============================================================ 42 | 43 | Deprecated texture management functions. 44 | 45 | .. 
list-table:: 46 | :widths: 40 47 | :header-rows: 1 48 | :align: left 49 | 50 | * - function 51 | * - :cpp:func:`hipGetTextureReference` 52 | * - :cpp:func:`hipTexRefSetAddressMode` 53 | * - :cpp:func:`hipTexRefSetArray` 54 | * - :cpp:func:`hipTexRefSetFlags` 55 | * - :cpp:func:`hipTexRefSetFilterMode` 56 | * - :cpp:func:`hipTexRefSetFormat` 57 | * - :cpp:func:`hipTexRefSetMipmapFilterMode` 58 | * - :cpp:func:`hipTexRefSetMipmapLevelBias` 59 | * - :cpp:func:`hipTexRefSetMipmapLevelClamp` 60 | * - :cpp:func:`hipTexRefSetMipmappedArray` 61 | 62 | Deprecated since ROCm 4.3.0 63 | ============================================================ 64 | 65 | Deprecated texture management functions. 66 | 67 | .. list-table:: 68 | :widths: 40 69 | :header-rows: 1 70 | :align: left 71 | 72 | * - function 73 | * - :cpp:func:`hipTexRefGetAddress` 74 | * - :cpp:func:`hipTexRefGetAddressMode` 75 | * - :cpp:func:`hipTexRefGetFilterMode` 76 | * - :cpp:func:`hipTexRefGetFlags` 77 | * - :cpp:func:`hipTexRefGetFormat` 78 | * - :cpp:func:`hipTexRefGetMaxAnisotropy` 79 | * - :cpp:func:`hipTexRefGetMipmapFilterMode` 80 | * - :cpp:func:`hipTexRefGetMipmapLevelBias` 81 | * - :cpp:func:`hipTexRefGetMipmapLevelClamp` 82 | * - :cpp:func:`hipTexRefGetMipMappedArray` 83 | * - :cpp:func:`hipTexRefSetAddress` 84 | * - :cpp:func:`hipTexRefSetAddress2D` 85 | * - :cpp:func:`hipTexRefSetBorderColor` 86 | * - :cpp:func:`hipTexRefSetMaxAnisotropy` 87 | 88 | Deprecated since ROCm 3.8.0 89 | ============================================================ 90 | 91 | Deprecated memory management and texture management functions. 92 | 93 | .. 
list-table:: 94 | :widths: 40 95 | :header-rows: 1 96 | :align: left 97 | 98 | * - function 99 | * - :cpp:func:`hipBindTexture` 100 | * - :cpp:func:`hipBindTexture2D` 101 | * - :cpp:func:`hipBindTextureToArray` 102 | * - :cpp:func:`hipGetTextureAlignmentOffset` 103 | * - :cpp:func:`hipUnbindTexture` 104 | * - :cpp:func:`hipMemcpyToArray` 105 | * - :cpp:func:`hipMemcpyFromArray` 106 | 107 | Deprecated since ROCm 3.1.0 108 | ============================================================ 109 | 110 | Deprecated memory management functions. 111 | 112 | .. list-table:: 113 | :widths: 40, 60 114 | :header-rows: 1 115 | :align: left 116 | 117 | * - function 118 | - 119 | * - :cpp:func:`hipMallocHost` 120 | - replaced with :cpp:func:`hipHostAlloc` 121 | * - :cpp:func:`hipMemAllocHost` 122 | - replaced with :cpp:func:`hipHostAlloc` 123 | 124 | Deprecated since ROCm 3.0.0 125 | ============================================================ 126 | 127 | The ``hipProfilerStart`` and ``hipProfilerStop`` functions are deprecated. 128 | Instead, you can use ``roctracer`` or ``rocTX`` for profiling which provide more 129 | flexibility and detailed profiling capabilities. 130 | 131 | .. list-table:: 132 | :widths: 40 133 | :header-rows: 1 134 | :align: left 135 | 136 | * - function 137 | * - :cpp:func:`hipProfilerStart` 138 | * - :cpp:func:`hipProfilerStop` 139 | 140 | Deprecated since ROCm 1.9.0 141 | ============================================================ 142 | 143 | CUDA supports cuCtx API, which is the driver API that defines "Context" and 144 | "Devices" as separate entities. Context contains a single device, and a device 145 | can theoretically have multiple contexts. HIP initially added limited support 146 | for context APIs in order to facilitate porting from existing driver codes. These 147 | APIs are now marked as deprecated because there are better alternate interfaces 148 | (such as ``hipSetDevice`` or the stream API) to achieve these functions. 149 | 150 | .. 
list-table:: 151 | :widths: 40 152 | :header-rows: 1 153 | :align: left 154 | 155 | * - function 156 | * - :cpp:func:`hipCtxCreate` 157 | * - :cpp:func:`hipCtxDestroy` 158 | * - :cpp:func:`hipCtxPopCurrent` 159 | * - :cpp:func:`hipCtxPushCurrent` 160 | * - :cpp:func:`hipCtxSetCurrent` 161 | * - :cpp:func:`hipCtxGetCurrent` 162 | * - :cpp:func:`hipCtxGetDevice` 163 | * - :cpp:func:`hipCtxGetApiVersion` 164 | * - :cpp:func:`hipCtxGetCacheConfig` 165 | * - :cpp:func:`hipCtxSetCacheConfig` 166 | * - :cpp:func:`hipCtxSetSharedMemConfig` 167 | * - :cpp:func:`hipCtxGetSharedMemConfig` 168 | * - :cpp:func:`hipCtxSynchronize` 169 | * - :cpp:func:`hipCtxGetFlags` 170 | * - :cpp:func:`hipCtxEnablePeerAccess` 171 | * - :cpp:func:`hipCtxDisablePeerAccess` 172 | * - :cpp:func:`hipDevicePrimaryCtxGetState` 173 | * - :cpp:func:`hipDevicePrimaryCtxRelease` 174 | * - :cpp:func:`hipDevicePrimaryCtxRetain` 175 | * - :cpp:func:`hipDevicePrimaryCtxReset` 176 | * - :cpp:func:`hipDevicePrimaryCtxSetFlags` 177 | -------------------------------------------------------------------------------- /docs/reference/env_variables.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: HIP environment variables reference 3 | :keywords: AMD, HIP, environment variables, environment, reference 4 | 5 | ******************************************************************************** 6 | HIP environment variables 7 | ******************************************************************************** 8 | 9 | In this section, the reader can find all the important HIP environment variables 10 | on the AMD platform, which are grouped by functionality. 11 | 12 | GPU isolation variables 13 | ================================================================================ 14 | 15 | The GPU isolation environment variables in HIP are collected in the following table. 16 | For more information, check :doc:`GPU isolation page `. 17 | 18 | .. 
include-table:: data/env_variables_hip.rst 19 | :table: hip-env-isolation 20 | 21 | Profiling variables 22 | ================================================================================ 23 | 24 | The profiling environment variables in HIP are collected in the following table. For 25 | more information, check :doc:`setting the number of CUs page `. 26 | 27 | .. include-table:: data/env_variables_hip.rst 28 | :table: hip-env-prof 29 | 30 | Debug variables 31 | ================================================================================ 32 | 33 | The debugging environment variables in HIP are collected in the following table. For 34 | more information, check :ref:`debugging_with_hip`. 35 | 36 | .. include-table:: data/env_variables_hip.rst 37 | :table: hip-env-debug 38 | 39 | Memory management related variables 40 | ================================================================================ 41 | 42 | The memory management related environment variables in HIP are collected in the 43 | following table. 44 | 45 | .. include-table:: data/env_variables_hip.rst 46 | :table: hip-env-memory 47 | 48 | Other useful variables 49 | ================================================================================ 50 | 51 | The following table lists environment variables that are useful but relate to 52 | different features. 53 | 54 | .. include-table:: data/env_variables_hip.rst 55 | :table: hip-env-other 56 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/global_defines_enums_structs_files.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The global defines, enum, structs and files reference page. 3 | 4 | .. 
_global_defines_enums_structs_files_reference: 5 | 6 | ******************************************************************************* 7 | Global defines, enums, structs and files 8 | ******************************************************************************* 9 | 10 | The structs, define macros, enums and files in the HIP runtime API. 11 | 12 | * :ref:`global_enum_defines_reference` 13 | * :ref:`driver_types_reference` 14 | * :doc:`../../doxygen/html/annotated` 15 | * :doc:`../../doxygen/html/files` 16 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/global_defines_enums_structs_files/driver_types.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The driver types reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, driver types 4 | 5 | .. _driver_types_reference: 6 | 7 | ******************************************************************************* 8 | Driver types 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: DriverTypes 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/global_defines_enums_structs_files/global_enum_and_defines.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The global enum and defines reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, global enum, defines 4 | 5 | .. _global_enum_defines_reference: 6 | 7 | ******************************************************************************* 8 | Global enum and defines 9 | ******************************************************************************* 10 | 11 | .. 
doxygengroup:: GlobalDefs 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The HIP runtime API modules reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, HIP runtime API modules, modules 4 | 5 | .. _modules_reference: 6 | 7 | ******************************************************************************* 8 | Modules 9 | ******************************************************************************* 10 | 11 | The API is organized into modules based on functionality. 12 | 13 | * :ref:`initialization_version_reference` 14 | * :ref:`device_management_reference` 15 | * :ref:`execution_control_reference` 16 | * :ref:`error_handling_reference` 17 | * :ref:`stream_management_reference` 18 | * :ref:`stream_memory_operations_reference` 19 | * :ref:`event_management_reference` 20 | * :ref:`memory_management_reference` 21 | 22 | * :ref:`memory_management_deprecated_reference` 23 | * :ref:`external_resource_interoperability_reference` 24 | * :ref:`stream_ordered_memory_allocator_reference` 25 | * :ref:`unified_memory_reference` 26 | * :ref:`virtual_memory_reference` 27 | * :ref:`texture_management_reference` 28 | * :ref:`texture_management_deprecated_reference` 29 | * :ref:`surface_object_reference` 30 | 31 | * :ref:`peer_to_peer_device_memory_access_reference` 32 | * :ref:`context_management_reference` 33 | * :ref:`module_management_reference` 34 | * :ref:`occupancy_reference` 35 | * :ref:`profiler_control_reference` 36 | * :ref:`launch_api_reference` 37 | * :ref:`runtime_compilation_reference` 38 | * :ref:`callback_activity_apis_reference` 39 | * :ref:`graph_management_reference` 40 | * :ref:`opengl_interoperability_reference` 41 | * :ref:`graphics_interoperability_reference` 42 | * :ref:`cooperative_groups_reference` 43 | 
-------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/callback_activity_apis.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The callback activity APIs reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, callback activity APIs, callback activity 4 | 5 | .. _callback_activity_apis_reference: 6 | 7 | ******************************************************************************* 8 | Callback activity APIs 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Callback 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/context_management.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The context management reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, context management, context 4 | 5 | .. _context_management_reference: 6 | 7 | ******************************************************************************* 8 | Context management [deprecated] 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Context 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/cooperative_groups_reference.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: This chapter lists types and device API wrappers related to the 3 | Cooperative Group feature. Programmers can directly use these 4 | API features in their kernels. 5 | :keywords: AMD, ROCm, HIP, cooperative groups 6 | 7 | .. 
_cooperative_groups_reference: 8 | 9 | ******************************************************************************* 10 | Cooperative groups 11 | ******************************************************************************* 12 | 13 | Cooperative kernel launches 14 | =========================== 15 | 16 | The following host-side functions are used for cooperative kernel launches. 17 | 18 | .. doxygengroup:: ModuleCooperativeG 19 | :content-only: 20 | 21 | Cooperative groups classes 22 | ========================== 23 | 24 | The following cooperative groups classes can be used on the device side. 25 | 26 | .. _thread_group_ref: 27 | 28 | .. doxygenclass:: cooperative_groups::thread_group 29 | :members: 30 | 31 | .. _thread_block_ref: 32 | 33 | .. doxygenclass:: cooperative_groups::thread_block 34 | :members: 35 | 36 | .. _grid_group_ref: 37 | 38 | .. doxygenclass:: cooperative_groups::grid_group 39 | :members: 40 | 41 | .. _multi_grid_group_ref: 42 | 43 | .. doxygenclass:: cooperative_groups::multi_grid_group 44 | :members: 45 | 46 | .. _thread_block_tile_ref: 47 | 48 | .. doxygenclass:: cooperative_groups::thread_block_tile 49 | :members: 50 | 51 | .. _coalesced_group_ref: 52 | 53 | .. doxygenclass:: cooperative_groups::coalesced_group 54 | :members: 55 | 56 | Cooperative groups construct functions 57 | ====================================== 58 | 59 | The following functions are used to construct different group-type instances on the device side. 60 | 61 | .. doxygengroup:: CooperativeGConstruct 62 | :content-only: 63 | 64 | Cooperative groups exposed API functions 65 | ======================================== 66 | 67 | The following functions are the exposed API for different group-type instances on the device side. 68 | 69 | .. 
doxygengroup:: CooperativeGAPI 70 | :content-only: 71 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/device_management.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The device management reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, device management, device 4 | 5 | .. _device_management_reference: 6 | 7 | ******************************************************************************* 8 | Device management 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Device 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/error_handling.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The error handling reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, error handling, error 4 | 5 | .. _error_handling_reference: 6 | 7 | ******************************************************************************* 8 | Error handling 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Error 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/event_management.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The event management reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, event management, event 4 | 5 | .. _event_management_reference: 6 | 7 | ******************************************************************************* 8 | Event management 9 | ******************************************************************************* 10 | 11 | .. 
doxygengroup:: Event 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/execution_control.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The execution control reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, execution control, execution 4 | 5 | .. _execution_control_reference: 6 | 7 | ******************************************************************************* 8 | Execution control 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Execution 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/graph_management.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The graph management reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, graph management, graph 4 | 5 | .. _graph_management_reference: 6 | 7 | ******************************************************************************* 8 | Graph management 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Graph 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/graphics_interoperability.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The Graphics interoperability reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, Graphics interoperability 4 | 5 | .. _graphics_interoperability_reference: 6 | 7 | ******************************************************************************* 8 | Graphics interoperability 9 | ******************************************************************************* 10 | 11 | .. 
doxygengroup:: GraphicsInterop 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/initialization_and_version.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The initialization and version reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, initialization, version 4 | 5 | .. _initialization_version_reference: 6 | 7 | ******************************************************************************* 8 | Initialization and version 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Driver 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/launch_api.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The launch API reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, launch API, triple-chevron 4 | 5 | .. _launch_api_reference: 6 | 7 | ******************************************************************************* 8 | Launch API 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Clang 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/memory_management.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The memory management reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, memory management, memory 4 | 5 | .. _memory_management_reference: 6 | 7 | ******************************************************************************* 8 | Memory management 9 | ******************************************************************************* 10 | 11 | .. 
doxygengroup:: Memory 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/memory_management/external_resource_interoperability.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The external resource interoperability reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, external resource interoperability 4 | 5 | .. _external_resource_interoperability_reference: 6 | 7 | ******************************************************************************* 8 | External resource interoperability 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: External 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/memory_management/memory_management_deprecated.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The deprecated memory management reference page. 3 | 4 | .. _memory_management_deprecated_reference: 5 | 6 | ******************************************************************************* 7 | Memory management (deprecated) 8 | ******************************************************************************* 9 | 10 | .. doxygengroup:: MemoryD 11 | :content-only: 12 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/memory_management/stream_ordered_memory_allocator.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The stream ordered memory allocator reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, stream ordered memory allocator 4 | 5 | .. 
_stream_ordered_memory_allocator_reference: 6 | 7 | ******************************************************************************* 8 | Stream ordered memory allocator 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: StreamO 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/memory_management/surface_object.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The surface object reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, surface object, surface 4 | 5 | .. _surface_object_reference: 6 | 7 | ******************************************************************************* 8 | Surface object 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Surface 12 | :content-only: 13 | 14 | .. doxygengroup:: SurfaceAPI 15 | :content-only: 16 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/memory_management/texture_management.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The texture management reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, texture management, texture 4 | 5 | .. _texture_management_reference: 6 | 7 | ******************************************************************************* 8 | Texture management 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Texture 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/memory_management/texture_management_deprecated.rst: -------------------------------------------------------------------------------- 1 | .. 
meta:: 2 | :description: The deprecated texture management reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, deprecated texture management 4 | 5 | .. _texture_management_deprecated_reference: 6 | 7 | ******************************************************************************* 8 | Texture management (deprecated) 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: TextureD 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/memory_management/unified_memory_reference.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The managed memory reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, unified memory, unified, memory, UM, APU 4 | 5 | .. _unified_memory_reference: 6 | 7 | ******************************************************************************* 8 | Managed memory 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: MemoryM 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/memory_management/virtual_memory_reference.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The virtual memory (VM) management reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, virtual memory, virtual, memory, VM 4 | 5 | .. _virtual_memory_reference: 6 | 7 | ******************************************************************************* 8 | Virtual memory management 9 | ******************************************************************************* 10 | 11 | .. 
doxygengroup:: Virtual 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/module_management.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The module management reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, module management, module 4 | 5 | .. _module_management_reference: 6 | 7 | ******************************************************************************* 8 | Module management 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Module 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/occupancy.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The occupancy reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, occupancy 4 | 5 | .. _occupancy_reference: 6 | 7 | ******************************************************************************* 8 | Occupancy 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Occupancy 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/opengl_interoperability.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The OpenGL interoperability reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, OpenGL interoperability, OpenGL interop 4 | 5 | .. _opengl_interoperability_reference: 6 | 7 | ******************************************************************************* 8 | OpenGL interoperability 9 | ******************************************************************************* 10 | 11 | .. 
doxygengroup:: GL 12 | :content-only: -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/peer_to_peer_device_memory_access.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The peer to peer device memory access reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, peer to peer device memory access, peer to peer 4 | 5 | .. _peer_to_peer_device_memory_access_reference: 6 | 7 | ******************************************************************************* 8 | Peer to peer device memory access 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: PeerToPeer 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/profiler_control.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The profiler control reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, profiler control, profiler 4 | 5 | .. _profiler_control_reference: 6 | 7 | ******************************************************************************* 8 | Profiler control 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Profiler 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/runtime_compilation.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The runtime compilation reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, runtime compilation 4 | 5 | .. 
_runtime_compilation_reference: 6 | 7 | ******************************************************************************* 8 | Runtime compilation 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Runtime 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/stream_management.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The stream management reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, stream management, stream 4 | 5 | .. _stream_management_reference: 6 | 7 | ******************************************************************************* 8 | Stream management 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: Stream 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api/modules/stream_memory_operations.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The stream memory operations reference page. 3 | :keywords: AMD, ROCm, HIP, CUDA, stream memory operations 4 | 5 | .. _stream_memory_operations_reference: 6 | 7 | ******************************************************************************* 8 | Stream memory operations 9 | ******************************************************************************* 10 | 11 | .. doxygengroup:: StreamM 12 | :content-only: 13 | -------------------------------------------------------------------------------- /docs/reference/hip_runtime_api_reference.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: HIP runtime API reference page 3 | :keywords: AMD, ROCm, HIP, CUDA, HIP runtime API, HIP runtime 4 | 5 | .. 
_runtime_api_reference: 6 | 7 | ******************************************************************************** 8 | HIP runtime API 9 | ******************************************************************************** 10 | 11 | The HIP Runtime API reference: 12 | 13 | * :ref:`modules_reference` 14 | * :ref:`global_defines_enums_structs_files_reference` 15 | -------------------------------------------------------------------------------- /docs/reference/virtual_rocr.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: This chapter lists user-mode API interfaces and libraries 3 | necessary for host applications to launch compute kernels to 4 | available HSA ROCm kernel agents. 5 | :keywords: AMD, ROCm, HIP, HSA, ROCR runtime, virtual memory management 6 | 7 | ******************************************************************************* 8 | HSA runtime API for ROCm 9 | ******************************************************************************* 10 | 11 | The following functions are located in the https://github.com/ROCm/ROCR-Runtime repository. 12 | 13 | .. doxygenfunction:: hsa_amd_vmem_address_reserve 14 | 15 | .. doxygenfunction:: hsa_amd_vmem_address_free 16 | 17 | .. doxygenfunction:: hsa_amd_vmem_handle_create 18 | 19 | .. doxygenfunction:: hsa_amd_vmem_handle_release 20 | 21 | .. doxygenfunction:: hsa_amd_vmem_map 22 | 23 | .. doxygenfunction:: hsa_amd_vmem_unmap 24 | 25 | .. doxygenfunction:: hsa_amd_vmem_set_access 26 | 27 | .. doxygenfunction:: hsa_amd_vmem_get_access 28 | 29 | .. doxygenfunction:: hsa_amd_vmem_export_shareable_handle 30 | 31 | .. doxygenfunction:: hsa_amd_vmem_import_shareable_handle 32 | 33 | .. doxygenfunction:: hsa_amd_vmem_retain_alloc_handle 34 | 35 | .. 
doxygenfunction:: hsa_amd_vmem_get_alloc_properties_from_handle 36 | -------------------------------------------------------------------------------- /docs/sphinx/_toc.yml.in: -------------------------------------------------------------------------------- 1 | # Anywhere {branch} is used, the branch name will be substituted. 2 | # These comments will also be removed. 3 | defaults: 4 | numbered: False 5 | maxdepth: 6 6 | root: index 7 | subtrees: 8 | - entries: 9 | - file: what_is_hip 10 | - file: faq 11 | 12 | - caption: Install 13 | entries: 14 | - file: install/install 15 | title: Installing HIP 16 | - file: install/build 17 | title: Building HIP from source 18 | - url: https://rocm.docs.amd.com/projects/install-on-linux/en/${branch}/reference/system-requirements.html 19 | title: Linux supported AMD GPUs 20 | - url: https://rocm.docs.amd.com/projects/install-on-windows/en/${branch}/reference/system-requirements.html 21 | title: Windows supported AMD GPUs 22 | - url: https://developer.nvidia.com/cuda-gpus 23 | title: NVIDIA supported GPUs 24 | 25 | - caption: Programming guide 26 | entries: 27 | - file: programming_guide 28 | title: Introduction 29 | - file: understand/programming_model 30 | - file: understand/hardware_implementation 31 | - file: understand/compilers 32 | - file: how-to/performance_guidelines 33 | - file: how-to/debugging 34 | - file: how-to/logging 35 | - file: how-to/hip_runtime_api 36 | subtrees: 37 | - entries: 38 | - file: how-to/hip_runtime_api/initialization 39 | - file: how-to/hip_runtime_api/memory_management 40 | subtrees: 41 | - entries: 42 | - file: how-to/hip_runtime_api/memory_management/host_memory 43 | - file: how-to/hip_runtime_api/memory_management/device_memory 44 | subtrees: 45 | - entries: 46 | - file: how-to/hip_runtime_api/memory_management/device_memory/texture_fetching 47 | - file: how-to/hip_runtime_api/memory_management/coherence_control 48 | - file: how-to/hip_runtime_api/memory_management/unified_memory 49 | - file: 
how-to/hip_runtime_api/memory_management/virtual_memory 50 | - file: how-to/hip_runtime_api/memory_management/stream_ordered_allocator 51 | - file: how-to/hip_runtime_api/error_handling 52 | - file: how-to/hip_runtime_api/call_stack 53 | - file: how-to/hip_runtime_api/asynchronous 54 | - file: how-to/hip_runtime_api/hipgraph 55 | - file: how-to/hip_runtime_api/cooperative_groups 56 | - file: how-to/hip_runtime_api/multi_device 57 | - file: how-to/hip_runtime_api/opengl_interop 58 | - file: how-to/hip_runtime_api/external_interop 59 | - file: how-to/hip_cpp_language_extensions 60 | - file: how-to/kernel_language_cpp_support 61 | - file: how-to/hip_porting_guide 62 | - file: how-to/hip_porting_driver_api 63 | - file: how-to/hip_rtc 64 | - file: understand/amd_clr 65 | 66 | - caption: Reference 67 | entries: 68 | - file: reference/hip_runtime_api_reference 69 | subtrees: 70 | - entries: 71 | - file: reference/hip_runtime_api/modules 72 | subtrees: 73 | - entries: 74 | - file: reference/hip_runtime_api/modules/initialization_and_version 75 | - file: reference/hip_runtime_api/modules/device_management 76 | - file: reference/hip_runtime_api/modules/execution_control 77 | - file: reference/hip_runtime_api/modules/error_handling 78 | - file: reference/hip_runtime_api/modules/stream_management 79 | - file: reference/hip_runtime_api/modules/stream_memory_operations 80 | - file: reference/hip_runtime_api/modules/event_management 81 | - file: reference/hip_runtime_api/modules/memory_management 82 | subtrees: 83 | - entries: 84 | - file: reference/hip_runtime_api/modules/memory_management/memory_management_deprecated 85 | - file: reference/hip_runtime_api/modules/memory_management/external_resource_interoperability 86 | - file: reference/hip_runtime_api/modules/memory_management/stream_ordered_memory_allocator 87 | - file: reference/hip_runtime_api/modules/memory_management/unified_memory_reference 88 | - file: 
reference/hip_runtime_api/modules/memory_management/virtual_memory_reference 89 | - file: reference/hip_runtime_api/modules/memory_management/texture_management 90 | - file: reference/hip_runtime_api/modules/memory_management/texture_management_deprecated 91 | - file: reference/hip_runtime_api/modules/memory_management/surface_object 92 | - file: reference/hip_runtime_api/modules/peer_to_peer_device_memory_access 93 | - file: reference/hip_runtime_api/modules/context_management 94 | - file: reference/hip_runtime_api/modules/module_management 95 | - file: reference/hip_runtime_api/modules/occupancy 96 | - file: reference/hip_runtime_api/modules/profiler_control 97 | - file: reference/hip_runtime_api/modules/launch_api 98 | - file: reference/hip_runtime_api/modules/runtime_compilation 99 | - file: reference/hip_runtime_api/modules/callback_activity_apis 100 | - file: reference/hip_runtime_api/modules/graph_management 101 | - file: reference/hip_runtime_api/modules/graphics_interoperability 102 | - file: reference/hip_runtime_api/modules/opengl_interoperability 103 | - file: reference/hip_runtime_api/modules/cooperative_groups_reference 104 | - file: reference/hip_runtime_api/global_defines_enums_structs_files 105 | subtrees: 106 | - entries: 107 | - file: reference/hip_runtime_api/global_defines_enums_structs_files/global_enum_and_defines 108 | - file: reference/hip_runtime_api/global_defines_enums_structs_files/driver_types 109 | - file: doxygen/html/annotated 110 | - file: doxygen/html/files 111 | - file: reference/virtual_rocr 112 | - file: reference/math_api 113 | - file: reference/env_variables 114 | - file: reference/api_syntax 115 | - file: reference/deprecated_api_list 116 | title: List of deprecated APIs 117 | - file: reference/fp8_numbers 118 | title: FP8 numbers in HIP 119 | - file: reference/hardware_features 120 | 121 | - caption: Tutorials 122 | entries: 123 | - url: https://github.com/ROCm/rocm-examples/tree/develop/HIP-Basic 124 | title: HIP basic 
examples 125 | - url: https://github.com/ROCm/rocm-examples 126 | title: HIP examples 127 | - file: tutorial/saxpy 128 | - file: tutorial/reduction 129 | - file: tutorial/cooperative_groups_tutorial 130 | 131 | - caption: About 132 | entries: 133 | - file: license.md 134 | -------------------------------------------------------------------------------- /docs/sphinx/requirements.in: -------------------------------------------------------------------------------- 1 | rocm-docs-core[api_reference]==1.15.0 2 | sphinxcontrib.doxylink 3 | -------------------------------------------------------------------------------- /docs/tools/update_example_codes.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | 3 | urllib.request.urlretrieve("https://raw.githubusercontent.com/ROCm/rocm-examples/refs/heads/develop/HIP-Basic/opengl_interop/main.hip", "docs/tools/example_codes/opengl_interop.hip") 4 | urllib.request.urlretrieve("https://raw.githubusercontent.com/ROCm/rocm-examples/refs/heads/develop/HIP-Basic/vulkan_interop/main.hip", "docs/tools/example_codes/external_interop.hip") 5 | -------------------------------------------------------------------------------- /docs/understand/amd_clr.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: This chapter describes the AMD CLR which is the implementation of HIP supporting on the AMD platform. 3 | :keywords: AMD, ROCm, HIP, CLR, HIPAMD, OpenCL, ROCCLR, CHANGELOG 4 | 5 | .. _AMD_Compute_Language_Runtimes: 6 | 7 | ******************************************************************************* 8 | AMD compute language runtimes (CLR) 9 | ******************************************************************************* 10 | 11 | CLR contains source codes for AMD's compute languages runtimes: ``HIP`` and ``OpenCL™``. 
12 | CLR is the part of the HIP runtime that is supported on the AMD ROCm platform. It provides a header and runtime library built on top of the HIP-Clang compiler. 13 | For developers and users, CLR implements the HIP runtime APIs, including streams, events, and memory APIs, as an object library that is linked with the application. 14 | The source code for all headers and the library implementation is available on GitHub in the `CLR repository `_. 15 | 16 | 17 | Project organization 18 | ==================== 19 | 20 | CLR includes the following source code, 21 | 22 | * ``hipamd`` - contains implementation of ``HIP`` language on the AMD platform. It is hosted at `clr/hipamd `_. 23 | 24 | * ``opencl`` - contains implementation of `OpenCL™ `_ on AMD platform. It is hosted at `clr/opencl `_. 25 | 26 | * ``rocclr`` - contains ROCm compute runtime used in ``HIP`` and ``OpenCL™``. This is hosted at `clr/rocclr `_. 27 | 28 | 29 | How to build/install 30 | ==================== 31 | 32 | Prerequisites 33 | ------------- 34 | 35 | Please refer to Quick Start Guide in `ROCm Docs `_. 36 | 37 | Building CLR requires ``rocm-hip-libraries`` meta package, which provides the pre-requisites for CLR. 38 | 39 | 40 | Linux 41 | ----- 42 | 43 | * Clone this repository 44 | 45 | .. code-block:: shell 46 | 47 | cd clr && mkdir build && cd build 48 | 49 | * For ``HIP`` 50 | 51 | .. code-block:: shell 52 | 53 | cmake .. -DCLR_BUILD_HIP=ON -DHIP_COMMON_DIR=$HIP_COMMON_DIR 54 | 55 | ``HIP_COMMON_DIR`` points to `HIP `_. 56 | 57 | * For ``OpenCL™`` 58 | 59 | .. code-block:: shell 60 | 61 | cmake .. -DCLR_BUILD_OCL=ON 62 | make 63 | make install 64 | 65 | 66 | Users can also build ``OCL`` and ``HIP`` at the same time by passing ``-DCLR_BUILD_HIP=ON -DCLR_BUILD_OCL=ON`` to the configure command. 67 | 68 | For detailed instructions, please refer to `build HIP `_. 69 | 70 | 71 | Test 72 | ----- 73 | 74 | ``hip-tests`` is a separate repository hosted at `hip-tests `_.
75 | 76 | To run ``hip-tests`` please go to the repository and follow the steps. 77 | 78 | 79 | Release notes 80 | ------------- 81 | 82 | HIP provides release notes in CLR `change log `_, which has records of changes in each release. 83 | -------------------------------------------------------------------------------- /docs/understand/compilers.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: Compilation workflow of the HIP compilers. 3 | :keywords: AMD, ROCm, HIP, CUDA, HIP runtime API 4 | 5 | .. _hip_compilers: 6 | 7 | ******************************************************************************** 8 | HIP compilers 9 | ******************************************************************************** 10 | 11 | ROCm provides the compiler driver ``hipcc``, that can be used on AMD ROCm and 12 | NVIDIA CUDA platforms. 13 | 14 | On ROCm, ``hipcc`` takes care of the following: 15 | 16 | - Setting the default library and include paths for HIP 17 | - Setting some environment variables 18 | - Invoking the appropriate compiler - ``amdclang++`` 19 | 20 | On NVIDIA CUDA platform, ``hipcc`` takes care of invoking compiler ``nvcc``. 21 | ``amdclang++`` is based on the ``clang++`` compiler. For more 22 | details, see the :doc:`llvm project`. 23 | 24 | HIP compilation workflow 25 | ================================================================================ 26 | 27 | HIP provides a flexible compilation workflow that supports both offline 28 | compilation and runtime or just-in-time (JIT) compilation. Each approach has 29 | advantages depending on the use case, target architecture, and performance 30 | needs. 31 | 32 | The offline compilation is ideal for production environments, where the 33 | performance is critical and the target GPU architecture is known in advance. 
34 | 35 | The runtime compilation is useful in development environments or when 36 | distributing software that must run on a wide range of hardware without the 37 | knowledge of the GPU in advance. It provides flexibility at the cost of some 38 | performance overhead. 39 | 40 | Offline compilation 41 | -------------------------------------------------------------------------------- 42 | 43 | The HIP code compilation is performed in two stages: host and device code 44 | compilation stage. 45 | 46 | - Device-code compilation stage: The compiled device code is embedded into the 47 | host object file. Depending on the platform, the device code can be compiled 48 | into assembly or binary. ``nvcc`` and ``amdclang++`` target different 49 | architectures and use different code object formats. ``nvcc`` uses the binary 50 | ``cubin`` or the assembly PTX files, while the ``amdclang++`` path is the 51 | binary ``hsaco`` format. On CUDA platforms, the driver compiles the PTX files 52 | to executable code during runtime. 53 | 54 | - Host-code compilation stage: On the host side, ``hipcc`` or ``amdclang++`` can 55 | compile the host code in one step without other C++ compilers. On the other 56 | hand, ``nvcc`` only replaces the ``<<<...>>>`` kernel launch syntax with the 57 | appropriate CUDA runtime function call and the modified host code is passed to 58 | the default host compiler. 59 | 60 | For an example on how to compile HIP from the command line, see :ref:`SAXPY 61 | tutorial` . 62 | 63 | Runtime compilation 64 | -------------------------------------------------------------------------------- 65 | 66 | HIP allows you to compile kernels at runtime using the ``hiprtc*`` API. Kernels 67 | are stored as a text string, which is passed to HIPRTC alongside options to 68 | guide the compilation. 69 | 70 | For more details, see 71 | :doc:`HIP runtime compiler <../how-to/hip_rtc>`. 
72 | 73 | Static libraries 74 | ================================================================================ 75 | 76 | ``hipcc`` supports generating two types of static libraries. 77 | 78 | - The first type of static library only exports and launches host functions 79 | within the same library and not the device functions. This library type offers 80 | the ability to link with a non-hipcc compiler such as ``gcc``. Additionally, 81 | this library type contains host objects with device code embedded as fat 82 | binaries. This library type is generated using the flag ``--emit-static-lib``: 83 | 84 | .. code-block:: shell 85 | 86 | hipcc hipOptLibrary.cpp --emit-static-lib -fPIC -o libHipOptLibrary.a 87 | gcc test.cpp -L. -lHipOptLibrary -L/path/to/hip/lib -lamdhip64 -o test.out 88 | 89 | - The second type of static library exports device functions to be linked by 90 | other code objects by using ``hipcc`` as the linker. This library type 91 | contains relocatable device objects and is generated using ``ar``: 92 | 93 | .. code-block:: shell 94 | 95 | hipcc hipDevice.cpp -c -fgpu-rdc -o hipDevice.o 96 | ar rcsD libHipDevice.a hipDevice.o 97 | hipcc libHipDevice.a test.cpp -fgpu-rdc -o test.out 98 | 99 | A full example for this can be found in the ROCm-examples, see the examples for 100 | `static host libraries `_ 101 | or `static device libraries `_. 102 | -------------------------------------------------------------------------------- /docs/understand/glossary.md: -------------------------------------------------------------------------------- 1 | # Glossary of terms 2 | 3 | * **host**, **host CPU** : Executes the HIP runtime API and is capable of initiating kernel launches to one or more devices. 4 | * **default device** : Each host thread maintains a default device. 5 | Most HIP runtime APIs (including memory allocation, copy commands, kernel launches) do not accept an explicit device 6 | argument but instead implicitly use the default device.
7 | The default device can be set with `hipSetDevice`. 8 | 9 | * **active host thread** - the thread which is running the HIP APIs. 10 | 11 | * **HIP-Clang** - Heterogeneous AMDGPU Compiler, with its capability to compile HIP programs on AMD platform (https://github.com/RadeonOpenCompute/llvm-project). 12 | 13 | * **clr** - a repository for AMD Compute Language Runtime, contains source codes for AMD's compute languages runtimes: HIP and OpenCL. 14 | clr (https://github.com/ROCm/clr) contains the following three parts, 15 | 16 | * `hipamd`: contains implementation of HIP language on AMD platform. 17 | * `rocclr`: contains common runtime used in HIP and OpenCL, which provides virtual device interfaces that compute runtimes interact with different backends such as ROCr on Linux or PAL on Windows. 18 | * `opencl`: contains implementation of OpenCL on AMD platform. 19 | 20 | * **hipify tools** - tools to convert CUDA code to portable C++ code (https://github.com/ROCm/HIPIFY). 21 | 22 | * **`hipconfig`** - tool to report various configuration properties of the target platform. 23 | 24 | * **`nvcc`** - NVIDIA CUDA `nvcc` compiler, do not capitalize. 25 | -------------------------------------------------------------------------------- /docs/what_is_hip.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: This chapter provides an introduction to the HIP API. 3 | :keywords: AMD, ROCm, HIP, CUDA, C++ language extensions 4 | 5 | .. _intro-to-hip: 6 | 7 | ******************************************************************************* 8 | What is HIP? 9 | ******************************************************************************* 10 | 11 | The Heterogeneous-computing Interface for Portability (HIP) API is a C++ runtime API 12 | and kernel language that lets developers create portable applications running in heterogeneous systems, 13 | using CPUs and AMD GPUs or NVIDIA GPUs from a single source code. 
HIP provides a simple 14 | marshalling language to access either the AMD ROCm back-end or the NVIDIA CUDA back-end, 15 | to build and run application kernels. 16 | 17 | .. figure:: data/what_is_hip/hip.svg 18 | :alt: HIP in an application. 19 | :align: center 20 | 21 | * HIP is a thin API with little or no performance impact over coding directly 22 | in NVIDIA CUDA or AMD :doc:`ROCm `. 23 | 24 | * HIP enables coding in a single-source C++ programming language including 25 | features such as templates, C++11 lambdas, classes, namespaces, and more. 26 | 27 | * Developers can specialize for the platform (CUDA or ROCm) to tune for 28 | performance or handle tricky cases. 29 | 30 | ROCm offers compilers (``clang``, ``hipcc``), code 31 | profilers (``rocprof``, ``omnitrace``), debugging tools (``rocgdb``), libraries 32 | and HIP with the runtime API and kernel language, to create heterogeneous applications 33 | running on both CPUs and GPUs. ROCm provides marshalling libraries like 34 | :doc:`hipFFT ` or :doc:`hipBLAS ` that act as a 35 | thin programming layer over either NVIDIA CUDA or AMD ROCm to enable support for 36 | either back-end. These libraries offer pointer-based memory interfaces and are 37 | easily integrated into your applications. 38 | 39 | HIP supports the ability to build and run on either AMD GPUs or NVIDIA GPUs. 40 | GPU programmers familiar with NVIDIA CUDA or OpenCL will find the HIP API 41 | familiar and easy to use. Developers no longer need to choose between AMD and 42 | NVIDIA GPUs. You can quickly port your application to run on the available 43 | hardware while maintaining a single codebase. The :doc:`HIPify ` 44 | tools, based on the clang front-end and Perl language, can convert CUDA API 45 | calls into the corresponding HIP API calls.
However, HIP is not intended to be a 46 | drop-in replacement for CUDA, and developers should expect to do some manual 47 | coding and performance tuning work for AMD GPUs to port existing projects as 48 | described in the :doc:`HIP porting guide `. 49 | 50 | HIP provides two components: those that run on the CPU, also known as host 51 | system, and those that run on GPUs, also referred to as device. The host-based 52 | code is used to create device buffers, move data between the host application 53 | and a device, launch the device code (also known as kernel), manage streams and 54 | events, and perform synchronization. The kernel language provides a way to 55 | develop massively parallel programs that run on GPUs, and provides access to GPU 56 | specific hardware capabilities. 57 | 58 | In summary, HIP simplifies cross-platform development, maintains performance, 59 | and provides a familiar C++ experience for GPU programming that runs seamlessly 60 | on both AMD and NVIDIA GPUs. 61 | 62 | HIP components 63 | =============================================== 64 | 65 | HIP consists of the following components. For information on the license 66 | associated with each component, see :doc:`HIP licensing `. 67 | 68 | C++ runtime API 69 | ----------------------------------------------- 70 | 71 | For the AMD ROCm platform, HIP provides headers and a runtime library built on 72 | top of the HIP-Clang compiler in the repository 73 | :doc:`Compute Language Runtime (CLR) `. The HIP runtime 74 | implements HIP streams, events, and memory APIs, and is an object library that 75 | is linked with the application. The source code for all headers and the library 76 | implementation is available on GitHub. 77 | 78 | For the NVIDIA CUDA platform, HIP provides headers that translate from the 79 | HIP runtime API to the CUDA runtime API. The host-side contains mostly inlined 80 | wrappers or even just preprocessor defines, with no additional overhead.
81 | The device-side code is compiled with ``nvcc``, just like normal CUDA kernels, 82 | and therefore one can expect the same performance as if directly coding in CUDA. 83 | The CUDA specific headers can be found in the `hipother repository `_. 84 | 85 | For further details, check :ref:`HIP Runtime API Reference `. 86 | 87 | Kernel language 88 | ----------------------------------------------- 89 | 90 | HIP provides a C++ syntax that is suitable for compiling most code that commonly appears in 91 | compute kernels (classes, namespaces, operator overloading, and templates). HIP also defines other 92 | language features that are designed to target accelerators, such as: 93 | 94 | * Short-vector headers that can serve on a host or device 95 | * Math functions that resemble those in ``math.h``, which is included with standard C++ compilers 96 | * Built-in functions for accessing specific GPU hardware capabilities 97 | 98 | For further details, check :doc:`HIP C++ language extensions ` 99 | and :doc:`Kernel language C++ support `. 100 | -------------------------------------------------------------------------------- /hip-lang-config.cmake.in: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2021 Kitware, Inc. All Rights Reserved. 2 | # Permission is hereby granted, free of charge, to any person obtaining a copy 3 | # of this software and associated documentation files (the "Software"), to deal 4 | # in the Software without restriction, including without limitation the rights 5 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 6 | # copies of the Software, and to permit persons to whom the Software is 7 | # furnished to do so, subject to the following conditions: 8 | # 9 | # The above copyright notice and this permission notice shall be included in 10 | # all copies or substantial portions of the Software. 
11 | # 12 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | # THE SOFTWARE. 19 | 20 | @PACKAGE_INIT@ 21 | include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE _CMakeFindDependencyMacro_FOUND) 22 | if (NOT _CMakeFindDependencyMacro_FOUND) 23 | macro(find_dependency dep) 24 | if (NOT ${dep}_FOUND) 25 | set(cmake_fd_version) 26 | if (${ARGC} GREATER 1) 27 | set(cmake_fd_version ${ARGV1}) 28 | endif() 29 | set(cmake_fd_exact_arg) 30 | if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION_EXACT) 31 | set(cmake_fd_exact_arg EXACT) 32 | endif() 33 | set(cmake_fd_quiet_arg) 34 | if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) 35 | set(cmake_fd_quiet_arg QUIET) 36 | endif() 37 | set(cmake_fd_required_arg) 38 | if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) 39 | set(cmake_fd_required_arg REQUIRED) 40 | endif() 41 | find_package(${dep} ${cmake_fd_version} 42 | ${cmake_fd_exact_arg} 43 | ${cmake_fd_quiet_arg} 44 | ${cmake_fd_required_arg} 45 | ) 46 | string(TOUPPER ${dep} cmake_dep_upper) 47 | if (NOT ${dep}_FOUND AND NOT ${cmake_dep_upper}_FOUND) 48 | set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.") 49 | set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False) 50 | return() 51 | endif() 52 | set(cmake_fd_version) 53 | set(cmake_fd_required_arg) 54 | set(cmake_fd_quiet_arg) 55 | set(cmake_fd_exact_arg) 56 | endif() 57 | endmacro() 58 | endif() 59 | 60 | set(HIP_COMPILER "@HIP_COMPILER@") 61 | set(HIP_RUNTIME "@HIP_RUNTIME@") 62 | 63 | if (NOT @BUILD_SHARED_LIBS@) 64 | 
list(APPEND CMAKE_PREFIX_PATH "${PACKAGE_PREFIX_DIR}" "${PACKAGE_PREFIX_DIR}/lib/llvm") 65 | 66 | # Setting CMAKE_SIZEOF_VOID_P is a workaround to force cmake 67 | # include searching in lib64 directories for dependencies. 68 | set(CMAKE_SIZEOF_VOID_P_OLD ${CMAKE_SIZEOF_VOID_P}) 69 | set(CMAKE_SIZEOF_VOID_P 8) 70 | 71 | find_dependency(hsa-runtime64) 72 | find_dependency(amd_comgr) 73 | 74 | # Restore the original CMAKE_PREFIX_PATH and CMAKE_SIZEOF_VOID_P 75 | # to avoid leaking the settings back to the project. 76 | set(CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P_OLD}) 77 | list(REMOVE_AT CMAKE_PREFIX_PATH -2 -1) 78 | endif() 79 | 80 | include( "${CMAKE_CURRENT_LIST_DIR}/hip-lang-targets.cmake" ) 81 | 82 | # Find the hip-lang config file path with symlinks resolved 83 | # RealPath: /opt/rocm-ver/lib/cmake/hip-lang/hip-lang-config.cmake 84 | # Go 4 level up to get IMPORT PREFIX 85 | get_filename_component(_DIR "${CMAKE_CURRENT_LIST_FILE}" REALPATH) 86 | get_filename_component(_IMPORT_PREFIX "${_DIR}/../../../../" ABSOLUTE) 87 | 88 | 89 | set_target_properties(hip-lang::device PROPERTIES 90 | INTERFACE_INCLUDE_DIRECTORIES "$<$:${_IMPORT_PREFIX}/include>" 91 | INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "$<$:${_IMPORT_PREFIX}/include>" 92 | ) 93 | 94 | set_target_properties(hip-lang::amdhip64 PROPERTIES 95 | INTERFACE_COMPILE_DEFINITIONS "$<$:__HIP_ROCclr__=1>" 96 | INTERFACE_INCLUDE_DIRECTORIES "$<$:${_IMPORT_PREFIX}/include>" 97 | INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "$<$:${_IMPORT_PREFIX}/include>" 98 | ) 99 | set_target_properties(hip-lang::device PROPERTIES 100 | INTERFACE_COMPILE_DEFINITIONS "$<$:__HIP_ROCclr__=1>" 101 | ) 102 | 103 | set_property(TARGET hip-lang::device APPEND PROPERTY 104 | INTERFACE_LINK_OPTIONS "$<$:--hip-link>" 105 | ) 106 | 107 | # Approach: Check CLANGRT LIB support for CMAKE_HIP_COMPILER 108 | # Use CMAKE_HIP_COMPILER option -print-libgcc-file-name --rtlib=compiler-rt 109 | # Note: For Linux add additional option -unwindlib=libgcc also 110 | # 
To fetch the compiler rt library file name and confirm. 111 | # If unsuccessful in getting clangrt using this option, then a 112 | # FATAL_ERROR message is sent, since the compiler-rt linkage dependency is mandatory. 113 | # If successful, then --rtlib=compiler-rt (and -unwindlib=libgcc on non-Windows) 114 | # is appended to the hip-lang::device target's INTERFACE_LINK_OPTIONS property 115 | if (NOT WIN32) 116 | set(CLANGRT_LINUX_OPTION "-unwindlib=libgcc") 117 | endif() 118 | 119 | execute_process( 120 | COMMAND ${CMAKE_HIP_COMPILER} -print-libgcc-file-name --rtlib=compiler-rt ${CLANGRT_LINUX_OPTION} 121 | OUTPUT_VARIABLE CLANGRT_BUILTINS 122 | OUTPUT_STRIP_TRAILING_WHITESPACE 123 | RESULT_VARIABLE CLANGRT_BUILTINS_FETCH_EXIT_CODE) 124 | 125 | # Add support for __fp16 and _Float16 by explicitly linking with compiler-rt 126 | if( "${CLANGRT_BUILTINS_FETCH_EXIT_CODE}" STREQUAL "0" ) 127 | set_property(TARGET hip-lang::device APPEND PROPERTY 128 | INTERFACE_LINK_OPTIONS $<$:--rtlib=compiler-rt ${CLANGRT_LINUX_OPTION}> 129 | ) 130 | else() 131 | # Send FATAL_ERROR when the compiler-rt query above exited with a non-zero code 132 | message(FATAL_ERROR 133 | "${CMAKE_FIND_PACKAGE_NAME} Error:${CLANGRT_BUILTINS_FETCH_EXIT_CODE} - clangrt builtins lib could not be found.") 134 | endif() 135 | 136 | # Approved by CMake to use this name. This is used so that HIP can 137 | # change the name of the target and not require any modifications in CMake 138 | set(_CMAKE_HIP_DEVICE_RUNTIME_TARGET "hip-lang::device") 139 | -------------------------------------------------------------------------------- /include/hip/channel_descriptor.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | 23 | #ifndef HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H 24 | #define HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H 25 | 26 | // Some standard header files, these are included by hc.hpp and so want to make them avail on both 27 | // paths to provide a consistent include env and avoid "missing symbol" errors that only appears 28 | // on NVCC path: 29 | 30 | 31 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 32 | #include 33 | #elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) 34 | #include 35 | #else 36 | #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); 37 | #endif 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /include/hip/device_functions.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #ifndef HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H 24 | #define HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H 25 | 26 | #if !defined(__HIPCC_RTC__) 27 | #include 28 | #endif 29 | 30 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 31 | #include 32 | #elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) 33 | #include 34 | #else 35 | #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); 36 | #endif 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /include/hip/hip_bf16.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #ifndef HIP_INCLUDE_HIP_HIP_BF16_H 24 | #define HIP_INCLUDE_HIP_HIP_BF16_H 25 | 26 | #include 27 | 28 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 29 | #include 30 | #elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) 31 | #include 32 | #else 33 | #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); 34 | #endif 35 | 36 | #endif // HIP_INCLUDE_HIP_HIP_BF16_H 37 | -------------------------------------------------------------------------------- /include/hip/hip_bfloat16.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /*!\file 26 | * \brief hip_bfloat16.h provides struct for hip_bfloat16 typedef 27 | */ 28 | 29 | #ifndef _HIP_BFLOAT16_H_ 30 | #define _HIP_BFLOAT16_H_ 31 | 32 | #if !defined(__HIPCC_RTC__) 33 | #include 34 | #endif 35 | 36 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 37 | #include 38 | #elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) 39 | #warning "hip_bfloat16.h is not supported on nvidia platform" 40 | #else 41 | #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); 42 | #endif 43 | 44 | #endif // _HIP_BFLOAT16_H_ 45 | -------------------------------------------------------------------------------- /include/hip/hip_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #ifndef HIP_INCLUDE_HIP_HIP_COMMON_H 24 | #define HIP_INCLUDE_HIP_HIP_COMMON_H 25 | 26 | #if defined(__clang__) 27 | #pragma clang diagnostic push 28 | #pragma clang diagnostic ignored "-Wreserved-macro-identifier" 29 | #endif 30 | // Common code included at start of every hip file. 31 | // Auto enable __HIP_PLATFORM_AMD__ if compiling on AMD platform 32 | // Other compiler (GCC,ICC,etc) need to set one of these macros explicitly 33 | #if defined(__clang__) && defined(__HIP__) 34 | #ifndef __HIP_PLATFORM_AMD__ 35 | #define __HIP_PLATFORM_AMD__ 36 | #endif 37 | #endif // defined(__clang__) && defined(__HIP__) 38 | 39 | // Auto enable __HIP_PLATFORM_NVIDIA__ if compiling with NVIDIA platform 40 | #if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__) && !defined(__HIP__)) 41 | #ifndef __HIP_PLATFORM_NVIDIA__ 42 | #define __HIP_PLATFORM_NVIDIA__ 43 | #endif 44 | 45 | #ifdef __CUDACC__ 46 | #define __HIPCC__ 47 | #endif 48 | 49 | #endif //__NVCC__ 50 | 51 | // Auto enable __HIP_DEVICE_COMPILE__ if compiled in HCC or NVCC device path 52 | #if (defined(__HCC_ACCELERATOR__) && __HCC_ACCELERATOR__ != 0) || \ 53 | (defined(__CUDA_ARCH__) && __CUDA_ARCH__ != 0) 54 | #define __HIP_DEVICE_COMPILE__ 1 55 | #endif 56 | 57 | #ifdef __GNUC__ 58 | #define HIP_PUBLIC_API __attribute__ ((visibility ("default"))) 59 | #define HIP_INTERNAL_EXPORTED_API __attribute__ ((visibility ("default"))) 60 | #else 61 | #define HIP_PUBLIC_API 62 | #define 
HIP_INTERNAL_EXPORTED_API 63 | #endif 64 | 65 | #if __HIP_DEVICE_COMPILE__ == 0 66 | // 32-bit Atomics 67 | #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (0) 68 | #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (0) 69 | #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (0) 70 | #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (0) 71 | #define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0) 72 | 73 | // 64-bit Atomics 74 | #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (0) 75 | #define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0) 76 | 77 | // Doubles 78 | #define __HIP_ARCH_HAS_DOUBLES__ (0) 79 | 80 | // Warp cross-lane operations 81 | #define __HIP_ARCH_HAS_WARP_VOTE__ (0) 82 | #define __HIP_ARCH_HAS_WARP_BALLOT__ (0) 83 | #define __HIP_ARCH_HAS_WARP_SHUFFLE__ (0) 84 | #define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0) 85 | 86 | // Sync 87 | #define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0) 88 | #define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0) 89 | 90 | // Misc 91 | #define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0) 92 | #define __HIP_ARCH_HAS_3DGRID__ (0) 93 | #define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0) 94 | #endif 95 | 96 | #if defined(__clang__) 97 | #pragma clang diagnostic pop 98 | #endif 99 | 100 | #endif 101 | -------------------------------------------------------------------------------- /include/hip/hip_complex.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #ifndef HIP_INCLUDE_HIP_HIP_COMPLEX_H 24 | #define HIP_INCLUDE_HIP_HIP_COMPLEX_H 25 | 26 | #if !defined(__HIPCC_RTC__) 27 | #include 28 | #endif 29 | 30 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 31 | #include 32 | #elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) 33 | #include 34 | #else 35 | #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); 36 | #endif 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /include/hip/hip_cooperative_groups.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | /** 24 | * @file hip_cooperative_groups.h 25 | * 26 | * @brief Defines new types and device API wrappers for `Cooperative Group` 27 | * feature. 
28 | */ 29 | 30 | #ifndef HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H 31 | #define HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H 32 | 33 | #include 34 | #include 35 | 36 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 37 | #if __cplusplus && defined(__clang__) && defined(__HIP__) 38 | #include 39 | #endif 40 | #elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) 41 | #include 42 | #else 43 | #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); 44 | #endif 45 | 46 | #endif // HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H 47 | -------------------------------------------------------------------------------- /include/hip/hip_deprecated.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // This file will add older hip functions used in the versioning system 4 | // Find the deprecated functions and structs in hip_device.cpp 5 | 6 | // Legacy (R0000) device-property layout taken by hipGetDevicePropertiesR0000() and hipChooseDeviceR0000(). This struct is also kept in hip_device.cpp 7 | typedef struct hipDeviceProp_tR0000 { 8 | char name[256]; ///< Device name. 9 | size_t totalGlobalMem; ///< Size of global memory region (in bytes). 10 | size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes). 11 | int regsPerBlock; ///< Registers per block. 12 | int warpSize; ///< Warp size. 13 | int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. 14 | int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. 15 | int maxGridSize[3]; ///< Max grid dimensions (XYZ). 16 | int clockRate; ///< Max clock frequency of the multiProcessors in kHz. 17 | int memoryClockRate; ///< Max global memory clock frequency in kHz. 18 | int memoryBusWidth; ///< Global memory bus width in bits. 19 | size_t totalConstMem; ///< Size of constant memory region (in bytes). 20 | int major; ///< Major compute capability. On HCC, this is an approximation and features may 21 | ///< differ from CUDA CC. 
See the arch feature flags for portable ways to query 22 | ///< feature caps. 23 | int minor; ///< Minor compute capability. On HCC, this is an approximation and features may 24 | ///< differ from CUDA CC. See the arch feature flags for portable ways to query 25 | ///< feature caps. 26 | int multiProcessorCount; ///< Number of multi-processors (compute units). 27 | int l2CacheSize; ///< L2 cache size. 28 | int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. 29 | int computeMode; ///< Compute mode. 30 | int clockInstructionRate; ///< Frequency in kHz of the timer used by the device-side "clock*" 31 | ///< instructions. New for HIP. 32 | hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. 33 | int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. 34 | int pciDomainID; ///< PCI Domain ID 35 | int pciBusID; ///< PCI Bus ID. 36 | int pciDeviceID; ///< PCI Device ID. 37 | size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor. 38 | int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. 39 | int canMapHostMemory; ///< Check whether HIP can map host memory 40 | int gcnArch; ///< DEPRECATED: use gcnArchName instead 41 | char gcnArchName[256]; ///< AMD GCN Arch Name. 
42 | int integrated; ///< APU vs dGPU 43 | int cooperativeLaunch; ///< HIP device supports cooperative launch 44 | int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple 45 | ///< devices 46 | int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory 47 | int maxTexture1D; ///< Maximum number of elements in 1D images 48 | int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements 49 | int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image 50 | ///< elements 51 | unsigned int* hdpMemFlushCntl; ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register 52 | unsigned int* hdpRegFlushCntl; ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register 53 | size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies 54 | size_t textureAlignment; ///< Alignment requirement for textures 55 | size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to 56 | ///< pitched memory 57 | int kernelExecTimeoutEnabled; ///< Whether there is a run time limit for kernels executed on the device 58 | int ECCEnabled; ///< Device has ECC support enabled 59 | int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0 60 | int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on 61 | ///< multiple 62 | /// devices with unmatched functions 63 | int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on 64 | ///< multiple 65 | /// devices with unmatched grid dimensions 66 | int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on 67 | ///< multiple 68 | /// devices with unmatched block dimensions 69 | int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on 70 | ///< multiple 71 | /// devices with unmatched shared memories 72 | int isLargeBar; ///< 1: if it is a large PCI bar device, else 0 73 | int 
asicRevision; ///< Revision of the GPU in this device 74 | int managedMemory; ///< Device supports allocating managed memory on this system 75 | int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device 76 | ///< without migration 77 | int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with 78 | ///< the CPU 79 | int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory 80 | ///< without calling hipHostRegister on it 81 | int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's 82 | ///< page tables 83 | } hipDeviceProp_tR0000; 84 | 85 | 86 | #ifdef __cplusplus 87 | extern "C" { 88 | #endif 89 | 90 | hipError_t hipGetDevicePropertiesR0000(hipDeviceProp_tR0000* prop, int device); 91 | hipError_t hipChooseDeviceR0000(int* device, const hipDeviceProp_tR0000* prop); 92 | 93 | #ifdef __cplusplus 94 | } 95 | #endif 96 | -------------------------------------------------------------------------------- /include/hip/hip_ext_ocp.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2024 - present Advanced Micro Devices, Inc. All rights reserved. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #ifndef HIP_INCLUDE_HIP_HIP_EXT_OCP_H 24 | #define HIP_INCLUDE_HIP_HIP_EXT_OCP_H 25 | 26 | #include 27 | 28 | #include 29 | #include 30 | 31 | #endif // HIP_INCLUDE_HIP_HIP_EXT_OCP_H 32 | -------------------------------------------------------------------------------- /include/hip/hip_fp16.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #ifndef HIP_INCLUDE_HIP_HIP_FP16_H 24 | #define HIP_INCLUDE_HIP_HIP_FP16_H 25 | 26 | #include 27 | 28 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 29 | #include 30 | #elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) 31 | #define HIPRT_INF_FP16 CUDART_INF_FP16 32 | #define HIPRT_MAX_NORMAL_FP16 CUDART_MAX_NORMAL_FP16 33 | #define HIPRT_MIN_DENORM_FP16 CUDART_MIN_DENORM_FP16 34 | #define HIPRT_NAN_FP16 CUDART_NAN_FP16 35 | #define HIPRT_NEG_ZERO_FP16 CUDART_NEG_ZERO_FP16 36 | #define HIPRT_ONE_FP16 CUDART_ONE_FP16 37 | #define HIPRT_ZERO_FP16 CUDART_ZERO_FP16 38 | 39 | #include "cuda_fp16.h" 40 | #else 41 | #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); 42 | #endif 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /include/hip/hip_fp4.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #ifndef HIP_INCLUDE_HIP_HIP_FP4_H 24 | #define HIP_INCLUDE_HIP_HIP_FP4_H 25 | 26 | #include 27 | 28 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 29 | #include 30 | #endif 31 | 32 | #endif // HIP_INCLUDE_HIP_HIP_FP4_H 33 | -------------------------------------------------------------------------------- /include/hip/hip_fp6.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #ifndef HIP_INCLUDE_HIP_HIP_FP6_H 24 | #define HIP_INCLUDE_HIP_HIP_FP6_H 25 | 26 | #include 27 | 28 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 29 | #include 30 | #endif 31 | 32 | #endif // HIP_INCLUDE_HIP_HIP_FP6_H 33 | -------------------------------------------------------------------------------- /include/hip/hip_fp8.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | 23 | #ifndef HIP_INCLUDE_HIP_HIP_FP8_H 24 | #define HIP_INCLUDE_HIP_HIP_FP8_H 25 | 26 | #include 27 | 28 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 29 | // We only have fnuz defs for now, which are not supported by other platforms 30 | #include 31 | #endif 32 | 33 | #endif // HIP_INCLUDE_HIP_HIP_FP8_H 34 | -------------------------------------------------------------------------------- /include/hip/hip_gl_interop.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | #ifndef HIP_GL_INTEROP_H 23 | #define HIP_GL_INTEROP_H 24 | 25 | #include 26 | 27 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 28 | #include "hip/amd_detail/amd_hip_gl_interop.h" 29 | #elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) 30 | #include "hip/nvidia_detail/nvidia_hip_gl_interop.h" 31 | #endif 32 | #endif 33 | -------------------------------------------------------------------------------- /include/hip/hip_hcc.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 
18 | */ 19 | 20 | #ifndef HIP_INCLUDE_HIP_HIP_HCC_H 21 | #define HIP_INCLUDE_HIP_HIP_HCC_H 22 | #warning "hip/hip_hcc.h is deprecated, please use hip/hip_ext.h" 23 | #include "hip/hip_ext.h" 24 | #endif // #ifndef HIP_INCLUDE_HIP_HIP_HCC_H 25 | -------------------------------------------------------------------------------- /include/hip/hip_math_constants.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE.
21 | */ 22 | #ifndef HIP_MATH_CONSTANTS_H 23 | #define HIP_MATH_CONSTANTS_H 24 | 25 | #if !defined(__HIPCC_RTC__) 26 | #include 27 | #endif 28 | 29 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 30 | #include "hip/amd_detail/amd_hip_math_constants.h" 31 | #elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) 32 | #include "hip/nvidia_detail/nvidia_hip_math_constants.h" 33 | #else 34 | #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); 35 | #endif 36 | #endif 37 | -------------------------------------------------------------------------------- /include/hip/hip_profile.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 
18 | */ 19 | 20 | #ifndef HIP_INCLUDE_HIP_HIP_PROFILE_H 21 | #define HIP_INCLUDE_HIP_HIP_PROFILE_H 22 | 23 | #define HIP_SCOPED_MARKER(markerName, group) 24 | #define HIP_BEGIN_MARKER(markerName, group) 25 | #define HIP_END_MARKER() 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /include/hip/hip_runtime.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - 2025 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | //! HIP = Heterogeneous-compute Interface for Portability 24 | //! 25 | //! Define an extremely thin runtime layer that allows source code to be compiled unmodified 26 | //! through either AMD CLANG or NVCC. Key features tend to be in the spirit 27 | //!
and terminology of CUDA, but with a portable path to other accelerators as well: 28 | // 29 | //! Both paths support rich C++ features including classes, templates, lambdas, etc. 30 | //! Runtime API is C 31 | //! Memory management is based on pure pointers and resembles malloc/free/copy. 32 | // 33 | //! hip_runtime.h : includes everything in hip_api.h, plus math builtins and kernel launch 34 | //! macros. hip_runtime_api.h : Defines HIP API. This is a C header file and does not use any C++ 35 | //! features. 36 | 37 | #ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_H 38 | #define HIP_INCLUDE_HIP_HIP_RUNTIME_H 39 | 40 | #if !defined(__HIPCC_RTC__) 41 | // Some standard header files, these are included by hc.hpp and so we want to make them available on both 42 | // paths to provide a consistent include env and avoid "missing symbol" errors that only appear 43 | // on NVCC path: 44 | #if __cplusplus 45 | #include 46 | #include 47 | #else 48 | #include 49 | #include 50 | #endif // __cplusplus 51 | #endif // !defined(__HIPCC_RTC__) 52 | 53 | #include 54 | #include 55 | 56 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 57 | #include 58 | #elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) 59 | #include 60 | #else 61 | #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); 62 | #endif 63 | 64 | #if !defined(__HIPCC_RTC__) 65 | #include 66 | #include 67 | #endif // !defined(__HIPCC_RTC__) 68 | #include 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /include/hip/hip_texture_types.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | 24 | #ifndef HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H 25 | #define HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H 26 | 27 | #include 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /include/hip/hip_vector_types.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | //! hip_vector_types.h : Defines the HIP vector types. 24 | 25 | #ifndef HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H 26 | #define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H 27 | 28 | #include 29 | 30 | 31 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 32 | #if __cplusplus 33 | #include 34 | #endif 35 | #elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) 36 | #include 37 | #else 38 | #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); 39 | #endif 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /include/hip/library_types.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | 23 | #ifndef HIP_INCLUDE_HIP_LIBRARY_TYPES_H 24 | #define HIP_INCLUDE_HIP_LIBRARY_TYPES_H 25 | 26 | #if !defined(__HIPCC_RTC__) 27 | #include 28 | #endif 29 | 30 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 31 | 32 | typedef enum hipDataType { 33 | HIP_R_32F = 0, 34 | HIP_R_64F = 1, 35 | HIP_R_16F = 2, 36 | HIP_R_8I = 3, 37 | HIP_C_32F = 4, 38 | HIP_C_64F = 5, 39 | HIP_C_16F = 6, 40 | HIP_C_8I = 7, 41 | HIP_R_8U = 8, 42 | HIP_C_8U = 9, 43 | HIP_R_32I = 10, 44 | HIP_C_32I = 11, 45 | HIP_R_32U = 12, 46 | HIP_C_32U = 13, 47 | HIP_R_16BF = 14, 48 | HIP_C_16BF = 15, 49 | HIP_R_4I = 16, 50 | HIP_C_4I = 17, 51 | HIP_R_4U = 18, 52 | HIP_C_4U = 19, 53 | HIP_R_16I = 20, 54 | HIP_C_16I = 21, 55 | HIP_R_16U = 22, 56 | HIP_C_16U = 23, 57 | HIP_R_64I = 24, 58 | HIP_C_64I = 25, 59 | HIP_R_64U = 26, 60 | HIP_C_64U = 27, 61 | HIP_R_8F_E4M3 = 28, 62 | HIP_R_8F_E5M2 = 29, 63 | HIP_R_8F_UE8M0 = 30, 64 | HIP_R_6F_E2M3 = 31, 65 | HIP_R_6F_E3M2 = 32, 66 | HIP_R_4F_E2M1 = 33, 67 | // HIP specific Data Types 68 | HIP_R_8F_E4M3_FNUZ = 1000, 69 | HIP_R_8F_E5M2_FNUZ = 1001, 70 | } hipDataType; 71 | 72 | typedef enum hipLibraryPropertyType { 73 | HIP_LIBRARY_MAJOR_VERSION, 74 | HIP_LIBRARY_MINOR_VERSION, 75 | HIP_LIBRARY_PATCH_LEVEL 76 | } hipLibraryPropertyType; 77 | 78 | #elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) 79 | #include "library_types.h" 80 | #else 81 | #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); 82 | #endif 83 | 84 | #endif 85 | -------------------------------------------------------------------------------- /include/hip/math_functions.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | 23 | #ifndef HIP_INCLUDE_HIP_MATH_FUNCTIONS_H 24 | #define HIP_INCLUDE_HIP_MATH_FUNCTIONS_H 25 | 26 | // Some standard header files, these are included by hc.hpp and so we want to make them available on both 27 | // paths to provide a consistent include env and avoid "missing symbol" errors that only appear 28 | // on NVCC path: 29 | 30 | #if !defined(__HIPCC_RTC__) 31 | #include 32 | #endif 33 | 34 | #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) 35 | #include 36 | #elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) 37 | //#include 38 | #else 39 | #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); 40 | #endif 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /include/hip/surface_types.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2022 - 2023 Advanced Micro Devices, Inc. All rights reserved. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | */ 19 | 20 | /** 21 | * @file surface_types.h 22 | * @brief Defines surface types for HIP runtime. 23 | */ 24 | 25 | #ifndef HIP_INCLUDE_HIP_SURFACE_TYPES_H 26 | #define HIP_INCLUDE_HIP_SURFACE_TYPES_H 27 | 28 | #if defined(__clang__) 29 | #pragma clang diagnostic push 30 | #pragma clang diagnostic ignored "-Wreserved-identifier" 31 | #endif 32 | 33 | #if !defined(__HIPCC_RTC__) 34 | #include 35 | #endif 36 | 37 | /** 38 | * An opaque value that represents a hip surface object 39 | */ 40 | struct __hip_surface; 41 | typedef struct __hip_surface* hipSurfaceObject_t; 42 | 43 | /** 44 | * hip surface reference 45 | */ 46 | struct surfaceReference { 47 | hipSurfaceObject_t surfaceObject; 48 | }; 49 | 50 | /** 51 | * hip surface boundary modes 52 | */ 53 | enum hipSurfaceBoundaryMode { 54 | hipBoundaryModeZero = 0, 55 | hipBoundaryModeTrap = 1, 56 | hipBoundaryModeClamp = 2 57 | }; 58 | 59 | #if defined(__clang__) 60 | #pragma clang diagnostic pop 61 | #endif 62 | 63 | #endif /* !HIP_INCLUDE_HIP_SURFACE_TYPES_H */ 64 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. 
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | # THE SOFTWARE. 21 | 22 | echo "install.sh has moved to HIP-AMD backend repo" 23 | exit 1 24 | -------------------------------------------------------------------------------- /util/gedit/README.md: -------------------------------------------------------------------------------- 1 | ### How to Install? 2 | 3 | There are two ways to install the configuration file 4 | 5 | 1. Run the installer 6 | 7 | 2. Copy ```hip.lang``` file to ```/usr/share/gtksourceview-3.0/language-specs``` 8 | -------------------------------------------------------------------------------- /util/gedit/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | # THE SOFTWARE. 21 | 22 | sudo cp ./hip.lang /usr/share/gtksourceview-3.0/language-specs 23 | -------------------------------------------------------------------------------- /util/vim/README.md: -------------------------------------------------------------------------------- 1 | ### How to install? 2 | 3 | 1. Add the hip.vim to ~/.vim/syntax/ directory 4 | 2. Add the following text to the end of ~/.vimrc 5 | 6 | ```shell 7 | augroup filetypedetect 8 | au BufNewFile,BufRead *.cpp set filetype=cpp syntax=hip 9 | augroup END 10 | augroup filetypedetect 11 | au BufNewFile,BufRead *.c set filetype=c syntax=hip 12 | augroup END 13 | augroup filetypedetect 14 | au BufNewFile,BufRead *.cu set filetype=cu syntax=hip 15 | augroup END 16 | ``` 17 | --------------------------------------------------------------------------------