├── .clang-format ├── .githooks ├── install └── pre-commit ├── .github ├── CODEOWNERS └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── .jenkins ├── common.groovy ├── precheckin.groovy └── staticanalysis.groovy ├── BuildTools └── CMake │ ├── CMakeLists.txt │ ├── Makefile │ ├── Makefile-run-cmake │ ├── README.md │ └── src │ ├── CMakeLists.txt │ └── main.cpp ├── CHANGELOG.md ├── CMakeLists.txt ├── CppCheckSuppressions.txt ├── Extensions ├── gemm_ex_bf16_r │ ├── Makefile │ ├── README.md │ └── gemm_ex_bf16_r.cpp ├── gemm_ex_f16_r │ ├── Makefile │ ├── README.md │ └── gemm_ex_f16_r.cpp ├── gemm_ex_f32_r │ ├── Makefile │ ├── README.md │ └── gemm_ex_f32_r.cpp └── gemm_ex_i8_i32_r │ ├── Makefile │ ├── README.md │ └── gemm_ex_i8_i32_r.cpp ├── LICENSE.md ├── Languages ├── C │ ├── Makefile │ ├── README.md │ └── main.c ├── Fortran │ ├── Makefile │ └── main.f90 └── HIP │ ├── Makefile │ ├── README.md │ ├── kernel.cpp │ └── main.cpp ├── Level-1 ├── axpy │ ├── Makefile │ ├── README.md │ └── axpy.cpp ├── dot │ ├── Makefile │ ├── README.md │ └── dot.cpp ├── nrm2 │ ├── Makefile │ ├── README.md │ └── nrm2.cpp ├── scal │ ├── Makefile │ ├── README.md │ └── scal.cpp └── swap │ ├── Makefile │ ├── README.md │ └── swap.cpp ├── Level-2 ├── gemv │ ├── Makefile │ ├── README.md │ └── gemv.cpp ├── her │ ├── Makefile │ ├── README.md │ └── her.cpp └── trmv │ ├── Makefile │ ├── README.md │ └── trmv.cpp ├── Level-3 ├── gemm │ ├── Makefile │ ├── README.md │ └── gemm.cpp └── gemm_strided_batched │ ├── Makefile │ ├── README.md │ └── gemm_strided_batched.cpp ├── Makefile ├── Patterns ├── Multi-device │ ├── Makefile │ ├── Multi-device.cpp │ └── README.md └── Multi-stream │ ├── Makefile │ ├── Multi-stream.cpp │ └── README.md ├── README.md ├── common ├── ArgParser.cpp ├── ArgParser.hpp ├── error_macros.h ├── helpers.hpp ├── memoryHelpers.hpp └── timers.hpp ├── docker ├── dockerfile-build-centos ├── dockerfile-build-sles ├── dockerfile-build-ubuntu-rock ├── 
dockerfile-install-centos ├── dockerfile-install-sles └── dockerfile-install-ubuntu ├── rmake.py ├── rtest.py └── rtest.xml /.clang-format: -------------------------------------------------------------------------------- 1 | # Style file for MLSE Libraries based on the modified rocBLAS style 2 | 3 | # Common settings 4 | BasedOnStyle: WebKit 5 | TabWidth: 4 6 | IndentWidth: 4 7 | UseTab: Never 8 | ColumnLimit: 100 9 | 10 | # Other languages JavaScript, Proto 11 | 12 | --- 13 | Language: Cpp 14 | 15 | # http://releases.llvm.org/6.0.1/tools/clang/docs/ClangFormatStyleOptions.html#disabling-formatting-on-a-piece-of-code 16 | # int formatted_code; 17 | # // clang-format off 18 | # void unformatted_code ; 19 | # // clang-format on 20 | # void formatted_code_again; 21 | 22 | DisableFormat: false 23 | Standard: Cpp11 24 | 25 | AccessModifierOffset: -4 26 | AlignAfterOpenBracket: Align 27 | AlignConsecutiveAssignments: true 28 | AlignConsecutiveDeclarations: true 29 | AlignEscapedNewlines: Left 30 | AlignOperands: true 31 | AlignTrailingComments: false 32 | AllowAllArgumentsOnNextLine: true 33 | AllowAllConstructorInitializersOnNextLine: true 34 | AllowAllParametersOfDeclarationOnNextLine: true 35 | AllowShortBlocksOnASingleLine: false 36 | AllowShortCaseLabelsOnASingleLine: false 37 | AllowShortFunctionsOnASingleLine: Empty 38 | AllowShortIfStatementsOnASingleLine: false 39 | AllowShortLoopsOnASingleLine: false 40 | AlwaysBreakAfterDefinitionReturnType: false 41 | AlwaysBreakAfterReturnType: None 42 | AlwaysBreakBeforeMultilineStrings: false 43 | AlwaysBreakTemplateDeclarations: true 44 | BinPackArguments: false 45 | BinPackParameters: false 46 | 47 | # Configure each individual brace in BraceWrapping 48 | BreakBeforeBraces: Custom 49 | # Control of individual brace wrapping cases 50 | BraceWrapping: { 51 | AfterCaseLabel: 'true' 52 | AfterClass: 'true' 53 | AfterControlStatement: 'true' 54 | AfterEnum : 'true' 55 | AfterFunction : 'true' 56 | AfterNamespace : 'true' 57 | 
AfterStruct : 'true' 58 | AfterUnion : 'true' 59 | BeforeCatch : 'true' 60 | BeforeElse : 'true' 61 | IndentBraces : 'false' 62 | # AfterExternBlock : 'true' 63 | } 64 | 65 | #BreakAfterJavaFieldAnnotations: true 66 | #BreakBeforeInheritanceComma: false 67 | #BreakBeforeBinaryOperators: None 68 | #BreakBeforeTernaryOperators: true 69 | #BreakConstructorInitializersBeforeComma: true 70 | #BreakStringLiterals: true 71 | 72 | CommentPragmas: '^ IWYU pragma:' 73 | #CompactNamespaces: false 74 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 75 | ConstructorInitializerIndentWidth: 4 76 | ContinuationIndentWidth: 4 77 | Cpp11BracedListStyle: true 78 | SpaceBeforeCpp11BracedList: false 79 | DerivePointerAlignment: false 80 | ExperimentalAutoDetectBinPacking: false 81 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] 82 | IndentCaseLabels: false 83 | IndentPPDirectives: None 84 | #FixNamespaceComments: true 85 | IndentWrappedFunctionNames: true 86 | KeepEmptyLinesAtTheStartOfBlocks: true 87 | MacroBlockBegin: '' 88 | MacroBlockEnd: '' 89 | #JavaScriptQuotes: Double 90 | MaxEmptyLinesToKeep: 1 91 | NamespaceIndentation: All 92 | ObjCBlockIndentWidth: 4 93 | #ObjCSpaceAfterProperty: true 94 | #ObjCSpaceBeforeProtocolList: true 95 | PenaltyBreakBeforeFirstCallParameter: 19 96 | PenaltyBreakComment: 300 97 | PenaltyBreakFirstLessLess: 120 98 | PenaltyBreakString: 1000 99 | 100 | PenaltyExcessCharacter: 1000000 101 | PenaltyReturnTypeOnItsOwnLine: 60 102 | PointerAlignment: Left 103 | SpaceAfterCStyleCast: false 104 | SpaceBeforeAssignmentOperators: true 105 | SpaceBeforeParens: Never 106 | SpaceInEmptyBlock: false 107 | SpaceInEmptyParentheses: false 108 | SpacesBeforeTrailingComments: 1 109 | SpacesInAngles: false 110 | SpacesInContainerLiterals: true 111 | SpacesInCStyleCastParentheses: false 112 | SpacesInParentheses: false 113 | SpacesInSquareBrackets: false 114 | #SpaceAfterTemplateKeyword: true 115 | #SpaceBeforeInheritanceColon: true 116 | 117 | 
#SortUsingDeclarations: true 118 | SortIncludes: true 119 | 120 | # Comments are for developers, they should arrange them 121 | ReflowComments: false 122 | 123 | #IncludeBlocks: Preserve 124 | --- 125 | -------------------------------------------------------------------------------- /.githooks/install: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Symlink the repository's pre-commit hook into this clone's hooks directory; stop on the first failing step. 3 | cd "$(git rev-parse --git-dir)" || exit 1 4 | cd hooks || exit 1 5 | 6 | echo "Installing hooks..." 7 | ln -s ../../.githooks/pre-commit pre-commit || exit 1 8 | echo "Done!" 9 | -------------------------------------------------------------------------------- /.githooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This pre-commit hook checks if any versions of clang-format 4 | # are installed, and if so, uses the installed version to format 5 | # the staged changes. 6 | 7 | export PATH=$PATH:/opt/rocm/llvm/bin:/opt/rocm/hcc/bin:/usr/bin:/bin 8 | 9 | # Redirect stdout to stderr. 
10 | exec >&2 11 | 12 | # Do everything from top - level 13 | cd $(git rev-parse --show-toplevel) 14 | 15 | if git rev-parse --verify HEAD >/dev/null 2>&1; then 16 | against=HEAD 17 | else 18 | # Initial commit: diff against an empty tree object 19 | against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 20 | fi 21 | 22 | if [[ "$1" == "--reformat" ]]; then 23 | files=$(git ls-files --exclude-standard) 24 | else 25 | files=$(git diff-index --cached --name-only $against) 26 | fi 27 | 28 | [[ -z "$files" ]] && exit 29 | 30 | # Change the copyright date at the top of any text files 31 | for file in $files; do 32 | [[ -L $file ]] && continue 33 | echo "Processing copyright dates in $file" 34 | if [[ -e $file ]]; then 35 | /usr/bin/perl -pi -e 'INIT { exit 1 if !-f $ARGV[0] || -B $ARGV[0]; $year = (localtime)[5] + 1900 } 36 | s/^([*\/#\/"*[:space:]]*)Copyright\s+(?:\(C\)\s*)?(\d+)(?:\s*-\s*\d+)?\s(Advanced\s*Micro\s*Devices)/qq($1Copyright (C) $2@{[$year != $2 ? "-$year" : ""]} $3)/ie 37 | if $. < 10' "$file" && git add -u "$file" 38 | fi 39 | done 40 | 41 | # do the formatting 42 | for file in $files; do 43 | [[ -L $file ]] && continue 44 | if [[ -e $file ]] && echo $file | grep -Eq '\.c$|\.h$|\.hpp$|\.cpp$|\.cl$|\.in$|\.txt$|\.yaml$|\.yml$|\.sh$|\.py$|\.pl$|\.cmake$|\.md$|\.rst$|\.groovy$|\.ini$|\.awk$|\.csv$'; then 45 | echo "Processing line endings in $file" 46 | sed -i -e 's/[[:space:]]*$//' "$file" # Remove whitespace at end of lines 47 | sed -i -e '$a\' "$file" # Add missing newline to end of file 48 | 49 | echo "Converting non-ASCII characters to ASCII equivalents in $file" 50 | # Convert UTF8 non-ASCII to ASCII 51 | temp=$(mktemp) 52 | [[ -w $temp ]] || exit 53 | iconv -s -f utf-8 -t ascii//TRANSLIT "$file" > "$temp" || exit 54 | chmod --reference="$file" "$temp" || exit 55 | mv -f "$temp" "$file" || exit 56 | git add -u "$file" 57 | fi 58 | done 59 | 60 | # if clang-format exists, run it on C/C++ files 61 | if command -v clang-format >/dev/null; then 62 | for file 
in $files; do 63 | [[ -L $file ]] && continue 64 | if [[ -e $file ]] && echo $file | grep -Eq '\.c$|\.h$|\.hpp$|\.cpp$|\.cl$|\.h\.in$|\.hpp\.in$|\.cpp\.in$'; then 65 | echo "clang-format $file" 66 | clang-format -i -style=file "$file" 67 | git add -u "$file" 68 | fi 69 | done 70 | else 71 | echo "clang-format command not found, skipping file formatting." 72 | # exit 127 73 | fi 74 | 75 | # Perform cppcheck for added or modified files 76 | # if cppcheck exists, run it on C/C++ files 77 | if command -v cppcheck >/dev/null; then 78 | for file in $files; do 79 | [[ -L $file ]] && continue 80 | if [[ -e $file ]] && echo $file | grep -Eq '\.c$|\.h$|\.hpp$|\.cpp$|\.cl$|\.h\.in$|\.hpp\.in$|\.cpp\.in$'; then 81 | echo "cppcheck for $file" 82 | cppcheck --enable=all --inconclusive --library=googletest --inline-suppr \ 83 | -i./build --suppressions-list=./CppCheckSuppressions.txt \ 84 | --template="{file}:{line}: {severity}: {id} :{message}" \ 85 | --error-exitcode=1 "$file" 86 | exit $? 87 | fi 88 | done 89 | else 90 | echo "cppcheck command not found, skipping static analysis." 91 | # exit 127 92 | fi 93 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @amcamd @TorreZuk @mahmoodw @daineAMD @bragadeesh @NaveenElumalaiAMD @rkamd 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[Bug]: " 5 | labels: ["bug", "triage"] 6 | 7 | --- 8 | 9 | ### Describe the bug 10 | A clear and concise description of what the bug or problem is. 11 | 12 | ### To Reproduce 13 | Precise version of rocBLAS and rocBLAS-Examples commit hash. 14 | Steps to reproduce the behavior: 15 | 1. Install '...' version '...' 16 | 2. 
Run '...' with data '...' 17 | 3. See error on logfile '...' 18 | 19 | ### Expected behavior 20 | A clear and concise description of what you expected to happen. 21 | 22 | ### Log-files 23 | Add **full** logfiles to help explain your problem. 24 | 25 | ### Environment 26 | 27 | | Hardware | description | 28 | |-----|-----| 29 | | CPU | device name | 30 | | GPU | device name | 31 | 32 | The above hardware Table information can be generated by command: 33 | ``` 34 | rocminfo | grep Marketing 35 | ``` 36 | 37 | | Software | version | 38 | |-----|-----| 39 | | rocm-core | v0.0 | 40 | | rocblas | v0.0 | 41 | 42 | The above software Table information can be queried with: 43 | ``` 44 | Ubuntu/Debian: 45 | dpkg -s rocm-core | grep Version 46 | dpkg -s rocblas | grep Version 47 | Centos/RHEL: 48 | rpm -qa | grep rocm-core 49 | rpm -qa | grep rocblas 50 | SLES: 51 | zypper se -s | grep rocm-core 52 | zypper se -s | grep rocblas 53 | ``` 54 | 55 | Make sure that ROCm is correctly installed and to capture detailed environment information run the following command: 56 | ``` 57 | printf '=== environment\n' > environment.txt && 58 | printf '\n\n=== date\n' >> environment.txt && date >> environment.txt && 59 | printf '\n\n=== Linux Kernel\n' >> environment.txt && uname -a >> environment.txt && 60 | printf '\n\n=== rocm-smi' >> environment.txt && rocm-smi >> environment.txt && 61 | printf '\n\n' >> environment.txt && hipconfig >> environment.txt && 62 | printf '\n\n=== rocminfo\n' >> environment.txt && rocminfo >> environment.txt && 63 | printf '\n\n=== lspci VGA\n' >> environment.txt && lspci | grep -i vga >> environment.txt 64 | ``` 65 | 66 | Attach `environment.txt` 67 | 68 | 69 | ### Additional context 70 | Add any other context about the problem here. 
71 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[Feature]: " 5 | labels: ["feature", "triage"] 6 | 7 | --- 8 | 9 | ### Is your feature request related to a problem? Please describe. 10 | A clear and concise description of what the problem is. 11 | 12 | ### Describe the solution you'd like 13 | A clear and concise description of what you want to happen. 14 | 15 | ### Describe alternatives you've considered 16 | A clear and concise description of any alternative solutions or features you've considered. 17 | 18 | ### Additional context 19 | Add any other context or screenshots about the feature request here. 20 | 21 | ### Library context 22 | | Software | version | 23 | |-----|-----| 24 | | rocblas | v0.0 | 25 | 26 | The above Table information can be queried with: 27 | ``` 28 | Ubuntu/Debian: 29 | dpkg -s rocblas | grep Version 30 | Centos/RHEL: 31 | rpm -qa | grep rocblas 32 | SLES: 33 | zypper se -s | grep rocblas 34 | ``` 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | 30 | # vim tags 31 | tags 32 | .tags 33 | .*.swp 34 | 35 | # Editors 36 | .vscode 37 | 38 | # build-in-source directory 39 | build* 40 | 41 | # emacs temporary/backup files 42 | .\#* 43 | \#*\# 44 | *~ 45 | 
-------------------------------------------------------------------------------- /.jenkins/common.groovy: -------------------------------------------------------------------------------- 1 | // This file is for internal AMD use. 2 | // If you are interested in running your own Jenkins, please raise a github issue for assistance. 3 | 4 | def runCompileCommand(platform, project, jobName, boolean sameOrg=false) 5 | { 6 | project.paths.construct_build_prefix() 7 | 8 | def sudo = platform.jenkinsLabel.contains('sles') ? '/usr/bin/sudo --preserve-env ' : '' 9 | String centos = platform.jenkinsLabel.contains('centos') ? 'source scl_source enable devtoolset-7' : '' 10 | def getDependencies = auxiliary.getLibrary('rocBLAS-internal',platform.jenkinsLabel, null, sameOrg) 11 | def command = """#!/usr/bin/env bash 12 | set -x 13 | cd ${project.paths.project_build_prefix} 14 | ${getDependencies} 15 | export PATH=/opt/rocm/bin:$PATH 16 | ${centos} 17 | ${sudo} make 18 | """ 19 | 20 | platform.runCommand(this, command) 21 | } 22 | 23 | def runTestCommand (platform, project) 24 | { 25 | def sudo = auxiliary.sudo(platform.jenkinsLabel) 26 | 27 | def command = """#!/usr/bin/env bash 28 | set -x 29 | cd ${project.paths.project_build_prefix} 30 | ${sudo} make run 2>&1 | tee test_log 31 | grep -ni error test_log 32 | grep -ni warning test_log 33 | grep -ni fail test_log 34 | grep -ni error test_log > test_errors 35 | grep -ni warning test_log >> test_errors 36 | grep -ni fail test_log >> test_errors 37 | VAR=\$(wc -l < test_errors) 38 | if [ \$VAR != 0 ]; then 39 | exit 1 40 | fi 41 | """ 42 | 43 | platform.runCommand(this, command) 44 | } 45 | 46 | return this 47 | 48 | -------------------------------------------------------------------------------- /.jenkins/precheckin.groovy: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env groovy 2 | @Library('rocJenkins@pong') _ 3 | import com.amd.project.* 4 | import com.amd.docker.* 5 | import 
java.nio.file.Path; 6 | 7 | def runCI = 8 | { 9 | nodeDetails, jobName-> 10 | 11 | def prj = new rocProject('rocBLAS-Examples', 'PreCheckin') 12 | prj.libraryDependencies = ['rocBLAS-internal'] 13 | 14 | def nodes = new dockerNodes(nodeDetails, jobName, prj) 15 | 16 | def commonGroovy 17 | 18 | boolean formatCheck = true 19 | 20 | def compileCommand = 21 | { 22 | platform, project-> 23 | 24 | commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" 25 | commonGroovy.runCompileCommand(platform, project, jobName) 26 | } 27 | 28 | def testCommand = 29 | { 30 | platform, project-> 31 | 32 | commonGroovy.runTestCommand(platform, project) 33 | } 34 | 35 | 36 | buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, null) 37 | } 38 | 39 | ci: { 40 | String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) 41 | 42 | def propertyList = ["compute-rocm-dkms-no-npi":[], 43 | "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], 44 | "rocm-docker":[]] 45 | propertyList = auxiliary.appendPropertyList(propertyList) 46 | 47 | def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), 48 | "compute-rocm-dkms-no-npi-hipclang":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), 49 | "rocm-docker":([ubuntu16:['gfx906'],centos7:['gfx906'],sles15sp1:['gfx908']])] 50 | jobNameList = auxiliary.appendJobNameList(jobNameList) 51 | 52 | propertyList.each 53 | { 54 | jobName, property-> 55 | if (urlJobName == jobName) 56 | properties(auxiliary.addCommonProperties(property)) 57 | } 58 | 59 | jobNameList.each 60 | { 61 | jobName, nodeDetails-> 62 | if (urlJobName == jobName) 63 | stage(jobName) { 64 | runCI(nodeDetails, jobName) 65 | } 66 | } 67 | 68 | // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 69 | if(!jobNameList.keySet().contains(urlJobName)) 70 | { 71 | properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) 72 | stage(urlJobName) { 73 | runCI([ubuntu16:['gfx906']], urlJobName) 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /.jenkins/staticanalysis.groovy: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env groovy 2 | // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ 3 | @Library('rocJenkins@pong') _ 4 | 5 | // This file is for AMD Continuous Integration use. 6 | // If you are interested in running your own Jenkins, please raise a github issue for assistance. 7 | 8 | import com.amd.project.* 9 | import com.amd.docker.* 10 | import java.nio.file.Path 11 | // runCI: calls buildProject with formatCheck and staticAnalysis set to true; the three arguments after nodes.dockerArray are passed as null. 12 | def runCI = 13 | { 14 | nodeDetails, jobName-> 15 | 16 | def prj = new rocProject('rocBLAS-Examples', 'StaticAnalysis') 17 | 18 | def nodes = new dockerNodes(nodeDetails, jobName, prj) 19 | 20 | boolean formatCheck = true 21 | boolean staticAnalysis = true 22 | 23 | buildProject(prj, formatCheck, nodes.dockerArray, null, null, null, staticAnalysis) 24 | } 25 | 26 | ci: { 27 | String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) 28 | // cron '0 1 * * 6' is minute 0, hour 1, day-of-week 6 (presumably Saturday 01:00 - confirm against the Jenkins cron syntax) 29 | properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * 6')])])) 30 | 31 | def jobNameList = ["main":([ubuntu22:['any']])] 32 | jobNameList = auxiliary.appendJobNameList(jobNameList, 'rocBLAS-Examples') 33 | 34 | jobNameList.each 35 | { 36 | jobName, nodeDetails-> 37 | if (urlJobName == jobName) 38 | stage(jobName) { 39 | runCI(nodeDetails, jobName) 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /BuildTools/CMake/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 
######################################################################## 2 | # Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | # 21 | # ######################################################################## 22 | 23 | cmake_minimum_required( VERSION 3.5 ) 24 | 25 | project( example-cmake LANGUAGES CXX) 26 | 27 | find_package(rocblas REQUIRED) 28 | 29 | add_subdirectory( src ) 30 | -------------------------------------------------------------------------------- /BuildTools/CMake/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # ######################################################################## 22 | 23 | 24 | EXE = $(shell basename $(CURDIR)) 25 | SOURCES = ./CMakeLists.txt ./src/CMakeLists.txt ./Makefile-run-cmake 26 | BUILD = build 27 | 28 | RM = rm -rf 29 | 30 | 31 | .PHONY: all clean run 32 | 33 | all: $(BUILD) 34 | # Copy the helper makefile into $(BUILD) and run it there; $(MAKE) lets the sub-make share flags and the jobserver. 35 | $(BUILD): $(SOURCES) 36 | mkdir -p $(BUILD) 37 | cp Makefile-run-cmake $(BUILD) 38 | $(MAKE) -C $(BUILD) -f Makefile-run-cmake 39 | 40 | clean: 41 | $(RM) $(BUILD) 42 | 43 | run: 44 | ./$(BUILD)/src/example-cmake 45 | 46 | -------------------------------------------------------------------------------- /BuildTools/CMake/Makefile-run-cmake: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | # 21 | # ######################################################################## 22 | 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | CXX=g++ 29 | #CXX=hipcc 30 | 31 | .PHONY: runcmake 32 | # Configure the parent directory's CMake project from here, then build it; $(MAKE) is used so the sub-make shares flags and the jobserver. 33 | runcmake: 34 | # CMAKE_PREFIX_PATH is required on some distributions so that find_package(rocblas) can find the cmake config files 35 | CXX=$(CXX) cmake -DCMAKE_PREFIX_PATH=$(ROCM_PATH) .. 36 | $(MAKE) 37 | 38 | -------------------------------------------------------------------------------- /BuildTools/CMake/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples CMake 2 | This example shows how to use rocBLAS in a C++ program with a CMake build system. The focus for this example is the setup in [CMakeLists.txt](CMakeLists.txt) and [src/CMakeLists.txt](src/CMakeLists.txt). The C++ code in `main.cpp` is just an example of some rocBLAS function calls, and the Makefiles are there only as part of the rocBLAS-Examples make build system, so they can be ignored. The CMake project depends only on the rocBLAS package, which automatically brings in the rocBLAS dependencies for hip. This allows inclusion of hip header files in `main.cpp` without having to explicitly specify include search paths. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values for a matrix size. Otherwise a single argument runs the geam function with (M=N=argument with alpha=1, beta=2). 6 | 7 | Usage: ./build/src/example-cmake [size] 8 | [size] Matrix dimension (default 2048) 9 | 10 | 11 | ## Building 12 | These examples require that you have rocBLAS on your machine. 
If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocBLAS build. The makefile defaults to compiling with g++, but you can also use the hipcc compiler from the ROCm installation. Note that the standard cmake style of building is invoked via make within the file Makefile-run-cmake. However, if you use this pattern yourself, you don't need the top-level Makefiles: invoke cmake directly from a build directory, and specify the compiler using the form: CXX=g++ cmake .. 13 | 14 | cd BuildTools/CMake 15 | mkdir build 16 | cd build 17 | cmake .. 18 | make 19 | ./src/example-cmake 20 | -------------------------------------------------------------------------------- /BuildTools/CMake/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | # 21 | # ######################################################################## 22 | 23 | # target 24 | add_executable( example-cmake main.cpp ) 25 | 26 | # rocBLAS-examples common include path, e.g. "error_macros.h" 27 | target_include_directories( example-cmake PRIVATE ../../../common ) 28 | 29 | if (MSVC) 30 | # required for Visual Studio or it defines it as 199711L regardless of C++ standard 31 | target_compile_options( example-cmake PRIVATE /Zc:__cplusplus ) 32 | endif() 33 | 34 | target_link_libraries( example-cmake PRIVATE roc::rocblas ) 35 | -------------------------------------------------------------------------------- /BuildTools/CMake/src/main.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | * 21 | * ************************************************************************ */ 22 | 23 | #include "error_macros.h" 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | // Example: computes C = alpha * A + beta * B with rocblas_dgeam on n x n matrices (n from argv[1], default 2048); A and B are both uploaded from the same all-ones host matrix. Prints PASS/FAIL and returns non-zero when verification fails. 31 | int main(int argc, char** argv) 32 | { 33 | size_t lda, ldb, lddev; 34 | size_t rows, cols; 35 | 36 | int n = 2048; 37 | if(argc > 1) 38 | n = atoi(argv[1]); 39 | if(n <= 0) { fprintf(stderr, "matrix dimension must be a positive integer\n"); return 1; } // atoi yields 0 for non-numeric input; a negative n would wrap when stored in size_t 40 | rows = n; 41 | cols = n; 42 | lda = ldb = lddev = n; 43 | 44 | typedef double data_type; 45 | 46 | rocblas_handle handle; 47 | rocblas_status rstatus = rocblas_create_handle(&handle); 48 | CHECK_ROCBLAS_STATUS(rstatus); 49 | 50 | hipStream_t test_stream; 51 | rstatus = rocblas_get_stream(handle, &test_stream); 52 | CHECK_ROCBLAS_STATUS(rstatus); 53 | 54 | data_type* ha; 55 | data_type* hb; 56 | 57 | // allocate pinned memory to allow async memory transfer 58 | CHECK_HIP_ERROR( 59 | hipHostMalloc((void**)&ha, lda * cols * sizeof(data_type), hipHostMallocMapped)); 60 | CHECK_HIP_ERROR( 61 | hipHostMalloc((void**)&hb, ldb * cols * sizeof(data_type), hipHostMallocMapped)); 62 | // fill the column-major input with ones (size_t indices match the size_t bounds) 63 | for(size_t i1 = 0; i1 < rows; i1++) 64 | for(size_t i2 = 0; i2 < cols; i2++) 65 | ha[i1 + i2 * lda] = 1.0; 66 | 67 | data_type* da = 0; 68 | data_type* db = 0; 69 | data_type* dc = 0; 70 | CHECK_HIP_ERROR(hipMalloc((void**)&da, lddev * cols * sizeof(data_type))); 71 | CHECK_HIP_ERROR(hipMalloc((void**)&db, lddev * cols * sizeof(data_type))); 72 | CHECK_HIP_ERROR(hipMalloc((void**)&dc, lddev * cols *
sizeof(data_type))); 73 | 74 | // upload asynchronously from pinned memory 75 | rstatus 76 | = rocblas_set_matrix_async(rows, cols, sizeof(data_type), ha, lda, da, lddev, test_stream); 77 | CHECK_ROCBLAS_STATUS(rstatus); 78 | rstatus 79 | = rocblas_set_matrix_async(rows, cols, sizeof(data_type), ha, lda, db, lddev, test_stream); 80 | CHECK_ROCBLAS_STATUS(rstatus); 81 | 82 | // scalar arguments will be from host memory 83 | rstatus = rocblas_set_pointer_mode(handle, rocblas_pointer_mode_host); 84 | CHECK_ROCBLAS_STATUS(rstatus); 85 | 86 | data_type alpha = 1.0; 87 | data_type beta = 2.0; 88 | 89 | // invoke asynchronous computation 90 | rstatus = rocblas_dgeam(handle, 91 | rocblas_operation_none, 92 | rocblas_operation_none, 93 | rows, 94 | cols, 95 | &alpha, 96 | da, 97 | lddev, 98 | &beta, 99 | db, 100 | lddev, 101 | dc, 102 | lddev); 103 | CHECK_ROCBLAS_STATUS(rstatus); 104 | 105 | // fetch results asynchronously to pinned memory 106 | rstatus 107 | = rocblas_get_matrix_async(rows, cols, sizeof(data_type), dc, lddev, hb, ldb, test_stream); 108 | CHECK_ROCBLAS_STATUS(rstatus); 109 | 110 | // wait on transfer to be finished 111 | CHECK_HIP_ERROR(hipStreamSynchronize(test_stream)); 112 | 113 | // check against expected results: A and B both hold ha, so C must equal (alpha + beta) * ha = 3 * ha 114 | bool fail = false; 115 | for(size_t i1 = 0; i1 < rows; i1++) 116 | for(size_t i2 = 0; i2 < cols; i2++) 117 | if(hb[i1 + i2 * ldb] != 3.0 * ha[i1 + i2 * lda]) 118 | fail = true; 119 | 120 | CHECK_HIP_ERROR(hipFree(da)); 121 | CHECK_HIP_ERROR(hipFree(db)); 122 | CHECK_HIP_ERROR(hipFree(dc)); 123 | 124 | // free pinned memory 125 | CHECK_HIP_ERROR(hipHostFree(ha)); 126 | CHECK_HIP_ERROR(hipHostFree(hb)); 127 | 128 | rstatus = rocblas_destroy_handle(handle); 129 | CHECK_ROCBLAS_STATUS(rstatus); 130 | 131 | fprintf(stdout, "%s\n", fail ?
"FAIL" : "PASS"); 132 | // report a verification failure through the exit status so scripts can detect it 133 | return fail ? 1 : 0; 134 | } 135 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log for rocBLAS-Examples 2 | 3 | Full documentation for rocBLAS is available at [rocblas.readthedocs.io](https://rocblas.readthedocs.io/en/latest/). 4 | 5 | ## For rocBLAS 4.1.0 in ROCm 6.1.0 6 | 7 | ### Changed 8 | - gemm_ex examples to use transpose arguments NT 9 | 10 | ## For rocBLAS 4.0.0 in ROCm 6.0.0 11 | 12 | ### Changed 13 | - Fixed tolerance for half precisions to include problem size accumulation factor 14 | 15 | ## For rocBLAS 3.1.0 in ROCm 5.7.0 16 | 17 | ### Changed 18 | - Added example calling complex version of trmv (ctrmv) which illustrates how different data types for handling complex numbers can be used and also absolute and relative error checking. 19 | - Removed the deprecated paths to hip/bin and rocblas fortran module being removed in future releases 20 | 21 | ## For rocBLAS 3.0.0 in ROCm 5.6.0 22 | 23 | ### Changed 24 | - Removed deprecated defines, code and referring to deprecated folders 25 | 26 | ## For rocBLAS 2.47.0 in ROCm 5.5.0 27 | 28 | ### Fixed 29 | - On Windows the Visual Studio toolchain include of rocblas.h shows deprecation warnings. No longer requires define ROCBLAS_NO_DEPRECATED_WARNINGS. 30 | 31 | ## For rocBLAS 2.46.0 in ROCm 5.4.0 32 | 33 | ### Changed 34 | - On Windows the Visual Studio toolchain include of rocblas.h requires adding a define ROCBLAS_NO_DEPRECATED_WARNINGS for ROCM 5.4 35 | 36 | ## For rocBLAS 2.45.0 in ROCm 5.3.0 37 | 38 | ### Changed 39 | - No changes were made for this release. 40 | 41 | ## For rocBLAS 2.44.0 in ROCm 5.2.0 42 | 43 | ### Changed 44 | - ROCmTM installation paths for include files and libraries have changed locations.
This release of examples takes these from the new locations to avoid the deprecation messages introduced into the old header locations. Relevant changes can be seen in the Makefiles. 45 | 46 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 | # 21 | # ######################################################################## 22 | 23 | cmake_minimum_required( VERSION 3.5 ) 24 | 25 | set( CMAKE_CXX_STANDARD 17 ) 26 | 27 | project( rocblas-examples LANGUAGES CXX ) 28 | 29 | if (WIN32) 30 | if (NOT CMAKE_PREFIX_PATH) 31 | set(CMAKE_PREFIX_PATH C:/hipSDK) 32 | endif() 33 | endif() 34 | 35 | find_package(rocblas REQUIRED) 36 | 37 | file( GLOB msvc_examples 38 | LIST_DIRECTORIES OFF 39 | CONFIGURE_DEPENDS 40 | # building with msvc 41 | ${CMAKE_SOURCE_DIR}/Level-1/*/*.cpp 42 | ${CMAKE_SOURCE_DIR}/Level-2/*gemv*/*.cpp 43 | # level 2 her using hip type 44 | ${CMAKE_SOURCE_DIR}/Level-3/*/*.cpp 45 | ${CMAKE_SOURCE_DIR}/Extensions/*gemm_ex_i*/*.cpp 46 | ${CMAKE_SOURCE_DIR}/Extensions/*gemm_ex_f32*/*.cpp 47 | # bf16 helpers require amd clang or user defined operators so skipped 48 | ${CMAKE_SOURCE_DIR}/Patterns/*/*.cpp 49 | ) 50 | 51 | file( GLOB hipcc_examples 52 | LIST_DIRECTORIES OFF 53 | CONFIGURE_DEPENDS 54 | # either clang or hip types so using amd clang compiler 55 | ${CMAKE_SOURCE_DIR}/Level-2/*her*/*.cpp 56 | ${CMAKE_SOURCE_DIR}/Extensions/*gemm_ex_bf*/*.cpp 57 | ${CMAKE_SOURCE_DIR}/Extensions/*gemm_ex_f16*/*.cpp 58 | ) 59 | 60 | add_library( examples-common STATIC ${CMAKE_SOURCE_DIR}/common/ArgParser.cpp ) 61 | target_include_directories( examples-common PRIVATE ${CMAKE_SOURCE_DIR}/common ) 62 | # other targets will inherit the rocblas dependency from this 63 | target_link_libraries( examples-common PUBLIC roc::rocblas ) 64 | # new_target( <source file> <ignored> ): adds an executable named after the source file's basename (extension stripped) and returns that name to the caller in target_name 65 | function( new_target file_name target_name ) 66 | string(REGEX MATCH "^(.*)\/(.*)\\.[^.]*$" temp ${file_name}) 67 | set( target_name ${CMAKE_MATCH_2}) 68 | 69 | message(STATUS ${target_name}) 70 | add_executable( ${target_name} ${file_name} ) 71 | target_include_directories( ${target_name} PRIVATE ${CMAKE_SOURCE_DIR}/common ) 72 | set_target_properties( ${target_name} PROPERTIES 73 | CXX_STANDARD 17 74 | CXX_STANDARD_REQUIRED ON 75 | CXX_EXTENSIONS OFF 76 | ) 77 | 78 |
target_link_libraries( ${target_name} PRIVATE examples-common ) 79 | set( target_name ${target_name} PARENT_SCOPE ) 80 | endfunction() 81 | # a compiler path containing "hipcc" builds only hipcc_examples; any other compiler builds msvc_examples 82 | if (NOT CMAKE_CXX_COMPILER MATCHES ".*hipcc.*") 83 | 84 | list( APPEND msvc_targets examples-common ) 85 | 86 | foreach( file_i ${msvc_examples}) 87 | new_target( ${file_i} target_name ) 88 | list( APPEND msvc_targets ${target_name} ) 89 | endforeach( file_i ) 90 | 91 | message( STATUS "targets: ${msvc_targets}" ) 92 | 93 | # msvc modifications 94 | foreach( target_i ${msvc_targets}) 95 | if (WIN32) 96 | # required for Visual Studio or it defines it as 199711L regardless of C++ standard 97 | target_compile_options( ${target_i} PRIVATE /Zc:__cplusplus ) 98 | 99 | # we use hip types so setting these 100 | #target_compile_definitions( ${target_i} PRIVATE __HIP_PLATFORM_AMD__ __HIP_PLATFORM_HCC__ ) 101 | endif() 102 | endforeach( target_i ) 103 | 104 | else() 105 | 106 | list( APPEND hipcc_targets examples-common ) 107 | 108 | foreach( file_i ${hipcc_examples}) 109 | new_target( ${file_i} target_name ) 110 | list( APPEND hipcc_targets ${target_name} ) 111 | endforeach( file_i ) 112 | 113 | message( STATUS "targets: ${hipcc_targets}" ) 114 | 115 | # hipcc modifications 116 | foreach( target_i ${hipcc_targets}) 117 | endforeach( target_i ) 118 | 119 | if (WIN32) 120 | # for now put test harness in build binary directory 121 | file( GLOB test_harness 122 | LIST_DIRECTORIES OFF 123 | CONFIGURE_DEPENDS 124 | ${CMAKE_SOURCE_DIR}/rtest.* 125 | ) 126 | list(GET hipcc_targets -1 last_target) 127 | foreach( file_i ${test_harness}) 128 | add_custom_command( TARGET ${last_target} POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy ${file_i} ${PROJECT_BINARY_DIR} ) 129 | endforeach( file_i ) 130 | # copy msvc build executables into PROJECT_BINARY_DIR 131 | if (CMAKE_BUILD_TYPE MATCHES "Debug") 132 | set(msvc_subdir "Debug") 133 | else() 134 | set(msvc_subdir "Release") 135 | endif() 136 | add_custom_command( TARGET ${last_target} POST_BUILD
COMMAND ${CMAKE_COMMAND} ARGS -E copy_directory ${PROJECT_BINARY_DIR}/../msvc/${msvc_subdir} ${PROJECT_BINARY_DIR} ) 137 | endif() 138 | 139 | endif() 140 | 141 | 142 | -------------------------------------------------------------------------------- /CppCheckSuppressions.txt: -------------------------------------------------------------------------------- 1 | 2 | // List of global false positives to be suppressed 3 | 4 | // cppcheck has an open issue ticket to address this issue https://trac.cppcheck.net/ticket/9301 5 | // we will suppress this error globally until the fix. 6 | internalAstError 7 | 8 | // cppcheck community recommends disabling 'unusedFunction' check for library code. 9 | unusedFunction 10 | 11 | // To suppress missing include false positives 12 | missingInclude 13 | 14 | // To suppress any unmatched suppression in a file 15 | unmatchedSuppression 16 | -------------------------------------------------------------------------------- /Extensions/gemm_ex_bf16_r/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software.
13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | # 21 | # ######################################################################## 22 | 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | HIP_PATH?= $(wildcard /opt/rocm) 29 | ifeq (,$(HIP_PATH)) 30 | HIP_PATH= 31 | endif 32 | HIPCXX=$(HIP_PATH)/bin/hipcc 33 | 34 | ifeq (,$(ROCBLAS_PATH)) 35 | # default to rocblas in standard ROCM tree 36 | ROCBLAS_PATH= $(ROCM_PATH) 37 | endif 38 | 39 | EXE = $(shell basename $(CURDIR)) 40 | COMMON_PATH = ../../common 41 | SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp) 42 | OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES)) 43 | 44 | CXX=g++ 45 | # the hip compiler is enabled for this example; comment out the next line to fall back to g++ 46 | CXX=$(HIPCXX) 47 | OPT = -g -Wall 48 | # removing these temporarily as hipcc can not process 49 | # -Ofast -march=native 50 | INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include 51 | CXXFLAGS = -std=c++14 $(INC) $(OPT) 52 | ifneq ($(CXX),$(HIPCXX)) 53 | CXXFLAGS += -D__HIP_PLATFORM_AMD__ 54 | endif 55 | 56 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lrocblas -lm -lpthread -lstdc++ 57 | ifneq ($(CXX),$(HIPCXX)) 58 | LDFLAGS += -lamdhip64 59 | endif 60 | 61 | RM = rm -f 62 | 63 | .PHONY: all clean run 64 | 65 | all: $(EXE) 66 | 67 | %.o: %.cpp 68 | $(CXX) $(CXXFLAGS) -c $< -o $@ 69 | 70 | $(EXE): $(OBJECTS) 71 | $(CXX) $(OBJECTS) $(LDFLAGS) -o $@ 72 | 73 | clean: 74 | $(RM) $(EXE)
$(OBJECTS) 75 | # build the example first if it is missing or out of date, then run it 76 | run: $(EXE) 77 | ./$(EXE) 78 | 79 | -------------------------------------------------------------------------------- /Extensions/gemm_ex_bf16_r/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples gemm_ex_bf16_r 2 | Example showing moving matrix rocblas_bfloat16 data to the GPU device and calling the rocblas gemm_ex (general matrix matrix product) function. Results are fetched from GPU and compared against a CPU implementation and displayed. This example uses the HIPCC compiler and C++14 standard in order to use the rocblas_bfloat16 type. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values. 6 | Running with --help will show the options: 7 | 8 | Usage: ./gemm_ex_bf16_r 9 | --K Matrix/vector dimension 10 | --M Matrix/vector dimension 11 | --N Matrix/vector dimension 12 | --alpha Alpha scalar 13 | --beta Beta scalar 14 | 15 | ## Building 16 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples, which default to compiling with gcc but can also use the hipcc compiler from the rocBLAS installation. The use of hipcc compiler can be set by uncommenting lines in the Makefiles. This example uses hipcc and c++14 so can reuse the common library built with gcc, but if the common code is built from this folder it can not be reused with gcc. If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build.
17 | 18 | cd Extensions/gemm_ex_bf16_r 19 | make 20 | ./gemm_ex_bf16_r 21 | 22 | -------------------------------------------------------------------------------- /Extensions/gemm_ex_f16_r/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # ######################################################################## 22 | 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | HIP_PATH?= $(wildcard /opt/rocm) 29 | ifeq (,$(HIP_PATH)) 30 | HIP_PATH= 31 | endif 32 | HIPCXX=$(HIP_PATH)/bin/hipcc 33 | 34 | ifeq (,$(ROCBLAS_PATH)) 35 | # default to rocblas in standard ROCM tree 36 | ROCBLAS_PATH= $(ROCM_PATH) 37 | endif 38 | 39 | EXE = $(shell basename $(CURDIR)) 40 | COMMON_PATH = ../../common 41 | SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp) 42 | OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES)) 43 | 44 | CXX=g++ 45 | # uncomment to use hip compiler 46 | CXX=$(HIPCXX) 47 | OPT = -g -Wall 48 | # removing these temporarily as hipcc can not process 49 | # -Ofast -march=native 50 | INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include 51 | CXXFLAGS = -std=c++14 $(INC) $(OPT) 52 | ifneq ($(CXX),$(HIPCXX)) 53 | CXXFLAGS += -D__HIP_PLATFORM_AMD__ 54 | endif 55 | 56 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++ 57 | ifneq ($(CXX),$(HIPCXX)) 58 | LDFLAGS += -lamdhip64 59 | endif 60 | 61 | RM = rm -f 62 | 63 | .PHONY: all clean run 64 | 65 | all: $(EXE) 66 | 67 | %.o: %.cpp 68 | $(CXX) $(CXXFLAGS) -c $< -o $@ 69 | 70 | $(EXE): $(OBJECTS) 71 | $(CXX) $(OBJECTS) $(LDFLAGS) -o $@ 72 | 73 | clean: 74 | $(RM) $(EXE) $(OBJECTS) 75 | 76 | run: 77 | ./$(EXE) 78 | 79 | -------------------------------------------------------------------------------- /Extensions/gemm_ex_f16_r/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples gemm_ex_f16_r 2 | Example showing moving matrix float16 data to the GPU device and calling the rocblas gemm_ex (general matrix matrix product) function. Results are fetched from GPU and compared against a CPU implementation and displayed. 
3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values. 6 | Running with --help will show the options: 7 | 8 | Usage: ./gemm_ex_f16_r 9 | --K Matrix/vector dimension 10 | --M Matrix/vector dimension 11 | --N Matrix/vector dimension 12 | --alpha Alpha scalar 13 | --beta Beta scalar 14 | 15 | ## Building 16 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples, which default to compiling with gcc but can also use the hipcc compiler from the rocBLAS installation. If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build. 17 | 18 | cd Extensions/gemm_ex_f16_r 19 | make 20 | ./gemm_ex_f16_r 21 | 22 | -------------------------------------------------------------------------------- /Extensions/gemm_ex_f32_r/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software.
13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | # 21 | # ######################################################################## 22 | 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | HIP_PATH?= $(wildcard /opt/rocm) 29 | ifeq (,$(HIP_PATH)) 30 | HIP_PATH= 31 | endif 32 | HIPCXX=$(HIP_PATH)/bin/hipcc 33 | 34 | ifeq (,$(ROCBLAS_PATH)) 35 | # default to rocblas in standard ROCM tree 36 | ROCBLAS_PATH= $(ROCM_PATH) 37 | endif 38 | 39 | EXE = $(shell basename $(CURDIR)) 40 | COMMON_PATH = ../../common 41 | SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp) 42 | OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES)) 43 | 44 | CXX=g++ 45 | # uncomment to use hip compiler 46 | #CXX=$(HIPCXX) 47 | OPT = -g -Ofast -march=native -Wall 48 | INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include 49 | CXXFLAGS = -std=c++14 $(INC) $(OPT) 50 | ifneq ($(CXX),$(HIPCXX)) 51 | CXXFLAGS += -D__HIP_PLATFORM_AMD__ 52 | endif 53 | 54 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++ 55 | ifneq ($(CXX),$(HIPCXX)) 56 | LDFLAGS += -lamdhip64 57 | endif 58 | 59 | RM = rm -f 60 | 61 | .PHONY: all clean run 62 | 63 | all: $(EXE) 64 | 65 | %.o: %.cpp 66 | $(CXX) $(CXXFLAGS) -c $< -o $@ 67 | 68 | $(EXE): $(OBJECTS) 69 | $(CXX) $(OBJECTS) $(LDFLAGS) -o $@ 70 | 71 | clean: 72 | $(RM) $(EXE) $(OBJECTS) 73 | # build the example first if it is missing or out of date, then run it 74 | run: $(EXE) 75 | ./$(EXE) 76 | 77 |
-------------------------------------------------------------------------------- /Extensions/gemm_ex_f32_r/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples gemm_ex_f32_r 2 | Example showing moving matrix rocblas_f32 data to the GPU device and calling the rocblas gemm_ex (general matrix matrix product) function. Results are fetched from GPU and compared against a CPU implementation and displayed. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values. 6 | Running with --help will show the options: 7 | 8 | Usage: ./gemm_ex_f32_r 9 | --K Matrix/vector dimension 10 | --M Matrix/vector dimension 11 | --N Matrix/vector dimension 12 | --alpha Alpha scalar 13 | --beta Beta scalar 14 | 15 | ## Building 16 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples, which default to compiling with gcc but can also use the hipcc compiler from the rocBLAS installation. If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build. 17 | 18 | cd Extensions/gemm_ex_f32_r 19 | make 20 | ./gemm_ex_f32_r 21 | 22 | -------------------------------------------------------------------------------- /Extensions/gemm_ex_i8_i32_r/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved.
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # ######################################################################## 22 | 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | HIP_PATH?= $(wildcard /opt/rocm) 29 | ifeq (,$(HIP_PATH)) 30 | HIP_PATH= 31 | endif 32 | HIPCXX=$(HIP_PATH)/bin/hipcc 33 | 34 | ifeq (,$(ROCBLAS_PATH)) 35 | # default to rocblas in standard ROCM tree 36 | ROCBLAS_PATH= $(ROCM_PATH) 37 | endif 38 | 39 | EXE = $(shell basename $(CURDIR)) 40 | COMMON_PATH = ../../common 41 | SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp) 42 | OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES)) 43 | 44 | CXX=g++ 45 | # uncomment to use hip compiler 46 | #CXX=$(HIPCXX) 47 | OPT = -g -Ofast -march=native -Wall 48 | INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include 49 | CXXFLAGS = -std=c++14 $(INC) $(OPT) 50 | ifneq ($(CXX),$(HIPCXX)) 51 | CXXFLAGS += -D__HIP_PLATFORM_AMD__ 52 | endif 53 | 54 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++ 55 | ifneq ($(CXX),$(HIPCXX)) 56 | LDFLAGS += -lamdhip64 57 | endif 58 | 59 | RM = rm -f 60 | 61 | .PHONY: all clean run 62 | 63 | all: $(EXE) 64 | 65 | %.o: %.cpp 66 | $(CXX) $(CXXFLAGS) -c $< -o $@ 67 | 68 | $(EXE): $(OBJECTS) 69 | $(CXX) $(OBJECTS) $(LDFLAGS) -o $@ 70 | 71 | clean: 72 | $(RM) $(EXE) $(OBJECTS) 73 | # build the example first if it is missing or out of date, then run it 74 | run: $(EXE) 75 | ./$(EXE) 76 | 77 | -------------------------------------------------------------------------------- /Extensions/gemm_ex_i8_i32_r/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples gemm_ex_i8_i32_r 2 | Example showing moving 8-bit integer and 32-bit integer matrix data types to the GPU device and calling the rocblas gemm_ex (general matrix matrix product) function. Results are fetched from GPU and compared against a CPU implementation and displayed.
Input data should or should not be packed to i8x4 depending on the GPU capability. This example calls a query function to get the capability which is available from ROCm 4.2. So ROCm 4.2 or above is required to run this example. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values. 6 | Running with --help will show the options: 7 | 8 | Usage: ./gemm_ex_i8_i32_r 9 | --K Matrix/vector dimension 10 | --M Matrix/vector dimension 11 | --N Matrix/vector dimension 12 | --alpha Alpha scalar 13 | --beta Beta scalar 14 | 15 | ## Building 16 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples, which default to compiling with gcc but can also use the hipcc compiler from the rocBLAS installation. The use of hipcc compiler can be set by uncommenting lines in the Makefiles. If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build. 17 | 18 | cd Extensions/gemm_ex_i8_i32_r 19 | make 20 | ./gemm_ex_i8_i32_r 21 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /Languages/C/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # ######################################################################## 22 | 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | HIP_PATH?= $(wildcard /opt/rocm) 29 | ifeq (,$(HIP_PATH)) 30 | HIP_PATH= 31 | endif 32 | HIPCXX=$(HIP_PATH)/bin/hipcc 33 | 34 | ifeq (,$(ROCBLAS_PATH)) 35 | # default to rocblas in standard ROCM tree 36 | ROCBLAS_PATH= $(ROCM_PATH) 37 | endif 38 | 39 | EXE = $(shell basename $(CURDIR)) 40 | SOURCES = $(wildcard *.c) 41 | OBJECTS = $(patsubst %.c, %.o, $(SOURCES)) 42 | 43 | CC=gcc 44 | # uncomment to use hip compiler 45 | #CC=$(HIPCXX) 46 | OPT = -ggdb -O0 -march=native -Wall 47 | INC = -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include -I../../common 48 | CCFLAGS = -std=c11 $(INC) $(OPT) 49 | ifneq ($(CC),$(HIPCXX)) 50 | CCFLAGS += -D__HIP_PLATFORM_AMD__ 51 | endif 52 | # link the HIP runtime explicitly only when not building with hipcc (this Makefile selects its compiler through CC, not CXX) 53 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lc 54 | ifneq ($(CC),$(HIPCXX)) 55 | LDFLAGS += -lamdhip64 56 | endif 57 | 58 | RM = rm -f 59 | 60 | .PHONY: all clean run 61 | 62 | all: $(EXE) 63 | 64 | %.o: %.c 65 | $(CC) $(CCFLAGS) -c $< -o $@ 66 | 67 | $(EXE): $(OBJECTS) 68 | $(CC) $(OBJECTS) $(LDFLAGS) -o $@ 69 | 70 | clean: 71 | $(RM) $(EXE) $(OBJECTS) 72 | # build the example first if it is missing or out of date, then run it 73 | run: $(EXE) 74 | ./$(EXE) 75 | 76 | -------------------------------------------------------------------------------- /Languages/C/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples C 2 | Example showing C program asynchronously moving data to the GPU device and calling the rocblas dgeam function. Results are fetched from GPU and compared against a CPU implementation and displayed. This example is compiled with gcc using -std=c11. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values.
6 | 7 | Usage: ./C [N] Matrices of N x 2N dimensions (default N = 267) 8 | 9 | 10 | ## Building 11 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples, which default to compiling with gcc but can also use the hipcc compiler from the rocBLAS installation. The use of the hipcc compiler can be set by uncommenting lines in the Makefiles. This example uses gcc. If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build. 12 | 13 | cd Languages/C 14 | make 15 | ./C 16 | 17 | -------------------------------------------------------------------------------- /Languages/C/main.c: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | * 21 | * ************************************************************************ */ 22 | 23 | #include "error_macros.h" 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | // Uploads an all-ones rows x cols (N x 2N) matrix to the device twice, combines the two copies with rocblas_dgeam (alpha = 1, beta = 2) and checks that every fetched element equals 3 times the input. Optional argv[1] overrides N (default 267). Prints PASS or FAIL. 31 | int main(int argc, char** argv) 32 | { 33 | size_t lda, ldb, lddev; 34 | size_t rows, cols; 35 | 36 | int n = 267; 37 | if(argc > 1) 38 | n = atoi(argv[1]); 39 | if(n <= 0) { fprintf(stderr, "N must be a positive integer\n"); return 1; } // a non-positive N would wrap around in the size_t dimensions below 40 | rows = n; 41 | cols = 2 * n; 42 | lda = ldb = lddev = n; 43 | 44 | typedef double data_type; 45 | 46 | rocblas_handle handle; 47 | rocblas_status rstatus = rocblas_create_handle(&handle); 48 | CHECK_ROCBLAS_STATUS(rstatus); 49 | 50 | hipStream_t test_stream; 51 | rstatus = rocblas_get_stream(handle, &test_stream); 52 | CHECK_ROCBLAS_STATUS(rstatus); 53 | 54 | data_type* ha; 55 | data_type* hb; 56 | 57 | // allocate pinned memory to allow async memory transfer 58 | CHECK_HIP_ERROR( 59 | hipHostMalloc((void**)&ha, lda * cols * sizeof(data_type), hipHostMallocMapped)); 60 | CHECK_HIP_ERROR( 61 | hipHostMalloc((void**)&hb, ldb * cols * sizeof(data_type), hipHostMallocMapped)); 62 | 63 | for(int i1 = 0; i1 < rows; i1++) 64 | for(int i2 = 0; i2 < cols; i2++) 65 | ha[i1 + i2 * lda] = 1.0; 66 | 67 | data_type* da = 0; 68 | data_type* db = 0; 69 | data_type* dc = 0; 70 | CHECK_HIP_ERROR(hipMalloc((void**)&da, lddev * cols * sizeof(data_type))); 71 | CHECK_HIP_ERROR(hipMalloc((void**)&db, lddev * cols * sizeof(data_type))); 72 | CHECK_HIP_ERROR(hipMalloc((void**)&dc, lddev * cols * sizeof(data_type))); 73 | 74 | // upload asynchronously from pinned memory; each status is checked before rstatus is reused 75 | rstatus 76 | = rocblas_set_matrix_async(rows, cols, sizeof(data_type), ha, lda, da, lddev, test_stream); CHECK_ROCBLAS_STATUS(rstatus); 77 | rstatus 78 | = rocblas_set_matrix_async(rows, cols,
sizeof(data_type), ha, lda, db, lddev, test_stream); 79 | CHECK_ROCBLAS_STATUS(rstatus); 80 | // scalar arguments will be from host memory 81 | rstatus = rocblas_set_pointer_mode(handle, rocblas_pointer_mode_host); 82 | CHECK_ROCBLAS_STATUS(rstatus); 83 | 84 | data_type alpha = 1.0; 85 | data_type beta = 2.0; 86 | 87 | // invoke asynchronous computation 88 | rstatus = rocblas_dgeam(handle, 89 | rocblas_operation_none, 90 | rocblas_operation_none, 91 | rows, 92 | cols, 93 | &alpha, 94 | da, 95 | lddev, 96 | &beta, 97 | db, 98 | lddev, 99 | dc, 100 | lddev); 101 | CHECK_ROCBLAS_STATUS(rstatus); 102 | 103 | // fetch results asynchronously to pinned memory 104 | rstatus 105 | = rocblas_get_matrix_async(rows, cols, sizeof(data_type), dc, lddev, hb, ldb, test_stream); 106 | CHECK_ROCBLAS_STATUS(rstatus); 107 | 108 | // wait on transfer to be finished 109 | CHECK_HIP_ERROR(hipStreamSynchronize(test_stream)); 110 | 111 | // check against expected results 112 | bool fail = false; 113 | for(int i1 = 0; i1 < rows; i1++) 114 | for(int i2 = 0; i2 < cols; i2++) 115 | if(hb[i1 + i2 * ldb] != 3.0 * ha[i1 + i2 * lda]) 116 | fail = true; 117 | 118 | CHECK_HIP_ERROR(hipFree(da)); 119 | CHECK_HIP_ERROR(hipFree(db)); 120 | CHECK_HIP_ERROR(hipFree(dc)); 121 | 122 | // free pinned memory 123 | CHECK_HIP_ERROR(hipHostFree(ha)); 124 | CHECK_HIP_ERROR(hipHostFree(hb)); 125 | 126 | rstatus = rocblas_destroy_handle(handle); 127 | CHECK_ROCBLAS_STATUS(rstatus); 128 | 129 | fprintf(stdout, "%s\n", fail ? "FAIL" : "PASS"); 130 | 131 | return 0; 132 | } 133 | -------------------------------------------------------------------------------- /Languages/Fortran/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved.
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # ######################################################################## 22 | 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | HIP_PATH?= $(wildcard /opt/rocm) 29 | ifeq (,$(HIP_PATH)) 30 | HIP_PATH= 31 | endif 32 | HIPCXX=$(HIP_PATH)/bin/hipcc 33 | 34 | ifeq (,$(ROCBLAS_PATH)) 35 | # default to rocblas in standard ROCM tree 36 | ROCBLAS_PATH= $(ROCM_PATH) 37 | endif 38 | 39 | EXE = $(shell basename $(CURDIR)) 40 | SOURCES = $(wildcard *.f90) 41 | OBJECTS = $(patsubst %.f90, %.o, $(SOURCES)) 42 | 43 | CC=gfortran 44 | OPT = -ggdb -O0 -march=native -Wall -Wno-c-binding-type 45 | INC = -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include -I../../common 46 | FFLAGS = $(INC) $(OPT) 47 | ifneq ($(CC),$(HIPCXX)) 48 | FFLAGS += -D__HIP_PLATFORM_AMD__ 49 | endif 50 | 51 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread 52 | ifneq ($(CXX),$(HIPCXX)) 53 | LDFLAGS += -lamdhip64 54 | endif 55 | 56 | RM = rm -f 57 | 58 | .PHONY: all clean run 59 | 60 | all: $(EXE) 61 | 62 | %.o: %.f90 63 | $(CC) $(FFLAGS) -c $< -o $@ 64 | 65 | rocblas.mod: $(ROCBLAS_PATH)/include/rocblas/rocblas_module.f90 66 | $(CC) $(FFLAGS) -c $< 67 | 68 | $(EXE): rocblas.mod $(OBJECTS) 69 | $(CC) $(OBJECTS) $(LDFLAGS) -o $@ 70 | 71 | clean: 72 | $(RM) $(EXE) $(OBJECTS) *.mod rocblas_module.o 73 | 74 | run: 75 | ./$(EXE) 76 | 77 | -------------------------------------------------------------------------------- /Languages/Fortran/main.f90: -------------------------------------------------------------------------------- 1 | 2 | ! Copyright (c) 2019-2020 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | ! Permission is hereby granted, free of charge, to any person obtaining a copy 5 | ! of this software and associated documentation files (the "Software"), to deal 6 | ! 
in the Software without restriction, including without limitation the rights 7 | ! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | ! copies of the Software, and to permit persons to whom the Software is 9 | ! furnished to do so, subject to the following conditions: 10 | 11 | ! The above copyright notice and this permission notice shall be included in 12 | ! all copies or substantial portions of the Software. 13 | 14 | ! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | ! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | ! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | ! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | ! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | ! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | ! THE SOFTWARE. 21 | 22 | 23 | subroutine HIP_CHECK(stat) 24 | use iso_c_binding 25 | 26 | implicit none 27 | 28 | integer(c_int) :: stat 29 | 30 | if(stat /= 0) then 31 | write(*,*) 'hip error:' 32 | stop 33 | end if 34 | end subroutine HIP_CHECK 35 | 36 | subroutine ROCBLAS_CHECK(stat) 37 | use iso_c_binding 38 | 39 | implicit none 40 | 41 | integer(c_int) :: stat 42 | 43 | if(stat /= 0) then 44 | write(*,*) 'rocBLAS error:' 45 | stop 46 | endif 47 | end subroutine ROCBLAS_CHECK 48 | 49 | 50 | program fortran_example 51 | use iso_c_binding 52 | use rocblas 53 | ! generated from rocblas/include/rocblas_module.f90 54 | 55 | implicit none 56 | 57 | ! 
access hip until there is hip module 58 | interface 59 | function hipMalloc(ptr, size) & 60 | result(c_int) & 61 | bind(c, name = 'hipMalloc') 62 | use iso_c_binding 63 | implicit none 64 | type(c_ptr), value :: ptr 65 | integer(c_size_t), value :: size 66 | end function hipMalloc 67 | end interface 68 | 69 | interface 70 | function hipFree(ptr) & 71 | result(c_int) & 72 | bind(c, name = 'hipFree') 73 | use iso_c_binding 74 | implicit none 75 | type(c_ptr), value :: ptr 76 | end function hipFree 77 | end interface 78 | 79 | interface 80 | function hipMemcpy(dst, src, size, kind) & 81 | result(c_int) & 82 | bind(c, name = 'hipMemcpy') 83 | use iso_c_binding 84 | implicit none 85 | type(c_ptr), value :: dst 86 | type(c_ptr), intent(in), value :: src 87 | integer(c_size_t), value :: size 88 | integer(c_int), value :: kind 89 | end function hipMemcpy 90 | end interface 91 | 92 | interface 93 | function hipMemset(dst, val, size) & 94 | result(c_int) & 95 | bind(c, name = 'hipMemset') 96 | use iso_c_binding 97 | implicit none 98 | type(c_ptr), value :: dst 99 | integer(c_int), value :: val 100 | integer(c_size_t), value :: size 101 | end function hipMemset 102 | end interface 103 | 104 | interface 105 | function hipDeviceSynchronize() & 106 | result(c_int) & 107 | bind(c, name = 'hipDeviceSynchronize') 108 | use iso_c_binding 109 | implicit none 110 | end function hipDeviceSynchronize 111 | end interface 112 | 113 | interface 114 | function hipDeviceReset() & 115 | result(c_int) & 116 | bind(c, name = 'hipDeviceReset') 117 | use iso_c_binding 118 | implicit none 119 | end function hipDeviceReset 120 | end interface 121 | ! hip access 122 | 123 | 124 | integer tbegin(8) 125 | integer tend(8) 126 | real(8) timing 127 | logical :: failure = .FALSE. 
128 | real(c_float) :: res 129 | 130 | integer(c_int) :: n = 10240 131 | real(c_float), target :: alpha = 2 132 | 133 | real(4), dimension(:), allocatable, target :: hx 134 | real(4), dimension(:), allocatable, target :: hz 135 | type(c_ptr), target :: dx 136 | 137 | integer(c_int) :: i, element 138 | 139 | ! Create rocBLAS handle 140 | type(c_ptr), target :: handle 141 | call ROCBLAS_CHECK(rocblas_create_handle(c_loc(handle))) 142 | 143 | ! Allocate host-side memory 144 | allocate(hx(n)) 145 | allocate(hz(n)) 146 | 147 | ! Allocate device-side memory 148 | call HIP_CHECK(hipMalloc(c_loc(dx), int(n, c_size_t) * 4)) 149 | 150 | ! Initialize host memory 151 | do i = 1, n 152 | hx(i) = i 153 | hz(i) = i 154 | end do 155 | 156 | ! Copy memory from host to device 157 | call HIP_CHECK(hipMemcpy(dx, c_loc(hx), int(n, c_size_t) * 4, 1)) 158 | 159 | ! Begin time 160 | call date_and_time(values = tbegin) 161 | 162 | ! Call rocblas_scal 163 | call ROCBLAS_CHECK(rocblas_set_pointer_mode(handle, 0)) 164 | call ROCBLAS_CHECK(rocblas_sscal(handle, n, c_loc(alpha), dx, 1)) 165 | call HIP_CHECK(hipDeviceSynchronize()) 166 | 167 | ! Stop time 168 | call date_and_time(values = tend) 169 | 170 | ! Copy output from device to host 171 | call HIP_CHECK(hipMemcpy(c_loc(hx), dx, int(n, c_size_t) * 4, 2)) 172 | 173 | do element = 1, n 174 | res = alpha * hz(element) 175 | if(res .ne. hx(element)) then 176 | failure = .true. 177 | write(*,*) 'ERROR: ', res, '!=', hx(element) 178 | end if 179 | end do 180 | 181 | ! Calculate elapsed time in milliseconds from the hour, minute, second and millisecond fields of date_and_time; exactly one sscal call is timed, so no averaging divisor is applied 182 | tbegin = tend - tbegin 183 | timing = (0.001d0 * tbegin(8) + tbegin(7) + 60d0 * tbegin(6) + 3600d0 * tbegin(5)) * 1000d0 184 | write(*,fmt='(A,F0.2,A)') 'Function call took ', timing, ' msec' 185 | 186 | if(failure) then 187 | write(*,*) 'FAIL' 188 | else 189 | write(*,*) 'PASS' 190 | end if 191 | 192 | !
Cleanup 193 | call HIP_CHECK(hipFree(dx)) 194 | deallocate(hx, hz) 195 | call ROCBLAS_CHECK(rocblas_destroy_handle(handle)) 196 | call HIP_CHECK(hipDeviceReset()) 197 | 198 | end program fortran_example -------------------------------------------------------------------------------- /Languages/HIP/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. 2 | 3 | ROCM_PATH?= $(wildcard /opt/rocm) 4 | ifeq (,$(ROCM_PATH)) 5 | ROCM_PATH= 6 | endif 7 | 8 | HIP_PATH?= $(wildcard /opt/rocm) 9 | ifeq (,$(HIP_PATH)) 10 | HIP_PATH= 11 | endif 12 | HIPCXX=$(HIP_PATH)/bin/hipcc 13 | 14 | ifeq (,$(ROCBLAS_PATH)) 15 | # default to rocblas in standard ROCM tree 16 | ROCBLAS_PATH= $(ROCM_PATH) 17 | endif 18 | 19 | EXE = $(shell basename $(CURDIR)) 20 | SOURCES = $(wildcard *.cpp) 21 | OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES)) 22 | # hipcc is needed for hip kernel compilation; the comment sits on its own line so CXX carries no trailing whitespace into the ifneq comparisons below 23 | CXX=$(HIPCXX) 24 | OPT = -ggdb -O0 -march=native -Wall 25 | INC = -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include -I../../common 26 | CXXFLAGS = -std=c++14 $(INC) $(OPT) 27 | ifneq ($(CXX),$(HIPCXX)) 28 | CXXFLAGS += -D__HIP_PLATFORM_AMD__ 29 | endif 30 | 31 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++ 32 | ifneq ($(CXX),$(HIPCXX)) 33 | LDFLAGS += -lamdhip64 34 | endif 35 | 36 | RM = rm -f 37 | 38 | .PHONY: all clean run 39 | 40 | all: $(EXE) 41 | 42 | %.o: %.cpp # all are using hip 43 | $(CXX) $(CXXFLAGS) -c $< -o $@ 44 | 45 | $(EXE): $(OBJECTS) 46 | $(CXX) $(OBJECTS) $(LDFLAGS) -o $@ 47 | 48 | clean: 49 | $(RM) $(EXE) $(OBJECTS) 50 | 51 | run: 52 | ./${EXE} 53 | # to log hip function calls 54 | # AMD_LOG_LEVEL=3 ./$(EXE) 55 | 56 | -------------------------------------------------------------------------------- /Languages/HIP/README.md:
-------------------------------------------------------------------------------- 1 | # rocBLAS-Examples HIP 2 | Example showing HIP program asynchronously moving data to the GPU device and calling a hip kernel for some computation and then the rocblas dgeam function with the results. Device results are fetched from GPU and compared against a CPU implementation and displayed. This example uses hipcc for compilation. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values. 6 | 7 | Usage: ./HIP [rows] [cols] Matrix of dimension rows x cols (default 255 x 512) 8 | 9 | The make run target shows the use of AMD_LOG_LEVEL environment variable to display the hip API calls being made at runtime. 10 | 11 | ## Building 12 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples. This example uses the hipcc compiler from the ROCm installation. If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build. 13 | 14 | cd Languages/HIP 15 | make 16 | ./HIP 17 | 18 | -------------------------------------------------------------------------------- /Languages/HIP/kernel.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved.
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | * 21 | * ************************************************************************ */ 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | // Element-wise square: b[r + c * ldb] = a[r + c * lda] * a[r + c * lda] for every r < rows and c < cols. Each thread handles one element; the x launch dimension selects the column and the y launch dimension selects the row. 29 | __global__ void 30 | matrix_square_elements(int rows, int cols, const double* a, int lda, double* b, int ldb) 31 | { 32 | const int col = blockIdx.x * blockDim.x + threadIdx.x; 33 | const int row = blockIdx.y * blockDim.y + threadIdx.y; 34 | // threads that fall outside the matrix have nothing to do 35 | if(col >= cols || row >= rows) 36 | return; 37 | const double value = a[row + size_t(col) * lda]; 38 | b[row + size_t(col) * ldb] = value * value; 39 | } 40 | -------------------------------------------------------------------------------- /Languages/HIP/main.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | * 21 | * ************************************************************************ */ 22 | 23 | #include "error_macros.h" 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | // prototype for external kernel defined in kernel.cpp 32 | __global__ void 33 | matrix_square_elements(int rows, int cols, const double* a, int lda, double* b, int ldb); 34 | 35 | int main(int argc, char** argv) 36 | { 37 | int lda, ldb, lddev; 38 | int rows, cols; 39 | 40 | int n = 255; 41 | int m = 512; 42 | if(argc > 1) 43 | n = atoi(argv[1]); 44 | if(argc > 2) 45 | m = atoi(argv[2]); 46 | 47 | rows = n; 48 | cols = m; 49 | lda = ldb = lddev = n; 50 | 51 | rocblas_handle handle; 52 | rocblas_status rstatus = rocblas_create_handle(&handle); 53 | CHECK_ROCBLAS_STATUS(rstatus); 54 | 55 | hipStream_t test_stream; 56 | rstatus = rocblas_get_stream(handle, &test_stream); 57 | CHECK_ROCBLAS_STATUS(rstatus); 58 | 59 | double* ha; 60 | double* hb; 61 | 62 | // allocate pinned memory to allow async memory transfer 63 | CHECK_HIP_ERROR(hipHostMalloc((void**)&ha, sizeof(double) * lda * cols, hipHostMallocMapped)); 64 | CHECK_HIP_ERROR(hipHostMalloc((void**)&hb, sizeof(double) * ldb * cols, hipHostMallocMapped)); 65 | 66 | for(int i1 = 0; i1 < rows; i1++) 67 | for(int i2 = 0; i2 < cols; i2++) 68 | ha[i1 + size_t(i2) * lda] = double(i1); 69 | 70 | double* da = 0; 71 | double* db = 0; 72 | double* dc = 0; 73 | CHECK_HIP_ERROR(hipMalloc((void**)&da, sizeof(double) * lddev * cols)); 74 | CHECK_HIP_ERROR(hipMalloc((void**)&db, sizeof(double) * lddev * cols)); 75 | CHECK_HIP_ERROR(hipMalloc((void**)&dc, sizeof(double) * lddev * cols)); 76 | 77 | // upload asynchronously from pinned 
memory 78 | rstatus = rocblas_set_matrix_async(rows, cols, sizeof(double), ha, lda, da, lddev, test_stream); 79 | rstatus = rocblas_set_matrix_async(rows, cols, sizeof(double), ha, lda, dc, lddev, test_stream); 80 | 81 | // compute db as square of ha with hip kernel 82 | const unsigned threads = 32; 83 | const unsigned rblock = (rows - 1) / threads + 1; 84 | const unsigned cblock = (cols - 1) / threads + 1; 85 | hipLaunchKernelGGL((matrix_square_elements), /* compute kernel*/ 86 | dim3(cblock, rblock), 87 | dim3(threads, threads), 88 | 0 /*dynamic shared*/, 89 | 0 /*stream*/, 90 | rows, 91 | cols, 92 | dc, 93 | lda, 94 | db, 95 | ldb); /* arguments to the compute kernel */ 96 | 97 | // this should result in db matrix having each element the squared element value of ha 98 | 99 | // scalar arguments will be from host memory. 100 | rstatus = rocblas_set_pointer_mode(handle, rocblas_pointer_mode_host); 101 | CHECK_ROCBLAS_STATUS(rstatus); 102 | 103 | double alpha = 2.0; 104 | double beta = 1.0; 105 | 106 | // invoke asynchronous computation 107 | rstatus = rocblas_dgeam(handle, 108 | rocblas_operation_none, 109 | rocblas_operation_none, 110 | rows, 111 | cols, 112 | &alpha, 113 | da, 114 | lddev, 115 | &beta, 116 | db, 117 | lddev, 118 | dc, 119 | lddev); 120 | CHECK_ROCBLAS_STATUS(rstatus); 121 | 122 | // fetch results asynchronously to pinned memory 123 | rstatus = rocblas_get_matrix_async(rows, cols, sizeof(double), dc, lddev, hb, ldb, test_stream); 124 | CHECK_ROCBLAS_STATUS(rstatus); 125 | 126 | // wait on transfer to be finished 127 | CHECK_HIP_ERROR(hipStreamSynchronize(test_stream)); 128 | 129 | // check against expected results 130 | bool fail = false; 131 | for(int i1 = 0; i1 < rows; i1++) 132 | for(int i2 = 0; i2 < cols; i2++) 133 | { 134 | double v = ha[i1 + size_t(i2) * lda]; 135 | if(hb[i1 + size_t(i2) * ldb] != 2.0 * v + v * v) 136 | fail = true; 137 | } 138 | 139 | CHECK_HIP_ERROR(hipFree(da)); 140 | CHECK_HIP_ERROR(hipFree(db)); 141 | 
CHECK_HIP_ERROR(hipFree(dc)); 142 | 143 | // free pinned memory 144 | CHECK_HIP_ERROR(hipHostFree(ha)); 145 | CHECK_HIP_ERROR(hipHostFree(hb)); 146 | 147 | rstatus = rocblas_destroy_handle(handle); 148 | CHECK_ROCBLAS_STATUS(rstatus); 149 | 150 | fprintf(stdout, "%s\n", fail ? "FAIL" : "PASS"); 151 | 152 | return 0; 153 | } 154 | -------------------------------------------------------------------------------- /Level-1/axpy/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # ######################################################################## 22 | 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | HIP_PATH?= $(wildcard /opt/rocm) 29 | ifeq (,$(HIP_PATH)) 30 | HIP_PATH= 31 | endif 32 | HIPCXX=$(HIP_PATH)/bin/hipcc 33 | 34 | ifeq (,$(ROCBLAS_PATH)) 35 | # default to rocblas in standard ROCM tree 36 | ROCBLAS_PATH= $(ROCM_PATH) 37 | endif 38 | 39 | EXE = $(shell basename $(CURDIR)) 40 | COMMON_PATH = ../../common 41 | SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp) 42 | OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES)) 43 | 44 | CXX=g++ 45 | # uncomment to use hip compiler 46 | #CXX=$(HIPCXX) 47 | OPT = -g -Ofast -march=native -Wall 48 | INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include 49 | CXXFLAGS = -std=c++14 $(INC) $(OPT) 50 | ifneq ($(CXX),$(HIPCXX)) 51 | CXXFLAGS += -D__HIP_PLATFORM_AMD__ 52 | endif 53 | 54 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++ 55 | ifneq ($(CXX),$(HIPCXX)) 56 | LDFLAGS += -lamdhip64 57 | endif 58 | 59 | RM = rm -f 60 | 61 | .PHONY: all clean run 62 | 63 | all: $(EXE) 64 | 65 | %.o: %.cpp 66 | $(CXX) $(CXXFLAGS) -c $< -o $@ 67 | 68 | $(EXE): $(OBJECTS) 69 | $(CXX) $(OBJECTS) $(LDFLAGS) -o $@ 70 | 71 | clean: 72 | $(RM) $(EXE) $(OBJECTS) 73 | 74 | run: 75 | ./$(EXE) 76 | 77 | -------------------------------------------------------------------------------- /Level-1/axpy/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples axpy 2 | This example presents two independent vectors 'X', 'Y' and a scalar 'alpha' transferred to the GPU device and calling the rocBLAS axpy function. 
Inside the rocBLAS axpy function, 'alpha' is multiplied with each element of vector 'X' and the resulting vector is added to vector 'Y', overwriting vector 'Y' with the result. The result vector is retrieved from the device to the host. Then the result vector and the gold standard (calculated on the CPU) are displayed, and the maximum relative error between them is calculated. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values (alpha=1, incx=1, incy=1, n=5). 6 | Running with --help will show the options: 7 | 8 | Usage: ./axpy 9 | --alpha Alpha scalar 10 | --incx Increment for x vector 11 | --incy Increment for y vector 12 | --n Size of vector 13 | 14 | 15 | ## Building 16 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples, which default to compiling with gcc but can also use the hipcc compiler from the rocBLAS installation. The use of the hipcc compiler can be set by uncommenting lines in the Makefiles. If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build. 17 | 18 | cd Level-1/axpy 19 | make 20 | ./axpy 21 | -------------------------------------------------------------------------------- /Level-1/axpy/axpy.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved.
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | * 21 | * ************************************************************************ */ 22 | 23 | #include "helpers.hpp" 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | int main(int argc, char** argv) 32 | { 33 | helpers::ArgParser options("axyn"); 34 | if(!options.validArgs(argc, argv)) 35 | return EXIT_FAILURE; 36 | 37 | //Initialize HIP error to check the return status of the HIP API functions 38 | hipError_t herror = hipSuccess; 39 | 40 | //Initialize rocBLAS error to check the return status of the rocBLAS API functions 41 | rocblas_status rstatus = rocblas_status_success; 42 | 43 | //Stride between consecutive values of input vector X (default value is 1) 44 | rocblas_int incx = options.incx; 45 | 46 | //Stride between consecutive values of input vector Y (default value is 1) 47 | rocblas_int incy = options.incy; 48 | 49 | //Number of elements in input vector X and input vector Y (default value is 5) 50 | rocblas_int n = options.n; 51 | 52 | if(n <= 0) //Edge condition check 53 | { 54 | std::cout << "Value of 'n' should be greater than 0" << std::endl; 55 | return 0; 56 | } 57 | 58 | //Scalar value used for multiplication 59 | float hAlpha = options.alpha; 60 | 61 | //Adjusting the size of input vector X for value of stride (incx) not equal to 1 62 | size_t sizeX = (n * incx) >= 0 ? n * incx : -(n * incx); 63 | 64 | //Adjusting the size of input vector Y for value of stride (incy) not equal to 1 65 | size_t sizeY = (n * incy) >= 0 ? 
n * incy : -(n * incy); 66 | 67 | //Allocating memory for both the host input vectors X and Y 68 | std::vector hX(sizeX); 69 | std::vector hY(sizeY); 70 | 71 | //Intialising random values to both the host vectors X and Y 72 | helpers::fillVectorNormRand(hX); 73 | helpers::fillVectorNormRand(hY); 74 | 75 | std::cout << "Input Vectors (X)" << std::endl; 76 | helpers::printVector(hX); 77 | 78 | std::cout << "Input Vectors (Y)" << std::endl; 79 | helpers::printVector(hY); 80 | 81 | /*Initialising the values for vector hYGold, this vector will be used as a Gold Standard 82 | to compare our results from rocBLAS SAXPY funtion*/ 83 | std::vector hYGold(hY); 84 | 85 | //CPU function for SAXPY 86 | for(int i = 0; i < n; i++) 87 | hYGold[i * incy] = hAlpha * hX[i * incx] + hY[i * incy]; 88 | 89 | //Using rocblas API to create a handle 90 | rocblas_handle handle; 91 | rstatus = rocblas_create_handle(&handle); 92 | CHECK_ROCBLAS_STATUS(rstatus); 93 | 94 | { 95 | //Allocating memory for both the both device vectors X and Y 96 | helpers::DeviceVector dX(sizeX); 97 | helpers::DeviceVector dY(sizeY); 98 | 99 | //Tansfer data from host vector X to device vector X 100 | herror = hipMemcpy(dX, hX.data(), sizeof(float) * sizeX, hipMemcpyHostToDevice); 101 | CHECK_HIP_ERROR(herror); 102 | 103 | //Tansfer data from host vector Y to device vector Y 104 | herror = hipMemcpy(dY, hY.data(), sizeof(float) * sizeY, hipMemcpyHostToDevice); 105 | CHECK_HIP_ERROR(herror); 106 | 107 | //Enable passing alpha parameter from pointer to host memory 108 | rstatus = rocblas_set_pointer_mode(handle, rocblas_pointer_mode_host); 109 | CHECK_ROCBLAS_STATUS(rstatus); 110 | 111 | //Saxpy calculation on device 112 | rstatus = rocblas_saxpy(handle, n, &hAlpha, dX, incx, dY, incy); 113 | 114 | CHECK_ROCBLAS_STATUS(rstatus); 115 | 116 | /*Transfer the result from device vector Y to host vector Y, 117 | automatically blocked until results ready*/ 118 | herror = hipMemcpy(hY.data(), dY, sizeof(float) * sizeY, 
hipMemcpyDeviceToHost); 119 | 120 | CHECK_HIP_ERROR(herror); 121 | } // release device memory via helpers::DeviceVector destructors 122 | 123 | std::cout << "Output Vector Y" << std::endl; 124 | 125 | //Print output result Vector 126 | helpers::printVector(hY); 127 | 128 | //Print the CPU generated output 129 | std::cout << "Output Vector YGold" << std::endl; 130 | helpers::printVector(hYGold); 131 | 132 | /*Helper function to check the Relative error between output generated 133 | from rocBLAS API saxpy and the CPU function*/ 134 | float maxRelativeError = (float)helpers::maxRelativeError(hY, hYGold); 135 | float eps = std::numeric_limits::epsilon(); 136 | float tolerance = 10; 137 | 138 | if(maxRelativeError > eps * tolerance) 139 | { 140 | std::cout << "FAIL"; 141 | } 142 | else 143 | { 144 | std::cout << "PASS"; 145 | } 146 | std::cout << ": max. relative err. = " << maxRelativeError << std::endl; 147 | 148 | rstatus = rocblas_destroy_handle(handle); 149 | CHECK_ROCBLAS_STATUS(rstatus); 150 | return 0; 151 | } 152 | //End of the program 153 | -------------------------------------------------------------------------------- /Level-1/dot/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # ######################################################################## 22 | 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | HIP_PATH?= $(wildcard /opt/rocm) 29 | ifeq (,$(HIP_PATH)) 30 | HIP_PATH= 31 | endif 32 | HIPCXX=$(HIP_PATH)/bin/hipcc 33 | 34 | ifeq (,$(ROCBLAS_PATH)) 35 | # default to rocblas in standard ROCM tree 36 | ROCBLAS_PATH= $(ROCM_PATH) 37 | endif 38 | 39 | EXE = $(shell basename $(CURDIR)) 40 | COMMON_PATH = ../../common 41 | SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp) 42 | OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES)) 43 | 44 | CXX=g++ 45 | # uncomment to use hip compiler 46 | #CXX=$(HIPCXX) 47 | OPT = -g -Ofast -march=native -Wall 48 | INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include 49 | CXXFLAGS = -std=c++14 $(INC) $(OPT) 50 | ifneq ($(CXX),$(HIPCXX)) 51 | CXXFLAGS += -D__HIP_PLATFORM_AMD__ 52 | endif 53 | 54 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++ 55 | ifneq ($(CXX),$(HIPCXX)) 56 | LDFLAGS += -lamdhip64 57 | endif 58 | 59 | RM = rm -f 60 | 61 | .PHONY: all clean run 62 | 63 | all: $(EXE) 64 | 65 | %.o: %.cpp 66 | $(CXX) $(CXXFLAGS) -c $< -o $@ 67 | 68 | $(EXE): $(OBJECTS) 69 | $(CXX) $(OBJECTS) $(LDFLAGS) -o $@ 70 | 71 | clean: 72 | $(RM) $(EXE) $(OBJECTS) 73 | 74 | run: 75 | ./$(EXE) 76 | -------------------------------------------------------------------------------- /Level-1/dot/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples dot 2 | This example presents independent vectors 'X', 'Y' and a scalar value 'Result' transferred to the GPU device and calling the rocBLAS dot function. Then, the rocBLAS dot function computes the dot product of vectors 'X' and 'Y' and stores the output in 'Result'. 
Finally, the gold-standard value 'goldResult' (computed in CPU) along with the Result 'hResult' (Computed in GPU) are displayed for comparison. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values (incx=1, incy=1, n=5). 6 | Running with --help will show the options: 7 | 8 | Usage: ./dot 9 | --incx Increment for x vector 10 | --incy Increment for y vector 11 | --n Size of vector 12 | 13 | 14 | ## Building 15 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples which default to compile using gcc but can also use the hipcc compiler from the rocBLAS installation. The use of hipcc compiler can be set by uncommenting lines in the Makefiles. If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocBLAS build. 16 | 17 | cd Level-1/dot 18 | make 19 | ./dot -------------------------------------------------------------------------------- /Level-1/dot/dot.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | * 21 | * ************************************************************************ */ 22 | 23 | #include "helpers.hpp" 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | int main(int argc, char** argv) 32 | { 33 | helpers::ArgParser options("xyn"); 34 | if(!options.validArgs(argc, argv)) 35 | return EXIT_FAILURE; 36 | 37 | //Initialize HIP error to check the return status of the HIP API functions 38 | hipError_t herror = hipSuccess; 39 | 40 | //Initialize rocBLAS error to check the return status of the rocBLAS API functions 41 | rocblas_status rstatus = rocblas_status_success; 42 | 43 | //Stride between consecutive values of input vector X (default value is 1) 44 | rocblas_int incx = options.incx; 45 | 46 | //Stride between consecutive values of input vector Y (default value is 1) 47 | rocblas_int incy = options.incy; 48 | 49 | //Number of elements in input vector X and input vector Y (default value is 5) 50 | rocblas_int n = options.n; 51 | 52 | //Edge condition check 53 | if(n <= 0) 54 | { 55 | std::cout << "Value of 'n' should be greater than 0" << std::endl; 56 | return 0; 57 | } 58 | 59 | //Adjusting the size of input vector X for value of stride (incx) not equal to 1 60 | size_t sizeX = (n * incx) >= 0 ? n * incx : -(n * incx); 61 | 62 | //Adjusting the size of input vector Y for value of stride (incy) not equal to 1 63 | size_t sizeY = (n * incy) >= 0 ? 
n * incy : -(n * incy); 64 | 65 | //Allocating memory for the host input vectors X, Y and the host scalar result 66 | std::vector hX(sizeX); 67 | std::vector hY(sizeY); 68 | 69 | float hResult = 0.0; 70 | 71 | //Initialising random values to both the host vectors X and Y 72 | helpers::fillVectorNormRand(hX); 73 | helpers::fillVectorNormRand(hY); 74 | 75 | std::cout << "Input Vectors (X)" << std::endl; 76 | helpers::printVector(hX); 77 | 78 | std::cout << "Input Vectors (Y)" << std::endl; 79 | helpers::printVector(hY); 80 | 81 | /*Initialising the scalar goldResult, goldResult will be used as a 82 | gold standard to compare our result from rocBLAS SDOT funtion*/ 83 | float goldResult = 0.0; 84 | 85 | //CPU function for SDOT 86 | for(int i = 0; i < n; i++) 87 | goldResult += hX[i * incx] * hY[i * incy]; 88 | 89 | //Using rocblas API to create a handle 90 | rocblas_handle handle; 91 | rstatus = rocblas_create_handle(&handle); 92 | CHECK_ROCBLAS_STATUS(rstatus); 93 | 94 | { 95 | //Allocating memory for the device vectors X, Y and the scalar Result 96 | helpers::DeviceVector dX(sizeX); 97 | helpers::DeviceVector dY(sizeY); 98 | 99 | //Enable passing hResult parameter from pointer to host memory 100 | rstatus = rocblas_set_pointer_mode(handle, rocblas_pointer_mode_host); 101 | CHECK_ROCBLAS_STATUS(rstatus); 102 | 103 | //Tansfer data from host vector X to device vector X 104 | herror = hipMemcpy(dX, hX.data(), sizeof(float) * sizeX, hipMemcpyHostToDevice); 105 | CHECK_HIP_ERROR(herror); 106 | 107 | //Tansfer data from host vector Y to device vector Y 108 | herror = hipMemcpy(dY, hY.data(), sizeof(float) * sizeY, hipMemcpyHostToDevice); 109 | CHECK_HIP_ERROR(herror); 110 | 111 | //Asynchronous SDOT calculation on device 112 | rstatus = rocblas_sdot(handle, n, dX, incx, dY, incy, &hResult); 113 | 114 | CHECK_ROCBLAS_STATUS(rstatus); 115 | 116 | //Block until result is ready 117 | CHECK_HIP_ERROR(hipDeviceSynchronize()); 118 | 119 | } // release device memory via 
helpers::DeviceVector destructors 120 | 121 | //Print the GPU generated output 122 | std::cout << "Output result" << std::endl; 123 | std::cout << hResult << std::endl; 124 | 125 | //Print the CPU generated output 126 | std::cout << "Output Goldstandard result" << std::endl; 127 | std::cout << goldResult << std::endl; 128 | 129 | rstatus = rocblas_destroy_handle(handle); 130 | CHECK_ROCBLAS_STATUS(rstatus); 131 | return 0; 132 | } 133 | //End of the program 134 | -------------------------------------------------------------------------------- /Level-1/nrm2/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # ######################################################################## 22 | 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | HIP_PATH?= $(wildcard /opt/rocm) 29 | ifeq (,$(HIP_PATH)) 30 | HIP_PATH= 31 | endif 32 | HIPCXX=$(HIP_PATH)/bin/hipcc 33 | 34 | ifeq (,$(ROCBLAS_PATH)) 35 | # default to rocblas in standard ROCM tree 36 | ROCBLAS_PATH= $(ROCM_PATH) 37 | endif 38 | 39 | EXE = $(shell basename $(CURDIR)) 40 | COMMON_PATH = ../../common 41 | SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp) 42 | OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES)) 43 | 44 | CXX=g++ 45 | # uncomment to use hip compiler 46 | #CXX=$(HIPCXX) 47 | OPT = -g -Ofast -march=native -Wall 48 | INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include 49 | CXXFLAGS = -std=c++14 $(INC) $(OPT) 50 | ifneq ($(CXX),$(HIPCXX)) 51 | CXXFLAGS += -D__HIP_PLATFORM_AMD__ 52 | endif 53 | 54 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++ 55 | ifneq ($(CXX),$(HIPCXX)) 56 | LDFLAGS += -lamdhip64 57 | endif 58 | 59 | RM = rm -f 60 | 61 | .PHONY: all clean run 62 | 63 | all: $(EXE) 64 | 65 | %.o: %.cpp 66 | $(CXX) $(CXXFLAGS) -c $< -o $@ 67 | 68 | $(EXE): $(OBJECTS) 69 | $(CXX) $(OBJECTS) $(LDFLAGS) -o $@ 70 | 71 | clean: 72 | $(RM) $(EXE) $(OBJECTS) 73 | 74 | run: 75 | ./$(EXE) 76 | -------------------------------------------------------------------------------- /Level-1/nrm2/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples nrm2 2 | This example presents vector 'X' and a scalar 'Result' transferred to the GPU device and calling the rocBLAS nrm2 function. Then, The rocBLAS nrm2 function computes the Euclidean norm of vector 'X' and stores the output in 'Result'. 
Finally, the gold-standard value 'goldResult' (computed on the CPU) along with the result 'hResult' (computed on the GPU) are displayed for comparison. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values (incx=1, n=5). 6 | Running with --help will show the options: 7 | 8 | Usage: ./nrm2 9 | --incx Increment for x vector 10 | --n Size of vector 11 | 12 | ## Building 13 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples which default to compile using gcc but can also use the hipcc compiler from the rocBLAS installation. The use of hipcc compiler can be set by uncommenting lines in the Makefiles. If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocBLAS build. 14 | 15 | cd Level-1/nrm2 16 | make 17 | ./nrm2 -------------------------------------------------------------------------------- /Level-1/nrm2/nrm2.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | * 21 | * ************************************************************************ */ 22 | 23 | #include "helpers.hpp" 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | int main(int argc, char** argv) 32 | { 33 | helpers::ArgParser options("xn"); 34 | if(!options.validArgs(argc, argv)) 35 | return EXIT_FAILURE; 36 | 37 | //Initialize HIP error to check the return status of the HIP API functions 38 | hipError_t herror = hipSuccess; 39 | 40 | //Initialize rocBLAS error to check the return status of the rocBLAS API functions 41 | rocblas_status rstatus = rocblas_status_success; 42 | 43 | //Stride between consecutive values of input vector X (default value is 1) 44 | rocblas_int incx = options.incx; 45 | 46 | //Number of elements in input vector X and input vector Y (default value is 5) 47 | rocblas_int n = options.n; 48 | 49 | //Edge condition check 50 | if(n <= 0) 51 | { 52 | std::cout << "Value of 'n' should be greater than 0" << std::endl; 53 | return 0; 54 | } 55 | 56 | //Adjusting the size of input vector X for value of stride (incx) not equal to 1 57 | size_t sizeX = (n * incx) >= 0 ? 
n * incx : -(n * incx); 58 | 59 | //Allocating memory for the host input vector X and the host scalar result 60 | std::vector hX(sizeX); 61 | float hResult = 0.0; 62 | 63 | //Initialising random values to the host vector X 64 | helpers::fillVectorNormRand(hX); 65 | 66 | std::cout << "Input Vectors (X)" << std::endl; 67 | helpers::printVector(hX); 68 | 69 | //accumulate is used to store the sum of squares of vector X 70 | float accumulate = 0.0; 71 | 72 | /*goldResult is used to store the square root of accumulate 73 | and is used to compare our result from rocBLAS NRM2 funtion*/ 74 | float goldResult = 0.0; 75 | 76 | //CPU function for NRM2 77 | for(int i = 0; i < n; i++) 78 | accumulate += (hX[i * incx] * hX[i * incx]); 79 | 80 | goldResult = sqrt(accumulate); 81 | 82 | //Using rocblas API to create a handle 83 | rocblas_handle handle; 84 | rstatus = rocblas_create_handle(&handle); 85 | CHECK_ROCBLAS_STATUS(rstatus); 86 | 87 | { 88 | //Allocating memory for the device vector X 89 | helpers::DeviceVector dX(sizeX); 90 | 91 | //Enable passing hResult parameter from pointer to host memory 92 | rstatus = rocblas_set_pointer_mode(handle, rocblas_pointer_mode_host); 93 | CHECK_ROCBLAS_STATUS(rstatus); 94 | 95 | //Tansfer data from host vector X to device vector X 96 | herror = hipMemcpy(dX, hX.data(), sizeof(float) * sizeX, hipMemcpyHostToDevice); 97 | CHECK_HIP_ERROR(herror); 98 | 99 | //Asynchronous NRM2 calculation on device 100 | rstatus = rocblas_snrm2(handle, n, dX, incx, &hResult); 101 | 102 | CHECK_ROCBLAS_STATUS(rstatus); 103 | 104 | //block until result is ready 105 | CHECK_HIP_ERROR(hipDeviceSynchronize()); 106 | 107 | } // release device memory via helpers::DeviceVector destructors 108 | 109 | //Print GPU generated output 110 | std::cout << "Output result" << std::endl; 111 | std::cout << hResult << std::endl; 112 | 113 | //Print the CPU generated output 114 | std::cout << "Output Goldstandard result" << std::endl; 115 | std::cout << goldResult << std::endl; 
116 | 117 | rstatus = rocblas_destroy_handle(handle); 118 | CHECK_ROCBLAS_STATUS(rstatus); 119 | return 0; 120 | } 121 | //End of the program 122 | -------------------------------------------------------------------------------- /Level-1/scal/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # ######################################################################## 22 | 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | HIP_PATH?= $(wildcard /opt/rocm) 29 | ifeq (,$(HIP_PATH)) 30 | HIP_PATH= 31 | endif 32 | HIPCXX=$(HIP_PATH)/bin/hipcc 33 | 34 | ifeq (,$(ROCBLAS_PATH)) 35 | # default to rocblas in standard ROCM tree 36 | ROCBLAS_PATH= $(ROCM_PATH) 37 | endif 38 | 39 | EXE = $(shell basename $(CURDIR)) 40 | COMMON_PATH = ../../common 41 | SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp) 42 | OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES)) 43 | 44 | CXX=g++ 45 | # uncomment to use hip compiler 46 | #CXX=$(HIPCXX) 47 | OPT = -g -Ofast -march=native -Wall 48 | INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include 49 | CXXFLAGS = -std=c++14 $(INC) $(OPT) 50 | ifneq ($(CXX),$(HIPCXX)) 51 | CXXFLAGS += -D__HIP_PLATFORM_AMD__ 52 | endif 53 | 54 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++ 55 | ifneq ($(CXX),$(HIPCXX)) 56 | LDFLAGS += -lamdhip64 57 | endif 58 | 59 | RM = rm -f 60 | 61 | .PHONY: all clean run cxxver 62 | 63 | all: cxxver $(EXE) 64 | 65 | %.o: %.cpp 66 | $(CXX) $(CXXFLAGS) -c $< -o $@ 67 | 68 | cxxver: 69 | $(CXX) --version 70 | 71 | $(EXE): $(OBJECTS) 72 | $(CXX) $(OBJECTS) $(LDFLAGS) -o $@ 73 | 74 | clean: 75 | $(RM) $(EXE) $(OBJECTS) 76 | 77 | run: 78 | ./$(EXE) 79 | 80 | -------------------------------------------------------------------------------- /Level-1/scal/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples scal 2 | Example showing moving vector data to the GPU device and calling the rocblas scal function. Results are retrieved to host and displayed. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values. 
6 | Running with --help will show the options: 7 | 8 | Usage: ./scal 9 | --alpha Alpha scalar 10 | --n Size of vector 11 | --incx Increment for x vector 12 | 13 | ## Building 14 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples which default to compile using gcc but can also use the hipcc compiler from the rocBLAS installation. The use of hipcc compiler can be set by uncommenting lines in the Makefiles. If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocBLAS build. 15 | 16 | cd Level-1/scal 17 | make 18 | ./scal 19 | -------------------------------------------------------------------------------- /Level-1/scal/scal.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | * 21 | * ************************************************************************ */ 22 | 23 | #include "helpers.hpp" 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | // DSCAL example: scales a strided vector of doubles by alpha on the device with rocBLAS and prints it before and after 31 | int main(int argc, char** argv) 32 | { 33 | 34 | helpers::ArgParser options("nax"); 35 | if(!options.validArgs(argc, argv)) 36 | return EXIT_FAILURE; 37 | // status holders for the HIP and rocBLAS calls below 38 | hipError_t herror = hipSuccess; 39 | rocblas_status rstatus = rocblas_status_success; 40 | // element count, stride, and the number of stored elements |n * incx| needed to hold the strided vector 41 | rocblas_int n = options.n; 42 | rocblas_int incx = options.incx; 43 | size_t size = (n * incx) > 0 ? (n * incx) : -(n * incx); 44 | // element type shared by the host buffer, the device buffer and the byte-size computation 45 | typedef double dataType; 46 | 47 | // host input vector holding |n * incx| elements, filled by helpers::fillVectorNormRand 48 | std::vector hostVecA(size); 49 | helpers::fillVectorNormRand(hostVecA); 50 | 51 | // print input 52 | std::cout << "Input Vector" << std::endl; 53 | helpers::printVector(hostVecA); 54 | 55 | size_t vectorBytes = size * sizeof(dataType); 56 | 57 | // allocate the device vector and copy the host data into it 58 | dataType* deviceVecA; 59 | herror = hipMalloc(&deviceVecA, vectorBytes); 60 | CHECK_HIP_ERROR(herror); 61 | herror = hipMemcpy(deviceVecA, hostVecA.data(), vectorBytes, hipMemcpyHostToDevice); 62 | CHECK_HIP_ERROR(herror); 63 | 64 | // using rocblas API 65 | rocblas_handle handle; 66 | rstatus = rocblas_create_handle(&handle); 67 | CHECK_ROCBLAS_STATUS(rstatus); 68 | // scalar multiplier taken from the command-line options 69 | double alpha = options.alpha; 70 | 71 | // enable passing alpha parameter from pointer to host memory 72 | rstatus = rocblas_set_pointer_mode(handle, rocblas_pointer_mode_host); 73 | CHECK_ROCBLAS_STATUS(rstatus); 74 | 75 | // asynchronous calculation on device, returns before finished calculations 76 | rstatus = rocblas_dscal(handle, n, &alpha, 
deviceVecA, incx); 77 | // check that calculation was launched correctly on device, not that result 78 | // was computed yet 79 | CHECK_ROCBLAS_STATUS(rstatus); 80 | 81 | // fetch device memory results, automatically blocked until results ready 82 | herror = hipMemcpy(hostVecA.data(), deviceVecA, vectorBytes, hipMemcpyDeviceToHost); 83 | CHECK_HIP_ERROR(herror); 84 | 85 | // print results 86 | std::cout << "Output Vector, alpha = " << alpha << std::endl; 87 | helpers::printVector(hostVecA); 88 | 89 | // release device memory 90 | herror = hipFree(deviceVecA); 91 | CHECK_HIP_ERROR(herror); 92 | 93 | rstatus = rocblas_destroy_handle(handle); 94 | CHECK_ROCBLAS_STATUS(rstatus); 95 | 96 | return EXIT_SUCCESS; 97 | } 98 | -------------------------------------------------------------------------------- /Level-1/swap/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | # 21 | # ######################################################################## 22 | 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | HIP_PATH?= $(wildcard /opt/rocm) 29 | ifeq (,$(HIP_PATH)) 30 | HIP_PATH= 31 | endif 32 | HIPCXX=$(HIP_PATH)/bin/hipcc 33 | 34 | ifeq (,$(ROCBLAS_PATH)) 35 | # default to rocblas in standard ROCM tree 36 | ROCBLAS_PATH= $(ROCM_PATH) 37 | endif 38 | 39 | EXE = $(shell basename $(CURDIR)) 40 | COMMON_PATH = ../../common 41 | SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp) 42 | OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES)) 43 | 44 | CXX=g++ 45 | # uncomment to use hip compiler 46 | #CXX=$(HIPCXX) 47 | OPT = -g -Ofast -march=native -Wall 48 | INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include 49 | CXXFLAGS = -std=c++14 $(INC) $(OPT) 50 | ifneq ($(CXX),$(HIPCXX)) 51 | CXXFLAGS += -D__HIP_PLATFORM_AMD__ 52 | endif 53 | 54 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++ 55 | ifneq ($(CXX),$(HIPCXX)) 56 | LDFLAGS += -lamdhip64 57 | endif 58 | 59 | RM = rm -f 60 | 61 | .PHONY: all clean run 62 | 63 | all: $(EXE) 64 | 65 | %.o: %.cpp 66 | $(CXX) $(CXXFLAGS) -c $< -o $@ 67 | 68 | $(EXE): $(OBJECTS) 69 | $(CXX) $(OBJECTS) $(LDFLAGS) -o $@ 70 | 71 | clean: 72 | $(RM) $(EXE) $(OBJECTS) 73 | 74 | run: 75 | ./$(EXE) 76 | 77 | -------------------------------------------------------------------------------- /Level-1/swap/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples swap 2 | Example showing moving vector data to 
device and calling the rocblas swap element function. Results are retrieved to host and displayed. This is the simplest example and should be the first one to review if you are not already familiar with other BLAS libraries. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values. 6 | Running with --help will show the options: 7 | 8 | Usage: ./swap 9 | --n Size of vector 10 | --incx Increment for x vector 11 | --incy Increment for y vector 12 | 13 | ## Building 14 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other elevated access to build these examples, which compile with gcc by default but can also use the hipcc compiler from the rocBLAS installation. The use of the hipcc compiler can be enabled by uncommenting lines in the Makefiles. If rocBLAS is not installed, you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build. 15 | 16 | cd Level-1/swap 17 | make 18 | ./swap 19 | 20 | -------------------------------------------------------------------------------- /Level-1/swap/swap.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | * 21 | * ************************************************************************ */ 22 | 23 | #include "helpers.hpp" 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | // swap example: copies two host vectors to the device, exchanges their n strided elements with rocblas_sswap, copies them back and prints them before and after. 31 | int main(int argc, char** argv) 32 | { 33 | 34 | helpers::ArgParser options("nxy"); 35 | if(!options.validArgs(argc, argv)) 36 | return EXIT_FAILURE; 37 | 38 | hipError_t herror = hipSuccess; 39 | rocblas_status rstatus = rocblas_status_success; 40 | 41 | rocblas_int incx = options.incx; 42 | rocblas_int incy = options.incy; 43 | rocblas_int n = options.n; 44 | 45 | // enlarge nSize to allow for input parameters increment (nSize is the buffer length in elements, n is the number of strided elements) 46 | rocblas_int nSize = n * std::max(incx, incy); 47 | 48 | // host input vectors of size nSize 49 | std::vector hostVecA(nSize); 50 | // test data is just random numbers set by helper functions 51 | helpers::fillVectorNormRand(hostVecA, incx); 52 | std::vector hostVecB(nSize); 53 | helpers::fillVectorNormRand(hostVecB, incy); 54 | 55 | // print input 56 | std::cout << "Input Vectors" << std::endl; 57 | helpers::printVector(hostVecA, n, incx); 58 | helpers::printVector(hostVecB, n, incy); 59 | 60 | size_t vectorBytes = nSize * sizeof(float); 61 | 62 | // allocate device vector memory using hipMalloc and copy the n strided elements from host (the element count is n, not nSize: stepping nSize elements by the increment would run past the nSize-element buffers whenever the increment is > 1) 63 | float* deviceVecA; 64 | herror = hipMalloc(&deviceVecA, vectorBytes); 65 | CHECK_HIP_ERROR(herror); 66 | rstatus = rocblas_set_vector(n, sizeof(float), hostVecA.data(), incx, deviceVecA, incx); 67 | CHECK_ROCBLAS_STATUS(rstatus); 68 | // equivalent if increments are all 1 to doing call 69 | //herror = hipMemcpy(deviceVecA, hostVecA.data(), vectorBytes, hipMemcpyHostToDevice); 70 | //CHECK_HIP_ERROR(herror); 71 | 72 | float* deviceVecB; 73 | herror = hipMalloc(&deviceVecB, vectorBytes); 74 | CHECK_HIP_ERROR(herror); 75 | rstatus = rocblas_set_vector(n, sizeof(float), hostVecB.data(), incy, deviceVecB, incy); 76 | CHECK_ROCBLAS_STATUS(rstatus); 77 | 78 | // using rocblas API 79 | rocblas_handle handle; 
80 | rstatus = rocblas_create_handle(&handle); 81 | CHECK_ROCBLAS_STATUS(rstatus); 82 | 83 | // asynchronous calculation on device, returns before finished calculations 84 | // Leading 's' in sswap stands for single precision float 85 | // the rocblas "C" API specifies data type this way as there is no function overloading in "C" 86 | rstatus = rocblas_sswap(handle, n, deviceVecA, incx, deviceVecB, incy); 87 | // check that calculation was launched correctly on device, not that result 88 | // was computed yet 89 | CHECK_ROCBLAS_STATUS(rstatus); 90 | 91 | // fetch device memory results, hipMemcpy automatically blocked until results ready (again n strided elements, matching what was uploaded and swapped) 92 | rstatus = rocblas_get_vector(n, sizeof(float), deviceVecA, incx, hostVecA.data(), incx); 93 | CHECK_ROCBLAS_STATUS(rstatus); 94 | // equivalent if increments are all 1 to doing call 95 | // herror = hipMemcpy(hostVecA.data(), deviceVecA, vectorBytes, hipMemcpyDeviceToHost); 96 | // CHECK_HIP_ERROR(herror); 97 | 98 | rstatus = rocblas_get_vector(n, sizeof(float), deviceVecB, incy, hostVecB.data(), incy); 99 | CHECK_ROCBLAS_STATUS(rstatus); 100 | 101 | // print results 102 | std::cout << "Output Vectors" << std::endl; 103 | helpers::printVector(hostVecA, n, incx); 104 | helpers::printVector(hostVecB, n, incy); 105 | 106 | // release device memory 107 | herror = hipFree(deviceVecA); 108 | CHECK_HIP_ERROR(herror); 109 | herror = hipFree(deviceVecB); 110 | CHECK_HIP_ERROR(herror); 111 | 112 | // releasing rocblas resources 113 | rstatus = rocblas_destroy_handle(handle); 114 | CHECK_ROCBLAS_STATUS(rstatus); 115 | 116 | return EXIT_SUCCESS; 117 | } 118 | -------------------------------------------------------------------------------- /Level-2/gemv/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # ######################################################################## 22 | # Builds one executable named after this directory from the local *.cpp files plus $(COMMON_PATH)/*.cpp 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | HIP_PATH?= $(wildcard /opt/rocm) 29 | ifeq (,$(HIP_PATH)) 30 | HIP_PATH= 31 | endif 32 | HIPCXX=$(HIP_PATH)/bin/hipcc 33 | 34 | ifeq (,$(ROCBLAS_PATH)) 35 | # default to rocblas in standard ROCM tree 36 | ROCBLAS_PATH= $(ROCM_PATH) 37 | endif 38 | 39 | EXE = $(shell basename $(CURDIR)) 40 | COMMON_PATH = ../../common 41 | SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp) 42 | OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES)) 43 | 44 | CXX=g++ 45 | # uncomment to use hip compiler 46 | #CXX=$(HIPCXX) 47 | OPT = -g -Ofast -march=native -Wall 48 | INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include 49 | CXXFLAGS = -std=c++14 $(INC) $(OPT) 50 | ifneq ($(CXX),$(HIPCXX)) 51 | CXXFLAGS += -D__HIP_PLATFORM_AMD__ 52 | endif 53 | 54 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++ 55 | ifneq ($(CXX),$(HIPCXX)) 56 | LDFLAGS += -lamdhip64 57 | endif 58 | 59 | RM = rm -f 60 | 61 | .PHONY: all clean run 62 | 63 | all: $(EXE) 64 | 65 | %.o: %.cpp 66 | $(CXX) $(CXXFLAGS) -c $< -o $@ 67 | 68 | $(EXE): $(OBJECTS) 69 | $(CXX) $(OBJECTS) $(LDFLAGS) -o $@ 70 | 71 | clean: 72 | $(RM) $(EXE) $(OBJECTS) 73 | # depend on the executable so that `make run` also works from a clean tree 74 | run: $(EXE) 75 | ./$(EXE) 76 | 77 | -------------------------------------------------------------------------------- /Level-2/gemv/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples gemv 2 | Example showing moving matrix and vector data to the GPU device and calling the rocblas gemv (general matrix vector product) function. Results are fetched from GPU and compared against a CPU implementation and displayed. 
This example uses the helpers::GPUTimer which can be viewed to see how hip API calls can be used to time computation in a stream using events. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values. 6 | Running with --help will show the options: 7 | 8 | Usage: ./gemv 9 | --M Matrix/vector dimension 10 | --N Matrix/vector dimension 11 | --alpha Alpha scalar 12 | --beta Beta scalar 13 | --incx Increment for x vector 14 | --incy Increment for y vector 15 | 16 | ## Building 17 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other elevated access to build these examples, which compile with gcc by default but can also use the hipcc compiler from the rocBLAS installation. The use of the hipcc compiler can be enabled by uncommenting lines in the Makefiles. If rocBLAS is not installed, you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build. 18 | 19 | cd Level-2/gemv 20 | make 21 | ./gemv 22 | 23 | -------------------------------------------------------------------------------- /Level-2/gemv/gemv.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | * 21 | * ************************************************************************ */ 22 | 23 | #include "helpers.hpp" 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | int main(int argc, char** argv) 32 | { 33 | helpers::ArgParser options("MNabxy"); 34 | if(!options.validArgs(argc, argv)) 35 | return EXIT_FAILURE; 36 | 37 | rocblas_status rstatus = rocblas_status_success; 38 | 39 | typedef float dataType; 40 | 41 | rocblas_int M = options.M; 42 | rocblas_int N = options.N; 43 | rocblas_int incx = options.incx; 44 | rocblas_int incy = options.incy; 45 | 46 | float hAlpha = options.alpha; 47 | float hBeta = options.beta; 48 | 49 | const rocblas_operation transA = rocblas_operation_none; 50 | 51 | size_t sizeX, dimX, absIncx; 52 | size_t sizeY, dimY, absIncy; 53 | 54 | if(transA == rocblas_operation_none) 55 | { 56 | dimX = N; 57 | dimY = M; 58 | } 59 | else // transpose 60 | { 61 | dimX = M; 62 | dimY = N; 63 | } 64 | rocblas_int lda = M; 65 | size_t sizeA = lda * size_t(N); 66 | 67 | absIncx = incx >= 0 ? incx : -incx; 68 | absIncy = incy >= 0 ? incy : -incy; 69 | 70 | sizeX = dimX * absIncx; 71 | sizeY = dimY * absIncy; 72 | 73 | // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory 74 | std::vector hA(sizeA); 75 | std::vector hX(sizeX); 76 | std::vector hY(sizeY, 1); 77 | 78 | std::vector hYGold(hY); 79 | 80 | helpers::matIdentity(hA.data(), M, N, lda); 81 | 82 | helpers::fillVectorNormRand(hX); 83 | 84 | // print input 85 | std::cout << "Input Vectors (X)" << std::endl; 86 | helpers::printVector(hX); 87 | 88 | // using rocblas API 89 | rocblas_handle handle; 90 | rstatus = rocblas_create_handle(&handle); 91 | CHECK_ROCBLAS_STATUS(rstatus); 92 | 93 | { 94 | // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory 95 | 96 | // allocate memory on device 97 | helpers::DeviceVector dA(sizeA); 98 | helpers::DeviceVector dX(sizeX); 99 | helpers::DeviceVector dY(sizeY); 100 | 101 | if((!dA && sizeA) || (!dX && sizeX) || (!dY && sizeY)) 102 | { 103 | CHECK_HIP_ERROR(hipErrorOutOfMemory); 104 | return EXIT_FAILURE; 105 | } 106 | 107 | // time data to device, computation, and data from device back to host 108 | helpers::GPUTimer gpuTimer; 109 | gpuTimer.start(); 110 | 111 | // copy data from CPU to device 112 | CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(dataType) * sizeA, hipMemcpyHostToDevice)); 113 | CHECK_HIP_ERROR(hipMemcpy(dX, hX.data(), sizeof(dataType) * sizeX, hipMemcpyHostToDevice)); 114 | CHECK_HIP_ERROR(hipMemcpy(dY, hY.data(), sizeof(dataType) * sizeY, hipMemcpyHostToDevice)); 115 | 116 | // enable passing alpha and beta parameters from pointer to host memory 117 | rstatus = rocblas_set_pointer_mode(handle, rocblas_pointer_mode_host); 118 | CHECK_ROCBLAS_STATUS(rstatus); 119 | 120 | // asynchronous calculation on device, returns before finished calculations 121 | rstatus = rocblas_sgemv(handle, transA, M, N, &hAlpha, dA, lda, dX, incx, &hBeta, dY, incy); 122 | 123 | // check that calculation was launched correctly on device, not that result 124 | // was computed yet 125 | CHECK_ROCBLAS_STATUS(rstatus); 126 | 127 | // fetch device memory results, automatically blocked until results ready 128 | CHECK_HIP_ERROR(hipMemcpy(hY.data(), dY, sizeof(dataType) * sizeY, hipMemcpyDeviceToHost)); 129 | 130 | gpuTimer.stop(); 131 | 132 | } // release device memory via helpers::DeviceVector destructors 133 | 134 | std::cout << "M, N, lda = " << M << ", " << N << ", " << lda << std::endl; 135 | 136 | // print input 137 | std::cout << "Output Vector Y = alpha*Identity*X(random,...) 
+ beta*Y(1,1,...)" << std::endl; 138 | helpers::printVector(hY); 139 | 140 | // calculate expected result using CPU 141 | for(size_t i = 0; i < sizeY; i++) 142 | { 143 | // matrix is identity so just doing simpler calculation over vectors 144 | hYGold[i] = hAlpha * 1.0f * hX[i] + hBeta * hYGold[i]; 145 | } 146 | 147 | dataType maxRelativeError = (dataType)helpers::maxRelativeError(hY, hYGold); 148 | dataType eps = std::numeric_limits::epsilon(); 149 | dataType tolerance = 10; 150 | if(maxRelativeError > eps * tolerance) 151 | { 152 | std::cout << "FAIL"; 153 | } 154 | else 155 | { 156 | std::cout << "PASS"; 157 | } 158 | std::cout << ": max. relative err. = " << maxRelativeError << std::endl; 159 | 160 | rstatus = rocblas_destroy_handle(handle); 161 | CHECK_ROCBLAS_STATUS(rstatus); 162 | 163 | return EXIT_SUCCESS; 164 | } 165 | -------------------------------------------------------------------------------- /Level-2/her/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 
13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | # 21 | # ######################################################################## 22 | # Builds one executable named after this directory from the local *.cpp files plus $(COMMON_PATH)/*.cpp 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | HIP_PATH?= $(wildcard /opt/rocm) 29 | ifeq (,$(HIP_PATH)) 30 | HIP_PATH= 31 | endif 32 | HIPCXX=$(HIP_PATH)/bin/hipcc 33 | 34 | ifeq (,$(ROCBLAS_PATH)) 35 | # default to rocblas in standard ROCM tree 36 | ROCBLAS_PATH= $(ROCM_PATH) 37 | endif 38 | 39 | EXE = $(shell basename $(CURDIR)) 40 | COMMON_PATH = ../../common 41 | SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp) 42 | OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES)) 43 | 44 | CXX=g++ 45 | # uncomment to use hip compiler 46 | #CXX=$(HIPCXX) 47 | OPT = -g -Ofast -march=native -Wall 48 | INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include 49 | CXXFLAGS = -std=c++14 $(INC) $(OPT) 50 | ifneq ($(CXX),$(HIPCXX)) 51 | CXXFLAGS += -D__HIP_PLATFORM_AMD__ 52 | endif 53 | 54 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++ 55 | ifneq ($(CXX),$(HIPCXX)) 56 | LDFLAGS += -lamdhip64 57 | endif 58 | 59 | RM = rm -f 60 | 61 | .PHONY: all clean run 62 | 63 | all: $(EXE) 64 | 65 | %.o: %.cpp 66 | $(CXX) $(CXXFLAGS) -c $< -o $@ 67 | 68 | $(EXE): $(OBJECTS) 69 | $(CXX) $(OBJECTS) $(LDFLAGS) -o $@ 70 | 71 | clean: 72 | $(RM) $(EXE) $(OBJECTS) 73 | # depend on the executable so that `make run` also works from a clean tree 74 | run: $(EXE) 75 | ./$(EXE) 76 | 77 | 
-------------------------------------------------------------------------------- /Level-2/her/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples her 2 | Example showing moving matrix and vector data to the GPU device and calling the rocblas her (Hermitian rank-1 update) function. This example illustrates the mixed usage of 3 different complex types with the same memory layout (hipFloatComplex, std::complex, and rocblas_float_complex). A reinterpret_cast can be used if passing one pointer type into the rocblas function which uses the rocblas_float_complex type. Results are fetched from GPU and compared against a CPU implementation and displayed. This example uses the helpers::GPUTimer which can be viewed to see how hip API calls can be used to time computation in a stream using events. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values. 6 | Running with --help will show the options: 7 | 8 | Usage: ./her 9 | --N Matrix/vector dimension 10 | --alpha Alpha scalar 11 | --incx Increment for x vector 12 | 13 | ## Building 14 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other elevated access to build these examples, which compile with gcc by default but can also use the hipcc compiler from the rocBLAS installation. The use of the hipcc compiler can be enabled by uncommenting lines in the Makefiles. If rocBLAS is not installed, you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build. 15 | 16 | cd Level-2/her 17 | make 18 | ./her 19 | 20 | -------------------------------------------------------------------------------- /Level-2/her/her.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. 
All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | * 21 | * ************************************************************************ */ 22 | 23 | #include "helpers.hpp" 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | // her example: runs rocblas_cher (A := alpha*x*x^H + A) on a matrix set up by helpers::matIdentity, then compares the stored triangle against a CPU reference and prints PASS or FAIL. 33 | int main(int argc, char** argv) 34 | { 35 | helpers::ArgParser options("Nax"); 36 | if(!options.validArgs(argc, argv)) 37 | return EXIT_FAILURE; 38 | 39 | rocblas_status rstatus = rocblas_status_success; 40 | 41 | rocblas_int N = options.N; 42 | rocblas_int incx = options.incx; 43 | 44 | float hAlpha = options.alpha; 45 | 46 | const rocblas_fill uplo = rocblas_fill_upper; 47 | 48 | size_t sizeX, absIncx; 49 | 50 | rocblas_int lda = N; 51 | size_t sizeA = lda * size_t(N); 52 | 53 | absIncx = incx >= 0 ? 
incx : -incx; 54 | 55 | sizeX = N * absIncx; 56 | 57 | // Naming: dK is in GPU (device) memory. hK is in CPU (host) memory 58 | 59 | std::vector hA(sizeA); 60 | 61 | // we are using std::complex for its operators and it has same memory layout 62 | // as hipFloatComplex so can copy the data into the array for use in the rocblas C API 63 | std::vector> hX(sizeX); 64 | helpers::fillVectorUniformIntRand(hX); 65 | 66 | std::vector hAGold(sizeA); 67 | 68 | // initialize simple data for simple host side reference computation 69 | helpers::matIdentity(hA.data(), N, N, lda); 70 | hAGold = hA; 71 | 72 | // using rocblas API 73 | rocblas_handle handle; 74 | rstatus = rocblas_create_handle(&handle); 75 | CHECK_ROCBLAS_STATUS(rstatus); 76 | 77 | { 78 | // Naming: dX is in GPU (device) memory. hK is in CPU (host) memory 79 | 80 | // allocate memory on device 81 | helpers::DeviceVector dA(sizeA); 82 | helpers::DeviceVector dX(sizeX); 83 | 84 | if((!dA && sizeA) || (!dX && sizeX)) 85 | { 86 | CHECK_HIP_ERROR(hipErrorOutOfMemory); 87 | return EXIT_FAILURE; 88 | } 89 | 90 | // time data to device, computation, and data from device back to host 91 | helpers::GPUTimer gpuTimer; 92 | gpuTimer.start(); 93 | 94 | // copy data from CPU to device (all 3 complex types same memory layout) 95 | CHECK_HIP_ERROR( 96 | hipMemcpy(dA, hA.data(), sizeof(hipFloatComplex) * sizeA, hipMemcpyHostToDevice)); 97 | CHECK_HIP_ERROR( 98 | hipMemcpy(dX, hX.data(), sizeof(std::complex) * sizeX, hipMemcpyHostToDevice)); 99 | 100 | // enable passing the alpha parameter from pointer to host memory 101 | rstatus = rocblas_set_pointer_mode(handle, rocblas_pointer_mode_host); 102 | CHECK_ROCBLAS_STATUS(rstatus); 103 | 104 | // asynchronous calculation on device, returns before finished calculations 105 | rstatus = rocblas_cher(handle, uplo, N, &hAlpha, dX, incx, dA, lda); 106 | 107 | // check that calculation was launched correctly on device, not that result 108 | // was computed yet 109 | 
CHECK_ROCBLAS_STATUS(rstatus); 110 | 111 | // fetch device memory results, automatically blocked until results ready 112 | CHECK_HIP_ERROR( 113 | hipMemcpy(hA.data(), dA, sizeof(hipFloatComplex) * sizeA, hipMemcpyDeviceToHost)); 114 | 115 | gpuTimer.stop(); 116 | 117 | } // release device memory via helpers::DeviceVector destructors 118 | 119 | std::cout << "alpha, N, lda = " << hAlpha << ", " << N << ", " << lda << std::endl; 120 | 121 | // calculate expected result using CPU 122 | for(int i = 0; i < N; i++) 123 | { 124 | // matrix is identity so just doing simpler calculation over x vectors; logical element k of x sits at k * |incx|, or at (N - 1 - k) * |incx| when incx is negative (BLAS convention), so the reference matches what the device reads for any increment 125 | for(int j = 0; j < N; j++) 126 | { 127 | std::complex r = hX[(incx >= 0 ? j : N - 1 - j) * absIncx] * std::conj(hX[(incx >= 0 ? i : N - 1 - i) * absIncx]); 128 | r *= std::complex(hAlpha, 0); 129 | 130 | // using hip helper function hipCaddf to add hip complex type 131 | hAGold[i * lda + j] 132 | = hipCaddf(hipFloatComplex(r.real(), r.imag()), hAGold[i * lda + j]); 133 | } 134 | } 135 | 136 | bool fail = false; 137 | for(int i = 0; i < N; i++) 138 | { 139 | for(int j = 0; j < N; j++) 140 | { 141 | if(uplo == rocblas_fill_upper && j > i) 142 | continue; 143 | else if(uplo != rocblas_fill_upper && j < i) 144 | continue; 145 | 146 | if(hAGold[i * lda + j] != hA[i * lda + j]) 147 | fail = true; 148 | } 149 | } 150 | 151 | if(fail) 152 | { 153 | std::cout << "FAIL" << std::endl; 154 | } 155 | else 156 | { 157 | std::cout << "PASS" << std::endl; 158 | } 159 | 160 | rstatus = rocblas_destroy_handle(handle); 161 | CHECK_ROCBLAS_STATUS(rstatus); 162 | 163 | return EXIT_SUCCESS; 164 | } 165 | -------------------------------------------------------------------------------- /Level-2/trmv/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # ######################################################################## 22 | # Builds one executable named after this directory from the local *.cpp files plus $(COMMON_PATH)/*.cpp 23 | ROCM_PATH?= $(wildcard /opt/rocm) 24 | ifeq (,$(ROCM_PATH)) 25 | ROCM_PATH= 26 | endif 27 | 28 | HIP_PATH?= $(wildcard /opt/rocm) 29 | ifeq (,$(HIP_PATH)) 30 | HIP_PATH= 31 | endif 32 | HIPCXX=$(HIP_PATH)/bin/hipcc 33 | 34 | ifeq (,$(ROCBLAS_PATH)) 35 | # default to rocblas in standard ROCM tree 36 | ROCBLAS_PATH= $(ROCM_PATH) 37 | endif 38 | 39 | EXE = $(shell basename $(CURDIR)) 40 | COMMON_PATH = ../../common 41 | SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp) 42 | OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES)) 43 | 44 | CXX=g++ 45 | # uncomment to use hip compiler 46 | #CXX=$(HIPCXX) 47 | OPT = -g -Ofast -march=native -Wall 48 | INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include 49 | CXXFLAGS = -std=c++14 $(INC) $(OPT) 50 | ifneq ($(CXX),$(HIPCXX)) 51 | CXXFLAGS += -D__HIP_PLATFORM_AMD__ 52 | endif 53 | 54 | LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++ 55 | ifneq ($(CXX),$(HIPCXX)) 56 | LDFLAGS += -lamdhip64 57 | endif 58 | 59 | RM = rm -f 60 | 61 | .PHONY: all clean run 62 | 63 | all: $(EXE) 64 | 65 | %.o: %.cpp 66 | $(CXX) $(CXXFLAGS) -c $< -o $@ 67 | 68 | $(EXE): $(OBJECTS) 69 | $(CXX) $(OBJECTS) $(LDFLAGS) -o $@ 70 | 71 | clean: 72 | $(RM) $(EXE) $(OBJECTS) 73 | # depend on the executable so that `make run` also works from a clean tree 74 | run: $(EXE) 75 | ./$(EXE) 76 | 77 | -------------------------------------------------------------------------------- /Level-2/trmv/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples trmv 2 | Example showing moving matrix and vector data to the GPU device and calling the rocblas trmv function which does the matrix-vector operations. Results are fetched from GPU and compared against a CPU implementation and displayed. 
This example illustrates the mixed usage of 2 different complex types with the same memory layout (std::complex, and rocblas_float_complex) and additionally shows absolute and relative error checking. This example uses the helpers::GPUTimer which can be viewed to see how hip API calls can be used to time computation in a stream using events. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values. 6 | Running with --help will show the options: 7 | 8 | Usage: ./trmv 9 | --N Matrix/vector dimension 10 | --incx Increment for x vector 11 | 12 | ## Building 13 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other elevated access to build these examples, which compile with gcc by default but can also use the hipcc compiler from the rocBLAS installation. The use of the hipcc compiler can be enabled by uncommenting lines in the Makefiles. If rocBLAS is not installed, you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build. 14 | 15 | cd Level-2/trmv 16 | make 17 | ./trmv 18 | 19 | -------------------------------------------------------------------------------- /Level-2/trmv/trmv.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | * 21 | * ************************************************************************ */ 22 | 23 | #include "helpers.hpp" 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | // lda and incx promoted to 64bit to avoid int32 overflow 33 | template 34 | void referenceTrmvCalc(rocblas_fill uplo, 35 | std::vector& A, 36 | rocblas_int N, 37 | size_t lda, 38 | std::vector& x, 39 | std::vector& cpu_ref_result, 40 | ssize_t incx) 41 | { 42 | // calculate expected result using CPU 43 | if(uplo == rocblas_fill_lower) 44 | { 45 | for(int row = 0; row < N; row++) 46 | { 47 | T elem = T(0.0); 48 | for(int col = 0; col < row + 1; col++) 49 | { 50 | elem += A[col * lda + row] * x[col * incx]; 51 | } 52 | cpu_ref_result[row * incx] = elem; 53 | } 54 | } 55 | else 56 | { 57 | for(int row = 0; row < N; row++) 58 | { 59 | T elem = T(0.0); 60 | for(int col = row; col < N; col++) 61 | { 62 | elem += A[col * lda + row] * x[col * incx]; 63 | } 64 | cpu_ref_result[row * incx] = elem; 65 | } 66 | } 67 | } 68 | 69 | int main(int argc, char** argv) 70 | { 71 | 72 | typedef std::complex T; 73 | 74 | helpers::ArgParser options("Nx"); 75 | if(!options.validArgs(argc, argv)) 76 | return EXIT_FAILURE; 77 | 78 | rocblas_status rstatus = rocblas_status_success; 79 | 80 | rocblas_int N = options.N; 81 | rocblas_int incx = options.incx; 82 | 83 | // Pre-filled parameters 84 | const rocblas_fill uplo = rocblas_fill_lower; 85 | const rocblas_diagonal diag = rocblas_diagonal_non_unit; 86 | 87 | //trans is fixed to rocblas_operation_none in this example and support for other options would be added in the future release 88 | const rocblas_operation trans = rocblas_operation_none; 89 | 90 | size_t sizeX, sizeA; 91 | rocblas_int absIncx; 92 | 93 | rocblas_int lda = N; 94 | absIncx = incx >= 0 ? incx : -incx; 95 | sizeX = size_t(N) * absIncx; 96 | sizeA = size_t(lda) * N; 97 | 98 | // Naming: dA is in GPU (device) memory. 
hA is in CPU (host) memory 99 | 100 | // we are using std::complex for it's operators and it has same memory layout 101 | // as rocblas_float_complex so can copy the data into the array for use in the rocblas C API 102 | std::vector hA(sizeA); 103 | std::vector hX(sizeX); 104 | std::vector hXCopy(sizeX); 105 | std::vector hXGold(sizeX, {0.0f, 0.0f}); 106 | 107 | // initialize uniform random data with lower and upper range 108 | helpers::fillVectorUniformRealDist(hA, -0.5, 0.5); 109 | helpers::fillVectorUniformRealDist(hX, -0.5, 0.5); 110 | 111 | hXCopy = hX; 112 | 113 | //zero out lower/upper part of the matrix depending upon the uplo parameter 114 | helpers::makeMatrixUpperOrlower(uplo, hA, N, lda); 115 | 116 | //Make matrix unit diagonal depending upon the diag parameter 117 | if(diag == rocblas_diagonal_unit) 118 | helpers::make_unit_diagonal(uplo, hA, N, lda); 119 | 120 | // using rocblas API 121 | rocblas_handle handle; 122 | rstatus = rocblas_create_handle(&handle); 123 | CHECK_ROCBLAS_STATUS(rstatus); 124 | 125 | { 126 | // Naming: dX is in GPU (device) memory. 
hK is in CPU (host) memory 127 | 128 | // allocate memory on device 129 | helpers::DeviceVector dA(sizeA); 130 | helpers::DeviceVector dX(sizeX); 131 | 132 | if((!dA && sizeA) || (!dX && sizeX)) 133 | { 134 | CHECK_HIP_ERROR(hipErrorOutOfMemory); 135 | return EXIT_FAILURE; 136 | } 137 | 138 | // time data to device, computation, and data from device back to host 139 | helpers::GPUTimer gpuTimer; 140 | gpuTimer.start(); 141 | 142 | // copy data from CPU to device (all 3 complex types same memory layout) 143 | CHECK_HIP_ERROR(hipMemcpy(dA, hA.data(), sizeof(T) * sizeA, hipMemcpyHostToDevice)); 144 | CHECK_HIP_ERROR(hipMemcpy(dX, hX.data(), sizeof(T) * sizeX, hipMemcpyHostToDevice)); 145 | 146 | // enable passing alpha and beta parameters from pointer to host memory 147 | rstatus = rocblas_set_pointer_mode(handle, rocblas_pointer_mode_host); 148 | CHECK_ROCBLAS_STATUS(rstatus); 149 | 150 | // asynchronous calculation on device, returns before finished calculations 151 | rstatus = rocblas_ctrmv(handle, uplo, trans, diag, N, dA, lda, dX, incx); 152 | 153 | // check that calculation was launched correctly on device, not that result 154 | // was computed yet 155 | CHECK_ROCBLAS_STATUS(rstatus); 156 | 157 | // fetch device memory results, automatically blocked until results ready 158 | CHECK_HIP_ERROR( 159 | hipMemcpy(hX.data(), dX, sizeof(rocblas_float_complex) * sizeX, hipMemcpyDeviceToHost)); 160 | 161 | gpuTimer.stop(); 162 | 163 | } // release device memory via helpers::DeviceVector destructors 164 | 165 | std::cout << "N, lda, incx = " << N << ", " << lda << ", " << incx << std::endl; 166 | 167 | // calculate expected result using CPU 168 | referenceTrmvCalc(uplo, hA, N, lda, hXCopy, hXGold, incx); 169 | 170 | double maxRelativeError = helpers::maxRelativeErrorComplexVector(hXGold, hX, N, incx); 171 | 172 | double maxAbsoluteError = helpers::maxAbsoluteErrorComplexVector(hXGold, hX, N, incx); 173 | 174 | std::cout << "max relative err = " << maxRelativeError 175 | << 
", max absolute err = " << maxAbsoluteError << std::endl; 176 | 177 | rstatus = rocblas_destroy_handle(handle); 178 | CHECK_ROCBLAS_STATUS(rstatus); 179 | 180 | return EXIT_SUCCESS; 181 | } 182 | -------------------------------------------------------------------------------- /Level-3/gemm/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # ######################################################################## 22 |
# Locations of the ROCm, HIP and rocBLAS installations; each may be overridden
# from the environment or the make command line.
ROCM_PATH?= $(wildcard /opt/rocm)
ifeq (,$(ROCM_PATH))
ROCM_PATH=
endif

HIP_PATH?= $(wildcard /opt/rocm)
ifeq (,$(HIP_PATH))
HIP_PATH=
endif
HIPCXX=$(HIP_PATH)/bin/hipcc

ifeq (,$(ROCBLAS_PATH))
# default to rocblas in standard ROCM tree
ROCBLAS_PATH= $(ROCM_PATH)
endif

# the executable is named after the directory that contains this Makefile
EXE = $(shell basename $(CURDIR))
COMMON_PATH = ../../common
SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp)
OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES))

CXX=g++
# uncomment to use hip compiler
#CXX=$(HIPCXX)
OPT = -g -Ofast -march=native -Wall
INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include
CXXFLAGS = -std=c++14 $(INC) $(OPT)
# a non-hipcc compiler needs the HIP platform macro defined explicitly
ifneq ($(CXX),$(HIPCXX))
CXXFLAGS += -D__HIP_PLATFORM_AMD__
endif

LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++
# a non-hipcc compiler must link the HIP runtime explicitly
ifneq ($(CXX),$(HIPCXX))
LDFLAGS += -lamdhip64
endif

RM = rm -f

.PHONY: all clean run

all: $(EXE)

%.o: %.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@

$(EXE): $(OBJECTS)
	$(CXX) $(OBJECTS) $(LDFLAGS) -o $@

clean:
	$(RM) $(EXE) $(OBJECTS)

# depend on the executable so that `make run` works from a clean tree
run: $(EXE)
	./$(EXE)

77 | -------------------------------------------------------------------------------- /Level-3/gemm/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples gemm 2 | Example showing moving matrix and vector data to the GPU device and calling the rocblas gemm (general matrix matrix product) function. Results are fetched from GPU and compared against a CPU implementation. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values.
6 | Running with --help will show the options: 7 | 8 | Usage: ./gemm 9 | --K Matrix/vector dimension 10 | --M Matrix/vector dimension 11 | --N Matrix/vector dimension 12 | --alpha Alpha scalar 13 | --beta Beta scalar 14 | 15 | ## Building 16 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples which default to compile using gcc but can also use the hipcc compiler from the rocBLAS installation. The use of hipcc compiler can be set by uncommenting lines in the Makefiles. If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build. 17 | 18 | cd Level-3/gemm 19 | make 20 | ./gemm 21 | -------------------------------------------------------------------------------- /Level-3/gemm/gemm.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | * 21 | * ************************************************************************ */ 22 | 23 | #include "helpers.hpp" 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 |
// NOTE(review): angle-bracket contents (include targets, template arguments) were
// stripped from this listing. The template arguments below are reconstructed from
// how the values are used; confirm against the original gemm.cpp.

// Runs rocblas_sgemm (C := alpha * A * B + beta * C) on the device and compares
// the result with a CPU matrix-matrix product.
int main(int argc, char** argv)
{
    helpers::ArgParser options("MNKab");
    if(!options.validArgs(argc, argv))
        return EXIT_FAILURE;

    rocblas_status rstatus = rocblas_status_success;

    typedef float dataType;

    rocblas_int M = options.M;
    rocblas_int N = options.N;
    rocblas_int K = options.K;

    float hAlpha = options.alpha;
    float hBeta  = options.beta;

    const rocblas_operation transA = rocblas_operation_none;
    const rocblas_operation transB = rocblas_operation_none;

    rocblas_int lda, ldb, ldc;
    // element counts are products of two dimensions: keep them in 64 bit so that
    // large M/N/K cannot overflow a 32-bit rocblas_int
    size_t sizeA, sizeB, sizeC;
    // row/column strides of op(A) and op(B) as seen by the CPU reference
    int strideA1, strideA2, strideB1, strideB2;

    if(transA == rocblas_operation_none)
    {
        lda      = M;
        sizeA    = size_t(K) * lda;
        strideA1 = 1;
        strideA2 = lda;
    }
    else
    {
        lda      = K;
        sizeA    = size_t(M) * lda;
        strideA1 = lda;
        strideA2 = 1;
    }
    if(transB == rocblas_operation_none)
    {
        ldb      = K;
        sizeB    = size_t(N) * ldb;
        strideB1 = 1;
        strideB2 = ldb;
    }
    else
    {
        ldb      = N;
        sizeB    = size_t(K) * ldb;
        strideB1 = ldb;
        strideB2 = 1;
    }
    ldc   = M;
    sizeC = size_t(N) * ldc;

    // using rocblas API
    rocblas_handle handle;
    rstatus = rocblas_create_handle(&handle);
    CHECK_ROCBLAS_STATUS(rstatus);

    // Naming: dX is in GPU (device) memory. hX is in CPU (host) memory

    // A and C start as all ones; B is set to the identity below
    std::vector<dataType> hA(sizeA, 1);
    std::vector<dataType> hB(sizeB);
    std::vector<dataType> hC(sizeC, 1);
    std::vector<dataType> hGold(sizeC);

    // helpers::matIdentity(hA.data(), M, K, lda);
    helpers::matIdentity(hB.data(), K, N, ldb);
    // helpers::matIdentity(hC.data(), M, N, ldc);

    // gemm overwrites C, so the CPU reference starts from the same initial C
    hGold = hC;

    {
        // allocate memory on device
        helpers::DeviceVector<dataType> dA(sizeA);
        helpers::DeviceVector<dataType> dB(sizeB);
        helpers::DeviceVector<dataType> dC(sizeC);

        // a null device pointer is only an error when a non-empty buffer was requested
        if((!dA && sizeA) || (!dB && sizeB) || (!dC && sizeC))
        {
            // do not leak the handle if the error macro returns control here
            rocblas_destroy_handle(handle);
            CHECK_HIP_ERROR(hipErrorOutOfMemory);
            return EXIT_FAILURE;
        }

        // copy data from CPU to device
        CHECK_HIP_ERROR(hipMemcpy(
            dA, static_cast<void*>(hA.data()), sizeof(dataType) * sizeA, hipMemcpyHostToDevice));
        CHECK_HIP_ERROR(hipMemcpy(
            dB, static_cast<void*>(hB.data()), sizeof(dataType) * sizeB, hipMemcpyHostToDevice));
        CHECK_HIP_ERROR(hipMemcpy(
            dC, static_cast<void*>(hC.data()), sizeof(dataType) * sizeC, hipMemcpyHostToDevice));

        // enable passing alpha and beta parameters from pointer to host memory
        rstatus = rocblas_set_pointer_mode(handle, rocblas_pointer_mode_host);
        CHECK_ROCBLAS_STATUS(rstatus);

        // asynchronous calculation on device, returns before finished calculations
        rstatus = rocblas_sgemm(
            handle, transA, transB, M, N, K, &hAlpha, dA, lda, dB, ldb, &hBeta, dC, ldc);

        // check that calculation was launched correctly on device, not that result
        // was computed yet
        CHECK_ROCBLAS_STATUS(rstatus);

        // fetch device memory results, automatically blocked until results ready
        CHECK_HIP_ERROR(hipMemcpy(hC.data(), dC, sizeof(dataType) * sizeC, hipMemcpyDeviceToHost));

    } // release device memory via helpers::DeviceVector destructors

    std::cout << "M, N, K, lda, ldb, ldc = " << M << ", " << N << ", " << K << ", " << lda << ", "
              << ldb << ", " << ldc << std::endl;

    // calculate gold standard using CPU
    helpers::matMatMult(hAlpha,
                        hBeta,
                        M,
                        N,
                        K,
                        hA.data(),
                        strideA1,
                        strideA2,
                        hB.data(),
                        strideB1,
                        strideB2,
                        hGold.data(),
                        1,
                        ldc);

    // pass when the worst relative error is within 10 machine epsilons
    dataType maxRelativeError = (dataType)helpers::maxRelativeError(hC, hGold);
    dataType eps              = std::numeric_limits<dataType>::epsilon();
    dataType tolerance        = 10;
    if(maxRelativeError > eps * tolerance)
    {
        std::cout << "FAIL";
    }
    else
    {
        std::cout << "PASS";
    }
    std::cout << ": max. relative err. = " << maxRelativeError << std::endl;

    rstatus = rocblas_destroy_handle(handle);
    CHECK_ROCBLAS_STATUS(rstatus);

    return EXIT_SUCCESS;
}
176 | -------------------------------------------------------------------------------- /Level-3/gemm_strided_batched/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | # 21 | # ######################################################################## 22 |
# Locations of the ROCm, HIP and rocBLAS installations; each may be overridden
# from the environment or the make command line.
ROCM_PATH?= $(wildcard /opt/rocm)
ifeq (,$(ROCM_PATH))
ROCM_PATH=
endif

HIP_PATH?= $(wildcard /opt/rocm)
ifeq (,$(HIP_PATH))
HIP_PATH=
endif
HIPCXX=$(HIP_PATH)/bin/hipcc

ifeq (,$(ROCBLAS_PATH))
# default to rocblas in standard ROCM tree
ROCBLAS_PATH= $(ROCM_PATH)
endif

# the executable is named after the directory that contains this Makefile
EXE = $(shell basename $(CURDIR))
COMMON_PATH = ../../common
SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp)
OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES))

CXX=g++
# uncomment to use hip compiler
#CXX=$(HIPCXX)
OPT = -g -Ofast -march=native -Wall
INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include
CXXFLAGS = -std=c++14 $(INC) $(OPT)
# a non-hipcc compiler needs the HIP platform macro defined explicitly
ifneq ($(CXX),$(HIPCXX))
CXXFLAGS += -D__HIP_PLATFORM_AMD__
endif

LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++
# a non-hipcc compiler must link the HIP runtime explicitly
ifneq ($(CXX),$(HIPCXX))
LDFLAGS += -lamdhip64
endif

RM = rm -f

.PHONY: all clean run

all: $(EXE)

%.o: %.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@

$(EXE): $(OBJECTS)
	$(CXX) $(OBJECTS) $(LDFLAGS) -o $@

clean:
	$(RM) $(EXE) $(OBJECTS)

# depend on the executable so that `make run` works from a clean tree
run: $(EXE)
	./$(EXE)

77 | -------------------------------------------------------------------------------- /Level-3/gemm_strided_batched/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples gemm_strided_batched 2 |
Example showing moving matrices and vector data to the GPU device and calling the rocblas gemm_strided_batched (general matrix matrix product) function. Batched functions repeat the same operation over multiple inputs. Strided variants use a stride to increment the data pointer between individual data sets in the batch. Results are fetched from GPU and compared against a CPU implementation. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values. 6 | Running with --help will show the options: 7 | 8 | Usage: ./gemm_strided_batched 9 | --K Matrix/vector dimension 10 | --M Matrix/vector dimension 11 | --N Matrix/vector dimension 12 | --alpha Alpha scalar 13 | --beta Beta scalar 14 | --count Batch count 15 | 16 | ## Building 17 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples which default to compile using gcc but can also use the hipcc compiler from the rocBLAS installation. The use of hipcc compiler can be set by uncommenting lines in the Makefiles. If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build. 18 | 19 | cd Level-3/gemm_strided_batched 20 | make 21 | ./gemm_strided_batched 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved.
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # ######################################################################## 22 | 23 |
# Top-level driver: recurses into every example directory (one level below each
# folder listed in LIBS) and runs the requested target there.

# folders to recurse into
LIBS = Level-1 Level-2 Level-3 Extensions Languages Patterns BuildTools
LIBSPATH = ./

.PHONY: all clean exe run

all: exe

# $(MAKE) instead of a literal `make` so that command-line variables and flags
# (-n, -j, ROCBLAS_PATH=..., ...) are propagated to the sub-makes.

# clean is best-effort: keep going even if one directory fails to clean
clean:
	$(foreach dir,$(LIBS),$(foreach eg, $(wildcard $(dir)/*/), $(MAKE) clean --no-print-directory -C $(LIBSPATH)/$(eg);) )

# stop at the first example that fails to build instead of hiding the error
# behind the exit status of the last sub-make
exe:
	$(foreach dir,$(LIBS),$(foreach eg, $(wildcard $(dir)/*/), $(MAKE) --no-print-directory -C $(LIBSPATH)/$(eg) || exit 1;) )

# build everything first, then run each example; stop at the first failure
run: exe
	$(foreach dir,$(LIBS),$(foreach eg, $(wildcard $(dir)/*/), $(MAKE) run --no-print-directory -C $(LIBSPATH)/$(eg) || exit 1;) )
40 | -------------------------------------------------------------------------------- /Patterns/Multi-device/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | # 21 | # ######################################################################## 22 |
# Locations of the ROCm, HIP and rocBLAS installations; each may be overridden
# from the environment or the make command line.
ROCM_PATH?= $(wildcard /opt/rocm)
ifeq (,$(ROCM_PATH))
ROCM_PATH=
endif

HIP_PATH?= $(wildcard /opt/rocm)
ifeq (,$(HIP_PATH))
HIP_PATH=
endif
HIPCXX=$(HIP_PATH)/bin/hipcc

ifeq (,$(ROCBLAS_PATH))
# default to rocblas in standard ROCM tree
ROCBLAS_PATH= $(ROCM_PATH)
endif

# the executable is named after the directory that contains this Makefile
EXE = $(shell basename $(CURDIR))
COMMON_PATH = ../../common
SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp)
OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES))

# -fopenmp is part of CXX so it applies to both the compile and the link step
CXX=g++ -fopenmp
# uncomment to use hip compiler
#CXX=$(HIPCXX)
OPT = -g -Ofast -march=native -Wall
INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include
CXXFLAGS = -std=c++14 $(INC) $(OPT)
# a non-hipcc compiler needs the HIP platform macro defined explicitly
ifneq ($(CXX),$(HIPCXX))
CXXFLAGS += -D__HIP_PLATFORM_AMD__
endif

LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++
# a non-hipcc compiler must link the HIP runtime explicitly
ifneq ($(CXX),$(HIPCXX))
LDFLAGS += -lamdhip64
endif

RM = rm -f

.PHONY: all clean run

all: $(EXE)

%.o: %.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@

$(EXE): $(OBJECTS)
	$(CXX) $(OBJECTS) $(LDFLAGS) -o $@

clean:
	$(RM) $(EXE) $(OBJECTS)

# depend on the executable so that `make run` works from a clean tree
run: $(EXE)
	./$(EXE)

77 | -------------------------------------------------------------------------------- /Patterns/Multi-device/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples Multi-device 2 | This
rocBLAS example showcases the use of multi-devices and multiple streams per device to call the 'rocblas_sgemm' function. Here, the input matrices are 'A' which is an 'M × K' matrix stored in rocblas_operation_none (No Transpose) mode and 'B' which is a 'K × N' matrix stored in rocblas_operation_none (No Transpose) mode. They are allocated per device per stream and transferred asynchronously from the host (CPU) to the device (GPU). The 'Alpha' and 'Beta' are scalar values. After calling the 'rocblas_sgemm' (general matrix-matrix multiply) function the result matrix 'C' which is of size 'M x N' is transferred asynchronously from the device to the host and compared against a CPU implementation. This example uses the helper::GPUTimer, which measures the time taken in the streams using events. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values (K=5, M=5, N=5, alpha=1, beta=1). 6 | Running with --help will show the options: 7 | 8 | Usage: ./Multi-device 9 | --K Matrix dimension 10 | --M Matrix dimension 11 | --N Matrix dimension 12 | --alpha Alpha scalar 13 | --beta Beta scalar 14 | 15 | ## Building 16 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples which default to compile using gcc but can also use the hipcc compiler from the rocBLAS installation. The use of hipcc compiler can be set by uncommenting lines in the Makefiles. If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build.
17 | 18 | cd Patterns/Multi-device 19 | make 20 | ./Multi-device 21 | 22 | ## Calculation of allowed_error 23 | 24 | Individual entries in the result matrix 'c' are calculated using: 25 | 26 | for(int i = 0; i < M; i++) 27 | { 28 | for(int j = 0; j < N; j++) 29 | { 30 | accumulator = 0; 31 | for(int k = 0; k < K; k++) 32 | { 33 | accumulator += a[i + k * lda] * b[k + j * ldb]; 34 | } 35 | c[i + j * ldc] = accumulator; 36 | } 37 | } 38 | 39 | 40 | With IEEE arithmetic, the allowed roundoff error for each update of the accumulator in the loop is 0.5 * ULP (unit of least precision). When the magnitude of the accumulator is 1 then ULP is approximately equal to eps (epsilon). When the magnitude of the accumulator is |accumulator|, then ULP is approximately 41 | equal to |accumulator| * eps. 42 | 43 | The correctness check requires the calculated result to be close enough to a reference result. The worst-case for the calculated result differing from the reference result would occur if every update of the accumulator in the calculated result rounded up 0.5 ULP and every update of the accumulator in the reference result rounded down 0.5 ULP or vice versa. The matrices a and b have pseudo-random values between 0 and 1. The worst-case for the error between the calculated and reference result would be if every pseudo-random value has the maximum value of 1. This would mean that the accumulator would have values 1, 2, 3, ... K. 44 | 45 | If we have both the worst cases above, then the allowable error is approximately: 46 | eps * (1 + 2 + 3 + ... + K) = eps * 0.5 * K * (K+1) 47 | The statistical probability that all 2 * K pseudo-random values in matrices a and b will be equal to 1 is small. The statistical probability that the calculated result will always round up and the reference result will always round down (or vice versa) is small. In place of one of the two factors of K in the above allowable error (approximating K+1 by K), the statistical argument suggests we use sqrt(K).
Because this is a non-rigorous argument, in place of 0.5 we use a tolerance of 10. Thus, we allow an error between the calculated and reference result of: 48 | eps * sqrt(K) * K * 10. 49 | 50 | -------------------------------------------------------------------------------- /Patterns/Multi-stream/Makefile: -------------------------------------------------------------------------------- 1 | # ######################################################################## 2 | # Copyright (C) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | # ies of the Software, and to permit persons to whom the Software is furnished 9 | # to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | # 21 | # ######################################################################## 22 |
# Locations of the ROCm, HIP and rocBLAS installations; each may be overridden
# from the environment or the make command line.
ROCM_PATH?= $(wildcard /opt/rocm)
ifeq (,$(ROCM_PATH))
ROCM_PATH=
endif

HIP_PATH?= $(wildcard /opt/rocm)
ifeq (,$(HIP_PATH))
HIP_PATH=
endif
HIPCXX=$(HIP_PATH)/bin/hipcc

ifeq (,$(ROCBLAS_PATH))
# default to rocblas in standard ROCM tree
ROCBLAS_PATH= $(ROCM_PATH)
endif

# the executable is named after the directory that contains this Makefile
EXE = $(shell basename $(CURDIR))
COMMON_PATH = ../../common
SOURCES = $(wildcard *.cpp) $(wildcard $(COMMON_PATH)/*.cpp)
OBJECTS = $(patsubst %.cpp, %.o, $(SOURCES))

# -fopenmp is part of CXX so it applies to both the compile and the link step
CXX=g++ -fopenmp
# uncomment to use hip compiler
#CXX=$(HIPCXX)
OPT = -g -Ofast -march=native -Wall
INC = -I$(COMMON_PATH) -isystem$(ROCBLAS_PATH)/include -isystem$(ROCM_PATH)/include
CXXFLAGS = -std=c++14 $(INC) $(OPT)
# a non-hipcc compiler needs the HIP platform macro defined explicitly
ifneq ($(CXX),$(HIPCXX))
CXXFLAGS += -D__HIP_PLATFORM_AMD__
endif

LDFLAGS=-L$(ROCBLAS_PATH)/lib -L$(ROCM_PATH)/lib -lrocblas -Wl,-rpath=$(ROCBLAS_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lm -lpthread -lstdc++
# a non-hipcc compiler must link the HIP runtime explicitly
ifneq ($(CXX),$(HIPCXX))
LDFLAGS += -lamdhip64
endif

RM = rm -f

.PHONY: all clean run

all: $(EXE)

%.o: %.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@

$(EXE): $(OBJECTS)
	$(CXX) $(OBJECTS) $(LDFLAGS) -o $@

clean:
	$(RM) $(EXE) $(OBJECTS)

# depend on the executable so that `make run` works from a clean tree
run: $(EXE)
	./$(EXE)

77 | -------------------------------------------------------------------------------- /Patterns/Multi-stream/README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples Multi-stream 2 | This example presents an input matrix 'A' which is 'N × N' symmetric matrix stored in upper triangular mode, input vectors 'X', 'Y' of size 'N x incx' and 'N x incy' respectively. They are transferred asynchronously using multiple-streams from the host (CPU) to the device (GPU).
The 'Alpha' and 'Beta' are scalar values. After calling the rocblas symv (symmetric matrix-vector product) function, the vector 'y', overwritten with the result, is transferred asynchronously using multiple-streams from the device to the host and compared against a CPU implementation and displayed. This example uses the helpers::GPUTimer to see how hip API calls can be used to time computation in a stream using events. 3 | 4 | ## Documentation 5 | Run the example without any command line arguments to use default values (N=5, alpha=1, beta=1, incx=1, incy=1). 6 | Running with --help will show the options: 7 | 8 | Usage: ./Multi-stream 9 | --N Matrix/vector dimension 10 | --alpha Alpha scalar 11 | --beta Beta scalar 12 | --incx Increment for x vector 13 | --incy Increment for y vector 14 | 15 | ## Building 16 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples which default to compile using gcc but can also use the hipcc compiler from the rocBLAS installation. The use of the hipcc compiler can be set by uncommenting lines in the Makefiles. If rocBLAS is not installed you can set the environment variable ROCBLAS_PATH to point to the location of your rocblas build. 17 | 18 | cd Patterns/Multi-stream 19 | make 20 | ./Multi-stream 21 | 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rocBLAS-Examples 2 | Examples for using rocBLAS, which is a GPU-exploiting implementation of BLAS. 3 | 4 | # rocBLAS 5 | rocBLAS is AMD's library for BLAS on [ROCm™](https://rocmdocs.amd.com/en/latest/). 6 | It is implemented in the [HIP](https://github.com/ROCm-Developer-Tools/HIP) 7 | programming language and optimized for AMD's GPUs.
8 | 9 | |Acronym | Expansion | 10 | |-------------|-------------------------------------------------------------| 11 | |**BLAS** | **B**asic **L**inear **A**lgebra **S**ubprograms | 12 | |**HIP** | **H**eterogeneous-Compute **I**nterface for **P**ortability | 13 | 14 | ## Documentation 15 | Documentation for each example is contained in the README.md of each example's directory and in the source code itself. 16 | The examples utilize C++ and some shared helper code which is all contained in the common directory. The design patterns used in common may be utilized but are intended to keep the focus of individual examples on the rocBLAS calling structure. 17 | 18 | ## Prerequisites 19 | * rocBLAS and its prerequisites 20 | * ROCm version 3.5 or later (rocBLAS version 2.22 or later) 21 | * As this repo is not tied to specific ROCm releases we recommend building against the latest release of ROCm or the master branch of rocBLAS 22 | 23 | If you require rocBLAS it is available at 24 | [https://github.com/ROCmSoftwarePlatform/rocBLAS](https://github.com/ROCmSoftwarePlatform/rocBLAS) 25 | 26 | ## Installing 27 | This repository can be cloned into any directory where you want to build the examples. 28 | 29 | ## Building 30 | These examples require that you have an installation of rocBLAS on your machine. You do not require sudo or other access to build these examples which default to compile with gcc but can also use the hipcc compiler from the rocBLAS installation. The compiler must support the c++14 standard. The use of hipcc can be set by uncommenting a line in the Makefiles. The Makefiles support building against a locally built but not installed version of rocBLAS by setting the environment variable ROCBLAS_PATH, e.g.
31 | export ROCBLAS_PATH=/...yourlocalpath.../rocBLAS/build/release/rocblas-install 32 | 33 | After cloning this repository you can build all the examples using make in the top-level directory, or run make in a sub-level directory to build a specific example: 34 | 35 | cd Level-1/swap 36 | make 37 | ./swap 38 | 39 | Level-1/swap is the simplest example and is a good starting point to read over the code as it introduces the concepts which may be skipped over in other examples. 40 | 41 | Note when compiling with gcc we are defining both the newer -D__HIP_PLATFORM_AMD__ and the deprecated -D__HIP_PLATFORM_HCC__ to allow building against various rocm releases. 42 | 43 | ## Contributing 44 | Additional examples should be added in the Applications directory. The directory name should indicate the application domain and examples must contain a README.md file. Additional examples may use the common code but should not modify it. 45 | 46 | -------------------------------------------------------------------------------- /common/ArgParser.hpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | * 21 | * ************************************************************************ */ 22 | 23 | #pragma once 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | namespace helpers 36 | { 37 | struct ArgInfo 38 | { 39 | std::string mParamDescription; 40 | int mParamKeyInt; 41 | }; 42 | 43 | /*! \brief Base class for simple command line parsing 44 | ********************************************************************/ 45 | class ArgParserBase 46 | { 47 | public: 48 | enum 49 | { 50 | eOk, 51 | eInvalid, 52 | eNoMatch, 53 | }; 54 | 55 | ArgParserBase(const std::vector& options = {}); 56 | 57 | void printHelp(); 58 | 59 | void usage(int argIdx); 60 | 61 | void decodeOption(const std::string& option); 62 | 63 | void addParams(const std::vector& options); 64 | 65 | std::string argKey(int argIdx); 66 | 67 | int getOptionInt(int argIdx); 68 | 69 | virtual int parse(int& argIdx, char** argv) 70 | { 71 | return eNoMatch; 72 | } 73 | 74 | virtual int parseStandardOption(int& argIdx, char** argv) 75 | { 76 | return eOk; 77 | } 78 | 79 | /*! 
* @brief validArgs parses the command line options 80 | * @param argc number of elements in cmd line input 81 | * @param argv array of char* storing the CmdLine Options 82 | * @return true if all specified arguments are valid 83 | ********************************************************************/ 84 | bool validArgs(int argc, char** argv); 85 | 86 | protected: 87 | // arg processing information 88 | std::map mOptions; 89 | std::set mStandardParam; 90 | 91 | int mArgc; 92 | char** mArgv; 93 | }; 94 | 95 | /*! \brief class for rocblas examples command line parsing of common parameters 96 | *******************************************************************************/ 97 | class ArgParser : public ArgParserBase 98 | { 99 | public: 100 | ArgParser(std::string standardArgs, const std::vector& options = {}); 101 | 102 | int parseStandardOption(int& argIdx, char** argv) override; 103 | 104 | public: 105 | // common arguments for rocblas functions 106 | 107 | rocblas_int M = 5; 108 | rocblas_int N = 5; 109 | rocblas_int K = 5; 110 | 111 | rocblas_int n = 5; 112 | rocblas_int incx = 1; 113 | rocblas_int incy = 1; 114 | 115 | rocblas_int batchCount = 3; 116 | 117 | float alpha = 1.0f; 118 | float beta = 1.0f; 119 | }; 120 | 121 | } // namespace helpers 122 | -------------------------------------------------------------------------------- /common/error_macros.h: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 8 | * ies of the Software, and to permit persons to whom the Software is furnished 9 | * to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in all 12 | * copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 15 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 19 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | * 21 | * ************************************************************************ */ 22 | 23 | #pragma once 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #ifndef CHECK_HIP_ERROR 30 | #define CHECK_HIP_ERROR(error) \ 31 | if(error != hipSuccess) \ 32 | { \ 33 | fprintf(stderr, \ 34 | "hip error: '%s'(%d) at %s:%d\n", \ 35 | hipGetErrorString(error), \ 36 | error, \ 37 | __FILE__, \ 38 | __LINE__); \ 39 | exit(EXIT_FAILURE); \ 40 | } 41 | #endif 42 | 43 | #ifndef CHECK_ROCBLAS_STATUS 44 | #define CHECK_ROCBLAS_STATUS(status) \ 45 | if(status != rocblas_status_success) \ 46 | { \ 47 | fprintf(stderr, "rocBLAS error: "); \ 48 | fprintf(stderr, \ 49 | "rocBLAS error: '%s'(%d) at %s:%d\n", \ 50 | rocblas_status_to_string(status), \ 51 | status, \ 52 | __FILE__, \ 53 | __LINE__); \ 54 | exit(EXIT_FAILURE); \ 55 | } 56 | #endif 57 | -------------------------------------------------------------------------------- /common/memoryHelpers.hpp: -------------------------------------------------------------------------------- 1 | 2 | /* ************************************************************************ 3 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 9 | * ies of the Software, and to permit persons to whom the Software is furnished 10 | * to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 16 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 20 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | * 22 | * ************************************************************************ */ 23 | 24 | #pragma once 25 | #include "error_macros.h" 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | namespace helpers 36 | { 37 | 38 | /*! \brief base-class to allocate/deallocate device memory */ 39 | template 40 | class DeviceVectorMemory 41 | { 42 | protected: 43 | size_t mSize, mBytes; 44 | 45 | U mGuard[PAD]; 46 | DeviceVectorMemory(size_t s) 47 | : mSize(s) 48 | , mBytes((s + PAD * 2) * sizeof(T)) 49 | { 50 | // Initialize mGuard 51 | if(PAD > 0) 52 | { 53 | memset(&mGuard[0], 0xfe, PAD * sizeof(U)); 54 | } 55 | } 56 | 57 | T* setup() 58 | { 59 | T* d; 60 | if((hipMalloc)(&d, mBytes) != hipSuccess) 61 | { 62 | fprintf(stderr, "Error allocating %zu mBytes (%zu GB)\n", mBytes, mBytes >> 30); 63 | d = nullptr; 64 | } 65 | else 66 | { 67 | if(PAD > 0) 68 | { 69 | // Copy mGuard to device memory before start of allocated memory 70 | CHECK_HIP_ERROR(hipMemcpy(d, mGuard, sizeof(mGuard), hipMemcpyHostToDevice)); 71 | 72 | // Point to allocated block 73 | d += PAD; 74 | 75 | // Copy mGuard to device memory after end of allocated memory 76 | CHECK_HIP_ERROR( 77 | hipMemcpy(d + mSize, mGuard, sizeof(mGuard), hipMemcpyHostToDevice)); 78 | } 79 | } 80 | return d; 81 | } 82 | 83 | void teardown(T* d) 84 | { 85 | if(d != nullptr) 86 | { 87 | if(PAD > 0) 88 | { 89 | U host[PAD]; 90 | 91 | // Copy device 
memory after allocated memory to host 92 | CHECK_HIP_ERROR( 93 | hipMemcpy(host, d + mSize, sizeof(mGuard), hipMemcpyDeviceToHost)); 94 | 95 | // Make sure no corruption has occurred 96 | assert(!memcmp(host, mGuard, sizeof(mGuard))); 97 | 98 | // Point to the guard region before the allocated memory 99 | d -= PAD; 100 | 101 | // Copy device memory before allocated memory to host 102 | CHECK_HIP_ERROR(hipMemcpy(host, d, sizeof(mGuard), hipMemcpyDeviceToHost)); 103 | 104 | // Make sure no corruption has occurred 105 | assert(!memcmp(host, mGuard, sizeof(mGuard))); 106 | } 107 | // Free device memory 108 | CHECK_HIP_ERROR((hipFree)(d)); 109 | } 110 | } 111 | }; 112 | 113 | /*! \brief pseudo-vector subclass which uses device memory */ 114 | template 115 | class DeviceVector : private DeviceVectorMemory 116 | { 117 | public: 118 | explicit DeviceVector(size_t s) 119 | : DeviceVectorMemory(s) 120 | { 121 | mData = this->setup(); 122 | } 123 | 124 | ~DeviceVector() 125 | { 126 | this->teardown(mData); 127 | } 128 | 129 | // Decay into pointer wherever pointer is expected 130 | operator T*() 131 | { 132 | return mData; 133 | } 134 | 135 | operator const T*() const 136 | { 137 | return mData; 138 | } 139 | 140 | T* data() const 141 | { 142 | return mData; 143 | } 144 | 145 | // True when the device allocation succeeded (mData is non-null) 146 | explicit operator bool() const 147 | { 148 | return mData != nullptr; 149 | } 150 | 151 | // Disallow copying or assigning 152 | DeviceVector(const DeviceVector&) = delete; 153 | DeviceVector& operator=(const DeviceVector&) = delete; 154 | 155 | private: 156 | T* mData; 157 | }; 158 | 159 | /*!
\brief pseudo-vector subclass which uses a BatchCount of device memory 160 | pointers and an array of pointers in host memory*/ 161 | template 162 | class DeviceBatchVector : private DeviceVectorMemory 163 | { 164 | public: 165 | explicit DeviceBatchVector(size_t b, size_t s) 166 | : mBatchCount(b) 167 | , DeviceVectorMemory(s) 168 | { 169 | mData = (T**)malloc(mBatchCount * sizeof(T*)); 170 | for(int b = 0; b < mBatchCount; ++b) 171 | mData[b] = this->setup(); 172 | } 173 | 174 | ~DeviceBatchVector() 175 | { 176 | if(mData != nullptr) 177 | { 178 | for(int b = 0; b < mBatchCount; ++b) 179 | this->teardown(mData[b]); 180 | free(mData); 181 | } 182 | } 183 | 184 | T* operator[](int n) 185 | { 186 | return mData[n]; 187 | } 188 | 189 | operator T**() 190 | { 191 | return mData; 192 | } 193 | 194 | T** data() const 195 | { 196 | return mData; 197 | } 198 | 199 | // Disallow copying or assigning 200 | DeviceBatchVector(const DeviceBatchVector&) = delete; 201 | DeviceBatchVector& operator=(const DeviceBatchVector&) = delete; 202 | 203 | private: 204 | T** mData; 205 | size_t mBatchCount; 206 | }; 207 | 208 | } // namespace helpers 209 | -------------------------------------------------------------------------------- /common/timers.hpp: -------------------------------------------------------------------------------- 1 | 2 | /* ************************************************************************ 3 | * Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. 
4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- 9 | * ies of the Software, and to permit persons to whom the Software is furnished 10 | * to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- 16 | * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- 20 | * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | * 22 | * ************************************************************************ */ 23 | 24 | #pragma once 25 | #include "error_macros.h" 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | namespace helpers 32 | { 33 | 34 | /*! 
\brief Hip event based GPU timer 35 | ********************************************************************/ 36 | class GPUTimer 37 | { 38 | public: 39 | GPUTimer() 40 | { 41 | CHECK_HIP_ERROR(hipEventCreate(&mStart)); 42 | CHECK_HIP_ERROR(hipEventCreate(&mStop)); 43 | } 44 | virtual ~GPUTimer() 45 | { 46 | CHECK_HIP_ERROR(hipEventDestroy(mStart)); 47 | CHECK_HIP_ERROR(hipEventDestroy(mStop)); 48 | } 49 | 50 | void start() 51 | { 52 | CHECK_HIP_ERROR(hipEventRecord(mStart)); 53 | } 54 | float stop(const char* msg = nullptr) 55 | { 56 | CHECK_HIP_ERROR(hipEventRecord(mStop)); 57 | CHECK_HIP_ERROR(hipEventSynchronize(mStop)); 58 | float timeElaspsedMillisec = 0.0f; 59 | CHECK_HIP_ERROR(hipEventElapsedTime(&timeElaspsedMillisec, mStart, mStop)); 60 | const char* prefix = msg ? msg : "hipEventElapsedTime: "; 61 | std::cout << prefix << timeElaspsedMillisec << "ms" << std::endl; 62 | return timeElaspsedMillisec; 63 | } 64 | 65 | protected: 66 | hipEvent_t mStart; 67 | hipEvent_t mStop; 68 | }; 69 | 70 | /*! \brief std::chrono based CPU timer 71 | ********************************************************************/ 72 | class CPUTimer 73 | { 74 | public: 75 | CPUTimer() {} 76 | virtual ~CPUTimer() {} 77 | 78 | void start() 79 | { 80 | mStart = std::chrono::high_resolution_clock::now(); 81 | } 82 | double stop(const char* msg = nullptr) 83 | { 84 | mStop = std::chrono::high_resolution_clock::now(); 85 | constexpr double cNanosecToMillisec = 1e-6; 86 | double timeElaspsedMillisec 87 | = std::chrono::duration_cast(mStop - mStart).count() 88 | * cNanosecToMillisec; 89 | const char* prefix = msg ? 
msg : "Time elpased: "; 90 | std::cout << prefix << timeElaspsedMillisec << "ms" << std::endl; 91 | return timeElaspsedMillisec; 92 | } 93 | 94 | protected: 95 | std::chrono::high_resolution_clock::time_point mStart; 96 | std::chrono::high_resolution_clock::time_point mStop; 97 | }; 98 | 99 | } // namespace helpers 100 | -------------------------------------------------------------------------------- /docker/dockerfile-build-centos: -------------------------------------------------------------------------------- 1 | # Parameters related to building rocblas 2 | ARG base_image 3 | 4 | FROM ${base_image} 5 | LABEL maintainer="rocblas-examples.maintainer@amd.com" 6 | 7 | ARG user_uid 8 | 9 | # Install dependent packages 10 | RUN yum install -y \ 11 | sudo \ 12 | rocm-dev \ 13 | centos-release-scl \ 14 | devtoolset-7 \ 15 | ca-certificates \ 16 | git \ 17 | make \ 18 | gcc-c++ \ 19 | unzip \ 20 | wget \ 21 | libcxx-devel \ 22 | zlib-devel \ 23 | numactl-libs 24 | 25 | # Remove old rocblas rpm already in CentOS docker image 26 | RUN yum remove -y rocblas 27 | 28 | RUN echo '#!/bin/bash' | tee /etc/profile.d/devtoolset7.sh && echo \ 29 | 'source scl_source enable devtoolset-7' >> /etc/skel/.bashrc 30 | 31 | # Grant any member of sudo group password-less sudo privileges 32 | RUN useradd --create-home -u ${user_uid} -o -G video --shell /bin/bash jenkins && \ 33 | echo '%video ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd && \ 34 | chmod 400 /etc/sudoers.d/sudo-nopasswd 35 | -------------------------------------------------------------------------------- /docker/dockerfile-build-sles: -------------------------------------------------------------------------------- 1 | # Parameters related to building rocBLAS 2 | ARG base_image 3 | 4 | FROM ${base_image} 5 | LABEL maintainer="rocblas-examples-maintainer@amd.com" 6 | 7 | ARG user_uid 8 | 9 | # Install dependent packages 10 | RUN zypper refresh && zypper -n --no-gpg-checks install \ 11 | sudo \ 12 | ca-certificates 
\ 13 | git \ 14 | gcc-c++ \ 15 | make \ 16 | cmake \ 17 | libcxxtools9 \ 18 | unzip \ 19 | wget \ 20 | libnuma1 \ 21 | libLLVM7 22 | 23 | # docker pipeline runs containers with particular uid 24 | # create a jenkins user with this specific uid so it can use sudo privileges 25 | # Grant any member of sudo group password-less sudo privileges 26 | RUN useradd --create-home -u ${user_uid} -o -G video --shell /bin/bash jenkins && \ 27 | echo '%video ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd && \ 28 | chmod 400 /etc/sudoers.d/sudo-nopasswd 29 | -------------------------------------------------------------------------------- /docker/dockerfile-build-ubuntu-rock: -------------------------------------------------------------------------------- 1 | # Parameters related to building hip 2 | ARG base_image 3 | 4 | FROM ${base_image} 5 | LABEL maintainer="rocblas-examples.maintainer@amd.com" 6 | 7 | ARG user_uid 8 | 9 | # Install dependent packages 10 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 11 | dkms \ 12 | rocm-dev \ 13 | sudo \ 14 | ca-certificates \ 15 | git \ 16 | make \ 17 | libnuma1 \ 18 | zlib1g-dev \ 19 | unzip \ 20 | wget \ 21 | libomp-dev \ 22 | && \ 23 | apt-get clean && \ 24 | rm -rf /var/lib/apt/lists/* 25 | 26 | # docker pipeline runs containers with particular uid 27 | # create a jenkins user with this specific uid so it can use sudo privileges 28 | # Grant any member of sudo group password-less sudo privileges 29 | RUN useradd --create-home -u ${user_uid} -o -G video --shell /bin/bash jenkins && \ 30 | echo '%video ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd && \ 31 | chmod 400 /etc/sudoers.d/sudo-nopasswd 32 | -------------------------------------------------------------------------------- /docker/dockerfile-install-centos: -------------------------------------------------------------------------------- 1 | # Parameters related to building rocsolver 2 | ARG base_image 3
| 4 | FROM ${base_image} 5 | LABEL maintainer="rocblas-examples.maintainer@amd.com" 6 | 7 | # Copy the rpm package of rocblas into the container from host 8 | COPY *.rpm /tmp/ 9 | 10 | # Install the rpm package, and print out contents of expected changed locations 11 | RUN yum -y update && yum install -y\ 12 | /tmp/rocblas-*.rpm \ 13 | && rm -f /tmp/*.rpm \ 14 | && yum -y clean all \ 15 | && rm -rf /var/lib/apt/lists/* \ 16 | && printf "ls -la /etc/ld.so.conf.d/\n" && ls -la /etc/ld.so.conf.d/ \ 17 | && printf "ls -la /opt/rocm/include\n" && ls -la /opt/rocm/include \ 18 | && printf "ls -la /opt/rocm/lib\n" && ls -la /opt/rocm/lib \ 19 | && printf "ls -la /opt/rocm/lib/cmake\n" && ls -la /opt/rocm/lib/cmake \ 20 | && printf "ls -la /opt/rocm/rocblas/include\n" && ls -la /opt/rocm/rocblas/include \ 21 | && printf "ls -la /opt/rocm/rocblas/lib\n" && ls -la /opt/rocm/rocblas/lib 22 | -------------------------------------------------------------------------------- /docker/dockerfile-install-sles: -------------------------------------------------------------------------------- 1 | # Parameters related to building rocblas 2 | ARG base_image 3 | 4 | FROM ${base_image} 5 | LABEL maintainer="rocblas-examples-maintainer@amd.com" 6 | -------------------------------------------------------------------------------- /docker/dockerfile-install-ubuntu: -------------------------------------------------------------------------------- 1 | # Parameters related to building rocsolver 2 | ARG base_image 3 | 4 | FROM ${base_image} 5 | LABEL maintainer="rocblas-examples.maintainer@amd.com" 6 | 7 | # Copy the debian package of rocblas into the container from host 8 | COPY *.deb /tmp/ 9 | 10 | # Install the debian package, and print out contents of expected changed locations 11 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends --allow-unauthenticated -y \ 12 | /tmp/rocblas-*.deb \ 13 | && rm -f /tmp/*.deb \ 14 | && apt-get clean \ 15 | && rm -rf 
/var/lib/apt/lists/* \ 16 | && printf "ls -la /etc/ld.so.conf.d/\n" && ls -la /etc/ld.so.conf.d/ \ 17 | && printf "ls -la /opt/rocm/include\n" && ls -la /opt/rocm/include \ 18 | && printf "ls -la /opt/rocm/lib\n" && ls -la /opt/rocm/lib \ 19 | && printf "ls -la /opt/rocm/lib/cmake\n" && ls -la /opt/rocm/lib/cmake \ 20 | && printf "ls -la /opt/rocm/rocblas/include\n" && ls -la /opt/rocm/rocblas/include \ 21 | && printf "ls -la /opt/rocm/rocblas/lib\n" && ls -la /opt/rocm/rocblas/lib 22 | -------------------------------------------------------------------------------- /rtest.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | gemm_ex_bf16_r 5 | gemm_ex_f16_r 6 | her 7 | 8 | axpy 9 | dot 10 | nrm2 11 | scal 12 | swap 13 | gemv 14 | gemm 15 | gemm_strided_batched 16 | Multi-device 17 | Multi-stream 18 | 19 | 20 | --------------------------------------------------------------------------------