├── .clang-format ├── .github ├── CODEOWNERS ├── CONTRIBUTING.md └── workflows │ └── docs.yaml ├── .gitignore ├── .jenkins ├── common.groovy └── precheckin.groovy ├── CMakeLists.txt ├── LICENSE.md ├── NOTICES.txt ├── README.md ├── cmake ├── Dependencies.cmake ├── FindLIBNUMA.cmake └── version.cmake ├── docker ├── dockerfile-build-centos ├── dockerfile-build-sles └── dockerfile-build-ubuntu-rock ├── install.sh ├── rtest.xml └── src ├── CG.cpp ├── CG.hpp ├── CGData.hpp ├── CG_ref.cpp ├── CG_ref.hpp ├── CMakeLists.txt ├── CheckAspectRatio.cpp ├── CheckAspectRatio.hpp ├── CheckProblem.cpp ├── CheckProblem.hpp ├── ComputeDotProduct.cpp ├── ComputeDotProduct.hpp ├── ComputeDotProduct_ref.cpp ├── ComputeDotProduct_ref.hpp ├── ComputeMG.cpp ├── ComputeMG.hpp ├── ComputeMG_ref.cpp ├── ComputeMG_ref.hpp ├── ComputeOptimalShapeXYZ.cpp ├── ComputeOptimalShapeXYZ.hpp ├── ComputeProlongation.cpp ├── ComputeProlongation.hpp ├── ComputeProlongation_ref.cpp ├── ComputeProlongation_ref.hpp ├── ComputeResidual.cpp ├── ComputeResidual.hpp ├── ComputeResidual_ref.cpp ├── ComputeResidual_ref.hpp ├── ComputeRestriction.cpp ├── ComputeRestriction.hpp ├── ComputeRestriction_ref.cpp ├── ComputeRestriction_ref.hpp ├── ComputeSPMV.cpp ├── ComputeSPMV.hpp ├── ComputeSPMV_ref.cpp ├── ComputeSPMV_ref.hpp ├── ComputeSYMGS.cpp ├── ComputeSYMGS.hpp ├── ComputeSYMGS_ref.cpp ├── ComputeSYMGS_ref.hpp ├── ComputeWAXPBY.cpp ├── ComputeWAXPBY.hpp ├── ComputeWAXPBY_ref.cpp ├── ComputeWAXPBY_ref.hpp ├── ExchangeHalo.cpp ├── ExchangeHalo.hpp ├── GenerateCoarseProblem.cpp ├── GenerateCoarseProblem.hpp ├── GenerateGeometry.cpp ├── GenerateGeometry.hpp ├── GenerateProblem.cpp ├── GenerateProblem.hpp ├── GenerateProblem_ref.cpp ├── GenerateProblem_ref.hpp ├── Geometry.hpp ├── MGData.hpp ├── Memory.cpp ├── Memory.hpp ├── MixedBaseCounter.cpp ├── MixedBaseCounter.hpp ├── MultiColoring.cpp ├── MultiColoring.hpp ├── OptimizeProblem.cpp ├── OptimizeProblem.hpp ├── OutputFile.cpp ├── OutputFile.hpp ├── Permute.cpp 
├── Permute.hpp ├── ReadHpcgDat.cpp ├── ReadHpcgDat.hpp ├── ReportResults.cpp ├── ReportResults.hpp ├── SetupHalo.cpp ├── SetupHalo.hpp ├── SetupHalo_ref.cpp ├── SetupHalo_ref.hpp ├── SparseMatrix.cpp ├── SparseMatrix.hpp ├── TestCG.cpp ├── TestCG.hpp ├── TestNorms.cpp ├── TestNorms.hpp ├── TestSymmetry.cpp ├── TestSymmetry.hpp ├── Vector.hpp ├── Version.hpp.in ├── WriteProblem.cpp ├── WriteProblem.hpp ├── YAML_Doc.cpp ├── YAML_Doc.hpp ├── YAML_Element.cpp ├── YAML_Element.hpp ├── finalize.cpp ├── hpcg.hpp ├── init.cpp ├── main.cpp ├── mytimer.cpp ├── mytimer.hpp ├── rochpcg_gtest_main.cpp ├── test_rochpcg.cpp ├── test_rochpcg.hpp └── utils.hpp /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | AccessModifierOffset: 0 4 | AlignAfterOpenBracket: Align 5 | AlignConsecutiveAssignments: true 6 | AlignConsecutiveDeclarations: false 7 | AlignEscapedNewlinesLeft: true 8 | AlignOperands: true 9 | AlignTrailingComments: true 10 | AllowAllParametersOfDeclarationOnNextLine: true 11 | AllowShortBlocksOnASingleLine: true 12 | AllowShortCaseLabelsOnASingleLine: true 13 | AllowShortFunctionsOnASingleLine: All 14 | AllowShortIfStatementsOnASingleLine: false 15 | AllowShortLoopsOnASingleLine: false 16 | AlwaysBreakAfterDefinitionReturnType: None 17 | AlwaysBreakAfterReturnType: None 18 | AlwaysBreakBeforeMultilineStrings: false 19 | AlwaysBreakTemplateDeclarations: true 20 | BinPackArguments: false 21 | BinPackParameters: false 22 | BraceWrapping: 23 | AfterClass: true 24 | AfterControlStatement: true 25 | AfterEnum: true 26 | AfterFunction: true 27 | AfterNamespace: false 28 | AfterObjCDeclaration: true 29 | AfterStruct: true 30 | AfterUnion: true 31 | BeforeCatch: true 32 | BeforeElse: true 33 | IndentBraces: false 34 | BreakBeforeBinaryOperators: None 35 | BreakBeforeBraces: Custom 36 | BreakBeforeTernaryOperators: true 37 | BreakConstructorInitializersBeforeComma: false 38 | ColumnLimit: 100 39 | 
CommentPragmas: '^ IWYU pragma:' 40 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 41 | ConstructorInitializerIndentWidth: 4 42 | ContinuationIndentWidth: 4 43 | Cpp11BracedListStyle: true 44 | DerivePointerAlignment: false 45 | DisableFormat: false 46 | ExperimentalAutoDetectBinPacking: false 47 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] 48 | IncludeCategories: 49 | - Regex: '^"(llvm|llvm-c|clang|clang-c)/' 50 | Priority: 2 51 | - Regex: '^(<|"(gtest|isl|json)/)' 52 | Priority: 3 53 | - Regex: '.*' 54 | Priority: 1 55 | IndentCaseLabels: false 56 | IndentWidth: 4 57 | IndentWrappedFunctionNames: false 58 | KeepEmptyLinesAtTheStartOfBlocks: true 59 | MacroBlockBegin: '' 60 | MacroBlockEnd: '' 61 | MaxEmptyLinesToKeep: 1 62 | NamespaceIndentation: None 63 | ObjCBlockIndentWidth: 2 64 | ObjCSpaceAfterProperty: false 65 | ObjCSpaceBeforeProtocolList: true 66 | PenaltyBreakBeforeFirstCallParameter: 19 67 | PenaltyBreakComment: 300 68 | PenaltyBreakFirstLessLess: 120 69 | PenaltyBreakString: 1000 70 | PenaltyExcessCharacter: 1000000 71 | PenaltyReturnTypeOnItsOwnLine: 60 72 | PointerAlignment: Left 73 | ReflowComments: true 74 | SortIncludes: false 75 | SpaceAfterCStyleCast: false 76 | # SpaceAfterTemplateKeyword: true 77 | SpaceBeforeAssignmentOperators: true 78 | SpaceBeforeParens: Never 79 | SpaceInEmptyParentheses: false 80 | SpacesBeforeTrailingComments: 1 81 | SpacesInAngles: false 82 | SpacesInContainerLiterals: true 83 | SpacesInCStyleCastParentheses: false 84 | SpacesInParentheses: false 85 | SpacesInSquareBrackets: false 86 | Standard: c++17 87 | TabWidth: 8 88 | UseTab: Never 89 | ... 
90 | 91 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @ntrost57 @YvanMokwinski @jsandham 2 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: Upload to the upload server 2 | 3 | # Controls when the workflow will run 4 | on: 5 | push: 6 | branches: [develop, master] 7 | tags: 8 | - rocm-5.* 9 | release: 10 | types: [published] 11 | 12 | # Allows you to run this workflow manually from the Actions tab 13 | workflow_dispatch: 14 | 15 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 16 | jobs: 17 | # This workflow contains a single job called "build" 18 | build: 19 | # The type of runner that the job will run on 20 | runs-on: ubuntu-latest 21 | 22 | # Steps represent a sequence of tasks that will be executed as part of the job 23 | steps: 24 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 25 | - uses: actions/checkout@v2 26 | 27 | - name: getting branch name 28 | shell: bash 29 | run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" 30 | id: branch_name 31 | - name: getting tag name 32 | shell: bash 33 | run: echo "##[set-output name=tag;]$(echo ${GITHUB_REF_NAME})" 34 | id: tag_name 35 | - name: zipping files 36 | run: zip -r ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip . 
-x '*.git*' '*.idea*' 37 | - name: echo-step 38 | run: echo "${{ github.event.release.target_commitish }}" 39 | - name: uploading archive to prod 40 | if: ${{ steps.branch_name.outputs.branch == 'master' || github.event.release.target_commitish == 'master'}} 41 | uses: wlixcc/SFTP-Deploy-Action@v1.0 42 | with: 43 | username: ${{ secrets.USERNAME }} 44 | server: ${{ secrets.SERVER }} 45 | ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} 46 | local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip 47 | remote_path: '${{ secrets.PROD_UPLOAD_URL }}' 48 | args: '-o ConnectTimeout=5' 49 | - name: uploading archive to staging 50 | if: ${{ steps.branch_name.outputs.branch == 'develop' || github.event.release.target_commitish == 'develop' }} 51 | uses: wlixcc/SFTP-Deploy-Action@v1.0 52 | with: 53 | username: ${{ secrets.USERNAME }} 54 | server: ${{ secrets.SERVER }} 55 | ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} 56 | local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip 57 | remote_path: '${{ secrets.STG_UPLOAD_URL }}' 58 | args: '-o ConnectTimeout=5' 59 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | 30 | # vim tags 31 | tags 32 | .tags 33 | .*.swp 34 | 35 | # Editors 36 | .vscode 37 | 38 | # build-in-source directory 39 | build 40 | 41 | # doc directory 42 | docBin 43 | _build 44 | -------------------------------------------------------------------------------- /.jenkins/common.groovy: 
-------------------------------------------------------------------------------- 1 | // This file is for internal AMD use. 2 | // If you are interested in running your own Jenkins, please raise a github issue for assistance. 3 | 4 | def runCompileCommand(platform, project, jobName) 5 | { 6 | project.paths.construct_build_prefix() 7 | 8 | def command 9 | def getDependencies = auxiliary.getLibrary('rocPRIM', platform.jenkinsLabel,'develop') 10 | def compiler = '/opt/rocm/bin/amdclang++' 11 | 12 | command = """#!/usr/bin/env bash 13 | set -ex 14 | ${getDependencies} 15 | cd ${project.paths.project_build_prefix} 16 | ${project.paths.build_command} 17 | """ 18 | 19 | platform.runCommand(this, command) 20 | } 21 | 22 | def runTestCommand (platform, project) 23 | { 24 | String sudo = auxiliary.sudo(platform.jenkinsLabel) 25 | def command = """#!/usr/bin/env bash 26 | set -ex 27 | cd ${project.paths.project_build_prefix}/build/release/tests 28 | ${sudo} ./rochpcg-test --gtest_output=xml --gtest_color=yes 29 | """ 30 | 31 | platform.runCommand(this, command) 32 | junit "${project.paths.project_build_prefix}/build/release/tests/*.xml" 33 | } 34 | 35 | return this 36 | -------------------------------------------------------------------------------- /.jenkins/precheckin.groovy: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env groovy 2 | @Library('rocJenkins@pong') _ 3 | import com.amd.project.* 4 | import com.amd.docker.* 5 | import java.nio.file.Path; 6 | 7 | def runCI = 8 | { 9 | nodeDetails, jobName-> 10 | 11 | def prj = new rocProject('rocHPCG', 'PreCheckin') 12 | prj.paths.build_command = './install.sh -t --with-openmp=OFF --with-mpi=OFF' 13 | 14 | def nodes = new dockerNodes(nodeDetails, jobName, prj) 15 | 16 | def commonGroovy 17 | 18 | boolean formatCheck = false 19 | 20 | def compileCommand = 21 | { 22 | platform, project-> 23 | 24 | commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" 25 | 
commonGroovy.runCompileCommand(platform, project, jobName) 26 | } 27 | 28 | def testCommand = 29 | { 30 | platform, project-> 31 | 32 | commonGroovy.runTestCommand(platform, project) 33 | } 34 | 35 | buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, null) 36 | } 37 | 38 | ci: { 39 | String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) 40 | 41 | def propertyList = ["compute-rocm-dkms-no-npi":[], 42 | "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], 43 | "rocm-docker":[]] 44 | propertyList = auxiliary.appendPropertyList(propertyList) 45 | 46 | def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), 47 | "compute-rocm-dkms-no-npi-hipclang":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), 48 | "rocm-docker":([ubuntu16:['gfx900']])] 49 | jobNameList = auxiliary.appendJobNameList(jobNameList) 50 | 51 | propertyList.each 52 | { 53 | jobName, property-> 54 | if (urlJobName == jobName) 55 | properties(auxiliary.addCommonProperties(property)) 56 | } 57 | 58 | jobNameList.each 59 | { 60 | jobName, nodeDetails-> 61 | if (urlJobName == jobName) 62 | stage(jobName) { 63 | runCI(nodeDetails, jobName) 64 | } 65 | } 66 | 67 | // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 68 | if(!jobNameList.keySet().contains(urlJobName)) 69 | { 70 | properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) 71 | stage(urlJobName) { 72 | runCI([ubuntu16:['gfx906']], urlJobName) 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Modifications (c) 2019-2023 Advanced Micro Devices, Inc. 
2 | # 3 | # Redistribution and use in source and binary forms, with or without modification, 4 | # are permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this 7 | # list of conditions and the following disclaimer. 8 | # 2. Redistributions in binary form must reproduce the above copyright notice, 9 | # this list of conditions and the following disclaimer in the documentation 10 | # and/or other materials provided with the distribution. 11 | # 3. Neither the name of the copyright holder nor the names of its contributors 12 | # may be used to endorse or promote products derived from this software without 13 | # specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 | # IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 | # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | # POSSIBILITY OF SUCH DAMAGE. 
25 | 26 | cmake_minimum_required(VERSION 3.10 FATAL_ERROR) 27 | 28 | # Consider removing this in the future 29 | # This should appear before the project command, because it does not use FORCE 30 | set(CMAKE_INSTALL_PREFIX ${ROCM_PATH} CACHE PATH "Install path prefix, prepended onto install directories") 31 | 32 | # CMake modules 33 | list(APPEND CMAKE_MODULE_PATH 34 | ${CMAKE_CURRENT_SOURCE_DIR}/cmake 35 | ${ROCM_PATH}/lib/cmake/hip 36 | ${ROCM_PATH}/hip/cmake) 37 | 38 | # Set a default build type if none was specified 39 | if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) 40 | message(STATUS "Setting build type to 'Release' as none was specified.") 41 | set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build." FORCE) 42 | set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "" "Debug" "Release" "MinSizeRel" "RelWithDebInfo") 43 | endif() 44 | 45 | # Honor per-config flags in try_compile() source-file signature. cmake v3.7 and up 46 | if(POLICY CMP0066) 47 | cmake_policy(SET CMP0066 NEW) 48 | endif() 49 | 50 | # rocHPCG project 51 | project(rochpcg LANGUAGES CXX) 52 | 53 | # Force library install path to lib (CentOS 7 defaults to lib64) 54 | set(CMAKE_INSTALL_LIBDIR "lib" CACHE INTERNAL "Installation directory for libraries" FORCE) 55 | 56 | # Build flags 57 | set(CMAKE_CXX_STANDARD 17) 58 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 59 | set(CMAKE_CXX_EXTENSIONS OFF) 60 | 61 | # Build options 62 | option(HPCG_DEBUG "Compile with modest debugging turned on" OFF) 63 | option(HPCG_DETAILED_DEBUG "Compile with voluminous debugging information turned on" OFF) 64 | option(HPCG_DETAILED_TIMING "Enable detail timers" OFF) 65 | option(HPCG_REFERENCE "Build reference mode" OFF) 66 | option(BUILD_TEST "Build rocHPCG single-node test" OFF) 67 | 68 | # Optimization options 69 | option(OPT_MEMMGMT "Build with memory management module" ON) 70 | option(OPT_DEFRAG "Build with memory management defragmentation" ON) 71 | option(GPU_AWARE_MPI "Enable use of GPU-Aware 
MPI functionality" OFF) 72 | 73 | # roctx Markers 74 | option(OPT_ROCTX "Enable rocTX markers" OFF) 75 | 76 | # Dependencies 77 | include(cmake/Dependencies.cmake) 78 | 79 | # Find HIP package 80 | find_package(HIP REQUIRED) 81 | find_package(rocprim REQUIRED) 82 | 83 | # GPU arch targets 84 | if(AMDGPU_TARGETS AND NOT GPU_TARGETS) 85 | message( DEPRECATION "AMDGPU_TARGETS use is deprecated. Use GPU_TARGETS." ) 86 | endif() 87 | set(AMDGPU_TARGETS "gfx900;gfx906" CACHE STRING "List of specific machine types for library to target") 88 | if(HIP_VERSION VERSION_GREATER_EQUAL "3.7") 89 | set(AMDGPU_TARGETS "${AMDGPU_TARGETS};gfx908") 90 | endif() 91 | if(HIP_VERSION VERSION_GREATER_EQUAL "4.3") 92 | set(AMDGPU_TARGETS "${AMDGPU_TARGETS};gfx90a") 93 | endif() 94 | if (HIP_VERSION VERSION_GREATER_EQUAL "5.7") 95 | set(AMDGPU_TARGETS "${AMDGPU_TARGETS};gfx942") 96 | endif() 97 | if (HIP_VERSION VERSION_GREATER_EQUAL "6.5") 98 | set(AMDGPU_TARGETS "${AMDGPU_TARGETS};gfx950") 99 | endif() 100 | # Don't force, as users should be able to override GPU_TARGETS at the command line if desired 101 | set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU architectures to build for") 102 | 103 | # Setup version 104 | rocm_setup_version(VERSION 0.8.6) 105 | 106 | # rocHPCG source directory 107 | add_subdirectory(src) 108 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019-2021 Advanced Micro Devices, Inc. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, 4 | are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 9 | this list of conditions and the following disclaimer in the documentation 10 | and/or other materials provided with the distribution. 11 | 3. Neither the name of the copyright holder nor the names of its contributors 12 | may be used to endorse or promote products derived from this software without 13 | specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /NOTICES.txt: -------------------------------------------------------------------------------- 1 | Notices and licenses file 2 | _________________________ 3 | 4 | AMD copyrighted code (BSD3) 5 | 6 | Copyright (c) 2019-2021 Advanced Micro Devices, Inc. 7 | 8 | Redistribution and use in source and binary forms, with or without modification, 9 | are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright notice, this 12 | list of conditions and the following disclaimer. 13 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 3. Neither the name of the copyright holder nor the names of its contributors 17 | may be used to endorse or promote products derived from this software without 18 | specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 21 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 24 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 26 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | POSSIBILITY OF SUCH DAMAGE. 30 | 31 | 32 | Dependencies on hpcg-benchmark-hpcg v3.1 (BSD3) 33 | 34 | Modifications (c) 2019-2021 Advanced Micro Devices, Inc. 35 | 36 | Redistribution and use in source and binary forms, with or without modification, 37 | are permitted provided that the following conditions are met: 38 | 39 | 1. Redistributions of source code must retain the above copyright notice, this 40 | list of conditions and the following disclaimer. 41 | 2. Redistributions in binary form must reproduce the above copyright notice, 42 | this list of conditions and the following disclaimer in the documentation 43 | and/or other materials provided with the distribution. 44 | 3. 
Neither the name of the copyright holder nor the names of its contributors 45 | may be used to endorse or promote products derived from this software without 46 | specific prior written permission. 47 | 48 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 49 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 50 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 51 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 52 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 53 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 54 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 55 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 56 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 57 | POSSIBILITY OF SUCH DAMAGE. 58 | 59 | 60 | hpcg-benchmark-hpcg v3.1 (BSD3) 61 | Copyright (c) 2013-2019, hpcg-benchmark 62 | All rights reserved. 63 | 64 | 65 | Redistribution and use in source and binary forms, with or without 66 | modification, are permitted provided that the following conditions are met: 67 | 68 | * Redistributions of source code must retain the above copyright notice, this 69 | list of conditions and the following disclaimer. 70 | 71 | * Redistributions in binary form must reproduce the above copyright notice, 72 | this list of conditions and the following disclaimer in the documentation 73 | and/or other materials provided with the distribution. 74 | 75 | * Neither the name of hpcg nor the names of its 76 | contributors may be used to endorse or promote products derived from 77 | this software without specific prior written permission. 
78 | 79 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 80 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 81 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 82 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 83 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 84 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 85 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 86 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 87 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 88 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 89 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rocHPCG 2 | rocHPCG is a benchmark based on the [HPCG][] benchmark application, implemented on top of AMD's Radeon Open eCosystem Platform [ROCm][] runtime and toolchains. rocHPCG is created using the [HIP][] programming language and optimized for AMD's latest discrete GPUs. 
3 | 4 | ## Requirements 5 | * Git 6 | * CMake (3.10 or later) 7 | * MPI 8 | * NUMA library 9 | * AMD [ROCm] platform (4.1 or later) 10 | * [rocPRIM][] 11 | * googletest (for test application only) 12 | 13 | ## Quickstart rocHPCG build and install 14 | 15 | #### Install script 16 | You can build rocHPCG using the *install.sh* script 17 | ``` 18 | # Clone rocHPCG using git 19 | git clone https://github.com/ROCmSoftwarePlatform/rocHPCG.git 20 | 21 | # Go to rocHPCG directory 22 | cd rocHPCG 23 | 24 | # Run install.sh script 25 | # Command line options: 26 | # -h|--help - prints this help message 27 | # -i|--install - install after build 28 | # -d|--dependencies - install dependencies 29 | # -r|--reference - reference mode 30 | # -g|--debug - -DCMAKE_BUILD_TYPE=Debug (default: Release) 31 | # -t|--test - build single GPU test 32 | # --with-rocm=<dir> - Path to ROCm install (default: /opt/rocm) 33 | # --with-mpi=<dir> - Path to external MPI install (Default: clone+build OpenMPI v4.1.0 in deps/) 34 | # --with-openmp - compile with OpenMP support (default: enabled) 35 | # --with-memmgmt - compile with smart memory management (default: enabled) 36 | # --with-memdefrag - compile with memory defragmentation (default: enabled) 37 | ./install.sh -di 38 | ``` 39 | By default, [UCX] v1.10.0 and [OpenMPI] v4.1.0 will be cloned and built in `rocHPCG/deps`. 40 | After build and install, the `rochpcg` executable is placed in `build/release/rochpcg-install`. 41 | 42 | #### MPI 43 | You can build rocHPCG using your own MPI installation by specifying the directory, e.g. 44 | ``` 45 | ./install.sh -di --with-mpi=/my/mpiroot/ 46 | ``` 47 | Alternatively, when you do not pass a specific directory, OpenMPI v4.1.0 with UCX will be cloned and built within the `rocHPCG/deps` directory. 
48 | If you want to disable MPI, you need to run 49 | ``` 50 | ./install.sh -di --with-mpi=off 51 | ``` 52 | 53 | #### ROCm 54 | You can build rocHPCG with specific ROCm versions by passing the directory to the install script, e.g. 55 | ``` 56 | ./install.sh -di --with-rocm=/my/rocm-x.y.z/ 57 | ``` 58 | 59 | ## Running rocHPCG benchmark application 60 | You can run the rocHPCG benchmark application by either using command line parameters or the `hpcg.dat` input file 61 | ``` 62 | rochpcg <nx> <ny> <nz> <runtime> 63 | # where 64 | # nx - is the global problem size in x dimension 65 | # ny - is the global problem size in y dimension 66 | # nz - is the global problem size in z dimension 67 | # runtime - is the desired benchmarking time in seconds (> 1800s for official runs) 68 | ``` 69 | 70 | Similarly, these parameters can be entered into an input file `hpcg.dat` in the working directory, e.g. `nx = ny = nz = 280` and `runtime = 1860`. 71 | ``` 72 | HPCG benchmark input file 73 | Sandia National Laboratories; University of Tennessee, Knoxville 74 | 280 280 280 75 | 1860 76 | ``` 77 | 78 | ## Performance evaluation 79 | For performance evaluation purposes, the number of iterations should be as low as possible (i.e. convergence rate as high as possible), since the final HPCG score is scaled to 50 iterations. 80 | Furthermore, it is observed that high memory occupancy performs better on AMD devices. Problem size suggestion for devices with 16GB is `nx = ny = nz = 280` and `nx = 560, ny = nz = 280` for devices with 32GB or more. The runtime for official runs has to be at least 1800 seconds (use 1860 to be on the safe side), e.g. 81 | ``` 82 | ./rochpcg 560 280 280 1860 83 | ``` 84 | Please note that convergence rate behaviour might change in a multi-GPU environment and the parameters might need to be adjusted accordingly. 85 | 86 | Additionally, you can specify the device to be used for the application (e.g. 
device #1): 87 | ``` 88 | ./rochpcg 560 280 280 1860 --dev=1 89 | ``` 90 | 91 | ## Support 92 | Please use [the issue tracker][] for bugs and feature requests. 93 | 94 | ## License 95 | The [license file][] can be found in the main repository. 96 | 97 | [HPCG]: https://www.hpcg-benchmark.org/ 98 | [ROCm]: https://github.com/RadeonOpenCompute/ROCm 99 | [HIP]: https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/ 100 | [rocPRIM]: https://github.com/ROCmSoftwarePlatform/rocPRIM 101 | [OpenMPI]: https://github.com/open-mpi/ompi 102 | [UCX]: https://github.com/openucx/ucx 103 | [the issue tracker]: https://github.com/ROCmSoftwarePlatform/rocHPCG/issues 104 | [license file]: https://github.com/ROCmSoftwarePlatform/rocHPCG 105 | -------------------------------------------------------------------------------- /cmake/Dependencies.cmake: -------------------------------------------------------------------------------- 1 | # Modifications (c) 2019-2021 Advanced Micro Devices, Inc. 2 | # 3 | # Redistribution and use in source and binary forms, with or without modification, 4 | # are permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this 7 | # list of conditions and the following disclaimer. 8 | # 2. Redistributions in binary form must reproduce the above copyright notice, 9 | # this list of conditions and the following disclaimer in the documentation 10 | # and/or other materials provided with the distribution. 11 | # 3. Neither the name of the copyright holder nor the names of its contributors 12 | # may be used to endorse or promote products derived from this software without 13 | # specific prior written permission. 
14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 | # IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 | # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | # POSSIBILITY OF SUCH DAMAGE. 25 | 26 | # Dependencies 27 | 28 | # Git 29 | find_package(Git REQUIRED) 30 | 31 | # Add some paths 32 | list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip) 33 | 34 | # Find OpenMP package 35 | find_package(OpenMP) 36 | if (NOT OPENMP_FOUND) 37 | message("-- OpenMP not found. Compiling WITHOUT OpenMP support.") 38 | else() 39 | option(HPCG_OPENMP "Compile WITH OpenMP support." ON) 40 | endif() 41 | 42 | # MPI 43 | set(MPI_HOME ${HPCG_MPI_DIR}) 44 | find_package(MPI) 45 | if (NOT MPI_FOUND) 46 | message("-- MPI not found. Compiling WITHOUT MPI support.") 47 | if (HPCG_MPI) 48 | message(FATAL_ERROR "Cannot build with MPI support.") 49 | endif() 50 | else() 51 | option(HPCG_MPI "Compile WITH MPI support." 
ON) 52 | endif() 53 | 54 | # gtest 55 | if(BUILD_TEST) 56 | find_package(GTest REQUIRED) 57 | endif() 58 | 59 | # libnuma if MPI is enabled 60 | if(HPCG_MPI) 61 | find_package(LIBNUMA REQUIRED) 62 | endif() 63 | 64 |
# rocm-cmake
# Use an installed rocm-cmake when one can be found. Otherwise download the
# requested tag into the build tree, unpack it, configure it and install it
# under ${PROJECT_EXTERN_DIR}/rocm so the second find_package() can pick it up.
find_package(ROCM 0.7.3 QUIET CONFIG PATHS ${CMAKE_PREFIX_PATH} $ENV{ROCM_PATH})
if(NOT ROCM_FOUND)
  set(PROJECT_EXTERN_DIR "${CMAKE_CURRENT_BINARY_DIR}/deps")
  file( TO_NATIVE_PATH "${PROJECT_EXTERN_DIR}" PROJECT_EXTERN_DIR_NATIVE)
  set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download")

  # Single source of truth for the URL and the archive path, so the error
  # messages below always name what was actually downloaded / unpacked.
  set(rocm_cmake_url "https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.tar.gz")
  set(rocm_cmake_archive "${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.tar.gz")

  file(
    DOWNLOAD ${rocm_cmake_url}
    ${rocm_cmake_archive}
    STATUS rocm_cmake_download_status LOG rocm_cmake_download_log
  )
  list(GET rocm_cmake_download_status 0 rocm_cmake_download_error_code)
  if(rocm_cmake_download_error_code)
    message(FATAL_ERROR "Error: downloading "
      "${rocm_cmake_url} failed "
      "error_code: ${rocm_cmake_download_error_code} "
      "log: ${rocm_cmake_download_log} "
    )
  endif()

  # Capture the exit status of tar; without RESULT_VARIABLE the variable tested
  # below is never set and a failed unpack would go unnoticed.
  execute_process(
    COMMAND ${CMAKE_COMMAND} -E tar xzvf ${rocm_cmake_archive}
    WORKING_DIRECTORY ${PROJECT_EXTERN_DIR}
    RESULT_VARIABLE rocm_cmake_unpack_error_code
  )
  if(rocm_cmake_unpack_error_code)
    message(FATAL_ERROR "Error: unpacking ${rocm_cmake_archive} failed")
  endif()

  # Configure the unpacked rocm-cmake sources ...
  execute_process(
    COMMAND ${CMAKE_COMMAND} -S ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag} -B ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}/build
    WORKING_DIRECTORY ${PROJECT_EXTERN_DIR}
  )
  # ... and install them into the local prefix.
  execute_process(
    COMMAND ${CMAKE_COMMAND} --install ${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}/build --prefix ${PROJECT_EXTERN_DIR}/rocm
    WORKING_DIRECTORY ${PROJECT_EXTERN_DIR} )
  find_package(ROCM 0.7.3 REQUIRED CONFIG PATHS
${PROJECT_EXTERN_DIR}) 99 | endif() 100 | 101 | include(ROCMSetupVersion) 102 | include(ROCMCreatePackage) 103 | include(ROCMInstallTargets) 104 | include(ROCMPackageConfigHelpers) 105 | include(ROCMInstallSymlinks) 106 | include(ROCMCheckTargetIds) 107 | include(ROCMClients) 108 | -------------------------------------------------------------------------------- /cmake/FindLIBNUMA.cmake: -------------------------------------------------------------------------------- 1 | # Modifications (c) 2019-2021 Advanced Micro Devices, Inc. 2 | # 3 | # Redistribution and use in source and binary forms, with or without modification, 4 | # are permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this 7 | # list of conditions and the following disclaimer. 8 | # 2. Redistributions in binary form must reproduce the above copyright notice, 9 | # this list of conditions and the following disclaimer in the documentation 10 | # and/or other materials provided with the distribution. 11 | # 3. Neither the name of the copyright holder nor the names of its contributors 12 | # may be used to endorse or promote products derived from this software without 13 | # specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
18 | # IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 | # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | # POSSIBILITY OF SUCH DAMAGE. 25 |
# Find module for libnuma.
# Sets LIBNUMA_INCLUDE_DIR, LIBNUMA_LIBRARY and LIBNUMA_FOUND, and defines the
# imported interface target libnuma::libnuma carrying both.

# Locate numa.h. Every environment variable needs its own ENV keyword; a bare
# name after PATHS is taken as a literal directory, so CPATH is spelled
# "ENV CPATH" here.
find_path(LIBNUMA_INCLUDE_DIR NAMES numa.h
    PATHS
    ENV INCLUDE
    ENV CPATH
    /usr/include)

# Locate the numa library itself.
find_library(LIBNUMA_LIBRARY NAMES numa
    PATHS
    ENV LD_LIBRARY_PATH
    /usr/lib/x86_64-linux-gnu)

if(LIBNUMA_INCLUDE_DIR AND LIBNUMA_LIBRARY)
    set(LIBNUMA_FOUND TRUE)
else()
    set(LIBNUMA_FOUND FALSE)
endif()

# Create the imported target only once, even if this module is included again.
if(NOT TARGET libnuma::libnuma)
    add_library(libnuma::libnuma INTERFACE IMPORTED)
endif()

set_property(TARGET libnuma::libnuma PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${LIBNUMA_INCLUDE_DIR}")
set_property(TARGET libnuma::libnuma PROPERTY INTERFACE_LINK_LIBRARIES "${LIBNUMA_LIBRARY}")

# Standard REQUIRED / QUIET handling and the final LIBNUMA_FOUND value.
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(LIBNUMA DEFAULT_MSG
    LIBNUMA_LIBRARY
    LIBNUMA_INCLUDE_DIR)

mark_as_advanced(LIBNUMA_INCLUDE_DIR LIBNUMA_LIBRARY)
-------------------------------------------------------------------------------- /cmake/version.cmake: -------------------------------------------------------------------------------- 1 | # Modifications (c) 2019 Advanced Micro Devices, Inc. 2 | # 3 | # Redistribution and use in source and binary forms, with or without modification, 4 | # are permitted provided that the following conditions are met: 5 | # 6 | # 1.
Redistributions of source code must retain the above copyright notice, this 7 | # list of conditions and the following disclaimer. 8 | # 2. Redistributions in binary form must reproduce the above copyright notice, 9 | # this list of conditions and the following disclaimer in the documentation 10 | # and/or other materials provided with the distribution. 11 | # 3. Neither the name of the copyright holder nor the names of its contributors 12 | # may be used to endorse or promote products derived from this software without 13 | # specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 | # IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 21 | # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | # POSSIBILITY OF SUCH DAMAGE. 
25 | 26 | # TODO: move this function to https://github.com/RadeonOpenCompute/rocm-cmake/blob/master/share/rocm/cmake/ROCMSetupVersion.cmake 27 |
# rocm_set_parent(<var> <value>...)
# Assigns the given value(s) to <var> in the parent scope of the caller and in
# the caller's own scope.
macro(rocm_set_parent VAR)
    set(${VAR} ${ARGN} PARENT_SCOPE)
    set(${VAR} ${ARGN})
endmacro()

# rocm_get_git_commit_id(<out-var> [VERSION <fallback>] [DIRECTORY <dir>])
# Stores the output of `git describe` for <dir> (default: the current source
# directory) in <out-var>. If git is not available or both describe attempts
# fail, <out-var> receives the VERSION fallback instead.
function(rocm_get_git_commit_id OUTPUT_VERSION)
    set(options)
    set(oneValueArgs VERSION DIRECTORY)
    set(multiValueArgs)

    cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

    set(_version ${PARSE_VERSION})

    set(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
    if(PARSE_DIRECTORY)
        set(DIRECTORY ${PARSE_DIRECTORY})
    endif()

    find_program(GIT NAMES git)

    if(GIT)
        set(GIT_COMMAND ${GIT} describe --dirty --long --match [0-9]*)
        # First attempt: describe relative to a tag that starts with a digit.
        execute_process(COMMAND ${GIT_COMMAND}
                        WORKING_DIRECTORY ${DIRECTORY}
                        OUTPUT_VARIABLE GIT_TAG_VERSION
                        OUTPUT_STRIP_TRAILING_WHITESPACE
                        RESULT_VARIABLE RESULT
                        ERROR_QUIET)
        # RESULT may hold an error string rather than an exit code, so it is
        # tested by variable name instead of being expanded into the if().
        if(NOT RESULT EQUAL 0)
            # Second attempt: same command with --always appended.
            execute_process(COMMAND ${GIT_COMMAND} --always
                            WORKING_DIRECTORY ${DIRECTORY}
                            OUTPUT_VARIABLE GIT_TAG_VERSION
                            OUTPUT_STRIP_TRAILING_WHITESPACE
                            RESULT_VARIABLE RESULT
                            ERROR_QUIET)
        endif()
        # RESULT / GIT_TAG_VERSION now belong to whichever attempt ran last.
        if(RESULT EQUAL 0)
            set(_version ${GIT_TAG_VERSION})
        endif()
    endif()
    rocm_set_parent(${OUTPUT_VERSION} ${_version})
endfunction()
-------------------------------------------------------------------------------- /docker/dockerfile-build-centos: -------------------------------------------------------------------------------- 1 | # Parameters related to building rocHPCG 2 | ARG base_image 3 | 4 | FROM ${base_image} 5 | LABEL maintainer="rochpcg-maintainer@amd.com" 6 | 7 | ARG user_uid 8 | 9 | ARG library_dependencies="rocprim" 10 | 11 | # Install dependent packages 12 | RUN yum install -y \ 13 | sudo \ 14 | centos-release-scl \ 15 | devtoolset-7 \ 16 |
ca-certificates \ 17 | git \ 18 | cmake3 \ 19 | make \ 20 | clang \ 21 | clang-devel \ 22 | gcc-c++ \ 23 | pkgconfig \ 24 | libcxx-devel \ 25 | numactl-libs \ 26 | rpm-build \ 27 | deltarpm \ 28 | ${library_dependencies} 29 | 30 | RUN echo '#!/bin/bash' | tee /etc/profile.d/devtoolset7.sh && echo \ 31 | 'source scl_source enable devtoolset-7' >>/etc/profile.d/devtoolset7.sh 32 | 33 | # docker pipeline runs containers with particular uid 34 | # create a jenkins user with this specific uid so it can use sudo privileges 35 | # Grant any member of video group password-less sudo privileges 36 | RUN useradd --create-home -u ${user_uid} -o -G video --shell /bin/bash jenkins && \ 37 | echo '%video ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd && \ 38 | chmod 400 /etc/sudoers.d/sudo-nopasswd 39 | 40 | ARG GTEST_SRC_ROOT=/usr/local/src/gtest 41 | 42 | # Clone gtest repo 43 | # Build gtest and install into /usr/local 44 | RUN mkdir -p ${GTEST_SRC_ROOT} && cd ${GTEST_SRC_ROOT} && \ 45 | git clone -b release-1.8.1 --depth=1 https://github.com/google/googletest . && \ 46 | mkdir -p build && cd build && \ 47 | cmake ..
&& \ 48 | make -j $(nproc) install && \ 49 | rm -rf ${GTEST_SRC_ROOT} -------------------------------------------------------------------------------- /docker/dockerfile-build-sles: -------------------------------------------------------------------------------- 1 | # Parameters related to building rocHPCG 2 | ARG base_image 3 | 4 | FROM ${base_image} 5 | LABEL maintainer="rochpcg-maintainer@amd.com" 6 | 7 | ARG user_uid 8 | 9 | ARG library_dependencies="rocprim" 10 | 11 | # Install dependent packages 12 | RUN zypper -n update && zypper -n install\ 13 | sudo \ 14 | ca-certificates \ 15 | git \ 16 | gcc-c++ \ 17 | gcc-fortran \ 18 | make \ 19 | cmake \ 20 | rpm-build \ 21 | dpkg \ 22 | libcxxtools9 \ 23 | ${library_dependencies} 24 | 25 | # docker pipeline runs containers with particular uid 26 | # create a jenkins user with this specific uid so it can use sudo privileges 27 | # Grant any member of video group password-less sudo privileges 28 | RUN useradd --create-home -u ${user_uid} -o -G video --shell /bin/bash jenkins && \ 29 | echo '%video ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd && \ 30 | chmod 400 /etc/sudoers.d/sudo-nopasswd 31 | 32 | ARG GTEST_SRC_ROOT=/usr/local/src/gtest 33 | 34 | # Clone gtest repo 35 | # Build gtest and install into /usr/local 36 | RUN mkdir -p ${GTEST_SRC_ROOT} && cd ${GTEST_SRC_ROOT} && \ 37 | git clone -b release-1.8.1 --depth=1 https://github.com/google/googletest . && \ 38 | mkdir -p build && cd build && \ 39 | cmake ..
&& \ 40 | make -j $(nproc) install && \ 41 | rm -rf ${GTEST_SRC_ROOT} -------------------------------------------------------------------------------- /docker/dockerfile-build-ubuntu-rock: -------------------------------------------------------------------------------- 1 | # Parameters related to building rocHPCG 2 | ARG base_image 3 | 4 | FROM ${base_image} 5 | LABEL maintainer="rochpcg-maintainer@amd.com" 6 | 7 | ARG user_uid 8 | 9 | ARG library_dependencies="rocblas rocsolver" 10 | 11 | # Install dependent packages 12 | # Dependencies: 13 | # * hcc-config.cmake: pkg-config 14 | # * rochpcg-test: googletest rocprim 15 | # * libhsakmt.so: libnuma1 libnuma-dev 16 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 17 | sudo \ 18 | ca-certificates \ 19 | git \ 20 | make \ 21 | cmake \ 22 | pkg-config \ 23 | libnuma1 \ 24 | libnuma-dev \ 25 | mpi-default-bin \ 26 | mpi-default-dev \ 27 | libomp-dev \ 28 | ${library_dependencies} \ 29 | && \ 30 | apt-get clean && \ 31 | rm -rf /var/lib/apt/lists/* 32 | 33 | # docker pipeline runs containers with particular uid 34 | # create a jenkins user with this specific uid so it can use sudo privileges 35 | # Grant any member of video group password-less sudo privileges 36 | RUN useradd --create-home -u ${user_uid} -o -G video --shell /bin/bash jenkins && \ 37 | mkdir -p /etc/sudoers.d/ && \ 38 | echo '%video ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd 39 | 40 | ARG GTEST_SRC_ROOT=/usr/local/src/gtest 41 | 42 | # Clone gtest repo 43 | # Build gtest and install into /usr/local 44 | RUN mkdir -p ${GTEST_SRC_ROOT} && cd ${GTEST_SRC_ROOT} && \ 45 | git clone -b release-1.8.1 --depth=1 https://github.com/google/googletest . && \ 46 | mkdir -p build && cd build && \ 47 | cmake ..
&& \ 48 | make -j $(nproc) install && \ 49 | rm -rf ${GTEST_SRC_ROOT} 50 | -------------------------------------------------------------------------------- /rtest.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 0.1 5 | rochpcg-test --gtest_color=yes --gtest_output=xml 6 | * 7 | 8 | {COMMAND}:output_psdb.xml 9 | 10 | 11 | {COMMAND}:output_osdb.xml 12 | 13 | 14 | {COMMAND}:output_custom.xml --gtest_filter={GTEST_FILTER} 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/CG.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /* ************************************************************************ 16 | * Modifications (c) 2019 Advanced Micro Devices, Inc. 17 | * 18 | * Redistribution and use in source and binary forms, with or without modification, 19 | * are permitted provided that the following conditions are met: 20 | * 21 | * 1. Redistributions of source code must retain the above copyright notice, this 22 | * list of conditions and the following disclaimer. 23 | * 2. Redistributions in binary form must reproduce the above copyright notice, 24 | * this list of conditions and the following disclaimer in the documentation 25 | * and/or other materials provided with the distribution. 26 | * 3. Neither the name of the copyright holder nor the names of its contributors 27 | * may be used to endorse or promote products derived from this software without 28 | * specific prior written permission. 
29 | * 30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 32 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 33 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 34 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 35 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 36 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 | * POSSIBILITY OF SUCH DAMAGE. 40 | * 41 | * ************************************************************************ */ 42 | 43 | #ifndef CG_HPP 44 | #define CG_HPP 45 | 46 | #include "SparseMatrix.hpp" 47 | #include "Vector.hpp" 48 | #include "CGData.hpp" 49 | 50 | int CG(const SparseMatrix & A, CGData & data, const Vector & b, Vector & x, 51 | const int max_iter, const double tolerance, int & niters, double & normr, double & normr0, 52 | double * times, bool doPreconditioning, bool verbose); 53 | 54 | // this function will compute the Conjugate Gradient iterations. 55 | // geom - Domain and processor topology information 56 | // A - Matrix 57 | // b - constant 58 | // x - used for return value 59 | // max_iter - how many times we iterate 60 | // tolerance - Stopping tolerance for preconditioned iterations. 61 | // niters - number of iterations performed 62 | // normr - computed residual norm 63 | // normr0 - Original residual 64 | // times - array of timing information 65 | // doPreconditioning - bool to specify whether or not symmetric GS will be applied. 
66 | 67 | #endif // CG_HPP 68 | -------------------------------------------------------------------------------- /src/CGData.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /* ************************************************************************ 16 | * Modifications (c) 2019 Advanced Micro Devices, Inc. 17 | * 18 | * Redistribution and use in source and binary forms, with or without modification, 19 | * are permitted provided that the following conditions are met: 20 | * 21 | * 1. Redistributions of source code must retain the above copyright notice, this 22 | * list of conditions and the following disclaimer. 23 | * 2. Redistributions in binary form must reproduce the above copyright notice, 24 | * this list of conditions and the following disclaimer in the documentation 25 | * and/or other materials provided with the distribution. 26 | * 3. Neither the name of the copyright holder nor the names of its contributors 27 | * may be used to endorse or promote products derived from this software without 28 | * specific prior written permission. 29 | * 30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 32 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
33 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 34 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 35 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 36 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 | * POSSIBILITY OF SUCH DAMAGE. 40 | * 41 | * ************************************************************************ */ 42 | 43 | /*! 44 | @file CGData.hpp 45 | 46 | HPCG data structure grouping the four work vectors (r, z, p, Ap) of the CG solver 47 | */ 48 | 49 | #ifndef CGDATA_HPP 50 | #define CGDATA_HPP 51 | 52 | #include "SparseMatrix.hpp" 53 | #include "Vector.hpp" 54 | 55 | struct CGData_STRUCT { 56 | Vector r; //!< residual vector (stored by value, not a pointer) 57 | Vector z; //!< preconditioned residual vector 58 | Vector p; //!< direction vector 59 | Vector Ap; //!< Krylov vector 60 | }; 61 | typedef struct CGData_STRUCT CGData; 62 | 63 | /*! 64 | Constructor for the data structure of CG vectors.
65 | 66 | @param[in] A the data structure that describes the problem matrix and its structure 67 | @param[out] data the data structure for CG vectors that will be allocated to get it ready for use in CG iterations 68 | */ 69 | inline void InitializeSparseCGData(SparseMatrix & A, CGData & data) { 70 | local_int_t nrow = A.localNumberOfRows; 71 | local_int_t ncol = A.localNumberOfColumns; 72 | InitializeVector(data.r, nrow); 73 | InitializeVector(data.z, ncol); 74 | InitializeVector(data.p, ncol); 75 | InitializeVector(data.Ap, nrow); 76 | return; 77 | } 78 | 79 | inline void HIPInitializeSparseCGData(SparseMatrix& A, CGData& data) 80 | { 81 | HIPInitializeVector(data.r, A.localNumberOfRows); 82 | HIPInitializeVector(data.z, A.localNumberOfColumns); 83 | HIPInitializeVector(data.p, A.localNumberOfColumns); 84 | HIPInitializeVector(data.Ap, A.localNumberOfRows); 85 | } 86 | 87 | /*! 88 | Destructor for the CG vectors data. 89 | 90 | @param[inout] data the CG vectors data structure whose storage is deallocated 91 | */ 92 | inline void DeleteCGData(CGData & data) { 93 | 94 | DeleteVector (data.r); 95 | DeleteVector (data.z); 96 | DeleteVector (data.p); 97 | DeleteVector (data.Ap); 98 | return; 99 | } 100 | 101 | inline void HIPDeleteCGData(CGData& data) 102 | { 103 | HIPDeleteVector (data.r); 104 | HIPDeleteVector (data.z); 105 | HIPDeleteVector (data.p); 106 | HIPDeleteVector (data.Ap); 107 | } 108 | 109 | #endif // CGDATA_HPP 110 | 111 | -------------------------------------------------------------------------------- /src/CG_ref.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. 
Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /* ************************************************************************ 16 | * Modifications (c) 2019 Advanced Micro Devices, Inc. 17 | * 18 | * Redistribution and use in source and binary forms, with or without modification, 19 | * are permitted provided that the following conditions are met: 20 | * 21 | * 1. Redistributions of source code must retain the above copyright notice, this 22 | * list of conditions and the following disclaimer. 23 | * 2. Redistributions in binary form must reproduce the above copyright notice, 24 | * this list of conditions and the following disclaimer in the documentation 25 | * and/or other materials provided with the distribution. 26 | * 3. Neither the name of the copyright holder nor the names of its contributors 27 | * may be used to endorse or promote products derived from this software without 28 | * specific prior written permission. 29 | * 30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 32 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 33 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 34 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 35 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 36 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 | * POSSIBILITY OF SUCH DAMAGE. 
40 | * 41 | * ************************************************************************ */ 42 | 43 | #ifndef CG_REF_HPP 44 | #define CG_REF_HPP 45 | 46 | #include "SparseMatrix.hpp" 47 | #include "Vector.hpp" 48 | #include "CGData.hpp" 49 | 50 | int CG_ref(const SparseMatrix & A, CGData & data, const Vector & b, Vector & x, 51 | const int max_iter, const double tolerance, int & niters, double & normr, double & normr0, 52 | double * times, bool doPreconditioning, bool verbose); 53 | 54 | // this function will compute the Conjugate Gradient iterations. 55 | // geom - Domain and processor topology information 56 | // A - Matrix 57 | // b - constant 58 | // x - used for return value 59 | // max_iter - how many times we iterate 60 | // tolerance - Stopping tolerance for preconditioned iterations. 61 | // niters - number of iterations performed 62 | // normr - computed residual norm 63 | // normr0 - Original residual 64 | // times - array of timing information 65 | // doPreconditioning - bool to specify whether or not symmetric GS will be applied. 66 | 67 | #endif // CG_REF_HPP 68 | -------------------------------------------------------------------------------- /src/CheckAspectRatio.cpp: -------------------------------------------------------------------------------- 1 | //@HEADER 2 | // *************************************************** 3 | // 4 | // HPCG: High Performance Conjugate Gradient Benchmark 5 | // 6 | // Contact: 7 | // Michael A. Heroux ( maherou@sandia.gov) 8 | // Jack Dongarra (dongarra@eecs.utk.edu) 9 | // Piotr Luszczek (luszczek@eecs.utk.edu) 10 | // 11 | // *************************************************** 12 | //@HEADER 13 | 14 | /*! 
15 | @file CheckAspectRatio.cpp 16 | 17 | HPCG routine 18 | */ 19 | 20 | #include 21 | 22 | #ifndef HPCG_NO_MPI 23 | #include 24 | #endif 25 | 26 | #include "hpcg.hpp" 27 | 28 | #include "CheckAspectRatio.hpp" 29 | 30 | int 31 | CheckAspectRatio(double smallest_ratio, int x, int y, int z, const char *what, bool DoIo) { 32 | double current_ratio = std::min(std::min(x, y), z) / double(std::max(std::max(x, y), z)); 33 | 34 | if (current_ratio < smallest_ratio) { // ratio of the smallest to the largest 35 | if (DoIo) { 36 | HPCG_fout << "The " << what << " sizes (" << x << "," << y << "," << z << 37 | ") are invalid because the ratio min(x,y,z)/max(x,y,z)=" << current_ratio << 38 | " is too small (at least " << smallest_ratio << " is required)." << std::endl; 39 | HPCG_fout << "The shape should resemble a 3D cube. Please adjust and try again." << std::endl; 40 | HPCG_fout.flush(); 41 | } 42 | 43 | #ifndef HPCG_NO_MPI 44 | MPI_Abort(MPI_COMM_WORLD, 127); 45 | #endif 46 | 47 | return 127; 48 | } 49 | 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /src/CheckAspectRatio.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. 
Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #ifndef CHECKASPECTRATIO_HPP 16 | #define CHECKASPECTRATIO_HPP 17 | extern int CheckAspectRatio(double smallest_ratio, int x, int y, int z, const char *what, bool DoIo); 18 | #endif // CHECKASPECTRATIO_HPP 19 | 20 | -------------------------------------------------------------------------------- /src/CheckProblem.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #ifndef CHECKPROBLEM_HPP 16 | #define CHECKPROBLEM_HPP 17 | #include "SparseMatrix.hpp" 18 | #include "Vector.hpp" 19 | 20 | void CheckProblem(SparseMatrix & A, Vector * b, Vector * x, Vector * xexact); 21 | #endif // CHECKPROBLEM_HPP 22 | -------------------------------------------------------------------------------- /src/ComputeDotProduct.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. 
Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #ifndef COMPUTEDOTPRODUCT_HPP 16 | #define COMPUTEDOTPRODUCT_HPP 17 | #include "Vector.hpp" 18 | int ComputeDotProduct(const local_int_t n, const Vector & x, const Vector & y, 19 | double & result, double & time_allreduce, bool & isOptimized); 20 | 21 | #endif // COMPUTEDOTPRODUCT_HPP 22 | -------------------------------------------------------------------------------- /src/ComputeDotProduct_ref.cpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /*! 16 | @file ComputeDotProduct_ref.cpp 17 | 18 | HPCG routine 19 | */ 20 | 21 | #ifndef HPCG_NO_MPI 22 | #include 23 | #include "mytimer.hpp" 24 | #endif 25 | #ifndef HPCG_NO_OPENMP 26 | #include 27 | #endif 28 | #include 29 | #include "ComputeDotProduct_ref.hpp" 30 | 31 | /*! 32 | Routine to compute the dot product of two vectors where: 33 | 34 | This is the reference dot-product implementation. It _CANNOT_ be modified for the 35 | purposes of this benchmark. 36 | 37 | @param[in] n the number of vector elements (on this processor) 38 | @param[in] x, y the input vectors 39 | @param[in] result a pointer to scalar value, on exit will contain result. 
40 | @param[out] time_allreduce the time it took to perform the communication between processes 41 | 42 | @return returns 0 upon success and non-zero otherwise 43 | 44 | @see ComputeDotProduct 45 | */ 46 | int ComputeDotProduct_ref(const local_int_t n, const Vector & x, const Vector & y, 47 | double & result, double & time_allreduce) { 48 | assert(x.localLength>=n); // Test vector lengths 49 | assert(y.localLength>=n); 50 | 51 | double local_result = 0.0; 52 | double * xv = x.values; 53 | double * yv = y.values; 54 | if (yv==xv) { 55 | #ifndef HPCG_NO_OPENMP 56 | #pragma omp parallel for reduction (+:local_result) 57 | #endif 58 | for (local_int_t i=0; inumberOfPresmootherSteps; 73 | 74 | for(int i = 1; i < numberOfPresmootherSteps; ++i) 75 | { 76 | RETURN_IF_HPCG_ERROR(ComputeSYMGS(A, r, x)); 77 | } 78 | 79 | #ifndef HPCG_REFERENCE 80 | RETURN_IF_HPCG_ERROR(ComputeFusedSpMVRestriction(A, r, x)); 81 | #else 82 | RETURN_IF_HPCG_ERROR(ComputeSPMV(A, x, *A.mgData->Axf)); 83 | RETURN_IF_HPCG_ERROR(ComputeRestriction(A, r)); 84 | #endif 85 | 86 | RETURN_IF_HPCG_ERROR(ComputeMG(*A.Ac, *A.mgData->rc, *A.mgData->xc)); 87 | RETURN_IF_HPCG_ERROR(ComputeProlongation(A, x)); 88 | 89 | int numberOfPostsmootherSteps = A.mgData->numberOfPostsmootherSteps; 90 | 91 | for(int i = 0; i < numberOfPostsmootherSteps; ++i) 92 | { 93 | RETURN_IF_HPCG_ERROR(ComputeSYMGS(A, r, x)); 94 | } 95 | } 96 | else 97 | { 98 | RETURN_IF_HPCG_ERROR(ComputeSYMGSZeroGuess(A, r, x)); 99 | } 100 | 101 | return 0; 102 | } 103 | -------------------------------------------------------------------------------- /src/ComputeMG.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. 
Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #ifndef COMPUTEMG_HPP 16 | #define COMPUTEMG_HPP 17 | #include "SparseMatrix.hpp" 18 | #include "Vector.hpp" 19 | 20 | int ComputeMG(const SparseMatrix & A, const Vector & r, Vector & x); 21 | 22 | #endif // COMPUTEMG_HPP 23 | -------------------------------------------------------------------------------- /src/ComputeMG_ref.cpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /*! 16 | @file ComputeMG_ref.cpp 17 | 18 | HPCG routine 19 | */ 20 | 21 | #include "ComputeMG_ref.hpp" 22 | #include "ComputeSYMGS_ref.hpp" 23 | #include "ComputeSPMV_ref.hpp" 24 | #include "ComputeRestriction_ref.hpp" 25 | #include "ComputeProlongation_ref.hpp" 26 | #include 27 | #include 28 | 29 | /*! 30 | 31 | @param[in] A the known system matrix 32 | @param[in] r the input vector 33 | @param[inout] x On exit contains the result of the multigrid V-cycle with r as the RHS, x is the approximation to Ax = r. 
34 | 35 | @return returns 0 upon success and non-zero otherwise 36 | 37 | @see ComputeMG 38 | */ 39 | int ComputeMG_ref(const SparseMatrix & A, const Vector & r, Vector & x) { 40 | assert(x.localLength==A.localNumberOfColumns); // Make sure x contain space for halo values 41 | 42 | ZeroVector(x); // initialize x to zero 43 | 44 | int ierr = 0; 45 | if (A.mgData!=0) { // Go to next coarse level if defined 46 | int numberOfPresmootherSteps = A.mgData->numberOfPresmootherSteps; 47 | for (int i=0; i< numberOfPresmootherSteps; ++i) ierr += ComputeSYMGS_ref(A, r, x); 48 | if (ierr!=0) return ierr; 49 | ierr = ComputeSPMV_ref(A, x, *A.mgData->Axf); if (ierr!=0) return ierr; 50 | // Perform restriction operation using simple injection 51 | ierr = ComputeRestriction_ref(A, r); if (ierr!=0) return ierr; 52 | ierr = ComputeMG_ref(*A.Ac,*A.mgData->rc, *A.mgData->xc); if (ierr!=0) return ierr; 53 | ierr = ComputeProlongation_ref(A, x); if (ierr!=0) return ierr; 54 | int numberOfPostsmootherSteps = A.mgData->numberOfPostsmootherSteps; 55 | for (int i=0; i< numberOfPostsmootherSteps; ++i) ierr += ComputeSYMGS_ref(A, r, x); 56 | if (ierr!=0) return ierr; 57 | } 58 | else { 59 | ierr = ComputeSYMGS_ref(A, r, x); 60 | if (ierr!=0) return ierr; 61 | } 62 | return 0; 63 | } 64 | 65 | -------------------------------------------------------------------------------- /src/ComputeMG_ref.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. 
/// Smallest of three integers.
static int
min3(int a, int b, int c) {
  return std::min(a, std::min(b, c));
}

/// Largest of three integers.
static int
max3(int a, int b, int c) {
  return std::max(a, std::max(b, c));
}

/*!
  Splits n into three integer factors x*y*z = n that are as close to a cube as
  possible, judged by the ratio (smallest factor)/(largest factor).

  The first factor is searched downward from round(cbrt(n)), the second downward
  from round(sqrt(n/f1)); the third is whatever remains. The first triple that
  reaches the best ratio is kept.

  @param[in]  n        number to factor; for n < 1 nothing is searched
  @param[out] x, y, z  the chosen factors; left untouched when n < 1
*/
static void
cubic_radical_search(int n, int & x, int & y, int & z) {
  // A non-positive n has no meaningful factorization; taking a fractional power
  // of a negative value would yield NaN, and converting NaN to int is undefined.
  if (n < 1)
    return;

  double best = 0.0;

  for (int f1 = static_cast<int>(std::cbrt(static_cast<double>(n)) + 0.5); f1 > 0; --f1) {
    if (n % f1 != 0)
      continue;

    const int n1 = n / f1;
    for (int f2 = static_cast<int>(std::sqrt(static_cast<double>(n1)) + 0.5); f2 > 0; --f2) {
      if (n1 % f2 != 0)
        continue;

      const int f3 = n1 / f2;
      const double current = static_cast<double>(min3(f1, f2, f3)) / max3(f1, f2, f3);
      if (current > best) { // strict: ties keep the earlier (larger f1) triple
        best = current;
        x = f1;
        y = f2;
        z = f3;
      }
    }
  }
}
/*!
  Integer power by repeated squaring.

  @param[in] x  base
  @param[in] p  exponent

  @return x itself when x is 0 or 1 (regardless of p), 0 when p is negative,
          otherwise x raised to the power p.
*/
static int
pow_i(int x, int p) {
  if (0 == x || 1 == x) return x;

  if (p < 0)
    return 0;

  int v = 1;
  while (p != 0) {
    if (1 & p)
      v *= x;
    p >>= 1;
    // Square only while exponent bits remain. An unconditional square after the
    // last bit is never used and can overflow int even though the result fits.
    if (p != 0)
      x *= x;
  }

  return v;
}
x : y; // test which factor is repeated 119 | 120 | } else if (factors.size() == 3 && factors[x] == 1 && factors[y] == 1 && iter->second == 1) { // three distinct and single prime factors 121 | z = iter->first; 122 | 123 | } else { // 3 or more prime factors so try all possible 3-subsets 124 | 125 | int i, distinct_factors[32+1], count_factors[32+1]; 126 | 127 | i = 0; 128 | for (std::map::iterator iter = factors.begin(); iter != factors.end(); ++iter, ++i) { 129 | distinct_factors[i] = iter->first; 130 | count_factors[i] = iter->second; 131 | } 132 | 133 | // count total number of prime factors in "c_main" and distribute some factors into "c1" 134 | MixedBaseCounter c_main(count_factors, factors.size()), c1(count_factors, factors.size()); 135 | 136 | // at the beginning, minimum area is the maximum area 137 | double area, min_area = 2.0 * xyz + 1.0; 138 | 139 | for (c1.next(); ! c1.is_zero(); c1.next()) { 140 | MixedBaseCounter c2(c_main, c1); // "c2" gets the factors remaining in "c_main" that "c1" doesn't have 141 | for (c2.next(); ! 
c2.is_zero(); c2.next()) { 142 | int tf1 = c1.product(distinct_factors); 143 | int tf2 = c2.product(distinct_factors); 144 | int tf3 = xyz / tf1/ tf2; // we derive the third dimension, we don't keep track of the factors it has 145 | 146 | area = tf1 * double(tf2) + tf2 * double(tf3) + tf1 * double(tf3); 147 | if (area < min_area) { 148 | min_area = area; 149 | x = tf1; 150 | y = tf2; 151 | z = tf3; 152 | } 153 | } 154 | } 155 | } 156 | #endif 157 | } 158 | -------------------------------------------------------------------------------- /src/ComputeOptimalShapeXYZ.hpp: -------------------------------------------------------------------------------- 1 | 2 | void ComputeOptimalShapeXYZ(int xyz, int & x, int & y, int & z); 3 | -------------------------------------------------------------------------------- /src/ComputeProlongation.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (c) 2019-2021 Advanced Micro Devices, Inc. 3 | * 4 | * Redistribution and use in source and binary forms, with or without modification, 5 | * are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, this 8 | * list of conditions and the following disclaimer. 9 | * 2. Redistributions in binary form must reproduce the above copyright notice, 10 | * this list of conditions and the following disclaimer in the documentation 11 | * and/or other materials provided with the distribution. 12 | * 3. Neither the name of the copyright holder nor the names of its contributors 13 | * may be used to endorse or promote products derived from this software without 14 | * specific prior written permission. 
15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 20 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 22 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | * POSSIBILITY OF SUCH DAMAGE. 26 | * 27 | * ************************************************************************ */ 28 | 29 | /*! 30 | @file ComputeProlongation.cpp 31 | 32 | HPCG routine 33 | */ 34 | 35 | #include "ComputeProlongation.hpp" 36 | 37 | #include 38 | 39 | template 40 | __launch_bounds__(BLOCKSIZE) 41 | __global__ void kernel_prolongation(local_int_t size, 42 | const local_int_t* __restrict__ f2cOperator, 43 | const double* __restrict__ coarse, 44 | double* __restrict__ fine, 45 | const local_int_t* __restrict__ perm_fine, 46 | const local_int_t* __restrict__ perm_coarse) 47 | { 48 | local_int_t idx_coarse = blockIdx.x * BLOCKSIZE + threadIdx.x; 49 | 50 | if(idx_coarse >= size) 51 | { 52 | return; 53 | } 54 | 55 | local_int_t idx_fine = __builtin_nontemporal_load(f2cOperator + idx_coarse); 56 | local_int_t idx_perm = __builtin_nontemporal_load(perm_coarse + idx_coarse); 57 | 58 | fine[perm_fine[idx_fine]] += coarse[idx_perm]; 59 | } 60 | 61 | /*! 62 | Routine to compute the coarse residual vector. 63 | 64 | @param[in] Af - Fine grid sparse matrix object containing pointers to current coarse grid correction and the f2c operator. 
/*!
  Launches the prolongation kernel, which adds coarse-grid values into the
  fine-grid vector (the kernel performs fine[...] += coarse[...]).

  @param[in]    Af - Fine grid matrix. Its mgData supplies rc (whose localLength is the
                     number of coarse points processed), d_f2cOperator and xc; Af.perm
                     and Af.Ac->perm are handed to the kernel as the fine and coarse
                     permutations.
  @param[inout] xf - Fine grid vector; its device values (d_values) are updated.

  @return Always 0; the kernel launch is not checked for errors here.
*/
int ComputeProlongation(const SparseMatrix& Af, Vector& xf)
{
    // One thread per coarse point: ceil(localLength / 128) blocks of 128 threads
    // (for localLength >= 1).
    dim3 blocks((Af.mgData->rc->localLength - 1) / 128 + 1);
    dim3 threads(128);

    // Arguments, in order: coarse point count, fine-to-coarse index map,
    // coarse values, fine values, fine permutation, coarse permutation.
    kernel_prolongation<128><<>>(
        Af.mgData->rc->localLength,
        Af.mgData->d_f2cOperator,
        Af.mgData->xc->d_values,
        xf.d_values,
        Af.perm,
        Af.Ac->perm);

    return 0;
}
15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 20 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 22 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | * POSSIBILITY OF SUCH DAMAGE. 26 | * 27 | * ************************************************************************ */ 28 | 29 | #ifndef COMPUTEPROLONGATION_HPP 30 | #define COMPUTEPROLONGATION_HPP 31 | 32 | #include "Vector.hpp" 33 | #include "SparseMatrix.hpp" 34 | 35 | int ComputeProlongation(const SparseMatrix& Af, Vector& xf); 36 | 37 | #endif // COMPUTEPROLONGATION_HPP 38 | -------------------------------------------------------------------------------- /src/ComputeProlongation_ref.cpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /*! 16 | @file ComputeProlongation_ref.cpp 17 | 18 | HPCG routine 19 | */ 20 | 21 | #ifndef HPCG_NO_OPENMP 22 | #include 23 | #endif 24 | 25 | #include "ComputeProlongation_ref.hpp" 26 | 27 | /*! 
28 | Routine to compute the coarse residual vector. 29 | 30 | @param[in] Af - Fine grid sparse matrix object containing pointers to current coarse grid correction and the f2c operator. 31 | @param[inout] xf - Fine grid solution vector, update with coarse grid correction. 32 | 33 | Note that the fine grid residual is never explicitly constructed. 34 | We only compute it for the fine grid points that will be injected into corresponding coarse grid points. 35 | 36 | @return Returns zero on success and a non-zero value otherwise. 37 | */ 38 | int ComputeProlongation_ref(const SparseMatrix & Af, Vector & xf) { 39 | 40 | double * xfv = xf.values; 41 | double * xcv = Af.mgData->xc->values; 42 | local_int_t * f2c = Af.mgData->f2cOperator; 43 | local_int_t nc = Af.mgData->rc->localLength; 44 | 45 | #ifndef HPCG_NO_OPENMP 46 | #pragma omp parallel for 47 | #endif 48 | // TODO: Somehow note that this loop can be safely vectorized since f2c has no repeated indices 49 | for (local_int_t i=0; i 50 | #endif 51 | 52 | #include "ComputeResidual.hpp" 53 | 54 | #include 55 | 56 | #ifdef OPT_ROCTX 57 | #include 58 | #endif 59 | 60 | template 61 | __device__ void reduce_max(local_int_t tid, double* data) 62 | { 63 | __syncthreads(); 64 | 65 | if(BLOCKSIZE > 512) { if(tid < 512 && tid + 512 < BLOCKSIZE) { data[tid] = max(data[tid], data[tid + 512]); } __syncthreads(); } 66 | if(BLOCKSIZE > 256) { if(tid < 256 && tid + 256 < BLOCKSIZE) { data[tid] = max(data[tid], data[tid + 256]); } __syncthreads(); } 67 | if(BLOCKSIZE > 128) { if(tid < 128 && tid + 128 < BLOCKSIZE) { data[tid] = max(data[tid], data[tid + 128]); } __syncthreads(); } 68 | if(BLOCKSIZE > 64) { if(tid < 64 && tid + 64 < BLOCKSIZE) { data[tid] = max(data[tid], data[tid + 64]); } __syncthreads(); } 69 | if(BLOCKSIZE > 32) { if(tid < 32 && tid + 32 < BLOCKSIZE) { data[tid] = max(data[tid], data[tid + 32]); } __syncthreads(); } 70 | if(BLOCKSIZE > 16) { if(tid < 16 && tid + 16 < BLOCKSIZE) { data[tid] = max(data[tid], data[tid 
+ 16]); } __syncthreads(); } 71 | if(BLOCKSIZE > 8) { if(tid < 8 && tid + 8 < BLOCKSIZE) { data[tid] = max(data[tid], data[tid + 8]); } __syncthreads(); } 72 | if(BLOCKSIZE > 4) { if(tid < 4 && tid + 4 < BLOCKSIZE) { data[tid] = max(data[tid], data[tid + 4]); } __syncthreads(); } 73 | if(BLOCKSIZE > 2) { if(tid < 2 && tid + 2 < BLOCKSIZE) { data[tid] = max(data[tid], data[tid + 2]); } __syncthreads(); } 74 | if(BLOCKSIZE > 1) { if(tid < 1 && tid + 1 < BLOCKSIZE) { data[tid] = max(data[tid], data[tid + 1]); } __syncthreads(); } 75 | } 76 | 77 | template 78 | __launch_bounds__(BLOCKSIZE) 79 | __global__ void kernel_residual_part1(local_int_t n, 80 | const double* __restrict__ v1, 81 | const double* __restrict__ v2, 82 | double* __restrict__ workspace) 83 | { 84 | local_int_t tid = threadIdx.x; 85 | local_int_t gid = blockIdx.x * BLOCKSIZE + tid; 86 | local_int_t inc = gridDim.x * BLOCKSIZE; 87 | 88 | __shared__ double sdata[BLOCKSIZE]; 89 | sdata[tid] = 0.0; 90 | 91 | for(local_int_t idx = gid; idx < n; idx += inc) 92 | { 93 | sdata[tid] = max(sdata[tid], fabs(v1[idx] - v2[idx])); 94 | } 95 | 96 | reduce_max(tid, sdata); 97 | 98 | if(tid == 0) 99 | { 100 | workspace[blockIdx.x] = sdata[0]; 101 | } 102 | } 103 | 104 | template 105 | __launch_bounds__(BLOCKSIZE) 106 | __global__ void kernel_residual_part2(double* workspace) 107 | { 108 | __shared__ double sdata[BLOCKSIZE]; 109 | sdata[threadIdx.x] = workspace[threadIdx.x]; 110 | 111 | __syncthreads(); 112 | 113 | reduce_max(threadIdx.x, sdata); 114 | 115 | if(threadIdx.x == 0) 116 | { 117 | workspace[0] = sdata[0]; 118 | } 119 | } 120 | 121 | int ComputeResidual(local_int_t n, const Vector& v1, const Vector& v2, double& residual) 122 | { 123 | double* tmp = reinterpret_cast(workspace); 124 | 125 | kernel_residual_part1<256><<<256, 256, 0, stream_interior>>>(n, 126 | v1.d_values, 127 | v2.d_values, 128 | tmp); 129 | kernel_residual_part2<256><<<1, 256, 0, stream_interior>>>(tmp); 130 | 131 | double local_residual; 132 | 
HIP_CHECK(hipMemcpyAsync(&local_residual, tmp, sizeof(double), hipMemcpyDeviceToHost, stream_interior)); 133 | HIP_CHECK(hipStreamSynchronize(stream_interior)); 134 | 135 | #ifndef HPCG_NO_MPI 136 | double global_residual = 0.0; 137 | 138 | #ifdef OPT_ROCTX 139 | roctxRangePush("MPI AllReduce"); 140 | #endif 141 | MPI_Allreduce(&local_residual, &global_residual, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); 142 | #ifdef OPT_ROCTX 143 | roctxRangePop(); 144 | #endif 145 | 146 | residual = global_residual; 147 | #else 148 | residual = local_residual; 149 | #endif 150 | 151 | return 0; 152 | } 153 | -------------------------------------------------------------------------------- /src/ComputeResidual.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #ifndef COMPUTERESIDUAL_HPP 16 | #define COMPUTERESIDUAL_HPP 17 | #include "Vector.hpp" 18 | int ComputeResidual(const local_int_t n, const Vector & v1, const Vector & v2, double & residual); 19 | #endif // COMPUTERESIDUAL_HPP 20 | -------------------------------------------------------------------------------- /src/ComputeResidual_ref.cpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. 
Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /*! 16 | @file ComputeResidual_ref.cpp 17 | 18 | HPCG routine 19 | */ 20 | #ifndef HPCG_NO_MPI 21 | #include 22 | #endif 23 | #ifndef HPCG_NO_OPENMP 24 | #include 25 | #endif 26 | 27 | #include "Vector.hpp" 28 | 29 | #ifdef HPCG_DETAILED_DEBUG 30 | #include 31 | #include "hpcg.hpp" 32 | #endif 33 | 34 | #include // needed for fabs 35 | #include "ComputeResidual_ref.hpp" 36 | #ifdef HPCG_DETAILED_DEBUG 37 | #include 38 | #endif 39 | 40 | /*! 41 | Routine to compute the inf-norm difference between two vectors where: 42 | 43 | @param[in] n number of vector elements (local to this processor) 44 | @param[in] v1, v2 input vectors 45 | @param[out] residual pointer to scalar value; on exit, will contain result: inf-norm difference 46 | 47 | @return Returns zero on success and a non-zero value otherwise. 
48 | */ 49 | int ComputeResidual_ref(const local_int_t n, const Vector & v1, const Vector & v2, double & residual) { 50 | 51 | double * v1v = v1.values; 52 | double * v2v = v2.values; 53 | double local_residual = 0.0; 54 | 55 | #ifndef HPCG_NO_OPENMP 56 | #pragma omp parallel default(none) shared(local_residual, v1v, v2v) 57 | { 58 | double threadlocal_residual = 0.0; 59 | #pragma omp for 60 | for (local_int_t i=0; i threadlocal_residual) threadlocal_residual = diff; 63 | } 64 | #pragma omp critical 65 | { 66 | if (threadlocal_residual>local_residual) local_residual = threadlocal_residual; 67 | } 68 | } 69 | #else // No threading 70 | for (local_int_t i=0; i local_residual) local_residual = diff; 73 | #ifdef HPCG_DETAILED_DEBUG 74 | HPCG_fout << " Computed, exact, diff = " << v1v[i] << " " << v2v[i] << " " << diff << std::endl; 75 | #endif 76 | } 77 | #endif 78 | 79 | #ifndef HPCG_NO_MPI 80 | // Use MPI's reduce function to collect all partial sums 81 | double global_residual = 0; 82 | MPI_Allreduce(&local_residual, &global_residual, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); 83 | residual = global_residual; 84 | #else 85 | residual = local_residual; 86 | #endif 87 | 88 | return 0; 89 | } 90 | -------------------------------------------------------------------------------- /src/ComputeResidual_ref.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. 
Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #ifndef COMPUTERESIDUAL_REF_HPP 16 | #define COMPUTERESIDUAL_REF_HPP 17 | #include "Vector.hpp" 18 | int ComputeResidual_ref(const local_int_t n, const Vector & v1, const Vector & v2, double & residual); 19 | #endif // COMPUTERESIDUAL_REF_HPP 20 | -------------------------------------------------------------------------------- /src/ComputeRestriction.hpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (c) 2019 Advanced Micro Devices, Inc. 3 | * 4 | * Redistribution and use in source and binary forms, with or without modification, 5 | * are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, this 8 | * list of conditions and the following disclaimer. 9 | * 2. Redistributions in binary form must reproduce the above copyright notice, 10 | * this list of conditions and the following disclaimer in the documentation 11 | * and/or other materials provided with the distribution. 12 | * 3. Neither the name of the copyright holder nor the names of its contributors 13 | * may be used to endorse or promote products derived from this software without 14 | * specific prior written permission. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
19 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 20 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 22 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | * POSSIBILITY OF SUCH DAMAGE. 26 | * 27 | * ************************************************************************ */ 28 | 29 | #ifndef COMPUTERESTRICTION_HPP 30 | #define COMPUTERESTRICTION_HPP 31 | 32 | #include "Vector.hpp" 33 | #include "SparseMatrix.hpp" 34 | 35 | int ComputeRestriction(const SparseMatrix& A, const Vector& rf); 36 | int ComputeFusedSpMVRestriction(const SparseMatrix& A, const Vector& rf, Vector& xf); 37 | 38 | #endif // COMPUTERESTRICTION_HPP 39 | -------------------------------------------------------------------------------- /src/ComputeRestriction_ref.cpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /*! 16 | @file ComputeRestriction_ref.cpp 17 | 18 | HPCG routine 19 | */ 20 | 21 | 22 | #ifndef HPCG_NO_OPENMP 23 | #include 24 | #endif 25 | 26 | #include "ComputeRestriction_ref.hpp" 27 | 28 | /*! 29 | Routine to compute the coarse residual vector. 
30 | 31 | @param[inout] A - Sparse matrix object containing pointers to mgData->Axf, the fine grid matrix-vector product and mgData->rc the coarse residual vector. 32 | @param[in] rf - Fine grid RHS. 33 | 34 | 35 | Note that the fine grid residual is never explicitly constructed. 36 | We only compute it for the fine grid points that will be injected into corresponding coarse grid points. 37 | 38 | @return Returns zero on success and a non-zero value otherwise. 39 | */ 40 | int ComputeRestriction_ref(const SparseMatrix & A, const Vector & rf) { 41 | 42 | double * Axfv = A.mgData->Axf->values; 43 | double * rfv = rf.values; 44 | double * rcv = A.mgData->rc->values; 45 | local_int_t * f2c = A.mgData->f2cOperator; 46 | local_int_t nc = A.mgData->rc->localLength; 47 | 48 | #ifndef HPCG_NO_OPENMP 49 | #pragma omp parallel for 50 | #endif 51 | for (local_int_t i=0; i 29 | #endif 30 | #include 31 | 32 | /*! 33 | Routine to compute matrix vector product y = Ax where: 34 | Off-processor values of x are fetched inside this routine via ExchangeHalo when MPI is enabled. 35 | 36 | This is the reference SPMV implementation. It CANNOT be modified for the 37 | purposes of this benchmark. 38 | 39 | @param[in] A the known system matrix 40 | @param[in] x the known vector 41 | @param[out] y On exit contains the result: Ax. 
42 | 43 | @return returns 0 upon success and non-zero otherwise 44 | 45 | @see ComputeSPMV 46 | */ 47 | int ComputeSPMV_ref( const SparseMatrix & A, Vector & x, Vector & y) { 48 | 49 | assert(x.localLength>=A.localNumberOfColumns); // Test vector lengths 50 | assert(y.localLength>=A.localNumberOfRows); 51 | 52 | #ifndef HPCG_NO_MPI 53 | ExchangeHalo(A,x); 54 | #endif 55 | const double * const xv = x.values; 56 | double * const yv = y.values; 57 | const local_int_t nrow = A.localNumberOfRows; 58 | #ifndef HPCG_NO_OPENMP 59 | #pragma omp parallel for 60 | #endif 61 | for (local_int_t i=0; i< nrow; i++) { 62 | double sum = 0.0; 63 | const double * const cur_vals = A.matrixValues[i]; 64 | const local_int_t * const cur_inds = A.mtxIndL[i]; 65 | const int cur_nnz = A.nonzerosInRow[i]; 66 | 67 | for (int j=0; j< cur_nnz; j++) 68 | sum += cur_vals[j]*xv[cur_inds[j]]; 69 | yv[i] = sum; 70 | } 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /src/ComputeSPMV_ref.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. 
Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #ifndef COMPUTESPMV_REF_HPP 16 | #define COMPUTESPMV_REF_HPP 17 | #include "Vector.hpp" 18 | #include "SparseMatrix.hpp" 19 | 20 | int ComputeSPMV_ref( const SparseMatrix & A, Vector & x, Vector & y); 21 | 22 | #endif // COMPUTESPMV_REF_HPP 23 | -------------------------------------------------------------------------------- /src/ComputeSYMGS.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /* ************************************************************************ 16 | * Modifications (c) 2019 Advanced Micro Devices, Inc. 17 | * 18 | * Redistribution and use in source and binary forms, with or without modification, 19 | * are permitted provided that the following conditions are met: 20 | * 21 | * 1. Redistributions of source code must retain the above copyright notice, this 22 | * list of conditions and the following disclaimer. 23 | * 2. Redistributions in binary form must reproduce the above copyright notice, 24 | * this list of conditions and the following disclaimer in the documentation 25 | * and/or other materials provided with the distribution. 26 | * 3. Neither the name of the copyright holder nor the names of its contributors 27 | * may be used to endorse or promote products derived from this software without 28 | * specific prior written permission. 
29 | * 30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 32 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 33 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 34 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 35 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 36 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 | * POSSIBILITY OF SUCH DAMAGE. 40 | * 41 | * ************************************************************************ */ 42 | 43 | #ifndef COMPUTESYMGS_HPP 44 | #define COMPUTESYMGS_HPP 45 | 46 | #include "SparseMatrix.hpp" 47 | #include "Vector.hpp" 48 | 49 | int ComputeSYMGS(const SparseMatrix & A, const Vector& r, Vector& x); 50 | int ComputeSYMGSZeroGuess(const SparseMatrix & A, const Vector& r, Vector& x); 51 | 52 | #endif // COMPUTESYMGS_HPP 53 | -------------------------------------------------------------------------------- /src/ComputeSYMGS_ref.cpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /*! 
16 | @file ComputeSYMGS_ref.cpp 17 | 18 | HPCG routine 19 | */ 20 | 21 | #ifndef HPCG_NO_MPI 22 | #include "ExchangeHalo.hpp" 23 | #endif 24 | #include "ComputeSYMGS_ref.hpp" 25 | #include 26 | 27 | /*! 28 | Computes one step of symmetric Gauss-Seidel: 29 | 30 | Assumption about the structure of matrix A: 31 | - Each row 'i' of the matrix has nonzero diagonal value whose address is matrixDiagonal[i] 32 | - Entries in row 'i' are ordered such that: 33 | - lower triangular terms are stored before the diagonal element. 34 | - upper triangular terms are stored after the diagonal element. 35 | - No other assumptions are made about entry ordering. 36 | 37 | Symmetric Gauss-Seidel notes: 38 | - We use the input vector x as the RHS and start with an initial guess for y of all zeros. 39 | - We perform one forward sweep. x should be initially zero on the first GS sweep, but we do not attempt to exploit this fact. 40 | - We then perform one back sweep. 41 | - For simplicity we include the diagonal contribution in the for-j loop, then correct the sum after 42 | 43 | @param[in] A the known system matrix 44 | @param[in] r the input vector 45 | @param[inout] x On entry, x should contain relevant values, on exit x contains the result of one symmetric GS sweep with r as the RHS. 46 | 47 | 48 | @warning Early versions of this kernel (Version 1.1 and earlier) had the r and x arguments in reverse order, and out of sync with other kernels. 
49 | 50 | @return returns 0 upon success and non-zero otherwise 51 | 52 | @see ComputeSYMGS 53 | */ 54 | int ComputeSYMGS_ref( const SparseMatrix & A, const Vector & r, Vector & x) { 55 | 56 | assert(x.localLength==A.localNumberOfColumns); // Make sure x contain space for halo values 57 | 58 | #ifndef HPCG_NO_MPI 59 | ExchangeHalo(A,x); 60 | #endif 61 | 62 | const local_int_t nrow = A.localNumberOfRows; 63 | double ** matrixDiagonal = A.matrixDiagonal; // An array of pointers to the diagonal entries A.matrixValues 64 | const double * const rv = r.values; 65 | double * const xv = x.values; 66 | 67 | for (local_int_t i=0; i< nrow; i++) { 68 | const double * const currentValues = A.matrixValues[i]; 69 | const local_int_t * const currentColIndices = A.mtxIndL[i]; 70 | const int currentNumberOfNonzeros = A.nonzerosInRow[i]; 71 | const double currentDiagonal = matrixDiagonal[i][0]; // Current diagonal value 72 | double sum = rv[i]; // RHS value 73 | 74 | for (int j=0; j< currentNumberOfNonzeros; j++) { 75 | local_int_t curCol = currentColIndices[j]; 76 | sum -= currentValues[j] * xv[curCol]; 77 | } 78 | sum += xv[i]*currentDiagonal; // Remove diagonal contribution from previous loop 79 | 80 | xv[i] = sum/currentDiagonal; 81 | 82 | } 83 | 84 | // Now the back sweep. 
85 | 86 | for (local_int_t i=nrow-1; i>=0; i--) { 87 | const double * const currentValues = A.matrixValues[i]; 88 | const local_int_t * const currentColIndices = A.mtxIndL[i]; 89 | const int currentNumberOfNonzeros = A.nonzerosInRow[i]; 90 | const double currentDiagonal = matrixDiagonal[i][0]; // Current diagonal value 91 | double sum = rv[i]; // RHS value 92 | 93 | for (int j = 0; j< currentNumberOfNonzeros; j++) { 94 | local_int_t curCol = currentColIndices[j]; 95 | sum -= currentValues[j]*xv[curCol]; 96 | } 97 | sum += xv[i]*currentDiagonal; // Remove diagonal contribution from previous loop 98 | 99 | xv[i] = sum/currentDiagonal; 100 | } 101 | 102 | return 0; 103 | } 104 | 105 | -------------------------------------------------------------------------------- /src/ComputeSYMGS_ref.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #ifndef COMPUTESYMGS_REF_HPP 16 | #define COMPUTESYMGS_REF_HPP 17 | #include "SparseMatrix.hpp" 18 | #include "Vector.hpp" 19 | 20 | int ComputeSYMGS_ref( const SparseMatrix & A, const Vector & r, Vector & x); 21 | 22 | #endif // COMPUTESYMGS_REF_HPP 23 | -------------------------------------------------------------------------------- /src/ComputeWAXPBY.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. 
Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /* ************************************************************************ 16 | * Modifications (c) 2019 Advanced Micro Devices, Inc. 17 | * 18 | * Redistribution and use in source and binary forms, with or without modification, 19 | * are permitted provided that the following conditions are met: 20 | * 21 | * 1. Redistributions of source code must retain the above copyright notice, this 22 | * list of conditions and the following disclaimer. 23 | * 2. Redistributions in binary form must reproduce the above copyright notice, 24 | * this list of conditions and the following disclaimer in the documentation 25 | * and/or other materials provided with the distribution. 26 | * 3. Neither the name of the copyright holder nor the names of its contributors 27 | * may be used to endorse or promote products derived from this software without 28 | * specific prior written permission. 29 | * 30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 32 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 33 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 34 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 35 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 36 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 | * POSSIBILITY OF SUCH DAMAGE. 
40 | * 41 | * ************************************************************************ */ 42 | 43 | #ifndef COMPUTEWAXPBY_HPP 44 | #define COMPUTEWAXPBY_HPP 45 | 46 | #include "Vector.hpp" 47 | 48 | int ComputeWAXPBY(local_int_t n, 49 | double alpha, 50 | const Vector& x, 51 | double beta, 52 | const Vector& y, 53 | Vector& w, 54 | bool& isOptimized); 55 | 56 | int ComputeFusedWAXPBYDot(local_int_t n, 57 | double alpha, 58 | const Vector& x, 59 | Vector& y, 60 | double& result, 61 | double& time_allreduce); 62 | 63 | #endif // COMPUTEWAXPBY_HPP 64 | -------------------------------------------------------------------------------- /src/ComputeWAXPBY_ref.cpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /*! 16 | @file ComputeWAXPBY_ref.cpp 17 | 18 | HPCG routine 19 | */ 20 | 21 | #include "ComputeWAXPBY_ref.hpp" 22 | #ifndef HPCG_NO_OPENMP 23 | #include 24 | #endif 25 | #include 26 | /*! 27 | Routine to compute the update of a vector with the sum of two 28 | scaled vectors where: w = alpha*x + beta*y 29 | 30 | This is the reference WAXPBY implementation. It CANNOT be modified for the 31 | purposes of this benchmark. 32 | 33 | @param[in] n the number of vector elements (on this processor) 34 | @param[in] alpha, beta the scalars applied to x and y respectively. 35 | @param[in] x, y the input vectors 36 | @param[out] w the output vector.
37 | 38 | @return returns 0 upon success and non-zero otherwise 39 | 40 | @see ComputeWAXPBY 41 | */ 42 | int ComputeWAXPBY_ref(const local_int_t n, const double alpha, const Vector & x, 43 | const double beta, const Vector & y, Vector & w) { 44 | 45 | assert(x.localLength>=n); // Test vector lengths 46 | assert(y.localLength>=n); 47 | 48 | const double * const xv = x.values; 49 | const double * const yv = y.values; 50 | double * const wv = w.values; 51 | 52 | if (alpha==1.0) { 53 | #ifndef HPCG_NO_OPENMP 54 | #pragma omp parallel for 55 | #endif 56 | for (local_int_t i=0; i 22 | #include 23 | #include 24 | 25 | #include "ComputeOptimalShapeXYZ.hpp" 26 | #include "GenerateGeometry.hpp" 27 | 28 | #ifdef HPCG_DEBUG 29 | #include 30 | #include "hpcg.hpp" 31 | using std::endl; 32 | 33 | #endif 34 | 35 | /*! 36 | Computes the factorization of the total number of processes into a 37 | 3-dimensional process grid that is as close as possible to a cube. The 38 | quality of the factorization depends on the prime number structure of the 39 | total number of processes. It then stores this decompostion together with the 40 | parallel parameters of the run in the geometry data structure. 
41 | 42 | @param[in] size total number of MPI processes 43 | @param[in] rank this process' rank among other MPI processes 44 | @param[in] numThreads number of OpenMP threads in this process 45 | @param[in] pz z-dimension processor ID where second zone of nz values start 46 | @param[in] nx, ny, nz number of grid points for each local block in the x, y, and z dimensions, respectively 47 | @param[out] geom data structure that will store the above parameters and the factoring of total number of processes into three dimensions 48 | */ 49 | void GenerateGeometry(int size, int rank, int numThreads, 50 | int pz, local_int_t zl, local_int_t zu, 51 | local_int_t nx, local_int_t ny, local_int_t nz, 52 | int npx, int npy, int npz, 53 | Geometry * geom) 54 | { 55 | 56 | if (npx * npy * npz <= 0 || npx * npy * npz > size) 57 | ComputeOptimalShapeXYZ( size, npx, npy, npz ); 58 | 59 | int * partz_ids = 0; 60 | local_int_t * partz_nz = 0; 61 | int npartz = 0; 62 | if (pz==0) { // No variation in nz sizes 63 | npartz = 1; 64 | partz_ids = new int[1]; 65 | partz_nz = new local_int_t[1]; 66 | partz_ids[0] = npz; 67 | partz_nz[0] = nz; 68 | } 69 | else { 70 | npartz = 2; 71 | partz_ids = new int[2]; 72 | partz_ids[0] = pz; 73 | partz_ids[1] = npz; 74 | partz_nz = new local_int_t[2]; 75 | partz_nz[0] = zl; 76 | partz_nz[1] = zu; 77 | } 78 | // partz_ids[npartz-1] = npz; // The last element of this array is always npz 79 | int ipartz_ids = 0; 80 | for (int i=0; i< npartz; ++i) { 81 | assert(ipartz_ids 2^31 28 | */ 29 | typedef int local_int_t; 30 | //typedef long long local_int_t; 31 | 32 | /*! 33 | This defines the type for integers that have global dimension 34 | 35 | Define as "long long" when global problem dimension is > 2^31 36 | */ 37 | //typedef int global_int_t; 38 | typedef long long global_int_t; 39 | 40 | // This macro should be defined if the global_int_t is not long long 41 | // in order to stop complaints from non-C++11 compliant compilers. 
42 | //#define HPCG_NO_LONG_LONG 43 | 44 | /*! 45 | This is a data structure to contain all processor geometry information 46 | */ 47 | struct Geometry_STRUCT { 48 | int size; //!< Number of MPI processes 49 | int rank; //!< This process' rank in the range [0 to size - 1] 50 | int numThreads; //!< This process' number of threads 51 | local_int_t nx; //!< Number of x-direction grid points for each local subdomain 52 | local_int_t ny; //!< Number of y-direction grid points for each local subdomain 53 | local_int_t nz; //!< Number of z-direction grid points for each local subdomain 54 | int npx; //!< Number of processors in x-direction 55 | int npy; //!< Number of processors in y-direction 56 | int npz; //!< Number of processors in z-direction 57 | int pz; //!< partition ID of z-dimension process that starts the second region of nz values 58 | int npartz; //!< Number of partitions with varying nz values 59 | int * partz_ids; //!< Array of partition ids of processor in z-direction where new value of nz starts (valid values are 1 to npz) 60 | local_int_t * partz_nz; //!< Array of length npartz containing the nz values for each partition 61 | int ipx; //!< Current rank's x location in the npx by npy by npz processor grid 62 | int ipy; //!< Current rank's y location in the npx by npy by npz processor grid 63 | int ipz; //!< Current rank's z location in the npx by npy by npz processor grid 64 | global_int_t gnx; //!< Global number of x-direction grid points 65 | global_int_t gny; //!< Global number of y-direction grid points 66 | global_int_t gnz; //!< Global number of z-direction grid points 67 | global_int_t gix0; //!< Base global x index for this rank in the npx by npy by npz processor grid 68 | global_int_t giy0; //!< Base global y index for this rank in the npx by npy by npz processor grid 69 | global_int_t giz0; //!< Base global z index for this rank in the npx by npy by npz processor grid 70 | 71 | }; 72 | typedef struct Geometry_STRUCT Geometry; 73 | 74 | /*! 
75 | Returns the rank of the MPI process that is assigned the global row index 76 | given as the input argument. 77 | 78 | @param[in] geom The description of the problem's geometry. 79 | @param[in] index The global row index 80 | 81 | @return Returns the MPI rank of the process assigned the row 82 | */ 83 | inline int ComputeRankOfMatrixRow(const Geometry & geom, global_int_t index) { 84 | global_int_t gnx = geom.gnx; 85 | global_int_t gny = geom.gny; 86 | 87 | global_int_t iz = index/(gny*gnx); 88 | global_int_t iy = (index-iz*gny*gnx)/gnx; 89 | global_int_t ix = index%gnx; 90 | // We now permit varying values for nz for any nx-by-ny plane of MPI processes. 91 | // npartz is the number of different groups of nx-by-ny groups of processes. 92 | // partz_ids is an array of length npartz where each value indicates the z process of the last process in the ith nx-by-ny group. 93 | // partz_nz is an array of length npartz containing the value of nz for the ith group. 94 | 95 | // With no variation, npartz = 1, partz_ids[0] = npz, partz_nz[0] = nz 96 | 97 | int ipz = 0; 98 | int ipartz_ids = 0; 99 | for (int i=0; i< geom.npartz; ++i) { 100 | int ipart_nz = geom.partz_nz[i]; 101 | ipartz_ids = geom.partz_ids[i] - ipartz_ids; 102 | if (iz<= ipart_nz*ipartz_ids) { 103 | ipz += iz/ipart_nz; 104 | break; 105 | } else { 106 | ipz += ipartz_ids; 107 | iz -= ipart_nz*ipartz_ids; 108 | } 109 | 110 | } 111 | // global_int_t ipz = iz/geom.nz; 112 | int ipy = iy/geom.ny; 113 | int ipx = ix/geom.nx; 114 | int rank = ipx+ipy*geom.npx+ipz*geom.npy*geom.npx; 115 | return rank; 116 | } 117 | 118 | 119 | /*! 120 | Destructor for geometry data. 
121 | 122 | @param[inout] data the geometry data structure whose storage is deallocated 123 | */ 124 | inline void DeleteGeometry(Geometry & geom) { 125 | 126 | delete [] geom.partz_nz; 127 | delete [] geom.partz_ids; 128 | 129 | return; 130 | } 131 | 132 | 133 | 134 | #endif // GEOMETRY_HPP 135 | -------------------------------------------------------------------------------- /src/MGData.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /* ************************************************************************ 16 | * Modifications (c) 2019 Advanced Micro Devices, Inc. 17 | * 18 | * Redistribution and use in source and binary forms, with or without modification, 19 | * are permitted provided that the following conditions are met: 20 | * 21 | * 1. Redistributions of source code must retain the above copyright notice, this 22 | * list of conditions and the following disclaimer. 23 | * 2. Redistributions in binary form must reproduce the above copyright notice, 24 | * this list of conditions and the following disclaimer in the documentation 25 | * and/or other materials provided with the distribution. 26 | * 3. Neither the name of the copyright holder nor the names of its contributors 27 | * may be used to endorse or promote products derived from this software without 28 | * specific prior written permission. 
29 | * 30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 32 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 33 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 34 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 35 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 36 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 | * POSSIBILITY OF SUCH DAMAGE. 40 | * 41 | * ************************************************************************ */ 42 | 43 | /*! 44 | @file MGData.hpp 45 | 46 | HPCG data structure 47 | */ 48 | 49 | #ifndef MGDATA_HPP 50 | #define MGDATA_HPP 51 | 52 | #include 53 | #include 54 | 55 | #include "utils.hpp" 56 | #include "SparseMatrix.hpp" 57 | #include "Vector.hpp" 58 | 59 | struct MGData_STRUCT { 60 | int numberOfPresmootherSteps; // Call ComputeSYMGS this many times prior to coarsening 61 | int numberOfPostsmootherSteps; // Call ComputeSYMGS this many times after coarsening 62 | local_int_t * f2cOperator; //!< 1D array containing the fine operator local IDs that will be injected into coarse space. 63 | Vector * rc; // coarse grid residual vector 64 | Vector * xc; // coarse grid solution vector 65 | Vector * Axf; // fine grid residual vector 66 | /*! 67 | This is for storing optimized data structres created in OptimizeProblem and 68 | used inside optimized ComputeSPMV(). 69 | */ 70 | void * optimizationData; 71 | 72 | local_int_t* d_f2cOperator; //!< f2cOperator on device 73 | local_int_t* d_c2fOperator; 74 | }; 75 | typedef struct MGData_STRUCT MGData; 76 | 77 | /*! 
78 | Constructor for the data structure of CG vectors. 79 | 80 | @param[in] Ac - Fully-formed coarse matrix 81 | @param[in] f2cOperator - 82 | @param[out] data the data structure for CG vectors that will be allocated to get it ready for use in CG iterations 83 | */ 84 | inline void InitializeMGData(local_int_t* d_f2cOperator, local_int_t* d_c2fOperator, Vector* rc, Vector* xc, Vector* Axf, MGData & data) { 85 | data.numberOfPresmootherSteps = 1; 86 | data.numberOfPostsmootherSteps = 1; 87 | data.f2cOperator = nullptr; 88 | data.d_f2cOperator = d_f2cOperator; // Space for injection operator 89 | data.d_c2fOperator = d_c2fOperator; 90 | data.rc = rc; 91 | data.xc = xc; 92 | data.Axf = Axf; 93 | return; 94 | } 95 | 96 | /*! 97 | Destructor for the CG vectors data. 98 | 99 | @param[inout] data the MG data structure whose storage is deallocated 100 | */ 101 | inline void DeleteMGData(MGData & data) { 102 | 103 | if (data.f2cOperator) delete [] data.f2cOperator; 104 | DeleteVector(*data.Axf); 105 | DeleteVector(*data.rc); 106 | DeleteVector(*data.xc); 107 | #ifdef HPCG_REFERENCE 108 | HIPDeleteVector(*data.Axf); 109 | #endif 110 | HIPDeleteVector(*data.rc); 111 | HIPDeleteVector(*data.xc); 112 | delete data.Axf; 113 | delete data.rc; 114 | delete data.xc; 115 | 116 | HIP_CHECK(deviceFree(data.d_f2cOperator)); 117 | HIP_CHECK(deviceFree(data.d_c2fOperator)); 118 | 119 | return; 120 | } 121 | 122 | #endif // MGDATA_HPP 123 | 124 | -------------------------------------------------------------------------------- /src/Memory.hpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (c) 2019 Advanced Micro Devices, Inc. 3 | * 4 | * Redistribution and use in source and binary forms, with or without modification, 5 | * are permitted provided that the following conditions are met: 6 | * 7 | * 1. 
Redistributions of source code must retain the above copyright notice, this 8 | * list of conditions and the following disclaimer. 9 | * 2. Redistributions in binary form must reproduce the above copyright notice, 10 | * this list of conditions and the following disclaimer in the documentation 11 | * and/or other materials provided with the distribution. 12 | * 3. Neither the name of the copyright holder nor the names of its contributors 13 | * may be used to endorse or promote products derived from this software without 14 | * specific prior written permission. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 20 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 22 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | * POSSIBILITY OF SUCH DAMAGE. 26 | * 27 | * ************************************************************************ */ 28 | 29 | /*! 
30 | @file Memory.hpp 31 | 32 | Device memory management 33 | */ 34 | 35 | #ifndef MEMORY_HPP 36 | #define MEMORY_HPP 37 | 38 | #include 39 | #include 40 | #include 41 | #include 42 | 43 | #include "Geometry.hpp" 44 | 45 | struct hipMemObject_t 46 | { 47 | size_t size; 48 | char* address; 49 | }; 50 | 51 | class hipAllocator_t 52 | { 53 | public: 54 | 55 | hipAllocator_t(void); 56 | ~hipAllocator_t(void); 57 | 58 | hipError_t Initialize(int rank, 59 | int nprocs, 60 | local_int_t nx, 61 | local_int_t ny, 62 | local_int_t nz); 63 | hipError_t Clear(void); 64 | 65 | hipError_t Alloc(void** ptr, size_t size); 66 | hipError_t Realloc(void* ptr, size_t size); 67 | hipError_t Free(void* ptr); 68 | 69 | inline size_t GetFreeMemory(void) const { return this->free_mem_; } 70 | inline size_t GetUsedMemory(void) const { return this->used_mem_; } 71 | inline size_t GetTotalMemory(void) const { return this->total_mem_; } 72 | 73 | private: 74 | 75 | // Current rank 76 | int rank_; 77 | 78 | // Returns the maximum memory requirements 79 | size_t ComputeMaxMemoryRequirements_(int nprocs, 80 | local_int_t nx, 81 | local_int_t ny, 82 | local_int_t nz) const; 83 | 84 | // Total memory size 85 | size_t total_mem_; 86 | 87 | // Free memory size 88 | size_t free_mem_; 89 | 90 | // Used memory size 91 | size_t used_mem_; 92 | 93 | // Device memory buffer 94 | char* buffer_; 95 | 96 | // List to keep track of allocations 97 | std::list objects_; 98 | }; 99 | 100 | hipError_t deviceMalloc(void** ptr, size_t size); 101 | hipError_t deviceRealloc(void* ptr, size_t size); 102 | hipError_t deviceDefrag(void** ptr, size_t size); 103 | hipError_t deviceFree(void* ptr); 104 | 105 | #endif // MEMORY_HPP 106 | -------------------------------------------------------------------------------- /src/MixedBaseCounter.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include "MixedBaseCounter.hpp" 5 | 6 | MixedBaseCounter::MixedBaseCounter(int 
/*!
  Odometer-style counter in which every digit has its own maximum value.

  Digit i counts from 0 up to max_counts[i] inclusive; next() increments the
  lowest digit and carries into the following one on overflow. Only the first
  \c length digits take part in next(), is_zero() and product().
*/
class MixedBaseCounter {
private:
  enum { MAX_LENGTH = 32 };        //!< capacity of the digit arrays (one extra slot is kept as a zero terminator)
  int length;                      //!< number of prime factor counts (cannot exceed 32 for a 32-bit integer)
  int max_counts[MAX_LENGTH + 1];  //!< maximum value for prime factor counts
  int cur_counts[MAX_LENGTH + 1];  //!< current prime factor counts

public:
  MixedBaseCounter(int *counts, int length);
  MixedBaseCounter(MixedBaseCounter & left, MixedBaseCounter & right);
  void next();
  int is_zero();
  int product(int * multipliers);
};

/*!
  Builds a counter whose digit maxima are the first \c length entries of
  \c counts; all digits start at zero.

  Only \c length entries of \c counts are read (earlier code always read 32,
  running past the end of shorter arrays). \c length is clamped to the range
  [0, 32] so the internal arrays can never be overrun. Unused slots are zeroed.

  @param[in] counts maximum value of each digit; must hold at least \c length entries
  @param[in] length number of digits
*/
MixedBaseCounter::MixedBaseCounter(int *counts, int length) {
  if (length < 0)
    length = 0;
  if (length > MAX_LENGTH)
    length = MAX_LENGTH;
  this->length = length;

  for (int i = 0; i <= MAX_LENGTH; ++i) {
    this->max_counts[i] = (i < length) ? counts[i] : 0;
    this->cur_counts[i] = 0;
  }
}

/*!
  Builds a counter over what is left of \c left after removing the current
  state of \c right: digit i may run up to
  left.max_counts[i] - right.cur_counts[i]. All digits start at zero and unused
  slots are zeroed.

  @param[in] left  counter providing the length and the digit maxima
  @param[in] right counter whose current digits are subtracted from those maxima
*/
MixedBaseCounter::MixedBaseCounter(MixedBaseCounter & left, MixedBaseCounter & right) {
  this->length = left.length;
  for (int i = 0; i <= MAX_LENGTH; ++i) {
    this->max_counts[i] = (i < left.length) ? left.max_counts[i] - right.cur_counts[i] : 0;
    this->cur_counts[i] = 0;
  }
}

/*!
  Advances the counter by one step: increments the lowest digit and carries
  while a digit exceeds its maximum. After the largest state the counter wraps
  around to all zeros.
*/
void
MixedBaseCounter::next() {
  for (int i = 0; i < this->length; ++i) {
    this->cur_counts[i]++;
    if (this->cur_counts[i] > this->max_counts[i]) {
      this->cur_counts[i] = 0; // overflow: reset and carry into the next digit
      continue;
    }
    break;
  }
}

/*!
  @return 1 if every digit is zero, 0 otherwise
*/
int
MixedBaseCounter::is_zero() {
  for (int i = 0; i < this->length; ++i)
    if (this->cur_counts[i])
      return 0;
  return 1;
}

/*!
  Multiplies together multipliers[i] raised to the power cur_counts[i] for all
  digits.

  @param[in] multipliers one factor per digit; must hold at least \c length entries

  @return the product, or 0 when all digits are zero (this historical behaviour
          is kept unchanged)
*/
int
MixedBaseCounter::product(int * multipliers) {
  int result = 1;
  bool anyFactor = false;

  for (int i = 0; i < this->length; ++i)
    for (int j = 0; j < this->cur_counts[i]; ++j) {
      anyFactor = true;
      result *= multipliers[i];
    }

  return anyFactor ? result : 0;
}
-------------------------------------------------------------------------------- /src/MultiColoring.hpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Modifications (c) 2019 Advanced Micro Devices, Inc. 3 | * 4 | * Redistribution and use in source and binary forms, with or without modification, 5 | * are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, this 8 | * list of conditions and the following disclaimer. 9 | * 2. Redistributions in binary form must reproduce the above copyright notice, 10 | * this list of conditions and the following disclaimer in the documentation 11 | * and/or other materials provided with the distribution. 12 | * 3. Neither the name of the copyright holder nor the names of its contributors 13 | * may be used to endorse or promote products derived from this software without 14 | * specific prior written permission. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 20 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 22 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | * POSSIBILITY OF SUCH DAMAGE. 
26 | * 27 | * ************************************************************************ */ 28 | 29 | #ifndef MULTICOLORING_HPP 30 | #define MULTICOLORING_HPP 31 | 32 | #include "SparseMatrix.hpp" 33 | 34 | void JPLColoring(SparseMatrix& A); 35 | 36 | #endif // MULTICOLORING_HPP 37 | -------------------------------------------------------------------------------- /src/OptimizeProblem.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #ifndef OPTIMIZEPROBLEM_HPP 16 | #define OPTIMIZEPROBLEM_HPP 17 | 18 | #include "SparseMatrix.hpp" 19 | #include "Vector.hpp" 20 | #include "CGData.hpp" 21 | 22 | int OptimizeProblem(SparseMatrix & A, CGData & data, Vector & b, Vector & x, Vector & xexact); 23 | 24 | // This helper function should be implemented in a non-trivial way if OptimizeProblem is non-trivial 25 | // It should return as type double, the total number of bytes allocated and retained after calling OptimizeProblem. 26 | // This value will be used to report Gbytes used in ReportResults (the value returned will be divided by 1000000000.0). 27 | 28 | double OptimizeProblemMemoryUse(const SparseMatrix & A); 29 | 30 | #endif // OPTIMIZEPROBLEM_HPP 31 | -------------------------------------------------------------------------------- /src/OutputFile.cpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. 
Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "OutputFile.hpp" 22 | 23 | using std::string; 24 | using std::stringstream; 25 | using std::list; 26 | using std::ofstream; 27 | 28 | OutputFile::OutputFile(const string & name_arg, const string & version_arg) 29 | : name(name_arg), version(version_arg), eol("\n"), keySeparator("::") {} 30 | 31 | OutputFile::OutputFile(void) : eol("\n"), keySeparator("::") {} 32 | 33 | OutputFile::~OutputFile() { 34 | for (list::iterator it = descendants.begin(); it != descendants.end(); ++it) { 35 | delete *it; 36 | } 37 | } 38 | 39 | void 40 | OutputFile::add(const string & key_arg, const string & value_arg) { 41 | descendants.push_back(allocKeyVal(key_arg, value_arg)); 42 | } 43 | 44 | void 45 | OutputFile::add(const string & key_arg, double value_arg) { 46 | stringstream ss; 47 | ss << value_arg; 48 | descendants.push_back(allocKeyVal(key_arg, ss.str())); 49 | } 50 | 51 | void 52 | OutputFile::add(const string & key_arg, int value_arg) { 53 | stringstream ss; 54 | ss << value_arg; 55 | descendants.push_back(allocKeyVal(key_arg, ss.str())); 56 | } 57 | 58 | #ifndef HPCG_NO_LONG_LONG 59 | 60 | void 61 | OutputFile::add(const string & key_arg, long long value_arg) { 62 | stringstream ss; 63 | ss << value_arg; 64 | descendants.push_back(allocKeyVal(key_arg, ss.str())); 65 | } 66 | 67 | #endif 68 | 69 | void 70 | OutputFile::add(const string & key_arg, size_t value_arg) { 71 | stringstream ss; 72 | ss << value_arg; 73 | descendants.push_back(allocKeyVal(key_arg, ss.str())); 74 | } 75 | 76 | void 77 | OutputFile::setKeyValue(const string & key_arg, const string & value_arg) { 78 | key = key_arg; 79 | value = value_arg; 80 | } 81 | 82 | OutputFile * 83 | OutputFile::get(const string & key_arg) { 84 | 
for (list::iterator it = descendants.begin(); it != descendants.end(); ++it) { 85 | if ((*it)->key == key_arg) 86 | return *it; 87 | } 88 | 89 | return 0; 90 | } 91 | 92 | string 93 | OutputFile::generateRecursive(string prefix) { 94 | string result = ""; 95 | 96 | result += prefix + key + "=" + value + eol; 97 | 98 | for (list::iterator it = descendants.begin(); it != descendants.end(); ++it) { 99 | result += (*it)->generateRecursive(prefix + key + keySeparator); 100 | } 101 | 102 | return result; 103 | } 104 | 105 | string 106 | OutputFile::generate(void) { 107 | string result = name + "\nversion=" + version + eol; 108 | 109 | for (list::iterator it = descendants.begin(); it != descendants.end(); ++it) { 110 | result += (*it)->generateRecursive(""); 111 | } 112 | 113 | time_t rawtime; 114 | time(&rawtime); 115 | tm * ptm = localtime(&rawtime); 116 | char sdate[256]; 117 | //use tm_mon+1 because tm_mon is 0 .. 11 instead of 1 .. 12 118 | sprintf (sdate,"%04d-%02d-%02d_%02d-%02d-%02d",ptm->tm_year + 1900, ptm->tm_mon+1, 119 | ptm->tm_mday, ptm->tm_hour, ptm->tm_min,ptm->tm_sec); 120 | 121 | string filename = name + "_" + version + "_"; 122 | filename += string(sdate) + ".txt"; 123 | 124 | ofstream myfile(filename.c_str()); 125 | myfile << result; 126 | myfile.close(); 127 | 128 | return result; 129 | } 130 | 131 | OutputFile * OutputFile::allocKeyVal(const std::string & key_arg, const std::string & value_arg) { 132 | OutputFile * of = new OutputFile(); 133 | of->setKeyValue(key_arg, value_arg); 134 | return of; 135 | } 136 | -------------------------------------------------------------------------------- /src/OutputFile.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. 
// Heroux ( maherou@sandia.gov)
// Jack Dongarra (dongarra@eecs.utk.edu)
// Piotr Luszczek (luszczek@eecs.utk.edu)
//
// ***************************************************
//@HEADER

/*!
  @file OutputFile.hpp

  HPCG output file classes
*/

#ifndef OUTPUTFILE_HPP
#define OUTPUTFILE_HPP

// The two header names were lost in this listing; <list> and <string> are the
// headers the members below require. <cstddef> supplies size_t.
#include <cstddef>
#include <list>
#include <string>

//! The OutputFile class for the uniform collecting and reporting of performance data for HPCG

/*!

  The OutputFile class facilitates easy collecting and reporting of
  key-value-formatted data that can be then registered with the HPCG results
  collection website. The keys may have hierarchy key1::key2::key3=val with
  double colon :: as a separator. A sample output may look like this (note how
  "major" and "micro" keys repeat with different ancestor keys):

  \code

  version=3.2.1alpha
  version::major=3
  version::minor=2
  version::micro=1
  version::release=alpha
  axis=xyz
  axis::major=x
  axis::minor=y

  \endcode

  Each element owns its descendants through raw pointers and deletes them in
  its destructor.

*/
class OutputFile {
protected:
  std::list<OutputFile*> descendants; //!< descendant elements
  std::string name; //!< name of the benchmark
  std::string version; //!< version of the benchmark
  std::string key; //!< the key under which the element is stored
  std::string value; //!< the value of the stored element
  std::string eol; //!< end-of-line character sequence in the output file
  std::string keySeparator; //!< character sequence to separate keys in the output file

  //! Recursively generate output string from descendant list, and their descendants and so on
  std::string generateRecursive(std::string prefix);

public:
  //! Allocate a new element holding the given key and value; the caller takes ownership of the returned pointer
  static OutputFile * allocKeyVal(const std::string & key, const std::string & value);

  //! Constructor: accepts name and version as strings that are used to create a file name for printing results.
  /*!
    This constructor accepts a name and version number for the benchmark that
    are used to form a file name information for results that are generated by
    the generate() method.
    \param name (in) string containing name of the benchmark
    \param version (in) string containing the version of the benchmark
  */
  OutputFile(const std::string & name, const std::string & version);

  //! Default constructor: no-arguments accepted, should be used for descendant nodes
  /*!
    This no-argument constructor can be used for descendant nodes to provide
    key1::key2::key3=val output. Unlike the root node, descendant nodes do not
    have name and version but only store key-value pairs.
  */
  OutputFile(void);

  ~OutputFile();

  //! Create and add a descendant element with value of type "string"
  /*!
    Create and add a descendant element identified by "key" and associated with
    "value". The element is added at the end of a list of previously added
    elements.

    @param[in] key   The key that identifies the added element and under which the element is stored
    @param[in] value The value stored by the element
  */
  void add(const std::string & key, const std::string & value);

  //! Create and add a descendant element with value of type "double"
  /*!
    Create and add a descendant element identified by "key" and associated with
    "value". The element is added at the end of a list of previously added
    elements.

    @param[in] key   The key that identifies the added element and under which the element is stored
    @param[in] value The value stored by the element
  */
  void add(const std::string & key, double value);

  //! Create and add a descendant element with value of type "int"
  /*!
    Create and add a descendant element identified by "key" and associated with
    "value". The element is added at the end of a list of previously added
    elements.

    @param[in] key   The key that identifies the added element and under which the element is stored
    @param[in] value The value stored by the element
  */
  void add(const std::string & key, int value);

#ifndef HPCG_NO_LONG_LONG
  //! Create and add a descendant element with value of type "long long"
  /*!
    Create and add a descendant element identified by "key" and associated with
    "value". The element is added at the end of a list of previously added
    elements.

    @param[in] key   The key that identifies the added element and under which the element is stored
    @param[in] value The value stored by the element
  */
  void add(const std::string & key, long long value);
#endif

  //! Create and add a descendant element with value of type "size_t"
  /*!
    Create and add a descendant element identified by "key" and associated with
    "value". The element is added at the end of a list of previously added
    elements.

    @param[in] key   The key that identifies the added element and under which the element is stored
    @param[in] value The value stored by the element
  */
  void add(const std::string & key, size_t value);

  //! Key-Value setter method
  /*!
    Set the key and the value of this element.

    @param[in] key   The key that identifies this element and under which the element is stored
    @param[in] value The value stored by the element
  */
  void setKeyValue(const std::string & key, const std::string & value);

  //! Get the element in the list with the given key or return NULL if not found
  OutputFile * get(const std::string & key);

  //! Generate output string with results based on the stored key-value hierarchy
  std::string generate(void);
};

#endif // OUTPUTFILE_HPP

// --------------------------------------------------------------------------------
// /src/Permute.hpp:
// --------------------------------------------------------------------------------
// /* ************************************************************************
//  * Copyright (c) 2019 Advanced Micro Devices, Inc.
//  *
//  * Redistribution and use in source and binary forms, with or without modification,
//  * are permitted provided that the following conditions are met:
//  *
//  * 1. Redistributions of source code must retain the above copyright notice, this
//  *    list of conditions and the following disclaimer.
//  * 2. Redistributions in binary form must reproduce the above copyright notice,
//  *    this list of conditions and the following disclaimer in the documentation
//  *    and/or other materials provided with the distribution.
//  * 3. Neither the name of the copyright holder nor the names of its contributors
//  *    may be used to endorse or promote products derived from this software without
//  *    specific prior written permission.
//  *
//  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
//  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
//  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 20 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 22 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | * POSSIBILITY OF SUCH DAMAGE. 26 | * 27 | * ************************************************************************ */ 28 | 29 | #ifndef PERMUTE_HPP 30 | #define PERMUTE_HPP 31 | 32 | #include "SparseMatrix.hpp" 33 | 34 | void PermuteColumns(SparseMatrix& A); 35 | void PermuteRows(SparseMatrix& A); 36 | void PermuteVector(local_int_t size, Vector& v, const local_int_t* perm); 37 | 38 | #endif // PERMUTE_HPP 39 | -------------------------------------------------------------------------------- /src/ReadHpcgDat.cpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #include 16 | 17 | #include "ReadHpcgDat.hpp" 18 | 19 | static int 20 | SkipUntilEol(FILE *stream) { 21 | int chOrEof; 22 | bool finished; 23 | 24 | do { 25 | chOrEof = fgetc( stream ); 26 | finished = (chOrEof == EOF) || (chOrEof == '\n') || (chOrEof == '\r'); 27 | } while (! 
finished); 28 | 29 | if ('\r' == chOrEof) { // on Windows, \r might be followed by \n 30 | int chOrEofExtra = fgetc( stream ); 31 | 32 | if ('\n' == chOrEofExtra || EOF == chOrEofExtra) 33 | chOrEof = chOrEofExtra; 34 | else 35 | ungetc(chOrEofExtra, stream); 36 | } 37 | 38 | return chOrEof; 39 | } 40 | 41 | int 42 | ReadHpcgDat(int *localDimensions, int *secondsPerRun, int *localProcDimensions) { 43 | FILE * hpcgStream = fopen("hpcg.dat", "r"); 44 | 45 | if (! hpcgStream) 46 | return -1; 47 | 48 | SkipUntilEol(hpcgStream); // skip the first line 49 | 50 | SkipUntilEol(hpcgStream); // skip the second line 51 | 52 | for (int i = 0; i < 3; ++i) 53 | if (fscanf(hpcgStream, "%d", localDimensions+i) != 1 || localDimensions[i] < 16) 54 | localDimensions[i] = 16; 55 | 56 | SkipUntilEol( hpcgStream ); // skip the rest of the second line 57 | 58 | if (secondsPerRun!=0) { // Only read number of seconds if the pointer is non-zero 59 | if (fscanf(hpcgStream, "%d", secondsPerRun) != 1 || secondsPerRun[0] < 0) 60 | secondsPerRun[0] = 30 * 60; // 30 minutes 61 | } 62 | 63 | SkipUntilEol( hpcgStream ); // skip the rest of the third line 64 | 65 | for (int i = 0; i < 3; ++i) 66 | // the user didn't specify (or values are invalid) process dimensions 67 | if (fscanf(hpcgStream, "%d", localProcDimensions+i) != 1 || localProcDimensions[i] < 1) 68 | localProcDimensions[i] = 0; // value 0 means: "not specified" and it will be fixed later 69 | 70 | fclose(hpcgStream); 71 | 72 | return 0; 73 | } 74 | -------------------------------------------------------------------------------- /src/ReadHpcgDat.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. 
Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #ifndef READHPCGDAT_HPP 16 | #define READHPCGDAT_HPP 17 | 18 | int ReadHpcgDat(int *localDimensions, int *secondsPerRun, int *localProcDimensions); 19 | 20 | #endif // READHPCGDAT_HPP 21 | -------------------------------------------------------------------------------- /src/ReportResults.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /* ************************************************************************ 16 | * Modifications (c) 2019 Advanced Micro Devices, Inc. 17 | * 18 | * Redistribution and use in source and binary forms, with or without modification, 19 | * are permitted provided that the following conditions are met: 20 | * 21 | * 1. Redistributions of source code must retain the above copyright notice, this 22 | * list of conditions and the following disclaimer. 23 | * 2. Redistributions in binary form must reproduce the above copyright notice, 24 | * this list of conditions and the following disclaimer in the documentation 25 | * and/or other materials provided with the distribution. 26 | * 3. Neither the name of the copyright holder nor the names of its contributors 27 | * may be used to endorse or promote products derived from this software without 28 | * specific prior written permission. 
29 | * 30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 32 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 33 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 34 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 35 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 36 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 | * POSSIBILITY OF SUCH DAMAGE. 40 | * 41 | * ************************************************************************ */ 42 | 43 | #ifndef REPORTRESULTS_HPP 44 | #define REPORTRESULTS_HPP 45 | 46 | #include "SparseMatrix.hpp" 47 | #include "TestCG.hpp" 48 | #include "TestSymmetry.hpp" 49 | #include "TestNorms.hpp" 50 | 51 | double ComputeTotalGFlops(const SparseMatrix& A, int numberOfMgLevels, int numberOfCgSets, int refMaxIters, int optMaxIters, double times[]); 52 | void ReportResults(const SparseMatrix & A, int numberOfMgLevels, int numberOfCgSets, int refMaxIters, int optMaxIters, double times[], 53 | const TestCGData & testcg_data, const TestSymmetryData & testsymmetry_data, const TestNormsData & testnorms_data, int global_failure, bool quickPath); 54 | 55 | #endif // REPORTRESULTS_HPP 56 | -------------------------------------------------------------------------------- /src/SetupHalo.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. 
Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /* ************************************************************************ 16 | * Modifications (c) 2019 Advanced Micro Devices, Inc. 17 | * 18 | * Redistribution and use in source and binary forms, with or without modification, 19 | * are permitted provided that the following conditions are met: 20 | * 21 | * 1. Redistributions of source code must retain the above copyright notice, this 22 | * list of conditions and the following disclaimer. 23 | * 2. Redistributions in binary form must reproduce the above copyright notice, 24 | * this list of conditions and the following disclaimer in the documentation 25 | * and/or other materials provided with the distribution. 26 | * 3. Neither the name of the copyright holder nor the names of its contributors 27 | * may be used to endorse or promote products derived from this software without 28 | * specific prior written permission. 29 | * 30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 32 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 33 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 34 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 35 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 36 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 | * POSSIBILITY OF SUCH DAMAGE. 
40 | * 41 | * ************************************************************************ */ 42 | 43 | #ifndef SETUPHALO_HPP 44 | #define SETUPHALO_HPP 45 | 46 | #include "SparseMatrix.hpp" 47 | 48 | void SetupHalo(SparseMatrix& A); 49 | void CopyHaloToHost(SparseMatrix& A); 50 | 51 | #endif // SETUPHALO_HPP 52 | -------------------------------------------------------------------------------- /src/SetupHalo_ref.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #ifndef SETUPHALO_REF_HPP 16 | #define SETUPHALO_REF_HPP 17 | #include "SparseMatrix.hpp" 18 | 19 | void SetupHalo_ref(SparseMatrix & A); 20 | 21 | #endif // SETUPHALO_REF_HPP 22 | -------------------------------------------------------------------------------- /src/TestCG.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /*! 
16 | @file TestCG.hpp 17 | 18 | HPCG data structure 19 | */ 20 | 21 | #ifndef TESTCG_HPP 22 | #define TESTCG_HPP 23 | 24 | #include "hpcg.hpp" 25 | #include "SparseMatrix.hpp" 26 | #include "Vector.hpp" 27 | #include "CGData.hpp" 28 | 29 | 30 | struct TestCGData_STRUCT { 31 | int count_pass; //!< number of successful tests 32 | int count_fail; //!< number of failed tests (NOTE(review): the comment previously repeated count_pass's text; confirm against TestCG.cpp) 33 | int expected_niters_no_prec; //!< expected number of test CG iterations without preconditioning with diagonally dominant matrix (~12) 34 | int expected_niters_prec; //!< expected number of test CG iterations with preconditioning and with diagonally dominant matrix (~1-2) 35 | int niters_max_no_prec; //!< maximum number of test CG iterations without preconditioner 36 | int niters_max_prec; //!< maximum number of test CG iterations with preconditioner (NOTE(review): the comment previously said "without"; confirm against TestCG.cpp) 37 | double normr; //!< residual norm achieved during test CG iterations 38 | }; 39 | typedef struct TestCGData_STRUCT TestCGData; 40 | 41 | extern int TestCG(SparseMatrix & A, CGData & data, Vector & b, Vector & x, TestCGData & testcg_data); 42 | 43 | #endif // TESTCG_HPP 44 | 45 | -------------------------------------------------------------------------------- /src/TestNorms.cpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /*! 16 | @file TestNorms.cpp 17 | 18 | HPCG routine 19 | */ 20 | 21 | #include 22 | #include "TestNorms.hpp" 23 | 24 | /*! 25 | Computes the mean and standard deviation of the array of norm results.
26 | 27 | @param[in] testnorms_data data structure with the results of norm test 28 | 29 | @return Returns 0 upon success or non-zero otherwise 30 | */ 31 | int TestNorms(TestNormsData & testnorms_data) { 32 | double mean_delta = 0.0; 33 | for (int i= 0; i 52 | #include 53 | #include 54 | #include 55 | 56 | #include "utils.hpp" 57 | #include "Geometry.hpp" 58 | 59 | struct Vector_STRUCT { 60 | local_int_t localLength; //!< length of local portion of the vector 61 | double * values = nullptr; //!< array of values 62 | /*! 63 | This is for storing optimized data structures created in OptimizeProblem and 64 | used inside optimized ComputeSPMV(). 65 | */ 66 | void * optimizationData = nullptr; 67 | 68 | double* d_values = nullptr; 69 | }; 70 | typedef struct Vector_STRUCT Vector; 71 | 72 | /*! 73 | Initializes input vector. 74 | 75 | @param[in] v 76 | @param[in] localLength Length of local portion of input vector 77 | */ 78 | inline void InitializeVector(Vector & v, local_int_t localLength) { 79 | v.localLength = localLength; 80 | v.values = new double[localLength]; 81 | v.optimizationData = 0; 82 | return; 83 | } 84 | 85 | inline void HIPInitializeVector(Vector& v, local_int_t localLength) 86 | { 87 | v.localLength = localLength; 88 | v.optimizationData = 0; 89 | HIP_CHECK(deviceMalloc((void**)&v.d_values, sizeof(double) * localLength)); 90 | } 91 | 92 | /*! 93 | Fill the input vector with zero values. 94 | 95 | @param[inout] v - On entrance v is initialized, on exit all its values are zero. 96 | */ 97 | inline void ZeroVector(Vector & v) { 98 | local_int_t localLength = v.localLength; 99 | double * vv = v.values; 100 | for (int i=0; i=0 && index < v.localLength); 118 | double * vv = v.values; 119 | vv[index] *= value; 120 | return; 121 | } 122 | /*! 123 | Fill the input vector with pseudo-random values. 
124 | 125 | @param[in] v 126 | */ 127 | inline void FillRandomVector(Vector & v) { 128 | local_int_t localLength = v.localLength; 129 | double * vv = v.values; 130 | for (int i=0; i rng(v.localLength); 137 | for(int i = 0; i < v.localLength; ++i) 138 | { 139 | rng[i] = rand() / (double)(RAND_MAX) + 1.0; 140 | } 141 | 142 | HIP_CHECK(hipMemcpy(v.d_values, 143 | rng.data(), 144 | sizeof(double) * v.localLength, 145 | hipMemcpyHostToDevice)); 146 | } 147 | 148 | /*! 149 | Copy input vector to output vector. 150 | 151 | @param[in] v Input vector 152 | @param[in] w Output vector 153 | */ 154 | inline void CopyVector(const Vector & v, Vector & w) { 155 | local_int_t localLength = v.localLength; 156 | assert(w.localLength >= localLength); 157 | double * vv = v.values; 158 | double * wv = w.values; 159 | for (int i=0; i 22 | #include "WriteProblem.hpp" 23 | 24 | 25 | /*! 26 | Routine to dump: 27 | - matrix in row, col, val format for analysis with MATLAB 28 | - x, xexact, b as simple arrays of numbers. 29 | 30 | Writes to A.dat, x.dat, xexact.dat and b.dat, respectivly. 31 | 32 | NOTE: THIS CODE ONLY WORKS ON SINGLE PROCESSOR RUNS 33 | 34 | Read into MATLAB using: 35 | 36 | load A.dat 37 | A=spconvert(A); 38 | load x.dat 39 | load xexact.dat 40 | load b.dat 41 | 42 | @param[in] geom The description of the problem's geometry. 43 | @param[in] A The known system matrix 44 | @param[in] b The known right hand side vector 45 | @param[in] x The solution vector computed by CG iteration 46 | @param[in] xexact Generated exact solution 47 | 48 | @return Returns with -1 if used with more than one MPI process. Returns with 0 otherwise. 49 | 50 | @see GenerateProblem 51 | */ 52 | int WriteProblem( const Geometry & geom, const SparseMatrix & A, 53 | const Vector b, const Vector x, const Vector xexact) { 54 | 55 | if (geom.size!=1) return -1; //TODO Only works on one processor. 
Need better error handler 56 | const global_int_t nrow = A.totalNumberOfRows; 57 | 58 | FILE * fA = 0, * fx = 0, * fxexact = 0, * fb = 0; 59 | fA = fopen("A.dat", "w"); 60 | fx = fopen("x.dat", "w"); 61 | fxexact = fopen("xexact.dat", "w"); 62 | fb = fopen("b.dat", "w"); 63 | 64 | if (! fA || ! fx || ! fxexact || ! fb) { 65 | if (fb) fclose(fb); 66 | if (fxexact) fclose(fxexact); 67 | if (fx) fclose(fx); 68 | if (fA) fclose(fA); 69 | return -1; 70 | } 71 | 72 | for (global_int_t i=0; i< nrow; i++) { 73 | const double * const currentRowValues = A.matrixValues[i]; 74 | const global_int_t * const currentRowIndices = A.mtxIndG[i]; 75 | const int currentNumberOfNonzeros = A.nonzerosInRow[i]; 76 | for (int j=0; j< currentNumberOfNonzeros; j++) 77 | #ifdef HPCG_NO_LONG_LONG 78 | fprintf(fA, " %d %d %22.16e\n",i+1,(global_int_t)(currentRowIndices[j]+1),currentRowValues[j]); 79 | #else 80 | fprintf(fA, " %lld %lld %22.16e\n",i+1,(global_int_t)(currentRowIndices[j]+1),currentRowValues[j]); 81 | #endif 82 | fprintf(fx, "%22.16e\n",x.values[i]); 83 | fprintf(fxexact, "%22.16e\n",xexact.values[i]); 84 | fprintf(fb, "%22.16e\n",b.values[i]); 85 | } 86 | 87 | fclose(fA); 88 | fclose(fx); 89 | fclose(fxexact); 90 | fclose(fb); 91 | return 0; 92 | } 93 | -------------------------------------------------------------------------------- /src/WriteProblem.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. 
Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #ifndef WRITEPROBLEM_HPP 16 | #define WRITEPROBLEM_HPP 17 | #include "Geometry.hpp" 18 | #include "SparseMatrix.hpp" 19 | 20 | int WriteProblem( const Geometry & geom, const SparseMatrix & A, const Vector b, const Vector x, const Vector xexact); 21 | #endif // WRITEPROBLEM_HPP 22 | -------------------------------------------------------------------------------- /src/YAML_Doc.cpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "YAML_Doc.hpp" 21 | using namespace std; 22 | 23 | /*! 24 | Sets the application name and version which will become part of the YAML doc. 
25 | 26 | @param[in] miniApp_Name application name 27 | @param[in] miniApp_Version application name 28 | @param[in] destination_Directory destination directory for the YAML document 29 | @param[in] destination_FileName file name for the YAML document 30 | */ 31 | YAML_Doc::YAML_Doc(const std::string & miniApp_Name, const std::string & miniApp_Version, const std::string & destination_Directory, const std::string & destination_FileName) { 32 | miniAppName = miniApp_Name; 33 | miniAppVersion = miniApp_Version; 34 | destinationDirectory = destination_Directory; 35 | destinationFileName = destination_FileName; 36 | } 37 | 38 | //inherits the destructor from YAML_Element 39 | YAML_Doc::~YAML_Doc(void) { 40 | } 41 | 42 | /*! 43 | Generates YAML from the elements of the document and saves it to a file. 44 | 45 | @return returns the complete YAML document as a string 46 | */ 47 | string YAML_Doc::generateYAML() { 48 | string yaml; 49 | 50 | yaml = yaml + miniAppName + " version: " + miniAppVersion + "\n"; 51 | 52 | for (size_t i=0; iprintYAML(""); 54 | } 55 | 56 | time_t rawtime; 57 | tm * ptm; 58 | time ( &rawtime ); 59 | ptm = localtime(&rawtime); 60 | char sdate[256]; 61 | //use tm_mon+1 because tm_mon is 0 .. 11 instead of 1 .. 
12 62 | sprintf (sdate,"%04d.%02d.%02d.%02d.%02d.%02d",ptm->tm_year + 1900, ptm->tm_mon+1, 63 | ptm->tm_mday, ptm->tm_hour, ptm->tm_min,ptm->tm_sec); 64 | 65 | string filename; 66 | if (destinationFileName=="") 67 | filename = miniAppName + "-" + miniAppVersion + "_"; 68 | else 69 | filename = destinationFileName; 70 | filename = filename + string(sdate) + ".yaml"; 71 | if (destinationDirectory!="" && destinationDirectory!=".") { 72 | string mkdir_cmd = "mkdir " + destinationDirectory; 73 | int err = system(mkdir_cmd.c_str()); 74 | filename = destinationDirectory + "/" + destinationFileName; 75 | } else 76 | filename = "./" + filename; 77 | 78 | ofstream myfile; 79 | myfile.open(filename.c_str()); 80 | myfile << yaml; 81 | myfile.close(); 82 | return yaml; 83 | } 84 | -------------------------------------------------------------------------------- /src/YAML_Doc.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /*! 16 | @file YAML_Doc.hpp 17 | 18 | HPCG YAML classes 19 | */ 20 | 21 | // Changelog 22 | // 23 | // Version 0.1 24 | // - Initial version. 25 | // 26 | ///////////////////////////////////////////////////////////////////////// 27 | 28 | #ifndef YAML_DOC_HPP 29 | #define YAML_DOC_HPP 30 | #include 31 | #include "YAML_Element.hpp" 32 | 33 | //! The YAML_Doc class for the uniform collecting and reporting of performance data for HPCG 34 | 35 | /*! 
36 | 37 | The YAML_Doc class works in conjunction with the YAML_Element class to facilitate easy collecting and reporting of YAML-formatted 38 | data that can be then registered with the HPCG results collection website. 39 | 40 | \code 41 | 42 | //EXAMPLE CODE FOR GENERATING YAML 43 | 44 | YAML_Doc doc("hpcg","0.1"); 45 | doc.add("final_residual",1.4523e-13); 46 | doc.add("time","4.893"); 47 | 48 | //note: the following line will remove the data (4.893) associated with "time" 49 | doc.get("time")->add("total",4.243); 50 | 51 | //note: the following line will likewise remove the data (4.243) associated with "total" 52 | doc.get("time")->get("total")->add("time",2.457); 53 | doc.get("time")->get("total")->add("flops",4.88e5); 54 | doc.get("time")->add("ddot",1.243); 55 | doc.get("time")->add("sparsemv",""); 56 | doc.get("time")->get("sparsemv")->add("time",0.3445); 57 | doc.get("time")->get("sparsemv")->add("overhead",""); 58 | doc.get("time")->get("sparsemv")->get("overhead")->add("time",0.0123); 59 | doc.get("time")->get("sparsemv")->get("overhead")->add("percentage",0.034); 60 | cout << doc.generateYAML() << endl; 61 | return 0; 62 | 63 | \endcode 64 | 65 | Below is the output generated by the above code: 66 | 67 | \verbatim 68 | 69 | final_residual: 1.4523e-13 70 | time: 71 | total: 72 | time: 2.457 73 | flops: 4.88e5 74 | ddot: 1.243 75 | sparsemv: 76 | time: 0.3445 77 | overhead: 78 | time: 0.0123 79 | percentage: 0.034 80 | 81 | \endverbatim 82 | 83 | \note {No value is allowed to be attached to a key that has children. If children are added to a key, the value is simply set to "".} 84 | 85 | */ 86 | class YAML_Doc: public YAML_Element { 87 | public: 88 | //! Constructor: accepts mini-application name and version as strings, optionally accepts directory and file name for printing results. 89 | /*!
90 | The sole constructor for this class accepts and name and version number for the mini-application as well as optional directory 91 | and file name information for results that are generated by the generateYAML() method. 92 | \param miniApp_Name (in) string containing name of the mini-application 93 | \param miniApp_Version (in) string containing the version of the mini-application 94 | \param destination_Directory (in, optional) path of directory where results file will be stored, relative to current working directory. 95 | If this value is not supplied, the results file will be stored in the current working directory. If the directory does not exist 96 | it will be created. 97 | \param destination_FileName (in, optional) root name of the results file. A suffix of ".yaml" will be automatically appended. If no 98 | file name is specified the filename will be constructed by concatenating the miniAppName + miniAppVersion + ".yaml" strings. 99 | */ 100 | YAML_Doc(const std::string & miniApp_Name, const std::string & miniApp_Version, const std::string & destination_Directory = "", const std::string & destination_FileName = ""); 101 | //! Destructor 102 | ~YAML_Doc(); 103 | //! 
Generate YAML results to standard out and to a file using specified directory and filename, using current directory and miniAppName + miniAppVersion + ".yaml" by default 104 | std::string generateYAML(); 105 | 106 | protected: 107 | std::string miniAppName; //!< the name of the application that generated the YAML output 108 | std::string miniAppVersion; //!< the version of the application that generated the YAML output 109 | std::string destinationDirectory; //!< the destination directory for the generated the YAML output 110 | std::string destinationFileName; //!< the filename for the generated the YAML output 111 | }; 112 | #endif // YAML_DOC_HPP 113 | -------------------------------------------------------------------------------- /src/YAML_Element.cpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /*! 16 | @file YAML_Element.cpp 17 | 18 | HPCG routine 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include "YAML_Element.hpp" 25 | using namespace std; 26 | YAML_Element::YAML_Element(const std::string & key_arg, const std::string & value_arg) { 27 | key = key_arg; 28 | value = value_arg; 29 | } 30 | 31 | YAML_Element::~YAML_Element() { 32 | for (size_t i=0; ivalue = ""; 50 | string converted_value = convert_double_to_string(value_arg); 51 | YAML_Element * element = new YAML_Element(key_arg,converted_value); 52 | children.push_back(element); 53 | return element; 54 | } 55 | 56 | /*! 
57 | Add an element to the vector 58 | 59 | @param[in] key_arg The key under which the element is stored 60 | @param[in] value_arg The value of the element 61 | 62 | @return Returns the added element 63 | */ 64 | YAML_Element * YAML_Element::add(const std::string & key_arg, int value_arg) { 65 | this->value = ""; 66 | string converted_value = convert_int_to_string(value_arg); 67 | YAML_Element * element = new YAML_Element(key_arg,converted_value); 68 | children.push_back(element); 69 | return element; 70 | } 71 | 72 | #ifndef HPCG_NO_LONG_LONG 73 | 74 | /*! 75 | Add an element to the vector 76 | 77 | @param[in] key_arg The key under which the element is stored 78 | @param[in] value_arg The value of the element 79 | 80 | @return Returns the added element 81 | */ 82 | YAML_Element * YAML_Element::add(const std::string & key_arg, long long value_arg) { 83 | this->value = ""; 84 | string converted_value = convert_long_long_to_string(value_arg); 85 | YAML_Element * element = new YAML_Element(key_arg,converted_value); 86 | children.push_back(element); 87 | return element; 88 | } 89 | 90 | #endif 91 | 92 | /*! 93 | Add an element to the vector 94 | 95 | @param[in] key_arg The key under which the element is stored 96 | @param[in] value_arg The value of the element 97 | 98 | @return Returns the added element 99 | */ 100 | YAML_Element * YAML_Element::add(const std::string & key_arg, size_t value_arg) { 101 | this->value = ""; 102 | string converted_value = convert_size_t_to_string(value_arg); 103 | YAML_Element * element = new YAML_Element(key_arg,converted_value); 104 | children.push_back(element); 105 | return element; 106 | } 107 | 108 | /*! 
109 | Add an element to the vector 110 | 111 | @param[in] key_arg The key under which the element is stored 112 | @param[in] value_arg The value of the element 113 | 114 | @return Returns the added element 115 | */ 116 | YAML_Element * YAML_Element::add(const std::string & key_arg, const std::string & value_arg) { 117 | this->value = ""; 118 | YAML_Element * element = new YAML_Element(key_arg, value_arg); 119 | children.push_back(element); 120 | return element; 121 | } 122 | 123 | /*! 124 | Returns the pointer to the YAML_Element for the given key. 125 | @param[in] key_arg The key under which the element was stored 126 | 127 | @return If found, returns the element, otherwise returns NULL 128 | */ 129 | YAML_Element * YAML_Element::get(const std::string & key_arg) { 130 | for (size_t i=0; igetKey() == key_arg) { 132 | return children[i]; 133 | } 134 | } 135 | return 0; 136 | } 137 | 138 | /*! 139 | Prints a line of a YAML document. Correct YAML depends on 140 | correct spacing; the parameter space should be the proper 141 | amount of space for the parent element 142 | 143 | @param[in] space spacing inserted at the beginning of the line 144 | 145 | @return Returns a single line of the YAML document without the leading white space 146 | */ 147 | string YAML_Element::printYAML(std::string space) { 148 | string yaml_line = space + key + ": " + value + "\n"; 149 | for (int i=0; i<2; i++) space = space + " "; 150 | for (size_t i=0; iprintYAML(space); 152 | } 153 | return yaml_line; 154 | } 155 | 156 | /*! 157 | Converts a double precision value to a string. 158 | 159 | @param[in] value_arg The value to be converted. 160 | */ 161 | string YAML_Element::convert_double_to_string(double value_arg) { 162 | stringstream strm; 163 | strm << value_arg; 164 | return strm.str(); 165 | } 166 | 167 | /*! 168 | Converts a integer value to a string. 169 | 170 | @param[in] value_arg The value to be converted. 
171 | */ 172 | string YAML_Element::convert_int_to_string(int value_arg) { 173 | stringstream strm; 174 | strm << value_arg; 175 | return strm.str(); 176 | } 177 | 178 | #ifndef HPCG_NO_LONG_LONG 179 | 180 | /*! 181 | Converts a "long long" integer value to a string. 182 | 183 | @param[in] value_arg The value to be converted. 184 | */ 185 | string YAML_Element::convert_long_long_to_string(long long value_arg) { 186 | stringstream strm; 187 | strm << value_arg; 188 | return strm.str(); 189 | } 190 | 191 | #endif 192 | 193 | /*! 194 | Converts a "size_t" integer value to a string. 195 | 196 | @param[in] value_arg The value to be converted. 197 | */ 198 | string YAML_Element::convert_size_t_to_string(size_t value_arg) { 199 | stringstream strm; 200 | strm << value_arg; 201 | return strm.str(); 202 | } 203 | -------------------------------------------------------------------------------- /src/YAML_Element.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /*! 16 | @file YAML_Element.hpp 17 | 18 | HPCG data structures for YAML output 19 | */ 20 | 21 | // Changelog 22 | // 23 | // Version 0.1 24 | // - Initial version. 25 | // 26 | ///////////////////////////////////////////////////////////////////////// 27 | 28 | #ifndef YAML_ELEMENT_HPP 29 | #define YAML_ELEMENT_HPP 30 | #include 31 | #include 32 | #include "Geometry.hpp" 33 | //! HPCG YAML_Element class, from the HPCG YAML_Element class for registering key-value pairs of performance data 34 | 35 | /*! 36 | HPCG generates a collection of performance data for each run of the executable. 
YAML_Element, and 37 | the related YAML_Doc class, provide a uniform facility for gathering and reporting this data using the YAML text format. 38 | */ 39 | class YAML_Element { 40 | public: 41 | 42 | //! Default constructor. 43 | YAML_Element () {key=""; value="";} 44 | //! Construct with known key-value pair 45 | YAML_Element (const std::string & key_arg, const std::string & value_arg); 46 | //! Destructor 47 | ~YAML_Element (); 48 | //! Key accessor method 49 | std::string getKey() {return key;} 50 | //! Add a child element to an element list associated with this element, value of type double 51 | YAML_Element * add(const std::string & key_arg, double value_arg); 52 | //! Add a child element to an element list associated with this element, value of type int 53 | YAML_Element * add(const std::string & key_arg, int value_arg); 54 | #ifndef HPCG_NO_LONG_LONG 55 | //! Add a child element to an element list associated with this element, value of type long long 56 | YAML_Element * add(const std::string & key_arg, long long value_arg); 57 | #endif 58 | //! Add a child element to an element list associated with this element, value of type size_t 59 | YAML_Element * add(const std::string & key_arg, size_t value_arg); 60 | //! Add a child element to an element list associated with this element, value of type string 61 | YAML_Element * add(const std::string & key_arg, const std::string & value_arg); 62 | //! 
get the element in the list with the given key 63 | YAML_Element * get(const std::string & key_arg); 64 | std::string printYAML(std::string space); 65 | 66 | protected: 67 | std::string key; //!< the key under which the element is stored 68 | std::string value; //!< the value of the stored element 69 | std::vector children; //!< children elements of this element 70 | 71 | private: 72 | std::string convert_double_to_string(double value_arg); 73 | std::string convert_int_to_string(int value_arg); 74 | #ifndef HPCG_NO_LONG_LONG 75 | std::string convert_long_long_to_string(long long value_arg); 76 | #endif 77 | std::string convert_size_t_to_string(size_t value_arg); 78 | }; 79 | #endif // YAML_ELEMENT_HPP 80 | -------------------------------------------------------------------------------- /src/finalize.cpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /* ************************************************************************ 16 | * Modifications (c) 2019 Advanced Micro Devices, Inc. 17 | * 18 | * Redistribution and use in source and binary forms, with or without modification, 19 | * are permitted provided that the following conditions are met: 20 | * 21 | * 1. Redistributions of source code must retain the above copyright notice, this 22 | * list of conditions and the following disclaimer. 23 | * 2. Redistributions in binary form must reproduce the above copyright notice, 24 | * this list of conditions and the following disclaimer in the documentation 25 | * and/or other materials provided with the distribution. 26 | * 3. 
Neither the name of the copyright holder nor the names of its contributors 27 | * may be used to endorse or promote products derived from this software without 28 | * specific prior written permission. 29 | * 30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 32 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 33 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 34 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 35 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 36 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 | * POSSIBILITY OF SUCH DAMAGE. 40 | * 41 | * ************************************************************************ */ 42 | 43 | #include 44 | #include 45 | 46 | #include "utils.hpp" 47 | #include "hpcg.hpp" 48 | 49 | /*! 50 | Closes the I/O stream used for logging information throughout the HPCG run. 
51 | 52 | @return returns 0 upon success and non-zero otherwise 53 | 54 | @see HPCG_Init 55 | */ 56 | int 57 | HPCG_Finalize(void) { 58 | HPCG_fout.close(); 59 | 60 | // Destroy streams 61 | HIP_CHECK(hipStreamDestroy(stream_interior)); 62 | HIP_CHECK(hipStreamDestroy(stream_halo)); 63 | 64 | // Destroy events 65 | HIP_CHECK(hipEventDestroy(halo_gather)); 66 | 67 | // Free workspace 68 | HIP_CHECK(deviceFree(workspace)); 69 | 70 | #ifdef HPCG_MEMMGMT 71 | // Clear allocator 72 | HIP_CHECK(allocator.Clear()); 73 | #endif 74 | 75 | // Reset HIP device 76 | hipDeviceReset(); 77 | 78 | return 0; 79 | } 80 | -------------------------------------------------------------------------------- /src/hpcg.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | /* ************************************************************************ 16 | * Modifications (c) 2019 Advanced Micro Devices, Inc. 17 | * 18 | * Redistribution and use in source and binary forms, with or without modification, 19 | * are permitted provided that the following conditions are met: 20 | * 21 | * 1. Redistributions of source code must retain the above copyright notice, this 22 | * list of conditions and the following disclaimer. 23 | * 2. Redistributions in binary form must reproduce the above copyright notice, 24 | * this list of conditions and the following disclaimer in the documentation 25 | * and/or other materials provided with the distribution. 26 | * 3. 
Neither the name of the copyright holder nor the names of its contributors 27 | * may be used to endorse or promote products derived from this software without 28 | * specific prior written permission. 29 | * 30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 32 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 33 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 34 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 35 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 36 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 | * POSSIBILITY OF SUCH DAMAGE. 40 | * 41 | * ************************************************************************ */ 42 | 43 | /*! 
44 | @file hpcg.hpp 45 | 46 | HPCG data structures and functions 47 | */ 48 | 49 | #ifndef HPCG_HPP 50 | #define HPCG_HPP 51 | 52 | #include 53 | #include "Geometry.hpp" 54 | 55 | extern std::ofstream HPCG_fout; 56 | 57 | struct HPCG_Params_STRUCT { 58 | int comm_size; //!< Number of MPI processes in MPI_COMM_WORLD 59 | int comm_rank; //!< This process' MPI rank in the range [0 to comm_size - 1] 60 | int numThreads; //!< This process' number of threads 61 | local_int_t nx; //!< Number of processes in x-direction of 3D process grid 62 | local_int_t ny; //!< Number of processes in y-direction of 3D process grid 63 | local_int_t nz; //!< Number of processes in z-direction of 3D process grid 64 | int runningTime; //!< Number of seconds to run the timed portion of the benchmark 65 | int npx; //!< Number of x-direction grid points for each local subdomain 66 | int npy; //!< Number of y-direction grid points for each local subdomain 67 | int npz; //!< Number of z-direction grid points for each local subdomain 68 | int pz; //!< Partition in the z processor dimension, default is npz 69 | local_int_t zl; //!< nz for processors in the z dimension with value less than pz 70 | local_int_t zu; //!< nz for processors in the z dimension with value greater than pz 71 | int device; //!< HIP device 72 | bool verify; //!< Do reference verification 73 | double tol; //!< Exit tolerance if verification is skipped 74 | }; 75 | /*! 
76 | HPCG_Params is a shorthand for HPCG_Params_STRUCT 77 | */ 78 | typedef HPCG_Params_STRUCT HPCG_Params; 79 | 80 | extern int HPCG_Init(int * argc_p, char ** *argv_p, HPCG_Params & params); 81 | extern int HPCG_Finalize(void); 82 | 83 | #endif // HPCG_HPP 84 | -------------------------------------------------------------------------------- /src/mytimer.cpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | ///////////////////////////////////////////////////////////////////////// 16 | 17 | // Function to return elapsed (wall-clock) time in seconds. 18 | // By default (MPI build) it uses MPI_Wtime(); with HPCG_NO_MPI defined it uses omp_get_wtime(). 19 | // With both HPCG_NO_MPI and HPCG_NO_OPENMP defined it falls back to gettimeofday().
20 | 21 | ///////////////////////////////////////////////////////////////////////// 22 | 23 | #ifndef HPCG_NO_MPI 24 | #include 25 | 26 | double mytimer(void) { 27 | return MPI_Wtime(); 28 | } 29 | 30 | #elif !defined(HPCG_NO_OPENMP) 31 | 32 | // If this routine is compiled with HPCG_NO_MPI defined and not compiled with HPCG_NO_OPENMP then use the OpenMP timer 33 | #include 34 | double mytimer(void) { 35 | return omp_get_wtime(); 36 | } 37 | #else 38 | 39 | #include 40 | #include 41 | #include 42 | double mytimer(void) { 43 | struct timeval tp; 44 | static long start=0, startu; 45 | if (!start) { 46 | gettimeofday(&tp, NULL); 47 | start = tp.tv_sec; 48 | startu = tp.tv_usec; 49 | return 0.0; 50 | } 51 | gettimeofday(&tp, NULL); 52 | return ((double) (tp.tv_sec - start)) + (tp.tv_usec-startu)/1000000.0 ; 53 | } 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /src/mytimer.hpp: -------------------------------------------------------------------------------- 1 | 2 | //@HEADER 3 | // *************************************************** 4 | // 5 | // HPCG: High Performance Conjugate Gradient Benchmark 6 | // 7 | // Contact: 8 | // Michael A. Heroux ( maherou@sandia.gov) 9 | // Jack Dongarra (dongarra@eecs.utk.edu) 10 | // Piotr Luszczek (luszczek@eecs.utk.edu) 11 | // 12 | // *************************************************** 13 | //@HEADER 14 | 15 | #ifndef MYTIMER_HPP 16 | #define MYTIMER_HPP 17 | double mytimer(void); 18 | #endif // MYTIMER_HPP 19 | -------------------------------------------------------------------------------- /src/rochpcg_gtest_main.cpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (c) 2019 Advanced Micro Devices, Inc. 
3 | * 4 | * Redistribution and use in source and binary forms, with or without modification, 5 | * are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, this 8 | * list of conditions and the following disclaimer. 9 | * 2. Redistributions in binary form must reproduce the above copyright notice, 10 | * this list of conditions and the following disclaimer in the documentation 11 | * and/or other materials provided with the distribution. 12 | * 3. Neither the name of the copyright holder nor the names of its contributors 13 | * may be used to endorse or promote products derived from this software without 14 | * specific prior written permission. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 20 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 22 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | * POSSIBILITY OF SUCH DAMAGE. 26 | * 27 | * ************************************************************************ */ 28 | 29 | /*! 
30 | @file rochpcg_gtest_main.cpp 31 | 32 | HPCG routine 33 | */ 34 | 35 | #include 36 | #include 37 | #include 38 | 39 | #ifndef HPCG_NO_MPI 40 | #include 41 | #endif 42 | 43 | #include "Version.hpp" 44 | 45 | int device_id; 46 | 47 | int main(int argc, char* argv[]) 48 | { 49 | ::testing::InitGoogleTest(&argc, argv); 50 | 51 | int rank = 0; 52 | #ifndef HPCG_NO_MPI 53 | MPI_Init(&argc, &argv); 54 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 55 | #endif 56 | 57 | // Print rocHPCG version and device 58 | if(rank == 0) 59 | { 60 | printf("-------------------------------------------------------------------------\n"); 61 | printf("rocHPCG version: %d.%d.%d-%s\n", 62 | __ROCHPCG_VER_MAJOR, 63 | __ROCHPCG_VER_MINOR, 64 | __ROCHPCG_VER_PATCH, 65 | TO_STR(__ROCHPCG_VER_TWEAK)); 66 | } 67 | 68 | // Get device id from command line 69 | device_id = 0; 70 | 71 | for(int i = 1; i < argc; ++i) 72 | { 73 | if(strcmp(argv[i], "--device") == 0 && argc > i + 1) 74 | { 75 | device_id = atoi(argv[i + 1]); 76 | } 77 | } 78 | 79 | // Device query 80 | int device_count; 81 | hipError_t status = hipGetDeviceCount(&device_count); 82 | 83 | if(status != hipSuccess) 84 | { 85 | if(rank == 0) 86 | { 87 | fprintf(stderr, "Error: cannot get device count\n"); 88 | } 89 | 90 | return -1; 91 | } 92 | else 93 | { 94 | if(rank == 0) 95 | { 96 | printf("There are %d devices\n", device_count); 97 | } 98 | } 99 | 100 | for(int i = 0; i < device_count; ++i) 101 | { 102 | hipDeviceProp_t props; 103 | status = hipGetDeviceProperties(&props, i); 104 | 105 | if(rank == 0) 106 | { 107 | if(status != hipSuccess) 108 | { 109 | fprintf(stderr, "Error: cannot get device ID %d's properties\n", i); 110 | } 111 | else 112 | { 113 | printf("Device ID %d : %s\n", i, props.name); 114 | printf("-------------------------------------------------------------------------\n"); 115 | printf("with %ldMB memory, clock rate %dMHz @ computing capability %d.%d \n", 116 | props.totalGlobalMem >> 20, 117 | (int)(props.clockRate / 
1000), 118 | props.major, 119 | props.minor); 120 | printf("maxGridDimX %d, sharedMemPerBlock %ldKB, maxThreadsPerBlock %d, wavefrontSize " 121 | "%d\n", 122 | props.maxGridSize[0], 123 | props.sharedMemPerBlock >> 10, 124 | props.maxThreadsPerBlock, 125 | props.warpSize); 126 | 127 | printf("-------------------------------------------------------------------------\n"); 128 | } 129 | } 130 | } 131 | 132 | if(device_count <= device_id) 133 | { 134 | if(rank == 0) 135 | { 136 | fprintf(stderr, "Error: invalid device ID. There may not be such device ID. Exiting\n"); 137 | } 138 | 139 | return -1; 140 | } 141 | 142 | status = hipSetDevice(device_id); 143 | 144 | if(rank == 0 && status != hipSuccess) 145 | { 146 | fprintf(stderr, "Error: cannot set device ID %d, there may not be such device ID\n", device_id); 147 | } 148 | 149 | hipDeviceProp_t prop; 150 | hipGetDeviceProperties(&prop, device_id); 151 | printf("Using device ID %d (%s) for rocHPCG\n", device_id, prop.name); 152 | 153 | #ifndef HPCG_NO_MPI 154 | MPI_Barrier(MPI_COMM_WORLD); 155 | #endif 156 | 157 | if(rank == 0) 158 | { 159 | printf("-------------------------------------------------------------------------\n"); 160 | } 161 | 162 | #ifndef HPCG_NO_MPI 163 | MPI_Barrier(MPI_COMM_WORLD); 164 | #endif 165 | 166 | // Only rank 0 should listen 167 | ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); 168 | 169 | if(rank != 0) 170 | { 171 | delete listeners.Release(listeners.default_result_printer()); 172 | } 173 | 174 | int ret = RUN_ALL_TESTS(); 175 | 176 | hipDeviceReset(); 177 | 178 | #ifndef HPCG_NO_MPI 179 | MPI_Finalize(); 180 | #endif 181 | 182 | return ret; 183 | } 184 | -------------------------------------------------------------------------------- /src/test_rochpcg.hpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (c) 2019 Advanced 
Micro Devices, Inc. 3 | * 4 | * Redistribution and use in source and binary forms, with or without modification, 5 | * are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, this 8 | * list of conditions and the following disclaimer. 9 | * 2. Redistributions in binary form must reproduce the above copyright notice, 10 | * this list of conditions and the following disclaimer in the documentation 11 | * and/or other materials provided with the distribution. 12 | * 3. Neither the name of the copyright holder nor the names of its contributors 13 | * may be used to endorse or promote products derived from this software without 14 | * specific prior written permission. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 20 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 22 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | * POSSIBILITY OF SUCH DAMAGE. 
26 | * 27 | * ************************************************************************ */ 28 | 29 | #ifndef TEST_ROCHPCG_HPP 30 | #define TEST_ROCHPCG_HPP 31 | 32 | extern int device_id; 33 | 34 | #endif // TEST_ROCHPCG_HPP 35 | -------------------------------------------------------------------------------- /src/utils.hpp: -------------------------------------------------------------------------------- 1 | /* ************************************************************************ 2 | * Copyright (c) 2019 Advanced Micro Devices, Inc. 3 | * 4 | * Redistribution and use in source and binary forms, with or without modification, 5 | * are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, this 8 | * list of conditions and the following disclaimer. 9 | * 2. Redistributions in binary form must reproduce the above copyright notice, 10 | * this list of conditions and the following disclaimer in the documentation 11 | * and/or other materials provided with the distribution. 12 | * 3. Neither the name of the copyright holder nor the names of its contributors 13 | * may be used to endorse or promote products derived from this software without 14 | * specific prior written permission. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
19 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 20 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 22 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | * POSSIBILITY OF SUCH DAMAGE. 26 | * 27 | * ************************************************************************ */ 28 | 29 | #ifndef UTILS_HPP 30 | #define UTILS_HPP 31 | 32 | #include 33 | #include 34 | 35 | #include "Memory.hpp" 36 | 37 | // Streams 38 | extern hipStream_t stream_interior; 39 | extern hipStream_t stream_halo; 40 | // Events 41 | extern hipEvent_t halo_gather; 42 | // Workspace 43 | extern void* workspace; 44 | // Memory allocator 45 | extern hipAllocator_t allocator; 46 | 47 | #define RNG_SEED 0x586744 48 | #define MAX_COLORS 128 49 | 50 | #define NULL_CHECK(ptr) \ 51 | { \ 52 | if(ptr == NULL) \ 53 | { \ 54 | fprintf(stderr, "ERROR in file %s ; line %d\n", \ 55 | __FILE__, \ 56 | __LINE__); \ 57 | \ 58 | hipDeviceReset(); \ 59 | exit(1); \ 60 | } \ 61 | } 62 | 63 | #define HIP_CHECK(err) \ 64 | { \ 65 | if(err != hipSuccess) \ 66 | { \ 67 | fprintf(stderr, "HIP ERROR %s (%d) in file %s ; line %d\n", \ 68 | hipGetErrorString(err), \ 69 | err, \ 70 | __FILE__, \ 71 | __LINE__); \ 72 | \ 73 | hipDeviceReset(); \ 74 | exit(1); \ 75 | } \ 76 | } 77 | 78 | #define RETURN_IF_HIP_ERROR(err) \ 79 | { \ 80 | if(err != hipSuccess) \ 81 | { \ 82 | return err; \ 83 | } \ 84 | } 85 | 86 | #define RETURN_IF_HPCG_ERROR(err) \ 87 | { \ 88 | if(err != 0) \ 89 | { \ 90 | return err; \ 91 | } \ 92 | } 93 | 94 | #define EXIT_IF_HPCG_ERROR(err) \ 95 | { \ 96 | if(err != 0) \ 97 | { \ 98 | hipDeviceReset(); \ 99 | exit(1); \ 100 | } \ 101 
| } 102 | 103 | #endif // UTILS_HPP 104 | --------------------------------------------------------------------------------