├── .clang-format
├── .github
├── CODEOWNERS
├── CONTRIBUTING.md
└── workflows
│ └── docs.yaml
├── .gitignore
├── .jenkins
├── common.groovy
└── precheckin.groovy
├── CMakeLists.txt
├── LICENSE.md
├── NOTICES.txt
├── README.md
├── cmake
├── Dependencies.cmake
├── FindLIBNUMA.cmake
└── version.cmake
├── docker
├── dockerfile-build-centos
├── dockerfile-build-sles
└── dockerfile-build-ubuntu-rock
├── install.sh
├── rtest.xml
└── src
├── CG.cpp
├── CG.hpp
├── CGData.hpp
├── CG_ref.cpp
├── CG_ref.hpp
├── CMakeLists.txt
├── CheckAspectRatio.cpp
├── CheckAspectRatio.hpp
├── CheckProblem.cpp
├── CheckProblem.hpp
├── ComputeDotProduct.cpp
├── ComputeDotProduct.hpp
├── ComputeDotProduct_ref.cpp
├── ComputeDotProduct_ref.hpp
├── ComputeMG.cpp
├── ComputeMG.hpp
├── ComputeMG_ref.cpp
├── ComputeMG_ref.hpp
├── ComputeOptimalShapeXYZ.cpp
├── ComputeOptimalShapeXYZ.hpp
├── ComputeProlongation.cpp
├── ComputeProlongation.hpp
├── ComputeProlongation_ref.cpp
├── ComputeProlongation_ref.hpp
├── ComputeResidual.cpp
├── ComputeResidual.hpp
├── ComputeResidual_ref.cpp
├── ComputeResidual_ref.hpp
├── ComputeRestriction.cpp
├── ComputeRestriction.hpp
├── ComputeRestriction_ref.cpp
├── ComputeRestriction_ref.hpp
├── ComputeSPMV.cpp
├── ComputeSPMV.hpp
├── ComputeSPMV_ref.cpp
├── ComputeSPMV_ref.hpp
├── ComputeSYMGS.cpp
├── ComputeSYMGS.hpp
├── ComputeSYMGS_ref.cpp
├── ComputeSYMGS_ref.hpp
├── ComputeWAXPBY.cpp
├── ComputeWAXPBY.hpp
├── ComputeWAXPBY_ref.cpp
├── ComputeWAXPBY_ref.hpp
├── ExchangeHalo.cpp
├── ExchangeHalo.hpp
├── GenerateCoarseProblem.cpp
├── GenerateCoarseProblem.hpp
├── GenerateGeometry.cpp
├── GenerateGeometry.hpp
├── GenerateProblem.cpp
├── GenerateProblem.hpp
├── GenerateProblem_ref.cpp
├── GenerateProblem_ref.hpp
├── Geometry.hpp
├── MGData.hpp
├── Memory.cpp
├── Memory.hpp
├── MixedBaseCounter.cpp
├── MixedBaseCounter.hpp
├── MultiColoring.cpp
├── MultiColoring.hpp
├── OptimizeProblem.cpp
├── OptimizeProblem.hpp
├── OutputFile.cpp
├── OutputFile.hpp
├── Permute.cpp
├── Permute.hpp
├── ReadHpcgDat.cpp
├── ReadHpcgDat.hpp
├── ReportResults.cpp
├── ReportResults.hpp
├── SetupHalo.cpp
├── SetupHalo.hpp
├── SetupHalo_ref.cpp
├── SetupHalo_ref.hpp
├── SparseMatrix.cpp
├── SparseMatrix.hpp
├── TestCG.cpp
├── TestCG.hpp
├── TestNorms.cpp
├── TestNorms.hpp
├── TestSymmetry.cpp
├── TestSymmetry.hpp
├── Vector.hpp
├── Version.hpp.in
├── WriteProblem.cpp
├── WriteProblem.hpp
├── YAML_Doc.cpp
├── YAML_Doc.hpp
├── YAML_Element.cpp
├── YAML_Element.hpp
├── finalize.cpp
├── hpcg.hpp
├── init.cpp
├── main.cpp
├── mytimer.cpp
├── mytimer.hpp
├── rochpcg_gtest_main.cpp
├── test_rochpcg.cpp
├── test_rochpcg.hpp
└── utils.hpp
/.clang-format:
--------------------------------------------------------------------------------
1 | ---
2 | Language: Cpp
3 | AccessModifierOffset: 0
4 | AlignAfterOpenBracket: Align
5 | AlignConsecutiveAssignments: true
6 | AlignConsecutiveDeclarations: false
7 | AlignEscapedNewlinesLeft: true
8 | AlignOperands: true
9 | AlignTrailingComments: true
10 | AllowAllParametersOfDeclarationOnNextLine: true
11 | AllowShortBlocksOnASingleLine: true
12 | AllowShortCaseLabelsOnASingleLine: true
13 | AllowShortFunctionsOnASingleLine: All
14 | AllowShortIfStatementsOnASingleLine: false
15 | AllowShortLoopsOnASingleLine: false
16 | AlwaysBreakAfterDefinitionReturnType: None
17 | AlwaysBreakAfterReturnType: None
18 | AlwaysBreakBeforeMultilineStrings: false
19 | AlwaysBreakTemplateDeclarations: true
20 | BinPackArguments: false
21 | BinPackParameters: false
22 | BraceWrapping:
23 | AfterClass: true
24 | AfterControlStatement: true
25 | AfterEnum: true
26 | AfterFunction: true
27 | AfterNamespace: false
28 | AfterObjCDeclaration: true
29 | AfterStruct: true
30 | AfterUnion: true
31 | BeforeCatch: true
32 | BeforeElse: true
33 | IndentBraces: false
34 | BreakBeforeBinaryOperators: None
35 | BreakBeforeBraces: Custom
36 | BreakBeforeTernaryOperators: true
37 | BreakConstructorInitializersBeforeComma: false
38 | ColumnLimit: 100
39 | CommentPragmas: '^ IWYU pragma:'
40 | ConstructorInitializerAllOnOneLineOrOnePerLine: true
41 | ConstructorInitializerIndentWidth: 4
42 | ContinuationIndentWidth: 4
43 | Cpp11BracedListStyle: true
44 | DerivePointerAlignment: false
45 | DisableFormat: false
46 | ExperimentalAutoDetectBinPacking: false
47 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
48 | IncludeCategories:
49 | - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
50 | Priority: 2
51 | - Regex: '^(<|"(gtest|isl|json)/)'
52 | Priority: 3
53 | - Regex: '.*'
54 | Priority: 1
55 | IndentCaseLabels: false
56 | IndentWidth: 4
57 | IndentWrappedFunctionNames: false
58 | KeepEmptyLinesAtTheStartOfBlocks: true
59 | MacroBlockBegin: ''
60 | MacroBlockEnd: ''
61 | MaxEmptyLinesToKeep: 1
62 | NamespaceIndentation: None
63 | ObjCBlockIndentWidth: 2
64 | ObjCSpaceAfterProperty: false
65 | ObjCSpaceBeforeProtocolList: true
66 | PenaltyBreakBeforeFirstCallParameter: 19
67 | PenaltyBreakComment: 300
68 | PenaltyBreakFirstLessLess: 120
69 | PenaltyBreakString: 1000
70 | PenaltyExcessCharacter: 1000000
71 | PenaltyReturnTypeOnItsOwnLine: 60
72 | PointerAlignment: Left
73 | ReflowComments: true
74 | SortIncludes: false
75 | SpaceAfterCStyleCast: false
76 | # SpaceAfterTemplateKeyword: true
77 | SpaceBeforeAssignmentOperators: true
78 | SpaceBeforeParens: Never
79 | SpaceInEmptyParentheses: false
80 | SpacesBeforeTrailingComments: 1
81 | SpacesInAngles: false
82 | SpacesInContainerLiterals: true
83 | SpacesInCStyleCastParentheses: false
84 | SpacesInParentheses: false
85 | SpacesInSquareBrackets: false
86 | Standard: c++17
87 | TabWidth: 8
88 | UseTab: Never
89 | ...
90 |
91 |
--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @ntrost57 @YvanMokwinski @jsandham
2 |
--------------------------------------------------------------------------------
/.github/workflows/docs.yaml:
--------------------------------------------------------------------------------
1 | name: Upload to the upload server
2 |
3 | # Controls when the workflow will run
4 | on:
5 | push:
6 | branches: [develop, master]
7 | tags:
8 | - rocm-5.*
9 | release:
10 | types: [published]
11 |
12 | # Allows you to run this workflow manually from the Actions tab
13 | workflow_dispatch:
14 |
15 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
16 | jobs:
17 | # This workflow contains a single job called "build"
18 | build:
19 | # The type of runner that the job will run on
20 | runs-on: ubuntu-latest
21 |
22 | # Steps represent a sequence of tasks that will be executed as part of the job
23 | steps:
24 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
25 | - uses: actions/checkout@v2
26 |
27 | - name: getting branch name
28 | shell: bash
29 | run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
30 | id: branch_name
31 | - name: getting tag name
32 | shell: bash
33 | run: echo "##[set-output name=tag;]$(echo ${GITHUB_REF_NAME})"
34 | id: tag_name
35 | - name: zipping files
36 | run: zip -r ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip . -x '*.git*' '*.idea*'
37 | - name: echo-step
38 | run: echo "${{ github.event.release.target_commitish }}"
39 | - name: uploading archive to prod
40 | if: ${{ steps.branch_name.outputs.branch == 'master' || github.event.release.target_commitish == 'master'}}
41 | uses: wlixcc/SFTP-Deploy-Action@v1.0
42 | with:
43 | username: ${{ secrets.USERNAME }}
44 | server: ${{ secrets.SERVER }}
45 | ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }}
46 | local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip
47 | remote_path: '${{ secrets.PROD_UPLOAD_URL }}'
48 | args: '-o ConnectTimeout=5'
49 | - name: uploading archive to staging
50 | if: ${{ steps.branch_name.outputs.branch == 'develop' || github.event.release.target_commitish == 'develop' }}
51 | uses: wlixcc/SFTP-Deploy-Action@v1.0
52 | with:
53 | username: ${{ secrets.USERNAME }}
54 | server: ${{ secrets.SERVER }}
55 | ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }}
56 | local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip
57 | remote_path: '${{ secrets.STG_UPLOAD_URL }}'
58 | args: '-o ConnectTimeout=5'
59 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled Object files
2 | *.slo
3 | *.lo
4 | *.o
5 | *.obj
6 |
7 | # Precompiled Headers
8 | *.gch
9 | *.pch
10 |
11 | # Compiled Dynamic libraries
12 | *.so
13 | *.dylib
14 | *.dll
15 |
16 | # Fortran module files
17 | *.mod
18 |
19 | # Compiled Static libraries
20 | *.lai
21 | *.la
22 | *.a
23 | *.lib
24 |
25 | # Executables
26 | *.exe
27 | *.out
28 | *.app
29 |
30 | # vim tags
31 | tags
32 | .tags
33 | .*.swp
34 |
35 | # Editors
36 | .vscode
37 |
38 | # build-in-source directory
39 | build
40 |
41 | # doc directory
42 | docBin
43 | _build
44 |
--------------------------------------------------------------------------------
/.jenkins/common.groovy:
--------------------------------------------------------------------------------
1 | // This file is for internal AMD use.
2 | // If you are interested in running your own Jenkins, please raise a github issue for assistance.
3 | // Build stage: prepares the build prefix, then runs a bash script that installs the rocPRIM dependency and executes the project's build command.
4 | def runCompileCommand(platform, project, jobName)
5 | {
6 | project.paths.construct_build_prefix()
7 | // NOTE(review): `compiler` below and the `jobName` parameter are never referenced in this function.
8 | def command
9 | def getDependencies = auxiliary.getLibrary('rocPRIM', platform.jenkinsLabel,'develop')
10 | def compiler = '/opt/rocm/bin/amdclang++'
11 | // Script body: dependency snippet first, then cd into the build prefix and run build_command; `set -ex` aborts on the first failing command.
12 | command = """#!/usr/bin/env bash
13 | set -ex
14 | ${getDependencies}
15 | cd ${project.paths.project_build_prefix}
16 | ${project.paths.build_command}
17 | """
18 |
19 | platform.runCommand(this, command)
20 | }
21 | // Test stage: runs the rochpcg-test binary from build/release/tests (prefixed with whatever auxiliary.sudo returns) and publishes its XML results through junit.
22 | def runTestCommand (platform, project)
23 | {
24 | String sudo = auxiliary.sudo(platform.jenkinsLabel)
25 | def command = """#!/usr/bin/env bash
26 | set -ex
27 | cd ${project.paths.project_build_prefix}/build/release/tests
28 | ${sudo} ./rochpcg-test --gtest_output=xml --gtest_color=yes
29 | """
30 | // Run the script, then hand every *.xml file in the tests directory to the junit step.
31 | platform.runCommand(this, command)
32 | junit "${project.paths.project_build_prefix}/build/release/tests/*.xml"
33 | }
34 |
35 | return this
36 |
--------------------------------------------------------------------------------
/.jenkins/precheckin.groovy:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env groovy
2 | @Library('rocJenkins@pong') _
3 | import com.amd.project.*
4 | import com.amd.docker.*
5 | import java.nio.file.Path;
6 | // CI closure: configures the rocHPCG 'PreCheckin' project and passes its compile and test closures to buildProject for the given nodes.
7 | def runCI =
8 | {
9 | nodeDetails, jobName->
10 | // Build via install.sh with the test target (-t) and with OpenMP and MPI switched off.
11 | def prj = new rocProject('rocHPCG', 'PreCheckin')
12 | prj.paths.build_command = './install.sh -t --with-openmp=OFF --with-mpi=OFF'
13 |
14 | def nodes = new dockerNodes(nodeDetails, jobName, prj)
15 | // Assigned inside compileCommand and reused by testCommand, so the compile closure must run before the test closure.
16 | def commonGroovy
17 |
18 | boolean formatCheck = false
19 | // Compile closure: loads the shared helpers from the project sources, then runs the build.
20 | def compileCommand =
21 | {
22 | platform, project->
23 |
24 | commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy"
25 | commonGroovy.runCompileCommand(platform, project, jobName)
26 | }
27 | // Test closure: relies on commonGroovy having been loaded by compileCommand.
28 | def testCommand =
29 | {
30 | platform, project->
31 |
32 | commonGroovy.runTestCommand(platform, project)
33 | }
34 | // NOTE(review): buildProject is not defined in this file; the meaning of the trailing null argument is not visible here.
35 | buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, null)
36 | }
37 | // Pipeline entry point: derives the job name from BUILD_URL, applies the matching job properties and runs CI for the matching node list.
38 | ci: {
39 | String urlJobName = auxiliary.getTopJobName(env.BUILD_URL)
40 | // Per-job property lists; only the hipclang job declares a cron trigger ('0 1 * * 0').
41 | def propertyList = ["compute-rocm-dkms-no-npi":[],
42 | "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])],
43 | "rocm-docker":[]]
44 | propertyList = auxiliary.appendPropertyList(propertyList)
45 | // Per-job node details. NOTE(review): keys look like OS images and values like GPU architectures - confirm against the shared library.
46 | def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]),
47 | "compute-rocm-dkms-no-npi-hipclang":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]),
48 | "rocm-docker":([ubuntu16:['gfx900']])]
49 | jobNameList = auxiliary.appendJobNameList(jobNameList)
50 | // Apply the properties registered under the current job name, if any.
51 | propertyList.each
52 | {
53 | jobName, property->
54 | if (urlJobName == jobName)
55 | properties(auxiliary.addCommonProperties(property))
56 | }
57 | // Run CI inside a stage named after the current job when it appears in jobNameList.
58 | jobNameList.each
59 | {
60 | jobName, nodeDetails->
61 | if (urlJobName == jobName)
62 | stage(jobName) {
63 | runCI(nodeDetails, jobName)
64 | }
65 | }
66 |
67 | // Fallback for url job names that are not listed in jobNameList, e.g. compute-rocm-dkms-no-npi-1901
68 | if(!jobNameList.keySet().contains(urlJobName))
69 | {
70 | properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])]))
71 | stage(urlJobName) {
72 | runCI([ubuntu16:['gfx906']], urlJobName)
73 | }
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Modifications (c) 2019-2023 Advanced Micro Devices, Inc.
2 | #
3 | # Redistribution and use in source and binary forms, with or without modification,
4 | # are permitted provided that the following conditions are met:
5 | #
6 | # 1. Redistributions of source code must retain the above copyright notice, this
7 | # list of conditions and the following disclaimer.
8 | # 2. Redistributions in binary form must reproduce the above copyright notice,
9 | # this list of conditions and the following disclaimer in the documentation
10 | # and/or other materials provided with the distribution.
11 | # 3. Neither the name of the copyright holder nor the names of its contributors
12 | # may be used to endorse or promote products derived from this software without
13 | # specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 | # IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21 | # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 | # POSSIBILITY OF SUCH DAMAGE.
25 |
26 | cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
27 |
28 | # Consider removing this in the future
29 | # This should appear before the project command, because it does not use FORCE
30 | set(CMAKE_INSTALL_PREFIX ${ROCM_PATH} CACHE PATH "Install path prefix, prepended onto install directories")
31 |
32 | # CMake modules
33 | list(APPEND CMAKE_MODULE_PATH
34 | ${CMAKE_CURRENT_SOURCE_DIR}/cmake
35 | ${ROCM_PATH}/lib/cmake/hip
36 | ${ROCM_PATH}/hip/cmake)
37 |
38 | # Set a default build type if none was specified
39 | if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
40 | message(STATUS "Setting build type to 'Release' as none was specified.")
41 | set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build." FORCE)
42 | set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "" "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
43 | endif()
44 |
45 | # Honor per-config flags in try_compile() source-file signature. cmake v3.7 and up
46 | if(POLICY CMP0066)
47 | cmake_policy(SET CMP0066 NEW)
48 | endif()
49 |
50 | # rocHPCG project
51 | project(rochpcg LANGUAGES CXX)
52 |
53 | # Force library install path to lib (CentOS 7 defaults to lib64)
54 | set(CMAKE_INSTALL_LIBDIR "lib" CACHE INTERNAL "Installation directory for libraries" FORCE)
55 |
56 | # Build flags
57 | set(CMAKE_CXX_STANDARD 17)
58 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
59 | set(CMAKE_CXX_EXTENSIONS OFF)
60 |
61 | # Build options
62 | option(HPCG_DEBUG "Compile with modest debugging turned on" OFF)
63 | option(HPCG_DETAILED_DEBUG "Compile with voluminous debugging information turned on" OFF)
64 | option(HPCG_DETAILED_TIMING "Enable detail timers" OFF)
65 | option(HPCG_REFERENCE "Build reference mode" OFF)
66 | option(BUILD_TEST "Build rocHPCG single-node test" OFF)
67 |
68 | # Optimization options
69 | option(OPT_MEMMGMT "Build with memory management module" ON)
70 | option(OPT_DEFRAG "Build with memory management defragmentation" ON)
71 | option(GPU_AWARE_MPI "Enable use of GPU-Aware MPI functionality" OFF)
72 |
73 | # roctx Markers
74 | option(OPT_ROCTX "Enable rocTX markers" OFF)
75 |
76 | # Dependencies
77 | include(cmake/Dependencies.cmake)
78 |
79 | # Find HIP package
80 | find_package(HIP REQUIRED)
81 | find_package(rocprim REQUIRED)
82 |
83 | # GPU arch targets
84 | if(AMDGPU_TARGETS AND NOT GPU_TARGETS)
85 | message( DEPRECATION "AMDGPU_TARGETS use is deprecated. Use GPU_TARGETS." )
86 | endif()
87 | set(AMDGPU_TARGETS "gfx900;gfx906" CACHE STRING "List of specific machine types for library to target")
88 | if(HIP_VERSION VERSION_GREATER_EQUAL "3.7")
89 | set(AMDGPU_TARGETS "${AMDGPU_TARGETS};gfx908")
90 | endif()
91 | if(HIP_VERSION VERSION_GREATER_EQUAL "4.3")
92 | set(AMDGPU_TARGETS "${AMDGPU_TARGETS};gfx90a")
93 | endif()
94 | if (HIP_VERSION VERSION_GREATER_EQUAL "5.7")
95 | set(AMDGPU_TARGETS "${AMDGPU_TARGETS};gfx942")
96 | endif()
97 | if (HIP_VERSION VERSION_GREATER_EQUAL "6.5")
98 | set(AMDGPU_TARGETS "${AMDGPU_TARGETS};gfx950")
99 | endif()
100 | # Don't force, as users should be able to override GPU_TARGETS at the command line if desired
101 | set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU architectures to build for")
102 |
103 | # Setup version
104 | rocm_setup_version(VERSION 0.8.6)
105 |
106 | # rocHPCG source directory
107 | add_subdirectory(src)
108 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright (c) 2019-2021 Advanced Micro Devices, Inc.
2 |
3 | Redistribution and use in source and binary forms, with or without modification,
4 | are permitted provided that the following conditions are met:
5 |
6 | 1. Redistributions of source code must retain the above copyright notice, this
7 | list of conditions and the following disclaimer.
8 | 2. Redistributions in binary form must reproduce the above copyright notice,
9 | this list of conditions and the following disclaimer in the documentation
10 | and/or other materials provided with the distribution.
11 | 3. Neither the name of the copyright holder nor the names of its contributors
12 | may be used to endorse or promote products derived from this software without
13 | specific prior written permission.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 | POSSIBILITY OF SUCH DAMAGE.
25 |
--------------------------------------------------------------------------------
/NOTICES.txt:
--------------------------------------------------------------------------------
1 | Notices and licenses file
2 | _________________________
3 |
4 | AMD copyrighted code (BSD3)
5 |
6 | Copyright (c) 2019-2021 Advanced Micro Devices, Inc.
7 |
8 | Redistribution and use in source and binary forms, with or without modification,
9 | are permitted provided that the following conditions are met:
10 |
11 | 1. Redistributions of source code must retain the above copyright notice, this
12 | list of conditions and the following disclaimer.
13 | 2. Redistributions in binary form must reproduce the above copyright notice,
14 | this list of conditions and the following disclaimer in the documentation
15 | and/or other materials provided with the distribution.
16 | 3. Neither the name of the copyright holder nor the names of its contributors
17 | may be used to endorse or promote products derived from this software without
18 | specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
24 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
26 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 | POSSIBILITY OF SUCH DAMAGE.
30 |
31 |
32 | Dependencies on hpcg-benchmark-hpcg v3.1 (BSD3)
33 |
34 | Modifications (c) 2019-2021 Advanced Micro Devices, Inc.
35 |
36 | Redistribution and use in source and binary forms, with or without modification,
37 | are permitted provided that the following conditions are met:
38 |
39 | 1. Redistributions of source code must retain the above copyright notice, this
40 | list of conditions and the following disclaimer.
41 | 2. Redistributions in binary form must reproduce the above copyright notice,
42 | this list of conditions and the following disclaimer in the documentation
43 | and/or other materials provided with the distribution.
44 | 3. Neither the name of the copyright holder nor the names of its contributors
45 | may be used to endorse or promote products derived from this software without
46 | specific prior written permission.
47 |
48 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
49 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
50 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
51 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
52 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
53 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
54 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
55 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
56 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
57 | POSSIBILITY OF SUCH DAMAGE.
58 |
59 |
60 | hpcg-benchmark-hpcg v3.1 (BSD3)
61 | Copyright (c) 2013-2019, hpcg-benchmark
62 | All rights reserved.
63 |
64 |
65 | Redistribution and use in source and binary forms, with or without
66 | modification, are permitted provided that the following conditions are met:
67 |
68 | * Redistributions of source code must retain the above copyright notice, this
69 | list of conditions and the following disclaimer.
70 |
71 | * Redistributions in binary form must reproduce the above copyright notice,
72 | this list of conditions and the following disclaimer in the documentation
73 | and/or other materials provided with the distribution.
74 |
75 | * Neither the name of hpcg nor the names of its
76 | contributors may be used to endorse or promote products derived from
77 | this software without specific prior written permission.
78 |
79 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
80 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
81 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
82 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
83 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
84 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
85 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
86 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
87 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
88 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
89 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # rocHPCG
2 | rocHPCG is a benchmark based on the [HPCG][] benchmark application, implemented on top of AMD's Radeon Open eCosystem Platform [ROCm][] runtime and toolchains. rocHPCG is created using the [HIP][] programming language and optimized for AMD's latest discrete GPUs.
3 |
4 | ## Requirements
5 | * Git
6 | * CMake (3.10 or later)
7 | * MPI
8 | * NUMA library
9 | * AMD [ROCm] platform (4.1 or later)
10 | * [rocPRIM][]
11 | * googletest (for test application only)
12 |
13 | ## Quickstart rocHPCG build and install
14 |
15 | #### Install script
16 | You can build rocHPCG using the *install.sh* script
17 | ```
18 | # Clone rocHPCG using git
19 | git clone https://github.com/ROCmSoftwarePlatform/rocHPCG.git
20 |
21 | # Go to rocHPCG directory
22 | cd rocHPCG
23 |
24 | # Run install.sh script
25 | # Command line options:
26 | # -h|--help - prints this help message
27 | # -i|--install - install after build
28 | # -d|--dependencies - install dependencies
29 | # -r|--reference - reference mode
30 | # -g|--debug - -DCMAKE_BUILD_TYPE=Debug (default: Release)
31 | # -t|--test - build single GPU test
32 | # --with-rocm=<dir> - Path to ROCm install (default: /opt/rocm)
33 | # --with-mpi=<dir> - Path to external MPI install (Default: clone+build OpenMPI v4.1.0 in deps/)
34 | # --with-openmp - compile with OpenMP support (default: enabled)
35 | # --with-memmgmt - compile with smart memory management (default: enabled)
36 | # --with-memdefrag - compile with memory defragmentation (default: enabled)
37 | ./install.sh -di
38 | ```
39 | By default, [UCX] v1.10.0 and [OpenMPI] v4.1.0 will be cloned and built in `rocHPCG/deps`.
40 | After build and install, the `rochpcg` executable is placed in `build/release/rochpcg-install`.
41 |
42 | #### MPI
43 | You can build rocHPCG using your own MPI installation by specifying the directory, e.g.
44 | ```
45 | ./install.sh -di --with-mpi=/my/mpiroot/
46 | ```
47 | Alternatively, when you do not pass a specific directory, OpenMPI v4.1.0 with UCX will be cloned and built within `rocHPCG/deps` directory.
48 | If you want to disable MPI, you need to run
49 | ```
50 | ./install.sh -di --with-mpi=off
51 | ```
52 |
53 | #### ROCm
54 | You can build rocHPCG with specific ROCm versions by passing the directory to the install script, e.g.
55 | ```
56 | ./install.sh -di --with-rocm=/my/rocm-x.y.z/
57 | ```
58 |
59 | ## Running rocHPCG benchmark application
60 | You can run the rocHPCG benchmark application by either using command line parameters or the `hpcg.dat` input file
61 | ```
62 | rochpcg <nx> <ny> <nz> <runtime>
63 | # where
64 | # nx - is the global problem size in x dimension
65 | # ny - is the global problem size in y dimension
66 | # nz - is the global problem size in z dimension
67 | # runtime - is the desired benchmarking time in seconds (> 1800s for official runs)
68 | ```
69 |
70 | Similarly, these parameters can be entered into an input file `hpcg.dat` in the working directory, e.g. `nx = ny = nz = 280` and `runtime = 1860`.
71 | ```
72 | HPCG benchmark input file
73 | Sandia National Laboratories; University of Tennessee, Knoxville
74 | 280 280 280
75 | 1860
76 | ```
77 |
78 | ## Performance evaluation
79 | For performance evaluation purposes, the number of iterations should be as low as possible (i.e. convergence rate as high as possible), since the final HPCG score is scaled to 50 iterations.
80 | Furthermore, it is observed that high memory occupancy performs better on AMD devices. The suggested problem size is `nx = ny = nz = 280` for devices with 16GB and `nx = 560, ny = nz = 280` for devices with 32GB or more. The runtime for official runs has to be at least 1800 seconds (use 1860 to be on the safe side), e.g.
81 | ```
82 | ./rochpcg 560 280 280 1860
83 | ```
84 | Please note that convergence rate behaviour might change in a multi-GPU environment and need to be adjusted accordingly.
85 |
86 | Additionally, you can specify the device to be used for the application (e.g. device #1):
87 | ```
88 | ./rochpcg 560 280 280 1860 --dev=1
89 | ```
90 |
91 | ## Support
92 | Please use [the issue tracker][] for bugs and feature requests.
93 |
94 | ## License
95 | The [license file][] can be found in the main repository.
96 |
97 | [HPCG]: https://www.hpcg-benchmark.org/
98 | [ROCm]: https://github.com/RadeonOpenCompute/ROCm
99 | [HIP]: https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/
100 | [rocPRIM]: https://github.com/ROCmSoftwarePlatform/rocPRIM
101 | [OpenMPI]: https://github.com/open-mpi/ompi
102 | [UCX]: https://github.com/openucx/ucx
103 | [the issue tracker]: https://github.com/ROCmSoftwarePlatform/rocHPCG/issues
104 | [license file]: https://github.com/ROCmSoftwarePlatform/rocHPCG
105 |
--------------------------------------------------------------------------------
/cmake/Dependencies.cmake:
--------------------------------------------------------------------------------
1 | # Modifications (c) 2019-2021 Advanced Micro Devices, Inc.
2 | #
3 | # Redistribution and use in source and binary forms, with or without modification,
4 | # are permitted provided that the following conditions are met:
5 | #
6 | # 1. Redistributions of source code must retain the above copyright notice, this
7 | # list of conditions and the following disclaimer.
8 | # 2. Redistributions in binary form must reproduce the above copyright notice,
9 | # this list of conditions and the following disclaimer in the documentation
10 | # and/or other materials provided with the distribution.
11 | # 3. Neither the name of the copyright holder nor the names of its contributors
12 | # may be used to endorse or promote products derived from this software without
13 | # specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 | # IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21 | # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 | # POSSIBILITY OF SUCH DAMAGE.
25 |
26 | # Dependencies
27 |
28 | # Git is mandatory: REQUIRED makes the configuration fail when it cannot be found
29 | find_package(Git REQUIRED)
30 |
31 | # Let the package searches below also look in ROCM_PATH and its hip subdirectory
32 | list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip)
33 |
34 | # OpenMP is optional: the HPCG_OPENMP switch (default ON) is only offered when it is found
35 | find_package(OpenMP)
36 | if (NOT OPENMP_FOUND)
37 | message("-- OpenMP not found. Compiling WITHOUT OpenMP support.")
38 | else()
39 | option(HPCG_OPENMP "Compile WITH OpenMP support." ON)
40 | endif()
41 |
42 | # MPI is optional unless HPCG_MPI was already requested; MPI_HOME is set from HPCG_MPI_DIR before the search
43 | set(MPI_HOME ${HPCG_MPI_DIR})
44 | find_package(MPI)
45 | if (NOT MPI_FOUND)
46 | message("-- MPI not found. Compiling WITHOUT MPI support.")
47 | if (HPCG_MPI)
48 | message(FATAL_ERROR "Cannot build with MPI support.")
49 | endif()
50 | else()
51 | option(HPCG_MPI "Compile WITH MPI support." ON)
52 | endif()
53 |
54 | # GoogleTest is needed only when BUILD_TEST is set
55 | if(BUILD_TEST)
56 | find_package(GTest REQUIRED)
57 | endif()
58 |
59 | # libnuma is mandatory whenever MPI support is enabled
60 | if(HPCG_MPI)
61 | find_package(LIBNUMA REQUIRED)
62 | endif()
63 |
64 | # rocm-cmake: use an installed copy if present, otherwise download, configure and install one under the build tree
65 | find_package(ROCM 0.7.3 QUIET CONFIG PATHS ${CMAKE_PREFIX_PATH} $ENV{ROCM_PATH})
66 | if(NOT ROCM_FOUND)
67 | set(PROJECT_EXTERN_DIR "${CMAKE_CURRENT_BINARY_DIR}/deps")
68 | file( TO_NATIVE_PATH "${PROJECT_EXTERN_DIR}" PROJECT_EXTERN_DIR_NATIVE)
69 | set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download")
70 | set(rocm_cmake_url "https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.tar.gz")
71 | set(rocm_cmake_archive "${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.tar.gz")
72 | set(rocm_cmake_src "${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}")
73 | file(DOWNLOAD ${rocm_cmake_url} ${rocm_cmake_archive}
74 | STATUS rocm_cmake_download_status LOG rocm_cmake_download_log)
75 | list(GET rocm_cmake_download_status 0 rocm_cmake_download_error_code)
76 | if(rocm_cmake_download_error_code)
77 | message(FATAL_ERROR "Error: downloading ${rocm_cmake_url} failed "
78 | "error_code: ${rocm_cmake_download_error_code} "
79 | "log: ${rocm_cmake_download_log} ")
80 | endif()
81 |
82 | # Check every step's exit status so a failed unpack/configure/install stops here with its own message.
83 | execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzvf ${rocm_cmake_archive}
84 | WORKING_DIRECTORY ${PROJECT_EXTERN_DIR} RESULT_VARIABLE rocm_cmake_unpack_error_code)
85 | if(rocm_cmake_unpack_error_code)
86 | message(FATAL_ERROR "Error: unpacking ${rocm_cmake_archive} failed")
87 | endif()
88 | execute_process(COMMAND ${CMAKE_COMMAND} -S ${rocm_cmake_src} -B ${rocm_cmake_src}/build
89 | WORKING_DIRECTORY ${PROJECT_EXTERN_DIR} RESULT_VARIABLE rocm_cmake_configure_error_code)
90 | if(rocm_cmake_configure_error_code)
91 | message(FATAL_ERROR "Error: configuring ${rocm_cmake_src} failed")
92 | endif()
93 | execute_process(COMMAND ${CMAKE_COMMAND} --install ${rocm_cmake_src}/build --prefix ${PROJECT_EXTERN_DIR}/rocm
94 | WORKING_DIRECTORY ${PROJECT_EXTERN_DIR} RESULT_VARIABLE rocm_cmake_install_error_code)
95 | if(rocm_cmake_install_error_code)
96 | message(FATAL_ERROR "Error: installing rocm-cmake into ${PROJECT_EXTERN_DIR}/rocm failed")
97 | endif()
98 | find_package(ROCM 0.7.3 REQUIRED CONFIG PATHS ${PROJECT_EXTERN_DIR})
99 | endif()
100 |
101 | include(ROCMSetupVersion)
102 | include(ROCMCreatePackage)
103 | include(ROCMInstallTargets)
104 | include(ROCMPackageConfigHelpers)
105 | include(ROCMInstallSymlinks)
106 | include(ROCMCheckTargetIds)
107 | include(ROCMClients)
108 |
--------------------------------------------------------------------------------
/cmake/FindLIBNUMA.cmake:
--------------------------------------------------------------------------------
1 | # Modifications (c) 2019-2021 Advanced Micro Devices, Inc.
2 | #
3 | # Redistribution and use in source and binary forms, with or without modification,
4 | # are permitted provided that the following conditions are met:
5 | #
6 | # 1. Redistributions of source code must retain the above copyright notice, this
7 | # list of conditions and the following disclaimer.
8 | # 2. Redistributions in binary form must reproduce the above copyright notice,
9 | # this list of conditions and the following disclaimer in the documentation
10 | # and/or other materials provided with the distribution.
11 | # 3. Neither the name of the copyright holder nor the names of its contributors
12 | # may be used to endorse or promote products derived from this software without
13 | # specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 | # IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21 | # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 | # POSSIBILITY OF SUCH DAMAGE.
25 |
26 | # Locate libnuma: the directory holding numa.h and the numa library itself.
27 | # "ENV <var>" reads exactly one environment variable, so every variable gets its own ENV keyword.
28 | find_path(LIBNUMA_INCLUDE_DIR NAMES numa.h
29 | PATHS
30 | ENV INCLUDE ENV CPATH
31 | /usr/include)
32 |
33 | find_library(LIBNUMA_LIBRARY NAMES numa
34 | PATHS ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu)
35 |
36 | if(LIBNUMA_INCLUDE_DIR AND LIBNUMA_LIBRARY)
37 | set(LIBNUMA_FOUND TRUE)
38 | else()
39 | set(LIBNUMA_FOUND FALSE)
40 | endif()
41 |
42 | # Expose the result as the imported interface target libnuma::libnuma; the guard avoids redefining it.
43 | if(NOT TARGET libnuma::libnuma)
44 | add_library(libnuma::libnuma INTERFACE IMPORTED)
45 | endif()
46 |
47 | set_property(TARGET libnuma::libnuma PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${LIBNUMA_INCLUDE_DIR}")
48 | set_property(TARGET libnuma::libnuma PROPERTY INTERFACE_LINK_LIBRARIES "${LIBNUMA_LIBRARY}")
49 |
50 | # Standard found/not-found handling based on the two cache variables located above.
51 | include(FindPackageHandleStandardArgs)
52 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LIBNUMA DEFAULT_MSG
53 | LIBNUMA_LIBRARY
54 | LIBNUMA_INCLUDE_DIR)
55 |
56 | # Mark both cache entries as advanced.
57 | mark_as_advanced(LIBNUMA_INCLUDE_DIR LIBNUMA_LIBRARY)
58 |
--------------------------------------------------------------------------------
/cmake/version.cmake:
--------------------------------------------------------------------------------
1 | # Modifications (c) 2019 Advanced Micro Devices, Inc.
2 | #
3 | # Redistribution and use in source and binary forms, with or without modification,
4 | # are permitted provided that the following conditions are met:
5 | #
6 | # 1. Redistributions of source code must retain the above copyright notice, this
7 | # list of conditions and the following disclaimer.
8 | # 2. Redistributions in binary form must reproduce the above copyright notice,
9 | # this list of conditions and the following disclaimer in the documentation
10 | # and/or other materials provided with the distribution.
11 | # 3. Neither the name of the copyright holder nor the names of its contributors
12 | # may be used to endorse or promote products derived from this software without
13 | # specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 | # IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21 | # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 | # POSSIBILITY OF SUCH DAMAGE.
25 |
26 | # TODO: move this function to https://github.com/RadeonOpenCompute/rocm-cmake/blob/master/share/rocm/cmake/ROCMSetupVersion.cmake
27 | # Set VAR to the remaining arguments in the invoking scope and in that scope's parent.
28 | macro(rocm_set_parent VAR)
29 | set(${VAR} ${ARGN})
30 | set(${VAR} ${ARGN} PARENT_SCOPE)
31 | endmacro()
32 |
33 | # rocm_get_git_commit_id(<out-var> [VERSION <fallback>] [DIRECTORY <dir>])
34 | #
35 | # Runs "git describe --dirty --long --match [0-9]*" in <dir> (default: CMAKE_CURRENT_SOURCE_DIR)
36 | # and, if that fails, once more with --always appended. The output of the successful run is
37 | # handed to rocm_set_parent for <out-var>; the VERSION value (if any) is used when git is not
38 | # found or both runs fail.
39 | function(rocm_get_git_commit_id OUTPUT_VERSION)
40 | cmake_parse_arguments(PARSE "" "VERSION;DIRECTORY" "" ${ARGN})
41 | set(_version ${PARSE_VERSION})
42 |
43 | if(PARSE_DIRECTORY)
44 | set(DIRECTORY ${PARSE_DIRECTORY})
45 | else()
46 | set(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
47 | endif()
48 |
49 | find_program(GIT NAMES git)
50 |
51 | if(GIT)
52 | set(GIT_COMMAND ${GIT} describe --dirty --long --match [0-9]*)
53 | execute_process(COMMAND ${GIT_COMMAND}
54 | WORKING_DIRECTORY ${DIRECTORY}
55 | OUTPUT_VARIABLE GIT_TAG_VERSION
56 | OUTPUT_STRIP_TRAILING_WHITESPACE
57 | RESULT_VARIABLE RESULT
58 | ERROR_QUIET)
59 | # Retry with --always only when the first describe did not succeed.
60 | if(NOT ${RESULT} EQUAL 0)
61 | execute_process(COMMAND ${GIT_COMMAND} --always
62 | WORKING_DIRECTORY ${DIRECTORY}
63 | OUTPUT_VARIABLE GIT_TAG_VERSION
64 | OUTPUT_STRIP_TRAILING_WHITESPACE
65 | RESULT_VARIABLE RESULT
66 | ERROR_QUIET)
67 | endif()
68 | if(${RESULT} EQUAL 0)
69 | set(_version ${GIT_TAG_VERSION})
70 | endif()
71 | endif()
72 | rocm_set_parent(${OUTPUT_VERSION} ${_version})
73 | endfunction()
73 |
--------------------------------------------------------------------------------
/docker/dockerfile-build-centos:
--------------------------------------------------------------------------------
1 | # Parameters related to building rocHPCG
2 | ARG base_image
3 |
4 | FROM ${base_image}
5 | LABEL maintainer="rochpcg-maintainer@amd.com"
6 |
7 | ARG user_uid
8 |
9 | ARG library_dependencies="rocprim"
10 |
11 | # Install dependent packages
12 | RUN yum install -y \
13 | sudo \
14 | centos-release-scl \
15 | devtoolset-7 \
16 | ca-certificates \
17 | git \
18 | cmake3 \
19 | make \
20 | clang \
21 | clang-devel \
22 | gcc-c++ \
23 | pkgconfig \
24 | libcxx-devel \
25 | numactl-libs \
26 | rpm-build \
27 | deltarpm \
28 | ${library_dependencies}
29 |
30 | RUN echo '#!/bin/bash' | tee /etc/profile.d/devtoolset7.sh && echo \
31 | 'source scl_source enable devtoolset-7' >>/etc/profile.d/devtoolset7.sh
32 |
33 | # docker pipeline runs containers with particular uid
34 | # create a jenkins user with this specific uid so it can use sudo privileges
35 | # Grant any member of video group password-less sudo privileges
36 | RUN useradd --create-home -u ${user_uid} -o -G video --shell /bin/bash jenkins && \
37 | echo '%video ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd && \
38 | chmod 400 /etc/sudoers.d/sudo-nopasswd
39 |
40 | ARG GTEST_SRC_ROOT=/usr/local/src/gtest
41 |
42 | # Clone gtest repo
43 | # Build gtest and install into /usr/local
44 | RUN mkdir -p ${GTEST_SRC_ROOT} && cd ${GTEST_SRC_ROOT} && \
45 | git clone -b release-1.8.1 --depth=1 https://github.com/google/googletest . && \
46 | mkdir -p build && cd build && \
47 | cmake .. && \
48 | make -j $(nproc) install && \
49 | rm -rf ${GTEST_SRC_ROOT}
--------------------------------------------------------------------------------
/docker/dockerfile-build-sles:
--------------------------------------------------------------------------------
1 | # Parameters related to building rocHPCG
2 | ARG base_image
3 |
4 | FROM ${base_image}
5 | LABEL maintainer="rochpcg-maintainer@amd.com"
6 |
7 | ARG user_uid
8 |
9 | ARG library_dependencies="rocprim"
10 |
11 | # Install dependent packages
12 | RUN zypper -n update && zypper -n install\
13 | sudo \
14 | ca-certificates \
15 | git \
16 | gcc-c++ \
17 | gcc-fortran \
18 | make \
19 | cmake \
20 | rpm-build \
21 | dpkg \
22 | libcxxtools9 \
23 | ${library_dependencies}
24 |
25 | # docker pipeline runs containers with particular uid
26 | # create a jenkins user with this specific uid so it can use sudo privileges
27 | # Grant any member of video group password-less sudo privileges
28 | RUN useradd --create-home -u ${user_uid} -o -G video --shell /bin/bash jenkins && \
29 | echo '%video ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd && \
30 | chmod 400 /etc/sudoers.d/sudo-nopasswd
31 |
32 | ARG GTEST_SRC_ROOT=/usr/local/src/gtest
33 |
34 | # Clone gtest repo
35 | # Build gtest and install into /usr/local
36 | RUN mkdir -p ${GTEST_SRC_ROOT} && cd ${GTEST_SRC_ROOT} && \
37 | git clone -b release-1.8.1 --depth=1 https://github.com/google/googletest . && \
38 | mkdir -p build && cd build && \
39 | cmake .. && \
40 | make -j $(nproc) install && \
41 | rm -rf ${GTEST_SRC_ROOT}
--------------------------------------------------------------------------------
/docker/dockerfile-build-ubuntu-rock:
--------------------------------------------------------------------------------
1 | # Parameters related to building rocHPCG
2 | ARG base_image
3 |
4 | FROM ${base_image}
5 | LABEL maintainer="rochpcg-maintainer@amd.com"
6 |
7 | ARG user_uid
8 |
9 | ARG library_dependencies="rocblas rocsolver"
10 |
11 | # Install dependent packages
12 | # Dependencies:
13 | # * hcc-config.cmake: pkg-config
14 | # * rochpcg-test: googletest rocprim
15 | # * libhsakmt.so: libnuma1 libnuma-dev
16 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
17 | sudo \
18 | ca-certificates \
19 | git \
20 | make \
21 | cmake \
22 | pkg-config \
23 | libnuma1 \
24 | libnuma-dev \
25 | mpi-default-bin \
26 | mpi-default-dev \
27 | libomp-dev \
28 | ${library_dependencies} \
29 | && \
30 | apt-get clean && \
31 | rm -rf /var/lib/apt/lists/*
32 |
33 | # docker pipeline runs containers with particular uid
34 | # create a jenkins user with this specific uid so it can use sudo privileges
35 | # Grant any member of video group password-less sudo privileges
36 | RUN useradd --create-home -u ${user_uid} -o -G video --shell /bin/bash jenkins && \
37 | mkdir -p /etc/sudoers.d/ && \
38 | echo '%video ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd
39 |
40 | ARG GTEST_SRC_ROOT=/usr/local/src/gtest
41 |
42 | # Clone gtest repo
43 | # Build gtest and install into /usr/local
44 | RUN mkdir -p ${GTEST_SRC_ROOT} && cd ${GTEST_SRC_ROOT} && \
45 | git clone -b release-1.8.1 --depth=1 https://github.com/google/googletest . && \
46 | mkdir -p build && cd build && \
47 | cmake .. && \
48 | make -j $(nproc) install && \
49 | rm -rf ${GTEST_SRC_ROOT}
50 |
--------------------------------------------------------------------------------
/rtest.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | 0.1
5 | rochpcg-test --gtest_color=yes --gtest_output=xml
6 | *
7 |
8 | {COMMAND}:output_psdb.xml
9 |
10 |
11 | {COMMAND}:output_osdb.xml
12 |
13 |
14 | {COMMAND}:output_custom.xml --gtest_filter={GTEST_FILTER}
15 |
16 |
17 |
--------------------------------------------------------------------------------
/src/CG.hpp:
--------------------------------------------------------------------------------
1 |
2 | //@HEADER
3 | // ***************************************************
4 | //
5 | // HPCG: High Performance Conjugate Gradient Benchmark
6 | //
7 | // Contact:
8 | // Michael A. Heroux ( maherou@sandia.gov)
9 | // Jack Dongarra (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 |
15 | /* ************************************************************************
16 | * Modifications (c) 2019 Advanced Micro Devices, Inc.
17 | *
18 | * Redistribution and use in source and binary forms, with or without modification,
19 | * are permitted provided that the following conditions are met:
20 | *
21 | * 1. Redistributions of source code must retain the above copyright notice, this
22 | * list of conditions and the following disclaimer.
23 | * 2. Redistributions in binary form must reproduce the above copyright notice,
24 | * this list of conditions and the following disclaimer in the documentation
25 | * and/or other materials provided with the distribution.
26 | * 3. Neither the name of the copyright holder nor the names of its contributors
27 | * may be used to endorse or promote products derived from this software without
28 | * specific prior written permission.
29 | *
30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 | * POSSIBILITY OF SUCH DAMAGE.
40 | *
41 | * ************************************************************************ */
42 |
43 | #ifndef CG_HPP
44 | #define CG_HPP
45 |
46 | #include "SparseMatrix.hpp"
47 | #include "Vector.hpp"
48 | #include "CGData.hpp"
49 |
50 | int CG(const SparseMatrix & A, CGData & data, const Vector & b, Vector & x,
51 | const int max_iter, const double tolerance, int & niters, double & normr, double & normr0,
52 | double * times, bool doPreconditioning, bool verbose);
53 |
54 | // this function will compute the Conjugate Gradient iterations.
55 | // A - Matrix
56 | // data - CG work vectors r, z, p and Ap (see CGData.hpp)
57 | // b - constant
58 | // x - used for return value
59 | // max_iter - how many times we iterate
60 | // tolerance - Stopping tolerance for preconditioned iterations.
61 | // niters - number of iterations performed
62 | // normr - computed residual norm
63 | // normr0 - Original residual
64 | // times - array of timing information
65 | // doPreconditioning - bool to specify whether or not symmetric GS will be applied.
66 | // verbose - NOTE(review): undocumented here; presumably toggles progress output - confirm in CG.cpp
67 | #endif // CG_HPP
68 |
--------------------------------------------------------------------------------
/src/CGData.hpp:
--------------------------------------------------------------------------------
1 |
2 | //@HEADER
3 | // ***************************************************
4 | //
5 | // HPCG: High Performance Conjugate Gradient Benchmark
6 | //
7 | // Contact:
8 | // Michael A. Heroux ( maherou@sandia.gov)
9 | // Jack Dongarra (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 |
15 | /* ************************************************************************
16 | * Modifications (c) 2019 Advanced Micro Devices, Inc.
17 | *
18 | * Redistribution and use in source and binary forms, with or without modification,
19 | * are permitted provided that the following conditions are met:
20 | *
21 | * 1. Redistributions of source code must retain the above copyright notice, this
22 | * list of conditions and the following disclaimer.
23 | * 2. Redistributions in binary form must reproduce the above copyright notice,
24 | * this list of conditions and the following disclaimer in the documentation
25 | * and/or other materials provided with the distribution.
26 | * 3. Neither the name of the copyright holder nor the names of its contributors
27 | * may be used to endorse or promote products derived from this software without
28 | * specific prior written permission.
29 | *
30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 | * POSSIBILITY OF SUCH DAMAGE.
40 | *
41 | * ************************************************************************ */
42 |
43 | /*!
44 | @file CGData.hpp
45 |
46 | HPCG data structure
47 | */
48 |
49 | #ifndef CGDATA_HPP
50 | #define CGDATA_HPP
51 |
52 | #include "SparseMatrix.hpp"
53 | #include "Vector.hpp"
54 |
55 | struct CGData_STRUCT {
56 | Vector r; //!< residual vector
57 | Vector z; //!< preconditioned residual vector
58 | Vector p; //!< direction vector
59 | Vector Ap; //!< Krylov vector
60 | };
61 | typedef struct CGData_STRUCT CGData; //!< the four vectors above, grouped for the CG routines
62 |
63 | /*!
64 | Sets up the four CG work vectors for the problem described by A.
65 |
66 | r and Ap are initialized with A.localNumberOfRows entries,
67 | z and p with A.localNumberOfColumns entries.
68 |
69 | @param[in] A the problem matrix that supplies the local row and column counts
70 | @param[out] data the CG vector set that is initialized for use in CG iterations
71 | */
72 | inline void InitializeSparseCGData(SparseMatrix & A, CGData & data) {
73 | InitializeVector(data.r, A.localNumberOfRows);
74 | InitializeVector(data.z, A.localNumberOfColumns);
75 | InitializeVector(data.p, A.localNumberOfColumns);
76 | InitializeVector(data.Ap, A.localNumberOfRows);
77 | }
78 | /*! Same vector sizing as InitializeSparseCGData, performed through HIPInitializeVector. */
79 | inline void HIPInitializeSparseCGData(SparseMatrix& A, CGData& data) {
80 | local_int_t rowCount = A.localNumberOfRows;
81 | local_int_t columnCount = A.localNumberOfColumns;
82 | HIPInitializeVector(data.r, rowCount);
83 | HIPInitializeVector(data.z, columnCount);
84 | HIPInitializeVector(data.p, columnCount);
85 | HIPInitializeVector(data.Ap, rowCount);
86 | }
87 | /*!
88 | Releases the CG work vectors.
89 |
90 | DeleteVector is applied to r, z, p and Ap, in that order.
91 |
92 | @param[inout] data the CG vector set whose vectors are deleted
93 | */
94 | inline void DeleteCGData(CGData & data) {
95 | DeleteVector(data.r);
96 | DeleteVector(data.z);
97 | DeleteVector(data.p);
98 | DeleteVector(data.Ap);
99 | }
100 | /*! Applies HIPDeleteVector to each CG work vector: r, z, p and Ap, in that order. */
101 | inline void HIPDeleteCGData(CGData& data) {
102 | HIPDeleteVector(data.r);
103 | HIPDeleteVector(data.z);
104 | HIPDeleteVector(data.p);
105 | HIPDeleteVector(data.Ap);
106 | }
107 |
108 |
109 | #endif // CGDATA_HPP
110 |
111 |
--------------------------------------------------------------------------------
/src/CG_ref.hpp:
--------------------------------------------------------------------------------
1 |
2 | //@HEADER
3 | // ***************************************************
4 | //
5 | // HPCG: High Performance Conjugate Gradient Benchmark
6 | //
7 | // Contact:
8 | // Michael A. Heroux ( maherou@sandia.gov)
9 | // Jack Dongarra (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 |
15 | /* ************************************************************************
16 | * Modifications (c) 2019 Advanced Micro Devices, Inc.
17 | *
18 | * Redistribution and use in source and binary forms, with or without modification,
19 | * are permitted provided that the following conditions are met:
20 | *
21 | * 1. Redistributions of source code must retain the above copyright notice, this
22 | * list of conditions and the following disclaimer.
23 | * 2. Redistributions in binary form must reproduce the above copyright notice,
24 | * this list of conditions and the following disclaimer in the documentation
25 | * and/or other materials provided with the distribution.
26 | * 3. Neither the name of the copyright holder nor the names of its contributors
27 | * may be used to endorse or promote products derived from this software without
28 | * specific prior written permission.
29 | *
30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 | * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 | * POSSIBILITY OF SUCH DAMAGE.
40 | *
41 | * ************************************************************************ */
42 |
43 | #ifndef CG_REF_HPP
44 | #define CG_REF_HPP
45 |
46 | #include "SparseMatrix.hpp"
47 | #include "Vector.hpp"
48 | #include "CGData.hpp"
49 |
50 | int CG_ref(const SparseMatrix & A, CGData & data, const Vector & b, Vector & x,
51 | const int max_iter, const double tolerance, int & niters, double & normr, double & normr0,
52 | double * times, bool doPreconditioning, bool verbose);
53 |
54 | // this function will compute the Conjugate Gradient iterations.
55 | // A - Matrix
56 | // data - CG work vectors r, z, p and Ap (see CGData.hpp)
57 | // b - constant
58 | // x - used for return value
59 | // max_iter - how many times we iterate
60 | // tolerance - Stopping tolerance for preconditioned iterations.
61 | // niters - number of iterations performed
62 | // normr - computed residual norm
63 | // normr0 - Original residual
64 | // times - array of timing information
65 | // doPreconditioning - bool to specify whether or not symmetric GS will be applied.
66 | // verbose - NOTE(review): undocumented here; presumably toggles progress output - confirm in CG_ref.cpp
67 | #endif // CG_REF_HPP
68 |
--------------------------------------------------------------------------------
/src/CheckAspectRatio.cpp:
--------------------------------------------------------------------------------
1 | //@HEADER
2 | // ***************************************************
3 | //
4 | // HPCG: High Performance Conjugate Gradient Benchmark
5 | //
6 | // Contact:
7 | // Michael A. Heroux ( maherou@sandia.gov)
8 | // Jack Dongarra (dongarra@eecs.utk.edu)
9 | // Piotr Luszczek (luszczek@eecs.utk.edu)
10 | //
11 | // ***************************************************
12 | //@HEADER
13 |
14 | /*!
15 | @file CheckAspectRatio.cpp
16 |
17 | HPCG routine
18 | */
19 |
20 | #include
21 |
22 | #ifndef HPCG_NO_MPI
23 | #include
24 | #endif
25 |
26 | #include "hpcg.hpp"
27 |
28 | #include "CheckAspectRatio.hpp"
29 |
30 | // Rejects a grid whose shortest side is too small relative to its longest side.
31 | // Returns 0 when min(x,y,z)/max(x,y,z) is not below smallest_ratio. Otherwise it reports the
32 | // problem on HPCG_fout (if DoIo), calls MPI_Abort unless HPCG_NO_MPI is defined, and returns 127.
33 | int CheckAspectRatio(double smallest_ratio, int x, int y, int z, const char *what, bool DoIo) {
34 | const int shortest = std::min(x, std::min(y, z));
35 | const int longest = std::max(x, std::max(y, z));
36 | const double current_ratio = shortest / double(longest); // ratio of the smallest to the largest
37 | if (!(current_ratio < smallest_ratio))
38 | return 0;
39 |
40 | if (DoIo) {
41 | HPCG_fout << "The " << what << " sizes (" << x << "," << y << "," << z <<
42 | ") are invalid because the ratio min(x,y,z)/max(x,y,z)=" << current_ratio <<
43 | " is too small (at least " << smallest_ratio << " is required)." << std::endl;
44 | HPCG_fout << "The shape should resemble a 3D cube. Please adjust and try again." << std::endl;
45 | HPCG_fout.flush();
46 | }
47 | #ifndef HPCG_NO_MPI
48 | MPI_Abort(MPI_COMM_WORLD, 127);
49 | #endif
50 | return 127;
51 | }
52 |
--------------------------------------------------------------------------------
/src/CheckAspectRatio.hpp:
--------------------------------------------------------------------------------
1 |
2 | //@HEADER
3 | // ***************************************************
4 | //
5 | // HPCG: High Performance Conjugate Gradient Benchmark
6 | //
7 | // Contact:
8 | // Michael A. Heroux ( maherou@sandia.gov)
9 | // Jack Dongarra (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 |
15 | #ifndef CHECKASPECTRATIO_HPP
16 | #define CHECKASPECTRATIO_HPP
17 | extern int CheckAspectRatio(double smallest_ratio, int x, int y, int z, const char *what, bool DoIo);
18 | #endif // CHECKASPECTRATIO_HPP
19 |
20 |
--------------------------------------------------------------------------------
/src/CheckProblem.hpp:
--------------------------------------------------------------------------------
1 |
2 | //@HEADER
3 | // ***************************************************
4 | //
5 | // HPCG: High Performance Conjugate Gradient Benchmark
6 | //
7 | // Contact:
8 | // Michael A. Heroux ( maherou@sandia.gov)
9 | // Jack Dongarra (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 |
15 | #ifndef CHECKPROBLEM_HPP
16 | #define CHECKPROBLEM_HPP
17 | #include "SparseMatrix.hpp"
18 | #include "Vector.hpp"
19 |
20 | void CheckProblem(SparseMatrix & A, Vector * b, Vector * x, Vector * xexact);
21 | #endif // CHECKPROBLEM_HPP
22 |
--------------------------------------------------------------------------------
/src/ComputeDotProduct.hpp:
--------------------------------------------------------------------------------
1 |
2 | //@HEADER
3 | // ***************************************************
4 | //
5 | // HPCG: High Performance Conjugate Gradient Benchmark
6 | //
7 | // Contact:
8 | // Michael A. Heroux ( maherou@sandia.gov)
9 | // Jack Dongarra (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 |
15 | #ifndef COMPUTEDOTPRODUCT_HPP
16 | #define COMPUTEDOTPRODUCT_HPP
17 | #include "Vector.hpp"
18 | int ComputeDotProduct(const local_int_t n, const Vector & x, const Vector & y,
19 | double & result, double & time_allreduce, bool & isOptimized);
20 |
21 | #endif // COMPUTEDOTPRODUCT_HPP
22 |
--------------------------------------------------------------------------------
/src/ComputeDotProduct_ref.cpp:
--------------------------------------------------------------------------------
1 |
2 | //@HEADER
3 | // ***************************************************
4 | //
5 | // HPCG: High Performance Conjugate Gradient Benchmark
6 | //
7 | // Contact:
8 | // Michael A. Heroux ( maherou@sandia.gov)
9 | // Jack Dongarra (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 |
15 | /*!
16 | @file ComputeDotProduct_ref.cpp
17 |
18 | HPCG routine
19 | */
20 |
21 | #ifndef HPCG_NO_MPI
22 | #include
23 | #include "mytimer.hpp"
24 | #endif
25 | #ifndef HPCG_NO_OPENMP
26 | #include
27 | #endif
28 | #include
29 | #include "ComputeDotProduct_ref.hpp"
30 |
31 | /*!
32 | Routine to compute the dot product of two vectors where:
33 |
34 | This is the reference dot-product implementation. It _CANNOT_ be modified for the
35 | purposes of this benchmark.
36 |
37 | @param[in] n the number of vector elements (on this processor)
38 | @param[in] x, y the input vectors
39 | @param[out] result a reference to a scalar value; on exit it contains the result.
40 | @param[out] time_allreduce the time it took to perform the communication between processes
41 |
42 | @return returns 0 upon success and non-zero otherwise
43 |
44 | @see ComputeDotProduct
45 | */
46 | int ComputeDotProduct_ref(const local_int_t n, const Vector & x, const Vector & y,
47 | double & result, double & time_allreduce) {
48 | assert(x.localLength>=n); // Test vector lengths
49 | assert(y.localLength>=n);
50 |
51 | double local_result = 0.0;
52 | double * xv = x.values;
53 | double * yv = y.values;
54 | if (yv==xv) {
55 | #ifndef HPCG_NO_OPENMP
56 | #pragma omp parallel for reduction (+:local_result)
57 | #endif
58 | for (local_int_t i=0; inumberOfPresmootherSteps;
73 |
74 | for(int i = 1; i < numberOfPresmootherSteps; ++i)
75 | {
76 | RETURN_IF_HPCG_ERROR(ComputeSYMGS(A, r, x));
77 | }
78 |
79 | #ifndef HPCG_REFERENCE
80 | RETURN_IF_HPCG_ERROR(ComputeFusedSpMVRestriction(A, r, x));
81 | #else
82 | RETURN_IF_HPCG_ERROR(ComputeSPMV(A, x, *A.mgData->Axf));
83 | RETURN_IF_HPCG_ERROR(ComputeRestriction(A, r));
84 | #endif
85 |
86 | RETURN_IF_HPCG_ERROR(ComputeMG(*A.Ac, *A.mgData->rc, *A.mgData->xc));
87 | RETURN_IF_HPCG_ERROR(ComputeProlongation(A, x));
88 |
89 | int numberOfPostsmootherSteps = A.mgData->numberOfPostsmootherSteps;
90 |
91 | for(int i = 0; i < numberOfPostsmootherSteps; ++i)
92 | {
93 | RETURN_IF_HPCG_ERROR(ComputeSYMGS(A, r, x));
94 | }
95 | }
96 | else
97 | {
98 | RETURN_IF_HPCG_ERROR(ComputeSYMGSZeroGuess(A, r, x));
99 | }
100 |
101 | return 0;
102 | }
103 |
--------------------------------------------------------------------------------
/src/ComputeMG.hpp:
--------------------------------------------------------------------------------
1 |
2 | //@HEADER
3 | // ***************************************************
4 | //
5 | // HPCG: High Performance Conjugate Gradient Benchmark
6 | //
7 | // Contact:
8 | // Michael A. Heroux ( maherou@sandia.gov)
9 | // Jack Dongarra (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 |
15 | #ifndef COMPUTEMG_HPP
16 | #define COMPUTEMG_HPP
17 | #include "SparseMatrix.hpp"
18 | #include "Vector.hpp"
19 |
20 | int ComputeMG(const SparseMatrix & A, const Vector & r, Vector & x); // same parameter list as ComputeMG_ref; NOTE(review): presumably the optimized multigrid V-cycle returning 0 on success - confirm against ComputeMG.cpp
21 |
22 | #endif // COMPUTEMG_HPP
23 |
--------------------------------------------------------------------------------
/src/ComputeMG_ref.cpp:
--------------------------------------------------------------------------------
1 |
2 | //@HEADER
3 | // ***************************************************
4 | //
5 | // HPCG: High Performance Conjugate Gradient Benchmark
6 | //
7 | // Contact:
8 | // Michael A. Heroux ( maherou@sandia.gov)
9 | // Jack Dongarra (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 |
15 | /*!
16 | @file ComputeMG_ref.cpp
17 |
18 | HPCG routine
19 | */
20 |
21 | #include "ComputeMG_ref.hpp"
22 | #include "ComputeSYMGS_ref.hpp"
23 | #include "ComputeSPMV_ref.hpp"
24 | #include "ComputeRestriction_ref.hpp"
25 | #include "ComputeProlongation_ref.hpp"
26 | #include
27 | #include
28 |
29 | /*!
30 | Reference multigrid V-cycle: x is zeroed, then either the coarse-level path is taken (when A.mgData is set) or a single ComputeSYMGS_ref call is made.
31 | @param[in] A the known system matrix
32 | @param[in] r the input vector
33 | @param[inout] x On exit contains the result of the multigrid V-cycle with r as the RHS, x is the approximation to Ax = r. Incoming values are discarded because x is zeroed first.
34 |
35 | @return returns 0 upon success and non-zero otherwise
36 |
37 | @see ComputeMG
38 | */
39 | int ComputeMG_ref(const SparseMatrix & A, const Vector & r, Vector & x) {
40 | assert(x.localLength==A.localNumberOfColumns); // Make sure x contains space for halo values
41 |
42 | ZeroVector(x); // initialize x to zero
43 |
44 | int ierr = 0;
45 | if (A.mgData!=0) { // Go to next coarse level if defined
46 | int numberOfPresmootherSteps = A.mgData->numberOfPresmootherSteps;
47 | for (int i=0; i< numberOfPresmootherSteps; ++i) ierr += ComputeSYMGS_ref(A, r, x); // return codes are summed; every step runs even if an earlier one reported an error
48 | if (ierr!=0) return ierr; // the summed pre-smoother code is returned as-is
49 | ierr = ComputeSPMV_ref(A, x, *A.mgData->Axf); if (ierr!=0) return ierr; // output goes to A.mgData->Axf (presumably A*x - confirm in ComputeSPMV_ref)
50 | // Perform restriction operation using simple injection
51 | ierr = ComputeRestriction_ref(A, r); if (ierr!=0) return ierr;
52 | ierr = ComputeMG_ref(*A.Ac,*A.mgData->rc, *A.mgData->xc); if (ierr!=0) return ierr; // recursive call on *A.Ac with A.mgData->rc as input and A.mgData->xc as output
53 | ierr = ComputeProlongation_ref(A, x); if (ierr!=0) return ierr;
54 | int numberOfPostsmootherSteps = A.mgData->numberOfPostsmootherSteps;
55 | for (int i=0; i< numberOfPostsmootherSteps; ++i) ierr += ComputeSYMGS_ref(A, r, x); // ierr is 0 on entry to this loop, so the sum covers the post-smoother calls only
56 | if (ierr!=0) return ierr;
57 | }
58 | else { // A.mgData is null: no coarser level, so make one ComputeSYMGS_ref call at this level
59 | ierr = ComputeSYMGS_ref(A, r, x);
60 | if (ierr!=0) return ierr;
61 | }
62 | return 0; // every non-zero code was already returned above
63 | }
64 |
65 |
--------------------------------------------------------------------------------
/src/ComputeMG_ref.hpp:
--------------------------------------------------------------------------------
1 |
2 | //@HEADER
3 | // ***************************************************
4 | //
5 | // HPCG: High Performance Conjugate Gradient Benchmark
6 | //
7 | // Contact:
8 | // Michael A. Heroux ( maherou@sandia.gov)
9 | // Jack Dongarra (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 |
15 | #ifndef COMPUTEMG_REF_HPP
16 | #define COMPUTEMG_REF_HPP
17 | #include "SparseMatrix.hpp"
18 | #include "Vector.hpp"
19 |
20 | int ComputeMG_ref(const SparseMatrix & A, const Vector & r, Vector & x); // reference multigrid V-cycle (defined in ComputeMG_ref.cpp): x is zeroed, then filled with the result for right-hand side r; returns 0 on success, non-zero otherwise
21 |
22 | #endif // COMPUTEMG_REF_HPP
23 |
--------------------------------------------------------------------------------
/src/ComputeOptimalShapeXYZ.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include
3 | #include
4 |
5 | #ifdef HPCG_CUBIC_RADICAL_SEARCH
6 | #include
7 | #endif
8 | #include