├── .clang-format
├── .github
    ├── CODEOWNERS
    ├── CONTRIBUTING.md
    └── workflows
    │   └── docs.yaml
├── .gitignore
├── .jenkins
    ├── common.groovy
    └── precheckin.groovy
├── CMakeLists.txt
├── LICENSE.md
├── NOTICES.txt
├── README.md
├── cmake
    ├── Dependencies.cmake
    ├── FindLIBNUMA.cmake
    └── version.cmake
├── docker
    ├── dockerfile-build-centos
    ├── dockerfile-build-sles
    └── dockerfile-build-ubuntu-rock
├── install.sh
├── rtest.xml
└── src
    ├── CG.cpp
    ├── CG.hpp
    ├── CGData.hpp
    ├── CG_ref.cpp
    ├── CG_ref.hpp
    ├── CMakeLists.txt
    ├── CheckAspectRatio.cpp
    ├── CheckAspectRatio.hpp
    ├── CheckProblem.cpp
    ├── CheckProblem.hpp
    ├── ComputeDotProduct.cpp
    ├── ComputeDotProduct.hpp
    ├── ComputeDotProduct_ref.cpp
    ├── ComputeDotProduct_ref.hpp
    ├── ComputeMG.cpp
    ├── ComputeMG.hpp
    ├── ComputeMG_ref.cpp
    ├── ComputeMG_ref.hpp
    ├── ComputeOptimalShapeXYZ.cpp
    ├── ComputeOptimalShapeXYZ.hpp
    ├── ComputeProlongation.cpp
    ├── ComputeProlongation.hpp
    ├── ComputeProlongation_ref.cpp
    ├── ComputeProlongation_ref.hpp
    ├── ComputeResidual.cpp
    ├── ComputeResidual.hpp
    ├── ComputeResidual_ref.cpp
    ├── ComputeResidual_ref.hpp
    ├── ComputeRestriction.cpp
    ├── ComputeRestriction.hpp
    ├── ComputeRestriction_ref.cpp
    ├── ComputeRestriction_ref.hpp
    ├── ComputeSPMV.cpp
    ├── ComputeSPMV.hpp
    ├── ComputeSPMV_ref.cpp
    ├── ComputeSPMV_ref.hpp
    ├── ComputeSYMGS.cpp
    ├── ComputeSYMGS.hpp
    ├── ComputeSYMGS_ref.cpp
    ├── ComputeSYMGS_ref.hpp
    ├── ComputeWAXPBY.cpp
    ├── ComputeWAXPBY.hpp
    ├── ComputeWAXPBY_ref.cpp
    ├── ComputeWAXPBY_ref.hpp
    ├── ExchangeHalo.cpp
    ├── ExchangeHalo.hpp
    ├── GenerateCoarseProblem.cpp
    ├── GenerateCoarseProblem.hpp
    ├── GenerateGeometry.cpp
    ├── GenerateGeometry.hpp
    ├── GenerateProblem.cpp
    ├── GenerateProblem.hpp
    ├── GenerateProblem_ref.cpp
    ├── GenerateProblem_ref.hpp
    ├── Geometry.hpp
    ├── MGData.hpp
    ├── Memory.cpp
    ├── Memory.hpp
    ├── MixedBaseCounter.cpp
    ├── MixedBaseCounter.hpp
    ├── MultiColoring.cpp
    ├── MultiColoring.hpp
    ├── OptimizeProblem.cpp
    ├── OptimizeProblem.hpp
    ├── OutputFile.cpp
    ├── OutputFile.hpp
    ├── Permute.cpp
    ├── Permute.hpp
    ├── ReadHpcgDat.cpp
    ├── ReadHpcgDat.hpp
    ├── ReportResults.cpp
    ├── ReportResults.hpp
    ├── SetupHalo.cpp
    ├── SetupHalo.hpp
    ├── SetupHalo_ref.cpp
    ├── SetupHalo_ref.hpp
    ├── SparseMatrix.cpp
    ├── SparseMatrix.hpp
    ├── TestCG.cpp
    ├── TestCG.hpp
    ├── TestNorms.cpp
    ├── TestNorms.hpp
    ├── TestSymmetry.cpp
    ├── TestSymmetry.hpp
    ├── Vector.hpp
    ├── Version.hpp.in
    ├── WriteProblem.cpp
    ├── WriteProblem.hpp
    ├── YAML_Doc.cpp
    ├── YAML_Doc.hpp
    ├── YAML_Element.cpp
    ├── YAML_Element.hpp
    ├── finalize.cpp
    ├── hpcg.hpp
    ├── init.cpp
    ├── main.cpp
    ├── mytimer.cpp
    ├── mytimer.hpp
    ├── rochpcg_gtest_main.cpp
    ├── test_rochpcg.cpp
    ├── test_rochpcg.hpp
    └── utils.hpp


/.clang-format:
--------------------------------------------------------------------------------
 1 | ---
 2 | Language:        Cpp
 3 | AccessModifierOffset: 0
 4 | AlignAfterOpenBracket: Align
 5 | AlignConsecutiveAssignments: true
 6 | AlignConsecutiveDeclarations: false
 7 | AlignEscapedNewlinesLeft: true
 8 | AlignOperands:   true
 9 | AlignTrailingComments: true
10 | AllowAllParametersOfDeclarationOnNextLine: true
11 | AllowShortBlocksOnASingleLine: true
12 | AllowShortCaseLabelsOnASingleLine: true
13 | AllowShortFunctionsOnASingleLine: All
14 | AllowShortIfStatementsOnASingleLine: false
15 | AllowShortLoopsOnASingleLine: false
16 | AlwaysBreakAfterDefinitionReturnType: None
17 | AlwaysBreakAfterReturnType: None
18 | AlwaysBreakBeforeMultilineStrings: false
19 | AlwaysBreakTemplateDeclarations: true
20 | BinPackArguments: false
21 | BinPackParameters: false
22 | BraceWrapping:   
23 |   AfterClass:      true
24 |   AfterControlStatement: true
25 |   AfterEnum:       true
26 |   AfterFunction:   true
27 |   AfterNamespace:  false
28 |   AfterObjCDeclaration: true
29 |   AfterStruct:     true
30 |   AfterUnion:      true
31 |   BeforeCatch:     true
32 |   BeforeElse:      true
33 |   IndentBraces:    false
34 | BreakBeforeBinaryOperators: None
35 | BreakBeforeBraces: Custom
36 | BreakBeforeTernaryOperators: true
37 | BreakConstructorInitializersBeforeComma: false
38 | ColumnLimit:     100
39 | CommentPragmas:  '^ IWYU pragma:'
40 | ConstructorInitializerAllOnOneLineOrOnePerLine: true
41 | ConstructorInitializerIndentWidth: 4
42 | ContinuationIndentWidth: 4
43 | Cpp11BracedListStyle: true
44 | DerivePointerAlignment: false
45 | DisableFormat:   false
46 | ExperimentalAutoDetectBinPacking: false
47 | ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
48 | IncludeCategories: 
49 |   - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
50 |     Priority:        2
51 |   - Regex:           '^(<|"(gtest|isl|json)/)'
52 |     Priority:        3
53 |   - Regex:           '.*'
54 |     Priority:        1
55 | IndentCaseLabels: false
56 | IndentWidth:     4
57 | IndentWrappedFunctionNames: false
58 | KeepEmptyLinesAtTheStartOfBlocks: true
59 | MacroBlockBegin: ''
60 | MacroBlockEnd:   ''
61 | MaxEmptyLinesToKeep: 1
62 | NamespaceIndentation: None
63 | ObjCBlockIndentWidth: 2
64 | ObjCSpaceAfterProperty: false
65 | ObjCSpaceBeforeProtocolList: true
66 | PenaltyBreakBeforeFirstCallParameter: 19
67 | PenaltyBreakComment: 300
68 | PenaltyBreakFirstLessLess: 120
69 | PenaltyBreakString: 1000
70 | PenaltyExcessCharacter: 1000000
71 | PenaltyReturnTypeOnItsOwnLine: 60
72 | PointerAlignment: Left
73 | ReflowComments:  true
74 | SortIncludes:    false
75 | SpaceAfterCStyleCast: false
76 | # SpaceAfterTemplateKeyword: true
77 | SpaceBeforeAssignmentOperators: true
78 | SpaceBeforeParens: Never
79 | SpaceInEmptyParentheses: false
80 | SpacesBeforeTrailingComments: 1
81 | SpacesInAngles:  false
82 | SpacesInContainerLiterals: true
83 | SpacesInCStyleCastParentheses: false
84 | SpacesInParentheses: false
85 | SpacesInSquareBrackets: false
86 | Standard:        c++17
87 | TabWidth:        8
88 | UseTab:          Never
89 | ...
90 | 
91 | 


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @ntrost57 @YvanMokwinski @jsandham
2 | 


--------------------------------------------------------------------------------
/.github/workflows/docs.yaml:
--------------------------------------------------------------------------------
 1 | name: Upload to the upload server
 2 | 
 3 | # Controls when the workflow will run
 4 | on:
 5 |   push:
 6 |     branches: [develop, master]
 7 |     tags:
 8 |       - rocm-5.*
 9 |   release:
10 |     types: [published]
11 | 
12 |   # Allows you to run this workflow manually from the Actions tab
13 |   workflow_dispatch:
14 | 
15 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
16 | jobs:
17 |   # This workflow contains a single job called "build"
18 |   build:
19 |     # The type of runner that the job will run on
20 |     runs-on: ubuntu-latest
21 | 
22 |     # Steps represent a sequence of tasks that will be executed as part of the job
23 |     steps:
24 |       # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
25 |       - uses: actions/checkout@v4
26 |       # Step outputs are appended to the $GITHUB_OUTPUT file; the old set-output command is deprecated
27 |       - name: getting branch name
28 |         shell: bash
29 |         run: echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT"
30 |         id: branch_name
31 |       - name: getting tag name
32 |         shell: bash
33 |         run: echo "tag=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT"
34 |         id: tag_name
35 |       - name: zipping files
36 |         run: zip -r ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip . -x '*.git*' '*.idea*'
37 |       - name: echo-step
38 |         run: echo "${{ github.event.release.target_commitish }}"
39 |       - name: uploading archive to prod
40 |         if: ${{ steps.branch_name.outputs.branch == 'master' || github.event.release.target_commitish == 'master'}}
41 |         uses: wlixcc/SFTP-Deploy-Action@v1.0
42 |         with:
43 |           username: ${{ secrets.USERNAME }}
44 |           server: ${{ secrets.SERVER }}
45 |           ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }}
46 |           local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip
47 |           remote_path: '${{ secrets.PROD_UPLOAD_URL }}'
48 |           args: '-o ConnectTimeout=5'
49 |       - name: uploading archive to staging
50 |         if: ${{ steps.branch_name.outputs.branch == 'develop' || github.event.release.target_commitish == 'develop' }}
51 |         uses: wlixcc/SFTP-Deploy-Action@v1.0
52 |         with:
53 |           username: ${{ secrets.USERNAME }}
54 |           server: ${{ secrets.SERVER }}
55 |           ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }}
56 |           local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip
57 |           remote_path: '${{ secrets.STG_UPLOAD_URL }}'
58 |           args: '-o ConnectTimeout=5'
59 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled Object files
 2 | *.slo
 3 | *.lo
 4 | *.o
 5 | *.obj
 6 | 
 7 | # Precompiled Headers
 8 | *.gch
 9 | *.pch
10 | 
11 | # Compiled Dynamic libraries
12 | *.so
13 | *.dylib
14 | *.dll
15 | 
16 | # Fortran module files
17 | *.mod
18 | 
19 | # Compiled Static libraries
20 | *.lai
21 | *.la
22 | *.a
23 | *.lib
24 | 
25 | # Executables
26 | *.exe
27 | *.out
28 | *.app
29 | 
30 | # vim tags
31 | tags
32 | .tags
33 | .*.swp
34 | 
35 | # Editors
36 | .vscode
37 | 
38 | # build-in-source directory
39 | build
40 | 
41 | # doc directory
42 | docBin
43 | _build
44 | 


--------------------------------------------------------------------------------
/.jenkins/common.groovy:
--------------------------------------------------------------------------------
 1 | // This file is for internal AMD use.
 2 | // If you are interested in running your own Jenkins, please raise a github issue for assistance.
 3 | 
 4 | def runCompileCommand(platform, project, jobName)
 5 | {
 6 |     project.paths.construct_build_prefix()
 7 |     // Compile step: assembles a bash script (dependency text + build command) and runs it on the platform.
 8 |     // jobName is part of the signature used by callers but is not read inside this function.
 9 |     def getDependencies = auxiliary.getLibrary('rocPRIM', platform.jenkinsLabel,'develop')
10 | 
11 |     // 'set -ex' makes the script trace each command and abort on the first failing one.
12 |     def command = """#!/usr/bin/env bash
13 |                 set -ex
14 |                 ${getDependencies}
15 |                 cd ${project.paths.project_build_prefix}
16 |                 ${project.paths.build_command}
17 |               """
18 | 
19 |     platform.runCommand(this, command)
20 | }
21 | // Runs the rochpcg-test googletest binary from the release tests directory and publishes its XML output.
22 | def runTestCommand (platform, project)
23 | {
24 |     String sudo = auxiliary.sudo(platform.jenkinsLabel) // prefix placed in front of the test binary below
25 |     def command = """#!/usr/bin/env bash
26 |                     set -ex
27 |                     cd ${project.paths.project_build_prefix}/build/release/tests
28 |                     ${sudo} ./rochpcg-test --gtest_output=xml --gtest_color=yes
29 |                   """
30 |     // Execute the script, then pass every XML file in the tests directory to the junit step.
31 |     platform.runCommand(this, command)
32 |     junit "${project.paths.project_build_prefix}/build/release/tests/*.xml"
33 | }
34 | 
35 | return this
36 | 


--------------------------------------------------------------------------------
/.jenkins/precheckin.groovy:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env groovy
 2 | @Library('rocJenkins@pong') _
 3 | import com.amd.project.*
 4 | import com.amd.docker.*
 5 | import java.nio.file.Path;
 6 | // runCI: configures the rocHPCG PreCheckin project and passes compile/test closures to buildProject.
 7 | def runCI = 
 8 | {
 9 |     nodeDetails, jobName->
10 |     // Build through install.sh with -t (test build); OpenMP and MPI are both passed as OFF.
11 |     def prj = new rocProject('rocHPCG', 'PreCheckin')
12 |     prj.paths.build_command = './install.sh -t --with-openmp=OFF --with-mpi=OFF'
13 | 
14 |     def nodes = new dockerNodes(nodeDetails, jobName, prj)
15 |     // Assigned inside compileCommand and reused by testCommand, so the compile closure must run first.
16 |     def commonGroovy
17 | 
18 |     boolean formatCheck = false
19 |      
20 |     def compileCommand =
21 |     {
22 |         platform, project->
23 |         // Load the shared helpers from this checkout's .jenkins/common.groovy, then compile.
24 |         commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy"
25 |         commonGroovy.runCompileCommand(platform, project, jobName)
26 |     }
27 | 
28 |     def testCommand =
29 |     {
30 |         platform, project->
31 | 
32 |         commonGroovy.runTestCommand(platform, project)
33 |     }
34 |     // NOTE(review): the trailing null is presumably an unused packaging step — confirm against buildProject.
35 |     buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, null)
36 | }
37 | // Entry point: selects properties and node details by the job name derived from env.BUILD_URL.
38 | ci: { 
39 |     String urlJobName = auxiliary.getTopJobName(env.BUILD_URL)
40 |     // Job name -> extra Jenkins properties; only the hipclang job adds a cron trigger ('0 1 * * 0').
41 |     def propertyList = ["compute-rocm-dkms-no-npi":[], 
42 |                         "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])],
43 |                         "rocm-docker":[]]
44 |     propertyList = auxiliary.appendPropertyList(propertyList)
45 |     // Job name -> node details handed to runCI (presumably OS image -> GPU targets — TODO confirm).
46 |     def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), 
47 |                        "compute-rocm-dkms-no-npi-hipclang":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), 
48 |                        "rocm-docker":([ubuntu16:['gfx900']])]
49 |     jobNameList = auxiliary.appendJobNameList(jobNameList)
50 |     // Apply the properties of the entry whose key equals the current job name.
51 |     propertyList.each 
52 |     {
53 |         jobName, property->
54 |         if (urlJobName == jobName)
55 |             properties(auxiliary.addCommonProperties(property))
56 |     }
57 |     // Run CI inside a stage named after the matching job.
58 |     jobNameList.each 
59 |     {
60 |         jobName, nodeDetails->
61 |         if (urlJobName == jobName)
62 |             stage(jobName) {
63 |                 runCI(nodeDetails, jobName)
64 |             }
65 |     }
66 | 
67 |     // Job names not listed in jobNameList (e.g. compute-rocm-dkms-no-npi-1901) get a '0 1 * * *' cron trigger and an ubuntu16/gfx906 run
68 |     if(!jobNameList.keySet().contains(urlJobName))
69 |     {
70 |         properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])]))
71 |         stage(urlJobName) {
72 |             runCI([ubuntu16:['gfx906']], urlJobName)
73 |         }
74 |     }
75 | }
76 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | # Modifications (c) 2019-2023 Advanced Micro Devices, Inc.
  2 | #
  3 | # Redistribution and use in source and binary forms, with or without modification,
  4 | # are permitted provided that the following conditions are met:
  5 | #
  6 | # 1. Redistributions of source code must retain the above copyright notice, this
  7 | #    list of conditions and the following disclaimer.
  8 | # 2. Redistributions in binary form must reproduce the above copyright notice,
  9 | #    this list of conditions and the following disclaimer in the documentation
 10 | #    and/or other materials provided with the distribution.
 11 | # 3. Neither the name of the copyright holder nor the names of its contributors
 12 | #    may be used to endorse or promote products derived from this software without
 13 | #    specific prior written permission.
 14 | #
 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18 | # IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21 | # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24 | # POSSIBILITY OF SUCH DAMAGE.
 25 | 
 26 | cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
 27 | # Default ROCm location when -DROCM_PATH is not given; no FORCE, so a user-supplied value wins
 28 | set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to the ROCm installation")
 29 | # Consider removing this in the future. It must appear before the project command, because it does not use FORCE
 30 | set(CMAKE_INSTALL_PREFIX ${ROCM_PATH} CACHE PATH "Install path prefix, prepended onto install directories")
 31 | 
 32 | # CMake modules: the project's own cmake/ directory plus the HIP module directories under ROCM_PATH
 33 | list(APPEND CMAKE_MODULE_PATH
 34 |      ${CMAKE_CURRENT_SOURCE_DIR}/cmake
 35 |      ${ROCM_PATH}/lib/cmake/hip
 36 |      ${ROCM_PATH}/hip/cmake)
 37 | 
 38 | # Set a default build type if none was specified
 39 | if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
 40 |   message(STATUS "Setting build type to 'Release' as none was specified.")
 41 |   set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build." FORCE)
 42 |   set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "" "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
 43 | endif()
 44 | 
 45 | # Honor per-config flags in try_compile() source-file signature. cmake v3.7 and up
 46 | if(POLICY CMP0066)
 47 |   cmake_policy(SET CMP0066 NEW)
 48 | endif()
 49 | 
 50 | # rocHPCG project
 51 | project(rochpcg LANGUAGES CXX)
 52 | 
 53 | # Force library install path to lib (CentOS 7 defaults to lib64)
 54 | set(CMAKE_INSTALL_LIBDIR "lib" CACHE INTERNAL "Installation directory for libraries" FORCE)
 55 | 
 56 | # Build flags
 57 | set(CMAKE_CXX_STANDARD 17)
 58 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
 59 | set(CMAKE_CXX_EXTENSIONS OFF)
 60 | 
 61 | # Build options
 62 | option(HPCG_DEBUG "Compile with modest debugging turned on" OFF)
 63 | option(HPCG_DETAILED_DEBUG "Compile with voluminous debugging information turned on" OFF)
 64 | option(HPCG_DETAILED_TIMING "Enable detail timers" OFF)
 65 | option(HPCG_REFERENCE "Build reference mode" OFF)
 66 | option(BUILD_TEST "Build rocHPCG single-node test" OFF)
 67 | 
 68 | # Optimization options
 69 | option(OPT_MEMMGMT "Build with memory management module" ON)
 70 | option(OPT_DEFRAG "Build with memory management defragmentation" ON)
 71 | option(GPU_AWARE_MPI "Enable use of GPU-Aware MPI functionality" OFF)
 72 | 
 73 | # roctx Markers
 74 | option(OPT_ROCTX "Enable rocTX markers" OFF)
 75 | 
 76 | # Dependencies
 77 | include(cmake/Dependencies.cmake)
 78 | 
 79 | # Find HIP package
 80 | find_package(HIP REQUIRED)
 81 | find_package(rocprim REQUIRED)
 82 | 
 83 | # GPU arch targets: start from the AMDGPU_TARGETS cache value and append newer architectures by HIP version
 84 | if(AMDGPU_TARGETS AND NOT GPU_TARGETS)
 85 |   message( DEPRECATION "AMDGPU_TARGETS use is deprecated. Use GPU_TARGETS." )
 86 | endif()
 87 | set(AMDGPU_TARGETS "gfx900;gfx906" CACHE STRING "List of specific machine types for library to target") # default list; no FORCE
 88 | if(HIP_VERSION VERSION_GREATER_EQUAL "3.7")
 89 |   set(AMDGPU_TARGETS "${AMDGPU_TARGETS};gfx908") # NOTE(review): non-cache set; also appends to a user-supplied AMDGPU_TARGETS — confirm intended
 90 | endif()
 91 | if(HIP_VERSION VERSION_GREATER_EQUAL "4.3")
 92 |   set(AMDGPU_TARGETS "${AMDGPU_TARGETS};gfx90a")
 93 | endif()
 94 | if (HIP_VERSION VERSION_GREATER_EQUAL "5.7")
 95 |   set(AMDGPU_TARGETS "${AMDGPU_TARGETS};gfx942")
 96 | endif()
 97 | if (HIP_VERSION VERSION_GREATER_EQUAL "6.5")
 98 |   set(AMDGPU_TARGETS "${AMDGPU_TARGETS};gfx950")
 99 | endif()
100 | # Don't force, as users should be able to override GPU_TARGETS at the command line if desired
101 | set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU architectures to build for")
102 | 
103 | # Setup version
104 | rocm_setup_version(VERSION 0.8.6)
105 | 
106 | # rocHPCG source directory
107 | add_subdirectory(src)
108 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2019-2021 Advanced Micro Devices, Inc.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without modification,
 4 | are permitted provided that the following conditions are met:
 5 | 
 6 | 1. Redistributions of source code must retain the above copyright notice, this
 7 |    list of conditions and the following disclaimer.
 8 | 2. Redistributions in binary form must reproduce the above copyright notice,
 9 |    this list of conditions and the following disclaimer in the documentation
10 |    and/or other materials provided with the distribution.
11 | 3. Neither the name of the copyright holder nor the names of its contributors
12 |    may be used to endorse or promote products derived from this software without
13 |    specific prior written permission.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 | POSSIBILITY OF SUCH DAMAGE.
25 | 


--------------------------------------------------------------------------------
/NOTICES.txt:
--------------------------------------------------------------------------------
 1 | Notices and licenses file
 2 | _________________________
 3 | 
 4 | AMD copyrighted code (BSD3) 
 5 | 
 6 | Copyright (c) 2019-2021 Advanced Micro Devices, Inc.
 7 | 
 8 | Redistribution and use in source and binary forms, with or without modification,
 9 | are permitted provided that the following conditions are met:
10 | 
11 |  1. Redistributions of source code must retain the above copyright notice, this
12 |     list of conditions and the following disclaimer.
13 |  2. Redistributions in binary form must reproduce the above copyright notice,
14 |     this list of conditions and the following disclaimer in the documentation
15 |     and/or other materials provided with the distribution.
16 |  3. Neither the name of the copyright holder nor the names of its contributors
17 |     may be used to endorse or promote products derived from this software without
18 |     specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
24 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
26 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 | POSSIBILITY OF SUCH DAMAGE.
30 | 
31 | 
32 | Dependencies on hpcg-benchmark-hpcg v3.1 (BSD3) 
33 | 
34 | Modifications (c) 2019-2021 Advanced Micro Devices, Inc.
35 | 
36 | Redistribution and use in source and binary forms, with or without modification,
37 | are permitted provided that the following conditions are met:
38 | 
39 |  1. Redistributions of source code must retain the above copyright notice, this
40 |     list of conditions and the following disclaimer.
41 |  2. Redistributions in binary form must reproduce the above copyright notice,
42 |     this list of conditions and the following disclaimer in the documentation
43 |     and/or other materials provided with the distribution.
44 |  3. Neither the name of the copyright holder nor the names of its contributors
45 |     may be used to endorse or promote products derived from this software without
46 |     specific prior written permission.
47 | 
48 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
49 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
50 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
51 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
52 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
53 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
54 | OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
55 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
56 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
57 | POSSIBILITY OF SUCH DAMAGE.
58 | 
59 | 
60 | hpcg-benchmark-hpcg v3.1 (BSD3) 
61 | Copyright (c) 2013-2019, hpcg-benchmark
62 | All rights reserved.
63 | 
64 | 
65 | Redistribution and use in source and binary forms, with or without
66 | modification, are permitted provided that the following conditions are met:
67 | 
68 | * Redistributions of source code must retain the above copyright notice, this
69 |   list of conditions and the following disclaimer.
70 | 
71 | * Redistributions in binary form must reproduce the above copyright notice,
72 |   this list of conditions and the following disclaimer in the documentation
73 |   and/or other materials provided with the distribution.
74 | 
75 | * Neither the name of hpcg nor the names of its
76 |   contributors may be used to endorse or promote products derived from
77 |   this software without specific prior written permission.
78 | 
79 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
80 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
81 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
82 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
83 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
84 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
85 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
86 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
87 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
88 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
89 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # rocHPCG
  2 | rocHPCG is a benchmark based on the [HPCG][] benchmark application, implemented on top of AMD's Radeon Open eCosystem Platform [ROCm][] runtime and toolchains. rocHPCG is created using the [HIP][] programming language and optimized for AMD's latest discrete GPUs.
  3 | 
  4 | ## Requirements
  5 | * Git
  6 | * CMake (3.10 or later)
  7 | * MPI
  8 | * NUMA library
  9 | * AMD [ROCm] platform (4.1 or later)
 10 | * [rocPRIM][]
 11 | * googletest (for test application only)
 12 | 
 13 | ## Quickstart rocHPCG build and install
 14 | 
 15 | #### Install script
 16 | You can build rocHPCG using the *install.sh* script
 17 | ```
 18 | # Clone rocHPCG using git
 19 | git clone https://github.com/ROCmSoftwarePlatform/rocHPCG.git
 20 | 
 21 | # Go to rocHPCG directory
 22 | cd rocHPCG
 23 | 
 24 | # Run install.sh script
 25 | # Command line options:
 26 | #    -h|--help         - prints this help message
 27 | #    -i|--install      - install after build
 28 | #    -d|--dependencies - install dependencies
 29 | #    -r|--reference    - reference mode
 30 | #    -g|--debug        - -DCMAKE_BUILD_TYPE=Debug (default: Release)
 31 | #    -t|--test         - build single GPU test
 32 | #    --with-rocm=<dir> - Path to ROCm install (default: /opt/rocm)
 33 | #    --with-mpi=<dir>  - Path to external MPI install (Default: clone+build OpenMPI v4.1.0 in deps/)
 34 | #    --with-openmp     - compile with OpenMP support (default: enabled)
 35 | #    --with-memmgmt    - compile with smart memory management (default: enabled)
 36 | #    --with-memdefrag  - compile with memory defragmentation (default: enabled)
 37 | ./install.sh -di
 38 | ```
 39 | By default, [UCX] v1.10.0 and [OpenMPI] v4.1.0 will be cloned and built in `rocHPCG/deps`.
 40 | After build and install, the `rochpcg` executable is placed in `build/release/rochpcg-install`.
 41 | 
 42 | #### MPI
 43 | You can build rocHPCG using your own MPI installation by specifying the directory, e.g.
 44 | ```
 45 | ./install.sh -di --with-mpi=/my/mpiroot/
 46 | ```
 47 | Alternatively, when you do not pass a specific directory, OpenMPI v4.1.0 with UCX will be cloned and built within `rocHPCG/deps` directory.
 48 | If you want to disable MPI, you need to run
 49 | ```
 50 | ./install.sh -di --with-mpi=off
 51 | ```
 52 | 
 53 | #### ROCm
 54 | You can build rocHPCG with specific ROCm versions by passing the directory to the install script, e.g.
 55 | ```
 56 | ./install.sh -di --with-rocm=/my/rocm-x.y.z/
 57 | ```
 58 | 
 59 | ## Running rocHPCG benchmark application
 60 | You can run the rocHPCG benchmark application by either using command line parameters or the `hpcg.dat` input file
 61 | ```
 62 | rochpcg <nx> <ny> <nz> <runtime>
 63 | # where
 64 | # nx      - is the global problem size in x dimension
 65 | # ny      - is the global problem size in y dimension
 66 | # nz      - is the global problem size in z dimension
 67 | # runtime - is the desired benchmarking time in seconds (> 1800s for official runs)
 68 | ```
 69 | 
 70 | Similarly, these parameters can be entered into an input file `hpcg.dat` in the working directory, e.g. `nx = ny = nz = 280` and `runtime = 1860`.
 71 | ```
 72 | HPCG benchmark input file
 73 | Sandia National Laboratories; University of Tennessee, Knoxville
 74 | 280 280 280
 75 | 1860
 76 | ```
 77 | 
 78 | ## Performance evaluation
 79 | For performance evaluation purposes, the number of iterations should be as low as possible (i.e. the convergence rate should be as high as possible), since the final HPCG score is scaled to 50 iterations.
 80 | Furthermore, it is observed that high memory occupancy performs better on AMD devices. The suggested problem size is `nx = ny = nz = 280` for devices with 16GB and `nx = 560, ny = nz = 280` for devices with 32GB or more. The runtime for official runs has to be at least 1800 seconds (use 1860 to be on the safe side), e.g.
 81 | ```
 82 | ./rochpcg 560 280 280 1860
 83 | ```
 84 | Please note that the convergence rate behaviour might change in a multi-GPU environment, so the run parameters might need to be adjusted accordingly.
 85 | 
 86 | Additionally, you can specify the device to be used for the application (e.g. device #1):
 87 | ```
 88 | ./rochpcg 560 280 280 1860 --dev=1
 89 | ```
 90 | 
 91 | ## Support
 92 | Please use [the issue tracker][] for bugs and feature requests.
 93 | 
 94 | ## License
 95 | The [license file][] can be found in the main repository.
 96 | 
 97 | [HPCG]: https://www.hpcg-benchmark.org/
 98 | [ROCm]: https://github.com/RadeonOpenCompute/ROCm
 99 | [HIP]: https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/
100 | [rocPRIM]: https://github.com/ROCmSoftwarePlatform/rocPRIM
101 | [OpenMPI]: https://github.com/open-mpi/ompi
102 | [UCX]: https://github.com/openucx/ucx
103 | [the issue tracker]: https://github.com/ROCmSoftwarePlatform/rocHPCG/issues
104 | [license file]: https://github.com/ROCmSoftwarePlatform/rocHPCG
105 | 


--------------------------------------------------------------------------------
/cmake/Dependencies.cmake:
--------------------------------------------------------------------------------
  1 | # Modifications (c) 2019-2021 Advanced Micro Devices, Inc.
  2 | #
  3 | # Redistribution and use in source and binary forms, with or without modification,
  4 | # are permitted provided that the following conditions are met:
  5 | #
  6 | # 1. Redistributions of source code must retain the above copyright notice, this
  7 | #    list of conditions and the following disclaimer.
  8 | # 2. Redistributions in binary form must reproduce the above copyright notice,
  9 | #    this list of conditions and the following disclaimer in the documentation
 10 | #    and/or other materials provided with the distribution.
 11 | # 3. Neither the name of the copyright holder nor the names of its contributors
 12 | #    may be used to endorse or promote products derived from this software without
 13 | #    specific prior written permission.
 14 | #
 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18 | # IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21 | # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24 | # POSSIBILITY OF SUCH DAMAGE.
 25 | 
 26 | # Dependencies
 27 | 
 28 | # Git is mandatory
 29 | find_package(Git REQUIRED)
 30 | 
 31 | # Let find_package() look inside the ROCm installation and its hip sub-directory
 32 | list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip)
 33 | 
 34 | # OpenMP is optional: the HPCG_OPENMP switch is only offered when OpenMP was found
 35 | find_package(OpenMP)
 36 | if(OPENMP_FOUND)
 37 |   option(HPCG_OPENMP "Compile WITH OpenMP support." ON)
 38 | else()
 39 |   message("-- OpenMP not found. Compiling WITHOUT OpenMP support.")
 40 | endif()
 41 | 
 42 | # MPI is optional unless HPCG_MPI was already requested; HPCG_MPI_DIR is used as MPI_HOME hint
 43 | set(MPI_HOME ${HPCG_MPI_DIR})
 44 | find_package(MPI)
 45 | if(MPI_FOUND)
 46 |   option(HPCG_MPI "Compile WITH MPI support." ON)
 47 | else()
 48 |   message("-- MPI not found. Compiling WITHOUT MPI support.")
 49 |   if(HPCG_MPI)
 50 |     message(FATAL_ERROR "Cannot build with MPI support.")
 51 |   endif()
 52 | endif()
 53 | 
 54 | # googletest is only needed when the tests are built
 55 | if(BUILD_TEST)
 56 |   find_package(GTest REQUIRED)
 57 | endif()
 58 | 
 59 | # libnuma is required whenever MPI support is enabled
 60 | if(HPCG_MPI)
 61 |   find_package(LIBNUMA REQUIRED)
 62 | endif()
 63 | 
 64 | # rocm-cmake: use an installed copy when available, otherwise download and install it locally
 65 | find_package(ROCM 0.7.3 QUIET CONFIG PATHS ${CMAKE_PREFIX_PATH} $ENV{ROCM_PATH})
 66 | if(NOT ROCM_FOUND)
 67 |   set(PROJECT_EXTERN_DIR "${CMAKE_CURRENT_BINARY_DIR}/deps")
 68 |   file( TO_NATIVE_PATH "${PROJECT_EXTERN_DIR}" PROJECT_EXTERN_DIR_NATIVE)
 69 |   set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download")
 70 |   set(rocm_cmake_url "https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.tar.gz")
 71 |   set(rocm_cmake_archive "${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}.tar.gz")
 72 |   set(rocm_cmake_src "${PROJECT_EXTERN_DIR}/rocm-cmake-${rocm_cmake_tag}")
 73 |   file(DOWNLOAD ${rocm_cmake_url} ${rocm_cmake_archive}
 74 |        STATUS rocm_cmake_download_status LOG rocm_cmake_download_log)
 75 |   list(GET rocm_cmake_download_status 0 rocm_cmake_download_error_code)
 76 |   if(rocm_cmake_download_error_code)
 77 |       message(FATAL_ERROR "Error: downloading ${rocm_cmake_url} failed "
 78 |           "error_code: ${rocm_cmake_download_error_code} "
 79 |           "log: ${rocm_cmake_download_log} ")
 80 |   endif()
 81 | 
 82 |   # Unpack, configure and install; every step records its exit status so a failure stops the build here.
 83 |   execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzvf ${rocm_cmake_archive}
 84 |       WORKING_DIRECTORY ${PROJECT_EXTERN_DIR} RESULT_VARIABLE rocm_cmake_unpack_error_code)
 85 |   if(rocm_cmake_unpack_error_code)
 86 |       message(FATAL_ERROR "Error: unpacking ${rocm_cmake_archive} failed")
 87 |   endif()
 88 |   execute_process(COMMAND ${CMAKE_COMMAND} -S ${rocm_cmake_src} -B ${rocm_cmake_src}/build
 89 |       WORKING_DIRECTORY ${PROJECT_EXTERN_DIR} RESULT_VARIABLE rocm_cmake_configure_error_code)
 90 |   if(rocm_cmake_configure_error_code)
 91 |       message(FATAL_ERROR "Error: configuring rocm-cmake in ${rocm_cmake_src} failed")
 92 |   endif()
 93 |   execute_process(COMMAND ${CMAKE_COMMAND} --install ${rocm_cmake_src}/build --prefix ${PROJECT_EXTERN_DIR}/rocm
 94 |       WORKING_DIRECTORY ${PROJECT_EXTERN_DIR} RESULT_VARIABLE rocm_cmake_install_error_code)
 95 |   if(rocm_cmake_install_error_code)
 96 |       message(FATAL_ERROR "Error: installing rocm-cmake into ${PROJECT_EXTERN_DIR}/rocm failed")
 97 |   endif()
 98 |   find_package(ROCM 0.7.3 REQUIRED CONFIG PATHS ${PROJECT_EXTERN_DIR})
 99 | endif()
100 | 
101 | include(ROCMSetupVersion)
102 | include(ROCMCreatePackage)
103 | include(ROCMInstallTargets)
104 | include(ROCMPackageConfigHelpers)
105 | include(ROCMInstallSymlinks)
106 | include(ROCMCheckTargetIds)
107 | include(ROCMClients)
108 | 


--------------------------------------------------------------------------------
/cmake/FindLIBNUMA.cmake:
--------------------------------------------------------------------------------
 1 | # Modifications (c) 2019-2021 Advanced Micro Devices, Inc.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without modification,
 4 | # are permitted provided that the following conditions are met:
 5 | #
 6 | # 1. Redistributions of source code must retain the above copyright notice, this
 7 | #    list of conditions and the following disclaimer.
 8 | # 2. Redistributions in binary form must reproduce the above copyright notice,
 9 | #    this list of conditions and the following disclaimer in the documentation
10 | #    and/or other materials provided with the distribution.
11 | # 3. Neither the name of the copyright holder nor the names of its contributors
12 | #    may be used to endorse or promote products derived from this software without
13 | #    specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 | # IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21 | # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 | # POSSIBILITY OF SUCH DAMAGE.
25 | 
26 | # Find module for libnuma. Defines LIBNUMA_FOUND, LIBNUMA_INCLUDE_DIR, LIBNUMA_LIBRARY
27 | # and, when both were located, the imported target libnuma::libnuma.
28 | #
29 | # Every environment-variable hint needs its own ENV keyword; a bare CPATH
30 | # would be treated as a literal directory name.
31 | find_path(LIBNUMA_INCLUDE_DIR NAMES numa.h
32 |           PATHS
33 |           ENV INCLUDE
34 |           ENV CPATH
35 |           /usr/include)
36 | 
37 | find_library(LIBNUMA_LIBRARY NAMES numa
38 |              PATHS
39 |              ENV LD_LIBRARY_PATH
40 |              /usr/lib/x86_64-linux-gnu)
41 | 
42 | # Sets LIBNUMA_FOUND and handles the REQUIRED / QUIET arguments of find_package(LIBNUMA ...).
43 | include(FindPackageHandleStandardArgs)
44 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LIBNUMA DEFAULT_MSG
45 |                                   LIBNUMA_LIBRARY
46 |                                   LIBNUMA_INCLUDE_DIR)
47 | 
48 | # Expose the imported target only when header and library were both found,
49 | # so that libnuma::libnuma never carries *-NOTFOUND values.
50 | if(LIBNUMA_FOUND AND NOT TARGET libnuma::libnuma)
51 |   add_library(libnuma::libnuma INTERFACE IMPORTED)
52 |   set_property(TARGET libnuma::libnuma PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${LIBNUMA_INCLUDE_DIR}")
53 |   set_property(TARGET libnuma::libnuma PROPERTY INTERFACE_LINK_LIBRARIES "${LIBNUMA_LIBRARY}")
54 | endif()
55 | 
56 | # Keep the two cache entries out of the default cache editor view.
57 | mark_as_advanced(LIBNUMA_INCLUDE_DIR LIBNUMA_LIBRARY)
58 | 


--------------------------------------------------------------------------------
/cmake/version.cmake:
--------------------------------------------------------------------------------
 1 | # Modifications (c) 2019 Advanced Micro Devices, Inc.
 2 | #
 3 | # Redistribution and use in source and binary forms, with or without modification,
 4 | # are permitted provided that the following conditions are met:
 5 | #
 6 | # 1. Redistributions of source code must retain the above copyright notice, this
 7 | #    list of conditions and the following disclaimer.
 8 | # 2. Redistributions in binary form must reproduce the above copyright notice,
 9 | #    this list of conditions and the following disclaimer in the documentation
10 | #    and/or other materials provided with the distribution.
11 | # 3. Neither the name of the copyright holder nor the names of its contributors
12 | #    may be used to endorse or promote products derived from this software without
13 | #    specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 | # IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21 | # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 | # POSSIBILITY OF SUCH DAMAGE.
25 | 
26 | # TODO: move this function to https://github.com/RadeonOpenCompute/rocm-cmake/blob/master/share/rocm/cmake/ROCMSetupVersion.cmake
27 | 
28 | macro(rocm_set_parent VAR)
29 |   set(${VAR} ${ARGN})              # value for the scope the macro is expanded in
30 |   set(${VAR} ${ARGN} PARENT_SCOPE) # same value for the parent of that scope
31 | endmacro()
32 | 
33 | function(rocm_get_git_commit_id OUTPUT_VERSION)
34 |   # Usage: rocm_get_git_commit_id(<out-var> [VERSION <fallback>] [DIRECTORY <dir>])
35 |   # Stores the output of `git describe`, run in <dir> (default: CMAKE_CURRENT_SOURCE_DIR),
36 |   # in <out-var>; <fallback> is stored when git is not found or describe fails.
37 |   set(options)
38 |   set(oneValueArgs VERSION DIRECTORY)
39 |   set(multiValueArgs)
40 |   cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
41 | 
42 |   set(_version ${PARSE_VERSION})
43 |   set(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
44 |   if(PARSE_DIRECTORY)
45 |     set(DIRECTORY ${PARSE_DIRECTORY})
46 |   endif()
47 |   find_program(GIT NAMES git)
48 |   if(GIT)
49 |     set(GIT_COMMAND ${GIT} describe --dirty --long --match [0-9]*)
50 |     # First try: only tags that start with a digit are accepted.
51 |     execute_process(COMMAND ${GIT_COMMAND}
52 |       WORKING_DIRECTORY ${DIRECTORY}
53 |       OUTPUT_VARIABLE GIT_TAG_VERSION
54 |       OUTPUT_STRIP_TRAILING_WHITESPACE
55 |       RESULT_VARIABLE RESULT
56 |       ERROR_QUIET)
57 |     # RESULT may hold a multi-word error string, so it is compared by name instead of being expanded into the if().
58 |     if(NOT RESULT EQUAL 0)
59 |       # Second try: --always falls back to the abbreviated commit hash.
60 |       execute_process(COMMAND ${GIT_COMMAND} --always
61 |         WORKING_DIRECTORY ${DIRECTORY}
62 |         OUTPUT_VARIABLE GIT_TAG_VERSION
63 |         OUTPUT_STRIP_TRAILING_WHITESPACE
64 |         RESULT_VARIABLE RESULT
65 |         ERROR_QUIET)
66 |     endif()
67 |     if(RESULT EQUAL 0)
68 |       set(_version ${GIT_TAG_VERSION})
69 |     endif()
70 |   endif()
71 |   rocm_set_parent(${OUTPUT_VERSION} ${_version})
72 | endfunction()
73 | 


--------------------------------------------------------------------------------
/docker/dockerfile-build-centos:
--------------------------------------------------------------------------------
 1 | # Parameters related to building rocHPCG (base_image is the image this build image extends)
 2 | ARG base_image
 3 | 
 4 | FROM ${base_image}
 5 | LABEL maintainer="rochpcg-maintainer@amd.com"
 6 | 
 7 | ARG user_uid
 8 | # Additional packages that are installed together with the build tools below
 9 | ARG library_dependencies="rocprim"
10 | 
11 | # Install dependent packages
12 | RUN yum install -y \
13 |     sudo \
14 |     centos-release-scl \
15 |     devtoolset-7 \
16 |     ca-certificates \
17 |     git \
18 |     cmake3 \
19 |     make \
20 |     clang \
21 |     clang-devel \
22 |     gcc-c++ \
23 |     pkgconfig \
24 |     libcxx-devel \
25 |     numactl-libs \
26 |     rpm-build \
27 |     deltarpm \
28 |     ${library_dependencies}
29 | # Write a profile.d script that enables devtoolset-7 through scl_source
30 | RUN echo '#!/bin/bash' | tee /etc/profile.d/devtoolset7.sh && echo \
31 |     'source scl_source enable devtoolset-7' >>/etc/profile.d/devtoolset7.sh
32 | 
33 | # docker pipeline runs containers with particular uid
34 | # create a jenkins user with this specific uid so it can use sudo privileges
35 | # Grant any member of video group password-less sudo privileges
36 | RUN useradd --create-home -u ${user_uid} -o -G video --shell /bin/bash jenkins && \
37 |     echo '%video ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd && \
38 |     chmod 400 /etc/sudoers.d/sudo-nopasswd
39 | 
40 | ARG GTEST_SRC_ROOT=/usr/local/src/gtest
41 | 
42 | # Clone gtest repo (release-1.8.1, shallow clone)
43 | # Build gtest, install it with the default prefix and remove the source tree afterwards
44 | RUN mkdir -p ${GTEST_SRC_ROOT} && cd ${GTEST_SRC_ROOT} && \
45 |     git clone -b release-1.8.1 --depth=1 https://github.com/google/googletest . && \
46 |     mkdir -p build && cd build && \
47 |     cmake .. && \
48 |     make -j $(nproc) install && \
49 |     rm -rf ${GTEST_SRC_ROOT}


--------------------------------------------------------------------------------
/docker/dockerfile-build-sles:
--------------------------------------------------------------------------------
 1 | # Parameters related to building rocHPCG (base_image is the image this build image extends)
 2 | ARG base_image
 3 | 
 4 | FROM ${base_image}
 5 | LABEL maintainer="rochpcg-maintainer@amd.com"
 6 | 
 7 | ARG user_uid
 8 | # Additional packages that are installed together with the build tools below
 9 | ARG library_dependencies="rocprim"
10 | 
11 | # Install dependent packages
12 | RUN zypper -n update && zypper -n install\
13 |     sudo \
14 |     ca-certificates \
15 |     git \
16 |     gcc-c++ \
17 |     gcc-fortran \
18 |     make \
19 |     cmake \
20 |     rpm-build \
21 |     dpkg \
22 |     libcxxtools9 \
23 |     ${library_dependencies}
24 | 
25 | # docker pipeline runs containers with particular uid
26 | # create a jenkins user with this specific uid so it can use sudo privileges
27 | # Grant any member of video group password-less sudo privileges
28 | RUN useradd --create-home -u ${user_uid} -o -G video --shell /bin/bash jenkins && \
29 |     echo '%video ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd && \
30 |     chmod 400 /etc/sudoers.d/sudo-nopasswd
31 | 
32 | ARG GTEST_SRC_ROOT=/usr/local/src/gtest
33 | 
34 | # Clone gtest repo (release-1.8.1, shallow clone)
35 | # Build gtest, install it with the default prefix and remove the source tree afterwards
36 | RUN mkdir -p ${GTEST_SRC_ROOT} && cd ${GTEST_SRC_ROOT} && \
37 |     git clone -b release-1.8.1 --depth=1 https://github.com/google/googletest . && \
38 |     mkdir -p build && cd build && \
39 |     cmake .. && \
40 |     make -j $(nproc) install && \
41 |     rm -rf ${GTEST_SRC_ROOT}


--------------------------------------------------------------------------------
/docker/dockerfile-build-ubuntu-rock:
--------------------------------------------------------------------------------
 1 | # Parameters related to building rocHPCG (base_image is the image this build image extends)
 2 | ARG base_image
 3 | 
 4 | FROM ${base_image}
 5 | LABEL maintainer="rochpcg-maintainer@amd.com"
 6 | 
 7 | ARG user_uid
 8 | # NOTE(review): the centos and sles build dockerfiles default this to "rocprim" - confirm that rocblas/rocsolver is intended here
 9 | ARG library_dependencies="rocblas rocsolver"
10 | 
11 | # Install dependent packages
12 | # Dependencies:
13 | # * hcc-config.cmake: pkg-config
14 | # * rochpcg-test: googletest rocprim
15 | # * libhsakmt.so: libnuma1 libnuma-dev
16 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
17 |     sudo \
18 |     ca-certificates \
19 |     git \
20 |     make \
21 |     cmake \
22 |     pkg-config \
23 |     libnuma1 \
24 |     libnuma-dev \
25 |     mpi-default-bin \
26 |     mpi-default-dev \
27 |     libomp-dev \
28 |     ${library_dependencies} \
29 |     && \
30 |     apt-get clean && \
31 |     rm -rf /var/lib/apt/lists/*
32 | 
33 | # docker pipeline runs containers with particular uid
34 | # create a jenkins user with this specific uid so it can use sudo privileges
35 | # Grant any member of video group password-less sudo privileges
36 | RUN useradd --create-home -u ${user_uid} -o -G video --shell /bin/bash jenkins && \
37 |     mkdir -p /etc/sudoers.d/ && \
38 |     echo '%video   ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd
39 | 
40 | ARG GTEST_SRC_ROOT=/usr/local/src/gtest
41 | 
42 | # Clone gtest repo (release-1.8.1, shallow clone)
43 | # Build gtest, install it with the default prefix and remove the source tree afterwards
44 | RUN mkdir -p ${GTEST_SRC_ROOT} && cd ${GTEST_SRC_ROOT} && \
45 |     git clone -b release-1.8.1 --depth=1 https://github.com/google/googletest . && \
46 |     mkdir -p build && cd build && \
47 |     cmake .. && \
48 |     make -j $(nproc) install && \
49 |     rm -rf ${GTEST_SRC_ROOT}
50 | 


--------------------------------------------------------------------------------
/rtest.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE testset SYSTEM "/usr/local/share/rtest.dtd">
 3 | <testset> <!-- runs of the rochpcg-test googletest binary, grouped into the sets psdb, osdb and custom -->
 4 |     <fileversion>0.1</fileversion>
 5 |     <var name="COMMAND">rochpcg-test --gtest_color=yes --gtest_output=xml</var> <!-- every run below appends ":<report file>" to this text -->
 6 |     <var name="GTEST_FILTER">*</var> <!-- NOTE(review): presumably the default for {GTEST_FILTER} in the custom set; confirm -->
 7 |     <test sets="psdb">
 8 |         <run name="all-psdb">{COMMAND}:output_psdb.xml</run>
 9 |     </test>
10 |     <test sets="osdb">
11 |         <run name="all-osdb">{COMMAND}:output_osdb.xml</run>
12 |     </test>
13 |     <test sets="custom">
14 |         <run name="custom_filter">{COMMAND}:output_custom.xml --gtest_filter={GTEST_FILTER}</run>
15 |     </test>
16 | </testset>
17 | 


--------------------------------------------------------------------------------
/src/CG.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /* ************************************************************************
16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
17 |  *
18 |  * Redistribution and use in source and binary forms, with or without modification,
19 |  * are permitted provided that the following conditions are met:
20 |  *
21 |  * 1. Redistributions of source code must retain the above copyright notice, this
22 |  *    list of conditions and the following disclaimer.
23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
24 |  *    this list of conditions and the following disclaimer in the documentation
25 |  *    and/or other materials provided with the distribution.
26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
27 |  *    may be used to endorse or promote products derived from this software without
28 |  *    specific prior written permission.
29 |  *
30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 |  * POSSIBILITY OF SUCH DAMAGE.
40 |  *
41 |  * ************************************************************************ */
42 | 
43 | #ifndef CG_HPP
44 | #define CG_HPP
45 | 
46 | #include "SparseMatrix.hpp"
47 | #include "Vector.hpp"
48 | #include "CGData.hpp"
49 | 
50 | int CG(const SparseMatrix & A, CGData & data, const Vector & b, Vector & x,
51 |     const int max_iter, const double tolerance, int & niters, double & normr,  double & normr0,
52 |     double * times, bool doPreconditioning, bool verbose);
53 | 
54 | // this function will compute the Conjugate Gradient iterations.
55 | // A - Matrix
56 | // data - CG work vectors (CGData: r, z, p, Ap)
57 | // b - constant
58 | // x - used for return value
59 | // max_iter - how many times we iterate
60 | // tolerance - Stopping tolerance for preconditioned iterations.
61 | // niters - number of iterations performed
62 | // normr - computed residual norm
63 | // normr0 - Original residual
64 | // times - array of timing information
65 | // doPreconditioning - bool to specify whether or not symmetric GS will be applied.
66 | // verbose - bool; NOTE(review): presumably enables progress output, confirm in CG.cpp
67 | #endif  // CG_HPP
68 | 


--------------------------------------------------------------------------------
/src/CGData.hpp:
--------------------------------------------------------------------------------
  1 | 
  2 | //@HEADER
  3 | // ***************************************************
  4 | //
  5 | // HPCG: High Performance Conjugate Gradient Benchmark
  6 | //
  7 | // Contact:
  8 | // Michael A. Heroux ( maherou@sandia.gov)
  9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
 10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
 11 | //
 12 | // ***************************************************
 13 | //@HEADER
 14 | 
 15 | /* ************************************************************************
 16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
 17 |  *
 18 |  * Redistribution and use in source and binary forms, with or without modification,
 19 |  * are permitted provided that the following conditions are met:
 20 |  *
 21 |  * 1. Redistributions of source code must retain the above copyright notice, this
 22 |  *    list of conditions and the following disclaimer.
 23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
 24 |  *    this list of conditions and the following disclaimer in the documentation
 25 |  *    and/or other materials provided with the distribution.
 26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
 27 |  *    may be used to endorse or promote products derived from this software without
 28 |  *    specific prior written permission.
 29 |  *
 30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 39 |  * POSSIBILITY OF SUCH DAMAGE.
 40 |  *
 41 |  * ************************************************************************ */
 42 | 
 43 | /*!
 44 |  @file CGData.hpp
 45 | 
 46 |  HPCG data structure
 47 |  */
 48 | 
 49 | #ifndef CGDATA_HPP
 50 | #define CGDATA_HPP
 51 | 
 52 | #include "SparseMatrix.hpp"
 53 | #include "Vector.hpp"
 54 | 
 55 | struct CGData_STRUCT {
 56 |   Vector r; //!< pointer to residual vector
 57 |   Vector z; //!< pointer to preconditioned residual vector
 58 |   Vector p; //!< pointer to direction vector
 59 |   Vector Ap; //!< pointer to Krylov vector
 60 | };
 61 | typedef struct CGData_STRUCT CGData;
 62 | 
 63 | /*!
 64 |  Sets up the four CG work vectors for a given matrix.
 65 | 
 66 |  @param[in]  A    the problem matrix; its local row and column counts determine the vector lengths
 67 |  @param[out] data the CG vectors structure; r and Ap are initialized with the local row count, z and p with the local column count
 68 |  */
 69 | inline void InitializeSparseCGData(SparseMatrix & A, CGData & data) {
 70 |   const local_int_t numRows = A.localNumberOfRows;
 71 |   const local_int_t numCols = A.localNumberOfColumns;
 72 | 
 73 |   InitializeVector(data.r, numRows);
 74 |   InitializeVector(data.z, numCols);
 75 |   InitializeVector(data.p, numCols);
 76 |   InitializeVector(data.Ap, numRows);
 77 | }
 78 | 
 79 | // HIP variant: same vector lengths as InitializeSparseCGData(), set up through HIPInitializeVector().
 80 | inline void HIPInitializeSparseCGData(SparseMatrix& A, CGData& data) {
 81 |     HIPInitializeVector(data.r,  A.localNumberOfRows);
 82 |     HIPInitializeVector(data.z,  A.localNumberOfColumns);
 83 |     HIPInitializeVector(data.p,  A.localNumberOfColumns);
 84 |     HIPInitializeVector(data.Ap, A.localNumberOfRows);
 85 | }
 86 | 
 87 | /*!
 88 |  Releases the storage of the CG work vectors.
 89 | 
 90 |  Counterpart of InitializeSparseCGData(): DeleteVector() is applied to r, z, p and Ap in turn.
 91 | 
 92 |  @param[inout] data the CG vectors data structure whose vectors are deleted
 93 |  */
 94 | inline void DeleteCGData(CGData & data) {
 95 |   DeleteVector(data.r);
 96 |   DeleteVector(data.z);
 97 |   DeleteVector(data.p);
 98 |   DeleteVector(data.Ap);
 99 | }
100 | 
101 | // HIP variant of DeleteCGData(): applies HIPDeleteVector() to r, z, p and Ap in turn.
102 | inline void HIPDeleteCGData(CGData& data) {
103 |     HIPDeleteVector(data.r);
104 |     HIPDeleteVector(data.z);
105 |     HIPDeleteVector(data.p);
106 |     HIPDeleteVector(data.Ap);
107 | }
108 | 
109 | #endif // CGDATA_HPP
110 | 
111 | 


--------------------------------------------------------------------------------
/src/CG_ref.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /* ************************************************************************
16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
17 |  *
18 |  * Redistribution and use in source and binary forms, with or without modification,
19 |  * are permitted provided that the following conditions are met:
20 |  *
21 |  * 1. Redistributions of source code must retain the above copyright notice, this
22 |  *    list of conditions and the following disclaimer.
23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
24 |  *    this list of conditions and the following disclaimer in the documentation
25 |  *    and/or other materials provided with the distribution.
26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
27 |  *    may be used to endorse or promote products derived from this software without
28 |  *    specific prior written permission.
29 |  *
30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 |  * POSSIBILITY OF SUCH DAMAGE.
40 |  *
41 |  * ************************************************************************ */
42 | 
43 | #ifndef CG_REF_HPP
44 | #define CG_REF_HPP
45 | 
46 | #include "SparseMatrix.hpp"
47 | #include "Vector.hpp"
48 | #include "CGData.hpp"
49 | 
50 | int CG_ref(const SparseMatrix & A, CGData & data, const Vector & b, Vector & x,
51 |     const int max_iter, const double tolerance, int & niters, double & normr,  double & normr0,
52 |     double * times, bool doPreconditioning, bool verbose);
53 | 
54 | // this function will compute the Conjugate Gradient iterations.
55 | // A - Matrix
56 | // data - CG work vectors (CGData: r, z, p, Ap)
57 | // b - constant
58 | // x - used for return value
59 | // max_iter - how many times we iterate
60 | // tolerance - Stopping tolerance for preconditioned iterations.
61 | // niters - number of iterations performed
62 | // normr - computed residual norm
63 | // normr0 - Original residual
64 | // times - array of timing information
65 | // doPreconditioning - bool to specify whether or not symmetric GS will be applied.
66 | // verbose - bool; NOTE(review): presumably enables progress output, confirm in CG_ref.cpp
67 | #endif  // CG_REF_HPP
68 | 


--------------------------------------------------------------------------------
/src/CheckAspectRatio.cpp:
--------------------------------------------------------------------------------
 1 | //@HEADER
 2 | // ***************************************************
 3 | //
 4 | // HPCG: High Performance Conjugate Gradient Benchmark
 5 | //
 6 | // Contact:
 7 | // Michael A. Heroux ( maherou@sandia.gov)
 8 | // Jack Dongarra     (dongarra@eecs.utk.edu)
 9 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
10 | //
11 | // ***************************************************
12 | //@HEADER
13 | 
14 | /*!
15 |  @file CheckAspectRatio.cpp
16 | 
17 |  HPCG routine
18 |  */
19 | 
20 | #include <algorithm>
21 | 
22 | #ifndef HPCG_NO_MPI
23 | #include <mpi.h>
24 | #endif
25 | 
26 | #include "hpcg.hpp"
27 | 
28 | #include "CheckAspectRatio.hpp"
29 | 
30 | // Returns 0 unless min(x,y,z)/max(x,y,z) is below smallest_ratio; otherwise reports (if DoIo), calls MPI_Abort in MPI builds and returns 127.
31 | int CheckAspectRatio(double smallest_ratio, int x, int y, int z, const char *what, bool DoIo) {
32 |   const int shortest = std::min({x, y, z});
33 |   const int longest = std::max({x, y, z});
34 |   const double current_ratio = shortest / double(longest); // ratio of the smallest to the largest
35 | 
36 |   // Guard clause: the shape is acceptable, nothing to report.
37 |   if (!(current_ratio < smallest_ratio))
38 |     return 0;
39 | 
40 |   if (DoIo) {
41 |     HPCG_fout << "The " << what << " sizes (" << x << "," << y << "," << z <<
42 |       ") are invalid because the ratio min(x,y,z)/max(x,y,z)=" << current_ratio <<
43 |       " is too small (at least " << smallest_ratio << " is required)." << std::endl;
44 |     HPCG_fout << "The shape should resemble a 3D cube. Please adjust and try again." << std::endl;
45 |     HPCG_fout.flush();
46 |   }
47 | #ifndef HPCG_NO_MPI
48 |   MPI_Abort(MPI_COMM_WORLD, 127);
49 | #endif
50 |   return 127;
51 | }
52 | 


--------------------------------------------------------------------------------
/src/CheckAspectRatio.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef CHECKASPECTRATIO_HPP
16 | #define CHECKASPECTRATIO_HPP
17 | extern int CheckAspectRatio(double smallest_ratio, int x, int y, int z, const char *what, bool DoIo); // 0 unless min(x,y,z)/max(x,y,z) is below smallest_ratio, in which case 127
18 | #endif // CHECKASPECTRATIO_HPP
19 | 
20 | 


--------------------------------------------------------------------------------
/src/CheckProblem.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef CHECKPROBLEM_HPP
16 | #define CHECKPROBLEM_HPP
17 | #include "SparseMatrix.hpp"
18 | #include "Vector.hpp"
19 | 
20 | void CheckProblem(SparseMatrix & A, Vector * b, Vector * x, Vector * xexact);
21 | #endif // CHECKPROBLEM_HPP
22 | 


--------------------------------------------------------------------------------
/src/ComputeDotProduct.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef COMPUTEDOTPRODUCT_HPP
16 | #define COMPUTEDOTPRODUCT_HPP
17 | #include "Vector.hpp"
18 | int ComputeDotProduct(const local_int_t n, const Vector & x, const Vector & y,
19 |     double & result, double & time_allreduce, bool & isOptimized);
20 | 
21 | #endif // COMPUTEDOTPRODUCT_HPP
22 | 


--------------------------------------------------------------------------------
/src/ComputeDotProduct_ref.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /*!
16 |  @file ComputeDotProduct_ref.cpp
17 | 
18 |  HPCG routine
19 |  */
20 | 
21 | #ifndef HPCG_NO_MPI
22 | #include <mpi.h>
23 | #include "mytimer.hpp"
24 | #endif
25 | #ifndef HPCG_NO_OPENMP
26 | #include <omp.h>
27 | #endif
28 | #include <cassert>
29 | #include "ComputeDotProduct_ref.hpp"
30 | 
31 | /*!
32 |   Routine to compute the dot product of two vectors.
33 | 
34 |   This is the reference dot-product implementation.  It _CANNOT_ be modified for the
35 |   purposes of this benchmark.
36 | 
37 |   @param[in] n the number of vector elements (on this processor)
38 |   @param[in] x, y the input vectors
39 |   @param[out] result a reference to a scalar value, on exit will contain the result (summed over all processes when MPI is enabled).
40 |   @param[inout] time_allreduce accumulated time spent communicating between processes; the duration of this call's all-reduce is added to it
41 | 
42 |   @return returns 0 upon success and non-zero otherwise (this implementation always returns 0)
43 | 
44 |   @see ComputeDotProduct
45 | */
46 | int ComputeDotProduct_ref(const local_int_t n, const Vector & x, const Vector & y,
47 |     double & result, double & time_allreduce) {
48 |   assert(x.localLength>=n); // Test vector lengths
49 |   assert(y.localLength>=n);
50 | 
51 |   double local_result = 0.0;
52 |   double * xv = x.values;
53 |   double * yv = y.values;
54 |   if (yv==xv) { // both arguments share storage: sum the squares while reading a single array
55 | #ifndef HPCG_NO_OPENMP
56 |     #pragma omp parallel for reduction (+:local_result)
57 | #endif
58 |     for (local_int_t i=0; i<n; i++) local_result += xv[i]*xv[i];
59 |   } else {
60 | #ifndef HPCG_NO_OPENMP
61 |     #pragma omp parallel for reduction (+:local_result)
62 | #endif
63 |     for (local_int_t i=0; i<n; i++) local_result += xv[i]*yv[i];
64 |   }
65 | 
66 | #ifndef HPCG_NO_MPI
67 |   // Sum the per-process partial results over MPI_COMM_WORLD; every process receives the total
68 |   double t0 = mytimer();
69 |   double global_result = 0.0;
70 |   MPI_Allreduce(&local_result, &global_result, 1, MPI_DOUBLE, MPI_SUM,
71 |       MPI_COMM_WORLD);
72 |   result = global_result;
73 |   time_allreduce += mytimer() - t0; // only the all-reduce is timed, not the local loop
74 | #else
75 |   time_allreduce += 0.0; // no communication takes place without MPI
76 |   result = local_result;
77 | #endif
78 | 
79 |   return 0;
80 | }
81 | 


--------------------------------------------------------------------------------
/src/ComputeDotProduct_ref.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef COMPUTEDOTPRODUCT_REF_HPP
16 | #define COMPUTEDOTPRODUCT_REF_HPP
17 | #include "Vector.hpp"
18 | int ComputeDotProduct_ref(const local_int_t n, const Vector & x, const Vector & y,
19 |     double & result, double & time_allreduce);
20 | 
21 | #endif // COMPUTEDOTPRODUCT_REF_HPP
22 | 


--------------------------------------------------------------------------------
/src/ComputeMG.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | //@HEADER
  3 | // ***************************************************
  4 | //
  5 | // HPCG: High Performance Conjugate Gradient Benchmark
  6 | //
  7 | // Contact:
  8 | // Michael A. Heroux ( maherou@sandia.gov)
  9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
 10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
 11 | //
 12 | // ***************************************************
 13 | //@HEADER
 14 | 
 15 | /* ************************************************************************
 16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
 17 |  *
 18 |  * Redistribution and use in source and binary forms, with or without modification,
 19 |  * are permitted provided that the following conditions are met:
 20 |  *
 21 |  * 1. Redistributions of source code must retain the above copyright notice, this
 22 |  *    list of conditions and the following disclaimer.
 23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
 24 |  *    this list of conditions and the following disclaimer in the documentation
 25 |  *    and/or other materials provided with the distribution.
 26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
 27 |  *    may be used to endorse or promote products derived from this software without
 28 |  *    specific prior written permission.
 29 |  *
 30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 39 |  * POSSIBILITY OF SUCH DAMAGE.
 40 |  *
 41 |  * ************************************************************************ */
 42 | 
 43 | /*!
 44 |  @file ComputeMG.cpp
 45 | 
 46 |  HPCG routine
 47 |  */
 48 | 
 49 | #include "ComputeMG.hpp"
 50 | #include "ComputeSYMGS.hpp"
 51 | #include "ComputeSPMV.hpp"
 52 | #include "ComputeRestriction.hpp"
 53 | #include "ComputeProlongation.hpp"
 54 | 
 55 | /*!
 56 |   @param[in] A the known system matrix
 57 |   @param[in] r the input vector
 58 |   @param[inout] x On exit contains the result of the multigrid V-cycle with r as the RHS, x is the approximation to Ax = r.
 59 | 
 60 |   @return returns 0 upon success and non-zero otherwise
 61 | 
 62 |   @see ComputeMG_ref
 63 | */
 64 | int ComputeMG(const SparseMatrix& A, const Vector& r, Vector& x)
 65 | {
 66 |     assert(x.localLength == A.localNumberOfColumns);
 67 | 
 68 |     // Every level starts with one smoother sweep from a zero initial guess.
 69 |     RETURN_IF_HPCG_ERROR(ComputeSYMGSZeroGuess(A, r, x));
 70 | 
 71 |     // No multigrid data means there is no coarser level to descend to.
 72 |     if(A.mgData == 0)
 73 |     {
 74 |         return 0;
 75 |     }
 76 | 
 77 |     // Remaining pre-smoothing sweeps; the zero-guess sweep above counts as the first.
 78 |     for(int sweep = A.mgData->numberOfPresmootherSteps; sweep > 1; --sweep)
 79 |     {
 80 |         RETURN_IF_HPCG_ERROR(ComputeSYMGS(A, r, x));
 81 |     }
 82 | 
 83 |     // SpMV followed by restriction, done by one fused call unless HPCG_REFERENCE is defined.
 84 | #ifdef HPCG_REFERENCE
 85 |     RETURN_IF_HPCG_ERROR(ComputeSPMV(A, x, *A.mgData->Axf));
 86 |     RETURN_IF_HPCG_ERROR(ComputeRestriction(A, r));
 87 | #else
 88 |     RETURN_IF_HPCG_ERROR(ComputeFusedSpMVRestriction(A, r, x));
 89 | #endif
 90 | 
 91 |     // Solve on the coarse level recursively, then prolong the result back into x.
 92 |     RETURN_IF_HPCG_ERROR(ComputeMG(*A.Ac, *A.mgData->rc, *A.mgData->xc));
 93 |     RETURN_IF_HPCG_ERROR(ComputeProlongation(A, x));
 94 | 
 95 |     // Post-smoothing sweeps.
 96 |     for(int sweep = A.mgData->numberOfPostsmootherSteps; sweep > 0; --sweep)
 97 |     {
 98 |         RETURN_IF_HPCG_ERROR(ComputeSYMGS(A, r, x));
 99 |     }
100 | 
101 |     return 0;
102 | }
103 | 


--------------------------------------------------------------------------------
/src/ComputeMG.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef COMPUTEMG_HPP
16 | #define COMPUTEMG_HPP
17 | #include "SparseMatrix.hpp"
18 | #include "Vector.hpp"
19 | 
20 | int ComputeMG(const SparseMatrix  & A, const Vector & r, Vector & x);
21 | 
22 | #endif // COMPUTEMG_HPP
23 | 


--------------------------------------------------------------------------------
/src/ComputeMG_ref.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /*!
16 |  @file ComputeMG_ref.cpp
17 | 
18 |  HPCG routine
19 |  */
20 | 
21 | #include "ComputeMG_ref.hpp"
22 | #include "ComputeSYMGS_ref.hpp"
23 | #include "ComputeSPMV_ref.hpp"
24 | #include "ComputeRestriction_ref.hpp"
25 | #include "ComputeProlongation_ref.hpp"
26 | #include <cassert>
27 | #include <iostream>
28 | 
29 | /*!
30 | 
31 |   @param[in] A the known system matrix
32 |   @param[in] r the input vector
33 |   @param[inout] x On exit contains the result of the multigrid V-cycle with r as the RHS, x is the approximation to Ax = r.
34 | 
35 |   @return returns 0 upon success and non-zero otherwise
36 | 
37 |   @see ComputeMG
38 | */
39 | int ComputeMG_ref(const SparseMatrix & A, const Vector & r, Vector & x) { // reference multigrid cycle; recurses through A.Ac
40 |   assert(x.localLength==A.localNumberOfColumns); // Make sure x contains space for halo values
41 | 
42 |   ZeroVector(x); // initialize x to zero
43 | 
44 |   int ierr = 0;
45 |   if (A.mgData!=0) { // Go to next coarse level if defined
46 |     int numberOfPresmootherSteps = A.mgData->numberOfPresmootherSteps;
47 |     for (int i=0; i< numberOfPresmootherSteps; ++i) ierr += ComputeSYMGS_ref(A, r, x); // pre-smoothing sweeps; error codes are summed
48 |     if (ierr!=0) return ierr;
49 |     ierr = ComputeSPMV_ref(A, x, *A.mgData->Axf); if (ierr!=0) return ierr; // presumably Axf = A*x (ComputeSPMV_ref is defined elsewhere)
50 |     // Perform restriction operation using simple injection
51 |     ierr = ComputeRestriction_ref(A, r);  if (ierr!=0) return ierr;
52 |     ierr = ComputeMG_ref(*A.Ac,*A.mgData->rc, *A.mgData->xc);  if (ierr!=0) return ierr; // recurse on the coarse matrix with rc as input and xc as output
53 |     ierr = ComputeProlongation_ref(A, x);  if (ierr!=0) return ierr; // adds the coarse values xc onto the matching entries of x
54 |     int numberOfPostsmootherSteps = A.mgData->numberOfPostsmootherSteps;
55 |     for (int i=0; i< numberOfPostsmootherSteps; ++i) ierr += ComputeSYMGS_ref(A, r, x); // post-smoothing sweeps; error codes are summed
56 |     if (ierr!=0) return ierr;
57 |   }
58 |   else { // no coarser level attached: a single smoother sweep is the whole cycle
59 |     ierr = ComputeSYMGS_ref(A, r, x);
60 |     if (ierr!=0) return ierr;
61 |   }
62 |   return 0;
63 | }
64 | 
65 | 


--------------------------------------------------------------------------------
/src/ComputeMG_ref.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef COMPUTEMG_REF_HPP
16 | #define COMPUTEMG_REF_HPP
17 | #include "SparseMatrix.hpp"
18 | #include "Vector.hpp"
19 | 
20 | int ComputeMG_ref(const SparseMatrix  & A, const Vector & r, Vector & x);
21 | 
22 | #endif // COMPUTEMG_REF_HPP
23 | 


--------------------------------------------------------------------------------
/src/ComputeOptimalShapeXYZ.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <cmath>
  3 | #include <cstdlib>
  4 | 
  5 | #ifdef HPCG_CUBIC_RADICAL_SEARCH
  6 | #include <algorithm>
  7 | #endif
  8 | #include <map>
  9 | 
 10 | #include "ComputeOptimalShapeXYZ.hpp"
 11 | #include "MixedBaseCounter.hpp"
 12 | 
 13 | #ifdef HPCG_CUBIC_RADICAL_SEARCH
 14 | static int min3(int a, int b, int c) { // smallest of the three values
 15 |   const int smaller_bc = (c < b) ? c : b;
 16 |   return (smaller_bc < a) ? smaller_bc : a;
 17 | }
 18 | 
 19 | static int max3(int a, int b, int c) { // largest of the three values
 20 |   const int larger_bc = (b < c) ? c : b;
 21 |   return (a < larger_bc) ? larger_bc : a;
 22 | }
 23 | 
 24 | // Scans factorizations n = x*y*z and keeps the one whose smallest/largest factor ratio is greatest.
 25 | static void
 26 | cubic_radical_search(int n, int & x, int & y, int & z) {
 27 |   double best_ratio = 0.0;
 28 | 
 29 |   // Candidates for the first factor start near the cube root of n and count down to 1.
 30 |   for (int a = (int)(pow(n,1.0/3.0)+0.5); a > 0; --a) {
 31 |     if (n % a != 0) continue;
 32 |     const int rest = n/a;
 33 |     // Candidates for the second factor start near the square root of what is left.
 34 |     for (int b = (int)(pow(rest,0.5)+0.5); b > 0; --b) {
 35 |       if (rest % b != 0) continue;
 36 |       const int c = rest / b;
 37 |       const double ratio = (double)min3(a, b, c)/max3(a, b, c);
 38 |       if (!(ratio > best_ratio)) continue; // only a strictly better shape replaces the current one
 39 |       best_ratio = ratio;
 40 |       x = a; y = b; z = c;
 41 |     }
 42 |   }
 43 | }
 44 | 
 45 | #else
 46 | 
 47 | // Accumulates the prime factorization of n into factors (factor -> multiplicity).
 48 | static void
 49 | ComputePrimeFactors(int n, std::map<int, int> & factors) {
 50 |   // Upper bound for trial divisors, taken from the square root of the original n.
 51 |   const int limit = int((sqrt(double(n)))+1L);
 52 | 
 53 |   // Strip every factor of two first so that only odd divisors remain to be tried.
 54 |   while (n > 1 && (n & 1) == 0) {
 55 |     ++factors[2];
 56 |     n >>= 1;
 57 |   }
 58 | 
 59 |   // Divide out each odd candidate as many times as it goes in evenly.
 60 |   for (int candidate = 3; candidate <= limit; candidate += 2) {
 61 |     for (div_t qr = div(n, candidate); qr.rem == 0; qr = div(n, candidate)) {
 62 |       ++factors[candidate];
 63 |       n = qr.quot;
 64 |     }
 65 |   }
 66 | 
 67 |   // Whatever is left above 1 is recorded as one more factor; if nothing has been
 68 |   // recorded at all (for example n == 1), n itself becomes the only entry.
 69 |   if (n > 1 || factors.empty())
 70 |     ++factors[n];
 71 | }
 72 | 
 73 | // Integer power x^p by binary exponentiation. A base of 0 or 1 is returned unchanged
 74 | // whatever p is; any other base with a negative exponent yields 0.
 75 | static int
 76 | pow_i(int x, int p) {
 77 |   if (0 == x || 1 == x) return x;
 78 |   if (p < 0) return 0;
 79 | 
 80 |   int v = 1;
 81 |   for (; p; p >>= 1) {
 82 |     if (1 & p) v *= x;
 83 |     // Square only while a higher exponent bit still needs it: the squaring after the
 84 |     // top bit is never used, yet it can overflow int (undefined behaviour) on its own.
 85 |     if (p > 1)
 86 |       x *= x;
 87 |   }
 88 |   return v;
 89 | }
 90 | 
 91 | #endif
 92 | 
 93 | // Splits xyz into three factors x, y and z; the search method is selected at compile time.
 94 | void ComputeOptimalShapeXYZ(int xyz, int & x, int & y, int & z) {
 95 | #ifdef HPCG_CUBIC_RADICAL_SEARCH
 96 |   cubic_radical_search( xyz, x, y, z);
 97 | #else
 98 |   typedef std::map<int, int> FactorMap; // factor -> multiplicity, iterated in ascending order
 99 |   FactorMap factors;
100 |   ComputePrimeFactors( xyz, factors );
101 | 
102 |   // ComputePrimeFactors always records at least one entry, so the first one exists. The
103 |   // second is optional: when it is missing, "y" keeps its incoming value for now.
104 |   FactorMap::iterator cursor = factors.begin();
105 |   const int p1 = cursor->first, m1 = cursor->second; // smallest factor and its multiplicity
106 |   ++cursor;
107 |   x = p1;
108 |   int m2 = 0; // multiplicity of the second factor, if there is one
109 |   if (cursor != factors.end()) {
110 |     y  = cursor->first;
111 |     m2 = cursor->second;
112 |     ++cursor; // now at the third distinct factor, or at end()
113 |   }
114 |   const bool two_distinct = factors.size() == 2, three_distinct = factors.size() == 3;
115 | 
116 |   if (factors.size() == 1) { // only a single factor: share its exponent out, extras go to x then y
117 |     const int share = m1 / 3, extra = m1 % 3;
118 |     z = pow_i(p1, share);
119 |     y = pow_i(p1, share + (extra >= 2 ? 1 : 0));
120 |     x = pow_i(p1, share + (extra >= 1 ? 1 : 0));
121 | 
122 |   } else if (two_distinct && m1 == 1 && m2 == 1) { // two distinct prime factors
123 |     z = 1;
124 | 
125 |   } else if (two_distinct && m1 + m2 == 3) { // three prime factors, one repeated
126 |     z = (m1 == 2) ? x : y; // the repeated factor is used twice
127 | 
128 |   } else if (three_distinct && m1 == 1 && m2 == 1 && cursor->second == 1) { // three distinct and single prime factors
129 |     z = cursor->first;
130 | 
131 |   } else { // every other case: try the ways of dealing prime factors out to two of the sides
132 |     int distinct_factors[32+1], count_factors[32+1]; // the map flattened into parallel arrays
133 |     int slot = 0;
134 |     for (FactorMap::iterator entry = factors.begin(); entry != factors.end(); ++entry, ++slot) {
135 |       distinct_factors[slot] = entry->first;
136 |       count_factors[slot]    = entry->second;
137 |     }
138 | 
139 |     // "all_factors" holds the full multiplicities; "first_pick" steps through the selections for one side
140 |     MixedBaseCounter all_factors(count_factors, factors.size()), first_pick(count_factors, factors.size());
141 |     double smallest_area = 2.0 * xyz + 1.0; // starting bound that candidate areas are compared against
142 | 
143 |     for (first_pick.next(); ! first_pick.is_zero(); first_pick.next()) {
144 |       MixedBaseCounter second_pick(all_factors, first_pick); // the factors "first_pick" left in "all_factors"
145 |       for (second_pick.next(); ! second_pick.is_zero(); second_pick.next()) {
146 |         const int side1 = first_pick.product(distinct_factors);
147 |         const int side2 = second_pick.product(distinct_factors);
148 |         const int side3 = xyz / side1/ side2; // the third side is derived, its factors are not tracked
149 |         const double area = side1 * double(side2) + side2 * double(side3) + side1 * double(side3);
150 |         if (!(area < smallest_area)) continue; // keep only a strictly smaller sum of pairwise products
151 |         smallest_area = area;
152 |         x = side1; y = side2; z = side3;
153 |       }
154 |     }
155 |   }
156 | #endif
157 | }
158 | 


--------------------------------------------------------------------------------
/src/ComputeOptimalShapeXYZ.hpp:
--------------------------------------------------------------------------------
1 | 
2 | void ComputeOptimalShapeXYZ(int xyz, int & x, int & y, int & z);
3 | 


--------------------------------------------------------------------------------
/src/ComputeProlongation.cpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright (c) 2019-2021 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Redistribution and use in source and binary forms, with or without modification,
 5 |  * are permitted provided that the following conditions are met:
 6 |  *
 7 |  * 1. Redistributions of source code must retain the above copyright notice, this
 8 |  *    list of conditions and the following disclaimer.
 9 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
10 |  *    this list of conditions and the following disclaimer in the documentation
11 |  *    and/or other materials provided with the distribution.
12 |  * 3. Neither the name of the copyright holder nor the names of its contributors
13 |  *    may be used to endorse or promote products derived from this software without
14 |  *    specific prior written permission.
15 |  *
16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
20 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
22 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 |  * POSSIBILITY OF SUCH DAMAGE.
26 |  *
27 |  * ************************************************************************ */
28 | 
29 | /*!
30 |  @file ComputeProlongation.cpp
31 | 
32 |  HPCG routine
33 |  */
34 | 
35 | #include "ComputeProlongation.hpp"
36 | 
37 | #include <hip/hip_runtime.h>
38 | 
39 | // Each thread handles one index below "size": it adds one entry of "coarse" onto one entry of "fine".
40 | template <unsigned int BLOCKSIZE>
41 | __launch_bounds__(BLOCKSIZE)
42 | __global__ void kernel_prolongation(local_int_t size,
43 |                                     const local_int_t* __restrict__ f2cOperator,
44 |                                     const double* __restrict__ coarse,
45 |                                     double* __restrict__ fine,
46 |                                     const local_int_t* __restrict__ perm_fine,
47 |                                     const local_int_t* __restrict__ perm_coarse)
48 | {
49 |     const local_int_t row = blockIdx.x * BLOCKSIZE + threadIdx.x;
50 | 
51 |     // Threads past the last index have nothing to do.
52 |     if(row < size)
53 |     {
54 |         // Both index tables are read once per thread, hence the non-temporal loads.
55 |         const local_int_t fine_row   = __builtin_nontemporal_load(f2cOperator + row);
56 |         const local_int_t coarse_row = __builtin_nontemporal_load(perm_coarse + row);
57 |         fine[perm_fine[fine_row]] += coarse[coarse_row];
58 |     }
59 | }
60 | 
61 | /*!
62 |   Routine to add the coarse grid correction to the fine grid solution (prolongation).
63 | 
64 |   @param[in]  Af - Fine grid sparse matrix object containing pointers to current coarse grid correction and the f2c operator.
65 |   @param[inout] xf - Fine grid solution vector, update with coarse grid correction.
66 | 
67 |   Note that the fine grid residual is never explicitly constructed.
68 |   We only compute it for the fine grid points that will be injected into corresponding coarse grid points.
69 | 
70 |   @return Returns zero on success and a non-zero value otherwise.
71 | */
72 | int ComputeProlongation(const SparseMatrix& Af, Vector& xf)
73 | {
74 |     // One thread per entry of rc, 128 threads per block, block count rounded up.
75 |     const auto coarse_size = Af.mgData->rc->localLength;
76 |     const dim3 threads(128);
77 |     const dim3 blocks((coarse_size - 1) / 128 + 1);
78 | 
79 |     kernel_prolongation<128><<<blocks, threads, 0, stream_interior>>>(coarse_size,
80 |                                                                       Af.mgData->d_f2cOperator,
81 |                                                                       Af.mgData->xc->d_values,
82 |                                                                       xf.d_values,
83 |                                                                       Af.perm,
84 |                                                                       Af.Ac->perm);
85 |     return 0; // the launch result is not inspected; success is always reported
86 | }
87 | 


--------------------------------------------------------------------------------
/src/ComputeProlongation.hpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright (c) 2019 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Redistribution and use in source and binary forms, with or without modification,
 5 |  * are permitted provided that the following conditions are met:
 6 |  *
 7 |  * 1. Redistributions of source code must retain the above copyright notice, this
 8 |  *    list of conditions and the following disclaimer.
 9 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
10 |  *    this list of conditions and the following disclaimer in the documentation
11 |  *    and/or other materials provided with the distribution.
12 |  * 3. Neither the name of the copyright holder nor the names of its contributors
13 |  *    may be used to endorse or promote products derived from this software without
14 |  *    specific prior written permission.
15 |  *
16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
20 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
22 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 |  * POSSIBILITY OF SUCH DAMAGE.
26 |  *
27 |  * ************************************************************************ */
28 | 
29 | #ifndef COMPUTEPROLONGATION_HPP
30 | #define COMPUTEPROLONGATION_HPP
31 | 
32 | #include "Vector.hpp"
33 | #include "SparseMatrix.hpp"
34 | 
35 | int ComputeProlongation(const SparseMatrix& Af, Vector& xf);
36 | 
37 | #endif // COMPUTEPROLONGATION_HPP
38 | 


--------------------------------------------------------------------------------
/src/ComputeProlongation_ref.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /*!
16 |  @file ComputeProlongation_ref.cpp
17 | 
18 |  HPCG routine
19 |  */
20 | 
21 | #ifndef HPCG_NO_OPENMP
22 | #include <omp.h>
23 | #endif
24 | 
25 | #include "ComputeProlongation_ref.hpp"
26 | 
27 | /*!
28 |   Routine to add the coarse grid correction to the fine grid solution (prolongation).
29 | 
30 |   @param[in]  Af - Fine grid sparse matrix object containing pointers to current coarse grid correction and the f2c operator.
31 |   @param[inout] xf - Fine grid solution vector, update with coarse grid correction.
32 | 
33 |   Note that the fine grid residual is never explicitly constructed.
34 |   We only compute it for the fine grid points that will be injected into corresponding coarse grid points.
35 | 
36 |   @return Returns zero on success and a non-zero value otherwise (this implementation always returns zero).
37 | */
38 | int ComputeProlongation_ref(const SparseMatrix & Af, Vector & xf) {
39 | 
40 |   double * xfv = xf.values; // fine grid values, updated in place
41 |   double * xcv = Af.mgData->xc->values; // coarse grid correction that gets added
42 |   local_int_t * f2c = Af.mgData->f2cOperator; // f2c[i] is the fine grid index that coarse index i maps to
43 |   local_int_t nc = Af.mgData->rc->localLength; // number of coarse entries, taken from the length of rc
44 | 
45 | #ifndef HPCG_NO_OPENMP
46 | #pragma omp parallel for
47 | #endif
48 | // TODO: Somehow note that this loop can be safely vectorized since f2c has no repeated indices
49 |   for (local_int_t i=0; i<nc; ++i) xfv[f2c[i]] += xcv[i]; // This loop is safe to vectorize
50 | 
51 |   return 0;
52 | }
53 | 


--------------------------------------------------------------------------------
/src/ComputeProlongation_ref.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef COMPUTEPROLONGATION_REF_HPP
16 | #define COMPUTEPROLONGATION_REF_HPP
17 | #include "Vector.hpp"
18 | #include "SparseMatrix.hpp"
19 | int ComputeProlongation_ref(const SparseMatrix & Af, Vector & xf);
20 | #endif // COMPUTEPROLONGATION_REF_HPP
21 | 


--------------------------------------------------------------------------------
/src/ComputeResidual.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | //@HEADER
  3 | // ***************************************************
  4 | //
  5 | // HPCG: High Performance Conjugate Gradient Benchmark
  6 | //
  7 | // Contact:
  8 | // Michael A. Heroux ( maherou@sandia.gov)
  9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
 10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
 11 | //
 12 | // ***************************************************
 13 | //@HEADER
 14 | 
 15 | /* ************************************************************************
 16 |  * Modifications (c) 2019-2021 Advanced Micro Devices, Inc.
 17 |  *
 18 |  * Redistribution and use in source and binary forms, with or without modification,
 19 |  * are permitted provided that the following conditions are met:
 20 |  *
 21 |  * 1. Redistributions of source code must retain the above copyright notice, this
 22 |  *    list of conditions and the following disclaimer.
 23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
 24 |  *    this list of conditions and the following disclaimer in the documentation
 25 |  *    and/or other materials provided with the distribution.
 26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
 27 |  *    may be used to endorse or promote products derived from this software without
 28 |  *    specific prior written permission.
 29 |  *
 30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 39 |  * POSSIBILITY OF SUCH DAMAGE.
 40 |  *
 41 |  * ************************************************************************ */
 42 | 
 43 | /*!
 44 |  @file ComputeResidual.cpp
 45 | 
 46 |  HPCG routine
 47 |  */
 48 | #ifndef HPCG_NO_MPI
 49 | #include <mpi.h>
 50 | #endif
 51 | 
 52 | #include "ComputeResidual.hpp"
 53 | 
 54 | #include <hip/hip_runtime.h>
 55 | 
 56 | #ifdef OPT_ROCTX
 57 | #include <roctracer/roctx.h>
 58 | #endif
 59 | 
 60 | // Block-wide max reduction: on return data[0] holds the maximum of data[0..BLOCKSIZE).
 61 | template <unsigned int BLOCKSIZE>
 62 | __device__ void reduce_max(local_int_t tid, double* data)
 63 | {
 64 |     // Barrier first, so every thread's prior writes to data[] are in place before combining.
 65 |     __syncthreads();
 66 | 
 67 |     // Walk the strides 512, 256, ..., 1; a stride is used only when BLOCKSIZE exceeds it.
 68 |     for(local_int_t stride = 512; stride >= 1; stride /= 2)
 69 |     {
 70 |         if(!(BLOCKSIZE > stride)) { continue; }
 71 | 
 72 |         if(tid < stride && tid + stride < BLOCKSIZE) { data[tid] = max(data[tid], data[tid + stride]); }
 73 |         __syncthreads();
 74 |     }
 75 | }
 76 | 
 77 | // Stage 1 of the residual reduction: each block stores the maximum of |v1[i] - v2[i]|,
 78 | // taken over the indices i < n that its threads visit, into workspace[blockIdx.x].
 79 | template <unsigned int BLOCKSIZE>
 80 | __launch_bounds__(BLOCKSIZE)
 81 | __global__ void kernel_residual_part1(local_int_t n,
 82 |                                       const double* __restrict__ v1,
 83 |                                       const double* __restrict__ v2,
 84 |                                       double* __restrict__ workspace)
 85 | {
 86 |     __shared__ double sdata[BLOCKSIZE];
 87 | 
 88 |     const local_int_t tid    = threadIdx.x;
 89 |     const local_int_t stride = gridDim.x * BLOCKSIZE;
 90 | 
 91 |     // Thread-private running maximum; starts at 0.0 so a thread with no work contributes 0.
 92 |     double thread_max = 0.0;
 93 |     for(local_int_t i = blockIdx.x * BLOCKSIZE + tid; i < n; i += stride)
 94 |     {
 95 |         thread_max = max(thread_max, fabs(v1[i] - v2[i]));
 96 |     }
 97 |     sdata[tid] = thread_max;
 98 | 
 99 |     reduce_max<BLOCKSIZE>(tid, sdata);
100 | 
101 |     if(tid == 0) { workspace[blockIdx.x] = sdata[0]; }
102 | }
103 | 
104 | // Stage 2: loads workspace[0..BLOCKSIZE), reduces it to its maximum and writes it to workspace[0].
105 | template <unsigned int BLOCKSIZE>
106 | __launch_bounds__(BLOCKSIZE)
107 | __global__ void kernel_residual_part2(double* workspace)
108 | {
109 |     const local_int_t tid = threadIdx.x;
110 | 
111 |     __shared__ double partial[BLOCKSIZE];
112 |     partial[tid] = workspace[tid];
113 |     __syncthreads();
114 | 
115 |     reduce_max<BLOCKSIZE>(tid, partial);
116 | 
117 |     // Only thread 0 publishes the reduced value.
118 |     if(tid == 0) { workspace[0] = partial[0]; }
119 | }
120 | 
121 | // Computes residual = max_i |v1.d_values[i] - v2.d_values[i]| for i < n, MAX-reduced across
122 | // MPI_COMM_WORLD unless HPCG_NO_MPI is defined. Returns 0; HIP errors are routed through HIP_CHECK.
123 | int ComputeResidual(local_int_t n, const Vector& v1, const Vector& v2, double& residual)
124 | {
125 |     // part2 reads one partial per part1 block, so part1's grid size must equal part2's block size.
126 |     const unsigned int kThreads = 256;
127 |     const unsigned int kBlocks  = 256;
128 |     double* tmp = reinterpret_cast<double*>(workspace);
129 | 
130 |     kernel_residual_part1<kThreads><<<kBlocks, kThreads, 0, stream_interior>>>(n, v1.d_values, v2.d_values, tmp);
131 |     HIP_CHECK(hipGetLastError()); // a rejected launch would otherwise go unnoticed and tmp would not be written
132 |     kernel_residual_part2<kBlocks><<<1, kBlocks, 0, stream_interior>>>(tmp);
133 |     HIP_CHECK(hipGetLastError());
134 | 
135 |     double local_residual;
136 |     HIP_CHECK(hipMemcpyAsync(&local_residual, tmp, sizeof(double), hipMemcpyDeviceToHost, stream_interior));
137 |     HIP_CHECK(hipStreamSynchronize(stream_interior)); // local_residual is valid only after this returns
138 | #ifndef HPCG_NO_MPI
139 |     double global_residual = 0.0;
140 | #ifdef OPT_ROCTX
141 |     roctxRangePush("MPI AllReduce");
142 | #endif
143 |     MPI_Allreduce(&local_residual, &global_residual, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
144 | #ifdef OPT_ROCTX
145 |     roctxRangePop();
146 | #endif
147 |     residual = global_residual;
148 | #else
149 |     residual = local_residual;
150 | #endif
151 |     return 0;
152 | }
153 | 


--------------------------------------------------------------------------------
/src/ComputeResidual.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef COMPUTERESIDUAL_HPP
16 | #define COMPUTERESIDUAL_HPP
17 | #include "Vector.hpp"
18 | int ComputeResidual(const local_int_t n, const Vector & v1, const Vector & v2, double & residual); // stores the inf-norm of v1 - v2 (first n entries) in 'residual'; returns 0
19 | #endif // COMPUTERESIDUAL_HPP
20 | 


--------------------------------------------------------------------------------
/src/ComputeResidual_ref.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /*!
16 |  @file ComputeResidual_ref.cpp
17 | 
18 |  HPCG routine
19 |  */
20 | #ifndef HPCG_NO_MPI
21 | #include <mpi.h>
22 | #endif
23 | #ifndef HPCG_NO_OPENMP
24 | #include <omp.h>
25 | #endif
26 | 
27 | #include "Vector.hpp"
28 | 
29 | #ifdef HPCG_DETAILED_DEBUG
30 | #include <fstream>
31 | #include "hpcg.hpp"
32 | #endif
33 | 
34 | #include <cmath>  // needed for fabs
35 | #include "ComputeResidual_ref.hpp"
36 | #ifdef HPCG_DETAILED_DEBUG
37 | #include <iostream>
38 | #endif
39 | 
40 | /*!
41 |   Routine to compute the inf-norm difference between two vectors, max_i |v1[i] - v2[i]|, where:
42 | 
43 |   @param[in]  n        number of vector elements (local to this processor)
44 |   @param[in]  v1, v2   input vectors
45 |   @param[out] residual reference to scalar value; on exit, will contain result: inf-norm difference
46 | 
47 |   @return Returns zero; this routine has no failure path.
48 | */
49 | int ComputeResidual_ref(const local_int_t n, const Vector & v1, const Vector & v2, double & residual) {
50 |   double * v1v = v1.values;
51 |   double * v2v = v2.values;
52 |   double local_residual = 0.0;
53 | 
54 | #ifndef HPCG_NO_OPENMP
55 |   // n is const: OpenMP >= 4.0 requires listing it under default(none); firstprivate is also accepted by 3.1-era compilers
56 |   #pragma omp parallel default(none) shared(local_residual, v1v, v2v) firstprivate(n)
57 |   {
58 |     double threadlocal_residual = 0.0; // per-thread maximum, merged into local_residual below
59 |     #pragma omp for
60 |     for (local_int_t i=0; i<n; i++) {
61 |       double diff = std::fabs(v1v[i] - v2v[i]);
62 |       if (diff > threadlocal_residual) threadlocal_residual = diff;
63 |     }
64 |     #pragma omp critical
65 |     {
66 |       if (threadlocal_residual>local_residual) local_residual = threadlocal_residual;
67 |     }
68 |   }
69 | #else // No threading
70 |   for (local_int_t i=0; i<n; i++) {
71 |     double diff = std::fabs(v1v[i] - v2v[i]);
72 |     if (diff > local_residual) local_residual = diff;
73 | #ifdef HPCG_DETAILED_DEBUG
74 |     HPCG_fout << " Computed, exact, diff = " << v1v[i] << " " << v2v[i] << " " << diff << std::endl;
75 | #endif
76 |   }
77 | #endif
78 | 
79 | #ifndef HPCG_NO_MPI
80 |   // Use MPI's reduce function to take the maximum of the per-process residuals
81 |   double global_residual = 0;
82 |   MPI_Allreduce(&local_residual, &global_residual, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
83 |   residual = global_residual;
84 | #else
85 |   residual = local_residual;
86 | #endif
87 | 
88 |   return 0;
89 | }
90 | 


--------------------------------------------------------------------------------
/src/ComputeResidual_ref.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef COMPUTERESIDUAL_REF_HPP
16 | #define COMPUTERESIDUAL_REF_HPP
17 | #include "Vector.hpp"
18 | int ComputeResidual_ref(const local_int_t n, const Vector & v1, const Vector & v2, double & residual); // reference version: stores the inf-norm of v1 - v2 (first n entries) in 'residual'; returns 0
19 | #endif // COMPUTERESIDUAL_REF_HPP
20 | 


--------------------------------------------------------------------------------
/src/ComputeRestriction.hpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright (c) 2019 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Redistribution and use in source and binary forms, with or without modification,
 5 |  * are permitted provided that the following conditions are met:
 6 |  *
 7 |  * 1. Redistributions of source code must retain the above copyright notice, this
 8 |  *    list of conditions and the following disclaimer.
 9 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
10 |  *    this list of conditions and the following disclaimer in the documentation
11 |  *    and/or other materials provided with the distribution.
12 |  * 3. Neither the name of the copyright holder nor the names of its contributors
13 |  *    may be used to endorse or promote products derived from this software without
14 |  *    specific prior written permission.
15 |  *
16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
20 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
22 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 |  * POSSIBILITY OF SUCH DAMAGE.
26 |  *
27 |  * ************************************************************************ */
28 | 
29 | #ifndef COMPUTERESTRICTION_HPP
30 | #define COMPUTERESTRICTION_HPP
31 | 
32 | #include "Vector.hpp"
33 | #include "SparseMatrix.hpp"
34 | 
35 | int ComputeRestriction(const SparseMatrix& A, const Vector& rf); // NOTE(review): definition not visible here; presumably the optimized counterpart of ComputeRestriction_ref - confirm
36 | int ComputeFusedSpMVRestriction(const SparseMatrix& A, const Vector& rf, Vector& xf); // NOTE(review): by its name, an SpMV fused with the restriction; xf is the only non-const argument - confirm against the definition
37 | 
38 | #endif // COMPUTERESTRICTION_HPP
39 | 


--------------------------------------------------------------------------------
/src/ComputeRestriction_ref.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /*!
16 |  @file ComputeRestriction_ref.cpp
17 | 
18 |  HPCG routine
19 |  */
20 | 
21 | 
22 | #ifndef HPCG_NO_OPENMP
23 | #include <omp.h>
24 | #endif
25 | 
26 | #include "ComputeRestriction_ref.hpp"
27 | 
28 | /*!
29 |   Builds the coarse residual vector: for each coarse point i, rc[i] = rf[f2c[i]] - Axf[f2c[i]].
30 | 
31 |   The fine grid residual is never formed in full; it is evaluated only at the fine grid points
32 |   selected by f2cOperator, i.e. the ones injected into corresponding coarse grid points.
33 | 
34 |   @param[inout]  A - Sparse matrix object whose mgData holds Axf (the fine grid matrix-vector product), f2cOperator and rc (the coarse residual vector, written here).
35 |   @param[in]    rf - Fine grid RHS.
36 | 
37 |   @return Returns zero; this routine has no failure path.
38 | */
39 | int ComputeRestriction_ref(const SparseMatrix & A, const Vector & rf) {
40 |   const local_int_t numCoarse = A.mgData->rc->localLength;
41 |   const local_int_t * const fineIndex = A.mgData->f2cOperator;
42 |   const double * const fineRhs = rf.values;
43 |   const double * const fineAx = A.mgData->Axf->values;
44 |   double * const coarseResidual = A.mgData->rc->values;
45 | 
46 | #ifndef HPCG_NO_OPENMP
47 | #pragma omp parallel for
48 | #endif
49 |   for (local_int_t c = 0; c < numCoarse; ++c) {
50 |     const local_int_t f = fineIndex[c]; // fine grid index paired with coarse point c
51 |     coarseResidual[c] = fineRhs[f] - fineAx[f];
52 |   }
53 |   return 0;
54 | }
55 | 


--------------------------------------------------------------------------------
/src/ComputeRestriction_ref.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef COMPUTERESTRICTION_REF_HPP
16 | #define COMPUTERESTRICTION_REF_HPP
17 | #include "Vector.hpp"
18 | #include "SparseMatrix.hpp"
19 | int ComputeRestriction_ref(const SparseMatrix & A, const Vector & rf); // reference version: writes rf[f2c[i]] - Axf[f2c[i]] into A.mgData->rc for each coarse point i; returns 0
20 | #endif // COMPUTERESTRICTION_REF_HPP
21 | 


--------------------------------------------------------------------------------
/src/ComputeSPMV.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef COMPUTESPMV_HPP
16 | #define COMPUTESPMV_HPP
17 | #include "Vector.hpp"
18 | #include "SparseMatrix.hpp"
19 | 
20 | int ComputeSPMV( const SparseMatrix & A, Vector & x, Vector & y); // NOTE(review): definition not visible here; presumably the optimized y = Ax matching ComputeSPMV_ref - confirm
21 | 
22 | #endif  // COMPUTESPMV_HPP
23 | 


--------------------------------------------------------------------------------
/src/ComputeSPMV_ref.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /*!
16 |  @file ComputeSPMV_ref.cpp
17 | 
18 |  HPCG routine
19 |  */
20 | 
21 | #include "ComputeSPMV_ref.hpp"
22 | 
23 | #ifndef HPCG_NO_MPI
24 | #include "ExchangeHalo.hpp"
25 | #endif
26 | 
27 | #ifndef HPCG_NO_OPENMP
28 | #include <omp.h>
29 | #endif
30 | #include <cassert>
31 | 
32 | /*!
33 |   Reference sparse matrix-vector product y = Ax.
34 | 
35 |   In MPI builds this routine calls ExchangeHalo(A,x) itself before forming the product.
36 | 
37 |   This is the reference SPMV implementation.  It CANNOT be modified for the
38 |   purposes of this benchmark.
39 | 
40 |   @param[in]  A the known system matrix
41 |   @param[in]  x the known vector
42 |   @param[out] y on exit, entry i holds the product of local row i of A with x
43 | 
44 |   @return returns 0 (no other value is produced by this implementation)
45 |   @see ComputeSPMV
46 | */
47 | int ComputeSPMV_ref( const SparseMatrix & A, Vector & x, Vector & y) {
48 |   assert(x.localLength>=A.localNumberOfColumns); // Test vector lengths
49 |   assert(y.localLength>=A.localNumberOfRows);
50 | 
51 | #ifndef HPCG_NO_MPI
52 |   ExchangeHalo(A,x);
53 | #endif
54 | 
55 |   const local_int_t numRows = A.localNumberOfRows;
56 |   const double * const xValues = x.values;
57 |   double * const yValues = y.values;
58 | #ifndef HPCG_NO_OPENMP
59 |   #pragma omp parallel for
60 | #endif
61 |   for (local_int_t row = 0; row < numRows; ++row) {
62 |     const int rowNnz = A.nonzerosInRow[row];
63 |     const local_int_t * const cols = A.mtxIndL[row];
64 |     const double * const vals = A.matrixValues[row];
65 |     double acc = 0.0; // dot product of the stored entries of this row with x
66 |     for (int k = 0; k < rowNnz; ++k) {
67 |       acc += vals[k] * xValues[cols[k]];
68 |     }
69 |     yValues[row] = acc;
70 |   }
71 |   return 0;
72 | }
73 | 


--------------------------------------------------------------------------------
/src/ComputeSPMV_ref.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef COMPUTESPMV_REF_HPP
16 | #define COMPUTESPMV_REF_HPP
17 | #include "Vector.hpp"
18 | #include "SparseMatrix.hpp"
19 | 
20 | int ComputeSPMV_ref( const SparseMatrix & A, Vector  & x, Vector & y); // reference y = Ax over the local rows of A; returns 0
21 | 
22 | #endif  // COMPUTESPMV_REF_HPP
23 | 


--------------------------------------------------------------------------------
/src/ComputeSYMGS.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /* ************************************************************************
16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
17 |  *
18 |  * Redistribution and use in source and binary forms, with or without modification,
19 |  * are permitted provided that the following conditions are met:
20 |  *
21 |  * 1. Redistributions of source code must retain the above copyright notice, this
22 |  *    list of conditions and the following disclaimer.
23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
24 |  *    this list of conditions and the following disclaimer in the documentation
25 |  *    and/or other materials provided with the distribution.
26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
27 |  *    may be used to endorse or promote products derived from this software without
28 |  *    specific prior written permission.
29 |  *
30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 |  * POSSIBILITY OF SUCH DAMAGE.
40 |  *
41 |  * ************************************************************************ */
42 | 
43 | #ifndef COMPUTESYMGS_HPP
44 | #define COMPUTESYMGS_HPP
45 | 
46 | #include "SparseMatrix.hpp"
47 | #include "Vector.hpp"
48 | 
49 | int ComputeSYMGS(const SparseMatrix & A, const Vector& r, Vector& x); // NOTE(review): definition not visible here; presumably the optimized counterpart of ComputeSYMGS_ref - confirm
50 | int ComputeSYMGSZeroGuess(const SparseMatrix & A, const Vector& r, Vector& x); // NOTE(review): by its name, a variant for an all-zero initial x - confirm against the definition
51 | 
52 | #endif // COMPUTESYMGS_HPP
53 | 


--------------------------------------------------------------------------------
/src/ComputeSYMGS_ref.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | //@HEADER
  3 | // ***************************************************
  4 | //
  5 | // HPCG: High Performance Conjugate Gradient Benchmark
  6 | //
  7 | // Contact:
  8 | // Michael A. Heroux ( maherou@sandia.gov)
  9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
 10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
 11 | //
 12 | // ***************************************************
 13 | //@HEADER
 14 | 
 15 | /*!
 16 |  @file ComputeSYMGS_ref.cpp
 17 | 
 18 |  HPCG routine
 19 |  */
 20 | 
 21 | #ifndef HPCG_NO_MPI
 22 | #include "ExchangeHalo.hpp"
 23 | #endif
 24 | #include "ComputeSYMGS_ref.hpp"
 25 | #include <cassert>
 26 | 
 27 | /*!
 28 |   Performs one step of symmetric Gauss-Seidel on x with r as the right-hand side.
 29 | 
 30 |   Assumption about the structure of matrix A:
 31 |   - Each row 'i' of the matrix has nonzero diagonal value whose address is matrixDiagonal[i]
 32 |   - Entries in row 'i' are ordered such that:
 33 |        - lower triangular terms are stored before the diagonal element.
 34 |        - upper triangular terms are stored after the diagonal element.
 35 |        - No other assumptions are made about entry ordering.
 36 | 
 37 |   How the step is carried out:
 38 |   - In MPI builds, ExchangeHalo(A,x) is called before the sweeps.
 39 |   - Forward sweep: rows 0 .. nrow-1 in increasing order, each row updated in place.
 40 |   - Back sweep: rows nrow-1 .. 0 in decreasing order, using the same row update.
 41 |   - Row update: x[i] = (r[i] - sum over j != i of A(i,j)*x[j]) / A(i,i).  For simplicity the inner
 42 |     loop subtracts every stored entry, diagonal included, and the diagonal term is added back after.
 43 | 
 44 |   @param[in] A the known system matrix
 45 |   @param[in] r the input vector
 46 |   @param[inout] x On entry, x should contain relevant values, on exit x contains the result of one symmetric GS sweep with r as the RHS.
 47 | 
 48 |   @warning Early versions of this kernel (Version 1.1 and earlier) had the r and x arguments in reverse order, and out of sync with other kernels.
 49 | 
 50 |   @return returns 0 (no other value is produced by this implementation)
 51 | 
 52 |   @see ComputeSYMGS
 53 | */
 54 | int ComputeSYMGS_ref( const SparseMatrix & A, const Vector & r, Vector & x) {
 55 | 
 56 |   assert(x.localLength==A.localNumberOfColumns); // Make sure x contain space for halo values
 57 | 
 58 |   // MPI builds: ExchangeHalo(A,x) runs before any row is updated.
 59 | #ifndef HPCG_NO_MPI
 60 |   ExchangeHalo(A,x);
 61 | #endif
 62 | 
 63 |   const local_int_t numRows = A.localNumberOfRows;
 64 |   double ** diagPtrs = A.matrixDiagonal;  // diagPtrs[i][0] is read as the diagonal value of row i
 65 |   const double * const rhs = r.values;
 66 |   double * const sol = x.values;
 67 | 
 68 |   // Forward sweep: rows in increasing order. Each row reads the current contents of sol,
 69 |   // so rows already processed in this sweep contribute their updated values.
 70 |   for (local_int_t row = 0; row < numRows; ++row) {
 71 |     const int rowNnz = A.nonzerosInRow[row];
 72 |     const local_int_t * const cols = A.mtxIndL[row];
 73 |     const double * const vals = A.matrixValues[row];
 74 |     const double diag = diagPtrs[row][0];
 75 | 
 76 |     double acc = rhs[row]; // start from the RHS value
 77 |     for (int k = 0; k < rowNnz; ++k) {
 78 |       acc -= vals[k] * sol[cols[k]];
 79 |     }
 80 |     acc += sol[row] * diag; // add back the diagonal term subtracted by the loop above
 81 | 
 82 |     sol[row] = acc / diag;
 83 |   }
 84 | 
 85 |   // Back sweep: the same row update, rows in decreasing order (numRows-1 down to 0),
 86 |   // again reading and writing sol in place.
 87 |   for (local_int_t row = numRows; row-- > 0; ) {
 88 |     const int rowNnz = A.nonzerosInRow[row];
 89 |     const local_int_t * const cols = A.mtxIndL[row];
 90 |     const double * const vals = A.matrixValues[row];
 91 |     const double diag = diagPtrs[row][0];
 92 | 
 93 |     double acc = rhs[row]; // start from the RHS value
 94 |     for (int k = 0; k < rowNnz; ++k) {
 95 |       acc -= vals[k] * sol[cols[k]];
 96 |     }
 97 |     acc += sol[row] * diag; // add back the diagonal term subtracted by the loop above
 98 | 
 99 |     sol[row] = acc / diag;
100 |   }
101 | 
102 |   return 0;
103 | }
104 | 
105 | 


--------------------------------------------------------------------------------
/src/ComputeSYMGS_ref.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef COMPUTESYMGS_REF_HPP
16 | #define COMPUTESYMGS_REF_HPP
17 | #include "SparseMatrix.hpp"
18 | #include "Vector.hpp"
19 | 
20 | int ComputeSYMGS_ref( const SparseMatrix  & A, const Vector & r, Vector & x); // reference version: one forward and one back Gauss-Seidel sweep on x with r as the RHS; returns 0
21 | 
22 | #endif // COMPUTESYMGS_REF_HPP
23 | 


--------------------------------------------------------------------------------
/src/ComputeWAXPBY.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /* ************************************************************************
16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
17 |  *
18 |  * Redistribution and use in source and binary forms, with or without modification,
19 |  * are permitted provided that the following conditions are met:
20 |  *
21 |  * 1. Redistributions of source code must retain the above copyright notice, this
22 |  *    list of conditions and the following disclaimer.
23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
24 |  *    this list of conditions and the following disclaimer in the documentation
25 |  *    and/or other materials provided with the distribution.
26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
27 |  *    may be used to endorse or promote products derived from this software without
28 |  *    specific prior written permission.
29 |  *
30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 |  * POSSIBILITY OF SUCH DAMAGE.
40 |  *
41 |  * ************************************************************************ */
42 | 
43 | #ifndef COMPUTEWAXPBY_HPP
44 | #define COMPUTEWAXPBY_HPP
45 | 
46 | #include "Vector.hpp"
47 | 
48 | int ComputeWAXPBY(local_int_t n, // NOTE(review): definition not visible here; presumably w = alpha*x + beta*y like ComputeWAXPBY_ref - confirm
49 |                   double alpha,
50 |                   const Vector& x,
51 |                   double beta,
52 |                   const Vector& y,
53 |                   Vector& w,
54 |                   bool& isOptimized); // non-const reference, so the callee is able to write to it
55 | 
56 | int ComputeFusedWAXPBYDot(local_int_t n, // NOTE(review): by its name, a WAXPBY update fused with a dot product - confirm against the definition
57 |                           double alpha,
58 |                           const Vector& x,
59 |                           Vector& y, // the only non-const Vector argument
60 |                           double& result,
61 |                           double& time_allreduce);
62 | 
63 | #endif // COMPUTEWAXPBY_HPP
64 | 


--------------------------------------------------------------------------------
/src/ComputeWAXPBY_ref.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /*!
16 |  @file ComputeWAXPBY_ref.cpp
17 | 
18 |  HPCG routine
19 |  */
20 | 
21 | #include "ComputeWAXPBY_ref.hpp"
22 | #ifndef HPCG_NO_OPENMP
23 | #include <omp.h>
24 | #endif
25 | #include <cassert>
26 | /*!
27 |   Computes w = alpha*x + beta*y over the first n entries of the vectors.
28 | 
29 |   A scalar equal to 1.0 is not multiplied in: alpha==1.0 is tested first, then beta==1.0; otherwise both products are formed.
30 | 
31 |   This is the reference WAXPBY implementation.  It CANNOT be modified for the
32 |   purposes of this benchmark.
33 | 
34 |   @param[in] n the number of vector elements (on this processor)
35 |   @param[in] alpha, beta the scalars applied to x and y respectively.
36 |   @param[in] x, y the input vectors
37 |   @param[out] w the output vector.
38 | 
39 |   @return returns 0 (no other value is produced by this implementation)
40 |   @see ComputeWAXPBY
41 | */
42 | int ComputeWAXPBY_ref(const local_int_t n, const double alpha, const Vector & x,
43 |     const double beta, const Vector & y, Vector & w) {
44 |   assert(x.localLength>=n); // Test vector lengths
45 |   assert(y.localLength>=n);
46 | 
47 |   const double * const xIn = x.values;
48 |   const double * const yIn = y.values;
49 |   double * const wOut = w.values;
50 |   const bool unitAlpha = (alpha == 1.0);
51 |   const bool unitBeta = (beta == 1.0);
52 | 
53 |   if (unitAlpha) {
54 | #ifndef HPCG_NO_OPENMP
55 |     #pragma omp parallel for
56 | #endif
57 |     for (local_int_t k = 0; k < n; ++k) wOut[k] = xIn[k] + beta * yIn[k];
58 |   } else if (unitBeta) {
59 | #ifndef HPCG_NO_OPENMP
60 |     #pragma omp parallel for
61 | #endif
62 |     for (local_int_t k = 0; k < n; ++k) wOut[k] = alpha * xIn[k] + yIn[k];
63 |   } else {
64 | #ifndef HPCG_NO_OPENMP
65 |     #pragma omp parallel for
66 | #endif
67 |     for (local_int_t k = 0; k < n; ++k) wOut[k] = alpha * xIn[k] + beta * yIn[k];
68 |   }
69 |   return 0;
70 | }
71 | 


--------------------------------------------------------------------------------
/src/ComputeWAXPBY_ref.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef COMPUTEWAXPBY_REF_HPP
16 | #define COMPUTEWAXPBY_REF_HPP
17 | #include "Vector.hpp"
18 | int ComputeWAXPBY_ref(const local_int_t n, const double alpha, const Vector & x,
19 |     const double beta, const Vector & y, Vector & w); // reference version: w = alpha*x + beta*y over the first n entries; returns 0
20 | #endif // COMPUTEWAXPBY_REF_HPP
21 | 


--------------------------------------------------------------------------------
/src/ExchangeHalo.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /* ************************************************************************
16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
17 |  *
18 |  * Redistribution and use in source and binary forms, with or without modification,
19 |  * are permitted provided that the following conditions are met:
20 |  *
21 |  * 1. Redistributions of source code must retain the above copyright notice, this
22 |  *    list of conditions and the following disclaimer.
23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
24 |  *    this list of conditions and the following disclaimer in the documentation
25 |  *    and/or other materials provided with the distribution.
26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
27 |  *    may be used to endorse or promote products derived from this software without
28 |  *    specific prior written permission.
29 |  *
30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 |  * POSSIBILITY OF SUCH DAMAGE.
40 |  *
41 |  * ************************************************************************ */
42 | 
43 | #ifndef EXCHANGEHALO_HPP
44 | #define EXCHANGEHALO_HPP
45 | 
46 | #include "SparseMatrix.hpp"
47 | #include "Vector.hpp"
48 | 
49 | void ExchangeHalo(const SparseMatrix & A, Vector & x);
50 | 
51 | void PrepareSendBuffer(const SparseMatrix& A, const Vector& x);
52 | void ExchangeHaloAsync(const SparseMatrix& A, Vector& x);
53 | void ObtainRecvBuffer(const SparseMatrix& A, Vector& x);
54 | 
55 | #endif // EXCHANGEHALO_HPP
56 | 


--------------------------------------------------------------------------------
/src/GenerateCoarseProblem.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /* ************************************************************************
16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
17 |  *
18 |  * Redistribution and use in source and binary forms, with or without modification,
19 |  * are permitted provided that the following conditions are met:
20 |  *
21 |  * 1. Redistributions of source code must retain the above copyright notice, this
22 |  *    list of conditions and the following disclaimer.
23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
24 |  *    this list of conditions and the following disclaimer in the documentation
25 |  *    and/or other materials provided with the distribution.
26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
27 |  *    may be used to endorse or promote products derived from this software without
28 |  *    specific prior written permission.
29 |  *
30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 |  * POSSIBILITY OF SUCH DAMAGE.
40 |  *
41 |  * ************************************************************************ */
42 | 
43 | #ifndef GENERATECOARSEPROBLEM_HPP
44 | #define GENERATECOARSEPROBLEM_HPP
45 | 
46 | #include "SparseMatrix.hpp"
47 | 
48 | void GenerateCoarseProblem(const SparseMatrix& A);
49 | void CopyCoarseProblemToHost(SparseMatrix& A);
50 | 
51 | #endif // GENERATECOARSEPROBLEM_HPP
52 | 


--------------------------------------------------------------------------------
/src/GenerateGeometry.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | //@HEADER
  3 | // ***************************************************
  4 | //
  5 | // HPCG: High Performance Conjugate Gradient Benchmark
  6 | //
  7 | // Contact:
  8 | // Michael A. Heroux ( maherou@sandia.gov)
  9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
 10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
 11 | //
 12 | // ***************************************************
 13 | //@HEADER
 14 | 
 15 | /*!
 16 |  @file GenerateGeometry.cpp
 17 | 
 18 |  HPCG routine
 19 |  */
 20 | 
 21 | #include <cmath>
 22 | #include <cstdlib>
 23 | #include <cassert>
 24 | 
 25 | #include "ComputeOptimalShapeXYZ.hpp"
 26 | #include "GenerateGeometry.hpp"
 27 | 
 28 | #ifdef HPCG_DEBUG
 29 | #include <fstream>
 30 | #include "hpcg.hpp"
 31 | using std::endl;
 32 | 
 33 | #endif
 34 | 
 35 | /*!
 36 |   Computes the factorization of the total number of processes into a
 37 |   3-dimensional process grid that is as close as possible to a cube. The
 38 |   quality of the factorization depends on the prime number structure of the
 39 |   total number of processes. It then stores this decomposition together with the
 40 |   parallel parameters of the run in the geometry data structure.
 41 | 
 42 |   @param[in]  size total number of MPI processes
 43 |   @param[in]  rank this process' rank among other MPI processes
 44 |   @param[in]  numThreads number of OpenMP threads in this process
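 45 |   @param[in]  pz z-dimension processor ID where second zone of nz values start (0 means nz is the same for all processes); zl, zu are the nz values of the zones before and from pz on, respectively
 46 |   @param[in]  nx, ny, nz number of grid points for each local block in the x, y, and z dimensions, respectively; npx, npy, npz requested process grid (a grid is computed instead when their product is not in the range 1 to size)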
 47 |   @param[out] geom data structure that will store the above parameters and the factoring of total number of processes into three dimensions
 48 | */
 49 | void GenerateGeometry(int size, int rank, int numThreads,
 50 |   int pz, local_int_t zl, local_int_t zu,
 51 |   local_int_t nx, local_int_t ny, local_int_t nz,
 52 |   int npx, int npy, int npz,
 53 |   Geometry * geom)
 54 | {
 55 |   // Let ComputeOptimalShapeXYZ choose the process grid when the requested one is empty or needs more than size ranks
 56 |   if (npx * npy * npz <= 0 || npx * npy * npz > size)
 57 |     ComputeOptimalShapeXYZ( size, npx, npy, npz );
 58 | 
 59 |   int * partz_ids = 0;
 60 |   local_int_t * partz_nz = 0;
 61 |   int npartz = 0;
 62 |   if (pz==0) { // No variation in nz sizes
 63 |     npartz = 1;
 64 |     partz_ids = new int[1];
 65 |     partz_nz = new local_int_t[1];
 66 |     partz_ids[0] = npz;
 67 |     partz_nz[0] = nz;
 68 |   }
 69 |   else { // Two zones: z-process indices below pz use zl, the remaining ones use zu
 70 |     npartz = 2;
 71 |     partz_ids = new int[2];
 72 |     partz_ids[0] = pz;
 73 |     partz_ids[1] = npz;
 74 |     partz_nz = new local_int_t[2];
 75 |     partz_nz[0] = zl;
 76 |     partz_nz[1] = zu;
 77 |   }
 78 | //  partz_ids[npartz-1] = npz; // The last element of this array is always npz
 79 |   int ipartz_ids = 0;
 80 |   for (int i=0; i< npartz; ++i) {
 81 |     assert(ipartz_ids<partz_ids[i]);  // Make sure that z partitioning is consistent with computed npz value
 82 |     ipartz_ids = partz_ids[i];
 83 |   }
 84 | 
 85 |   // Now compute this process's indices in the 3D cube
 86 |   int ipz = rank/(npx*npy);
 87 |   int ipy = (rank-ipz*npx*npy)/npx;
 88 |   int ipx = rank%npx;
 89 | 
 90 | #ifdef HPCG_DEBUG
 91 |   if (rank==0)
 92 |     HPCG_fout   << "size = "<< size << endl
 93 |         << "nx  = " << nx << endl
 94 |         << "ny  = " << ny << endl
 95 |         << "nz  = " << nz << endl
 96 |         << "npx = " << npx << endl
 97 |         << "npy = " << npy << endl
 98 |         << "npz = " << npz << endl;
 99 | 
100 |   HPCG_fout    << "For rank = " << rank << endl
101 |       << "ipx = " << ipx << endl
102 |       << "ipy = " << ipy << endl
103 |       << "ipz = " << ipz << endl;
104 | 
105 |   assert(size>=npx*npy*npz);
106 | #endif
107 |   geom->size = size;
108 |   geom->rank = rank;
109 |   geom->numThreads = numThreads;
110 |   geom->nx = nx;
111 |   geom->ny = ny;
112 |   geom->nz = nz;
113 |   geom->npx = npx;
114 |   geom->npy = npy;
115 |   geom->npz = npz;
116 |   geom->pz = pz;
117 |   geom->npartz = npartz;
118 |   geom->partz_ids = partz_ids; // ownership of both arrays passes to geom (released by DeleteGeometry)
119 |   geom->partz_nz = partz_nz;
120 |   geom->ipx = ipx;
121 |   geom->ipy = ipy;
122 |   geom->ipz = ipz;
123 | 
124 | // These values should be defined to take into account changes in nx, ny, nz values
125 | // due to variable local grid sizes
126 |   global_int_t gnx = static_cast<global_int_t>(npx)*nx; // widen before multiplying so the product cannot overflow int
127 |   global_int_t gny = static_cast<global_int_t>(npy)*ny;
128 |   //global_int_t gnz = npz*nz;
129 |   // We now permit varying values for nz for any nx-by-ny plane of MPI processes.
130 |   // npartz is the number of different groups of nx-by-ny groups of processes.
131 |   // partz_ids is an array of length npartz where each value indicates the z process of the last process in the ith nx-by-ny group.
132 |   // partz_nz is an array of length npartz containing the value of nz for the ith group.
133 | 
134 |   //        With no variation, npartz = 1, partz_ids[0] = npz, partz_nz[0] = nz
135 | 
136 |   global_int_t gnz = 0;
137 |   ipartz_ids = 0;
138 | 
139 |   for (int i=0; i< npartz; ++i) {
140 |     ipartz_ids = partz_ids[i] - ipartz_ids; // number of z-processes in partition i (relies on npartz <= 2, as set above)
141 |     gnz += static_cast<global_int_t>(partz_nz[i])*ipartz_ids;
142 |   }
143 |   //global_int_t giz0 = ipz*nz;
144 |   global_int_t giz0 = 0;
145 |   ipartz_ids = 0;
146 |   for (int i=0; i< npartz; ++i) {
147 |     const global_int_t ipart_nz = partz_nz[i]; // held as global_int_t so the products below are computed in 64 bits
148 |     if (ipz < partz_ids[i]) {
149 |       giz0 += (ipz-ipartz_ids)*ipart_nz; // this process lies inside partition i: add the layers below it
150 |       break;
151 |     } else {
152 |       ipartz_ids = partz_ids[i];
153 |       giz0 += ipartz_ids*ipart_nz; // this process lies above partition i: add the whole partition
154 |     }
155 | 
156 |   }
157 |   global_int_t gix0 = static_cast<global_int_t>(ipx)*nx;
158 |   global_int_t giy0 = static_cast<global_int_t>(ipy)*ny;
159 | 
160 | // Keep these values for later
161 |   geom->gnx = gnx;
162 |   geom->gny = gny;
163 |   geom->gnz = gnz;
164 |   geom->gix0 = gix0;
165 |   geom->giy0 = giy0;
166 |   geom->giz0 = giz0;
167 | 
168 |   return;
169 | }
170 | 


--------------------------------------------------------------------------------
/src/GenerateGeometry.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef GENERATEGEOMETRY_HPP
16 | #define GENERATEGEOMETRY_HPP
17 | #include "Geometry.hpp"
18 | void GenerateGeometry(int size, int rank, int numThreads, int pz, local_int_t zl, local_int_t zu, local_int_t nx, local_int_t ny, local_int_t nz, int npx, int npy, int npz, Geometry * geom); // fills every field of *geom, including the new[]-allocated partz_ids and partz_nz arrays
19 | #endif // GENERATEGEOMETRY_HPP
20 | 


--------------------------------------------------------------------------------
/src/GenerateProblem.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /* ************************************************************************
16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
17 |  *
18 |  * Redistribution and use in source and binary forms, with or without modification,
19 |  * are permitted provided that the following conditions are met:
20 |  *
21 |  * 1. Redistributions of source code must retain the above copyright notice, this
22 |  *    list of conditions and the following disclaimer.
23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
24 |  *    this list of conditions and the following disclaimer in the documentation
25 |  *    and/or other materials provided with the distribution.
26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
27 |  *    may be used to endorse or promote products derived from this software without
28 |  *    specific prior written permission.
29 |  *
30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 |  * POSSIBILITY OF SUCH DAMAGE.
40 |  *
41 |  * ************************************************************************ */
42 | 
43 | #ifndef GENERATEPROBLEM_HPP
44 | #define GENERATEPROBLEM_HPP
45 | 
46 | #include "SparseMatrix.hpp"
47 | #include "Vector.hpp"
48 | 
49 | void GenerateProblem(SparseMatrix& A, Vector* b, Vector* x, Vector* xexact);
50 | void CopyProblemToHost(SparseMatrix& A, Vector* b, Vector* x, Vector* xexact);
51 | 
52 | #endif // GENERATEPROBLEM_HPP
53 | 


--------------------------------------------------------------------------------
/src/GenerateProblem_ref.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef GENERATEPROBLEM_REF_HPP
16 | #define GENERATEPROBLEM_REF_HPP
17 | #include "SparseMatrix.hpp"
18 | #include "Vector.hpp"
19 | 
20 | void GenerateProblem_ref(SparseMatrix & A, Vector * b, Vector * x, Vector * xexact);
21 | #endif // GENERATEPROBLEM_REF_HPP
22 | 


--------------------------------------------------------------------------------
/src/Geometry.hpp:
--------------------------------------------------------------------------------
  1 | 
  2 | //@HEADER
  3 | // ***************************************************
  4 | //
  5 | // HPCG: High Performance Conjugate Gradient Benchmark
  6 | //
  7 | // Contact:
  8 | // Michael A. Heroux ( maherou@sandia.gov)
  9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
 10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
 11 | //
 12 | // ***************************************************
 13 | //@HEADER
 14 | 
 15 | /*!
 16 |  @file Geometry.hpp
 17 | 
 18 |  HPCG data structure for problem geometry
 19 |  */
 20 | 
 21 | #ifndef GEOMETRY_HPP
 22 | #define GEOMETRY_HPP
 23 | 
 24 | /*!
 25 |   This defines the type for integers that have local subdomain dimension.
 26 | 
 27 |   Define as "long long" when local problem dimension is > 2^31
 28 | */
 29 | typedef int local_int_t;
 30 | //typedef long long local_int_t;
 31 | 
 32 | /*!
 33 |   This defines the type for integers that have global dimension
 34 | 
 35 |   Define as "long long" when global problem dimension is > 2^31
 36 | */
 37 | //typedef int global_int_t;
 38 | typedef long long global_int_t;
 39 | 
 40 | // This macro should be defined if the global_int_t is not long long
 41 | // in order to stop complaints from non-C++11 compliant compilers.
 42 | //#define HPCG_NO_LONG_LONG
 43 | 
 44 | /*!
 45 |   This is a data structure to contain all processor geometry information
 46 | */
 47 | struct Geometry_STRUCT {
 48 |   int size; //!< Number of MPI processes
 49 |   int rank; //!< This process' rank in the range [0 to size - 1]
 50 |   int numThreads; //!< This process' number of threads
 51 |   local_int_t nx;   //!< Number of x-direction grid points for each local subdomain
 52 |   local_int_t ny;   //!< Number of y-direction grid points for each local subdomain
 53 |   local_int_t nz;   //!< Number of z-direction grid points for each local subdomain
 54 |   int npx;  //!< Number of processors in x-direction
 55 |   int npy;  //!< Number of processors in y-direction
 56 |   int npz;  //!< Number of processors in z-direction
 57 |   int pz; //!< partition ID of z-dimension process that starts the second region of nz values (0 when nz does not vary)
 58 |   int npartz; //!< Number of partitions with varying nz values (GenerateGeometry sets 1 or 2)
 59 |   int * partz_ids; //!< Array of length npartz: for each partition, one past the z-direction index of its last processor, so the last entry is npz (valid values are 1 to npz); released by DeleteGeometry
 60 |   local_int_t * partz_nz; //!< Array of length npartz containing the nz values for each partition; released by DeleteGeometry
 61 |   int ipx;  //!< Current rank's x location in the npx by npy by npz processor grid
 62 |   int ipy;  //!< Current rank's y location in the npx by npy by npz processor grid
 63 |   int ipz;  //!< Current rank's z location in the npx by npy by npz processor grid
 64 |   global_int_t gnx;  //!< Global number of x-direction grid points
 65 |   global_int_t gny;  //!< Global number of y-direction grid points
 66 |   global_int_t gnz;  //!< Global number of z-direction grid points
 67 |   global_int_t gix0;  //!< Base global x index for this rank in the npx by npy by npz processor grid
 68 |   global_int_t giy0;  //!< Base global y index for this rank in the npx by npy by npz processor grid
 69 |   global_int_t giz0;  //!< Base global z index for this rank in the npx by npy by npz processor grid
 70 | 
 71 | };
 72 | typedef struct Geometry_STRUCT Geometry;
 73 | 
 74 | /*!
 75 |   Returns the rank of the MPI process that is assigned the global row index
 76 |   given as the input argument.
 77 | 
 78 |   @param[in] geom  The description of the problem's geometry.
 79 |   @param[in] index The global row index
 80 | 
 81 |   @return Returns the MPI rank of the process assigned the row
 82 | */
 83 | inline int ComputeRankOfMatrixRow(const Geometry & geom, global_int_t index) {
 84 |   const global_int_t globalNx = geom.gnx;
 85 |   const global_int_t globalNy = geom.gny;
 86 | 
 87 |   // Split the global row index into grid coordinates (x varies fastest, z slowest).
 88 |   global_int_t zRemaining = index/(globalNy*globalNx);
 89 |   const global_int_t yCoord = (index-zRemaining*globalNy*globalNx)/globalNx;
 90 |   const global_int_t xCoord = index%globalNx;
 91 | 
 92 |   // nz may differ between groups of x-y process planes: partz_nz[p] is the nz value
 93 |   // of group p and partz_ids[p] the z-process index at which that group ends.
 94 |   // With no variation, npartz = 1, partz_ids[0] = npz, partz_nz[0] = nz.
 95 |   // Walk the groups, peeling off whole groups until the one holding the row is found.
 96 |   int zProc = 0;
 97 |   int planesInGroup = 0;
 98 |   int p = 0;
 99 |   while (p < geom.npartz) {
100 |     const int groupNz = geom.partz_nz[p];
101 |     planesInGroup = geom.partz_ids[p] - planesInGroup;
102 |     const int groupRows = groupNz*planesInGroup;
103 |     if (zRemaining > groupRows) {
104 |       zProc += planesInGroup;
105 |       zRemaining -= groupRows;
106 |       ++p;
107 |       continue;
108 |     }
109 |     zProc += zRemaining/groupNz;
110 |     break;
111 |   }
112 |   const int yProc = yCoord/geom.ny;
113 |   const int xProc = xCoord/geom.nx;
114 |   // Process ranks are laid out x-fastest over the npx-by-npy-by-npz grid.
115 |   return xProc+yProc*geom.npx+zProc*geom.npy*geom.npx;
116 | }
117 | 
118 | 
119 | /*!
120 |  Destructor for geometry data.
121 | 
122 |  @param[inout] geom the geometry data structure whose storage is deallocated
123 |  */
124 | inline void DeleteGeometry(Geometry & geom) {
125 |   // Free the two z-partition tables, then null the pointers so a repeated call is a harmless no-op.
126 |   delete [] geom.partz_nz;
127 |   delete [] geom.partz_ids;
128 |   geom.partz_nz = 0;
129 |   geom.partz_ids = 0;
130 | }
131 | 
132 | 
133 | 
134 | #endif // GEOMETRY_HPP
135 | 


--------------------------------------------------------------------------------
/src/MGData.hpp:
--------------------------------------------------------------------------------
  1 | 
  2 | //@HEADER
  3 | // ***************************************************
  4 | //
  5 | // HPCG: High Performance Conjugate Gradient Benchmark
  6 | //
  7 | // Contact:
  8 | // Michael A. Heroux ( maherou@sandia.gov)
  9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
 10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
 11 | //
 12 | // ***************************************************
 13 | //@HEADER
 14 | 
 15 | /* ************************************************************************
 16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
 17 |  *
 18 |  * Redistribution and use in source and binary forms, with or without modification,
 19 |  * are permitted provided that the following conditions are met:
 20 |  *
 21 |  * 1. Redistributions of source code must retain the above copyright notice, this
 22 |  *    list of conditions and the following disclaimer.
 23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
 24 |  *    this list of conditions and the following disclaimer in the documentation
 25 |  *    and/or other materials provided with the distribution.
 26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
 27 |  *    may be used to endorse or promote products derived from this software without
 28 |  *    specific prior written permission.
 29 |  *
 30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 39 |  * POSSIBILITY OF SUCH DAMAGE.
 40 |  *
 41 |  * ************************************************************************ */
 42 | 
 43 | /*!
 44 |  @file MGData.hpp
 45 | 
 46 |  HPCG data structure
 47 |  */
 48 | 
 49 | #ifndef MGDATA_HPP
 50 | #define MGDATA_HPP
 51 | 
 52 | #include <cassert>
 53 | #include <hip/hip_runtime_api.h>
 54 | 
 55 | #include "utils.hpp"
 56 | #include "SparseMatrix.hpp"
 57 | #include "Vector.hpp"
 58 | 
 59 | struct MGData_STRUCT {
 60 |   int numberOfPresmootherSteps; // Call ComputeSYMGS this many times prior to coarsening
 61 |   int numberOfPostsmootherSteps; // Call ComputeSYMGS this many times after coarsening
 62 |   local_int_t * f2cOperator; //!< 1D array containing the fine operator local IDs that will be injected into coarse space (host copy; nullptr after InitializeMGData, delete[]d by DeleteMGData).
 63 |   Vector * rc; // coarse grid residual vector (deleted by DeleteMGData)
 64 |   Vector * xc; // coarse grid solution vector (deleted by DeleteMGData)
 65 |   Vector * Axf; // fine grid residual vector (deleted by DeleteMGData)
 66 |   /*!
 67 |    This is for storing optimized data structures created in OptimizeProblem and
 68 |    used inside optimized ComputeSPMV().
 69 |    */
 70 |   void * optimizationData;
 71 | 
 72 |   local_int_t* d_f2cOperator; //!< f2cOperator on device (released with deviceFree in DeleteMGData)
 73 |   local_int_t* d_c2fOperator; //!< second device array, also released with deviceFree; presumably the coarse-to-fine map — TODO confirm
 74 | };
 75 | typedef struct MGData_STRUCT MGData;
 76 | 
 77 | /*!
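 78 |  Initializes the MG data structure with the given device operators and work vectors.
 79 | 
 80 |  @param[in] d_f2cOperator, d_c2fOperator - device arrays, stored as given (DeleteMGData later releases them with deviceFree)
 81 |  @param[in] rc, xc, Axf - vectors, stored as given (DeleteMGData later deletes them)
 82 |  @param[out] data the MG data structure that is filled in; both smoother step counts are set to 1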
 83 |  */
 84 | inline void InitializeMGData(local_int_t* d_f2cOperator, local_int_t* d_c2fOperator, Vector* rc, Vector* xc, Vector* Axf, MGData & data) {
 85 |   data.numberOfPresmootherSteps = 1; // one ComputeSYMGS call before coarsening ...
 86 |   data.numberOfPostsmootherSteps = 1; // ... and one after
 87 |   data.f2cOperator = nullptr; // no host copy of the injection operator is kept here
 88 |   data.d_f2cOperator = d_f2cOperator; // Space for injection operator
 89 |   data.d_c2fOperator = d_c2fOperator;
 90 |   data.rc = rc; // the three vectors are stored as given, not copied
 91 |   data.xc = xc;
 92 |   data.Axf = Axf;
 93 |   data.optimizationData = nullptr; // previously left indeterminate; every field now has a defined value
 94 | }
 95 | 
 96 | /*!
 97 |  Destructor for the MG data.
 98 | 
 99 |  @param[inout] data the MG data structure whose storage is deallocated
100 |  */
101 | inline void DeleteMGData(MGData & data) {
102 |   // Releases everything data points to. rc, xc and Axf are dereferenced unconditionally, so they must be non-null.
103 |   if (data.f2cOperator) delete [] data.f2cOperator; // the null check is redundant (delete[] accepts null) but harmless
104 |   DeleteVector(*data.Axf); // NOTE(review): DeleteVector / HIPDeleteVector presumably free host / device storage — confirm in Vector.hpp
105 |   DeleteVector(*data.rc);
106 |   DeleteVector(*data.xc);
107 | #ifdef HPCG_REFERENCE
108 |   HIPDeleteVector(*data.Axf); // only in HPCG_REFERENCE builds; presumably Axf has no device storage otherwise — TODO confirm
109 | #endif
110 |   HIPDeleteVector(*data.rc);
111 |   HIPDeleteVector(*data.xc);
112 |   delete data.Axf; // the Vector objects themselves go last, after their contents
113 |   delete data.rc;
114 |   delete data.xc;
115 | 
116 |   HIP_CHECK(deviceFree(data.d_f2cOperator)); // return both device operator arrays through deviceFree
117 |   HIP_CHECK(deviceFree(data.d_c2fOperator));
118 | 
119 |   return;
120 | }
121 | 
122 | #endif // MGDATA_HPP
123 | 
124 | 


--------------------------------------------------------------------------------
/src/Memory.hpp:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright (c) 2019 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Redistribution and use in source and binary forms, with or without modification,
  5 |  * are permitted provided that the following conditions are met:
  6 |  *
  7 |  * 1. Redistributions of source code must retain the above copyright notice, this
  8 |  *    list of conditions and the following disclaimer.
  9 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
 10 |  *    this list of conditions and the following disclaimer in the documentation
 11 |  *    and/or other materials provided with the distribution.
 12 |  * 3. Neither the name of the copyright holder nor the names of its contributors
 13 |  *    may be used to endorse or promote products derived from this software without
 14 |  *    specific prior written permission.
 15 |  *
 16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 17 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 18 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 19 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 20 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 21 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 22 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 23 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 24 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 25 |  * POSSIBILITY OF SUCH DAMAGE.
 26 |  *
 27 |  * ************************************************************************ */
 28 | 
 29 | /*!
 30 |  @file Memory.hpp
 31 | 
 32 |  Device memory management
 33 |  */
 34 | 
 35 | #ifndef MEMORY_HPP
 36 | #define MEMORY_HPP
 37 | 
 38 | #include <cstdlib>
 39 | #include <list>
 40 | #include <string>
 41 | #include <hip/hip_runtime_api.h>
 42 | 
 43 | #include "Geometry.hpp"
 44 | 
 45 | struct hipMemObject_t // element type of hipAllocator_t's allocation list (objects_)
 46 | {
 47 |     size_t size; // presumably the size of the tracked allocation in bytes — TODO confirm
 48 |     char* address; // presumably where the tracked allocation starts — TODO confirm
 49 | };
 50 | 
 51 | class hipAllocator_t // NOTE(review): looks like a pool allocator handing out pieces of one device buffer (buffer_) — confirm in the implementation
 52 | {
 53 |     public:
 54 | 
 55 |     hipAllocator_t(void);
 56 |     ~hipAllocator_t(void);
 57 | 
 58 |     hipError_t Initialize(int rank, // presumably sizes the pool from the process count and local grid dimensions — TODO confirm
 59 |                           int nprocs,
 60 |                           local_int_t nx,
 61 |                           local_int_t ny,
 62 |                           local_int_t nz);
 63 |     hipError_t Clear(void);
 64 | 
 65 |     hipError_t Alloc(void** ptr, size_t size); // presumably returns the new address through *ptr — TODO confirm
 66 |     hipError_t Realloc(void* ptr, size_t size); // NOTE(review): ptr is passed by value, so a moved block could not be reported back to the caller — confirm it resizes in place
 67 |     hipError_t Free(void* ptr);
 68 |     // Read-only accessors for the three memory counters declared in the private section.
 69 |     inline size_t GetFreeMemory(void) const { return this->free_mem_; }
 70 |     inline size_t GetUsedMemory(void) const { return this->used_mem_; }
 71 |     inline size_t GetTotalMemory(void) const { return this->total_mem_; }
 72 | 
 73 |     private:
 74 | 
 75 |     // Current rank
 76 |     int rank_;
 77 | 
 78 |     // Returns the maximum memory requirements
 79 |     size_t ComputeMaxMemoryRequirements_(int nprocs,
 80 |                                          local_int_t nx,
 81 |                                          local_int_t ny,
 82 |                                          local_int_t nz) const;
 83 | 
 84 |     // Total memory size
 85 |     size_t total_mem_;
 86 | 
 87 |     // Free memory size
 88 |     size_t free_mem_;
 89 | 
 90 |     // Used memory size
 91 |     size_t used_mem_;
 92 | 
 93 |     // Device memory buffer
 94 |     char* buffer_;
 95 | 
 96 |     // List to keep track of allocations
 97 |     std::list<hipMemObject_t*> objects_;
 98 | };
 99 | 
100 | hipError_t deviceMalloc(void** ptr, size_t size);
101 | hipError_t deviceRealloc(void* ptr, size_t size);
102 | hipError_t deviceDefrag(void** ptr, size_t size);
103 | hipError_t deviceFree(void* ptr);
104 | 
105 | #endif // MEMORY_HPP
106 | 


--------------------------------------------------------------------------------
/src/MixedBaseCounter.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <map>
 3 | 
 4 | #include "MixedBaseCounter.hpp"
 5 | 
 6 | MixedBaseCounter::MixedBaseCounter(int *counts, int length) {
 7 |   // Builds a counter with `length` digits; digit i runs from 0 to counts[i] inclusive.
 8 |   // Only counts[0..length-1] is read (the loop used to read 32 entries regardless), and
 9 |   // length is clamped to [0, 32] so every access stays inside the 33-slot arrays.
10 |   if (length < 0) length = 0;
11 |   if (length > 32) length = 32;
12 |   this->length = length;
13 | 
14 |   for (int i = 0; i <= 32; ++i) { // slots from index length upwards become the 0 terminators
15 |     this->max_counts[i] = (i < length) ? counts[i] : 0;
16 |     this->cur_counts[i] = 0;
17 |   }
18 | }
19 | 
20 | MixedBaseCounter::MixedBaseCounter(MixedBaseCounter & left, MixedBaseCounter & right) {
21 |   length = left.length; // inherit the digit count of `left`
22 |   for (int d = length - 1; d >= 0; --d) { // digits are independent, so the visiting order does not matter
23 |     cur_counts[d] = 0;
24 |     max_counts[d] = left.max_counts[d] - right.cur_counts[d]; // limit of left minus what right currently holds
25 |   }
26 | }
27 | 
28 | void
29 | MixedBaseCounter::next() {
30 |   for (int i = 0; i < this->length; ++i) {
31 |     this->cur_counts[i]++;
32 |     if (this->cur_counts[i] > this->max_counts[i]) {
33 |       this->cur_counts[i] = 0;
34 |       continue;
35 |     }
36 |     break;
37 |   }
38 | }
39 | 
40 | int
41 | MixedBaseCounter::is_zero() {
42 |   for (int i = 0; i < this->length; ++i)
43 |     if (this->cur_counts[i])
44 |       return 0;
45 |   return 1;
46 | }
47 | 
48 | int
49 | MixedBaseCounter::product(int * multipliers) {
50 |   int k=0, x=1;
51 | 
52 |   for (int i = 0; i < this->length; ++i)
53 |     for (int j = 0; j < this->cur_counts[i]; ++j) {
54 |       k = 1;
55 |       x *= multipliers[i];
56 |     }
57 | 
58 |   return x * k;
59 | }
60 | 


--------------------------------------------------------------------------------
/src/MixedBaseCounter.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | class MixedBaseCounter { // mixed-radix counter: digit i runs from 0 up to max_counts[i]
 4 |   private:
 5 |     int length; //!< number of prime factor counts (cannot exceed 32 for a 32-bit integer)
 6 |     int max_counts[32+1]; //!< maximum value for prime factor counts
 7 |     int cur_counts[32+1]; //!< current prime factor counts
 8 | 
 9 |   public:
10 |     MixedBaseCounter(int *counts, int length); //!< counter starting at zero whose digit i may reach counts[i]
11 |     MixedBaseCounter(MixedBaseCounter & left, MixedBaseCounter & right); //!< counter starting at zero with maxima left.max_counts[i] - right.cur_counts[i]
12 |     void next(); //!< increment the lowest digit, wrapping and carrying when a digit exceeds its maximum
13 |     int is_zero(); //!< 1 if every digit in use is zero, 0 otherwise
14 |     int product(int * multipliers); //!< product of multipliers[i] raised to cur_counts[i]; 0 when all digits are zero
15 | };
16 | 


--------------------------------------------------------------------------------
/src/MultiColoring.hpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Redistribution and use in source and binary forms, with or without modification,
 5 |  * are permitted provided that the following conditions are met:
 6 |  *
 7 |  * 1. Redistributions of source code must retain the above copyright notice, this
 8 |  *    list of conditions and the following disclaimer.
 9 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
10 |  *    this list of conditions and the following disclaimer in the documentation
11 |  *    and/or other materials provided with the distribution.
12 |  * 3. Neither the name of the copyright holder nor the names of its contributors
13 |  *    may be used to endorse or promote products derived from this software without
14 |  *    specific prior written permission.
15 |  *
16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
20 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
22 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 |  * POSSIBILITY OF SUCH DAMAGE.
26 |  *
27 |  * ************************************************************************ */
28 | 
29 | #ifndef MULTICOLORING_HPP
30 | #define MULTICOLORING_HPP
31 | 
32 | #include "SparseMatrix.hpp"
33 | 
34 | void JPLColoring(SparseMatrix& A); // NOTE(review): presumably a Jones-Plassmann-Luby style multi-coloring of A — confirm against the definition
35 | 
36 | #endif // MULTICOLORING_HPP
37 | 


--------------------------------------------------------------------------------
/src/OptimizeProblem.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef OPTIMIZEPROBLEM_HPP
16 | #define OPTIMIZEPROBLEM_HPP
17 | 
18 | #include "SparseMatrix.hpp"
19 | #include "Vector.hpp"
20 | #include "CGData.hpp"
21 | 
22 | int OptimizeProblem(SparseMatrix & A, CGData & data,  Vector & b, Vector & x, Vector & xexact); // NOTE(review): return convention is not visible here — presumably 0 on success, confirm
23 | 
24 | // This helper function should be implemented in a non-trivial way if OptimizeProblem is non-trivial
25 | // It should return as type double, the total number of bytes allocated and retained after calling OptimizeProblem.
26 | // This value will be used to report Gbytes used in ReportResults (the value returned will be divided by 1000000000.0).
27 | 
28 | double OptimizeProblemMemoryUse(const SparseMatrix & A);
29 | 
30 | #endif  // OPTIMIZEPROBLEM_HPP
31 | 


--------------------------------------------------------------------------------
/src/OutputFile.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | //@HEADER
  3 | // ***************************************************
  4 | //
  5 | // HPCG: High Performance Conjugate Gradient Benchmark
  6 | //
  7 | // Contact:
  8 | // Michael A. Heroux ( maherou@sandia.gov)
  9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
 10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
 11 | //
 12 | // ***************************************************
 13 | //@HEADER
 14 | 
 15 | 
#include <ctime>
#include <fstream>
#include <list>
#include <sstream>
#include <string>

#include "OutputFile.hpp"
 22 | 
 23 | using std::string;
 24 | using std::stringstream;
 25 | using std::list;
 26 | using std::ofstream;
 27 | 
 28 | OutputFile::OutputFile(const string & name_arg, const string & version_arg)
 29 |   : name(name_arg), version(version_arg), eol("\n"), keySeparator("::") {}
 30 | 
 31 | OutputFile::OutputFile(void) : eol("\n"), keySeparator("::") {}
 32 | 
 33 | OutputFile::~OutputFile() {
 34 |   for (list<OutputFile*>::iterator it = descendants.begin(); it != descendants.end(); ++it) {
 35 |     delete *it;
 36 |   }
 37 | }
 38 | 
 39 | void
 40 | OutputFile::add(const string & key_arg, const string & value_arg) {
 41 |   descendants.push_back(allocKeyVal(key_arg, value_arg));
 42 | }
 43 | 
 44 | void
 45 | OutputFile::add(const string & key_arg, double value_arg) {
 46 |   stringstream ss;
 47 |   ss << value_arg;
 48 |   descendants.push_back(allocKeyVal(key_arg, ss.str()));
 49 | }
 50 | 
 51 | void
 52 | OutputFile::add(const string & key_arg, int value_arg) {
 53 |   stringstream ss;
 54 |   ss << value_arg;
 55 |   descendants.push_back(allocKeyVal(key_arg, ss.str()));
 56 | }
 57 | 
 58 | #ifndef HPCG_NO_LONG_LONG
 59 | 
 60 | void
 61 | OutputFile::add(const string & key_arg, long long value_arg) {
 62 |   stringstream ss;
 63 |   ss << value_arg;
 64 |   descendants.push_back(allocKeyVal(key_arg, ss.str()));
 65 | }
 66 | 
 67 | #endif
 68 | 
 69 | void
 70 | OutputFile::add(const string & key_arg, size_t value_arg) {
 71 |   stringstream ss;
 72 |   ss << value_arg;
 73 |   descendants.push_back(allocKeyVal(key_arg, ss.str()));
 74 | }
 75 | 
 76 | void
 77 | OutputFile::setKeyValue(const string & key_arg, const string & value_arg) {
 78 |   key = key_arg;
 79 |   value = value_arg;
 80 | }
 81 | 
 82 | OutputFile *
 83 | OutputFile::get(const string & key_arg) {
 84 |   for (list<OutputFile*>::iterator it = descendants.begin(); it != descendants.end(); ++it) {
 85 |     if ((*it)->key == key_arg)
 86 |       return *it;
 87 |   }
 88 | 
 89 |   return 0;
 90 | }
 91 | 
 92 | string
 93 | OutputFile::generateRecursive(string prefix) {
 94 |   string result = "";
 95 | 
 96 |   result += prefix + key + "=" + value + eol;
 97 | 
 98 |   for (list<OutputFile*>::iterator it = descendants.begin(); it != descendants.end(); ++it) {
 99 |     result += (*it)->generateRecursive(prefix + key + keySeparator);
100 |   }
101 | 
102 |   return result;
103 | }
104 | 
105 | string
106 | OutputFile::generate(void) {
107 |   string result = name + "\nversion=" + version + eol;
108 | 
109 |   for (list<OutputFile*>::iterator it = descendants.begin(); it != descendants.end(); ++it) {
110 |     result += (*it)->generateRecursive("");
111 |   }
112 | 
113 |   time_t rawtime;
114 |   time(&rawtime);
115 |   tm * ptm = localtime(&rawtime);
116 |   char sdate[256];
117 |   //use tm_mon+1 because tm_mon is 0 .. 11 instead of 1 .. 12
118 |   sprintf (sdate,"%04d-%02d-%02d_%02d-%02d-%02d",ptm->tm_year + 1900, ptm->tm_mon+1,
119 |         ptm->tm_mday, ptm->tm_hour, ptm->tm_min,ptm->tm_sec);
120 | 
121 |   string filename = name + "_" + version + "_";
122 |   filename += string(sdate) + ".txt";
123 | 
124 |   ofstream myfile(filename.c_str());
125 |   myfile << result;
126 |   myfile.close();
127 | 
128 |   return result;
129 | }
130 | 
131 | OutputFile * OutputFile::allocKeyVal(const std::string & key_arg, const std::string & value_arg) {
132 |   OutputFile * of = new OutputFile();
133 |   of->setKeyValue(key_arg, value_arg);
134 |   return of;
135 | }
136 | 


--------------------------------------------------------------------------------
/src/OutputFile.hpp:
--------------------------------------------------------------------------------
  1 | 
  2 | //@HEADER
  3 | // ***************************************************
  4 | //
  5 | // HPCG: High Performance Conjugate Gradient Benchmark
  6 | //
  7 | // Contact:
  8 | // Michael A. Heroux ( maherou@sandia.gov)
  9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
 10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
 11 | //
 12 | // ***************************************************
 13 | //@HEADER
 14 | 
 15 | /*!
 16 |  @file OutputFile.hpp
 17 | 
 18 |  HPCG output file classes
 19 |  */
 20 | 
 21 | #ifndef OUTPUTFILE_HPP
 22 | #define OUTPUTFILE_HPP
 23 | 
 24 | #include <list>
 25 | #include <string>
 26 | 
 27 | //! The OutputFile class for the uniform collecting and reporting of performance data for HPCG
 28 | 
 29 | /*!
 30 | 
 31 |   The OutputFile class facilitates easy collecting and reporting of
 32 |   key-value-formatted data that can be then registered with the HPCG results
 33 |   collection website. The keys may have hierarchy key1::key2::key3=val with
 34 |   double colon :: as a separator. A sample output may look like this (note how
 35 |   "major" and "minor" keys repeat with different ancestor keys):
 36 | 
 37 | \code
 38 | 
 39 | version=3.2.1alpha
 40 | version::major=3
 41 | version::minor=2
 42 | version::micro=1
 43 | version::release=alpha
 44 | axis=xyz
 45 | axis::major=x
 46 | axis::minor=y
 47 | 
 48 | \endcode
 49 | 
 50 | */
 51 | class OutputFile {
 52 | protected:
 53 |   std::list<OutputFile *> descendants; //!< descendant elements (each one is deleted by the destructor)
 54 |   std::string name; //!< name of the benchmark
 55 |   std::string version; //!< version of the benchmark
 56 |   std::string key; //!< the key under which the element is stored
 57 |   std::string value; //!< the value of the stored element
 58 |   std::string eol; //!< end-of-line character sequence in the output file
 59 |   std::string keySeparator; //!< character sequence to separate keys in the output file
 60 | 
 61 |   //! Recursively generate output string from descendant list, and their descendants and so on
 62 |   std::string generateRecursive(std::string prefix);
 63 | 
 64 | public:
 65 |   static OutputFile * allocKeyVal(const std::string & key, const std::string & value); //!< heap-allocate a node holding the given key-value pair and return it
 66 | 
 67 |   //! Constructor: accepts name and version as strings that are used to create a file name for printing results.
 68 |   /*!
 69 |     This constructor accepts a name and version number for the benchmark that
 70 |     are used to form the file name for results that are generated by
 71 |     the generate() method.
 72 |     \param name (in) string containing name of the benchmark
 73 |     \param version (in) string containing the version of the benchmark
 74 |   */
 75 |   OutputFile(const std::string & name, const std::string & version);
 76 | 
 77 |   //! Default constructor: no-arguments accepted, should be used for descendant nodes
 78 |   /*!
 79 |     This no-argument constructor can be used for descendant nodes to provide
 80 |     key1::key2::key3=val output. Unlike the root node, descendant nodes do not
 81 |     have name and version but only store key-value pairs.
 82 |   */
 83 |   OutputFile(void);
 84 | 
 85 |   ~OutputFile(); //!< deletes all descendants; NOTE(review): no copy constructor/assignment is declared, so a copied node would delete the same descendants a second time
 86 | 
 87 |   //! Create and add a descendant element with value of type "string"
 88 |   /*!
 89 |   Create and add a descendant element identified by "key" and associated with
 90 |   "value".  The element is added at the end of a list of previously added
 91 |   elements.
 92 | 
 93 |   @param[in] key   The key that identifies the added element and under which the element is stored
 94 |   @param[in] value The value stored by the element
 95 |   */
 96 |   void add(const std::string & key, const std::string & value);
 97 | 
 98 |   //! Create and add a descendant element with value of type "double"
 99 |   /*!
100 |   Create and add a descendant element identified by "key" and associated with
101 |   "value".  The element is added at the end of a list of previously added
102 |   elements.
103 | 
104 |   @param[in] key   The key that identifies the added element and under which the element is stored
105 |   @param[in] value The value stored by the element
106 |   */
107 |    void add(const std::string & key, double value);
108 | 
109 |   //! Create and add a descendant element with value of type "int"
110 |   /*!
111 |   Create and add a descendant element identified by "key" and associated with
112 |   "value".  The element is added at the end of a list of previously added
113 |   elements.
114 | 
115 |   @param[in] key   The key that identifies the added element and under which the element is stored
116 |   @param[in] value The value stored by the element
117 |   */
118 |    void add(const std::string & key, int value);
119 | 
120 | #ifndef HPCG_NO_LONG_LONG
121 |   //! Create and add a descendant element with value of type "long long"
122 |   /*!
123 |   Create and add a descendant element identified by "key" and associated with
124 |   "value".  The element is added at the end of a list of previously added
125 |   elements.
126 | 
127 |   @param[in] key   The key that identifies the added element and under which the element is stored
128 |   @param[in] value The value stored by the element
129 |   */
130 |    void add(const std::string & key, long long value);
131 | #endif
132 | 
133 |   //! Create and add a descendant element with value of type "size_t"
134 |   /*!
135 |   Create and add a descendant element identified by "key" and associated with
136 |   "value".  The element is added at the end of a list of previously added
137 |   elements.
138 | 
139 |   @param[in] key   The key that identifies the added element and under which the element is stored
140 |   @param[in] value The value stored by the element
141 |   */
142 |    void add(const std::string & key, size_t value);
143 | 
144 |   //! Key-Value setter method
145 |   /*!
146 |   Set the key and the value of this element.
147 | 
148 |   @param[in] key   The key that identifies this element and under which the element is stored
149 |   @param[in] value The value stored by the element
150 |   */
151 |   void setKeyValue(const std::string & key, const std::string & value);
152 | 
153 |   //! Get the element in the list with the given key or return NULL if not found
154 |   OutputFile * get(const std::string & key);
155 | 
156 |   //! Generate output string with results based on the stored key-value hierarchy and also write it to the file name_version_timestamp.txt
157 |   std::string generate(void);
158 | };
159 | 
160 | #endif // OUTPUTFILE_HPP
161 | 


--------------------------------------------------------------------------------
/src/Permute.hpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright (c) 2019 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Redistribution and use in source and binary forms, with or without modification,
 5 |  * are permitted provided that the following conditions are met:
 6 |  *
 7 |  * 1. Redistributions of source code must retain the above copyright notice, this
 8 |  *    list of conditions and the following disclaimer.
 9 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
10 |  *    this list of conditions and the following disclaimer in the documentation
11 |  *    and/or other materials provided with the distribution.
12 |  * 3. Neither the name of the copyright holder nor the names of its contributors
13 |  *    may be used to endorse or promote products derived from this software without
14 |  *    specific prior written permission.
15 |  *
16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
20 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
22 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 |  * POSSIBILITY OF SUCH DAMAGE.
26 |  *
27 |  * ************************************************************************ */
28 | 
29 | #ifndef PERMUTE_HPP
30 | #define PERMUTE_HPP
31 | 
32 | #include "SparseMatrix.hpp"
33 | 
34 | void PermuteColumns(SparseMatrix& A); // NOTE(review): which permutation is applied is not visible in this header — TODO document
35 | void PermuteRows(SparseMatrix& A); // NOTE(review): which permutation is applied is not visible in this header — TODO document
36 | void PermuteVector(local_int_t size, Vector& v, const local_int_t* perm); // NOTE(review): presumably reorders the first `size` entries of v according to perm — confirm direction of the mapping
37 | 
38 | #endif // PERMUTE_HPP
39 | 


--------------------------------------------------------------------------------
/src/ReadHpcgDat.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #include <cstdio>
16 | 
17 | #include "ReadHpcgDat.hpp"
18 | 
/*!
  Consumes characters from the stream up to and including the next line ending.

  A line ends at "\n", at "\r\n", at a lone "\r" or at the end of the file.
  After a "\r" one more character is read: a following "\n" (or end of file) is
  consumed as well, anything else is pushed back onto the stream.

  @param[in] stream  stream to read from

  @return the last character consumed: '\n', '\r' (lone carriage return) or EOF
*/
static int
SkipUntilEol(FILE *stream) {
  int current;

  // Phase 1: read until a line terminator or the end of the file shows up
  for (;;) {
    current = fgetc(stream);
    if (current == EOF || current == '\n')
      return current;
    if (current == '\r')
      break;
  }

  // Phase 2: on Windows, \r might be followed by \n
  const int lookahead = fgetc(stream);
  if (lookahead == '\n' || lookahead == EOF)
    return lookahead;

  ungetc(lookahead, stream);
  return current;
}
40 | 
41 | int
42 | ReadHpcgDat(int *localDimensions, int *secondsPerRun, int *localProcDimensions) {
43 |   FILE * hpcgStream = fopen("hpcg.dat", "r");
44 | 
45 |   if (! hpcgStream)
46 |     return -1;
47 | 
48 |   SkipUntilEol(hpcgStream); // skip the first line
49 | 
50 |   SkipUntilEol(hpcgStream); // skip the second line
51 | 
52 |   for (int i = 0; i < 3; ++i)
53 |     if (fscanf(hpcgStream, "%d", localDimensions+i) != 1 || localDimensions[i] < 16)
54 |       localDimensions[i] = 16;
55 | 
56 |   SkipUntilEol( hpcgStream ); // skip the rest of the second line
57 | 
58 |   if (secondsPerRun!=0) { // Only read number of seconds if the pointer is non-zero
59 |     if (fscanf(hpcgStream, "%d", secondsPerRun) != 1 || secondsPerRun[0] < 0)
60 |       secondsPerRun[0] = 30 * 60; // 30 minutes
61 |   }
62 | 
63 |   SkipUntilEol( hpcgStream ); // skip the rest of the third line
64 | 
65 |   for (int i = 0; i < 3; ++i)
66 |     // the user didn't specify (or values are invalid) process dimensions
67 |     if (fscanf(hpcgStream, "%d", localProcDimensions+i) != 1 || localProcDimensions[i] < 1)
68 |       localProcDimensions[i] = 0; // value 0 means: "not specified" and it will be fixed later
69 | 
70 |   fclose(hpcgStream);
71 | 
72 |   return 0;
73 | }
74 | 


--------------------------------------------------------------------------------
/src/ReadHpcgDat.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef READHPCGDAT_HPP
16 | #define READHPCGDAT_HPP
17 | 
18 | int ReadHpcgDat(int *localDimensions, int *secondsPerRun, int *localProcDimensions); // reads "hpcg.dat"; returns 0 on success, -1 if the file cannot be opened; secondsPerRun may be null
19 | 
20 | #endif // READHPCGDAT_HPP
21 | 


--------------------------------------------------------------------------------
/src/ReportResults.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /* ************************************************************************
16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
17 |  *
18 |  * Redistribution and use in source and binary forms, with or without modification,
19 |  * are permitted provided that the following conditions are met:
20 |  *
21 |  * 1. Redistributions of source code must retain the above copyright notice, this
22 |  *    list of conditions and the following disclaimer.
23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
24 |  *    this list of conditions and the following disclaimer in the documentation
25 |  *    and/or other materials provided with the distribution.
26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
27 |  *    may be used to endorse or promote products derived from this software without
28 |  *    specific prior written permission.
29 |  *
30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 |  * POSSIBILITY OF SUCH DAMAGE.
40 |  *
41 |  * ************************************************************************ */
42 | 
43 | #ifndef REPORTRESULTS_HPP
44 | #define REPORTRESULTS_HPP
45 | 
46 | #include "SparseMatrix.hpp"
47 | #include "TestCG.hpp"
48 | #include "TestSymmetry.hpp"
49 | #include "TestNorms.hpp"
50 | 
51 | double ComputeTotalGFlops(const SparseMatrix& A, int numberOfMgLevels, int numberOfCgSets, int refMaxIters, int optMaxIters, double times[]); // NOTE(review): layout of times[] is not visible in this header — TODO document
52 | void ReportResults(const SparseMatrix & A, int numberOfMgLevels, int numberOfCgSets, int refMaxIters, int optMaxIters, double times[],
53 |     const TestCGData & testcg_data, const TestSymmetryData & testsymmetry_data, const TestNormsData & testnorms_data, int global_failure, bool quickPath); // NOTE(review): meaning of global_failure and quickPath is not visible in this header — TODO document
54 | 
55 | #endif // REPORTRESULTS_HPP
56 | 


--------------------------------------------------------------------------------
/src/SetupHalo.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /* ************************************************************************
16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
17 |  *
18 |  * Redistribution and use in source and binary forms, with or without modification,
19 |  * are permitted provided that the following conditions are met:
20 |  *
21 |  * 1. Redistributions of source code must retain the above copyright notice, this
22 |  *    list of conditions and the following disclaimer.
23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
24 |  *    this list of conditions and the following disclaimer in the documentation
25 |  *    and/or other materials provided with the distribution.
26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
27 |  *    may be used to endorse or promote products derived from this software without
28 |  *    specific prior written permission.
29 |  *
30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 |  * POSSIBILITY OF SUCH DAMAGE.
40 |  *
41 |  * ************************************************************************ */
42 | 
43 | #ifndef SETUPHALO_HPP
44 | #define SETUPHALO_HPP
45 | 
46 | #include "SparseMatrix.hpp"
47 | 
48 | void SetupHalo(SparseMatrix& A); // NOTE(review): presumably prepares the halo-exchange data of A — confirm against the definition
49 | void CopyHaloToHost(SparseMatrix& A); // NOTE(review): presumably copies halo data of A from device to host memory — confirm against the definition
50 | 
51 | #endif // SETUPHALO_HPP
52 | 


--------------------------------------------------------------------------------
/src/SetupHalo_ref.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef SETUPHALO_REF_HPP
16 | #define SETUPHALO_REF_HPP
17 | #include "SparseMatrix.hpp"
18 | 
19 | void SetupHalo_ref(SparseMatrix & A); // NOTE(review): presumably the reference implementation of the halo setup for A — confirm against the definition
20 | 
21 | #endif // SETUPHALO_REF_HPP
22 | 


--------------------------------------------------------------------------------
/src/TestCG.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /*!
16 |  @file TestCG.hpp
17 | 
18 |  HPCG data structure
19 |  */
20 | 
21 | #ifndef TESTCG_HPP
22 | #define TESTCG_HPP
23 | 
24 | #include "hpcg.hpp"
25 | #include "SparseMatrix.hpp"
26 | #include "Vector.hpp"
27 | #include "CGData.hpp"
28 | 
29 | 
30 | struct TestCGData_STRUCT {
31 |   int count_pass; //!< number of successful tests
32 |   int count_fail;  //!< number of failed tests
33 |   int expected_niters_no_prec; //!< expected number of test CG iterations without preconditioning with diagonally dominant matrix (~12)
34 |   int expected_niters_prec; //!< expected number of test CG iterations with preconditioning and with diagonally dominant matrix (~1-2)
35 |   int niters_max_no_prec; //!< maximum number of test CG iterations without preconditioner
36 |   int niters_max_prec; //!< maximum number of test CG iterations with preconditioner
37 |   double normr; //!< residual norm achieved during test CG iterations
38 | };
39 | typedef struct TestCGData_STRUCT TestCGData;
40 | 
41 | extern int TestCG(SparseMatrix & A, CGData & data, Vector & b, Vector & x, TestCGData & testcg_data);
42 | 
43 | #endif  // TESTCG_HPP
44 | 
45 | 


--------------------------------------------------------------------------------
/src/TestNorms.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /*!
16 |  @file TestNorms.cpp
17 | 
18 |  HPCG routine
19 |  */
20 | 
21 | #include <cmath>
22 | #include "TestNorms.hpp"
23 | 
24 | /*!
25 |   Computes the mean and standard deviation of the array of norm results.
26 | 
27 |   @param[in] testnorms_data data structure with the results of norm test
28 | 
29 |   @return Returns 0 upon success or non-zero otherwise
30 | */
31 | int TestNorms(TestNormsData & testnorms_data) {
32 |  double mean_delta = 0.0;
33 |  for (int i= 0; i<testnorms_data.samples; ++i) mean_delta += (testnorms_data.values[i] - testnorms_data.values[0]);
34 |  double mean = testnorms_data.values[0] + mean_delta/(double)testnorms_data.samples;
35 |  testnorms_data.mean = mean;
36 | 
37 |  // Compute variance
38 |  double sumdiff = 0.0;
39 |  for (int i= 0; i<testnorms_data.samples; ++i) sumdiff += (testnorms_data.values[i] - mean) * (testnorms_data.values[i] - mean);
40 |  testnorms_data.variance = sumdiff/(double)testnorms_data.samples;
41 | 
42 |  // Determine if variation is sufficiently small to declare success
43 |  testnorms_data.pass = (testnorms_data.variance<1.0e-6);
44 | 
45 |  return 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/src/TestNorms.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /*!
16 |  @file TestNorms.hpp
17 | 
18 |  HPCG data structure
19 |  */
20 | 
21 | #ifndef TESTNORMS_HPP
22 | #define TESTNORMS_HPP
23 | 
24 | 
/*!
  Inputs and results of the norm test: the sample array and its length go in,
  the statistics and the pass/fail verdict come out.
*/
typedef struct TestNormsData_STRUCT {
  double * values;   //!< sample values
  double   mean;     //!< mean of all samples
  double   variance; //!< variance of the samples
  int      samples;  //!< number of samples
  bool     pass;     //!< pass/fail indicator
} TestNormsData;
33 | 
34 | extern int TestNorms(TestNormsData & testnorms_data);
35 | 
36 | #endif  // TESTNORMS_HPP
37 | 


--------------------------------------------------------------------------------
/src/TestSymmetry.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /*!
16 |  @file TestSymmetry.hpp
17 | 
18 |  HPCG data structures for symmetry testing
19 |  */
20 | 
21 | #ifndef TESTSYMMETRY_HPP
22 | #define TESTSYMMETRY_HPP
23 | 
24 | #include "hpcg.hpp"
25 | #include "SparseMatrix.hpp"
26 | #include "CGData.hpp"
27 | 
/*!
  Results gathered by the symmetry tests.
*/
typedef struct TestSymmetryData_STRUCT {
  double depsym_spmv; //!< departure from symmetry for the SPMV kernel
  double depsym_mg;   //!< departure from symmetry for the MG kernel
  int    count_fail;  //!< number of failures in the symmetry tests
} TestSymmetryData;
34 | 
35 | extern int TestSymmetry(SparseMatrix & A, Vector & b, Vector & xexact, TestSymmetryData & testsymmetry_data);
36 | 
37 | #endif  // TESTSYMMETRY_HPP
38 | 


--------------------------------------------------------------------------------
/src/Vector.hpp:
--------------------------------------------------------------------------------
  1 | 
  2 | //@HEADER
  3 | // ***************************************************
  4 | //
  5 | // HPCG: High Performance Conjugate Gradient Benchmark
  6 | //
  7 | // Contact:
  8 | // Michael A. Heroux ( maherou@sandia.gov)
  9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
 10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
 11 | //
 12 | // ***************************************************
 13 | //@HEADER
 14 | 
 15 | /* ************************************************************************
 16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
 17 |  *
 18 |  * Redistribution and use in source and binary forms, with or without modification,
 19 |  * are permitted provided that the following conditions are met:
 20 |  *
 21 |  * 1. Redistributions of source code must retain the above copyright notice, this
 22 |  *    list of conditions and the following disclaimer.
 23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
 24 |  *    this list of conditions and the following disclaimer in the documentation
 25 |  *    and/or other materials provided with the distribution.
 26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
 27 |  *    may be used to endorse or promote products derived from this software without
 28 |  *    specific prior written permission.
 29 |  *
 30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 39 |  * POSSIBILITY OF SUCH DAMAGE.
 40 |  *
 41 |  * ************************************************************************ */
 42 | 
 43 | /*!
 44 |  @file Vector.hpp
 45 | 
 46 |  HPCG data structures for dense vectors
 47 |  */
 48 | 
 49 | #ifndef VECTOR_HPP
 50 | #define VECTOR_HPP
 51 | #include <cassert>
 52 | #include <cstdlib>
 53 | #include <vector>
 54 | #include <hip/hip_runtime_api.h>
 55 | 
 56 | #include "utils.hpp"
 57 | #include "Geometry.hpp"
 58 | 
 59 | struct Vector_STRUCT {
 60 |   local_int_t localLength;  //!< length of local portion of the vector
 61 |   double * values = nullptr;          //!< array of values
 62 |   /*!
 63 |    This is for storing optimized data structures created in OptimizeProblem and
 64 |    used inside optimized ComputeSPMV().
 65 |    */
 66 |   void * optimizationData = nullptr;
 67 | 
 68 |   double* d_values = nullptr;
 69 | };
 70 | typedef struct Vector_STRUCT Vector;
 71 | 
 72 | /*!
 73 |   Initializes input vector.
 74 | 
 75 |   @param[in] v
 76 |   @param[in] localLength Length of local portion of input vector
 77 |  */
 78 | inline void InitializeVector(Vector & v, local_int_t localLength) {
 79 |   v.localLength = localLength;
 80 |   v.values = new double[localLength];
 81 |   v.optimizationData = 0;
 82 |   return;
 83 | }
 84 | 
 85 | inline void HIPInitializeVector(Vector& v, local_int_t localLength)
 86 | {
 87 |     v.localLength = localLength;
 88 |     v.optimizationData = 0;
 89 |     HIP_CHECK(deviceMalloc((void**)&v.d_values, sizeof(double) * localLength));
 90 | }
 91 | 
 92 | /*!
 93 |   Fill the input vector with zero values.
 94 | 
 95 |   @param[inout] v - On entrance v is initialized, on exit all its values are zero.
 96 |  */
 97 | inline void ZeroVector(Vector & v) {
 98 |   local_int_t localLength = v.localLength;
 99 |   double * vv = v.values;
100 |   for (int i=0; i<localLength; ++i) vv[i] = 0.0;
101 |   return;
102 | }
103 | 
104 | inline void HIPZeroVector(Vector& v)
105 | {
106 |     HIP_CHECK(hipMemsetAsync(v.d_values, 0, sizeof(double) * v.localLength, stream_interior));
107 | }
108 | 
109 | /*!
110 |   Multiply (scale) a specific vector entry by a given value.
111 | 
112 |   @param[inout] v Vector to be modified
113 |   @param[in] index Local index of entry to scale
114 |   @param[in] value Value to scale by
115 |  */
116 | inline void ScaleVectorValue(Vector & v, local_int_t index, double value) {
117 |   assert(index>=0 && index < v.localLength);
118 |   double * vv = v.values;
119 |   vv[index] *= value;
120 |   return;
121 | }
122 | /*!
123 |   Fill the input vector with pseudo-random values.
124 | 
125 |   @param[in] v
126 |  */
127 | inline void FillRandomVector(Vector & v) {
128 |   local_int_t localLength = v.localLength;
129 |   double * vv = v.values;
130 |   for (int i=0; i<localLength; ++i) vv[i] = rand() / (double)(RAND_MAX) + 1.0;
131 |   return;
132 | }
133 | 
134 | inline void HIPFillRandomVector(Vector& v)
135 | {
136 |   std::vector<double> rng(v.localLength);
137 |   for(int i = 0; i < v.localLength; ++i)
138 |   {
139 |     rng[i] = rand() / (double)(RAND_MAX) + 1.0;
140 |   }
141 | 
142 |   HIP_CHECK(hipMemcpy(v.d_values,
143 |                       rng.data(),
144 |                       sizeof(double) * v.localLength,
145 |                       hipMemcpyHostToDevice));
146 | }
147 | 
148 | /*!
149 |   Copy input vector to output vector.
150 | 
151 |   @param[in] v Input vector
152 |   @param[in] w Output vector
153 |  */
154 | inline void CopyVector(const Vector & v, Vector & w) {
155 |   local_int_t localLength = v.localLength;
156 |   assert(w.localLength >= localLength);
157 |   double * vv = v.values;
158 |   double * wv = w.values;
159 |   for (int i=0; i<localLength; ++i) wv[i] = vv[i];
160 |   return;
161 | }
162 | 
163 | inline void HIPCopyVector(const Vector& v, Vector& w)
164 | {
165 |     HIP_CHECK(hipMemcpyAsync(w.d_values,
166 |                              v.d_values,
167 |                              sizeof(double) * v.localLength,
168 |                              hipMemcpyDeviceToDevice,
169 |                              stream_interior));
170 | }
171 | 
172 | /*!
173 |   Deallocates the members of the data structure of the known system matrix provided they are not 0.
174 | 
175 |   @param[in] A the known system matrix
176 |  */
177 | inline void DeleteVector(Vector & v) {
178 | 
179 |   if (v.values) delete [] v.values;
180 |   v.localLength = 0;
181 |   return;
182 | }
183 | 
184 | inline void HIPDeleteVector(Vector& v)
185 | {
186 |     HIP_CHECK(deviceFree(v.d_values));
187 |     v.localLength = 0;
188 | }
189 | 
190 | #endif // VECTOR_HPP
191 | 


--------------------------------------------------------------------------------
/src/Version.hpp.in:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright (c) 2019 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Redistribution and use in source and binary forms, with or without modification,
 5 |  * are permitted provided that the following conditions are met:
 6 |  *
 7 |  * 1. Redistributions of source code must retain the above copyright notice, this
 8 |  *    list of conditions and the following disclaimer.
 9 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
10 |  *    this list of conditions and the following disclaimer in the documentation
11 |  *    and/or other materials provided with the distribution.
12 |  * 3. Neither the name of the copyright holder nor the names of its contributors
13 |  *    may be used to endorse or promote products derived from this software without
14 |  *    specific prior written permission.
15 |  *
16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
20 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
22 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 |  * POSSIBILITY OF SUCH DAMAGE.
26 |  *
27 |  * ************************************************************************ */
28 | 
29 | #ifndef VERSION_HPP
30 | #define VERSION_HPP
31 | 
32 | // clang-format off
33 | #define __ROCHPCG_VER_MAJOR     @rochpcg_VERSION_MAJOR@
34 | #define __ROCHPCG_VER_MINOR     @rochpcg_VERSION_MINOR@
35 | #define __ROCHPCG_VER_PATCH     @rochpcg_VERSION_PATCH@
36 | #define __ROCHPCG_VER_TWEAK     @rochpcg_VERSION_TWEAK@
37 | // clang-format on
38 | 
39 | #define TO_STR2(x) #x
40 | #define TO_STR(x) TO_STR2(x)
41 | 
// Version packed into one integer: MAJOR * 10000 + MINOR * 100 + PATCH.
// The expansion is parenthesized so it stays a single operand when the macro
// is used inside a larger expression (e.g. 2 * __ROCHPCG_VER).
#define __ROCHPCG_VER \
    (10000 * __ROCHPCG_VER_MAJOR + 100 * __ROCHPCG_VER_MINOR + __ROCHPCG_VER_PATCH)
44 | 
45 | #endif // VERSION_HPP
46 | 


--------------------------------------------------------------------------------
/src/WriteProblem.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /*!
16 |  @file WriteProblem.cpp
17 | 
18 |  HPCG routine
19 |  */
20 | 
21 | #include <cstdio>
22 | #include "WriteProblem.hpp"
23 | 
24 | 
25 | /*!
26 |   Routine to dump:
27 |    - matrix in row, col, val format for analysis with MATLAB
28 |    - x, xexact, b as simple arrays of numbers.
29 | 
30 |    Writes to A.dat, x.dat, xexact.dat and b.dat, respectivly.
31 | 
32 |    NOTE:  THIS CODE ONLY WORKS ON SINGLE PROCESSOR RUNS
33 | 
34 |    Read into MATLAB using:
35 | 
36 |        load A.dat
37 |        A=spconvert(A);
38 |        load x.dat
39 |        load xexact.dat
40 |        load b.dat
41 | 
42 |   @param[in] geom   The description of the problem's geometry.
43 |   @param[in] A      The known system matrix
44 |   @param[in] b      The known right hand side vector
45 |   @param[in] x      The solution vector computed by CG iteration
46 |   @param[in] xexact Generated exact solution
47 | 
48 |   @return Returns with -1 if used with more than one MPI process. Returns with 0 otherwise.
49 | 
50 |   @see GenerateProblem
51 | */
52 | int WriteProblem( const Geometry & geom, const SparseMatrix & A,
53 |     const Vector b, const Vector x, const Vector xexact) {
54 | 
55 |   if (geom.size!=1) return -1; //TODO Only works on one processor.  Need better error handler
56 |   const global_int_t nrow = A.totalNumberOfRows;
57 | 
58 |   FILE * fA = 0, * fx = 0, * fxexact = 0, * fb = 0;
59 |   fA = fopen("A.dat", "w");
60 |   fx = fopen("x.dat", "w");
61 |   fxexact = fopen("xexact.dat", "w");
62 |   fb = fopen("b.dat", "w");
63 | 
64 |   if (! fA || ! fx || ! fxexact || ! fb) {
65 |     if (fb) fclose(fb);
66 |     if (fxexact) fclose(fxexact);
67 |     if (fx) fclose(fx);
68 |     if (fA) fclose(fA);
69 |     return -1;
70 |   }
71 | 
72 |   for (global_int_t i=0; i< nrow; i++) {
73 |     const double * const currentRowValues = A.matrixValues[i];
74 |     const global_int_t * const currentRowIndices = A.mtxIndG[i];
75 |     const int currentNumberOfNonzeros = A.nonzerosInRow[i];
76 |     for (int j=0; j< currentNumberOfNonzeros; j++)
77 | #ifdef HPCG_NO_LONG_LONG
78 |       fprintf(fA, " %d %d %22.16e\n",i+1,(global_int_t)(currentRowIndices[j]+1),currentRowValues[j]);
79 | #else
80 |       fprintf(fA, " %lld %lld %22.16e\n",i+1,(global_int_t)(currentRowIndices[j]+1),currentRowValues[j]);
81 | #endif
82 |     fprintf(fx, "%22.16e\n",x.values[i]);
83 |     fprintf(fxexact, "%22.16e\n",xexact.values[i]);
84 |     fprintf(fb, "%22.16e\n",b.values[i]);
85 |   }
86 | 
87 |   fclose(fA);
88 |   fclose(fx);
89 |   fclose(fxexact);
90 |   fclose(fb);
91 |   return 0;
92 | }
93 | 


--------------------------------------------------------------------------------
/src/WriteProblem.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef WRITEPROBLEM_HPP
16 | #define WRITEPROBLEM_HPP
17 | #include "Geometry.hpp"
18 | #include "SparseMatrix.hpp"
19 | 
20 | int WriteProblem( const Geometry & geom, const SparseMatrix & A, const Vector b, const Vector x, const Vector xexact);
21 | #endif // WRITEPROBLEM_HPP
22 | 


--------------------------------------------------------------------------------
/src/YAML_Doc.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #include <cstdlib>
16 | #include <ctime>
17 | #include <iostream>
18 | #include <fstream>
19 | #include <sstream>
20 | #include "YAML_Doc.hpp"
21 | using namespace std;
22 | 
23 | /*!
24 |   Sets the application name and version which will become part of the YAML doc.
25 | 
26 |   @param[in] miniApp_Name application name
27 |   @param[in] miniApp_Version application name
28 |   @param[in] destination_Directory destination directory for the YAML document
29 |   @param[in] destination_FileName file name for the YAML document
30 | */
31 | YAML_Doc::YAML_Doc(const std::string & miniApp_Name, const std::string & miniApp_Version, const std::string & destination_Directory, const std::string & destination_FileName) {
32 |   miniAppName = miniApp_Name;
33 |   miniAppVersion = miniApp_Version;
34 |   destinationDirectory = destination_Directory;
35 |   destinationFileName = destination_FileName;
36 | }
37 | 
38 | //inherits the destructor from YAML_Element
39 | YAML_Doc::~YAML_Doc(void) {
40 | }
41 | 
42 | /*!
43 |   Generates YAML from the elements of the document and saves it to a file.
44 | 
45 |   @return returns the complete YAML document as a string
46 | */
47 | string YAML_Doc::generateYAML() {
48 |   string yaml;
49 | 
50 |   yaml =  yaml + miniAppName + " version: " + miniAppVersion + "\n";
51 | 
52 |   for (size_t i=0; i<children.size(); i++) {
53 |     yaml = yaml + children[i]->printYAML("");
54 |   }
55 | 
56 |   time_t rawtime;
57 |   tm * ptm;
58 |   time ( &rawtime );
59 |   ptm = localtime(&rawtime);
60 |   char sdate[256];
61 |   //use tm_mon+1 because tm_mon is 0 .. 11 instead of 1 .. 12
62 |   sprintf (sdate,"%04d.%02d.%02d.%02d.%02d.%02d",ptm->tm_year + 1900, ptm->tm_mon+1,
63 |       ptm->tm_mday, ptm->tm_hour, ptm->tm_min,ptm->tm_sec);
64 | 
65 |   string filename;
66 |   if (destinationFileName=="")
67 |     filename = miniAppName + "-" + miniAppVersion + "_";
68 |   else
69 |     filename = destinationFileName;
70 |   filename = filename + string(sdate) + ".yaml";
71 |   if (destinationDirectory!="" && destinationDirectory!=".") {
72 |     string mkdir_cmd = "mkdir " + destinationDirectory;
73 |     int err = system(mkdir_cmd.c_str());
74 |     filename = destinationDirectory + "/" + destinationFileName;
75 |   } else
76 |     filename = "./" + filename;
77 | 
78 |   ofstream myfile;
79 |   myfile.open(filename.c_str());
80 |   myfile << yaml;
81 |   myfile.close();
82 |   return yaml;
83 | }
84 | 


--------------------------------------------------------------------------------
/src/YAML_Doc.hpp:
--------------------------------------------------------------------------------
  1 | 
  2 | //@HEADER
  3 | // ***************************************************
  4 | //
  5 | // HPCG: High Performance Conjugate Gradient Benchmark
  6 | //
  7 | // Contact:
  8 | // Michael A. Heroux ( maherou@sandia.gov)
  9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
 10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
 11 | //
 12 | // ***************************************************
 13 | //@HEADER
 14 | 
 15 | /*!
 16 |  @file YAML_Doc.hpp
 17 | 
 18 |  HPCG YAML classes
 19 |  */
 20 | 
 21 | // Changelog
 22 | //
 23 | // Version 0.1
 24 | // - Initial version.
 25 | //
 26 | /////////////////////////////////////////////////////////////////////////
 27 | 
 28 | #ifndef YAML_DOC_HPP
 29 | #define YAML_DOC_HPP
 30 | #include <string>
 31 | #include "YAML_Element.hpp"
 32 | 
 33 | //! The YAML_Doc class for the uniform collecting and reporting of performance data for HPCG
 34 | 
 35 | /*!
 36 | 
 37 | The YAML_Doc class works in conjunction with the YAML_Element class to facilitate easy collecting and reporting of YAML-formatted
 38 | data that can be then registered with the HPCG results collection website.
 39 | 
 40 | \code
 41 | 
 42 | //EXAMPLE CODE FOR GENERATING YAML
 43 | 
 44 |   YAML_Doc doc("hpcg","0.1");
 45 |   doc.add("final_residual",1.4523e-13);
 46 |   doc.add("time","4.893");
 47 | 
//note: the following line will remove the data (4.893) associated with "time"
 49 |   doc.get("time")->add("total",4.243);
 50 | 
//note:  the following line will likewise remove the data (4.243) associated with "total"
 52 |   doc.get("time")->get("total")->add("time",2.457);
 53 |   doc.get("time")->get("total")->add("flops",4.88e5);
 54 |   doc.get("time")->add("ddot",1.243);
 55 |   doc.get("time")->add("sparsemv","");
 56 |   doc.get("time")->get("sparsemv")->add("time",0.3445);
 57 |   doc.get("time")->get("sparsemv")->add("overhead","");
 58 |   doc.get("time")->get("sparsemv")->get("overhead")->add("time",0.0123);
 59 |   doc.get("time")->get("sparsemv")->get("overhead")->add("percentage",0.034);
 60 |   cout << doc.generateYAML() << endl;
 61 |   return 0;
 62 | 
 63 | \endcode
 64 | 
 65 | Below is the output generated by the above code:
 66 | 
 67 | \verbatim
 68 | 
 69 | final_residual: 1.4523e-13
 70 | time:
 71 |   total:
 72 |     time: 2.457
 73 |     flops: 4.88e5
 74 |   ddot: 1.243
 75 |   sparsemv:
 76 |     time: 0.3445
 77 |     overhead:
 78 |       time: 0.0123
 79 |       percentage: 0.034
 80 | 
 81 | \endverbatim
 82 | 
 83 | \note {No value is allowed to be attached to a key that has children.  If children are added to a key, the value is simply set to "".}
 84 | 
 85 | */
class YAML_Doc: public YAML_Element {
public:
  //! Constructor: accepts mini-application name and version as strings, optionally accepts directory and file name for printing results.
  /*!
    The sole constructor for this class accepts a name and version number for the mini-application as well as optional directory
    and file name information for the results file that is written by the generateYAML() method.
    \param miniApp_Name (in) string containing name of the mini-application
    \param miniApp_Version (in) string containing the version of the mini-application
    \param destination_Directory (in, optional) path of directory where results file will be stored, relative to current working directory.
           If this value is not supplied, the results file will be stored in the current working directory.  Otherwise generateYAML()
           attempts to create the directory with a "mkdir" shell command before writing the file.
    \param destination_FileName (in, optional) root name of the results file.  If no file name is specified, generateYAML() builds the
           root from the application name and version; see generateYAML() for how the final file name is composed.
  */
  YAML_Doc(const std::string & miniApp_Name, const std::string & miniApp_Version, const std::string & destination_Directory = "", const std::string & destination_FileName = "");
  //! Destructor
  ~YAML_Doc();
  //! Build the YAML text from the name/version line and all child elements, write it to the results file, and return it as a string
  std::string generateYAML();

protected:
  std::string miniAppName; //!< the name of the application that generated the YAML output
  std::string miniAppVersion; //!< the version of the application that generated the YAML output
  std::string destinationDirectory; //!< the destination directory for the generated YAML output
  std::string destinationFileName; //!< the filename for the generated YAML output
};
112 | #endif // YAML_DOC_HPP
113 | 


--------------------------------------------------------------------------------
/src/YAML_Element.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | //@HEADER
  3 | // ***************************************************
  4 | //
  5 | // HPCG: High Performance Conjugate Gradient Benchmark
  6 | //
  7 | // Contact:
  8 | // Michael A. Heroux ( maherou@sandia.gov)
  9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
 10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
 11 | //
 12 | // ***************************************************
 13 | //@HEADER
 14 | 
 15 | /*!
 16 |  @file YAML_Element.cpp
 17 | 
 18 |  HPCG routine
 19 |  */
 20 | 
 21 | #include <iostream>
 22 | #include <fstream>
 23 | #include <sstream>
 24 | #include "YAML_Element.hpp"
 25 | using namespace std;
 26 | YAML_Element::YAML_Element(const std::string & key_arg, const std::string & value_arg) {
 27 |   key = key_arg;
 28 |   value = value_arg;
 29 | }
 30 | 
 31 | YAML_Element::~YAML_Element() {
 32 |   for (size_t i=0; i<children.size(); i++) {
 33 |     delete children[i];
 34 |   }
 35 |   children.clear();
 36 | }
 37 | 
 38 | /*!
 39 |   Add an element to the vector
 40 |   QUESTION: if an element is not added because the key already exists,
 41 |   will this lead to memory leakage?
 42 | 
 43 |   @param[in] key_arg   The key under which the element is stored
 44 |   @param[in] value_arg The value of the element
 45 | 
 46 |   @return Returns the added element
 47 | */
 48 | YAML_Element * YAML_Element::add(const std::string & key_arg, double value_arg) {
 49 |   this->value = "";
 50 |   string converted_value = convert_double_to_string(value_arg);
 51 |   YAML_Element * element = new YAML_Element(key_arg,converted_value);
 52 |   children.push_back(element);
 53 |   return element;
 54 | }
 55 | 
 56 | /*!
 57 |   Add an element to the vector
 58 | 
 59 |   @param[in] key_arg   The key under which the element is stored
 60 |   @param[in] value_arg The value of the element
 61 | 
 62 |   @return Returns the added element
 63 | */
 64 | YAML_Element * YAML_Element::add(const std::string & key_arg, int value_arg) {
 65 |   this->value = "";
 66 |   string converted_value = convert_int_to_string(value_arg);
 67 |   YAML_Element * element = new YAML_Element(key_arg,converted_value);
 68 |   children.push_back(element);
 69 |   return element;
 70 | }
 71 | 
 72 | #ifndef HPCG_NO_LONG_LONG
 73 | 
 74 | /*!
 75 |   Add an element to the vector
 76 | 
 77 |   @param[in] key_arg   The key under which the element is stored
 78 |   @param[in] value_arg The value of the element
 79 | 
 80 |   @return Returns the added element
 81 | */
 82 | YAML_Element * YAML_Element::add(const std::string & key_arg, long long value_arg) {
 83 |   this->value = "";
 84 |   string converted_value = convert_long_long_to_string(value_arg);
 85 |   YAML_Element * element = new YAML_Element(key_arg,converted_value);
 86 |   children.push_back(element);
 87 |   return element;
 88 | }
 89 | 
 90 | #endif
 91 | 
 92 | /*!
 93 |   Add an element to the vector
 94 | 
 95 |   @param[in] key_arg   The key under which the element is stored
 96 |   @param[in] value_arg The value of the element
 97 | 
 98 |   @return Returns the added element
 99 | */
100 | YAML_Element * YAML_Element::add(const std::string & key_arg, size_t value_arg) {
101 |   this->value = "";
102 |   string converted_value = convert_size_t_to_string(value_arg);
103 |   YAML_Element * element = new YAML_Element(key_arg,converted_value);
104 |   children.push_back(element);
105 |   return element;
106 | }
107 | 
108 | /*!
109 |   Add an element to the vector
110 | 
111 |   @param[in] key_arg   The key under which the element is stored
112 |   @param[in] value_arg The value of the element
113 | 
114 |   @return Returns the added element
115 | */
116 | YAML_Element * YAML_Element::add(const std::string & key_arg, const std::string & value_arg) {
117 |   this->value = "";
118 |   YAML_Element * element = new YAML_Element(key_arg, value_arg);
119 |   children.push_back(element);
120 |   return element;
121 | }
122 | 
123 | /*!
124 |   Returns the pointer to the YAML_Element for the given key.
125 |   @param[in] key_arg   The key under which the element was stored
126 | 
127 |   @return If found, returns the element, otherwise returns NULL
128 | */
129 | YAML_Element * YAML_Element::get(const std::string & key_arg) {
130 |   for (size_t i=0; i<children.size(); i++) {
131 |     if (children[i]->getKey() == key_arg) {
132 |       return children[i];
133 |     }
134 |   }
135 |   return 0;
136 | }
137 | 
138 | /*!
139 |   Prints a line of a YAML document.  Correct YAML depends on
140 |   correct spacing; the parameter space should be the proper
141 |   amount of space for the parent element
142 | 
143 |   @param[in] space spacing inserted at the beginning of the line
144 | 
145 |   @return Returns a single line of the YAML document without the leading white space
146 | */
147 | string YAML_Element::printYAML(std::string space) {
148 |   string yaml_line = space + key + ": " + value + "\n";
149 |   for (int i=0; i<2; i++) space = space + " ";
150 |   for (size_t i=0; i<children.size(); i++) {
151 |     yaml_line = yaml_line + children[i]->printYAML(space);
152 |   }
153 |   return yaml_line;
154 | }
155 | 
156 | /*!
157 |   Converts a double precision value to a string.
158 | 
159 |   @param[in] value_arg The value to be converted.
160 | */
161 | string YAML_Element::convert_double_to_string(double value_arg) {
162 |   stringstream strm;
163 |   strm << value_arg;
164 |   return strm.str();
165 | }
166 | 
167 | /*!
168 |   Converts a integer value to a string.
169 | 
170 |   @param[in] value_arg The value to be converted.
171 | */
172 | string YAML_Element::convert_int_to_string(int value_arg) {
173 |   stringstream strm;
174 |   strm << value_arg;
175 |   return strm.str();
176 | }
177 | 
178 | #ifndef HPCG_NO_LONG_LONG
179 | 
180 | /*!
181 |   Converts a "long long" integer value to a string.
182 | 
183 |   @param[in] value_arg The value to be converted.
184 | */
185 | string YAML_Element::convert_long_long_to_string(long long value_arg) {
186 |   stringstream strm;
187 |   strm << value_arg;
188 |   return strm.str();
189 | }
190 | 
191 | #endif
192 | 
193 | /*!
194 |   Converts a "size_t" integer value to a string.
195 | 
196 |   @param[in] value_arg The value to be converted.
197 | */
198 | string YAML_Element::convert_size_t_to_string(size_t value_arg) {
199 |   stringstream strm;
200 |   strm << value_arg;
201 |   return strm.str();
202 | }
203 | 


--------------------------------------------------------------------------------
/src/YAML_Element.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /*!
16 |  @file YAML_Element.hpp
17 | 
18 |  HPCG data structures for YAML output
19 |  */
20 | 
21 | // Changelog
22 | //
23 | // Version 0.1
24 | // - Initial version.
25 | //
26 | /////////////////////////////////////////////////////////////////////////
27 | 
28 | #ifndef YAML_ELEMENT_HPP
29 | #define YAML_ELEMENT_HPP
30 | #include <string>
31 | #include <vector>
32 | #include "Geometry.hpp"
//! HPCG YAML_Element class for registering key-value pairs of performance data
34 | 
35 | /*!
36 |   HPCG generates a collection of performance data for each run of the executable.  YAML_Element, and
37 |   the related YAML_Doc class, provide a uniform facility for gathering and reporting this data using the YAML text format.
38 | */
class YAML_Element {
public:

  //! Default constructor: both key and value start out empty.
  YAML_Element () : key(""), value("") {}
  //! Construct an element from a known key-value pair
  YAML_Element (const std::string & key_arg, const std::string & value_arg);
  //! Destructor
  ~YAML_Element ();
  //! Returns a copy of this element's key
  std::string getKey() {return key;}
  //! Add a child element holding a value of type double
  YAML_Element * add(const std::string & key_arg, double value_arg);
  //! Add a child element holding a value of type int
  YAML_Element * add(const std::string & key_arg, int value_arg);
#ifndef HPCG_NO_LONG_LONG
  //! Add a child element holding a value of type long long (omitted when HPCG_NO_LONG_LONG is defined)
  YAML_Element * add(const std::string & key_arg, long long value_arg);
#endif
  //! Add a child element holding a value of type size_t
  YAML_Element * add(const std::string & key_arg, size_t value_arg);
  //! Add a child element holding a value of type string
  YAML_Element * add(const std::string & key_arg, const std::string & value_arg);
  //! Look up the element in the child list that has the given key
  YAML_Element * get(const std::string & key_arg);
  //! Returns this element as YAML text. NOTE(review): `space` is presumably the indentation prefix -- confirm in YAML_Element.cpp
  std::string printYAML(std::string space);

protected:
  std::string key; //!< key under which this element is stored
  std::string value; //!< value stored for this element
  std::vector<YAML_Element *> children; //!< child elements, held as raw pointers

private:
  // Number-to-text helpers, one per value type accepted by add()
  std::string convert_double_to_string(double value_arg);
  std::string convert_int_to_string(int value_arg);
#ifndef HPCG_NO_LONG_LONG
  std::string convert_long_long_to_string(long long value_arg);
#endif
  std::string convert_size_t_to_string(size_t value_arg);
};
79 | #endif // YAML_ELEMENT_HPP
80 | 


--------------------------------------------------------------------------------
/src/finalize.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /* ************************************************************************
16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
17 |  *
18 |  * Redistribution and use in source and binary forms, with or without modification,
19 |  * are permitted provided that the following conditions are met:
20 |  *
21 |  * 1. Redistributions of source code must retain the above copyright notice, this
22 |  *    list of conditions and the following disclaimer.
23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
24 |  *    this list of conditions and the following disclaimer in the documentation
25 |  *    and/or other materials provided with the distribution.
26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
27 |  *    may be used to endorse or promote products derived from this software without
28 |  *    specific prior written permission.
29 |  *
30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 |  * POSSIBILITY OF SUCH DAMAGE.
40 |  *
41 |  * ************************************************************************ */
42 | 
43 | #include <fstream>
44 | #include <hip/hip_runtime_api.h>
45 | 
46 | #include "utils.hpp"
47 | #include "hpcg.hpp"
48 | 
/*!
  Closes the I/O stream used for logging information throughout the HPCG run and
  releases the HIP resources shared across the run: both streams, the halo event,
  the workspace buffer and, when HPCG_MEMMGMT is defined, the memory allocator.
  The HIP device is reset last.

  Every HIP call is wrapped in HIP_CHECK, which prints a diagnostic, resets the
  device and calls exit(1) on failure. As a result this function does not return
  a non-zero value itself: it either returns 0 or the process terminates.

  @return returns 0 when teardown completes

  @see HPCG_Init
*/
int
HPCG_Finalize(void) {
  // Close the log file
  HPCG_fout.close();

  // Destroy streams
  HIP_CHECK(hipStreamDestroy(stream_interior));
  HIP_CHECK(hipStreamDestroy(stream_halo));

  // Destroy events
  HIP_CHECK(hipEventDestroy(halo_gather));

  // Free workspace
  HIP_CHECK(deviceFree(workspace));

#ifdef HPCG_MEMMGMT
  // Clear allocator
  HIP_CHECK(allocator.Clear());
#endif

  // Reset HIP device; the return value is intentionally not checked here
  hipDeviceReset();

  return 0;
}
80 | 


--------------------------------------------------------------------------------
/src/hpcg.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /* ************************************************************************
16 |  * Modifications (c) 2019 Advanced Micro Devices, Inc.
17 |  *
18 |  * Redistribution and use in source and binary forms, with or without modification,
19 |  * are permitted provided that the following conditions are met:
20 |  *
21 |  * 1. Redistributions of source code must retain the above copyright notice, this
22 |  *    list of conditions and the following disclaimer.
23 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
24 |  *    this list of conditions and the following disclaimer in the documentation
25 |  *    and/or other materials provided with the distribution.
26 |  * 3. Neither the name of the copyright holder nor the names of its contributors
27 |  *    may be used to endorse or promote products derived from this software without
28 |  *    specific prior written permission.
29 |  *
30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
34 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
36 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 |  * POSSIBILITY OF SUCH DAMAGE.
40 |  *
41 |  * ************************************************************************ */
42 | 
43 | /*!
44 |  @file hpcg.hpp
45 | 
46 |  HPCG data structures and functions
47 |  */
48 | 
49 | #ifndef HPCG_HPP
50 | #define HPCG_HPP
51 | 
52 | #include <fstream>
53 | #include "Geometry.hpp"
54 | 
55 | extern std::ofstream HPCG_fout;
56 | 
//! Run parameters of one HPCG process.
// NOTE(review): the previous comments described nx/ny/nz as the process grid and npx/npy/npz as the
// local grid size. That looks swapped: the pz/zl/zu comments treat npz as a processor count and nz as
// a per-processor size. The descriptions below use the corrected sense -- confirm against HPCG_Init.
struct HPCG_Params_STRUCT {
  int comm_size; //!< Number of MPI processes in MPI_COMM_WORLD
  int comm_rank; //!< This process' MPI rank in the range [0 to comm_size - 1]
  int numThreads; //!< This process' number of threads
  local_int_t nx; //!< Number of x-direction grid points for each local subdomain
  local_int_t ny; //!< Number of y-direction grid points for each local subdomain
  local_int_t nz; //!< Number of z-direction grid points for each local subdomain
  int runningTime; //!< Number of seconds to run the timed portion of the benchmark
  int npx; //!< Number of processes in x-direction of 3D process grid
  int npy; //!< Number of processes in y-direction of 3D process grid
  int npz; //!< Number of processes in z-direction of 3D process grid
  int pz; //!< Partition in the z processor dimension, default is npz
  local_int_t zl; //!< nz for processors in the z dimension with value less than pz
  local_int_t zu; //!< nz for processors in the z dimension with value greater than pz
  int device; //!< HIP device
  bool verify; //!< Do reference verification
  double tol; //!< Exit tolerance if verification is skipped
};
/*!
  HPCG_Params is a shorthand for HPCG_Params_STRUCT
 */
typedef HPCG_Params_STRUCT HPCG_Params;

//! Start-up counterpart of HPCG_Finalize.
// NOTE(review): the implementation is not shown here; judging by the signature it takes pointers to
// main()'s argc/argv and fills in params -- confirm in init.cpp.
extern int HPCG_Init(int * argc_p, char ** *argv_p, HPCG_Params & params);
//! Closes HPCG_fout and releases the shared HIP resources; returns 0 (HIP failures terminate the process).
extern int HPCG_Finalize(void);
82 | 
83 | #endif // HPCG_HPP
84 | 


--------------------------------------------------------------------------------
/src/mytimer.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | /////////////////////////////////////////////////////////////////////////
16 | 
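// Function to return time in seconds.
// Uses MPI_Wtime() unless HPCG_NO_MPI is defined; otherwise uses omp_get_wtime()
// unless HPCG_NO_OPENMP is also defined; otherwise falls back to gettimeofday(),
// reporting elapsed wall-clock time since the first call (which returns 0.0).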
20 | 
21 | /////////////////////////////////////////////////////////////////////////
22 | 
23 | #ifndef HPCG_NO_MPI
24 | #include <mpi.h>
25 | 
26 | double mytimer(void) {
27 |   return MPI_Wtime();
28 | }
29 | 
30 | #elif !defined(HPCG_NO_OPENMP)
31 | 
32 | // If this routine is compiled with HPCG_NO_MPI defined and not compiled with HPCG_NO_OPENMP then use the OpenMP timer
33 | #include <omp.h>
34 | double mytimer(void) {
35 |   return omp_get_wtime();
36 | }
37 | #else
38 | 
39 | #include <cstdlib>
40 | #include <sys/time.h>
41 | #include <sys/resource.h>
// Fallback timer built on gettimeofday(): the first call records the reference
// time and returns 0.0; later calls return the seconds elapsed since that call.
double mytimer(void) {
  static long start=0, startu;
  struct timeval now;
  gettimeofday(&now, NULL);
  if (start == 0) {
    // First call: remember the reference point
    start = now.tv_sec;
    startu = now.tv_usec;
    return 0.0;
  }
  const double whole_seconds = (double) (now.tv_sec - start);
  const double fraction = (now.tv_usec - startu) / 1000000.0;
  return whole_seconds + fraction;
}
54 | 
55 | #endif
56 | 


--------------------------------------------------------------------------------
/src/mytimer.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | //@HEADER
 3 | // ***************************************************
 4 | //
 5 | // HPCG: High Performance Conjugate Gradient Benchmark
 6 | //
 7 | // Contact:
 8 | // Michael A. Heroux ( maherou@sandia.gov)
 9 | // Jack Dongarra     (dongarra@eecs.utk.edu)
10 | // Piotr Luszczek    (luszczek@eecs.utk.edu)
11 | //
12 | // ***************************************************
13 | //@HEADER
14 | 
15 | #ifndef MYTIMER_HPP
16 | #define MYTIMER_HPP
//! Returns a time stamp in seconds; the clock behind it (MPI_Wtime, omp_get_wtime or gettimeofday) is selected at compile time in mytimer.cpp.
double mytimer(void);
18 | #endif // MYTIMER_HPP
19 | 


--------------------------------------------------------------------------------
/src/rochpcg_gtest_main.cpp:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright (c) 2019 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Redistribution and use in source and binary forms, with or without modification,
  5 |  * are permitted provided that the following conditions are met:
  6 |  *
  7 |  * 1. Redistributions of source code must retain the above copyright notice, this
  8 |  *    list of conditions and the following disclaimer.
  9 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
 10 |  *    this list of conditions and the following disclaimer in the documentation
 11 |  *    and/or other materials provided with the distribution.
 12 |  * 3. Neither the name of the copyright holder nor the names of its contributors
 13 |  *    may be used to endorse or promote products derived from this software without
 14 |  *    specific prior written permission.
 15 |  *
 16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 17 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 18 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 19 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 20 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 21 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 22 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 23 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 24 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 25 |  * POSSIBILITY OF SUCH DAMAGE.
 26 |  *
 27 |  * ************************************************************************ */
 28 | 
 29 | /*!
 30 |  @file rochpcg_gtest_main.cpp
 31 | 
 32 |  HPCG routine
 33 |  */
 34 | 
 35 | #include <gtest/gtest.h>
 36 | #include <stdexcept>
 37 | #include <hip/hip_runtime_api.h>
 38 | 
 39 | #ifndef HPCG_NO_MPI
 40 | #include <mpi.h>
 41 | #endif
 42 | 
 43 | #include "Version.hpp"
 44 | 
 45 | int device_id;
 46 | 
 47 | int main(int argc, char* argv[])
 48 | {
 49 |     ::testing::InitGoogleTest(&argc, argv);
 50 | 
 51 |     int rank = 0;
 52 | #ifndef HPCG_NO_MPI
 53 |     MPI_Init(&argc, &argv);
 54 |     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 55 | #endif
 56 | 
 57 |     // Print rocHPCG version and device
 58 |     if(rank == 0)
 59 |     {
 60 |         printf("-------------------------------------------------------------------------\n");
 61 |         printf("rocHPCG version: %d.%d.%d-%s\n",
 62 |                __ROCHPCG_VER_MAJOR,
 63 |                __ROCHPCG_VER_MINOR,
 64 |                __ROCHPCG_VER_PATCH,
 65 |                TO_STR(__ROCHPCG_VER_TWEAK));
 66 |     }
 67 | 
 68 |     // Get device id from command line
 69 |     device_id = 0;
 70 | 
 71 |     for(int i = 1; i < argc; ++i)
 72 |     {
 73 |         if(strcmp(argv[i], "--device") == 0 && argc > i + 1)
 74 |         {
 75 |             device_id = atoi(argv[i + 1]);
 76 |         }
 77 |     }
 78 | 
 79 |     // Device query
 80 |     int device_count;
 81 |     hipError_t status = hipGetDeviceCount(&device_count);
 82 | 
 83 |     if(status != hipSuccess)
 84 |     {
 85 |         if(rank == 0)
 86 |         {
 87 |             fprintf(stderr, "Error: cannot get device count\n");
 88 |         }
 89 | 
 90 |         return -1;
 91 |     }
 92 |     else
 93 |     {
 94 |         if(rank == 0)
 95 |         {
 96 |             printf("There are %d devices\n", device_count);
 97 |         }
 98 |     }
 99 | 
100 |     for(int i = 0; i < device_count; ++i)
101 |     {
102 |         hipDeviceProp_t props;
103 |         status = hipGetDeviceProperties(&props, i);
104 | 
105 |         if(rank == 0)
106 |         {
107 |             if(status != hipSuccess)
108 |             {
109 |                 fprintf(stderr, "Error: cannot get device ID %d's properties\n", i);
110 |             }
111 |             else
112 |             {
113 |                 printf("Device ID %d : %s\n", i, props.name);
114 |                 printf("-------------------------------------------------------------------------\n");
115 |                 printf("with %ldMB memory, clock rate %dMHz @ computing capability %d.%d \n",
116 |                        props.totalGlobalMem >> 20,
117 |                        (int)(props.clockRate / 1000),
118 |                        props.major,
119 |                        props.minor);
120 |                 printf("maxGridDimX %d, sharedMemPerBlock %ldKB, maxThreadsPerBlock %d, wavefrontSize "
121 |                        "%d\n",
122 |                        props.maxGridSize[0],
123 |                        props.sharedMemPerBlock >> 10,
124 |                        props.maxThreadsPerBlock,
125 |                        props.warpSize);
126 | 
127 |                 printf("-------------------------------------------------------------------------\n");
128 |             }
129 |         }
130 |     }
131 | 
132 |     if(device_count <= device_id)
133 |     {
134 |         if(rank == 0)
135 |         {
136 |             fprintf(stderr, "Error: invalid device ID. There may not be such device ID. Exiting\n");
137 |         }
138 | 
139 |         return -1;
140 |     }
141 | 
142 |     status = hipSetDevice(device_id);
143 | 
144 |     if(rank == 0 && status != hipSuccess)
145 |     {
146 |         fprintf(stderr, "Error: cannot set device ID %d, there may not be such device ID\n", device_id);
147 |     }
148 | 
149 |     hipDeviceProp_t prop;
150 |     hipGetDeviceProperties(&prop, device_id);
151 |     printf("Using device ID %d (%s) for rocHPCG\n", device_id, prop.name);
152 | 
153 | #ifndef HPCG_NO_MPI
154 |     MPI_Barrier(MPI_COMM_WORLD);
155 | #endif
156 | 
157 |     if(rank == 0)
158 |     {
159 |         printf("-------------------------------------------------------------------------\n");
160 |     }
161 | 
162 | #ifndef HPCG_NO_MPI
163 |     MPI_Barrier(MPI_COMM_WORLD);
164 | #endif
165 | 
166 |     // Only rank 0 should listen
167 |     ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners();
168 | 
169 |     if(rank != 0)
170 |     {
171 |         delete listeners.Release(listeners.default_result_printer());
172 |     }
173 | 
174 |     int ret = RUN_ALL_TESTS();
175 | 
176 |     hipDeviceReset();
177 | 
178 | #ifndef HPCG_NO_MPI
179 |     MPI_Finalize();
180 | #endif
181 | 
182 |     return ret;
183 | }
184 | 


--------------------------------------------------------------------------------
/src/test_rochpcg.hpp:
--------------------------------------------------------------------------------
 1 | /* ************************************************************************
 2 |  * Copyright (c) 2019 Advanced Micro Devices, Inc.
 3 |  *
 4 |  * Redistribution and use in source and binary forms, with or without modification,
 5 |  * are permitted provided that the following conditions are met:
 6 |  *
 7 |  * 1. Redistributions of source code must retain the above copyright notice, this
 8 |  *    list of conditions and the following disclaimer.
 9 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
10 |  *    this list of conditions and the following disclaimer in the documentation
11 |  *    and/or other materials provided with the distribution.
12 |  * 3. Neither the name of the copyright holder nor the names of its contributors
13 |  *    may be used to endorse or promote products derived from this software without
14 |  *    specific prior written permission.
15 |  *
16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
20 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
22 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 |  * POSSIBILITY OF SUCH DAMAGE.
26 |  *
27 |  * ************************************************************************ */
28 | 
29 | #ifndef TEST_ROCHPCG_HPP
30 | #define TEST_ROCHPCG_HPP
31 | 
//! ID of the HIP device used by the tests; defined in rochpcg_gtest_main.cpp, where it defaults to 0 and can be set with the "--device" command line option.
extern int device_id;
33 | 
34 | #endif // TEST_ROCHPCG_HPP
35 | 


--------------------------------------------------------------------------------
/src/utils.hpp:
--------------------------------------------------------------------------------
  1 | /* ************************************************************************
  2 |  * Copyright (c) 2019 Advanced Micro Devices, Inc.
  3 |  *
  4 |  * Redistribution and use in source and binary forms, with or without modification,
  5 |  * are permitted provided that the following conditions are met:
  6 |  *
  7 |  * 1. Redistributions of source code must retain the above copyright notice, this
  8 |  *    list of conditions and the following disclaimer.
  9 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
 10 |  *    this list of conditions and the following disclaimer in the documentation
 11 |  *    and/or other materials provided with the distribution.
 12 |  * 3. Neither the name of the copyright holder nor the names of its contributors
 13 |  *    may be used to endorse or promote products derived from this software without
 14 |  *    specific prior written permission.
 15 |  *
 16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 17 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 18 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 19 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 20 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 21 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 22 |  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 23 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 24 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 25 |  * POSSIBILITY OF SUCH DAMAGE.
 26 |  *
 27 |  * ************************************************************************ */
 28 | 
 29 | #ifndef UTILS_HPP
 30 | #define UTILS_HPP
 31 | 
 32 | #include <cstdio>
 33 | #include <hip/hip_runtime_api.h>
 34 | 
 35 | #include "Memory.hpp"
 36 | 
// Streams (both are destroyed in HPCG_Finalize)
extern hipStream_t stream_interior;
extern hipStream_t stream_halo;
// Events (destroyed in HPCG_Finalize)
extern hipEvent_t halo_gather;
// Workspace (released with deviceFree in HPCG_Finalize)
extern void* workspace;
// Memory allocator (cleared in HPCG_Finalize when HPCG_MEMMGMT is defined)
extern hipAllocator_t allocator;

// NOTE(review): presumably the seed used for random number generation -- confirm at the use sites
#define RNG_SEED 0x586744
// NOTE(review): looks like an upper bound on the number of colors -- confirm at the use sites
#define MAX_COLORS 128
 49 | 
 50 | #define NULL_CHECK(ptr)                                 \
 51 | {                                                       \
 52 |     if(ptr == NULL)                                     \
 53 |     {                                                   \
 54 |         fprintf(stderr, "ERROR in file %s ; line %d\n", \
 55 |                 __FILE__,                               \
 56 |                 __LINE__);                              \
 57 |                                                         \
 58 |         hipDeviceReset();                               \
 59 |         exit(1);                                        \
 60 |     }                                                   \
 61 | }
 62 | 
 63 | #define HIP_CHECK(err)                                              \
 64 | {                                                                   \
 65 |     if(err != hipSuccess)                                           \
 66 |     {                                                               \
 67 |         fprintf(stderr, "HIP ERROR %s (%d) in file %s ; line %d\n", \
 68 |                 hipGetErrorString(err),                             \
 69 |                 err,                                                \
 70 |                 __FILE__,                                           \
 71 |                 __LINE__);                                          \
 72 |                                                                     \
 73 |         hipDeviceReset();                                           \
 74 |         exit(1);                                                    \
 75 |     }                                                               \
 76 | }
 77 | 
 78 | #define RETURN_IF_HIP_ERROR(err)    \
 79 | {                                   \
 80 |     if(err != hipSuccess)           \
 81 |     {                               \
 82 |         return err;                 \
 83 |     }                               \
 84 | }
 85 | 
 86 | #define RETURN_IF_HPCG_ERROR(err)   \
 87 | {                                   \
 88 |     if(err != 0)                    \
 89 |     {                               \
 90 |         return err;                 \
 91 |     }                               \
 92 | }
 93 | 
 94 | #define EXIT_IF_HPCG_ERROR(err) \
 95 | {                               \
 96 |     if(err != 0)                \
 97 |     {                           \
 98 |         hipDeviceReset();       \
 99 |         exit(1);                \
100 |     }                           \
101 | }
102 | 
103 | #endif // UTILS_HPP
104 | 


--------------------------------------------------------------------------------