├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── LICENSE
├── NOTICE
├── README.md
├── RELEASE
├── cmake
    ├── FindNvToolsExt.cmake
    ├── Findgdsync.cmake
    ├── Findgpump.cmake
    ├── Findmp.cmake
    ├── FindrocTX.cmake
    ├── Findumr.cmake
    ├── SetupBasics.cmake
    ├── SetupCombConfig.cmake
    ├── SetupCombOptions.cmake
    ├── SetupCompilers.cmake
    ├── SetupDependentOptions.cmake
    └── SetupPackages.cmake
├── host-configs
    ├── lc-builds
    │   ├── blueos
    │   │   ├── clang_X.cmake
    │   │   ├── gcc_X.cmake
    │   │   ├── nvcc_clang_X.cmake
    │   │   ├── nvcc_gcc_X.cmake
    │   │   ├── nvcc_xl_X.cmake
    │   │   ├── pgi_X.cmake
    │   │   └── xl_X.cmake
    │   ├── toss3
    │   │   ├── clang_X.cmake
    │   │   ├── gcc_X.cmake
    │   │   ├── hip_4_link_X.cmake
    │   │   ├── hip_X.cmake
    │   │   ├── icpc_X_gcc7headers.cmake
    │   │   ├── icpc_X_gcc8headers.cmake
    │   │   └── pgi_X.cmake
    │   └── toss4
    │   │   └── hip_X.cmake
    └── ubuntu-builds
    │   ├── clang_X.cmake
    │   ├── gcc_X.cmake
    │   ├── hip.cmake
    │   └── nvcc_gcc_X.cmake
├── include
    ├── Box3d.hpp
    ├── CommFactory.hpp
    ├── ExecContext.hpp
    ├── MeshData.hpp
    ├── MeshInfo.hpp
    ├── MessageBase.hpp
    ├── align.hpp
    ├── basic_mempool.hpp
    ├── comb.hpp
    ├── comm.hpp
    ├── comm_pol_gdsync.hpp
    ├── comm_pol_gpump.hpp
    ├── comm_pol_mock.hpp
    ├── comm_pol_mp.hpp
    ├── comm_pol_mpi.hpp
    ├── comm_pol_mpi_persistent.hpp
    ├── comm_pol_umr.hpp
    ├── comm_utils_gdsync.hpp
    ├── comm_utils_gpump.hpp
    ├── comm_utils_mp.hpp
    ├── comm_utils_mpi.hpp
    ├── comm_utils_umr.hpp
    ├── config.hpp.in
    ├── do_cycles.hpp
    ├── do_cycles_allocators.hpp
    ├── exec.hpp
    ├── exec_fused.hpp
    ├── exec_pol_cuda.hpp
    ├── exec_pol_cuda_graph.hpp
    ├── exec_pol_hip.hpp
    ├── exec_pol_mpi_type.hpp
    ├── exec_pol_omp.hpp
    ├── exec_pol_raja.hpp
    ├── exec_pol_seq.hpp
    ├── exec_utils.hpp
    ├── exec_utils_cuda.hpp
    ├── exec_utils_graph_launch.hpp
    ├── exec_utils_hip.hpp
    ├── memory.hpp
    ├── mutex.hpp
    ├── print.hpp
    └── profiling.hpp
├── scripts
    ├── basic_tests.bash
    ├── combine_output.lua
    ├── focused_cuda_graphs_tests.bash
    ├── focused_gdsync_tests.bash
    ├── focused_gpump_tests.bash
    ├── focused_mp_tests.bash
    ├── focused_mpi_type_tests.bash
    ├── focused_tests.bash
    ├── focused_umr_tests.bash
    ├── lc-builds
    │   ├── blueos_clang.sh
    │   ├── blueos_gcc.sh
    │   ├── blueos_nvcc_clang.sh
    │   ├── blueos_nvcc_gcc.sh
    │   ├── blueos_nvcc_xl.sh
    │   ├── blueos_pgi.sh
    │   ├── blueos_xl.sh
    │   ├── toss3_clang.sh
    │   ├── toss3_gcc.sh
    │   ├── toss3_hipcc.sh
    │   ├── toss3_icpc.sh
    │   ├── toss3_pgi.sh
    │   └── toss4_cray-mpich_amdclang.sh
    ├── mock_cuda_graphs_tests.bash
    ├── mock_tests.bash
    ├── run_nvprof.bash
    ├── run_tests.bash
    ├── scale_tests.bash
    ├── sep_out.bash
    └── ubuntu-builds
    │   ├── ubuntu_clang.sh
    │   ├── ubuntu_gcc.sh
    │   ├── ubuntu_hipcc.sh
    │   └── ubuntu_nvcc10_gcc8.sh
└── src
    ├── CMakeLists.txt
    ├── comb.cpp
    ├── do_cycles.cpp.in
    ├── print.cpp
    ├── print_timer.cpp
    ├── test_copy.cpp
    ├── test_cycles_basic.cpp
    ├── test_cycles_gdsync.cpp
    ├── test_cycles_gpump.cpp
    ├── test_cycles_mock.cpp
    ├── test_cycles_mp.cpp
    ├── test_cycles_mpi.cpp
    ├── test_cycles_mpi_persistent.cpp
    ├── test_cycles_umr.cpp
    └── warmup.cpp


/.gitignore:
--------------------------------------------------------------------------------
1 | /build_*
2 | /install_*
3 | *.swp
4 | *~
5 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "tpl/blt"]
2 | 	path = blt
3 | 	url = https://github.com/LLNL/blt.git
4 | [submodule "tpl/RAJA"]
5 | 	path = tpl/RAJA
6 | 	url = https://github.com/LLNL/RAJA.git
7 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | ##############################################################################
  2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | ##
  4 | ## Produced at the Lawrence Livermore National Laboratory
  5 | ##
  6 | ## LLNL-CODE-758885
  7 | ##
  8 | ## All rights reserved.
  9 | ##
 10 | ## This file is part of Comb.
 11 | ##
 12 | ## For details, see https://github.com/LLNL/Comb
 13 | ## Please also see the LICENSE file for MIT license.
 14 | ##############################################################################
 15 | 
 16 | # cmake_minimum_required() must run before project() (and before any other
 17 | # command) so that the policy defaults are established first.
 18 | cmake_minimum_required(VERSION 3.14.5)
 19 | 
 20 | cmake_policy(SET CMP0042 NEW)
 21 | cmake_policy(SET CMP0048 NEW)
 22 | if (APPLE)
 23 |   cmake_policy(SET CMP0025 NEW)
 24 | endif()
 25 | 
 26 | include(CMakeDependentOption)
 27 | 
 28 | # Version components; combined below and handed to project().
 29 | set(COMB_VERSION_MAJOR 0)
 30 | set(COMB_VERSION_MINOR 3)
 31 | set(COMB_VERSION_PATCHLEVEL 1)
 32 | set(COMB_LOADED "${COMB_VERSION_MAJOR}.${COMB_VERSION_MINOR}.${COMB_VERSION_PATCHLEVEL}")
 33 | 
 34 | project(COMB LANGUAGES CXX C VERSION ${COMB_LOADED})
 35 | 
 36 | set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
 37 | include(cmake/SetupCombOptions.cmake)
 38 | 
 39 | # Detect C++ standard and add appropriate flag _before_ loading BLT
 40 | set(COMPILERS_KNOWN_TO_CMAKE33 AppleClang Clang GNU MSVC)
 41 | set(COMB_UNSUPPORTED_CXX_STDS c++98 c++11) # named list: IN_LIST needs a variable
 42 | include(CheckCXXCompilerFlag)
 43 | if(NOT DEFINED BLT_CXX_STD)
 44 |   if("cxx_std_17" IN_LIST CMAKE_CXX_KNOWN_FEATURES)
 45 |     set(BLT_CXX_STD c++17 CACHE STRING "Version of C++ standard")
 46 |     message("Using C++ standard: ${BLT_CXX_STD}")
 47 |   elseif("cxx_std_14" IN_LIST CMAKE_CXX_KNOWN_FEATURES)
 48 |     set(BLT_CXX_STD c++14 CACHE STRING "Version of C++ standard")
 49 |     message("Using C++ standard: ${BLT_CXX_STD}")
 50 |   elseif("${CMAKE_CXX_COMPILER_ID}" IN_LIST COMPILERS_KNOWN_TO_CMAKE33)
 51 |     set(BLT_CXX_STD c++14 CACHE STRING "Version of C++ standard")
 52 |     message("Using C++ standard: ${BLT_CXX_STD}")
 53 |   else() # CMake cannot tell us: probe the compiler flags directly, newest first
 54 |     foreach(flag_var "c++17" "c++14")
 55 |       check_cxx_compiler_flag("-std=${flag_var}" COMPILER_SUPPORTS_${flag_var})
 56 |       if(COMPILER_SUPPORTS_${flag_var})
 57 |         set(BLT_CXX_STD ${flag_var} CACHE STRING "Version of C++ standard")
 58 |         message("Using C++ standard: ${BLT_CXX_STD}")
 59 |         break()
 60 |       endif()
 61 |     endforeach()
 62 |   endif()
 63 | else() # BLT_CXX_STD was supplied: reject the standards older than c++14
 64 |   if("${BLT_CXX_STD}" IN_LIST COMB_UNSUPPORTED_CXX_STDS)
 65 |     message(FATAL_ERROR "RAJA requires minimum C++ standard of c++14")
 66 |   endif()
 67 | endif()
 68 | 
 69 | # Disable compiler-specific language extensions.
 70 | set(CMAKE_CXX_EXTENSIONS OFF)
 71 | # Load BLT unless BLT_LOADED is already set.
 72 | if (NOT BLT_LOADED)
 73 |   if (NOT DEFINED BLT_SOURCE_DIR)
 74 |     # No location given: fall back to the blt directory inside this project.
 75 |     set(BLT_SOURCE_DIR "${PROJECT_SOURCE_DIR}/blt" CACHE PATH "")
 76 |     if (NOT EXISTS "${BLT_SOURCE_DIR}/SetupBLT.cmake")
 77 |       message(FATAL_ERROR "\
 78 |       The BLT submodule is not present. \
 79 |       If in git repository run the following two commands:\n \
 80 |       git submodule init\n \
 81 |       git submodule update")
 82 |     endif()
 83 |   elseif (NOT EXISTS "${BLT_SOURCE_DIR}/SetupBLT.cmake")
 84 |     # A caller-provided location must actually contain SetupBLT.cmake.
 85 |     message(FATAL_ERROR "Given BLT_SOURCE_DIR does not contain SetupBLT.cmake")
 86 |   endif()
 87 | 
 88 |   include("${BLT_SOURCE_DIR}/SetupBLT.cmake")
 89 | endif()
 90 | 
 91 | # Run Comb's setup modules in this fixed order:
 92 | #   DependentOptions - options that depend on BLT
 93 | #   Basics           - basic CMake options
 94 | #   Packages         - third-party packages
 95 | #   Compilers        - vendor-specific compiler flags
 96 | #   CombConfig       - internal COMB configuration options
 97 | foreach(comb_setup_module
 98 |     DependentOptions Basics Packages Compilers CombConfig)
 99 |   include(cmake/Setup${comb_setup_module}.cmake)
100 | endforeach()
101 | 
102 | add_subdirectory(src)
103 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
 1 | This work was produced under the auspices of the U.S. Department of Energy by
 2 | Lawrence Livermore National Laboratory under Contract DE-AC52-07NA27344.
 3 | 
 4 | This work was prepared as an account of work sponsored by an agency of the
 5 | United States Government. Neither the United States Government nor Lawrence
 6 | Livermore National Security, LLC, nor any of their employees makes any warranty,
 7 | expressed or implied, or assumes any legal liability or responsibility for the
 8 | accuracy, completeness, or usefulness of any information, apparatus, product, or
 9 | process disclosed, or represents that its use would not infringe privately owned
10 | rights. Reference herein to any specific commercial product, process, or service
11 | by trade name, trademark, manufacturer, or otherwise does not necessarily
12 | constitute or imply its endorsement, recommendation, or favoring by the United
13 | States Government or Lawrence Livermore National Security, LLC. The views and
14 | opinions of authors expressed herein do not necessarily state or reflect those
15 | of the United States Government or Lawrence Livermore National Security, LLC,
16 | and shall not be used for advertising or product endorsement purposes.
17 | 


--------------------------------------------------------------------------------
/RELEASE:
--------------------------------------------------------------------------------
 1 | Comb Version 0.3
 2 | 
 3 | Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 4 | Produced at the Lawrence Livermore National Laboratory.
 5 | All rights reserved. See details in Comb/LICENSE.
 6 | 
 7 | Open Source – MIT Distribution
 8 | LLNL-CODE-758885
 9 | 
10 | Created by Jason Burmark (burmark1@llnl.gov)
11 | 
12 | Contributors:
13 | 


--------------------------------------------------------------------------------
/cmake/FindNvToolsExt.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | ###############################################################################
17 | #
18 | # Setup nvToolsExt
19 | # This file defines:
20 | #  NVTOOLSEXT_FOUND - If nvToolsExt was found (NvToolsExt_FOUND is set as well)
21 | #  NVTOOLSEXT_INCLUDE_DIRS - The nvToolsExt include directories
22 | #  NVTOOLSEXT_LIBRARY - The nvToolsExt library
23 | 
24 | # The search below is rooted at CUDA_TOOLKIT_ROOT_DIR, so it must be set.
25 | if(NOT CUDA_TOOLKIT_ROOT_DIR)
26 |     message(FATAL_ERROR "Could not find NvToolsExt. NvToolsExt support needs explicit CUDA_TOOLKIT_ROOT_DIR")
27 | endif()
28 | 
29 | # Locate the header and the library under the CUDA toolkit root.
30 | find_path( NVTOOLSEXT_INCLUDE_DIRS nvToolsExt.h
31 |            HINTS ${CUDA_TOOLKIT_ROOT_DIR}/include )
32 | 
33 | find_library( NVTOOLSEXT_LIBRARY NAMES nvToolsExt libnvToolsExt
34 |               HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib )
35 | 
36 | 
37 | include(FindPackageHandleStandardArgs)
38 | # Pass the real package name so the QUIET and REQUIRED arguments given to
39 | # find_package(NvToolsExt) are honoured; NVTOOLSEXT_FOUND is still set too.
40 | find_package_handle_standard_args(NvToolsExt  DEFAULT_MSG
41 |                                   NVTOOLSEXT_INCLUDE_DIRS
42 |                                   NVTOOLSEXT_LIBRARY )
43 | 


--------------------------------------------------------------------------------
/cmake/Findgdsync.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Locate the gdsync prefix: the directory that contains lib/libgdsync.so.
17 | find_path(GDSYNC_PATH
18 |   NAMES "lib/libgdsync.so"
19 |   PATHS ENV GDSYNC_DIR /opt/ibm/spectrum_mpi/libgdsync
20 |   DOC "Path to gdsync library")
21 | 
22 | if(NOT GDSYNC_PATH)
23 |   # Not found: warn, but let configuration continue.
24 |   set(GDSYNC_FOUND FALSE)
25 |   message(WARNING "gdsync library not found")
26 | else()
27 |   message(STATUS "GDSYNC_PATH:  ${GDSYNC_PATH}")
28 |   set(GDSYNC_FOUND TRUE)
29 |   # Derive the include and link settings from the discovered prefix.
30 |   set(GDSYNC_INCLUDE_PATH      "${GDSYNC_PATH}/include")
31 |   set(GDSYNC_CXX_COMPILE_FLAGS "-I${GDSYNC_PATH}/include")
32 |   set(GDSYNC_CXX_LINK_FLAGS    "-L${GDSYNC_PATH}/lib")
33 |   set(GDSYNC_CXX_LIBRARIES     "${GDSYNC_PATH}/lib/libgdsync.so")
34 |   unset(GDSYNC_ARCH)
35 | endif()
36 | 


--------------------------------------------------------------------------------
/cmake/Findgpump.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Locate the gpump prefix: the directory that contains lib/libgpump.so.
17 | find_path(GPUMP_PATH
18 |   NAMES "lib/libgpump.so"
19 |   PATHS ENV GPUMP_DIR /opt/ibm/spectrum_mpi/libgpump
20 |   DOC "Path to gpump library")
21 | 
22 | if(NOT GPUMP_PATH)
23 |   # Not found: warn, but let configuration continue.
24 |   set(GPUMP_FOUND FALSE)
25 |   message(WARNING "gpump library not found")
26 | else()
27 |   message(STATUS "GPUMP_PATH:  ${GPUMP_PATH}")
28 |   set(GPUMP_FOUND TRUE)
29 |   # Derive the include and link settings from the discovered prefix.
30 |   set(GPUMP_INCLUDE_PATH      "${GPUMP_PATH}/include")
31 |   set(GPUMP_CXX_COMPILE_FLAGS "-I${GPUMP_PATH}/include")
32 |   set(GPUMP_CXX_LINK_FLAGS    "-L${GPUMP_PATH}/lib")
33 |   set(GPUMP_CXX_LIBRARIES     "${GPUMP_PATH}/lib/libgpump.so")
34 |   unset(GPUMP_ARCH)
35 | endif()
36 | 


--------------------------------------------------------------------------------
/cmake/Findmp.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Locate the mp prefix: the directory that contains lib/libmp.so.
17 | find_path(MP_PATH
18 |   NAMES "lib/libmp.so"
19 |   PATHS ENV MP_DIR /opt/ibm/spectrum_mpi/libmp
20 |   DOC "Path to mp library")
21 | 
22 | if(NOT MP_PATH)
23 |   # Not found: warn, but let configuration continue.
24 |   set(MP_FOUND FALSE)
25 |   message(WARNING "mp library not found")
26 | else()
27 |   message(STATUS "MP_PATH:  ${MP_PATH}")
28 |   set(MP_FOUND TRUE)
29 |   # Derive the include and link settings from the discovered prefix.
30 |   set(MP_INCLUDE_PATH      "${MP_PATH}/include")
31 |   set(MP_CXX_COMPILE_FLAGS "-I${MP_PATH}/include")
32 |   set(MP_CXX_LINK_FLAGS    "-L${MP_PATH}/lib")
33 |   set(MP_CXX_LIBRARIES     "${MP_PATH}/lib/libmp.so")
34 |   unset(MP_ARCH)
35 | endif()
36 | 


--------------------------------------------------------------------------------
/cmake/FindrocTX.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | ###############################################################################
17 | #
18 | # Setup rocTX
19 | # This file defines:
20 | #  ROCTX_FOUND - If rocTX was found (rocTX_FOUND is set as well)
21 | #  ROCTX_INCLUDE_DIRS - The rocTX include directories
22 | #  ROCTX_LIBRARY - The rocTX library
23 | 
24 | # Locate roctx.h; hints are tried in the order listed.
25 | find_path( ROCTX_INCLUDE_DIRS
26 |   NAMES roctx.h
27 |   HINTS
28 |     ${ROCTX_DIR}/include
29 |     ${ROCTRACER_DIR}/include
30 |     ${HIP_ROOT_DIR}/../roctracer/include
31 |     ${HIP_ROOT_DIR}/../include )
32 | 
33 | find_library( ROCTX_LIBRARY
34 |   NAMES roctx64 libroctx64
35 |   HINTS
36 |     ${ROCTX_DIR}/lib
37 |     ${ROCTRACER_DIR}/lib
38 |     ${HIP_ROOT_DIR}/../roctracer/lib
39 |     ${HIP_ROOT_DIR}/../lib )
40 | 
41 | 
42 | include(FindPackageHandleStandardArgs)
43 | # Pass the real package name so the QUIET and REQUIRED arguments given to
44 | # find_package(rocTX) are honoured; ROCTX_FOUND is still set too.
45 | find_package_handle_standard_args(rocTX  DEFAULT_MSG
46 |                                   ROCTX_INCLUDE_DIRS
47 |                                   ROCTX_LIBRARY )
48 | 


--------------------------------------------------------------------------------
/cmake/Findumr.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Locate the umr prefix: the directory that contains lib/libumr.so.
17 | find_path(UMR_PATH
18 |   NAMES "lib/libumr.so"
19 |   PATHS ENV UMR_DIR /opt/ibm/spectrum_mpi/libumr
20 |   DOC "Path to umr library")
21 | 
22 | if(NOT UMR_PATH)
23 |   # Not found: warn, but let configuration continue.
24 |   set(UMR_FOUND FALSE)
25 |   message(WARNING "umr library not found")
26 | else()
27 |   message(STATUS "UMR_PATH:  ${UMR_PATH}")
28 |   set(UMR_FOUND TRUE)
29 |   # Derive the include and link settings from the discovered prefix.
30 |   set(UMR_INCLUDE_PATH      "${UMR_PATH}/include")
31 |   set(UMR_CXX_COMPILE_FLAGS "-I${UMR_PATH}/include")
32 |   set(UMR_CXX_LINK_FLAGS    "-L${UMR_PATH}/lib")
33 |   set(UMR_CXX_LIBRARIES     "${UMR_PATH}/lib/libumr.so")
34 |   unset(UMR_ARCH)
35 | endif()
36 | 


--------------------------------------------------------------------------------
/cmake/SetupBasics.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 |  # Fall back to a Release build when no build type was requested.
17 |  if(NOT CMAKE_BUILD_TYPE)
18 |    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build,    options are: Debug Release RelWithDebInfo" FORCE)
19 |  endif()
20 | 


--------------------------------------------------------------------------------
/cmake/SetupCombConfig.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Mirror each ENABLE_<X> switch into the matching COMB_ENABLE_<X> variable.
17 | # The features are grouped by kind purely for readability.
18 | foreach(comb_feature
19 |     MPI
20 |     OPENMP
21 |     CUDA NV_TOOLS_EXT CLANG_CUDA
22 |     HIP ROCTX
23 |     GDSYNC GPUMP MP UMR
24 |     RAJA
25 |     CALIPER ADIAK)
26 |   set(COMB_ENABLE_${comb_feature} ${ENABLE_${comb_feature}})
27 | endforeach()
28 | 
29 | # COMB_ENABLE_CUDA_GRAPH is only set for CUDA builds: On when CUDA_VERSION
30 | # is at least 10, Off otherwise.
31 | if (COMB_ENABLE_CUDA AND CUDA_VERSION VERSION_GREATER_EQUAL 10)
32 |   set(COMB_ENABLE_CUDA_GRAPH On)
33 | elseif (COMB_ENABLE_CUDA)
34 |   set(COMB_ENABLE_CUDA_GRAPH Off)
35 | endif()
36 | 
37 | # Record the compilers CMake selected.
38 | set(COMB_HIP_COMPILER ${CMAKE_HIP_CLANG_COMPILER})
39 | set(COMB_CUDA_COMPILER ${CMAKE_CUDA_COMPILER})
40 | set(COMB_CXX_COMPILER ${CMAKE_CXX_COMPILER})
41 | 
42 | # Generate include/config.hpp in the build tree from the config.hpp.in
43 | # template in the source tree.
44 | configure_file(
45 |   ${PROJECT_SOURCE_DIR}/include/config.hpp.in
46 |   ${PROJECT_BINARY_DIR}/include/config.hpp)
47 | 


--------------------------------------------------------------------------------
/cmake/SetupCombOptions.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Enable MPI by default
17 | set(ENABLE_MPI On CACHE BOOL "Build MPI support")
18 | 
19 | # Optional Comb features, stored as cache entries.
20 | set(COMB_ENABLE_RAJA ON CACHE BOOL "Build RAJA support")
21 | set(COMB_ENABLE_CALIPER Off CACHE BOOL "Build Caliper support")
22 | set(COMB_ENABLE_ADIAK Off CACHE BOOL "Build Adiak support")
23 | set(COMB_ENABLE_GDSYNC Off CACHE BOOL "Build GDSYNC support")
24 | set(COMB_ENABLE_GPUMP Off CACHE BOOL "Build GPUMP support")
25 | set(COMB_ENABLE_MP Off CACHE BOOL "Build MP support")
26 | set(COMB_ENABLE_UMR Off CACHE BOOL "Build UMR support")
27 | option(COMB_ENABLE_LOG "Build logging support" Off)
28 | 
29 | # Extras offered by the libraries Comb builds on; all off by default.
30 | option(ENABLE_MODULES "Enable modules in supporting compilers (clang)" Off)
31 | option(ENABLE_EXERCISES "Build exercises " Off)
32 | option(ENABLE_EXAMPLES "Build simple examples" Off)
33 | option(ENABLE_REPRODUCERS "Build issue reproducers" Off)
34 | option(ENABLE_TESTS "Build tests" Off)
35 | 
36 | # CUDA: separable compilation is required by comb, set before load BLT.
37 | if (ENABLE_CUDA)
38 |   set(CUDA_SEPARABLE_COMPILATION ON CACHE BOOL "")
39 | endif()
40 | 
41 | # CUDA: fall back to Comb's default architecture when none was specified.
42 | if (ENABLE_CUDA AND NOT DEFINED CUDA_ARCH)
43 |   message(STATUS "CUDA compute architecture set to Comb default sm_35 since it was not specified")
44 |   set(CUDA_ARCH "sm_35" CACHE STRING "Set CUDA_ARCH to Comb minimum supported" FORCE)
45 | endif()
46 | 
47 | # HIP: no extra settings are applied here; HIP_SEPARABLE_COMPILATION is
48 | # not set by this file.
49 | 
50 | 


--------------------------------------------------------------------------------
/cmake/SetupCompilers.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Extend the cached per-configuration C++ flags with an optimization level.
17 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0" CACHE STRING "")
18 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3" CACHE STRING "")
19 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3" CACHE STRING "")
20 | 
21 | # GCC releases older than 4.9 are rejected outright.
22 | if (CMAKE_CXX_COMPILER_ID MATCHES GNU AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9)
23 |   message(FATAL_ERROR "COMB requires GCC 4.9 or greater!")
24 | endif()
25 | 
26 | set(COMB_COMPILER "COMB_COMPILER_${CMAKE_CXX_COMPILER_ID}")
27 | 
28 | # For MSVC builds without BUILD_SHARED_LIBS, swap /MD for /MT in each flag set.
29 | if (MSVC AND NOT BUILD_SHARED_LIBS)
30 |   foreach(msvc_flags_name
31 |       CMAKE_CXX_FLAGS
32 |       CMAKE_CXX_FLAGS_DEBUG
33 |       CMAKE_CXX_FLAGS_RELEASE
34 |       CMAKE_CXX_FLAGS_MINSIZEREL
35 |       CMAKE_CXX_FLAGS_RELWITHDEBINFO)
36 |     if(${msvc_flags_name} MATCHES "/MD")
37 |       string(REPLACE "/MD" "/MT" ${msvc_flags_name} "${${msvc_flags_name}}")
38 |     endif()
39 |   endforeach()
40 | endif()
41 | 
42 | # CUDA builds: language standard plus the flags appended to every configuration.
43 | if (COMB_ENABLE_CUDA)
44 |   set(CMAKE_CUDA_STANDARD 14)
45 |   string(APPEND CMAKE_CUDA_FLAGS " -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr -Xcudafe \"--display_error_number\"")
46 | endif()
47 | # Per-configuration CUDA flags, skipped when COMB_HOST_CONFIG_LOADED is set.
48 | if (COMB_ENABLE_CUDA AND NOT COMB_HOST_CONFIG_LOADED)
49 |   set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0")
50 |   set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O2")
51 |   set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os")
52 |   set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
53 | endif()
54 | 
55 | # HIP builds: cache the language standard used for HIP sources.
56 | if (COMB_ENABLE_HIP)
57 |   set(CMAKE_HIP_STANDARD "14" CACHE STRING "Version of C++ standard for HIP Builds")
58 | endif()
59 | 


--------------------------------------------------------------------------------
/cmake/SetupDependentOptions.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | ##
17 | ## Comb options whose availability depends on another option.
18 | ##
19 | # Backends: offered (default On) while the matching ENABLE_<X> is true, else forced Off.
20 | cmake_dependent_option(COMB_ENABLE_MPI        "Build MPI support"        On "ENABLE_MPI"        Off)
21 | cmake_dependent_option(COMB_ENABLE_OPENMP     "Build OpenMP support"     On "ENABLE_OPENMP"     Off)
22 | cmake_dependent_option(COMB_ENABLE_CUDA       "Build CUDA support"       On "ENABLE_CUDA"       Off)
23 | cmake_dependent_option(COMB_ENABLE_HIP        "Build HIP support"        On "ENABLE_HIP"        Off)
24 | cmake_dependent_option(COMB_ENABLE_CLANG_CUDA "Build Clang CUDA support" On "ENABLE_CLANG_CUDA" Off)
25 | # NV_TOOLS_EXT follows COMB_ENABLE_CUDA; ROCTX follows COMB_ENABLE_HIP.
26 | cmake_dependent_option(COMB_ENABLE_NV_TOOLS_EXT "Build NV_TOOLS_EXT support" On "COMB_ENABLE_CUDA" Off)
27 | cmake_dependent_option(COMB_ENABLE_ROCTX        "Build ENABLE_ROCTX support" On "COMB_ENABLE_HIP"  Off)
28 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/blueos/clang_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Record COMB_COMPILER and flag this host-config as loaded.
17 | set(COMB_COMPILER "COMB_COMPILER_CLANG" CACHE STRING "")
18 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
19 | # C++ flags for each build configuration.
20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "")
21 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g" CACHE STRING "")
22 | set(CMAKE_CXX_FLAGS_RELEASE "-O3" CACHE STRING "")
23 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/blueos/gcc_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for this host-config.
17 | set(COMB_COMPILER "COMB_COMPILER_GNU" CACHE STRING "")
18 | 
19 | # Per-build-type C++ flags (cache entries without FORCE, so existing cache
20 | # values win).
21 | set(CMAKE_CXX_FLAGS_DEBUG
22 |     "-O0 -g"
23 |     CACHE STRING "")
24 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO
25 |     "-Ofast -g -finline-functions -finline-limit=20000"
26 |     CACHE STRING "")
27 | set(CMAKE_CXX_FLAGS_RELEASE
28 |     "-Ofast -finline-functions -finline-limit=20000"
29 |     CACHE STRING "")
30 | 
31 | # Record that a host-config file has been processed.
32 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
33 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/blueos/nvcc_clang_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for the host compiler.
17 | set(COMB_COMPILER "COMB_COMPILER_CLANG" CACHE STRING "")
18 | 
19 | # Host C++ flags per build type.
20 | set(CMAKE_CXX_FLAGS_DEBUG          "-O0 -g" CACHE STRING "")
21 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g" CACHE STRING "")
22 | set(CMAKE_CXX_FLAGS_RELEASE        "-O3"    CACHE STRING "")
23 | 
24 | # Host-compiler flags handed through nvcc (each one prefixed by -Xcompiler).
25 | # Used by the optimized CUDA configurations below, but not by Debug.
26 | set(HOST_OPT_FLAGS "-Xcompiler -O3 -Xcompiler -fopenmp")
27 | 
28 | # CUDA flags per build type.
29 | set(CMAKE_CUDA_FLAGS_DEBUG          "-g -G -O0"                          CACHE STRING "")
30 | set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O3 ${HOST_OPT_FLAGS}" CACHE STRING "")
31 | set(CMAKE_CUDA_FLAGS_RELEASE        "-O3 ${HOST_OPT_FLAGS}"              CACHE STRING "")
32 | 
33 | # Record that a host-config file has been processed.
34 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
35 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/blueos/nvcc_gcc_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for the host compiler.
17 | set(COMB_COMPILER "COMB_COMPILER_GNU" CACHE STRING "")
18 | 
19 | # Host C++ flags per build type.
20 | set(CMAKE_CXX_FLAGS_DEBUG          "-O0 -g"                       CACHE STRING "")
21 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Ofast -g -finline-functions" CACHE STRING "")
22 | set(CMAKE_CXX_FLAGS_RELEASE        "-Ofast -finline-functions"    CACHE STRING "")
23 | 
24 | # Host-compiler flags handed through nvcc (each one prefixed by -Xcompiler).
25 | # Used by the optimized CUDA configurations below, but not by Debug.
26 | set(HOST_OPT_FLAGS "-Xcompiler -O3 -Xcompiler -finline-functions -Xcompiler -fopenmp")
27 | 
28 | # CUDA flags per build type.
29 | set(CMAKE_CUDA_FLAGS_DEBUG          "-g -G -O0"                          CACHE STRING "")
30 | set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O3 ${HOST_OPT_FLAGS}" CACHE STRING "")
31 | set(CMAKE_CUDA_FLAGS_RELEASE        "-O3 ${HOST_OPT_FLAGS}"              CACHE STRING "")
32 | 
33 | # Record that a host-config file has been processed.
34 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
35 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/blueos/nvcc_xl_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for the host compiler.
17 | set(COMB_COMPILER "COMB_COMPILER_XLC" CACHE STRING "")
18 | 
19 | # Host C++ flags per build type, plus the executable linker flags.
20 | set(CMAKE_CXX_FLAGS_DEBUG          "-O0 -g -qsmp=omp:noopt" CACHE STRING "")
21 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g9"                CACHE STRING "")
22 | set(CMAKE_CXX_FLAGS_RELEASE        "-O3"                    CACHE STRING "")
23 | set(CMAKE_EXE_LINKER_FLAGS         "-Wl,-z,muldefs"         CACHE STRING "")
24 | 
25 | # Host-compiler flags handed through nvcc (each one prefixed by -Xcompiler).
26 | # Suppressed XLC warnings:
27 | # - 1500-029 cannot inline
28 | # - 1500-036 nostrict optimizations may alter code semantics
29 | #   (can be countered with -qstrict, with less optimization)
30 | set(HOST_OPT_FLAGS "-Xcompiler -O3 -Xcompiler -qxlcompatmacros -Xcompiler -qalias=noansi -Xcompiler -qsmp=omp -Xcompiler -qhot -Xcompiler -qnoeh -Xcompiler -qsuppress=1500-029 -Xcompiler -qsuppress=1500-036")
31 | 
32 | # CUDA flags per build type; Debug does not include HOST_OPT_FLAGS.
33 | set(CMAKE_CUDA_FLAGS_DEBUG          "-g -G -O0"                          CACHE STRING "")
34 | set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O3 ${HOST_OPT_FLAGS}" CACHE STRING "")
35 | set(CMAKE_CUDA_FLAGS_RELEASE        "-O3 ${HOST_OPT_FLAGS}"              CACHE STRING "")
36 | 
37 | # Record that a host-config file has been processed.
38 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
39 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/blueos/pgi_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for this host-config.
17 | set(COMB_COMPILER "COMB_COMPILER_PGI" CACHE STRING "")
18 | 
19 | # Per-build-type C++ flags; every configuration carries -mp.
20 | set(CMAKE_CXX_FLAGS_DEBUG          "-O0 -g -mp"   CACHE STRING "")
21 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-fast -g -mp" CACHE STRING "")
22 | set(CMAKE_CXX_FLAGS_RELEASE        "-O3 -fast -mp" CACHE STRING "")
23 | 
24 | # Record that a host-config file has been processed.
25 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
26 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/blueos/xl_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for this host-config.
17 | set(COMB_COMPILER "COMB_COMPILER_XLC" CACHE STRING "")
18 | 
19 | # Per-build-type C++ flags.
20 | # Suppressed XLC warnings in the optimized configurations:
21 | # - 1500-029 cannot inline
22 | # - 1500-036 nostrict optimizations may alter code semantics
23 | #   (can be countered with -qstrict, with less optimization)
24 | set(CMAKE_CXX_FLAGS_DEBUG
25 |     "-O0 -g -qsmp=omp:noopt "
26 |     CACHE STRING "")
27 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO
28 |     "-O2 -g -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qsmp=omp -qhot -qnoeh -qsuppress=1500-029 -qsuppress=1500-036"
29 |     CACHE STRING "")
30 | set(CMAKE_CXX_FLAGS_RELEASE
31 |     "-O3 -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qsmp=omp -qhot -qnoeh -qsuppress=1500-029 -qsuppress=1500-036"
32 |     CACHE STRING "")
33 | 
34 | # Linker flags applied to executables.
35 | set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,muldefs" CACHE STRING "")
36 | 
37 | # Record that a host-config file has been processed.
38 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
39 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/toss3/clang_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for this host-config.
17 | set(COMB_COMPILER "COMB_COMPILER_CLANG" CACHE STRING "")
18 | 
19 | # Per-build-type C++ flags (cache entries without FORCE, so existing cache
20 | # values win).
21 | set(CMAKE_CXX_FLAGS_DEBUG
22 |     "-O0 -g"
23 |     CACHE STRING "")
24 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO
25 |     "-O3 -g -msse4.2 -funroll-loops -finline-functions"
26 |     CACHE STRING "")
27 | set(CMAKE_CXX_FLAGS_RELEASE
28 |     "-O3 -msse4.2 -funroll-loops -finline-functions"
29 |     CACHE STRING "")
30 | 
31 | # Record that a host-config file has been processed.
32 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
33 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/toss3/gcc_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for this host-config.
17 | set(COMB_COMPILER "COMB_COMPILER_GNU" CACHE STRING "")
18 | 
19 | # Per-build-type C++ flags (cache entries without FORCE, so existing cache
20 | # values win).
21 | set(CMAKE_CXX_FLAGS_DEBUG
22 |     "-O0 -g"
23 |     CACHE STRING "")
24 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO
25 |     "-Ofast -g -finline-functions -finline-limit=20000"
26 |     CACHE STRING "")
27 | set(CMAKE_CXX_FLAGS_RELEASE
28 |     "-Ofast -finline-functions -finline-limit=20000"
29 |     CACHE STRING "")
30 | 
31 | # Record that a host-config file has been processed.
32 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
33 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/toss3/hip_4_link_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for this host-config.
17 | set(COMB_COMPILER "COMB_COMPILER_CLANG" CACHE STRING "")
18 | 
19 | # Host C++ flags per build type.
20 | set(CMAKE_CXX_FLAGS_RELEASE "-O2" CACHE STRING "")
21 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g" CACHE STRING "")
22 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "")
23 | 
24 | # Extension points for extra hipcc/host flags. set() without a value leaves
25 | # the (non-cache) variable unset.
26 | set(HIP_COMMON_OPT_FLAGS )
27 | set(HIP_COMMON_DEBUG_FLAGS)
28 | set(HOST_OPT_FLAGS)
29 | 
30 | # When ROCM_ROOT_DIR is provided, add its hip/lib, lib and lib64 directories
31 | # as library search paths and rpath entries, and link amdhip64, hsakmt and
32 | # hsa-runtime64 explicitly. The value is a string of linker flags, not a
33 | # filesystem location, so it is cached as STRING rather than PATH.
34 | if(DEFINED ROCM_ROOT_DIR)
35 |   set(CMAKE_EXE_LINKER_FLAGS "-Wl,--disable-new-dtags -L${ROCM_ROOT_DIR}/hip/lib -L${ROCM_ROOT_DIR}/lib -L${ROCM_ROOT_DIR}/lib64 -Wl,-rpath,${ROCM_ROOT_DIR}/hip/lib:${ROCM_ROOT_DIR}/lib:${ROCM_ROOT_DIR}/lib64 -lamdhip64 -lhsakmt -lhsa-runtime64" CACHE STRING "")
36 | endif()
37 | 
38 | # hipcc flags for the configured CMAKE_BUILD_TYPE. COMB_HIPCC_FLAGS is left
39 | # untouched when the build type matches none of the three names below.
40 | if(CMAKE_BUILD_TYPE MATCHES Release)
41 |   set(COMB_HIPCC_FLAGS "-fPIC -O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}" CACHE STRING "")
42 | elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
43 |   set(COMB_HIPCC_FLAGS "-fPIC -g -O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}" CACHE STRING "")
44 | elseif(CMAKE_BUILD_TYPE MATCHES Debug)
45 |   set(COMB_HIPCC_FLAGS "-fPIC -g -O0 ${HIP_COMMON_DEBUG_FLAGS}" CACHE STRING "")
46 | endif()
47 | 
48 | # Record that a host-config file has been processed.
49 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
50 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/toss3/hip_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for this host-config.
17 | set(COMB_COMPILER "COMB_COMPILER_CLANG" CACHE STRING "")
18 | 
19 | # Host C++ flags per build type.
20 | set(CMAKE_CXX_FLAGS_DEBUG          "-O0 -g" CACHE STRING "")
21 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g" CACHE STRING "")
22 | set(CMAKE_CXX_FLAGS_RELEASE        "-O2"    CACHE STRING "")
23 | 
24 | # Extension points for extra hipcc/host flags; all three start out unset.
25 | unset(HOST_OPT_FLAGS)
26 | unset(HIP_COMMON_DEBUG_FLAGS)
27 | unset(HIP_COMMON_OPT_FLAGS)
28 | 
29 | # hipcc flags for the configured CMAKE_BUILD_TYPE. COMB_HIPCC_FLAGS is left
30 | # untouched when the build type matches none of the three names below.
31 | if(CMAKE_BUILD_TYPE MATCHES Release)
32 |   set(COMB_HIPCC_FLAGS
33 |       "-fPIC -O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}"
34 |       CACHE STRING "")
35 | elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
36 |   set(COMB_HIPCC_FLAGS
37 |       "-fPIC -g -O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}"
38 |       CACHE STRING "")
39 | elseif(CMAKE_BUILD_TYPE MATCHES Debug)
40 |   set(COMB_HIPCC_FLAGS
41 |       "-fPIC -g -O0 ${HIP_COMMON_DEBUG_FLAGS}"
42 |       CACHE STRING "")
43 | endif()
44 | 
45 | # Record that a host-config file has been processed.
46 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
47 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/toss3/icpc_X_gcc7headers.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for this host-config.
17 | set(COMB_COMPILER "COMB_COMPILER_ICC" CACHE STRING "")
18 | 
19 | # Prefix shared by every build type: points -gxx-name at the gcc 7.1.0 g++.
20 | set(COMMON_FLAGS "-gxx-name=/usr/tce/packages/gcc/gcc-7.1.0/bin/g++")
21 | 
22 | # Per-build-type C++ flags, each starting with COMMON_FLAGS.
23 | set(CMAKE_CXX_FLAGS_DEBUG
24 |     "${COMMON_FLAGS} -O0 -g"
25 |     CACHE STRING "")
26 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO
27 |     "${COMMON_FLAGS} -O3 -g -fp-model source -unroll-aggressive -finline-functions -axCORE-AVX2 -diag-disable cpu-dispatch"
28 |     CACHE STRING "")
29 | set(CMAKE_CXX_FLAGS_RELEASE
30 |     "${COMMON_FLAGS} -O3 -fp-model source -unroll-aggressive -finline-functions -axCORE-AVX2 -diag-disable cpu-dispatch"
31 |     CACHE STRING "")
32 | 
33 | # Record that a host-config file has been processed.
34 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
35 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/toss3/icpc_X_gcc8headers.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for this host-config.
17 | set(COMB_COMPILER "COMB_COMPILER_ICC" CACHE STRING "")
18 | 
19 | # Prefix shared by every build type: points -gxx-name at the gcc 8.1.0 g++.
20 | set(COMMON_FLAGS "-gxx-name=/usr/tce/packages/gcc/gcc-8.1.0/bin/g++")
21 | 
22 | # Per-build-type C++ flags, each starting with COMMON_FLAGS.
23 | set(CMAKE_CXX_FLAGS_DEBUG
24 |     "${COMMON_FLAGS} -O0 -g"
25 |     CACHE STRING "")
26 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO
27 |     "${COMMON_FLAGS} -O3 -g -march=native -ansi-alias -axCORE-AVX2 -diag-disable cpu-dispatch"
28 |     CACHE STRING "")
29 | set(CMAKE_CXX_FLAGS_RELEASE
30 |     "${COMMON_FLAGS} -O3 -march=native -ansi-alias -axCORE-AVX2 -diag-disable cpu-dispatch"
31 |     CACHE STRING "")
32 | 
33 | # Record that a host-config file has been processed.
34 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
35 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/toss3/pgi_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for this host-config.
17 | set(COMB_COMPILER "COMB_COMPILER_PGI" CACHE STRING "")
18 | 
19 | # Per-build-type C++ flags; every configuration carries -mp.
20 | set(CMAKE_CXX_FLAGS_DEBUG          "-O0 -g -mp"       CACHE STRING "")
21 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -fast -mp" CACHE STRING "")
22 | set(CMAKE_CXX_FLAGS_RELEASE        "-O3 -fast -mp"    CACHE STRING "")
23 | 
24 | # Record that a host-config file has been processed.
25 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
26 | 


--------------------------------------------------------------------------------
/host-configs/lc-builds/toss4/hip_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for this host-config.
17 | set(COMB_COMPILER "COMB_COMPILER_CLANG" CACHE STRING "")
18 | 
19 | # Host C++ flags per build type.
20 | set(CMAKE_CXX_FLAGS_DEBUG          "-O0 -g" CACHE STRING "")
21 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g" CACHE STRING "")
22 | set(CMAKE_CXX_FLAGS_RELEASE        "-O2"    CACHE STRING "")
23 | 
24 | # Extension points for extra hipcc/host flags; all three start out unset.
25 | unset(HOST_OPT_FLAGS)
26 | unset(HIP_COMMON_DEBUG_FLAGS)
27 | unset(HIP_COMMON_OPT_FLAGS)
28 | 
29 | # hipcc flags for the configured CMAKE_BUILD_TYPE. COMB_HIPCC_FLAGS is left
30 | # untouched when the build type matches none of the three names below.
31 | if(CMAKE_BUILD_TYPE MATCHES Release)
32 |   set(COMB_HIPCC_FLAGS
33 |       "-fPIC -O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}"
34 |       CACHE STRING "")
35 | elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
36 |   set(COMB_HIPCC_FLAGS
37 |       "-fPIC -g -O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}"
38 |       CACHE STRING "")
39 | elseif(CMAKE_BUILD_TYPE MATCHES Debug)
40 |   set(COMB_HIPCC_FLAGS
41 |       "-fPIC -g -O0 ${HIP_COMMON_DEBUG_FLAGS}"
42 |       CACHE STRING "")
43 | endif()
44 | 
45 | # Record that a host-config file has been processed.
46 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
47 | 


--------------------------------------------------------------------------------
/host-configs/ubuntu-builds/clang_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for this host-config.
17 | set(COMB_COMPILER "COMB_COMPILER_CLANG" CACHE STRING "")
18 | 
19 | # Per-build-type C++ flags (cache entries without FORCE, so existing cache
20 | # values win).
21 | set(CMAKE_CXX_FLAGS_DEBUG
22 |     "-O0 -g"
23 |     CACHE STRING "")
24 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO
25 |     "-O3 -g -msse4.2 -funroll-loops -finline-functions"
26 |     CACHE STRING "")
27 | set(CMAKE_CXX_FLAGS_RELEASE
28 |     "-O3 -msse4.2 -funroll-loops -finline-functions"
29 |     CACHE STRING "")
30 | 
31 | # Record that a host-config file has been processed.
32 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
33 | 


--------------------------------------------------------------------------------
/host-configs/ubuntu-builds/gcc_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for this host-config.
17 | set(COMB_COMPILER "COMB_COMPILER_GNU" CACHE STRING "")
18 | 
19 | # Per-build-type C++ flags (cache entries without FORCE, so existing cache
20 | # values win).
21 | set(CMAKE_CXX_FLAGS_DEBUG
22 |     "-O0 -g"
23 |     CACHE STRING "")
24 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO
25 |     "-Ofast -g -finline-functions -finline-limit=20000"
26 |     CACHE STRING "")
27 | set(CMAKE_CXX_FLAGS_RELEASE
28 |     "-Ofast -finline-functions -finline-limit=20000"
29 |     CACHE STRING "")
30 | 
31 | # Record that a host-config file has been processed.
32 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
33 | 


--------------------------------------------------------------------------------
/host-configs/ubuntu-builds/hip.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Derive HIP_ROOT_DIR from ROCM_DIR only when ROCM_DIR was actually provided.
17 | # Without this guard an empty or undefined ROCM_DIR would cache the bogus
18 | # path "/hip".
19 | if(NOT "${ROCM_DIR}" STREQUAL "")
20 |   set(HIP_ROOT_DIR "${ROCM_DIR}/hip" CACHE PATH "HIP ROOT directory path")
21 | endif()
22 | 
23 | # Host C++ flags per build type.
24 | set(CMAKE_CXX_FLAGS_RELEASE "-O2" CACHE STRING "")
25 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g" CACHE STRING "")
26 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "")
27 | 
28 | # Extension points for extra hipcc/host flags; set() without a value leaves
29 | # the (non-cache) variable unset.
30 | # Example: set(HIP_COMMON_OPT_FLAGS  "--amdgpu-target=gfx900")
31 | set(HIP_COMMON_OPT_FLAGS )
32 | set(HIP_COMMON_DEBUG_FLAGS)
33 | set(HOST_OPT_FLAGS)
34 | 
35 | # Prepend -fopenmp to the optimized hipcc flags when ENABLE_OPENMP is true.
36 | # The Debug configuration below uses HIP_COMMON_DEBUG_FLAGS and is unaffected.
37 | if(ENABLE_OPENMP)
38 |   set(HIP_COMMON_OPT_FLAGS "-fopenmp ${HIP_COMMON_OPT_FLAGS}")
39 | endif()
40 | 
41 | # hipcc flags for the configured CMAKE_BUILD_TYPE. COMB_HIPCC_FLAGS is left
42 | # untouched when the build type matches none of the three names below.
43 | if(CMAKE_BUILD_TYPE MATCHES Release)
44 |   set(COMB_HIPCC_FLAGS "-O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}" CACHE STRING "")
45 | elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
46 |   set(COMB_HIPCC_FLAGS "-g -O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}" CACHE STRING "")
47 | elseif(CMAKE_BUILD_TYPE MATCHES Debug)
48 |   set(COMB_HIPCC_FLAGS "-g -O0 ${HIP_COMMON_DEBUG_FLAGS}" CACHE STRING "")
49 | endif()
50 | 
51 | # Record that a host-config file has been processed.
52 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
53 | 


--------------------------------------------------------------------------------
/host-configs/ubuntu-builds/nvcc_gcc_X.cmake:
--------------------------------------------------------------------------------
 1 | ##############################################################################
 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | ##
 4 | ## Produced at the Lawrence Livermore National Laboratory
 5 | ##
 6 | ## LLNL-CODE-758885
 7 | ##
 8 | ## All rights reserved.
 9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 | 
16 | # Cache the compiler-family tag for the host compiler.
17 | set(COMB_COMPILER "COMB_COMPILER_GNU" CACHE STRING "")
18 | 
19 | # Host C++ flags per build type.
20 | set(CMAKE_CXX_FLAGS_DEBUG          "-O0 -g"                       CACHE STRING "")
21 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Ofast -g -finline-functions" CACHE STRING "")
22 | set(CMAKE_CXX_FLAGS_RELEASE        "-Ofast -finline-functions"    CACHE STRING "")
23 | 
24 | # CUDA_ARCH is interpolated into COMB_NVCC_FLAGS below, so refuse to continue
25 | # without it.
26 | if(NOT DEFINED CUDA_ARCH)
27 |   message(FATAL_ERROR "CUDA_ARCH NOT DEFINED")
28 | endif()
29 | 
30 | # nvcc flags: one common set, then one set per build type.
31 | set(COMB_NVCC_FLAGS
32 |     "-restrict -arch ${CUDA_ARCH} --expt-extended-lambda"
33 |     CACHE STRING "")
34 | set(COMB_NVCC_FLAGS_DEBUG          "-O0 -g -G"        CACHE STRING "")
35 | set(COMB_NVCC_FLAGS_MINSIZEREL     "-Os"              CACHE STRING "")
36 | set(COMB_NVCC_FLAGS_RELWITHDEBINFO "-O2 -g -lineinfo" CACHE STRING "")
37 | set(COMB_NVCC_FLAGS_RELEASE        "-O3"              CACHE STRING "")
38 | 
39 | # Record that a host-config file has been processed.
40 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "")
41 | 


--------------------------------------------------------------------------------
/include/MeshData.hpp:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////////////////////////////////////////////////
 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | //
 4 | // Produced at the Lawrence Livermore National Laboratory
 5 | //
 6 | // LLNL-CODE-758885
 7 | //
 8 | // All rights reserved.
 9 | //
10 | // This file is part of Comb.
11 | //
12 | // For details, see https://github.com/LLNL/Comb
13 | // Please also see the LICENSE file for MIT license.
14 | //////////////////////////////////////////////////////////////////////////////
15 | 
16 | #ifndef _MESHDATA_HPP
17 | #define _MESHDATA_HPP
18 | 
19 | #include "config.hpp"
20 | 
21 | #include "memory.hpp"
22 | #include "MeshInfo.hpp"
23 | 
24 | // Pairs a MeshInfo with an allocator and at most one buffer of
25 | // info.totallen DataT elements obtained from that allocator.
26 | //
27 | // The buffer is created on demand by allocate() and released by deallocate()
28 | // or the destructor. aloc and info are held by reference, so both must
29 | // outlive this object.
30 | struct MeshData
31 | {
32 |   COMB::Allocator& aloc;  // allocator used for ptr (not owned)
33 |   MeshInfo const& info;   // mesh description; supplies totallen (not owned)
34 |   DataT* ptr;             // owned buffer, or nullptr when nothing is allocated
35 | 
36 |   // Binds to meshinfo and aloc_ without allocating anything.
37 |   MeshData(MeshInfo const& meshinfo, COMB::Allocator& aloc_)
38 |     : aloc(aloc_)
39 |     , info(meshinfo)
40 |     , ptr(nullptr)
41 |   {
42 | 
43 |   }
44 | 
45 |   // Copying stays a member-wise (shallow) copy so existing callers keep
46 |   // compiling.
47 |   // NOTE(review): copying an object whose ptr is non-null leaves two owners
48 |   // of one buffer and both destructors will deallocate it; only copy
49 |   // unallocated objects, or consider deleting this constructor.
50 |   MeshData(MeshData const&) = default;
51 | 
52 |   // Move construction takes over other's buffer and leaves other empty, so
53 |   // moving from temporaries, and container reallocation that uses the
54 |   // noexcept move, cannot release the same buffer twice.
55 |   MeshData(MeshData&& other) noexcept
56 |     : aloc(other.aloc)
57 |     , info(other.info)
58 |     , ptr(other.ptr)
59 |   {
60 |     other.ptr = nullptr;
61 |   }
62 | 
63 |   // Allocates info.totallen DataT elements from aloc; does nothing when a
64 |   // buffer is already held.
65 |   void allocate()
66 |   {
67 |     if (ptr == nullptr) {
68 |       ptr = (DataT*)aloc.allocate(info.totallen*sizeof(DataT));
69 |     }
70 |   }
71 | 
72 |   // Equal when the allocator names match, the MeshInfo objects compare equal
73 |   // and both refer to the very same buffer address.
74 |   bool operator==(MeshData const& other) const
75 |   {
76 |     return aloc.name() == other.aloc.name() &&
77 |            info == other.info &&
78 |            ptr == other.ptr;
79 |   }
80 | 
81 |   // Returns the buffer pointer (nullptr when nothing is allocated).
82 |   DataT* data() const
83 |   {
84 |     return ptr;
85 |   }
86 | 
87 |   // Returns the buffer to aloc and resets ptr; safe to call repeatedly.
88 |   void deallocate()
89 |   {
90 |     if (ptr != nullptr) {
91 |       aloc.deallocate(ptr);
92 |       ptr = nullptr;
93 |     }
94 |   }
95 | 
96 |   // Releases the buffer, if any.
97 |   ~MeshData()
98 |   {
99 |     deallocate();
100 |   }
101 | };
102 | 
103 | #endif // _MESHDATA_HPP
104 | 
105 | 


--------------------------------------------------------------------------------
/include/align.hpp:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  ******************************************************************************
 3 |  *
 4 |  * \file
 5 |  *
 6 |  * \brief   RAJA header file containing an implementation of std align.
 7 |  *
 8 |  ******************************************************************************
 9 |  */
10 | 
11 | //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 | // Copyright (c) 2016-18, Lawrence Livermore National Security, LLC.
13 | //
14 | // Produced at the Lawrence Livermore National Laboratory
15 | //
16 | // LLNL-CODE-689114
17 | //
18 | // All rights reserved.
19 | //
20 | // This file is part of RAJA.
21 | //
22 | // For details about use and distribution, please read RAJA/LICENSE.
23 | //
24 | //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
25 | 
26 | #ifndef COMBRAJA_ALIGN_HPP
27 | #define COMBRAJA_ALIGN_HPP
28 | 
29 | //#include "RAJA/config.hpp"
30 | #include "config.hpp"
31 | 
32 | #define COMBRAJA_INLINE inline
33 | namespace COMBRAJA
34 | {
35 | // align(): finds the first suitably aligned address in [ptr, ptr + space) that still leaves room for size bytes.
36 | //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
37 | // Taken from libc++
38 | // See libc++ license in docs/Licenses/libc++ License
39 | //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
40 | COMBRAJA_INLINE
41 | void* align(size_t alignment, size_t size, void*& ptr, size_t& space)
42 | {
43 |   void* r = nullptr;  // stays null unless an aligned fit is found
44 |   if (size <= space) {
45 |     char* p1 = static_cast<char*>(ptr);
46 |     char* p2 = reinterpret_cast<char*>(  // p2 = p1 rounded up to a multiple of alignment
47 |         reinterpret_cast<size_t>(p1 + (alignment - 1)) & -alignment);  // the mask assumes alignment is a power of two (not checked here)
48 |     size_t d = static_cast<size_t>(p2 - p1);  // padding bytes skipped to reach p2
49 |     if (d <= space - size) {  // padding plus size must still fit in space
50 |       r = p2;
51 |       ptr = r;  // on success ptr is moved to the aligned address
52 |       space -= d;  // space shrinks by the padding only, not by size
53 |     }
54 |   }
55 |   return r;  // nullptr on failure; ptr and space are unchanged in that case
56 | }
57 | 
58 | }  // end namespace COMBRAJA
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/include/comm_utils_gdsync.hpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #ifndef _UTILS_GDSYNC_HPP
 17 | #define _UTILS_GDSYNC_HPP
 18 | 
 19 | #include "config.hpp"
 20 | 
 21 | #ifdef COMB_ENABLE_GDSYNC
 22 | 
 23 | #include <libgdsync.h>
 24 | 
 25 | #include <cassert>
 26 | #include <cstdio>
 27 | 
 28 | #include "exec_utils.hpp"
 29 | #include "comm_utils_mpi.hpp"
 30 | 
 31 | namespace detail {
 32 | 
 33 | namespace gdsync {
 34 | // Creates a gdsync context for mpi_comm via gdsync_init() and returns it; the commented LOGPRINTF lines are optional debug traces.
 35 | inline struct ::gdsync* init(MPI_Comm mpi_comm)
 36 | {
 37 |   // LOGPRINTF("gdsync_init rank(w%i)\n", MPI::Comm_rank(MPI_COMM_WORLD));
 38 |   struct ::gdsync* g = gdsync_init(mpi_comm);
 39 |   // LOGPRINTF("gdsync_init rank(w%i) done -> %p\n", MPI::Comm_rank(MPI_COMM_WORLD), g);
 40 |   assert(g != nullptr);  // null result is only caught when assert is active (NDEBUG not defined)
 41 |   return g;
 42 | }
 43 | // Forwards g to gdsync_term(); nothing is returned.
 44 | inline void term(struct ::gdsync* g)
 45 | {
 46 |   // LOGPRINTF("gdsync_term(%p) rank(w%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD));
 47 |   gdsync_term(g);
 48 | }
 49 | // Forwards (g, target) to gdsync_connect_propose(); any result of that call is not used.
 50 | inline void connect_propose(struct ::gdsync* g, int target)
 51 | {
 52 |   // LOGPRINTF("gdsync_connect_propose(%p) rank(w%i) %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), target);
 53 |   gdsync_connect_propose(g, target);
 54 | }
 55 | // Forwards (g, target) to gdsync_connect_accept(); any result of that call is not used.
 56 | inline void connect_accept(struct ::gdsync* g, int target)
 57 | {
 58 |   // LOGPRINTF("gdsync_connect_accept(%p) rank(w%i) %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), target);
 59 |   gdsync_connect_accept(g, target);
 60 | }
 61 | // Forwards (g, target) to gdsync_disconnect(); any result of that call is not used.
 62 | inline void disconnect(struct ::gdsync* g, int target)
 63 | {
 64 |   // LOGPRINTF("gdsync_disconnect(%p) rank(w%i) %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), target);
 65 |   gdsync_disconnect(g, target);
 66 | }
 67 | // Passes (g, ptr, size) to gdsync_register_region() and returns the resulting ibv_mr*; the result is not checked here.
 68 | inline struct ::ibv_mr* register_region(struct ::gdsync* g, void* ptr, size_t size)
 69 | {
 70 |   // LOGPRINTF("gdsync_register_region(%p) rank(w%i) %p[%zu]\n", g, MPI::Comm_rank(MPI_COMM_WORLD), ptr, size);
 71 |   struct ::ibv_mr* mr = gdsync_register_region(g, ptr, size);
 72 |   // LOGPRINTF("gdsync_register_region(%p) rank(w%i) %p[%zu] done -> %p\n", g, MPI::Comm_rank(MPI_COMM_WORLD), ptr, size, mr);
 73 |   return mr;
 74 | }
 75 | // Forwards (g, mr) to gdsync_deregister_region(); any result of that call is not used.
 76 | inline void deregister_region(struct ::gdsync* g, struct ::ibv_mr* mr)
 77 | {
 78 |   // LOGPRINTF("gdsync_deregister_region(%p) rank(w%i) %p\n", g, MPI::Comm_rank(MPI_COMM_WORLD), mr);
 79 |   gdsync_deregister_region(g, mr);
 80 | }
 81 | // Forwards g to gdsync_cork(); see uncork() for the counterpart call.
 82 | inline void cork(struct ::gdsync* g)
 83 | {
 84 |   // LOGPRINTF("gdsync_cork(%p) rank(w%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD));
 85 |   gdsync_cork(g);
 86 | }
 87 | 
 88 | inline void uncork(struct ::gdsync* g, cudaStream_t stream)
 89 | {
 90 |   // LOGPRINTF("gdsync_uncork(%p) rank(w%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), (void*)stream);
 91 |   gdsync_uncork(g, stream);
 92 | }
 93 | 
 94 | inline void receive(struct ::gdsync* g, int src, struct ::ibv_mr* buf_mr, size_t offset, size_t size)
 95 | {
 96 |   // LOGPRINTF("gdsync_receive(%p) rank(w%i) %p+%zu[%zu] src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), buf_mr, offset, size, src);
 97 |   gdsync_receive(g, src, buf_mr, offset, size);
 98 | }
 99 | 
100 | inline void stream_wait_recv_complete(struct ::gdsync* g, int src, cudaStream_t stream)
101 | {
102 |   // LOGPRINTF("gdsync_stream_wait_recv_complete(%p) rank(w%i) src(%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src, (void*)stream);
103 |   gdsync_stream_wait_recv_complete(g, src, stream);
104 | }
105 | 
106 | inline void cpu_ack_recv(struct ::gdsync* g, int src)
107 | {
108 |   // LOGPRINTF("gdsync_cpu_ack_recv(%p) rank(w%i) src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src);
109 |   gdsync_cpu_ack_recv(g, src);
110 | }
111 | 
112 | inline int is_receive_complete(struct ::gdsync* g, int src)
113 | {
114 |   // LOGPRINTF("gdsync_is_receive_complete(%p) rank(w%i) src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src);
115 |   int complete = gdsync_is_receive_complete(g, src);
116 |   // LOGPRINTF("gdsync_is_receive_complete(%p) rank(w%i) src(%i) done -> %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src, complete);
117 |   return complete;
118 | }
119 | 
120 | inline void wait_receive_complete(struct ::gdsync* g, int src)
121 | {
122 |   // LOGPRINTF("gdsync_wait_receive_complete(%p) rank(w%i) src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src);
123 |   gdsync_wait_receive_complete(g, src);
124 | }
125 | 
126 | inline void stream_send(struct ::gdsync* g, int dest, cudaStream_t stream, struct ::ibv_mr* buf_mr, size_t offset, size_t size)
127 | {
128 |   // LOGPRINTF("gdsync_stream_send(%p) rank(w%i) %p+%zu[%zu] dst(%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), buf_mr, offset, size, dest, (void*)stream);
129 |   gdsync_stream_send(g, dest, stream, buf_mr, offset, size);
130 | }
131 | 
132 | inline void isend(struct ::gdsync* g, int dest, struct ::ibv_mr* buf_mr, size_t offset, size_t size)
133 | {
134 |   // LOGPRINTF("gdsync_isend(%p) rank(w%i) %p+%zu[%zu] dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), buf_mr, offset, size, dest);
135 |   gdsync_isend(g, dest, buf_mr, offset, size);
136 | }
137 | 
138 | inline void stream_wait_send_complete(struct ::gdsync* g, int dest, cudaStream_t stream)
139 | {
140 |   // LOGPRINTF("gdsync_stream_wait_send_complete(%p) rank(w%i) dst(%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest, stream);
141 |   gdsync_stream_wait_send_complete(g, dest, stream);
142 | }
143 | 
144 | inline void cpu_ack_isend(struct ::gdsync* g, int dest)
145 | {
146 |   // LOGPRINTF("gdsync_cpu_ack_isend(%p) rank(w%i) dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest);
147 |   gdsync_cpu_ack_isend(g, dest);
148 | }
149 | 
150 | inline int is_send_complete(struct ::gdsync* g, int dest)
151 | {
152 |   // LOGPRINTF("gdsync_is_send_complete(%p) rank(w%i) dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest);
153 |   int complete = gdsync_is_send_complete(g, dest);
154 |   // LOGPRINTF("gdsync_is_send_complete(%p) rank(w%i) dst(%i) done -> %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest, complete);
155 |   return complete;
156 | }
157 | 
158 | inline void wait_send_complete(struct ::gdsync* g, int dest)
159 | {
160 |   // LOGPRINTF("gdsync_wait_send_complete(%p) rank(w%i) dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest);
161 |   gdsync_wait_send_complete(g, dest);
162 | }
163 | 
164 | } // namespace gdsync
165 | 
166 | } // namespace detail
167 | 
168 | #endif // COMB_ENABLE_GDSYNC
169 | 
170 | #endif // _UTILS_GDSYNC_HPP
171 | 
172 | 


--------------------------------------------------------------------------------
/include/comm_utils_gpump.hpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #ifndef _UTILS_GPUMP_HPP
 17 | #define _UTILS_GPUMP_HPP
 18 | 
 19 | #include "config.hpp"
 20 | 
 21 | #ifdef COMB_ENABLE_GPUMP
 22 | 
 23 | #include <libgpump.h>
 24 | 
 25 | #include <cassert>
 26 | #include <cstdio>
 27 | 
 28 | #include "exec_utils.hpp"
 29 | #include "comm_utils_mpi.hpp"
 30 | 
 31 | namespace detail {
 32 | 
 33 | namespace gpump {
 34 | 
 35 | inline struct ::gpump* init(MPI_Comm mpi_comm)
 36 | {
 37 |   // LOGPRINTF("gpump_init rank(w%i)\n", MPI::Comm_rank(MPI_COMM_WORLD));
 38 |   struct ::gpump* g = gpump_init(mpi_comm);
 39 |   // LOGPRINTF("gpump_init rank(w%i) done -> %p\n", MPI::Comm_rank(MPI_COMM_WORLD), g);
 40 |   assert(g != nullptr);
 41 |   return g;
 42 | }
 43 | 
 44 | inline void term(struct ::gpump* g)
 45 | {
 46 |   // LOGPRINTF("gpump_term(%p) rank(w%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD));
 47 |   gpump_term(g);
 48 | }
 49 | 
 50 | inline void connect_propose(struct ::gpump* g, int target)
 51 | {
 52 |   // LOGPRINTF("gpump_connect_propose(%p) rank(w%i) %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), target);
 53 |   gpump_connect_propose(g, target);
 54 | }
 55 | 
 56 | inline void connect_accept(struct ::gpump* g, int target)
 57 | {
 58 |   // LOGPRINTF("gpump_connect_accept(%p) rank(w%i) %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), target);
 59 |   gpump_connect_accept(g, target);
 60 | }
 61 | 
 62 | inline void disconnect(struct ::gpump* g, int target)
 63 | {
 64 |   // LOGPRINTF("gpump_disconnect(%p) rank(w%i) %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), target);
 65 |   gpump_disconnect(g, target);
 66 | }
 67 | 
 68 | inline struct ::ibv_mr* register_region(struct ::gpump* g, void* ptr, size_t size)
 69 | {
 70 |   // LOGPRINTF("gpump_register_region(%p) rank(w%i) %p[%zu]\n", g, MPI::Comm_rank(MPI_COMM_WORLD), ptr, size);
 71 |   struct ::ibv_mr* mr = gpump_register_region(g, ptr, size);
 72 |   // LOGPRINTF("gpump_register_region(%p) rank(w%i) %p[%zu] done -> %p\n", g, MPI::Comm_rank(MPI_COMM_WORLD), ptr, size, mr);
 73 |   return mr;
 74 | }
 75 | 
 76 | inline void deregister_region(struct ::gpump* g, struct ::ibv_mr* mr)
 77 | {
 78 |   // LOGPRINTF("gpump_deregister_region(%p) rank(w%i) %p\n", g, MPI::Comm_rank(MPI_COMM_WORLD), mr);
 79 |   gpump_deregister_region(g, mr);
 80 | }
 81 | 
 82 | inline void cork(struct ::gpump* g)
 83 | {
 84 |   // LOGPRINTF("gpump_cork(%p) rank(w%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD));
 85 |   gpump_cork(g);
 86 | }
 87 | 
 88 | inline void uncork(struct ::gpump* g, cudaStream_t stream)
 89 | {
 90 |   // LOGPRINTF("gpump_uncork(%p) rank(w%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), (void*)stream);
 91 |   gpump_uncork(g, stream);
 92 | }
 93 | 
 94 | inline void receive(struct ::gpump* g, int src, struct ::ibv_mr* buf_mr, size_t offset, size_t size)
 95 | {
 96 |   // LOGPRINTF("gpump_receive(%p) rank(w%i) %p+%zu[%zu] src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), buf_mr, offset, size, src);
 97 |   gpump_receive(g, src, buf_mr, offset, size);
 98 | }
 99 | 
100 | inline void stream_wait_recv_complete(struct ::gpump* g, int src, cudaStream_t stream)
101 | {
102 |   // LOGPRINTF("gpump_stream_wait_recv_complete(%p) rank(w%i) src(%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src, (void*)stream);
103 |   gpump_stream_wait_recv_complete(g, src, stream);
104 | }
105 | 
106 | inline void cpu_ack_recv(struct ::gpump* g, int src)
107 | {
108 |   // LOGPRINTF("gpump_cpu_ack_recv(%p) rank(w%i) src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src);
109 |   gpump_cpu_ack_recv(g, src);
110 | }
111 | 
112 | inline int is_receive_complete(struct ::gpump* g, int src)
113 | {
114 |   // LOGPRINTF("gpump_is_receive_complete(%p) rank(w%i) src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src);
115 |   int complete = gpump_is_receive_complete(g, src);
116 |   // LOGPRINTF("gpump_is_receive_complete(%p) rank(w%i) src(%i) done -> %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src, complete);
117 |   return complete;
118 | }
119 | 
120 | inline void wait_receive_complete(struct ::gpump* g, int src)
121 | {
122 |   // LOGPRINTF("gpump_wait_receive_complete(%p) rank(w%i) src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src);
123 |   gpump_wait_receive_complete(g, src);
124 | }
125 | 
126 | inline void stream_send(struct ::gpump* g, int dest, cudaStream_t stream, struct ::ibv_mr* buf_mr, size_t offset, size_t size)
127 | {
128 |   // LOGPRINTF("gpump_stream_send(%p) rank(w%i) %p+%zu[%zu] dst(%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), buf_mr, offset, size, dest, (void*)stream);
129 |   gpump_stream_send(g, dest, stream, buf_mr, offset, size);
130 | }
131 | 
132 | inline void isend(struct ::gpump* g, int dest, struct ::ibv_mr* buf_mr, size_t offset, size_t size)
133 | {
134 |   // LOGPRINTF("gpump_isend(%p) rank(w%i) %p+%zu[%zu] dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), buf_mr, offset, size, dest);
135 |   gpump_isend(g, dest, buf_mr, offset, size);
136 | }
137 | 
138 | inline void stream_wait_send_complete(struct ::gpump* g, int dest, cudaStream_t stream)
139 | {
140 |   // LOGPRINTF("gpump_stream_wait_send_complete(%p) rank(w%i) dst(%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest, stream);
141 |   gpump_stream_wait_send_complete(g, dest, stream);
142 | }
143 | 
144 | inline void cpu_ack_isend(struct ::gpump* g, int dest)
145 | {
146 |   // LOGPRINTF("gpump_cpu_ack_isend(%p) rank(w%i) dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest);
147 |   gpump_cpu_ack_isend(g, dest);
148 | }
149 | 
150 | inline int is_send_complete(struct ::gpump* g, int dest)
151 | {
152 |   // LOGPRINTF("gpump_is_send_complete(%p) rank(w%i) dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest);
153 |   int complete = gpump_is_send_complete(g, dest);
154 |   // LOGPRINTF("gpump_is_send_complete(%p) rank(w%i) dst(%i) done -> %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest, complete);
155 |   return complete;
156 | }
157 | 
158 | inline void wait_send_complete(struct ::gpump* g, int dest)
159 | {
160 |   // LOGPRINTF("gpump_wait_send_complete(%p) rank(w%i) dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest);
161 |   gpump_wait_send_complete(g, dest);
162 | }
163 | 
164 | } // namespace gpump
165 | 
166 | } // namespace detail
167 | 
168 | #endif // COMB_ENABLE_GPUMP
169 | 
170 | #endif // _UTILS_GPUMP_HPP
171 | 
172 | 


--------------------------------------------------------------------------------
/include/comm_utils_mp.hpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #ifndef _UTILS_MP_HPP
 17 | #define _UTILS_MP_HPP
 18 | 
 19 | #include "config.hpp"
 20 | 
 21 | #ifdef COMB_ENABLE_MP
 22 | 
 23 | #include <mp.h>
 24 | 
 25 | #include <cassert>
 26 | #include <cstdio>
 27 | #include <vector>
 28 | #include <numeric>
 29 | 
 30 | #include "exec_utils.hpp"
 31 | #include "exec_utils_cuda.hpp"
 32 | #include "comm_utils_mpi.hpp"
 33 | 
 34 | namespace detail {
 35 | 
 36 | namespace mp {
 37 | 
 38 | inline void init(MPI_Comm mpi_comm)
 39 | {
 40 |   // LOGPRINTF("mp_init rank(w%i)\n", MPI::Comm_rank(MPI_COMM_WORLD));
 41 |   int nranks = MPI::Comm_size(mpi_comm);
 42 |   std::vector<int> ranks(nranks);
 43 |   std::iota(ranks.begin(), ranks.end(), 0);
 44 |   int gpuid = COMB::detail::cuda::get_device();
 45 |   auto ret = mp_init(mpi_comm, ranks.data(), nranks, MP_INIT_DEFAULT, gpuid);
 46 |   // LOGPRINTF("mp_init rank(w%i) done\n", MPI::Comm_rank(MPI_COMM_WORLD));
 47 |   assert(ret == MP_SUCCESS);
 48 | }
 49 | 
 50 | inline void finalize()
 51 | {
 52 |   // LOGPRINTF("mp_finalize() rank(w%i)\n", MPI::Comm_rank(MPI_COMM_WORLD));
 53 |   mp_finalize();
 54 | }
 55 | 
 56 | inline mp_reg_t register_(void* ptr, size_t size)
 57 | {
 58 |   mp_reg_t reg;
 59 |   // LOGPRINTF("mp_register() rank(w%i) %p[%zu]\n", MPI::Comm_rank(MPI_COMM_WORLD), ptr, size);
 60 |   auto ret = mp_register(ptr, size, &reg);
 61 |   // LOGPRINTF("mp_register() rank(w%i) %p[%zu] done -> %p\n", MPI::Comm_rank(MPI_COMM_WORLD), ptr, size, (void*)reg);
 62 |   assert(ret == MP_SUCCESS);
 63 |   return reg;
 64 | }
 65 | 
 66 | inline void deregister(mp_reg_t& reg)
 67 | {
 68 |   // LOGPRINTF("mp_deregister() rank(w%i) %p\n", MPI::Comm_rank(MPI_COMM_WORLD), (void*)reg);
 69 |   auto ret = mp_deregister(&reg);
 70 |   assert(ret == MP_SUCCESS);
 71 |   reg = nullptr;
 72 | }
 73 | 
 74 | inline void irecv(void *buf, size_t size, int src, mp_reg_t* reg, mp_request_t *req)
 75 | {
 76 |   // LOGPRINTF("mp_irecv() rank(w%i) %p[%zu] src(%i) reg(%p) req(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), buf, size, src, reg, req);
 77 |   auto ret = mp_irecv(buf, size, src, reg, req);
 78 |   assert(ret == MP_SUCCESS);
 79 | }
 80 | 
 81 | inline void isend(void *buf, size_t size, int src, mp_reg_t* reg, mp_request_t *req)
 82 | {
 83 |   // LOGPRINTF("mp_isend() rank(w%i) %p[%zu] src(%i) reg(%p) req(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), buf, size, src, reg, req);
 84 |   auto ret = mp_isend(buf, size, src, reg, req);
 85 |   assert(ret == MP_SUCCESS);
 86 | }
 87 | 
 88 | inline void send_on_stream(void *buf, size_t size, int dst, mp_reg_t* reg, mp_request_t *req, cudaStream_t stream)
 89 | {
 90 |   // LOGPRINTF("mp_send_on_stream() rank(w%i) %p[%zu] dst(%i) reg(%p) req(%p) stream(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), buf, size, dst, reg, req, (void*)stream);
 91 |   auto ret = mp_send_on_stream(buf, size, dst, reg, req, stream);
 92 |   assert(ret == MP_SUCCESS);
 93 | }
 94 | 
 95 | inline void isend_on_stream(void *buf, size_t size, int dst, mp_reg_t* reg, mp_request_t *req, cudaStream_t stream)
 96 | {
 97 |   // LOGPRINTF("mp_isend_on_stream() rank(w%i) %p[%zu] dst(%i) reg(%p) req(%p) stream(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), buf, size, dst, reg, req, (void*)stream);
 98 |   auto ret = mp_isend_on_stream(buf, size, dst, reg, req, stream);
 99 |   assert(ret == MP_SUCCESS);
100 | }
101 | 
102 | inline void wait_on_stream(mp_request_t *req, cudaStream_t stream)
103 | {
104 |   // LOGPRINTF("mp_wait_on_stream() rank(w%i) req(%p) stream(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), req, (void*)stream);
105 |   auto ret = mp_wait_on_stream(req, stream);
106 |   assert(ret == MP_SUCCESS);
107 | }
108 | 
109 | inline void wait_all_on_stream(size_t count, mp_request_t *req, cudaStream_t stream)
110 | {
111 |   // LOGPRINTF("mp_wait_all_on_stream() rank(w%i) count (%zu) req(%p) stream(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), count, req, (void*)stream);
112 |   auto ret = mp_wait_all_on_stream(count, req, stream);
113 |   assert(ret == MP_SUCCESS);
114 | }
115 | 
116 | inline void wait(mp_request_t *req)
117 | {
118 |   // LOGPRINTF("mp_wait() rank(w%i) req(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), req);
119 |   auto ret = mp_wait(req);
120 |   assert(ret == MP_SUCCESS);
121 | }
122 | 
123 | inline void wait_all(size_t count, mp_request_t *req)
124 | {
125 |   // LOGPRINTF("mp_wait_all() rank(w%i) count (%zu) req(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), count, req);
126 |   auto ret = mp_wait_all(count, req);
127 |   assert(ret == MP_SUCCESS);
128 | }
129 | 
130 | inline void progress_all(size_t count, mp_request_t *req)
131 | {
132 |   // LOGPRINTF("mp_progress_all() rank(w%i) count (%zu) req(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), count, req);
133 |   auto ret = mp_progress_all(count, req);
134 |   assert(ret == MP_SUCCESS);
135 | }
136 | 
137 | } // namespace mp
138 | 
139 | } // namespace detail
140 | 
141 | #endif // COMB_ENABLE_MP
142 | 
143 | #endif // _UTILS_MP_HPP
144 | 
145 | 


--------------------------------------------------------------------------------
/include/comm_utils_umr.hpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #ifndef _UTILS_UMR_HPP
 17 | #define _UTILS_UMR_HPP
 18 | 
 19 | #include "config.hpp"
 20 | 
 21 | #ifdef COMB_ENABLE_UMR
 22 | 
 23 | #include <cassert>
 24 | #include <cstdio>
 25 | 
 26 | #include <umr.h>
 27 | 
 28 | namespace detail {
 29 | 
 30 | namespace UMR {
 31 | 
 32 | inline int Init_thread(int* argc, char***argv, int required)
 33 | {
 34 |   int provided = required;
 35 |   // LOGPRINTF("UMR_Init_thread\n");
 36 |   int ret = UMR_Init_thread(argc, argv, required, &provided);
 37 |   // LOGPRINTF("UMR_Init_thread done rank(w%i)\n", Comm_rank(UMR_COMM_WORLD));
 38 |   assert(ret == UMR_SUCCESS);
 39 |   //assert(required == provided);
 40 |   return provided;
 41 | }
 42 | 
 43 | inline void Finalize()
 44 | {
 45 |   // LOGPRINTF("UMR_Finalize\n");
 46 |   int ret = UMR_Finalize();
 47 |   assert(ret == UMR_SUCCESS);
 48 | }
 49 | 
 50 | inline void Irecv(void *buf, int count, UMR_Datatype umr_type, int src, int tag, UMR_Comm comm, UMR_Request *request)
 51 | {
 52 |   // LOGPRINTF("UMR_Irecv rank(w%i) %p[%i] src(%i) tag(%i)\n", Comm_rank(UMR_COMM_WORLD), buf, count, src, tag);
 53 |   int ret = UMR_Irecv(buf, count, umr_type, src, tag, comm, request);
 54 |   assert(ret == UMR_SUCCESS);
 55 | }
 56 | 
 57 | inline void Isend(const void *buf, int count, UMR_Datatype umr_type, int dest, int tag, UMR_Comm comm, UMR_Request *request)
 58 | {
 59 |   // LOGPRINTF("UMR_Isend rank(w%i) %p[%i] dst(%i) tag(%i)\n", Comm_rank(UMR_COMM_WORLD), buf, count, dest, tag);
 60 |   int ret = UMR_Isend(buf, count, umr_type, dest, tag, comm, request);
 61 |   assert(ret == UMR_SUCCESS);
 62 | }
 63 | 
 64 | inline void Wait(UMR_Request *request, UMR_Status *status)
 65 | {
 66 |   // LOGPRINTF("UMR_Wait rank(w%i)\n", Comm_rank(UMR_COMM_WORLD));
 67 |   int ret = UMR_Wait(request, status);
 68 |   assert(ret == UMR_SUCCESS);
 69 | }
 70 | 
 71 | inline bool Test(UMR_Request *request, UMR_Status *status)
 72 | {
 73 |   int completed = 0;
 74 |   // LOGPRINTF("UMR_Test rank(w%i)\n", Comm_rank(UMR_COMM_WORLD));
 75 |   int ret = UMR_Test(request, &completed, status);
 76 |   assert(ret == UMR_SUCCESS);
 77 |   return completed;
 78 | }
 79 | 
 80 | inline int Waitany(int count, UMR_Request *requests, UMR_Status *status)
 81 | {
 82 |   int idx = -1;
 83 |   // LOGPRINTF("UMR_Waitany rank(w%i) count(%i)\n", Comm_rank(UMR_COMM_WORLD), count);
 84 |   int ret = UMR_Waitany(count, requests, &idx, status);
 85 |   assert(ret == UMR_SUCCESS);
 86 |   return idx;
 87 | }
 88 | 
 89 | inline int Testany(int count, UMR_Request *requests, UMR_Status *status)
 90 | {
 91 |   int completed = 0;
 92 |   int indx = -1;
 93 |   // LOGPRINTF("UMR_Testany rank(w%i) count(%i)\n", Comm_rank(UMR_COMM_WORLD), count);
 94 |   int ret = UMR_Testany(count, requests, &indx, &completed, status);
 95 |   assert(ret == UMR_SUCCESS);
 96 |   return completed ? indx : -1;
 97 | }
 98 | 
 99 | inline int Waitsome(int incount, UMR_Request *requests, int* indcs, UMR_Status *statuses)
100 | {
101 |   int outcount = 0;
102 |   // LOGPRINTF("UMR_Waitsome rank(w%i) incount(%i)\n", Comm_rank(UMR_COMM_WORLD), incount);
103 |   int ret = UMR_Waitsome(incount, requests, &outcount, indcs, statuses);
104 |   assert(ret == UMR_SUCCESS);
105 |   return outcount;
106 | }
107 | 
108 | inline int Testsome(int incount, UMR_Request *requests, int* indcs, UMR_Status *statuses)
109 | {
110 |   int outcount = 0;
111 |   // LOGPRINTF("UMR_Testsome rank(w%i) incount(%i)\n", Comm_rank(UMR_COMM_WORLD), incount);
112 |   int ret = UMR_Testsome(incount, requests, &outcount, indcs, statuses);
113 |   assert(ret == UMR_SUCCESS);
114 |   return outcount;
115 | }
116 | 
117 | inline void Waitall(int count, UMR_Request *requests, UMR_Status *statuses)
118 | {
119 |   // LOGPRINTF("UMR_Waitall rank(w%i) count(%i)\n", Comm_rank(UMR_COMM_WORLD), count);
120 |   int ret = UMR_Waitall(count, requests, statuses);
121 |   assert(ret == UMR_SUCCESS);
122 | }
123 | 
124 | inline bool Testall(int count, UMR_Request *requests, UMR_Status *statuses)
125 | {
126 |   int completed = 0;
127 |   // LOGPRINTF("UMR_Testall rank(w%i) count(%i)\n", Comm_rank(UMR_COMM_WORLD), count);
128 |   int ret = UMR_Testall(count, requests, &completed, statuses);
129 |   assert(ret == UMR_SUCCESS);
130 |   return completed;
131 | }
132 | 
133 | } // namespace UMR
134 | 
135 | } // namespace detail
136 | 
137 | #endif // COMB_ENABLE_UMR
138 | 
139 | #endif // _UTILS_UMR_HPP
140 | 
141 | 


--------------------------------------------------------------------------------
/include/exec.hpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #ifndef _EXEC_HPP
 17 | #define _EXEC_HPP
 18 | 
 19 | #include "config.hpp"
 20 | 
 21 | #include <cstdio>
 22 | #include <cstdlib>
 23 | #include <cassert>
 24 | 
 25 | #include <type_traits>
 26 | 
 27 | #include "exec_utils.hpp"
 28 | #include "memory.hpp"
 29 | #include "ExecContext.hpp"
 30 | 
 31 | #include "exec_fused.hpp"
 32 | 
 33 | #include "exec_pol_seq.hpp"
 34 | #include "exec_pol_omp.hpp"
 35 | #include "exec_pol_cuda.hpp"
 36 | #include "exec_pol_cuda_graph.hpp"
 37 | #include "exec_pol_hip.hpp"
 38 | #include "exec_pol_mpi_type.hpp"
 39 | #include "exec_pol_raja.hpp"
 40 | 
 41 | namespace COMB {
 42 | 
 43 | template < typename my_context_type >
 44 | struct ContextHolder
 45 | {
 46 |   using context_type = my_context_type;
 47 | 
 48 |   bool m_available = false;
 49 | 
 50 |   bool available() const
 51 |   {
 52 |     return m_available;
 53 |   }
 54 | 
 55 |   template < typename ... Ts >
 56 |   void create(Ts&&... args)
 57 |   {
 58 |     destroy();
 59 |     m_context = new context_type(std::forward<Ts>(args)...);
 60 |   }
 61 | 
 62 |   context_type& get()
 63 |   {
 64 |     assert(m_context != nullptr);
 65 |     return *m_context;
 66 |   }
 67 | 
 68 |   void destroy()
 69 |   {
 70 |     if (m_context) {
 71 |       delete m_context;
 72 |       m_context = nullptr;
 73 |     }
 74 |   }
 75 | 
 76 |   ~ContextHolder()
 77 |   {
 78 |     destroy();
 79 |   }
 80 | 
 81 | private:
 82 |   context_type* m_context = nullptr;
 83 | };
 84 | 
 85 | struct Executors
 86 | {
 87 |   Executors()
 88 |   { }
 89 | 
 90 |   Executors(Executors const&) = delete;
 91 |   Executors(Executors &&) = delete;
 92 |   Executors& operator=(Executors const&) = delete;
 93 |   Executors& operator=(Executors &&) = delete;
 94 | 
 95 |   void create_executors(Allocators& alocs)
 96 |   {
 97 |     base_cpu.create();
 98 | #ifdef COMB_ENABLE_MPI
 99 |     base_mpi.create();
100 | #endif
101 | #ifdef COMB_ENABLE_CUDA
102 |     base_cuda.create();
103 | #endif
104 | #ifdef COMB_ENABLE_HIP
105 |     base_hip.create();
106 | #endif
107 | #ifdef COMB_ENABLE_RAJA
108 |     base_raja_cpu.create();
109 | #ifdef COMB_ENABLE_CUDA
110 |     base_raja_cuda.create();
111 | #endif
112 | #ifdef COMB_ENABLE_HIP
113 |     base_raja_hip.create();
114 | #endif
115 | #endif
116 | 
117 |     seq.create(base_cpu.get(), alocs.host.allocator());
118 | #ifdef COMB_ENABLE_OPENMP
119 |     omp.create(base_cpu.get(), alocs.host.allocator());
120 | #endif
121 | #ifdef COMB_ENABLE_CUDA
122 |     cuda.create(base_cuda.get(), (alocs.access.use_device_preferred_for_cuda_util_aloc) ? alocs.cuda_managed_device_preferred_host_accessed.allocator() : alocs.cuda_hostpinned.allocator());
123 | #endif
124 | #ifdef COMB_ENABLE_CUDA_GRAPH
125 |     cuda_graph.create(base_cuda.get(), (alocs.access.use_device_preferred_for_cuda_util_aloc) ? alocs.cuda_managed_device_preferred_host_accessed.allocator() : alocs.cuda_hostpinned.allocator());
126 | #endif
127 | #ifdef COMB_ENABLE_HIP
128 |     hip.create(base_hip.get(), (alocs.access.use_device_for_hip_util_aloc) ? alocs.hip_device.allocator() : alocs.hip_hostpinned_coarse.allocator());
129 | #endif
130 | #ifdef COMB_ENABLE_MPI
131 |     mpi_type.create(base_mpi.get(), alocs.host.allocator());
132 | #endif
133 | #ifdef COMB_ENABLE_RAJA
134 |     raja_seq.create(base_raja_cpu.get(), alocs.host.allocator());
135 | #ifdef COMB_ENABLE_OPENMP
136 |     raja_omp.create(base_raja_cpu.get(), alocs.host.allocator());
137 | #endif
138 | #ifdef COMB_ENABLE_CUDA
139 |     raja_cuda.create(base_raja_cuda.get(), (alocs.access.use_device_preferred_for_cuda_util_aloc) ? alocs.cuda_managed_device_preferred_host_accessed.allocator() : alocs.cuda_hostpinned.allocator());
140 | #endif
141 | #ifdef COMB_ENABLE_HIP
142 |     raja_hip.create(base_raja_hip.get(), (alocs.access.use_device_for_hip_util_aloc) ? alocs.hip_device.allocator() : alocs.hip_hostpinned_coarse.allocator());
143 | #endif
144 | #endif
145 |   }
146 | 
147 |   ContextHolder<CPUContext> base_cpu;
148 | #ifdef COMB_ENABLE_MPI
149 |   ContextHolder<MPIContext> base_mpi;
150 | #endif
151 | #ifdef COMB_ENABLE_CUDA
152 |   ContextHolder<CudaContext> base_cuda;
153 | #endif
154 | #ifdef COMB_ENABLE_HIP
155 |   ContextHolder<HipContext> base_hip;
156 | #endif
157 | #ifdef COMB_ENABLE_RAJA
158 |   ContextHolder<RAJAContext<RAJA::resources::Host>> base_raja_cpu;
159 | #ifdef COMB_ENABLE_CUDA
160 |   ContextHolder<RAJAContext<RAJA::resources::Cuda>> base_raja_cuda;
161 | #endif
162 | #ifdef COMB_ENABLE_HIP
163 |   ContextHolder<RAJAContext<RAJA::resources::Hip>> base_raja_hip;
164 | #endif
165 | #endif
166 | 
167 |   ContextHolder<ExecContext<seq_pol>> seq;
168 | #ifdef COMB_ENABLE_OPENMP
169 |   ContextHolder<ExecContext<omp_pol>> omp;
170 | #endif
171 | #ifdef COMB_ENABLE_CUDA
172 |   ContextHolder<ExecContext<cuda_pol>> cuda;
173 | #ifdef COMB_ENABLE_CUDA_GRAPH
174 |   ContextHolder<ExecContext<cuda_graph_pol>> cuda_graph;
175 | #endif
176 | #endif
177 | #ifdef COMB_ENABLE_HIP
178 |   ContextHolder<ExecContext<hip_pol>> hip;
179 | #endif
180 | #ifdef COMB_ENABLE_MPI
181 |   ContextHolder<ExecContext<mpi_type_pol>> mpi_type;
182 | #endif
183 | #ifdef COMB_ENABLE_RAJA
184 |   ContextHolder<ExecContext<raja_seq_pol>> raja_seq;
185 | #ifdef COMB_ENABLE_OPENMP
186 |   ContextHolder<ExecContext<raja_omp_pol>> raja_omp;
187 | #endif
188 | #ifdef COMB_ENABLE_CUDA
189 |   ContextHolder<ExecContext<raja_cuda_pol>> raja_cuda;
190 | #endif
191 | #ifdef COMB_ENABLE_HIP
192 |   ContextHolder<ExecContext<raja_hip_pol>> raja_hip;
193 | #endif
194 | #endif
195 | };
196 | 
197 | } // namespace COMB
198 | 
199 | #endif // _EXEC_HPP
200 | 


--------------------------------------------------------------------------------
/include/exec_pol_cuda_graph.hpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #ifndef _POL_CUDA_GRAPH_HPP
 17 | #define _POL_CUDA_GRAPH_HPP
 18 | 
 19 | #include "config.hpp"
 20 | 
 21 | #include "memory.hpp"
 22 | 
 23 | #ifdef COMB_ENABLE_CUDA_GRAPH
 24 | #include "exec_utils_graph_launch.hpp"
 25 | 
 26 | struct cuda_graph_pol {
 27 |   static const bool async = true;
 28 |   static const char* get_name() { return "cudaGraph"; }
 29 |   using event_type = cuda::graph_launch::event_type;
 30 |   using component_type = cuda::graph_launch::component;
 31 |   using group_type = cuda::graph_launch::group;
 32 | };
 33 | 
 34 | template < >
 35 | struct ExecContext<cuda_graph_pol> : CudaContext
 36 | {
 37 |   using pol = cuda_graph_pol;
 38 |   using event_type = typename pol::event_type;
 39 |   using component_type = typename pol::component_type;
 40 |   using group_type = typename pol::group_type;
 41 | 
 42 |   using base = CudaContext;
 43 | 
 44 |   COMB::Allocator& util_aloc;
 45 | 
 46 | #ifdef COMB_GRAPH_KERNEL_LAUNCH_COMPONENT_STREAMS
 47 |   component_type m_component;
 48 | #endif
 49 | 
 50 | 
 51 |   ExecContext(base const& b, COMB::Allocator& util_aloc_)
 52 |     : base(b)
 53 |     , util_aloc(util_aloc_)
 54 | #ifdef COMB_GRAPH_KERNEL_LAUNCH_COMPONENT_STREAMS
 55 |     , m_component{base(*this)}
 56 | #endif
 57 |   { }
 58 | 
 59 |   void ensure_waitable()
 60 |   {
 61 |     cuda::graph_launch::force_launch(base::stream_launch());
 62 |   }
 63 | 
 64 |   template < typename context >
 65 |   void waitOn(context& con)
 66 |   {
 67 |     con.ensure_waitable();
 68 |     base::waitOn(con);
 69 |   }
 70 | 
 71 |   void synchronize()
 72 |   {
 73 |     cuda::graph_launch::synchronize(base::stream_launch());
 74 |   }
 75 | 
 76 |   group_type create_group()
 77 |   {
 78 |     return cuda::graph_launch::create_group();
 79 |   }
 80 | 
 81 |   void start_group(group_type group)
 82 |   {
 83 |     cuda::graph_launch::set_active_group(group);
 84 |   }
 85 | 
 86 |   void finish_group(group_type)
 87 |   {
 88 |     cuda::graph_launch::force_launch(base::stream_launch());
 89 |   }
 90 | 
 91 |   void destroy_group(group_type group)
 92 |   {
 93 |     cuda::graph_launch::destroy_group(group);
 94 |   }
 95 | 
 96 |   component_type create_component()
 97 |   {
 98 |     return component_type{};
 99 |   }
100 | 
101 |   void start_component(group_type, component_type component)
102 |   {
103 | #ifdef COMB_GRAPH_KERNEL_LAUNCH_COMPONENT_STREAMS
104 |     m_component = component;
105 |     m_component.m_con.waitOn(base(*this));
106 | #endif
107 |   }
108 | 
109 |   void finish_component(group_type, component_type component)
110 |   {
111 | #ifdef COMB_GRAPH_KERNEL_LAUNCH_COMPONENT_STREAMS
112 |     base::waitOn(component.m_con);
113 |     m_component.m_con = base(*this);
114 | #endif
115 |   }
116 | 
117 |   void destroy_component(component_type)  // the argument is ignored; nothing is released here
118 |   {
119 | #ifdef COMB_GRAPH_KERNEL_LAUNCH_COMPONENT_STREAMS
120 |     m_component.m_con = base(*this);  // reset the current component's context to a copy of this base context
121 | #endif
122 |   }
123 | 
124 |   event_type createEvent()  // returns the event produced by cuda::graph_launch::createEvent()
125 |   {
126 |     return cuda::graph_launch::createEvent();
127 |   }
128 | 
129 |   void recordEvent(event_type& event)  // records event through cuda::graph_launch::recordEvent using base::stream() (not stream_launch())
130 |   {
131 |     return cuda::graph_launch::recordEvent(event, base::stream());
132 |   }
133 | 
134 |   void finish_component_recordEvent(group_type group, component_type component, event_type& event)  // finish_component followed by recordEvent
135 |   {
136 |     finish_component(group, component);
137 |     recordEvent(event);
138 |   }
139 | 
140 |   bool queryEvent(event_type& event)  // returns the result of cuda::graph_launch::queryEvent(event)
141 |   {
142 |     return cuda::graph_launch::queryEvent(event);
143 |   }
144 | 
145 |   void waitEvent(event_type& event)  // delegates to cuda::graph_launch::waitEvent(event)
146 |   {
147 |     cuda::graph_launch::waitEvent(event);
148 |   }
149 | 
150 |   void destroyEvent(event_type& event)  // delegates to cuda::graph_launch::destroyEvent(event)
151 |   {
152 |     cuda::graph_launch::destroyEvent(event);
153 |   }
154 | 
155 |   template < typename body_type >  // forwards a 1d loop of length len with the given body to cuda::graph_launch::for_all
156 |   void for_all(IdxT len, body_type&& body)
157 |   {
158 |     cuda::graph_launch::for_all(len, std::forward<body_type>(body)
159 | #ifdef COMB_GRAPH_KERNEL_LAUNCH  // NOTE(review): m_component is declared under COMB_GRAPH_KERNEL_LAUNCH_COMPONENT_STREAMS - confirm the two macros are always defined together
160 |         , m_component.m_con.stream_launch()  // extra stream argument, only passed in this configuration
161 | #endif
162 |         );
163 |     // m_component.m_con.synchronize();
164 |   }
165 | 
166 |   template < typename body_type >  // forwards a 2d loop (len0 x len1) with the given body to cuda::graph_launch::for_all_2d
167 |   void for_all_2d(IdxT len0, IdxT len1, body_type&& body)
168 |   {
169 |     cuda::graph_launch::for_all_2d(len0, len1, std::forward<body_type>(body)
170 | #ifdef COMB_GRAPH_KERNEL_LAUNCH  // NOTE(review): m_component is declared under COMB_GRAPH_KERNEL_LAUNCH_COMPONENT_STREAMS - confirm the two macros are always defined together
171 |         , m_component.m_con.stream_launch()  // extra stream argument, only passed in this configuration
172 | #endif
173 |         );
174 |     // m_component.m_con.synchronize();
175 |   }
176 | 
177 |   template < typename body_type >  // forwards a 3d loop (len0 x len1 x len2) with the given body to cuda::graph_launch::for_all_3d
178 |   void for_all_3d(IdxT len0, IdxT len1, IdxT len2, body_type&& body)
179 |   {
180 |     cuda::graph_launch::for_all_3d(len0, len1, len2, std::forward<body_type>(body)
181 | #ifdef COMB_GRAPH_KERNEL_LAUNCH  // NOTE(review): m_component is declared under COMB_GRAPH_KERNEL_LAUNCH_COMPONENT_STREAMS - confirm the two macros are always defined together
182 |         , m_component.m_con.stream_launch()  // extra stream argument, only passed in this configuration
183 | #endif
184 |         );
185 |     // m_component.m_con.synchronize();
186 |   }
187 | 
188 |   template < typename body_type >  // issues one cuda::graph_launch::for_all per (outer, inner) pair; body_in must offer set_outer, set_inner and len
189 |   void fused(IdxT len_outer, IdxT len_inner, IdxT len_hint, body_type&& body_in)
190 |   {
191 |     COMB::ignore_unused(len_hint);  // the hint is not used by this policy
192 |     for (IdxT i_outer = 0; i_outer < len_outer; ++i_outer) {
193 |       auto body = body_in;  // fresh copy of the caller's body for every outer iteration
194 |       body.set_outer(i_outer);
195 |       for (IdxT i_inner = 0; i_inner < len_inner; ++i_inner) {
196 |         body.set_inner(i_inner);
197 |         cuda::graph_launch::for_all(body.len, body  // body.len is read after set_inner and used as the loop length
198 | #ifdef COMB_GRAPH_KERNEL_LAUNCH  // NOTE(review): m_component is declared under COMB_GRAPH_KERNEL_LAUNCH_COMPONENT_STREAMS - confirm the two macros are always defined together
199 |             , m_component.m_con.stream_launch()
200 | #endif
201 |             );
202 |       }
203 |     }
204 |     // m_component.m_con.synchronize();
205 |   }
206 | 
207 | };
208 | 
209 | #endif // COMB_ENABLE_CUDA_GRAPH
210 | 
211 | #endif // _POL_CUDA_GRAPH_HPP
212 | 


--------------------------------------------------------------------------------
/include/exec_pol_mpi_type.hpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #ifndef _POL_MPI_TYPE_HPP
 17 | #define _POL_MPI_TYPE_HPP
 18 | 
 19 | #include "config.hpp"
 20 | 
 21 | 
 22 | #include "memory.hpp"
 23 | 
 24 | #ifdef COMB_ENABLE_MPI
 25 | 
 26 | struct mpi_type_component  // component handle type of mpi_type_pol (see mpi_type_pol::component_type)
 27 | {
 28 |   void* ptr = nullptr;  // null by default; not touched by any code in this header
 29 | };
 30 | 
 31 | struct mpi_type_group  // group handle type of mpi_type_pol (see mpi_type_pol::group_type)
 32 | {
 33 |   void* ptr = nullptr;  // null by default; not touched by any code in this header
 34 | };
 35 | 
 36 | // Execution policy tag indicating that message packing/unpacking should be
 37 | // done inside MPI using MPI datatypes rather than by explicit copy loops.
 38 | struct mpi_type_pol {
 39 |   static const bool async = false;  // this policy is flagged as not asynchronous
 40 |   static const char* get_name() { return "mpi_type"; }  // name string of the policy
 41 |   using event_type = int;  // events are plain ints for this policy
 42 |   using component_type = mpi_type_component;
 43 |   using group_type = mpi_type_group;
 44 | };
 45 | 
 46 | template < >  // execution context for mpi_type_pol: every group/component/event operation below is a no-op
 47 | struct ExecContext<mpi_type_pol> : MPIContext
 48 | {
 49 |   using pol = mpi_type_pol;
 50 |   using event_type = typename pol::event_type;
 51 |   using component_type = typename pol::component_type;
 52 |   using group_type = typename pol::group_type;
 53 | 
 54 |   using base = MPIContext;
 55 | 
 56 |   COMB::Allocator& util_aloc;  // reference to the utility allocator passed to the constructor
 57 | 
 58 |   // Copies the base context from b and binds util_aloc to util_aloc_.
 59 |   ExecContext(base const& b, COMB::Allocator& util_aloc_)
 60 |     : base(b)
 61 |     , util_aloc(util_aloc_)
 62 |   { }
 63 | 
 64 |   void ensure_waitable()
 65 |   {
 66 |     // intentionally empty
 67 |   }
 68 | 
 69 |   template < typename context >  // context: any type providing ensure_waitable() and accepted by base::waitOn
 70 |   void waitOn(context& con)
 71 |   {
 72 |     con.ensure_waitable();  // called on the other context before delegating to the base class
 73 |     base::waitOn(con);
 74 |   }
 75 | 
 76 |   // synchronization functions
 77 |   void synchronize()
 78 |   {
 79 |   }
 80 | 
 81 |   group_type create_group()  // returns a value-initialised group (ptr == nullptr)
 82 |   {
 83 |     return group_type{};
 84 |   }
 85 | 
 86 |   void start_group(group_type)  // no-op
 87 |   {
 88 |   }
 89 | 
 90 |   void finish_group(group_type)  // no-op
 91 |   {
 92 |   }
 93 | 
 94 |   void destroy_group(group_type)
 95 |   {
 96 |     // intentionally empty
 97 |   }
 98 | 
 99 |   component_type create_component()  // returns a value-initialised component (ptr == nullptr)
100 |   {
101 |     return component_type{};
102 |   }
103 | 
104 |   void start_component(group_type, component_type)
105 |   {
106 |     // intentionally empty
107 |   }
108 | 
109 |   void finish_component(group_type, component_type)
110 |   {
111 |     // intentionally empty
112 |   }
113 | 
114 |   void destroy_component(component_type)
115 |   {
116 |     // intentionally empty
117 |   }
118 | 
119 |   // event creation functions
120 |   event_type createEvent()  // returns a value-initialised event, i.e. 0
121 |   {
122 |     return event_type{};
123 |   }
124 | 
125 |   // event record functions
126 |   void recordEvent(event_type&)  // no-op
127 |   {
128 |   }
129 | 
130 |   void finish_component_recordEvent(group_type group, component_type component, event_type& event)  // finish_component followed by recordEvent
131 |   {
132 |     finish_component(group, component);
133 |     recordEvent(event);
134 |   }
135 | 
136 |   // event query functions
137 |   bool queryEvent(event_type&)  // always reports the event as complete
138 |   {
139 |     return true;
140 |   }
141 | 
142 |   // event wait functions
143 |   void waitEvent(event_type&)  // no-op
144 |   {
145 |   }
146 | 
147 |   // event destroy functions
148 |   void destroyEvent(event_type&)  // no-op
149 |   {
150 |   }
151 | 
152 |   // template < typename body_type >
153 |   // void for_all(IdxT len, body_type&& body)
154 |   // {
155 |   //   COMB::ignore_unused(pol, len, body);
156 |   //   static_assert(false, "This method should never be used");
157 |   // }
158 | 
159 |   // template < typename body_type >
160 |   // void for_all_2d(IdxT len0, IdxT len1, body_type&& body)
161 |   // {
162 |   //   COMB::ignore_unused(pol, len0, len1, body);
163 |   //   static_assert(false, "This method should never be used");
164 |   // }
165 | 
166 |   // template < typename body_type >
167 |   // void for_all_3d(IdxT len0, IdxT len1, IdxT len2, body_type&& body)
168 |   // {
169 |   //   COMB::ignore_unused(pol, len0, len1, len2, body);
170 |   //   static_assert(false, "This method should never be used");
171 |   // }
172 | 
173 |   // template < typename body_type >
174 |   // void fused(IdxT len_outer, IdxT len_inner, IdxT len_hint, body_type&& body_in)
175 |   // {
176 |   //   COMB::ignore_unused(pol, len_outer, len_inner, body_in);
177 |   //   static_assert(false, "This method should never be used");
178 |   // }
179 | 
180 | };
181 | 
182 | #endif
183 | 
184 | #endif // _POL_MPI_TYPE_HPP
185 | 


--------------------------------------------------------------------------------
/include/exec_pol_seq.hpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #ifndef _POL_SEQ_HPP
 17 | #define _POL_SEQ_HPP
 18 | 
 19 | #include "config.hpp"
 20 | 
 21 | #include "exec_utils.hpp"
 22 | #include "memory.hpp"
 23 | 
 24 | struct seq_component  // component handle type of seq_pol (see seq_pol::component_type)
 25 | {
 26 |   void* ptr = nullptr;  // null by default; not touched by any code in this header
 27 | };
 28 | 
 29 | struct seq_group  // group handle type of seq_pol (see seq_pol::group_type)
 30 | {
 31 |   void* ptr = nullptr;  // null by default; not touched by any code in this header
 32 | };
 33 | 
 34 | struct seq_pol {  // policy tag whose ExecContext specialisation runs loops as plain sequential for-loops
 35 |   static const bool async = false;  // this policy is flagged as not asynchronous
 36 |   static const char* get_name() { return "seq"; }  // name string of the policy
 37 |   using event_type = int;  // events are plain ints for this policy
 38 |   using component_type = seq_component;
 39 |   using group_type = seq_group;
 40 | };
 41 | 
 42 | template < >
 43 | struct ExecContext<seq_pol> : CPUContext
 44 | {
 45 |   using pol = seq_pol;
 46 |   using event_type = pol::event_type;
 47 |   using component_type = pol::component_type;
 48 |   using group_type = pol::group_type;
 49 | 
 50 |   using base = CPUContext;
 51 | 
 52 |   COMB::Allocator& util_aloc;
 53 | 
 54 |   // Copy the base context from b and bind util_aloc to util_aloc_.
 55 |   ExecContext(base const& b, COMB::Allocator& util_aloc_)
 56 |     : base(b), util_aloc(util_aloc_)
 57 |   {
 58 |   }
 59 | 
 60 |   void ensure_waitable()
 61 |   {
 62 |     // empty for this policy
 63 |   }
 64 | 
 65 |   template < typename context >
 66 |   void waitOn(context& con)
 67 |   {
 68 |     con.ensure_waitable();  // called on the other context before delegating
 69 |     base::waitOn(con);
 70 |   }
 71 | 
 72 |   // synchronization: empty for this policy
 73 |   void synchronize()
 74 |   {
 75 |   }
 76 | 
 77 |   group_type create_group()
 78 |   {
 79 |     return group_type();
 80 |   }
 81 | 
 82 |   void start_group(group_type)
 83 |   {
 84 |   }
 85 | 
 86 |   void finish_group(group_type)
 87 |   {
 88 |   }
 89 | 
 90 |   void destroy_group(group_type)
 91 |   {
 92 |     // empty for this policy
 93 |   }
 94 | 
 95 |   component_type create_component()
 96 |   {
 97 |     return component_type();
 98 |   }
 99 | 
100 |   void start_component(group_type, component_type)
101 |   {
102 |     // empty for this policy
103 |   }
104 | 
105 |   void finish_component(group_type, component_type)
106 |   {
107 |     // empty for this policy
108 |   }
109 | 
110 |   void destroy_component(component_type)
111 |   {
112 |     // empty for this policy
113 |   }
114 | 
115 |   // events: a value-initialised int is handed out
116 |   event_type createEvent()
117 |   {
118 |     return event_type();
119 |   }
120 | 
121 |   // recording an event is empty for this policy
122 |   void recordEvent(event_type&)
123 |   {
124 |   }
125 | 
126 |   void finish_component_recordEvent(group_type group, component_type component, event_type& event)
127 |   {
128 |     finish_component(group, component);
129 |     recordEvent(event);
130 |   }
131 | 
132 |   // every event is reported as complete
133 |   bool queryEvent(event_type&)
134 |   {
135 |     return true;
136 |   }
137 | 
138 |   // waiting on an event is empty for this policy
139 |   void waitEvent(event_type&)
140 |   {
141 |   }
142 | 
143 |   // destroying an event is empty for this policy
144 |   void destroyEvent(event_type&)
145 |   {
146 |   }
147 | 
148 |   // 1d loop: invokes body(idx) for idx in [0, len), in increasing order
149 |   template < typename body_type >
150 |   void for_all(IdxT len, body_type&& body)
151 |   {
152 |     for (IdxT idx = 0; idx < len; ++idx) {
153 |       body(idx);
154 |     }
155 |     // base::synchronize();
156 |   }
157 |   // 2d loop: invokes body(outer, inner) with the second index varying fastest
158 |   template < typename body_type >
159 |   void for_all_2d(IdxT len0, IdxT len1, body_type&& body)
160 |   {
161 |     for (IdxT outer = 0; outer < len0; ++outer) {
162 |       for (IdxT inner = 0; inner < len1; ++inner) {
163 |         body(outer, inner);
164 |       }
165 |     }
166 |     // base::synchronize();
167 |   }
168 |   // 3d loop: invokes body(outer, middle, inner) with the last index varying fastest
169 |   template < typename body_type >
170 |   void for_all_3d(IdxT len0, IdxT len1, IdxT len2, body_type&& body)
171 |   {
172 |     for (IdxT outer = 0; outer < len0; ++outer) {
173 |       for (IdxT middle = 0; middle < len1; ++middle) {
174 |         for (IdxT inner = 0; inner < len2; ++inner) {
175 |           body(outer, middle, inner);
176 |         }
177 |       }
178 |     }
179 |     // base::synchronize();
180 |   }
181 |   // fused loop: per outer index a fresh copy of body_in is taken; per inner index its own len is iterated
182 |   template < typename body_type >
183 |   void fused(IdxT len_outer, IdxT len_inner, IdxT len_hint, body_type&& body_in)
184 |   {
185 |     COMB::ignore_unused(len_hint);
186 |     for (IdxT outer = 0; outer < len_outer; ++outer) {
187 |       auto kernel = body_in;
188 |       kernel.set_outer(outer);
189 |       for (IdxT inner = 0; inner < len_inner; ++inner) {
190 |         kernel.set_inner(inner);
191 |         for (IdxT idx = 0; idx < kernel.len; ++idx) {
192 |           kernel(idx);
193 |         }
194 |       }
195 |     }
196 |     // base::synchronize();
197 |   }
198 | 
199 | 
200 | };
201 | 
202 | #endif // _POL_SEQ_HPP
203 | 


--------------------------------------------------------------------------------
/include/exec_utils.hpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #ifndef _UTILS_HPP
 17 | #define _UTILS_HPP
 18 | 
 19 | #include "config.hpp"
 20 | 
 21 | #include "print.hpp"
 22 | 
 23 | #include <cassert>
 24 | #include <cstdio>
 25 | 
 26 | using IdxT = int;
 27 | using LidxT = int;
 28 | using DataT = double;
 29 | 
 30 | 
 31 | namespace detail {
 32 | 
 33 | // Stand-in for std::exchange: stores new_value in obj and returns obj's former value.
 34 | // taken from https://en.cppreference.com/w/cpp/utility/exchange
 35 | // license http://creativecommons.org/licenses/by-sa/3.0/
 36 | template < typename T, typename U = T >
 37 | T exchange(T& obj, U&& new_value)
 38 | {
 39 |   T previous = std::move(obj);
 40 |   obj = std::forward<U>(new_value);
 41 |   return previous;
 42 | }
 43 | 
 44 | template < typename T, typename ... types >  // Count<T, types...>::value is the number of entries of types... that are exactly T
 45 | struct Count;
 46 | 
 47 | template < typename T >  // base case: empty list
 48 | struct Count<T> {
 49 |   static const size_t value = 0;
 50 | };
 51 | 
 52 | template < typename T, typename ... types >  // the head of the list is T: count it and recurse on the tail
 53 | struct Count<T, T, types...> {
 54 |   static const size_t value = 1 + Count<T, types...>::value;
 55 | };
 56 | 
 57 | template < typename T, typename T0, typename ... types >  // the head is some other type T0: skip it and recurse on the tail
 58 | struct Count<T, T0, types...> {
 59 |   static const size_t value = Count<T, types...>::value;
 60 | };
 61 | 
 62 | struct indexer_kji {  // maps (k, j, i) to the linear index i + j*ilen + k*ijlen
 63 |   IdxT ijlen, ilen;  // multipliers applied to k and to j respectively
 64 |   indexer_kji(IdxT ijlen_, IdxT ilen_) : ijlen(ijlen_), ilen(ilen_) {}
 65 |   COMB_HOST COMB_DEVICE IdxT operator()(IdxT k, IdxT j, IdxT i) const { return i + j * ilen + k * ijlen; }
 66 | };
 67 | struct indexer_ji {  // maps (j, i) to the linear index i + j*ilen
 68 |   IdxT ilen;  // multiplier applied to j
 69 |   indexer_ji(IdxT ilen_) : ilen(ilen_) {}
 70 |   COMB_HOST COMB_DEVICE IdxT operator()(IdxT j, IdxT i) const { return i + j * ilen; }
 71 | };
 72 | struct indexer_i {  // identity indexer: returns i unchanged
 73 |   indexer_i() {}
 74 |   COMB_HOST COMB_DEVICE IdxT operator()(IdxT i) const { return i; }
 75 | };
 76 | 
 77 | struct indexer_offset_kji {  // like indexer_kji, but adds (kmin, jmin, imin) to the coordinates before linearising
 78 |   IdxT ijlen, ilen;  // multipliers applied to the shifted k and j
 79 |   IdxT imin, jmin, kmin;  // offsets added to i, j and k
 80 |   indexer_offset_kji(IdxT kmin_, IdxT jmin_, IdxT imin_, IdxT ijlen_, IdxT ilen_)  // note the argument order: k, j, i offsets first, then the multipliers
 81 |     : ijlen(ijlen_), ilen(ilen_)
 82 |     , imin(imin_), jmin(jmin_), kmin(kmin_) {}
 83 |   COMB_HOST COMB_DEVICE IdxT operator()(IdxT k, IdxT j, IdxT i) const { return (i+imin) + (j+jmin) * ilen + (k+kmin) * ijlen; }
 84 | };
 85 | 
 86 | struct indexer_list_kji {  // linearises (k, j, i) as i + j*ilen + k*ijlen and returns the entry of indices at that position
 87 |   LidxT const* indices;  // lookup table; not owned (only the pointer is stored)
 88 |   IdxT ijlen, ilen;  // multipliers applied to k and to j respectively
 89 |   indexer_list_kji(LidxT const* indices_, IdxT ijlen_, IdxT ilen_) : indices(indices_), ijlen(ijlen_), ilen(ilen_) {}
 90 |   COMB_HOST COMB_DEVICE IdxT operator()(IdxT k, IdxT j, IdxT i) const { return indices[i + j * ilen + k * ijlen]; }
 91 | };
 92 | struct indexer_list_ji {  // linearises (j, i) as i + j*ilen and returns the entry of indices at that position
 93 |   LidxT const* indices;  // lookup table; not owned (only the pointer is stored)
 94 |   IdxT ilen;  // multiplier applied to j
 95 |   indexer_list_ji(LidxT const* indices_, IdxT ilen_) : indices(indices_), ilen(ilen_) {}
 96 |   COMB_HOST COMB_DEVICE IdxT operator()(IdxT j, IdxT i) const { return indices[i + j * ilen]; }
 97 | };
 98 | struct indexer_list_i {  // returns indices[i]
 99 |   LidxT const* indices;  // lookup table; not owned (only the pointer is stored)
100 |   indexer_list_i(LidxT const* indices_) : indices(indices_) {}
101 |   COMB_HOST COMB_DEVICE IdxT operator()(IdxT i) const { return indices[i]; }
102 | };
103 | 
104 | template < typename T_src, typename I_src, typename T_dst, typename I_dst >
105 | struct copy_idxr_idxr {
106 |   T_src const* ptr_src;
107 |   T_dst* ptr_dst;
108 |   I_src idxr_src;
109 |   I_dst idxr_dst;
110 |   copy_idxr_idxr(T_src const* const& src, I_src const& src_indexer, T_dst* const& dst, I_dst const& dst_indexer) : ptr_src(src), ptr_dst(dst), idxr_src(src_indexer), idxr_dst(dst_indexer) {}
111 |   template < typename ... Ts >
112 |   COMB_HOST COMB_DEVICE void operator()(Ts... args) const
113 |   {
114 |     IdxT const to = idxr_dst(args...);
115 |     IdxT const from = idxr_src(args...);
116 |     // Loop body: both indexers receive the same loop coordinates; the
117 |     // destination indexer is evaluated first, then the source indexer, and
118 |     // one element is copied from the source array to the destination array.
119 |     ptr_dst[to] = ptr_src[from];
120 |   }
121 | };
122 | 
123 | template < typename T_src, typename I_src, typename T_dst, typename I_dst >  // factory that deduces the template arguments of copy_idxr_idxr
124 | copy_idxr_idxr<T_src, I_src, T_dst, I_dst> make_copy_idxr_idxr(T_src* const& ptr_src, I_src const& idxr_src, T_dst* const& ptr_dst, I_dst const& idxr_dst) {
125 |   return copy_idxr_idxr<T_src, I_src, T_dst, I_dst>(ptr_src, idxr_src, ptr_dst, idxr_dst);  // arguments are forwarded in the constructor's order: source first, destination second
126 | }
127 | 
128 | template < typename I_src, typename T_dst, typename I_dst >
129 | struct set_idxr_idxr {
130 |   T_dst* ptr_dst;
131 |   I_src idxr_src;
132 |   I_dst idxr_dst;
133 |   set_idxr_idxr(I_src const& src_indexer, T_dst* const& dst, I_dst const& dst_indexer)
134 |     : ptr_dst(dst)
135 |     , idxr_src(src_indexer)
136 |     , idxr_dst(dst_indexer)
137 |   { }
138 |   template < typename ... Ts >
139 |   COMB_HOST COMB_DEVICE void operator()(Ts... args) const
140 |   {
141 |     IdxT const to = idxr_dst(args...);
142 |     IdxT const value = idxr_src(args...);
143 |     // Loop body: both indexers receive the same loop coordinates; the
144 |     // destination indexer is evaluated first, then the source indexer, and
145 |     // the source *index itself* is stored into the destination array.
146 |     ptr_dst[to] = value;
147 |   }
148 | };
149 | 
150 | template < typename I_src, typename T_dst, typename I_dst >  // factory that deduces the template arguments of set_idxr_idxr
151 | set_idxr_idxr<I_src, T_dst, I_dst> make_set_idxr_idxr(I_src const& idxr_src, T_dst* const& ptr_dst, I_dst const& idxr_dst) {
152 |   return set_idxr_idxr<I_src, T_dst, I_dst>(idxr_src, ptr_dst, idxr_dst);  // arguments are forwarded unchanged, in the constructor's order
153 | }
154 | 
155 | } // namespace detail
156 | 
157 | #endif // _UTILS_HPP
158 | 
159 | 


--------------------------------------------------------------------------------
/include/exec_utils_cuda.hpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #ifndef _CUDA_UTILS_HPP
 17 | #define _CUDA_UTILS_HPP
 18 | 
 19 | #include "config.hpp"
 20 | 
 21 | #ifdef COMB_ENABLE_CUDA
 22 | 
 23 | #include <cassert>
 24 | #include <cstdio>
 25 | 
 26 | #include <cuda.h>
 27 | #include <nvToolsExt.h>
 28 | #include <nvToolsExtCuda.h>
 29 | // #include <mpi.h>
 30 | 
 31 | namespace COMB {
 32 | 
 33 | #define cudaCheck(...) ::COMB::cudaCheckError(#__VA_ARGS__, __VA_ARGS__, __FILE__, __LINE__)
 34 | // cudaCheck(expr) passes the stringified expression, its cudaError_t value and the call site to cudaCheckError.
 35 | inline void cudaCheckError(const char* str, cudaError_t code, const char* file, int line)  // reports any code other than cudaSuccess
 36 | {
 37 |   if (code != cudaSuccess) {
 38 |     fprintf(stderr, "Error performing %s; %s %s %s:%i\n", str, cudaGetErrorName(code), cudaGetErrorString(code), file, line); fflush(stderr);
 39 |     assert(0);  // NOTE(review): assert is compiled out when NDEBUG is defined, so execution then continues past the error - confirm this is intended
 40 |     // MPI_Abort(MPI_COMM_WORLD, 1);
 41 |   }
 42 | }
 43 | 
 44 | #define cudaCheckReady(...) ::COMB::cudaCheckReadyError(#__VA_ARGS__, __VA_ARGS__, __FILE__, __LINE__)
 45 | // Returns true for cudaSuccess, false for cudaErrorNotReady; any other code is reported like in cudaCheckError and also yields false.
 46 | inline bool cudaCheckReadyError(const char* str, cudaError_t code, const char* file, int line)
 47 | {
 48 |   const bool ready = (code == cudaSuccess);
 49 |   // "not ready" is an expected outcome; everything else that is not success is an error
 50 |   if (!ready && code != cudaErrorNotReady) {
 51 |     fprintf(stderr, "Error performing %s; %s %s %s:%i\n", str, cudaGetErrorName(code), cudaGetErrorString(code), file, line); fflush(stderr);
 52 |     assert(0);
 53 |     // MPI_Abort(MPI_COMM_WORLD, 1);
 54 |   }
 55 |   return ready;
 56 | }
 57 | 
 58 | 
 59 | namespace detail {
 60 | 
 61 | namespace cuda {
 62 | 
 63 | inline int get_device_impl() {  // asks the CUDA runtime for the current device id
 64 |   int d = -1;  // stays -1 if cudaGetDevice does not write it
 65 |   cudaCheck(cudaGetDevice(&d));
 66 |   return d;
 67 | }
 68 | 
 69 | inline int get_device() {  // cached device id: get_device_impl() runs only on the first call
 70 |   static int d = get_device_impl();  // function-local static, so a later change of the current device is not reflected
 71 |   return d;
 72 | }
 73 | 
 74 | inline cudaDeviceProp get_properties_impl() {  // queries the properties of the device reported by get_device()
 75 |   cudaDeviceProp p{};  // value-initialised so a failed query (with assert compiled out) never returns indeterminate fields
 76 |   cudaCheck(cudaGetDeviceProperties(&p, get_device()));
 77 |   return p;
 78 | }
 79 | 
 80 | inline cudaDeviceProp get_properties() {  // cached device properties; note that every call returns a copy of the struct
 81 |   static cudaDeviceProp p = get_properties_impl();  // function-local static: queried only on the first call
 82 |   return p;
 83 | }
 84 | 
 85 | inline int get_concurrent_managed_access() {  // cached value of the concurrentManagedAccess device property
 86 |   static int accessible =
 87 | #if defined(CUDART_VERSION) && CUDART_VERSION >= 8000
 88 |     get_properties().concurrentManagedAccess;
 89 | #else
 90 |     false;  // the property is only read when CUDART_VERSION is at least 8000; otherwise 0 is reported
 91 | #endif
 92 |   return accessible;
 93 | }
 94 | 
 95 | inline int get_host_accessible_from_device() {  // cached value of the pageableMemoryAccess device property
 96 |   static int accessible =
 97 | #if defined(CUDART_VERSION) && CUDART_VERSION >= 9000
 98 |     get_properties().pageableMemoryAccess;
 99 | #else
100 |     false;  // the property is only read when CUDART_VERSION is at least 9000; otherwise 0 is reported
101 | #endif
102 |   return accessible;
103 | }
104 | 
105 | inline int get_device_accessible_from_host() {  // hard-coded: always returns 0 in the CUDA build
106 |   static int accessible =
107 |     false;
108 |   return accessible;
109 | }
110 | 
111 | inline int get_num_sm() {  // cached multiProcessorCount of the device
112 |   static int num_sm = get_properties().multiProcessorCount;
113 |   return num_sm;
114 | }
115 | 
116 | inline int get_arch() {  // cached architecture number computed as 100*major + 10*minor (e.g. 7.0 -> 700)
117 |   static int cuda_arch = 100*get_properties().major + 10*get_properties().minor;
118 |   return cuda_arch;
119 | }
120 | 
121 | } // namespace cuda
122 | 
123 | } // namespace detail
124 | 
125 | } // namespace COMB
126 | 
127 | #endif
128 | 
129 | #endif // _CUDA_UTILS_HPP
130 | 


--------------------------------------------------------------------------------
/include/exec_utils_hip.hpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #ifndef _HIP_UTILS_HPP
 17 | #define _HIP_UTILS_HPP
 18 | 
 19 | #include "config.hpp"
 20 | 
 21 | #ifdef COMB_ENABLE_HIP
 22 | 
 23 | #include <cassert>
 24 | #include <cstdio>
 25 | 
 26 | #include <hip/hip_runtime.h>
 27 | #include <roctracer/roctx.h>
 28 | // #include <mpi.h>
 29 | 
 30 | namespace COMB {
 31 | 
 32 | #define hipCheck(...) ::COMB::hipCheckError(#__VA_ARGS__, __VA_ARGS__, __FILE__, __LINE__)
 33 | // hipCheck(expr) passes the stringified expression, its hipError_t value and the call site to hipCheckError.
 34 | inline void hipCheckError(const char* str, hipError_t code, const char* file, int line)  // reports any code other than hipSuccess
 35 | {
 36 |   if (code != hipSuccess) {
 37 |     fprintf(stderr, "Error performing %s; %s %s %s:%i\n", str, hipGetErrorName(code), hipGetErrorString(code), file, line); fflush(stderr);
 38 |     assert(0);  // NOTE(review): assert is compiled out when NDEBUG is defined, so execution then continues past the error - confirm this is intended
 39 |     // MPI_Abort(MPI_COMM_WORLD, 1);
 40 |   }
 41 | }
 42 | 
 43 | #define hipCheckReady(...) ::COMB::hipCheckReadyError(#__VA_ARGS__, __VA_ARGS__, __FILE__, __LINE__)
 44 | // Returns true for hipSuccess, false for hipErrorNotReady; any other code is reported like in hipCheckError and also yields false.
 45 | inline bool hipCheckReadyError(const char* str, hipError_t code, const char* file, int line)
 46 | {
 47 |   const bool ready = (code == hipSuccess);
 48 |   // "not ready" is an expected outcome; everything else that is not success is an error
 49 |   if (!ready && code != hipErrorNotReady) {
 50 |     fprintf(stderr, "Error performing %s; %s %s %s:%i\n", str, hipGetErrorName(code), hipGetErrorString(code), file, line); fflush(stderr);
 51 |     assert(0);
 52 |     // MPI_Abort(MPI_COMM_WORLD, 1);
 53 |   }
 54 |   return ready;
 55 | }
 56 | 
 57 | 
 58 | namespace detail {
 59 | 
 60 | namespace hip {
 61 | 
 62 | inline int get_device_impl() {  // asks the HIP runtime for the current device id
 63 |   int d = -1;  // stays -1 if hipGetDevice does not write it
 64 |   hipCheck(hipGetDevice(&d));
 65 |   return d;
 66 | }
 67 | 
 68 | inline int get_device() {  // cached device id: get_device_impl() runs only on the first call
 69 |   static int d = get_device_impl();  // function-local static, so a later change of the current device is not reflected
 70 |   return d;
 71 | }
 72 | 
 73 | inline hipDeviceProp_t get_properties_impl() {  // queries the properties of the device reported by get_device()
 74 |   hipDeviceProp_t p{};  // value-initialised so a failed query (with assert compiled out) never returns indeterminate fields
 75 |   hipCheck(hipGetDeviceProperties(&p, get_device()));
 76 |   return p;
 77 | }
 78 | 
 79 | inline hipDeviceProp_t get_properties() {  // cached device properties; note that every call returns a copy of the struct
 80 |   static hipDeviceProp_t p = get_properties_impl();  // function-local static: queried only on the first call
 81 |   return p;
 82 | }
 83 | 
 84 | inline int get_concurrent_managed_access() {  // hard-coded: always returns 1 in the HIP build (no device query is made)
 85 |   static int accessible =
 86 |     true;
 87 |   return accessible;
 88 | }
 89 | 
 90 | inline int get_host_accessible_from_device() {  // hard-coded: always returns 0 in the HIP build (no device query is made)
 91 |   static int accessible =
 92 |     false;
 93 |   return accessible;
 94 | }
 95 | 
 96 | inline int get_device_accessible_from_host() {  // hard-coded: always returns 1 in the HIP build (no device query is made)
 97 |   static int accessible =
 98 |     true;
 99 |   return accessible;
100 | }
101 | 
102 | inline int get_num_cu() {  // cached multiProcessorCount of the device
103 |   static int num_cu = get_properties().multiProcessorCount;
104 |   return num_cu;
105 | }
106 | 
107 | inline int get_arch() {  // cached architecture number computed as 100*major + 10*minor of the device properties
108 |   static int hip_arch = 100*get_properties().major + 10*get_properties().minor;
109 |   return hip_arch;
110 | }
111 | 
112 | } // namespace hip
113 | 
114 | } // namespace detail
115 | 
116 | } // namespace COMB
117 | 
118 | #endif
119 | 
120 | #endif // _HIP_UTILS_HPP
121 | 


--------------------------------------------------------------------------------
/include/mutex.hpp:
--------------------------------------------------------------------------------
 1 | /*!
 2 | ******************************************************************************
 3 | *
 4 | * \file
 5 | *
 6 | * \brief   Header file providing functionality similar to std mutex header.
 7 | *
 8 | ******************************************************************************
 9 | */
10 | 
11 | //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
12 | // Copyright (c) 2016-18, Lawrence Livermore National Security, LLC.
13 | //
14 | // Produced at the Lawrence Livermore National Laboratory
15 | //
16 | // LLNL-CODE-689114
17 | //
18 | // All rights reserved.
19 | //
20 | // This file is part of RAJA.
21 | //
22 | // For details about use and distribution, please read RAJA/LICENSE.
23 | //
24 | //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
25 | 
26 | #ifndef COMBRAJA_util_mutex_HPP
27 | #define COMBRAJA_util_mutex_HPP
28 | 
29 | //#include "RAJA/config.hpp"
30 | #include "config.hpp"
31 | 
32 | #if defined(COMB_ENABLE_OPENMP)
33 | #include <omp.h>
34 | #endif
35 | 
36 | namespace COMBRAJA
37 | {
38 | 
39 | #if defined(COMB_ENABLE_OPENMP)
40 | namespace omp
41 | {
42 | 
43 | //! class wrapping omp_lock_t with std::mutex interface
44 | class mutex
45 | {
46 | public:
47 |   using native_handle_type = omp_lock_t;
48 |   // The lock is initialised on construction and destroyed in the destructor.
49 |   mutex() { omp_init_lock(&m_lock); }
50 |   // Neither copyable nor movable: the object owns the underlying omp_lock_t.
51 |   mutex(const mutex&) = delete;
52 |   mutex(mutex&&) = delete;
53 |   mutex& operator=(const mutex&) = delete;
54 |   mutex& operator=(mutex&&) = delete;
55 |   // Acquires the lock via omp_set_lock.
56 |   void lock() { omp_set_lock(&m_lock); }
57 |   // Returns true when omp_test_lock reports a non-zero result.
58 |   bool try_lock() { return omp_test_lock(&m_lock) != 0; }
59 |   // Releases the lock via omp_unset_lock.
60 |   void unlock() { omp_unset_lock(&m_lock); }
61 |   // Gives direct access to the wrapped omp_lock_t.
62 |   native_handle_type& native_handle() { return m_lock; }
63 | 
64 |   ~mutex() { omp_destroy_lock(&m_lock); }
65 | 
66 | private:
67 |   native_handle_type m_lock;
68 | };
69 | 
70 | }  // namespace omp
71 | #endif  // closing endif for if defined(COMB_ENABLE_OPENMP)
72 | 
73 | //! class providing functionality of std::lock_guard
74 | template <typename mutex_type>
75 | class lock_guard
76 | {
77 | public:
78 |   explicit lock_guard(mutex_type& m) : m_mutex(m) { m_mutex.lock(); }  // locks m for the lifetime of the guard
79 |   // Neither copyable nor movable, so lock() and unlock() are paired exactly once.
80 |   lock_guard(const lock_guard&) = delete;
81 |   lock_guard(lock_guard&&) = delete;
82 |   lock_guard& operator=(const lock_guard&) = delete;
83 |   lock_guard& operator=(lock_guard&&) = delete;
84 | 
85 |   ~lock_guard() { m_mutex.unlock(); }  // unlocks the mutex when the guard goes out of scope
86 | 
87 | private:
88 |   mutex_type& m_mutex;  // reference only; the mutex must outlive the guard
89 | };
90 | 
91 | }  // namespace COMBRAJA
92 | 
93 | #endif  // closing endif for header file include guard
94 | 


--------------------------------------------------------------------------------
/include/print.hpp:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////////////////////////////////////////////////
 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | //
 4 | // Produced at the Lawrence Livermore National Laboratory
 5 | //
 6 | // LLNL-CODE-758885
 7 | //
 8 | // All rights reserved.
 9 | //
10 | // This file is part of Comb.
11 | //
12 | // For details, see https://github.com/LLNL/Comb
13 | // Please also see the LICENSE file for MIT license.
14 | //////////////////////////////////////////////////////////////////////////////
15 | 
16 | #ifndef _PRINT_HPP
17 | #define _PRINT_HPP
18 | 
19 | #include "config.hpp"
20 | 
21 | #include <cstdio>
22 | 
23 | 
24 | #define COMB_SERIALIZE_HELPER(a) #a
25 | #define COMB_SERIALIZE(a) COMB_SERIALIZE_HELPER(a)
26 | 
27 | enum struct FileGroup  // selects the output destination(s) of fgprintf / FGPRINTF
28 | { out_any     // stdout, any proc
29 | , out_master  // stdout, rank 0 only
30 | , err_any     // stderr, any proc
31 | , err_master  // stderr, rank 0 only
32 | , proc        // per process file, any proc
33 | , summary     // per run summary file, rank 0 only
34 | , summary_csv // per run summary csv file, rank 0 only
35 | , all         // out_master, proc, summary, summary_csv
36 | };
37 | 
38 | extern int   mpi_rank;
39 | extern FILE* comb_out_file;
40 | extern FILE* comb_err_file;
41 | extern FILE* comb_proc_file;
42 | extern FILE* comb_summary_file;
43 | 
44 | extern void comb_setup_files();
45 | extern void comb_teardown_files();
46 | 
47 | extern void fgprintf(FileGroup fg, const char* fmt, ...);
48 | extern void print_proc_memory_stats();
49 | 
50 | #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)  // device compilation pass: FFLUSH expands to a no-op expression
51 | #define FFLUSH(f) static_cast<void>(0)
52 | #else  // host compilation: FFLUSH is plain fflush
53 | #define FFLUSH(f) fflush(f)
54 | #endif
55 | 
56 |  #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)  // device compilation pass: the file group is dropped and printf is used
57 | #define FGPRINTF(fg, ...) printf(__VA_ARGS__)
58 | #else  // host compilation: output is routed through fgprintf according to the file group
59 | #define FGPRINTF(fg, ...) fgprintf(fg, __VA_ARGS__)
60 | #endif
61 | 
62 | #ifdef COMB_ENABLE_LOG  // logging enabled: LOGPRINTF writes to FileGroup::proc
63 | #define LOGPRINTF(...) FGPRINTF(FileGroup::proc, __VA_ARGS__)
64 | #else  // logging disabled: the arguments are still evaluated and handed to COMB::ignore_unused, but nothing is printed
65 | #define LOGPRINTF(...) do { COMB::ignore_unused(__VA_ARGS__); } while(0)
66 | #endif
67 | 
68 | #endif // _PRINT_HPP
69 | 
70 | 


--------------------------------------------------------------------------------
/scripts/basic_tests.bash:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | nodes=$1
 4 | procs=$2
 5 | procs_per_side=$3
 6 | 
 7 | # Choose a command to run mpi based on the system being used
 8 | if [[ ! "x" == "x$SYS_TYPE" ]]; then
 9 |    if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then
10 |       # Command used to run mpi on sierra systems
11 |       run_mpi="lrun -N$nodes -p$procs"
12 |       # add arguments to turn on cuda aware mpi (optionally disable gpu direct)
13 |       # run_mpi="${run_mpi} --smpiargs \"-gpu\""
14 |       # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\""
15 |    elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then
16 |       # Command used to run mpi on EA systems
17 |       run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind"
18 |    else
19 |       # Command used to run mpi on slurm scheduled systems
20 |       run_mpi="srun -N$nodes -n$procs"
21 |    fi
22 | else
23 |    # Command used to run mpi with mpirun
24 |    # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php
25 |    # Note: you may need to use additional options to get reasonable mpi behavior
26 |    # --host=hostname0,hostname1,... https://www.open-mpi.org/faq/?category=running#mpirun-host
27 |    # --hostfile my_hosts            https://www.open-mpi.org/faq/?category=running#mpirun-hostfile
28 |    run_mpi="mpirun -np $procs"
29 | 
30 |    # Command used to run mpi with mpiexec
31 |    # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html
32 |    # run_mpi="mpiexec -n $procs"
33 | fi
34 | 
35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior
36 | # Your scheduler may help with this
37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads
38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/
39 | # Ex:
40 | #   bash:
41 | #     mpirun -np 1 bind_script comb
42 | #   bind_script:
43 | #     export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node
44 | #     exec $@
45 | 
46 | # Comb executable or symlink
47 | run_comb="$(pwd)/comb"
48 | 
49 | # Choose arguments for comb
50 | # elements on one side of the cube for each process
51 | elems_per_procs_per_side=100 # 180
52 | # overall size of the grid
53 | let size=procs_per_side*elems_per_procs_per_side
54 | comb_args="${size}_${size}_${size}"
55 | # divide the grid into a number of procs per side
56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}"
57 | # set the grid to be periodic in each dimension
58 | comb_args="${comb_args} -periodic 1_1_1"
59 | # set the halo width or number of ghost zones
60 | comb_args="${comb_args} -ghost 1_1_1"
61 | # set number of grid variables
62 | comb_args="${comb_args} -vars 3"
63 | # set number of communication cycles
64 | comb_args="${comb_args} -cycles 25" # 100
65 | # set cutoff between large and small message packing/unpacking kernels
66 | comb_args="${comb_args} -comm cutoff 250"
67 | # set the number of omp threads per process
68 | comb_args="${comb_args} -omp_threads 10"
69 | # enable tests passing cuda device or managed memory to mpi
70 | # comb_args="${comb_args} -cuda_aware_mpi"
71 | # enable basic execution test (disables all others)
72 | comb_args="${comb_args} -basic_only"
73 | 
74 | # set up the base command to run a test
75 | # use sep_out.bash to separate each rank's output
76 | run_test_base="${run_mpi} ${run_comb}"
77 | 
78 | # Run a test with this comm method
79 | echo "${run_test_base} ${comb_args}"
80 | ${run_test_base} ${comb_args}
81 | 
82 | echo "done"
83 | 


--------------------------------------------------------------------------------
/scripts/focused_cuda_graphs_tests.bash:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | nodes=$1
  4 | procs=$2
  5 | procs_per_side=$3
  6 | 
  7 | # Choose a command to run mpi based on the system being used
  8 | if [[ ! "x" == "x$SYS_TYPE" ]]; then
  9 |    if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then
 10 |       # Command used to run mpi on sierra systems
 11 |       run_mpi="lrun -N$nodes -p$procs"
 12 |       # add arguments to turn on cuda aware mpi (optionally disable gpu direct)
 13 |       # run_mpi="${run_mpi} --smpiargs \"-gpu\""
 14 |       # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\""
 15 |    elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then
 16 |       # Command used to run mpi on EA systems
 17 |       run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind"
 18 |    else
 19 |       # Command used to run mpi on slurm scheduled systems
 20 |       run_mpi="srun -N$nodes -n$procs"
 21 |    fi
 22 | else
 23 |    # Command used to run mpi with mpirun
 24 |    # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php
 25 |    # Note: you may need to use additional options to get reasonable mpi behavior
 26 |    # --host=hostname0,hostname1,... https://www.open-mpi.org/faq/?category=running#mpirun-host
 27 |    # --hostfile my_hosts            https://www.open-mpi.org/faq/?category=running#mpirun-hostfile
 28 |    run_mpi="mpirun -np $procs"
 29 | 
 30 |    # Command used to run mpi with mpiexec
 31 |    # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html
 32 |    # run_mpi="mpiexec -n $procs"
 33 | fi
 34 | 
 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior
 36 | # Your scheduler may help with this
 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads
 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/
 39 | # Ex:
 40 | #   bash:
 41 | #     mpirun -np 1 bind_script comb
 42 | #   bind_script:
 43 | #     export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node
 44 | #     exec $@
 45 | 
 46 | # Comb executable or symlink
 47 | run_comb="$(pwd)/comb"
 48 | 
 49 | # Choose arguments for comb
 50 | # elements on one side of the cube for each process
 51 | elems_per_procs_per_side=100 # 180
 52 | # overall size of the grid
 53 | let size=procs_per_side*elems_per_procs_per_side
 54 | comb_args="${size}_${size}_${size}"
 55 | # divide the grid into a number of procs per side
 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}"
 57 | # set the grid to be periodic in each dimension
 58 | comb_args="${comb_args} -periodic 1_1_1"
 59 | # set the halo width or number of ghost zones
 60 | comb_args="${comb_args} -ghost 1_1_1"
 61 | # set number of grid variables
 62 | comb_args="${comb_args} -vars 3"
 63 | # set number of communication cycles
 64 | comb_args="${comb_args} -cycles 25" # 100
 65 | # set cutoff between large and small message packing/unpacking kernels
 66 | comb_args="${comb_args} -comm cutoff 250"
 67 | # set the number of omp threads per process
 68 | comb_args="${comb_args} -omp_threads 10"
 69 | # enable tests passing cuda device or managed memory to mpi
 70 | # comb_args="${comb_args} -cuda_aware_mpi"
 71 | # disable seq execution tests
 72 | comb_args="${comb_args} -exec disable seq"
 73 | # enable cuda execution tests
 74 | comb_args="${comb_args} -exec enable cuda"
 75 | # enable cuda graph execution tests
 76 | comb_args="${comb_args} -exec enable cuda_graph"
 77 | # disable host memory tests
 78 | comb_args="${comb_args} -memory disable host"
 79 | # enable cuda managed memory tests
 80 | comb_args="${comb_args} -memory enable cuda_managed"
 81 | # enable mock communication tests
 82 | comb_args="${comb_args} -comm enable mock"
 83 | # enable mpi communication tests
 84 | comb_args="${comb_args} -comm enable mpi"
 85 | 
 86 | # set up arguments for communication method (every post/wait phase uses wait_all)
 87 | wait_all_method="-comm post_recv wait_all -comm post_send wait_all -comm wait_recv wait_all -comm wait_send wait_all"
 88 | 
 89 | # set up the base command to run a test
 90 | # use sep_out.bash to separate each rank's output
 91 | run_test_base="${run_mpi} ${run_comb}"
 92 | 
 93 | # for each communication method (currently only wait_all_method)
 94 | for comm_method in "${wait_all_method}"; do
 95 | 
 96 |    # Run a test with this comm method (the unquoted expansions split into separate arguments on purpose)
 97 |    echo "${run_test_base} ${comm_method} ${comb_args}"
 98 |    ${run_test_base} ${comm_method} ${comb_args}
 99 | 
100 | done
101 | 
102 | echo "done"
103 | 


--------------------------------------------------------------------------------
/scripts/focused_gdsync_tests.bash:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | nodes=$1
  4 | procs=$2
  5 | procs_per_side=$3
  6 | 
  7 | # Choose a command to run mpi based on the system being used
  8 | if [[ ! "x" == "x$SYS_TYPE" ]]; then
  9 |    if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then
 10 |       # Command used to run mpi on sierra systems
 11 |       run_mpi="lrun -N$nodes -p$procs"
 12 |       # add arguments to turn on cuda aware mpi (optionally disable gpu direct)
 13 |       # run_mpi="${run_mpi} --smpiargs \"-gpu\""
 14 |       # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\""
 15 |    elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then
 16 |       # Command used to run mpi on EA systems
 17 |       run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind"
 18 |    else
 19 |       # Command used to run mpi on slurm scheduled systems
 20 |       run_mpi="srun -N$nodes -n$procs"
 21 |    fi
 22 | else
 23 |    # Command used to run mpi with mpirun
 24 |    # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php
 25 |    # Note: you may need to use additional options to get reasonable mpi behavior
 26 |    # --host=hostname0,hostname1,... https://www.open-mpi.org/faq/?category=running#mpirun-host
 27 |    # --hostfile my_hosts            https://www.open-mpi.org/faq/?category=running#mpirun-hostfile
 28 |    run_mpi="mpirun -np $procs"
 29 | 
 30 |    # Command used to run mpi with mpiexec
 31 |    # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html
 32 |    # run_mpi="mpiexec -n $procs"
 33 | fi
 34 | 
 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior
 36 | # Your scheduler may help with this
 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads
 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/
 39 | # Ex:
 40 | #   bash:
 41 | #     mpirun -np 1 bind_script comb
 42 | #   bind_script:
 43 | #     export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node
 44 | #     exec $@
 45 | 
 46 | # Comb executable or symlink
 47 | run_comb="$(pwd)/comb"
 48 | 
 49 | # Choose arguments for comb
 50 | # elements on one side of the cube for each process
 51 | elems_per_procs_per_side=100 # 180
 52 | # overall size of the grid
 53 | let size=procs_per_side*elems_per_procs_per_side
 54 | comb_args="${size}_${size}_${size}"
 55 | # divide the grid into a number of procs per side
 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}"
 57 | # set the grid to be periodic in each dimension
 58 | comb_args="${comb_args} -periodic 1_1_1"
 59 | # set the halo width or number of ghost zones
 60 | comb_args="${comb_args} -ghost 1_1_1"
 61 | # set number of grid variables
 62 | comb_args="${comb_args} -vars 3"
 63 | # set number of communication cycles
 64 | comb_args="${comb_args} -cycles 25" # 100
 65 | # set cutoff between large and small message packing/unpacking kernels
 66 | comb_args="${comb_args} -comm cutoff 250"
 67 | # set the number of omp threads per process
 68 | comb_args="${comb_args} -omp_threads 10"
 69 | # enable tests passing cuda device or managed memory to mpi
 70 | # comb_args="${comb_args} -cuda_aware_mpi"
 71 | # disable seq execution tests
 72 | comb_args="${comb_args} -exec disable seq"
 73 | # enable cuda execution tests
 74 | comb_args="${comb_args} -exec enable cuda"
 75 | # disable host memory tests
 76 | comb_args="${comb_args} -memory disable host"
 77 | # enable cuda managed memory tests
 78 | comb_args="${comb_args} -memory enable cuda_managed"
 79 | # enable mock communication tests
 80 | comb_args="${comb_args} -comm enable mock"
 81 | # enable mpi communication tests
 82 | comb_args="${comb_args} -comm enable mpi"
 83 | # enable gdsync communication tests
 84 | comb_args="${comb_args} -comm enable gdsync"
 85 | 
 86 | # set up arguments for communication method (every post/wait phase uses wait_all)
 87 | wait_all_method="-comm post_recv wait_all -comm post_send wait_all -comm wait_recv wait_all -comm wait_send wait_all"
 88 | 
 89 | # set up the base command to run a test
 90 | # use sep_out.bash to separate each rank's output
 91 | run_test_base="${run_mpi} ${run_comb}"
 92 | 
 93 | # for each communication method (currently only wait_all_method)
 94 | for comm_method in "${wait_all_method}"; do
 95 | 
 96 |    # Run a test with this comm method (the unquoted expansions split into separate arguments on purpose)
 97 |    echo "${run_test_base} ${comm_method} ${comb_args}"
 98 |    ${run_test_base} ${comm_method} ${comb_args}
 99 | 
100 | done
101 | 
102 | echo "done"
103 | 


--------------------------------------------------------------------------------
/scripts/focused_gpump_tests.bash:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | nodes=$1
  4 | procs=$2
  5 | procs_per_side=$3
  6 | 
  7 | # Choose a command to run mpi based on the system being used
  8 | if [[ ! "x" == "x$SYS_TYPE" ]]; then
  9 |    if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then
 10 |       # Command used to run mpi on sierra systems
 11 |       run_mpi="lrun -N$nodes -p$procs"
 12 |       # add arguments to turn on cuda aware mpi (optionally disable gpu direct)
 13 |       # run_mpi="${run_mpi} --smpiargs \"-gpu\""
 14 |       # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\""
 15 |    elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then
 16 |       # Command used to run mpi on EA systems
 17 |       run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind"
 18 |    else
 19 |       # Command used to run mpi on slurm scheduled systems
 20 |       run_mpi="srun -N$nodes -n$procs"
 21 |    fi
 22 | else
 23 |    # Command used to run mpi with mpirun
 24 |    # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php
 25 |    # Note: you may need to use additional options to get reasonable mpi behavior
 26 |    # --host=hostname0,hostname1,... https://www.open-mpi.org/faq/?category=running#mpirun-host
 27 |    # --hostfile my_hosts            https://www.open-mpi.org/faq/?category=running#mpirun-hostfile
 28 |    run_mpi="mpirun -np $procs"
 29 | 
 30 |    # Command used to run mpi with mpiexec
 31 |    # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html
 32 |    # run_mpi="mpiexec -n $procs"
 33 | fi
 34 | 
 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior
 36 | # Your scheduler may help with this
 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads
 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/
 39 | # Ex:
 40 | #   bash:
 41 | #     mpirun -np 1 bind_script comb
 42 | #   bind_script:
 43 | #     export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node
 44 | #     exec $@
 45 | 
 46 | # Comb executable or symlink
 47 | run_comb="$(pwd)/comb"
 48 | 
 49 | # Choose arguments for comb
 50 | # elements on one side of the cube for each process
 51 | elems_per_procs_per_side=100 # 180
 52 | # overall size of the grid
 53 | let size=procs_per_side*elems_per_procs_per_side
 54 | comb_args="${size}_${size}_${size}"
 55 | # divide the grid into a number of procs per side
 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}"
 57 | # set the grid to be periodic in each dimension
 58 | comb_args="${comb_args} -periodic 1_1_1"
 59 | # set the halo width or number of ghost zones
 60 | comb_args="${comb_args} -ghost 1_1_1"
 61 | # set number of grid variables
 62 | comb_args="${comb_args} -vars 3"
 63 | # set number of communication cycles
 64 | comb_args="${comb_args} -cycles 25" # 100
 65 | # set cutoff between large and small message packing/unpacking kernels
 66 | comb_args="${comb_args} -comm cutoff 250"
 67 | # set the number of omp threads per process
 68 | comb_args="${comb_args} -omp_threads 10"
 69 | # enable tests passing cuda device or managed memory to mpi
 70 | # comb_args="${comb_args} -cuda_aware_mpi"
 71 | # disable seq execution tests
 72 | comb_args="${comb_args} -exec disable seq"
 73 | # enable cuda execution tests
 74 | comb_args="${comb_args} -exec enable cuda"
 75 | # disable host memory tests
 76 | comb_args="${comb_args} -memory disable host"
 77 | # enable cuda managed memory tests
 78 | comb_args="${comb_args} -memory enable cuda_managed"
 79 | # enable mock communication tests
 80 | comb_args="${comb_args} -comm enable mock"
 81 | # enable mpi communication tests
 82 | comb_args="${comb_args} -comm enable mpi"
 83 | # enable gpump communication tests
 84 | comb_args="${comb_args} -comm enable gpump"
 85 | 
 86 | # set up arguments for communication method (every post/wait phase uses wait_all)
 87 | wait_all_method="-comm post_recv wait_all -comm post_send wait_all -comm wait_recv wait_all -comm wait_send wait_all"
 88 | 
 89 | # set up the base command to run a test
 90 | # use sep_out.bash to separate each rank's output
 91 | run_test_base="${run_mpi} ${run_comb}"
 92 | 
 93 | # for each communication method (currently only wait_all_method)
 94 | for comm_method in "${wait_all_method}"; do
 95 | 
 96 |    # Run a test with this comm method (the unquoted expansions split into separate arguments on purpose)
 97 |    echo "${run_test_base} ${comm_method} ${comb_args}"
 98 |    ${run_test_base} ${comm_method} ${comb_args}
 99 | 
100 | done
101 | 
102 | echo "done"
103 | 


--------------------------------------------------------------------------------
/scripts/focused_mp_tests.bash:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | nodes=$1
  4 | procs=$2
  5 | procs_per_side=$3
  6 | 
  7 | # Choose a command to run mpi based on the system being used
  8 | if [[ ! "x" == "x$SYS_TYPE" ]]; then
  9 |    if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then
 10 |       # Command used to run mpi on sierra systems
 11 |       run_mpi="lrun -N$nodes -p$procs"
 12 |       # add arguments to turn on cuda aware mpi (optionally disable gpu direct)
 13 |       # run_mpi="${run_mpi} --smpiargs \"-gpu\""
 14 |       # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\""
 15 |    elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then
 16 |       # Command used to run mpi on EA systems
 17 |       run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind"
 18 |    else
 19 |       # Command used to run mpi on slurm scheduled systems
 20 |       run_mpi="srun -N$nodes -n$procs"
 21 |    fi
 22 | else
 23 |    # Command used to run mpi with mpirun
 24 |    # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php
 25 |    # Note: you may need to use additional options to get reasonable mpi behavior
 26 |    # --host=hostname0,hostname1,... https://www.open-mpi.org/faq/?category=running#mpirun-host
 27 |    # --hostfile my_hosts            https://www.open-mpi.org/faq/?category=running#mpirun-hostfile
 28 |    run_mpi="mpirun -np $procs"
 29 | 
 30 |    # Command used to run mpi with mpiexec
 31 |    # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html
 32 |    # run_mpi="mpiexec -n $procs"
 33 | fi
 34 | 
 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior
 36 | # Your scheduler may help with this
 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads
 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/
 39 | # Ex:
 40 | #   bash:
 41 | #     mpirun -np 1 bind_script comb
 42 | #   bind_script:
 43 | #     export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node
 44 | #     exec $@
 45 | 
 46 | # Comb executable or symlink
 47 | run_comb="$(pwd)/comb"
 48 | 
 49 | # Choose arguments for comb
 50 | # elements on one side of the cube for each process
 51 | elems_per_procs_per_side=100 # 180
 52 | # overall size of the grid
 53 | let size=procs_per_side*elems_per_procs_per_side
 54 | comb_args="${size}_${size}_${size}"
 55 | # divide the grid into a number of procs per side
 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}"
 57 | # set the grid to be periodic in each dimension
 58 | comb_args="${comb_args} -periodic 1_1_1"
 59 | # set the halo width or number of ghost zones
 60 | comb_args="${comb_args} -ghost 1_1_1"
 61 | # set number of grid variables
 62 | comb_args="${comb_args} -vars 3"
 63 | # set number of communication cycles
 64 | comb_args="${comb_args} -cycles 25" # 100
 65 | # set cutoff between large and small message packing/unpacking kernels
 66 | comb_args="${comb_args} -comm cutoff 250"
 67 | # set the number of omp threads per process
 68 | comb_args="${comb_args} -omp_threads 10"
 69 | # enable tests passing cuda device or managed memory to mpi
 70 | # comb_args="${comb_args} -cuda_aware_mpi"
 71 | # disable seq execution tests
 72 | comb_args="${comb_args} -exec disable seq"
 73 | # enable cuda execution tests
 74 | comb_args="${comb_args} -exec enable cuda"
 75 | # disable host memory tests
 76 | comb_args="${comb_args} -memory disable host"
 77 | # enable cuda managed memory tests
 78 | comb_args="${comb_args} -memory enable cuda_managed"
 79 | # enable mock communication tests
 80 | comb_args="${comb_args} -comm enable mock"
 81 | # enable mpi communication tests
 82 | comb_args="${comb_args} -comm enable mpi"
 83 | # enable mp communication tests
 84 | comb_args="${comb_args} -comm enable mp"
 85 | 
 86 | # set up arguments for communication method (every post/wait phase uses wait_all)
 87 | wait_all_method="-comm post_recv wait_all -comm post_send wait_all -comm wait_recv wait_all -comm wait_send wait_all"
 88 | 
 89 | # set up the base command to run a test
 90 | # use sep_out.bash to separate each rank's output
 91 | run_test_base="${run_mpi} ${run_comb}"
 92 | 
 93 | # for each communication method (currently only wait_all_method)
 94 | for comm_method in "${wait_all_method}"; do
 95 | 
 96 |    # Run a test with this comm method (the unquoted expansions split into separate arguments on purpose)
 97 |    echo "${run_test_base} ${comm_method} ${comb_args}"
 98 |    ${run_test_base} ${comm_method} ${comb_args}
 99 | 
100 | done
101 | 
102 | echo "done"
103 | 


--------------------------------------------------------------------------------
/scripts/focused_mpi_type_tests.bash:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | nodes=$1
  4 | procs=$2
  5 | procs_per_side=$3
  6 | 
  7 | # Choose a command to run mpi based on the system being used (built as an array so each argument stays one word)
  8 | if [[ ! "x" == "x$SYS_TYPE" ]]; then
  9 |    if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then
 10 |       # Command used to run mpi on sierra systems
 11 |       run_mpi=(lrun "-N$nodes" "-p$procs")
 12 |       # add arguments to turn on cuda aware mpi (optionally disable gpu direct)
 13 |       run_mpi+=(--smpiargs "-gpu")
 14 |       # run_mpi+=(--smpiargs "-gpu -disable_gdr")
 15 |    elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then
 16 |       # Command used to run mpi on EA systems
 17 |       run_mpi=(mpirun -np "$procs" /usr/tcetmp/bin/mpibind)
 18 |    else
 19 |       # Command used to run mpi on slurm scheduled systems
 20 |       run_mpi=(srun "-N$nodes" "-n$procs")
 21 |    fi
 22 | else
 23 |    # Command used to run mpi with mpirun
 24 |    # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php
 25 |    # Note: you may need to use additional options to get reasonable mpi behavior
 26 |    # --host=hostname0,hostname1,... https://www.open-mpi.org/faq/?category=running#mpirun-host
 27 |    # --hostfile my_hosts            https://www.open-mpi.org/faq/?category=running#mpirun-hostfile
 28 |    run_mpi=(mpirun -np "$procs")
 29 | 
 30 |    # Command used to run mpi with mpiexec
 31 |    # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html
 32 |    # run_mpi=(mpiexec -n "$procs")
 33 | fi
 34 | 
 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior
 36 | # Your scheduler may help with this
 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads
 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/
 39 | # Ex:
 40 | #   bash:
 41 | #     mpirun -np 1 bind_script comb
 42 | #   bind_script:
 43 | #     export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node
 44 | #     exec $@
 45 | 
 46 | # Comb executable or symlink
 47 | run_comb="$(pwd)/comb"
 48 | 
 49 | # Choose arguments for comb
 50 | # elements on one side of the cube for each process
 51 | elems_per_procs_per_side=100 # 180
 52 | # overall size of the grid
 53 | size=$((procs_per_side * elems_per_procs_per_side))
 54 | comb_args="${size}_${size}_${size}"
 55 | # divide the grid into a number of procs per side
 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}"
 57 | # set the grid to be periodic in each dimension
 58 | comb_args="${comb_args} -periodic 1_1_1"
 59 | # set the halo width or number of ghost zones
 60 | comb_args="${comb_args} -ghost 1_1_1"
 61 | # set number of grid variables
 62 | comb_args="${comb_args} -vars 3"
 63 | # set number of communication cycles
 64 | comb_args="${comb_args} -cycles 25" # 100
 65 | # set cutoff between large and small message packing/unpacking kernels
 66 | comb_args="${comb_args} -comm cutoff 250"
 67 | # set the number of omp threads per process
 68 | comb_args="${comb_args} -omp_threads 10"
 69 | # enable tests passing cuda device or managed memory to mpi
 70 | comb_args="${comb_args} -cuda_aware_mpi"
 71 | # disable seq execution tests
 72 | comb_args="${comb_args} -exec disable seq"
 73 | # enable cuda execution tests
 74 | comb_args="${comb_args} -exec enable cuda"
 75 | # enable mpi_type execution tests
 76 | comb_args="${comb_args} -exec enable mpi_type"
 77 | # disable host memory tests
 78 | comb_args="${comb_args} -memory disable host"
 79 | # enable cuda device memory tests
 80 | comb_args="${comb_args} -memory enable cuda_device"
 81 | # enable cuda managed memory tests
 82 | comb_args="${comb_args} -memory enable cuda_managed"
 83 | # enable mock communication tests
 84 | comb_args="${comb_args} -comm enable mock"
 85 | # enable mpi communication tests
 86 | comb_args="${comb_args} -comm enable mpi"
 87 | 
 88 | # set up arguments for communication method
 89 | wait_any_method="-comm post_recv wait_any -comm post_send wait_any -comm wait_recv wait_any -comm wait_send wait_any"
 90 | 
 91 | # set up the base command to run a test
 92 | # use sep_out.bash to separate each rank's output
 93 | run_test_base=("${run_mpi[@]}" "${run_comb}")
 94 | 
 95 | # for each communication method
 96 | for comm_method in "${wait_any_method}"; do
 97 | 
 98 |    # Run a test with this comm method (comm_method and comb_args are left unquoted so they split into words)
 99 |    echo "${run_test_base[*]} ${comm_method} ${comb_args}"
100 |    "${run_test_base[@]}" ${comm_method} ${comb_args}
101 | 
102 | done
103 | 
104 | echo "done"
105 | 


--------------------------------------------------------------------------------
/scripts/focused_tests.bash:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | nodes=$1
  4 | procs=$2
  5 | procs_per_side=$3
  6 | 
  7 | # Choose a command to run mpi based on the system being used
  8 | if [[ ! "x" == "x$SYS_TYPE" ]]; then
  9 |    if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then
 10 |       # Command used to run mpi on sierra systems
 11 |       run_mpi="lrun -N$nodes -p$procs"
 12 |       # add arguments to turn on cuda aware mpi (optionally disable gpu direct)
 13 |       # run_mpi="${run_mpi} --smpiargs \"-gpu\""
 14 |       # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\""
 15 |    elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then
 16 |       # Command used to run mpi on EA systems
 17 |       run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind"
 18 |    else
 19 |       # Command used to run mpi on slurm scheduled systems
 20 |       run_mpi="srun -N$nodes -n$procs"
 21 |    fi
 22 | else
 23 |    # Command used to run mpi with mpirun
 24 |    # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php
 25 |    # Note: you may need to use additional options to get reasonable mpi behavior
 26 |    # --host=hostname0,hostname1,... https://www.open-mpi.org/faq/?category=running#mpirun-host
 27 |    # --hostfile my_hosts            https://www.open-mpi.org/faq/?category=running#mpirun-hostfile
 28 |    run_mpi="mpirun -np $procs"
 29 | 
 30 |    # Command used to run mpi with mpiexec
 31 |    # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html
 32 |    # run_mpi="mpiexec -n $procs"
 33 | fi
 34 | 
 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior
 36 | # Your scheduler may help with this
 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads
 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/
 39 | # Ex:
 40 | #   bash:
 41 | #     mpirun -np 1 bind_script comb
 42 | #   bind_script:
 43 | #     export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node
 44 | #     exec $@
 45 | 
 46 | # Comb executable or symlink
 47 | run_comb="$(pwd)/comb"
 48 | 
 49 | # Choose arguments for comb
 50 | # elements on one side of the cube for each process
 51 | elems_per_procs_per_side=100 # 180
 52 | # overall size of the grid
 53 | let size=procs_per_side*elems_per_procs_per_side
 54 | comb_args="${size}_${size}_${size}"
 55 | # divide the grid into a number of procs per side
 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}"
 57 | # set the grid to be periodic in each dimension
 58 | comb_args="${comb_args} -periodic 1_1_1"
 59 | # set the halo width or number of ghost zones
 60 | comb_args="${comb_args} -ghost 1_1_1"
 61 | # set number of grid variables
 62 | comb_args="${comb_args} -vars 3"
 63 | # set number of communication cycles
 64 | comb_args="${comb_args} -cycles 25" # 100
 65 | # set cutoff between large and small message packing/unpacking kernels
 66 | comb_args="${comb_args} -comm cutoff 250"
 67 | # set the number of omp threads per process
 68 | comb_args="${comb_args} -omp_threads 10"
 69 | # enable tests passing cuda device or managed memory to mpi
 70 | # comb_args="${comb_args} -cuda_aware_mpi"
 71 | # disable seq execution tests
 72 | comb_args="${comb_args} -exec disable seq"
 73 | # enable cuda execution tests
 74 | comb_args="${comb_args} -exec enable cuda"
 75 | # disable host memory tests
 76 | comb_args="${comb_args} -memory disable host"
 77 | # enable cuda managed memory tests
 78 | comb_args="${comb_args} -memory enable cuda_managed"
 79 | # enable mock communication tests
 80 | comb_args="${comb_args} -comm enable mock"
 81 | # enable mpi communication tests
 82 | comb_args="${comb_args} -comm enable mpi"
 83 | 
 84 | # set up arguments for communication method
 85 | wait_any_method="-comm post_recv wait_any -comm post_send wait_any -comm wait_recv wait_any -comm wait_send wait_any"
 86 | 
 87 | # set up the base command to run a test
 88 | # use sep_out.bash to separate each rank's output
 89 | run_test_base="${run_mpi} ${run_comb}"
 90 | 
 91 | # for each communication method
 92 | for comm_method in "${wait_any_method}"; do
 93 | 
 94 |    # Run a test with this comm method
 95 |    echo "${run_test_base} ${comm_method} ${comb_args}"
 96 |    ${run_test_base} ${comm_method} ${comb_args}
 97 | 
 98 | done
 99 | 
100 | echo "done"
101 | 


--------------------------------------------------------------------------------
/scripts/focused_umr_tests.bash:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | nodes=$1
  4 | procs=$2
  5 | procs_per_side=$3
  6 | 
  7 | # Choose a command to run mpi based on the system being used
  8 | if [[ ! "x" == "x$SYS_TYPE" ]]; then
  9 |    if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then
 10 |       # Command used to run mpi on sierra systems
 11 |       run_mpi="lrun -N$nodes -p$procs"
 12 |       # add arguments to turn on cuda aware mpi (optionally disable gpu direct)
 13 |       # run_mpi="${run_mpi} --smpiargs \"-gpu\""
 14 |       # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\""
 15 |    elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then
 16 |       # Command used to run mpi on EA systems
 17 |       run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind"
 18 |    else
 19 |       # Command used to run mpi on slurm scheduled systems
 20 |       run_mpi="srun -N$nodes -n$procs"
 21 |    fi
 22 | else
 23 |    # Command used to run mpi with mpirun
 24 |    # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php
 25 |    # Note: you may need to use additional options to get reasonable mpi behavior
 26 |    # --host=hostname0,hostname1,... https://www.open-mpi.org/faq/?category=running#mpirun-hostfile
 27 |    # --hostfile my_hosts            https://www.open-mpi.org/faq/?category=running#mpirun-host
 28 |    run_mpi="mpirun -np $procs"
 29 | 
 30 |    # Command used to run mpi with mpiexec
 31 |    # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html
 32 |    # run_mpi="mpiexec -n $procs"
 33 | fi
 34 | 
 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior
 36 | # Your scheduler may help with this
 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads
 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/
 39 | # Ex:
 40 | #   bash:
 41 | #     mpirun -np 1 bind_script comb
 42 | #   bind_script:
 43 | #     export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node
 44 | #     exec $@
 45 | 
 46 | # Comb executable or symlink
 47 | run_comb="$(pwd)/comb"
 48 | 
 49 | # Choose arguments for comb
 50 | # elements on one side of the cube for each process
 51 | elems_per_procs_per_side=100 # 180
 52 | # overall size of the grid
 53 | let size=procs_per_side*elems_per_procs_per_side
 54 | comb_args="${size}_${size}_${size}"
 55 | # divide the grid into a number of procs per side
 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}"
 57 | # set the grid to be periodic in each dimension
 58 | comb_args="${comb_args} -periodic 1_1_1"
 59 | # set the halo width or number of ghost zones
 60 | comb_args="${comb_args} -ghost 1_1_1"
 61 | # set number of grid variables
 62 | comb_args="${comb_args} -vars 3"
 63 | # set number of communication cycles
 64 | comb_args="${comb_args} -cycles 25" # 100
 65 | # set cutoff between large and small message packing/unpacking kernels
 66 | comb_args="${comb_args} -comm cutoff 250"
 67 | # set the number of omp threads per process
 68 | comb_args="${comb_args} -omp_threads 10"
 69 | # enable tests passing cuda device or managed memory to mpi
 70 | # comb_args="${comb_args} -cuda_aware_mpi"
 71 | # disable seq execution tests
 72 | comb_args="${comb_args} -exec disable seq"
 73 | # enable cuda execution tests
 74 | comb_args="${comb_args} -exec enable cuda"
 75 | # disable host memory tests
 76 | comb_args="${comb_args} -memory disable host"
 77 | # enable cuda managed memory tests
 78 | comb_args="${comb_args} -memory enable cuda_managed"
 79 | # enable mock communication tests
 80 | comb_args="${comb_args} -comm enable mock"
 81 | # enable mpi communication tests
 82 | comb_args="${comb_args} -comm enable mpi"
 83 | # enable umr communication tests
 84 | comb_args="${comb_args} -comm enable umr"
 85 | 
 86 | # set up arguments for communication method
 87 | wait_any_method="-comm post_recv wait_any -comm post_send wait_any -comm wait_recv wait_any -comm wait_send wait_any"
 88 | 
 89 | # set up the base command to run a test
 90 | # use sep_out.bash to separate each rank's output
 91 | run_test_base="${run_mpi} ${run_comb}"
 92 | 
 93 | # for each communication method
 94 | for comm_method in "${wait_any_method}"; do
 95 | 
 96 |    # Run a test with this comm method
 97 |    echo "${run_test_base} ${comm_method} ${comb_args}"
 98 |    ${run_test_base} ${comm_method} ${comb_args}
 99 | 
100 | done
101 | 
102 | echo "done"
103 | 


--------------------------------------------------------------------------------
/scripts/lc-builds/blueos_clang.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
18 | # Configure a Release build of Comb with clang on LC blueos systems.
19 | #
20 | # Usage: blueos_clang.sh <clang-version> [extra cmake arguments...]
21 | #
22 | # Must be run from the top of the Comb source tree: the build directory
23 | # build_lc_blueos-clang-<clang-version> is (re)created in the current
24 | # directory, and the host-config and scripts paths are relative to it.
25 | 
26 | if [ "$1" == "" ]; then
27 |   echo
28 |   echo "You must pass a compiler version number to script. For example,"
29 |   echo "    blueos_clang.sh 11.0.1"
30 |   echo "  -or - "
31 |   echo "    blueos_clang.sh ibm-10.0.1-gcc-8.3.1"
32 |   # A missing argument is a usage error: exit non-zero so callers notice.
33 |   exit 1
34 | fi
35 | 
36 | COMP_VER=$1
37 | shift 1
38 | 
39 | BUILD_SUFFIX=lc_blueos-clang-${COMP_VER}
40 | 
41 | echo
42 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
43 | echo "Configuration extra arguments:"
44 | echo "   $@"
45 | echo
46 | 
47 | # Start from a clean build directory. The expansions are quoted so an unusual
48 | # version string cannot be word-split into extra arguments to rm -rf.
49 | rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
50 | # Stop if the build directory cannot be created or entered; otherwise the
51 | # commands below would run in the directory the script was started from.
52 | mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1
53 | 
54 | # Link the run scripts and ../bin/comb into ./scripts. The subshell keeps the
55 | # working directory in the build directory even if one of the steps fails.
56 | ( mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )
57 | 
58 | module load cmake/3.14.5
59 | 
60 | # Extra arguments ("$@") are forwarded to cmake after the options set here.
61 | cmake \
62 |   -DCMAKE_BUILD_TYPE=Release \
63 |   -DMPI_CXX_COMPILER="/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-clang-${COMP_VER}/bin/mpiclang++" \
64 |   -DCMAKE_CXX_COMPILER="/usr/tce/packages/clang/clang-${COMP_VER}/bin/clang++" \
65 |   -C ../host-configs/lc-builds/blueos/clang_X.cmake \
66 |   -DENABLE_MPI=On \
67 |   -DENABLE_OPENMP=On \
68 |   -DCMAKE_INSTALL_PREFIX="../install_${BUILD_SUFFIX}" \
69 |   "$@" \
70 |   ..
71 | 


--------------------------------------------------------------------------------
/scripts/lc-builds/blueos_gcc.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
18 | # Configure a Release build of Comb with gcc on LC blueos systems.
19 | #
20 | # Usage: blueos_gcc.sh <gcc-version> [extra cmake arguments...]
21 | #
22 | # Must be run from the top of the Comb source tree: the build directory
23 | # build_lc_blueos-gcc-<gcc-version> is (re)created in the current
24 | # directory, and the host-config and scripts paths are relative to it.
25 | 
26 | if [ "$1" == "" ]; then
27 |   echo
28 |   echo "You must pass a compiler version number to script. For example,"
29 |   echo "    blueos_gcc.sh 8.3.1"
30 |   # A missing argument is a usage error: exit non-zero so callers notice.
31 |   exit 1
32 | fi
33 | 
34 | COMP_VER=$1
35 | shift 1
36 | 
37 | BUILD_SUFFIX=lc_blueos-gcc-${COMP_VER}
38 | 
39 | echo
40 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
41 | echo "Configuration extra arguments:"
42 | echo "   $@"
43 | echo
44 | 
45 | # Start from a clean build directory. The expansions are quoted so an unusual
46 | # version string cannot be word-split into extra arguments to rm -rf.
47 | rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
48 | # Stop if the build directory cannot be created or entered; otherwise the
49 | # commands below would run in the directory the script was started from.
50 | mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1
51 | 
52 | # Link the run scripts and ../bin/comb into ./scripts. The subshell keeps the
53 | # working directory in the build directory even if one of the steps fails.
54 | ( mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )
55 | 
56 | module load cmake/3.14.5
57 | 
58 | # Extra arguments ("$@") are forwarded to cmake after the options set here.
59 | cmake \
60 |   -DCMAKE_BUILD_TYPE=Release \
61 |   -DMPI_CXX_COMPILER="/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-gcc-${COMP_VER}/bin/mpig++" \
62 |   -DCMAKE_CXX_COMPILER="/usr/tce/packages/gcc/gcc-${COMP_VER}/bin/g++" \
63 |   -C ../host-configs/lc-builds/blueos/gcc_X.cmake \
64 |   -DENABLE_MPI=On \
65 |   -DENABLE_OPENMP=On \
66 |   -DCMAKE_INSTALL_PREFIX="../install_${BUILD_SUFFIX}" \
67 |   "$@" \
68 |   ..
69 | 


--------------------------------------------------------------------------------
/scripts/lc-builds/blueos_nvcc_clang.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
18 | # Configure a Release CUDA build of Comb with nvcc and clang on LC blueos.
19 | #
20 | # Usage: blueos_nvcc_clang.sh <nvcc-version> <cuda-arch> <clang-version> [extra cmake arguments...]
21 | #
22 | # Must be run from the top of the Comb source tree: the build directory is
23 | # (re)created in the current directory, and the host-config and scripts
24 | # paths are relative to it.
25 | 
26 | if [[ $# -lt 3 ]]; then
27 |   echo
28 |   echo "You must pass 3 arguments to the script (in this order): "
29 |   echo "   1) compiler version number for nvcc"
30 |   echo "   2) CUDA compute architecture"
31 |   echo "   3) compiler version number for clang. "
32 |   echo
33 |   echo "For example: "
34 |   echo "    blueos_nvcc_clang.sh 10.2.89 sm_70 10.0.1"
35 |   # Too few arguments is a usage error: exit non-zero so callers notice.
36 |   exit 1
37 | fi
38 | 
39 | COMP_NVCC_VER=$1
40 | COMP_ARCH=$2
41 | COMP_CLANG_VER=$3
42 | shift 3
43 | 
44 | BUILD_SUFFIX=lc_blueos-nvcc${COMP_NVCC_VER}-${COMP_ARCH}-clang${COMP_CLANG_VER}
45 | 
46 | echo
47 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
48 | echo "Configuration extra arguments:"
49 | echo "   $@"
50 | echo
51 | 
52 | # Start from a clean build directory. The expansions are quoted so an unusual
53 | # argument cannot be word-split into extra arguments to rm -rf.
54 | rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
55 | # Stop if the build directory cannot be created or entered; otherwise the
56 | # commands below would run in the directory the script was started from.
57 | mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1
58 | 
59 | # Link the run scripts and ../bin/comb into ./scripts. The subshell keeps the
60 | # working directory in the build directory even if one of the steps fails.
61 | ( mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )
62 | 
63 | module load cmake/3.14.5
64 | 
65 | # Extra arguments ("$@") are forwarded to cmake after the options set here.
66 | cmake \
67 |   -DCMAKE_BUILD_TYPE=Release \
68 |   -DMPI_CXX_COMPILER="/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-clang-${COMP_CLANG_VER}/bin/mpiclang++" \
69 |   -DCMAKE_CXX_COMPILER="/usr/tce/packages/clang/clang-${COMP_CLANG_VER}/bin/clang++" \
70 |   -DBLT_CXX_STD=c++14 \
71 |   -C ../host-configs/lc-builds/blueos/nvcc_clang_X.cmake \
72 |   -DENABLE_MPI=On \
73 |   -DENABLE_OPENMP=On \
74 |   -DENABLE_CUDA=On \
75 |   -DCUDA_TOOLKIT_ROOT_DIR="/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}" \
76 |   -DCMAKE_CUDA_COMPILER="/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}/bin/nvcc" \
77 |   -DCUDA_ARCH="${COMP_ARCH}" \
78 |   -DCMAKE_INSTALL_PREFIX="../install_${BUILD_SUFFIX}" \
79 |   "$@" \
80 |   ..
81 | 


--------------------------------------------------------------------------------
/scripts/lc-builds/blueos_nvcc_gcc.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
18 | # Configure a Release CUDA build of Comb with nvcc and gcc on LC blueos.
19 | #
20 | # Usage: blueos_nvcc_gcc.sh <nvcc-version> <cuda-arch> <gcc-version> [extra cmake arguments...]
21 | #
22 | # Must be run from the top of the Comb source tree: the build directory is
23 | # (re)created in the current directory, and the host-config and scripts
24 | # paths are relative to it.
25 | 
26 | if [[ $# -lt 3 ]]; then
27 |   echo
28 |   echo "You must pass 3 arguments to the script (in this order): "
29 |   echo "   1) compiler version number for nvcc"
30 |   echo "   2) CUDA compute architecture"
31 |   echo "   3) compiler version number for gcc. "
32 |   echo
33 |   echo "For example: "
34 |   echo "    blueos_nvcc_gcc.sh 10.2.89 sm_70 8.3.1"
35 |   # Too few arguments is a usage error: exit non-zero so callers notice.
36 |   exit 1
37 | fi
38 | 
39 | COMP_NVCC_VER=$1
40 | COMP_ARCH=$2
41 | COMP_GCC_VER=$3
42 | shift 3
43 | 
44 | BUILD_SUFFIX=lc_blueos-nvcc${COMP_NVCC_VER}-${COMP_ARCH}-gcc${COMP_GCC_VER}
45 | 
46 | echo
47 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
48 | echo "Configuration extra arguments:"
49 | echo "   $@"
50 | echo
51 | 
52 | # Start from a clean build directory. The expansions are quoted so an unusual
53 | # argument cannot be word-split into extra arguments to rm -rf.
54 | rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
55 | # Stop if the build directory cannot be created or entered; otherwise the
56 | # commands below would run in the directory the script was started from.
57 | mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1
58 | 
59 | # Link the run scripts and ../bin/comb into ./scripts. The subshell keeps the
60 | # working directory in the build directory even if one of the steps fails.
61 | ( mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )
62 | 
63 | module load cmake/3.14.5
64 | 
65 | # Extra arguments ("$@") are forwarded to cmake after the options set here.
66 | # NOTE(review): the MPI wrappers point at a fixed /opt/openmpi/4.0 install and
67 | # do not depend on the gcc version argument, unlike the other blueos scripts -
68 | # confirm this is intended.
69 | cmake \
70 |   -DCMAKE_BUILD_TYPE=Release \
71 |   -DMPI_CXX_COMPILER=/opt/openmpi/4.0/gnu/bin/mpiCC \
72 |   -DMPI_C_COMPILER=/opt/openmpi/4.0/gnu/bin/mpicc \
73 |   -DCMAKE_CXX_COMPILER="/usr/tce/packages/gcc/gcc-${COMP_GCC_VER}/bin/g++" \
74 |   -DCMAKE_C_COMPILER="/usr/tce/packages/gcc/gcc-${COMP_GCC_VER}/bin/gcc" \
75 |   -DBLT_CXX_STD=c++14 \
76 |   -C ../host-configs/lc-builds/blueos/nvcc_gcc_X.cmake \
77 |   -DENABLE_MPI=On \
78 |   -DENABLE_OPENMP=On \
79 |   -DENABLE_CUDA=On \
80 |   -DCUDA_TOOLKIT_ROOT_DIR="/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}" \
81 |   -DCMAKE_CUDA_COMPILER="/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}/bin/nvcc" \
82 |   -DCUDA_ARCH="${COMP_ARCH}" \
83 |   -DCMAKE_INSTALL_PREFIX="../install_${BUILD_SUFFIX}" \
84 |   "$@" \
85 |   ..
86 | 


--------------------------------------------------------------------------------
/scripts/lc-builds/blueos_nvcc_xl.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
18 | # Configure a Release CUDA build of Comb with nvcc and xl on LC blueos.
19 | #
20 | # Usage: blueos_nvcc_xl.sh <nvcc-version> <cuda-arch> <xl-version> [extra cmake arguments...]
21 | #
22 | # Must be run from the top of the Comb source tree: the build directory is
23 | # (re)created in the current directory, and the host-config and scripts
24 | # paths are relative to it.
25 | 
26 | if [[ $# -lt 3 ]]; then
27 |   echo
28 |   echo "You must pass 3 arguments to the script (in this order): "
29 |   echo "   1) compiler version number for nvcc"
30 |   echo "   2) CUDA compute architecture"
31 |   echo "   3) compiler version number for xl. "
32 |   echo
33 |   echo "For example: "
34 |   echo "    blueos_nvcc_xl.sh 11.1.1 sm_70 2021.03.31"
35 |   # Too few arguments is a usage error: exit non-zero so callers notice.
36 |   exit 1
37 | fi
38 | 
39 | COMP_NVCC_VER=$1
40 | COMP_ARCH=$2
41 | COMP_XL_VER=$3
42 | shift 3
43 | 
44 | BUILD_SUFFIX=lc_blueos-nvcc${COMP_NVCC_VER}-${COMP_ARCH}-xl${COMP_XL_VER}
45 | 
46 | echo
47 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
48 | echo "Configuration extra arguments:"
49 | echo "   $@"
50 | echo
51 | 
52 | # Start from a clean build directory. The expansions are quoted so an unusual
53 | # argument cannot be word-split into extra arguments to rm -rf.
54 | rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
55 | # Stop if the build directory cannot be created or entered; otherwise the
56 | # commands below would run in the directory the script was started from.
57 | mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1
58 | 
59 | # Link the run scripts and ../bin/comb into ./scripts. The subshell keeps the
60 | # working directory in the build directory even if one of the steps fails.
61 | ( mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )
62 | 
63 | module load cmake/3.14.5
64 | 
65 | # Extra arguments ("$@") are forwarded to cmake after the options set here.
66 | cmake \
67 |   -DCMAKE_BUILD_TYPE=Release \
68 |   -DMPI_CXX_COMPILER="/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-xl-${COMP_XL_VER}/bin/mpixlc++_r" \
69 |   -DCMAKE_CXX_COMPILER="/usr/tce/packages/xl/xl-${COMP_XL_VER}/bin/xlc++_r" \
70 |   -DBLT_CXX_STD=c++14 \
71 |   -C ../host-configs/lc-builds/blueos/nvcc_xl_X.cmake \
72 |   -DENABLE_MPI=On \
73 |   -DENABLE_OPENMP=On \
74 |   -DENABLE_CUDA=On \
75 |   -DCUDA_TOOLKIT_ROOT_DIR="/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}" \
76 |   -DCMAKE_CUDA_COMPILER="/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}/bin/nvcc" \
77 |   -DCUDA_ARCH="${COMP_ARCH}" \
78 |   -DCMAKE_INSTALL_PREFIX="../install_${BUILD_SUFFIX}" \
79 |   "$@" \
80 |   ..
81 | 


--------------------------------------------------------------------------------
/scripts/lc-builds/blueos_pgi.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
18 | # Configure a Release build of Comb with the PGI compilers on LC blueos.
19 | #
20 | # Usage: blueos_pgi.sh <pgi-version> [extra cmake arguments...]
21 | #
22 | # Must be run from the top of the Comb source tree: the build directory
23 | # build_lc_blueos-pgi-<pgi-version> is (re)created in the current
24 | # directory, and the host-config and scripts paths are relative to it.
25 | 
26 | if [ "$1" == "" ]; then
27 |   echo
28 |   echo "You must pass a compiler version number to script. For example,"
29 |   echo "    blueos_pgi.sh 21.1"
30 |   # A missing argument is a usage error: exit non-zero so callers notice.
31 |   exit 1
32 | fi
33 | 
34 | COMP_VER=$1
35 | shift 1
36 | 
37 | BUILD_SUFFIX=lc_blueos-pgi-${COMP_VER}
38 | 
39 | echo
40 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
41 | echo "Configuration extra arguments:"
42 | echo "   $@"
43 | echo
44 | 
45 | # Start from a clean build directory. The expansions are quoted so an unusual
46 | # version string cannot be word-split into extra arguments to rm -rf.
47 | rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
48 | # Stop if the build directory cannot be created or entered; otherwise the
49 | # commands below would run in the directory the script was started from.
50 | mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1
51 | 
52 | # Link the run scripts and ../bin/comb into ./scripts. The subshell keeps the
53 | # working directory in the build directory even if one of the steps fails.
54 | ( mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )
55 | 
56 | module load cmake/3.14.5
57 | 
58 | # Extra arguments ("$@") are forwarded to cmake after the options set here.
59 | cmake \
60 |   -DCMAKE_BUILD_TYPE=Release \
61 |   -DMPI_CXX_COMPILER="/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-pgi-${COMP_VER}/bin/mpipgc++" \
62 |   -DMPI_C_COMPILER="/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-pgi-${COMP_VER}/bin/mpipgcc" \
63 |   -DCMAKE_CXX_COMPILER="/usr/tce/packages/pgi/pgi-${COMP_VER}/bin/pgc++" \
64 |   -DCMAKE_C_COMPILER="/usr/tce/packages/pgi/pgi-${COMP_VER}/bin/pgcc" \
65 |   -C ../host-configs/lc-builds/blueos/pgi_X.cmake \
66 |   -DENABLE_MPI=On \
67 |   -DENABLE_OPENMP=On \
68 |   -DCMAKE_INSTALL_PREFIX="../install_${BUILD_SUFFIX}" \
69 |   "$@" \
70 |   ..
71 | 


--------------------------------------------------------------------------------
/scripts/lc-builds/blueos_xl.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
18 | # Configure a Release build of Comb with the xl compiler on LC blueos.
19 | #
20 | # Usage: blueos_xl.sh <xl-version> [extra cmake arguments...]
21 | #
22 | # Must be run from the top of the Comb source tree: the build directory
23 | # build_lc_blueos-xl-<xl-version> is (re)created in the current
24 | # directory, and the host-config and scripts paths are relative to it.
25 | 
26 | if [ "$1" == "" ]; then
27 |   echo
28 |   echo "You must pass a compiler version number to script. For example,"
29 |   echo "    blueos_xl.sh 2021.03.31"
30 |   # A missing argument is a usage error: exit non-zero so callers notice.
31 |   exit 1
32 | fi
33 | 
34 | COMP_VER=$1
35 | shift 1
36 | 
37 | BUILD_SUFFIX=lc_blueos-xl-${COMP_VER}
38 | 
39 | echo
40 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
41 | echo "Configuration extra arguments:"
42 | echo "   $@"
43 | echo
44 | 
45 | # Start from a clean build directory. The expansions are quoted so an unusual
46 | # version string cannot be word-split into extra arguments to rm -rf.
47 | rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
48 | # Stop if the build directory cannot be created or entered; otherwise the
49 | # commands below would run in the directory the script was started from.
50 | mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1
51 | 
52 | # Link the run scripts and ../bin/comb into ./scripts. The subshell keeps the
53 | # working directory in the build directory even if one of the steps fails.
54 | ( mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )
55 | 
56 | module load cmake/3.14.5
57 | 
58 | # Extra arguments ("$@") are forwarded to cmake after the options set here.
59 | cmake \
60 |   -DCMAKE_BUILD_TYPE=Release \
61 |   -DMPI_CXX_COMPILER="/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-xl-${COMP_VER}/bin/mpixlc++_r" \
62 |   -DCMAKE_CXX_COMPILER="/usr/tce/packages/xl/xl-${COMP_VER}/bin/xlc++_r" \
63 |   -DBLT_CXX_STD=c++14 \
64 |   -C ../host-configs/lc-builds/blueos/xl_X.cmake \
65 |   -DENABLE_MPI=On \
66 |   -DENABLE_OPENMP=On \
67 |   -DCMAKE_INSTALL_PREFIX="../install_${BUILD_SUFFIX}" \
68 |   "$@" \
69 |   ..
70 | 


--------------------------------------------------------------------------------
/scripts/lc-builds/toss3_clang.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
18 | # Configure a Release build of Comb with clang on LC toss3 systems.
19 | #
20 | # Usage: toss3_clang.sh <clang-version> [extra cmake arguments...]
21 | #
22 | # Must be run from the top of the Comb source tree: the build directory
23 | # build_lc_toss3-clang-<clang-version> is (re)created in the current
24 | # directory, and the host-config and scripts paths are relative to it.
25 | 
26 | if [ "$1" == "" ]; then
27 |   echo
28 |   echo "You must pass a compiler version number to script. For example,"
29 |   echo "    toss3_clang.sh 10.0.1"
30 |   # A missing argument is a usage error: exit non-zero so callers notice.
31 |   exit 1
32 | fi
33 | 
34 | COMP_VER=$1
35 | shift 1
36 | 
37 | BUILD_SUFFIX=lc_toss3-clang-${COMP_VER}
38 | 
39 | echo
40 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
41 | echo "Configuration extra arguments:"
42 | echo "   $@"
43 | echo
44 | 
45 | # Start from a clean build directory. The expansions are quoted so an unusual
46 | # version string cannot be word-split into extra arguments to rm -rf.
47 | rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
48 | # Stop if the build directory cannot be created or entered; otherwise the
49 | # commands below would run in the directory the script was started from.
50 | mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1
51 | 
52 | # Link the run scripts and ../bin/comb into ./scripts. The subshell keeps the
53 | # working directory in the build directory even if one of the steps fails.
54 | ( mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )
55 | 
56 | module load cmake/3.14.5
57 | 
58 | # Extra arguments ("$@") are forwarded to cmake after the options set here.
59 | cmake \
60 |   -DCMAKE_BUILD_TYPE=Release \
61 |   -DMPI_CXX_COMPILER="/usr/tce/packages/mvapich2/mvapich2-2.3-clang-${COMP_VER}/bin/mpic++" \
62 |   -DMPI_C_COMPILER="/usr/tce/packages/mvapich2/mvapich2-2.3-clang-${COMP_VER}/bin/mpicc" \
63 |   -DCMAKE_CXX_COMPILER="/usr/tce/packages/clang/clang-${COMP_VER}/bin/clang++" \
64 |   -DCMAKE_C_COMPILER="/usr/tce/packages/clang/clang-${COMP_VER}/bin/clang" \
65 |   -C ../host-configs/lc-builds/toss3/clang_X.cmake \
66 |   -DENABLE_MPI=On \
67 |   -DENABLE_OPENMP=On \
68 |   -DCMAKE_INSTALL_PREFIX="../install_${BUILD_SUFFIX}" \
69 |   "$@" \
70 |   ..
71 | 


--------------------------------------------------------------------------------
/scripts/lc-builds/toss3_gcc.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
18 | # Configure a Release build of Comb with gcc on LC toss3 systems.
19 | #
20 | # Usage: toss3_gcc.sh <gcc-version> [extra cmake arguments...]
21 | #
22 | # Must be run from the top of the Comb source tree: the build directory
23 | # build_lc_toss3-gcc-<gcc-version> is (re)created in the current
24 | # directory, and the host-config and scripts paths are relative to it.
25 | 
26 | if [ "$1" == "" ]; then
27 |   echo
28 |   echo "You must pass a compiler version number to script. For example,"
29 |   echo "    toss3_gcc.sh 8.3.1"
30 |   # A missing argument is a usage error: exit non-zero so callers notice.
31 |   exit 1
32 | fi
33 | 
34 | COMP_VER=$1
35 | shift 1
36 | 
37 | BUILD_SUFFIX=lc_toss3-gcc-${COMP_VER}
38 | 
39 | echo
40 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
41 | echo "Configuration extra arguments:"
42 | echo "   $@"
43 | echo
44 | 
45 | # Start from a clean build directory. The expansions are quoted so an unusual
46 | # version string cannot be word-split into extra arguments to rm -rf.
47 | rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
48 | # Stop if the build directory cannot be created or entered; otherwise the
49 | # commands below would run in the directory the script was started from.
50 | mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1
51 | 
52 | # Link the run scripts and ../bin/comb into ./scripts. The subshell keeps the
53 | # working directory in the build directory even if one of the steps fails.
54 | ( mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )
55 | 
56 | module load cmake/3.14.5
57 | 
58 | # Extra arguments ("$@") are forwarded to cmake after the options set here.
59 | cmake \
60 |   -DCMAKE_BUILD_TYPE=Release \
61 |   -DMPI_CXX_COMPILER="/usr/tce/packages/mvapich2/mvapich2-2.3-gcc-${COMP_VER}/bin/mpic++" \
62 |   -DMPI_C_COMPILER="/usr/tce/packages/mvapich2/mvapich2-2.3-gcc-${COMP_VER}/bin/mpicc" \
63 |   -DCMAKE_CXX_COMPILER="/usr/tce/packages/gcc/gcc-${COMP_VER}/bin/g++" \
64 |   -DCMAKE_C_COMPILER="/usr/tce/packages/gcc/gcc-${COMP_VER}/bin/gcc" \
65 |   -C ../host-configs/lc-builds/toss3/gcc_X.cmake \
66 |   -DENABLE_MPI=On \
67 |   -DENABLE_OPENMP=On \
68 |   -DCMAKE_INSTALL_PREFIX="../install_${BUILD_SUFFIX}" \
69 |   "$@" \
70 |   ..
71 | 


--------------------------------------------------------------------------------
/scripts/lc-builds/toss3_hipcc.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | ##############################################################################
  4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  5 | ##
  6 | ## Produced at the Lawrence Livermore National Laboratory
  7 | ##
  8 | ## LLNL-CODE-758885
  9 | ##
 10 | ## All rights reserved.
 11 | ##
 12 | ## This file is part of Comb.
 13 | ##
 14 | ## For details, see https://github.com/LLNL/Comb
 15 | ## Please also see the LICENSE file for MIT license.
 16 | ##############################################################################
 17 | 
 18 | # Configure a Release HIP build of Comb with the rocm clang on LC toss3.
 19 | #
 20 | # Usage: toss3_hipcc.sh <rocm-version> <hip-arch> [extra cmake arguments...]
 21 | #
 22 | # Must be run from the top of the Comb source tree: the build directory is
 23 | # (re)created in the current directory, and the host-config and scripts
 24 | # paths are relative to it.
 25 | 
 26 | if [[ $# -lt 2 ]]; then
 27 |   echo
 28 |   echo "You must pass 2 or more arguments to the script (in this order): "
 29 |   echo "   1) compiler version number"
 30 |   echo "   2) HIP compute architecture"
 31 |   echo "   3...) optional arguments to cmake"
 32 |   echo
 33 |   echo "For example: "
 34 |   echo "    toss3_hipcc.sh 4.1.0 gfx906"
 35 |   echo "    toss3_hipcc.sh 4.1.0 gfx906 -DBLT_CXX_STD=c++11"
 36 |   # Too few arguments is a usage error: exit non-zero so callers notice.
 37 |   exit 1
 38 | fi
 39 | 
 40 | COMP_VER=$1
 41 | COMP_ARCH=$2
 42 | shift 2
 43 | 
 44 | # NOTE(review): HIP_CLANG_FLAGS is assembled here (including the extra 4.5.*
 45 | # flags) but is never passed to cmake below, which hard-codes only
 46 | # --offload-arch in HIP_HIPCC_FLAGS - confirm which behavior is intended.
 47 | HIP_CLANG_FLAGS="--offload-arch=${COMP_ARCH}"
 48 | HOSTCONFIG="hip_X"
 49 | 
 50 | # Pick the host-config from the rocm version; unknown versions keep the default.
 51 | if [[ ${COMP_VER} == 4.5.* ]]
 52 | then
 53 |   HIP_CLANG_FLAGS="${HIP_CLANG_FLAGS} -mllvm -amdgpu-fixed-function-abi=1"
 54 |   HOSTCONFIG="hip_4_link_X"
 55 | elif [[ ${COMP_VER} == 4.* ]]
 56 | then
 57 |   HOSTCONFIG="hip_4_link_X"
 58 | elif [[ ${COMP_VER} == 3.* ]]
 59 | then
 60 |   HOSTCONFIG="hip_X"
 61 | else
 62 |   echo "Unknown hip version, using ${HOSTCONFIG} host-config"
 63 | fi
 64 | 
 65 | BUILD_SUFFIX=lc_toss3-hipcc-${COMP_VER}-${COMP_ARCH}
 66 | 
 67 | echo
 68 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
 69 | echo "Configuration extra arguments:"
 70 | echo "   $@"
 71 | echo
 72 | 
 73 | # Start from a clean build directory. The expansions are quoted so an unusual
 74 | # argument cannot be word-split into extra arguments to rm -rf.
 75 | rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
 76 | # Stop if the build directory cannot be created or entered; otherwise the
 77 | # commands below would run in the directory the script was started from.
 78 | mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1
 79 | 
 80 | # Link the run scripts and ../bin/comb into ./scripts. The subshell keeps the
 81 | # working directory in the build directory even if one of the steps fails.
 82 | ( mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )
 83 | 
 84 | module load cmake/3.14.5
 85 | 
 86 | # unload rocm to avoid configuration problems where the loaded rocm and COMP_VER
 87 | # are inconsistent causing the rocprim from the module to be used unexpectedly
 88 | module unload rocm
 89 | 
 90 | 
 91 | # Extra arguments ("$@") are forwarded to cmake after the options set here.
 92 | cmake \
 93 |   -DCMAKE_BUILD_TYPE=Release \
 94 |   -DMPI_CXX_COMPILER="/usr/tce/packages/cray-mpich-tce/cray-mpich-8.1.14-rocmcc-${COMP_VER}/bin/mpicxx" \
 95 |   -DMPI_C_COMPILER="/usr/tce/packages/cray-mpich-tce/cray-mpich-8.1.14-rocmcc-${COMP_VER}/bin/mpicc" \
 96 |   -DROCM_ROOT_DIR="/opt/rocm-${COMP_VER}" \
 97 |   -DHIP_ROOT_DIR="/opt/rocm-${COMP_VER}/hip" \
 98 |   -DHIP_CLANG_PATH="/opt/rocm-${COMP_VER}/llvm/bin" \
 99 |   -DCMAKE_C_COMPILER="/opt/rocm-${COMP_VER}/llvm/bin/clang" \
100 |   -DCMAKE_CXX_COMPILER="/opt/rocm-${COMP_VER}/llvm/bin/clang++" \
101 |   -DHIP_HIPCC_FLAGS="--offload-arch=${COMP_ARCH}" \
102 |   -C "../host-configs/lc-builds/toss3/${HOSTCONFIG}.cmake" \
103 |   -DENABLE_MPI=On \
104 |   -DENABLE_HIP=On \
105 |   -DENABLE_OPENMP=Off \
106 |   -DENABLE_CUDA=Off \
107 |   -DCMAKE_INSTALL_PREFIX="../install_${BUILD_SUFFIX}" \
108 |   "$@" \
109 |   ..
110 | 
111 | echo
112 | echo "***********************************************************************"
113 | echo
114 | echo "cd into directory build_${BUILD_SUFFIX} and run make to build Comb"
115 | echo
116 | echo "  Please note that you have to have a consistent build environment"
117 | echo "  when you make Comb as cmake may reconfigure; unload the rocm module"
118 | echo "  or load the appropriate rocm module (${COMP_VER}) when building."
119 | echo
120 | echo "    module unload rocm"
121 | echo "    srun -n1 make"
122 | echo
123 | echo "  Also note that libmodules.so is in the cce install. You may have to"
124 | echo "  add that to your LD_LIBRARY_PATH to run."
125 | echo
126 | # The dollar sign is escaped so the hint shows the command to type instead of
127 | # the value LD_LIBRARY_PATH happens to have while this script runs.
128 | echo "    export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/cray/pe/cce/13.0.2/cce-clang/x86_64/lib:/opt/cray/pe/cce/13.0.2/cce/x86_64/lib"
129 | echo "    srun -n1 ./bin/comb"
130 | echo
131 | echo "***********************************************************************"
132 | 


--------------------------------------------------------------------------------
/scripts/lc-builds/toss3_icpc.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
18 | # Configure a Release build of Comb with the Intel compilers on LC toss3.
19 | #
20 | # Usage: toss3_icpc.sh <intel-version> [extra cmake arguments...]
21 | #
22 | # Must be run from the top of the Comb source tree: the build directory
23 | # build_lc_toss3-icpc-<intel-version> is (re)created in the current
24 | # directory, and the host-config and scripts paths are relative to it.
25 | 
26 | if [ "$1" == "" ]; then
27 |   echo
28 |   echo "You must pass a compiler version number to script. For example,"
29 |   echo "    toss3_icpc.sh 19.1.0"
30 |   # A missing argument is a usage error: exit non-zero so callers notice.
31 |   exit 1
32 | fi
33 | 
34 | COMP_VER=$1
35 | shift 1
36 | 
37 | # Major version is everything before the first '.', e.g. 19 for 19.1.0.
38 | # Taking a fixed two characters would yield "9." for a version such as 9.0
39 | # and break the integer comparisons below.
40 | COMP_MAJOR_VER=${COMP_VER%%.*}
41 | GCC_HEADER_VER=7
42 | USE_TBB=On
43 | 
44 | # Major versions above 18 select the gcc 8 headers host-config.
45 | if [ "${COMP_MAJOR_VER}" -gt 18 ]
46 | then
47 |   GCC_HEADER_VER=8
48 | fi
49 | 
50 | # TBB is switched off for major versions below 18.
51 | if [ "${COMP_MAJOR_VER}" -lt 18 ]
52 | then
53 |   USE_TBB=Off
54 | fi
55 | 
56 | BUILD_SUFFIX=lc_toss3-icpc-${COMP_VER}
57 | 
58 | echo
59 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
60 | echo "Configuration extra arguments:"
61 | echo "   $@"
62 | echo
63 | 
64 | # Start from a clean build directory. The expansions are quoted so an unusual
65 | # version string cannot be word-split into extra arguments to rm -rf.
66 | rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
67 | # Stop if the build directory cannot be created or entered; otherwise the
68 | # commands below would run in the directory the script was started from.
69 | mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1
70 | 
71 | # Link the run scripts and ../bin/comb into ./scripts. The subshell keeps the
72 | # working directory in the build directory even if one of the steps fails.
73 | ( mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )
74 | 
75 | module load cmake/3.14.5
76 | 
77 | # Extra arguments ("$@") are forwarded to cmake after the options set here.
78 | cmake \
79 |   -DCMAKE_BUILD_TYPE=Release \
80 |   -DMPI_CXX_COMPILER="/usr/tce/packages/mvapich2/mvapich2-2.3-intel-${COMP_VER}/bin/mpic++" \
81 |   -DMPI_C_COMPILER="/usr/tce/packages/mvapich2/mvapich2-2.3-intel-${COMP_VER}/bin/mpicc" \
82 |   -DCMAKE_CXX_COMPILER="/usr/tce/packages/intel/intel-${COMP_VER}/bin/icpc" \
83 |   -DCMAKE_C_COMPILER="/usr/tce/packages/intel/intel-${COMP_VER}/bin/icc" \
84 |   -DBLT_CXX_STD=c++14 \
85 |   -C "../host-configs/lc-builds/toss3/icpc_X_gcc${GCC_HEADER_VER}headers.cmake" \
86 |   -DENABLE_MPI=On \
87 |   -DENABLE_OPENMP=On \
88 |   -DENABLE_TBB="${USE_TBB}" \
89 |   -DCMAKE_INSTALL_PREFIX="../install_${BUILD_SUFFIX}" \
90 |   "$@" \
91 |   ..
92 | 


--------------------------------------------------------------------------------
/scripts/lc-builds/toss3_pgi.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
18 | if [ -z "$1" ]; then  # no compiler version given: print usage and fail
19 |   echo
20 |   echo "You must pass a compiler version number to script. For example,"
21 |   echo "    toss3_pgi.sh 20.1"
22 |   exit 1  # non-zero so callers can detect the usage error
23 | fi
24 | 
25 | COMP_VER=$1  # compiler version; used in the pgi and mvapich2 paths below
26 | shift 1      # remaining arguments are forwarded to cmake
27 | 
28 | BUILD_SUFFIX=lc_toss3-pgi-${COMP_VER}
29 | 
30 | echo
31 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
32 | echo "Configuration extra arguments:"
33 | echo "   $@"
34 | echo
35 | 
36 | rm -rf build_${BUILD_SUFFIX} 2>/dev/null  # start from a clean build directory
37 | mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} || exit 1  # never configure from the wrong directory
38 | 
39 | ( mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )  # subshell: cwd is unchanged even if a step fails
40 | 
41 | module load cmake/3.14.5
42 | 
43 | cmake \
44 |   -DCMAKE_BUILD_TYPE=Release \
45 |   -DMPI_CXX_COMPILER=/usr/tce/packages/mvapich2/mvapich2-2.3-pgi-${COMP_VER}/bin/mpic++ \
46 |   -DMPI_C_COMPILER=/usr/tce/packages/mvapich2/mvapich2-2.3-pgi-${COMP_VER}/bin/mpicc \
47 |   -DCMAKE_CXX_COMPILER=/usr/tce/packages/pgi/pgi-${COMP_VER}/bin/pgc++ \
48 |   -DCMAKE_C_COMPILER=/usr/tce/packages/pgi/pgi-${COMP_VER}/bin/pgcc \
49 |   -C ../host-configs/lc-builds/toss3/pgi_X.cmake \
50 |   -DENABLE_MPI=On \
51 |   -DENABLE_OPENMP=On \
52 |   -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
53 |   "$@" \
54 |   ..
55 | 


--------------------------------------------------------------------------------
/scripts/lc-builds/toss4_cray-mpich_amdclang.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | ##############################################################################
  4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  5 | ##
  6 | ## Produced at the Lawrence Livermore National Laboratory
  7 | ##
  8 | ## LLNL-CODE-758885
  9 | ##
 10 | ## All rights reserved.
 11 | ##
 12 | ## This file is part of Comb.
 13 | ##
 14 | ## For details, see https://github.com/LLNL/Comb
 15 | ## Please also see the LICENSE file for MIT license.
 16 | ##############################################################################
 17 | 
 18 | if [[ $# -lt 3 ]]; then  # three positional arguments are consumed below
 19 |   echo
 20 |   echo "You must pass 3 or more arguments to the script (in this order): "
 21 |   echo "   1) cray-mpich compiler version number"
 22 |   echo "   2) compiler version number"
 23 |   echo "   3) HIP compute architecture"
 24 |   echo "   4...) optional arguments to cmake"
 25 |   echo
 26 |   echo "For example: "
 27 |   echo "    toss4_cray-mpich_amdclang.sh 8.1.15 5.1.0 gfx906"
 28 |   echo "    toss4_cray-mpich_amdclang.sh 8.1.15 5.1.0 gfx906 -DBLT_CXX_STD=c++17"
 29 |   exit 1  # non-zero so callers can detect the usage error
 30 | fi
 31 | 
 32 | MPI_VER=$1    # cray-mpich version, used in the MPI wrapper paths
 33 | COMP_VER=$2   # rocm/amdclang version, used in the /opt/rocm-* paths
 34 | COMP_ARCH=$3  # HIP compute architecture, e.g. gfx906
 35 | shift 3       # remaining arguments are forwarded to cmake
 36 | 
 37 | MY_HIP_ARCH_FLAGS="--offload-arch=${COMP_ARCH}"  # NOTE(review): passed below as CMAKE_HIP_ARCHITECTURES, which CMake documents as architecture names (e.g. gfx906), not a flag - confirm
 38 | HOSTCONFIG="hip_X"  # default host-config; kept when no branch below matches
 39 | 
 40 | if [[ ${COMP_VER} == 4.5.* ]]  # glob match against the compiler version string
 41 | then
 42 | ##HIP_CLANG_FLAGS="${MY_HIP_ARCH_FLAGS} -mllvm -amdgpu-fixed-function-abi=1"
 43 |   HOSTCONFIG="hip_4_link_X"  # NOTE(review): the repository tree lists only hip_X.cmake under host-configs/lc-builds/toss4 - confirm this file exists there
 44 | elif [[ ${COMP_VER} == 4.* ]]
 45 | then
 46 |   HOSTCONFIG="hip_4_link_X"  # same host-config as the 4.5.* branch
 47 | elif [[ ${COMP_VER} == 3.* ]]
 48 | then
 49 |   HOSTCONFIG="hip_X"
 50 | else
 51 |   echo "Unknown hip version, using ${HOSTCONFIG} host-config"
 52 | fi
 53 | 
 54 | BUILD_SUFFIX=lc_toss4-cray-mpich-${MPI_VER}-amdclang-${COMP_VER}-${COMP_ARCH}
 55 | 
 56 | echo
 57 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
 58 | echo "Configuration extra arguments:"
 59 | echo "   $@"
 60 | echo
 61 | 
 62 | rm -rf build_${BUILD_SUFFIX} 2>/dev/null  # start from a clean build directory
 63 | mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} || exit 1  # never configure from the wrong directory
 64 | 
 65 | ( mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )  # subshell: cwd is unchanged even if a step fails
 66 | 
 67 | module load cmake/3.14.5
 68 | 
 69 | # unload rocm to avoid configuration problems where the loaded rocm and COMP_VER
 70 | # are inconsistent causing the rocprim from the module to be used unexpectedly
 71 | module unload rocm
 72 | 
 73 | 
 74 | cmake \
 75 |   -DCMAKE_BUILD_TYPE=Release \
 76 |   -DMPI_CXX_COMPILER=/usr/tce/packages/cray-mpich-tce/cray-mpich-${MPI_VER}-rocmcc-${COMP_VER}/bin/mpicxx \
 77 |   -DMPI_C_COMPILER=/usr/tce/packages/cray-mpich-tce/cray-mpich-${MPI_VER}-rocmcc-${COMP_VER}/bin/mpicc \
 78 |   -DROCM_ROOT_DIR="/opt/rocm-${COMP_VER}" \
 79 |   -DHIP_ROOT_DIR="/opt/rocm-${COMP_VER}/hip" \
 80 |   -DHIP_CLANG_PATH=/opt/rocm-${COMP_VER}/llvm/bin \
 81 |   -DCMAKE_C_COMPILER=/opt/rocm-${COMP_VER}/llvm/bin/amdclang \
 82 |   -DCMAKE_CXX_COMPILER=/opt/rocm-${COMP_VER}/llvm/bin/amdclang++ \
 83 |   -DCMAKE_HIP_ARCHITECTURES="${MY_HIP_ARCH_FLAGS}" \
 84 |   -C "../host-configs/lc-builds/toss4/${HOSTCONFIG}.cmake" \
 85 |   -DENABLE_MPI=On \
 86 |   -DENABLE_HIP=On \
 87 |   -DENABLE_OPENMP=Off \
 88 |   -DENABLE_CUDA=Off \
 89 |   -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
 90 |   "$@" \
 91 |   ..
 92 | 
 93 | echo  # post-configure instructions; the rocm paths follow the requested COMP_VER
 94 | echo "***********************************************************************"
 95 | echo
 96 | echo "cd into directory build_${BUILD_SUFFIX} and run make to build Comb"
 97 | echo
 98 | echo "  Please note that you have to have a consistent build environment"
 99 | echo "  when you make Comb as cmake may reconfigure; unload the rocm module"
100 | echo "  or load the appropriate rocm module (${COMP_VER}) when building."
101 | echo
102 | echo "    module unload rocm"
103 | echo "    srun -n1 make"
104 | echo
105 | echo "  Also note that libmodules.so is in the cce install. You may have to"
106 | echo "  add that to your LD_LIBRARY_PATH to run."
107 | echo
108 | echo "    export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/cray/pe/cce/13.0.2/cce-clang/x86_64/lib:/opt/cray/pe/cce/13.0.2/cce/x86_64/lib:/opt/rocm-${COMP_VER}/llvm/lib"
109 | echo "    srun -n1 ./bin/comb"
110 | echo
111 | echo "***********************************************************************"
112 | 


--------------------------------------------------------------------------------
/scripts/mock_cuda_graphs_tests.bash:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # This script should only be used with comb built without mpi
 4 | 
 5 | # Note: you may need to bind processes to cores to get reasonable openmp behavior
 6 | # Your scheduler may help with this
 7 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads
 8 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/
 9 | # Ex:
10 | #   bash:
11 | #     mpirun -np 1 bind_script comb
12 | #   bind_script:
13 | #     export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node
14 | #     exec $@
15 | 
16 | # Comb executable or symlink
17 | run_comb="$(pwd)/comb"
18 | 
19 | if [ ! -x "${run_comb}" ]; then
20 |    echo "comb executable not found at ${run_comb}"
21 |    exit 1
22 | fi
23 | 
24 | # Choose arguments for comb
25 | # elements on one side of the cube for each process
26 | elems_per_procs_per_side=100 # 50 100 200
27 | # overall size of the grid
28 | let size=elems_per_procs_per_side
29 | comb_args="${size}_${size}_${size}"
30 | # divide the grid into a number of procs per side
31 | comb_args="${comb_args} -divide 1_1_1"
32 | # set the grid to be periodic in each dimension
33 | comb_args="${comb_args} -periodic 1_1_1"
34 | # set the halo width or number of ghost zones
35 | comb_args="${comb_args} -ghost 1_1_1"
36 | # set number of grid variables
37 | comb_args="${comb_args} -vars 3"
38 | # set number of communication cycles
39 | comb_args="${comb_args} -cycles 25" # 100
40 | # set cutoff between large and small message packing/unpacking kernels
41 | comb_args="${comb_args} -comm cutoff 250"
42 | # set the number of omp threads per process
43 | comb_args="${comb_args} -omp_threads 1"
44 | # disable seq execution tests
45 | comb_args="${comb_args} -exec disable seq"
46 | # enable cuda execution tests
47 | comb_args="${comb_args} -exec enable cuda"
48 | # enable cuda graph execution tests
49 | comb_args="${comb_args} -exec enable cuda_graph"
50 | # disable host memory tests
51 | comb_args="${comb_args} -memory disable host"
52 | # enable cuda managed memory tests
53 | comb_args="${comb_args} -memory enable cuda_managed"
54 | # enable mock communication tests
55 | comb_args="${comb_args} -comm enable mock"
56 | # disable mpi communication tests
57 | comb_args="${comb_args} -comm disable mpi"
58 | # disable fusing packs per variable per message, pack each boundary separately even those in the same message
59 | comb_args="${comb_args} -comm disallow per_message_pack_fusing"
60 | # disable fusing packs per message group, pack each message separately
61 | # comb_args="${comb_args} -comm disallow message_group_pack_fusing"
62 | # use device preferred memory instead of host pinned memory for device utility allocations, used by fused kernels
63 | # comb_args="${comb_args} -use_device_preferred_for_cuda_util_aloc"
64 | 
65 | 
66 | # communication method flags: every post/wait phase uses wait_all
67 | wait_all_method="-comm post_recv wait_all -comm post_send wait_all -comm wait_recv wait_all -comm wait_send wait_all"
68 | 
69 | # base command for a test run: the comb executable is invoked directly
70 | # use sep_out.bash to separate each rank's output
71 | test_cmd="${run_comb}"
72 | 
73 | # run one test per communication method
74 | for method_flags in "${wait_all_method}"; do
75 | 
76 |    # show the full command line, then execute it
77 |    printf '%s\n' "${test_cmd} ${method_flags} ${comb_args}"
78 |    ${test_cmd} ${method_flags} ${comb_args}
79 | 
80 | done
81 | 
82 | echo "done"
83 | 


--------------------------------------------------------------------------------
/scripts/mock_tests.bash:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # This script should only be used with comb built without mpi
 4 | 
 5 | # Note: you may need to bind processes to cores to get reasonable openmp behavior
 6 | # Your scheduler may help with this
 7 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads
 8 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/
 9 | # Ex:
10 | #   bash:
11 | #     mpirun -np 1 bind_script comb
12 | #   bind_script:
13 | #     export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node
14 | #     exec $@
15 | 
16 | # Comb executable or symlink
17 | run_comb="$(pwd)/comb"
18 | 
19 | if [ ! -x "${run_comb}" ]; then
20 |    echo "comb executable not found at ${run_comb}"
21 |    exit 1
22 | fi
23 | 
24 | # Choose arguments for comb
25 | # elements on one side of the cube for each process
26 | elems_per_procs_per_side=100 # 50 100 200
27 | # overall size of the grid
28 | let size=elems_per_procs_per_side
29 | comb_args="${size}_${size}_${size}"
30 | # divide the grid into a number of procs per side
31 | comb_args="${comb_args} -divide 1_1_1"
32 | # set the grid to be periodic in each dimension
33 | comb_args="${comb_args} -periodic 1_1_1"
34 | # set the halo width or number of ghost zones
35 | comb_args="${comb_args} -ghost 1_1_1"
36 | # set number of grid variables
37 | comb_args="${comb_args} -vars 3"
38 | # set number of communication cycles
39 | comb_args="${comb_args} -cycles 25" # 100
40 | # set cutoff between large and small message packing/unpacking kernels
41 | comb_args="${comb_args} -comm cutoff 250"
42 | # set the number of omp threads per process
43 | comb_args="${comb_args} -omp_threads 1"
44 | # enable seq execution tests
45 | comb_args="${comb_args} -exec enable seq"
46 | # enable host memory tests
47 | comb_args="${comb_args} -memory enable host"
48 | # enable mock communication tests
49 | comb_args="${comb_args} -comm enable mock"
50 | # disable mpi communication tests
51 | comb_args="${comb_args} -comm disable mpi"
52 | # disable fusing packs per variable per message
53 | comb_args="${comb_args} -comm disallow per_message_pack_fusing"
54 | 
55 | 
56 | # communication method flags: every post/wait phase uses wait_all
57 | wait_all_method="-comm post_recv wait_all -comm post_send wait_all -comm wait_recv wait_all -comm wait_send wait_all"
58 | 
59 | # base command for a test run: the comb executable is invoked directly
60 | # use sep_out.bash to separate each rank's output
61 | test_cmd="${run_comb}"
62 | 
63 | # run one test per communication method
64 | for method_flags in "${wait_all_method}"; do
65 | 
66 |    # show the full command line, then execute it
67 |    printf '%s\n' "${test_cmd} ${method_flags} ${comb_args}"
68 |    ${test_cmd} ${method_flags} ${comb_args}
69 | 
70 | done
71 | 
72 | echo "done"
73 | 


--------------------------------------------------------------------------------
/scripts/run_nvprof.bash:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # runs nvprof on each rank separately creating a .nvprof output file
 4 | # separates the output of each mpi rank into a different file
 5 | 
 6 | ARGS="$@"
 7 | ARGS_UNDERSCORE="$(sed s/\ /_/g <<<$ARGS)"
 8 | ARGS_UNDERSCORE="$(sed s-/-@-g <<<$ARGS_UNDERSCORE)"
 9 | ARGS_UNDERSCORE="$(echo $ARGS_UNDERSCORE | cut -c -192)"
10 | 
11 | # look up this process's mpi rank in the launcher-provided environment
12 | if [[ -n "$JSM_NAMESPACE_RANK" ]]; then
13 |    RANK="${JSM_NAMESPACE_RANK}"
14 |    RANK_VAR="JSM_NAMESPACE_RANK"
15 | elif [[ -n "$OMPI_COMM_WORLD_RANK" ]]; then
16 |    RANK="${OMPI_COMM_WORLD_RANK}"
17 |    RANK_VAR="OMPI_COMM_WORLD_RANK"
18 | elif [[ -n "$MPIRUN_RANK" ]]; then
19 |    RANK="${MPIRUN_RANK}"
20 |    RANK_VAR="MPIRUN_RANK"
21 | else
22 |    echo "Could not find mpirank" >&2
23 |    exit 1
24 | fi
25 | 
26 | # optionally pick up the name of the node this rank runs on; no match is not an error
27 | if [[ -n "$nodename" ]]; then
28 |    NODE="${nodename}"
29 |    NODE_VAR="nodename"
30 | elif [[ -n "$SLURMD_NODENAME" ]]; then
31 |    NODE="${SLURMD_NODENAME}"
32 |    NODE_VAR="SLURMD_NODENAME"
33 | elif [[ -n "$LCSCHEDCLUSTER" ]]; then
34 |    NODE="${LCSCHEDCLUSTER}"
35 |    NODE_VAR="LCSCHEDCLUSTER"
36 | fi
37 | 
38 | # create an identifier for this process using its rank
39 | PROC_NAME="${RANK}"
40 | PROC_NAME_VAR="%q{${RANK_VAR}}"
41 | 
42 | # add the nodename to the process identifier if available
43 | if [[ ! "x" == "x$NODE_VAR" ]]; then
44 | 	PROC_NAME="${PROC_NAME}_${NODE}"
45 | 	PROC_NAME_VAR="${PROC_NAME_VAR}_%q{${NODE_VAR}}"
46 | fi
47 | 
48 | # use args and rank to make file name
49 | OUT_FILE_NVPROF="runnvprof.${ARGS_UNDERSCORE}.${PROC_NAME_VAR}"
50 | OUT_FILE="runnvprof.${ARGS_UNDERSCORE}.${PROC_NAME}"
51 | if [ -f "$OUT_FILE" ]; then
52 | 	echo "File already exists $OUT_FILE" 1>&2
53 | 	exit 1
54 | fi
55 | 
56 | # options to pass to nvprof
57 | NVPROF_OPTS="-o ${OUT_FILE_NVPROF}.nvprof"
58 | # NVPROF_OPTS="$NVPROF_OPTS --profile-from-start off"
59 | # NVPROF_OPTS="$NVPROF_OPTS -f"
60 | # NVPROF_OPTS="$NVPROF_OPTS --process-name \"MPI Rank ${PROC_NAME_VAR}\""
61 | # NVPROF_OPTS="$NVPROF_OPTS --system-profiling on"
62 | # NVPROF_OPTS="$NVPROF_OPTS --demangling on"
63 | # NVPROF_OPTS="$NVPROF_OPTS --cpu-profiling off"
64 | # NVPROF_OPTS="$NVPROF_OPTS --unified-memory-profiling per-process-device"
65 | # NVPROF_OPTS="$NVPROF_OPTS --cpu-thread-tracing on"
66 | 
67 | # find nvprof
68 | NVPROF="$(which nvprof)"
69 | if [ ! -f "$NVPROF" ]; then
70 | 	echo "Could not find $NVPROF" 1>&2
71 | 	exit 1
72 | fi
73 | NVPROF="$NVPROF $NVPROF_OPTS"
74 | 
75 | # print the command to be executed for the 0th rank
76 | if [[ "x0" == "x$RANK" ]]; then
77 | 	echo "$NVPROF $ARGS &> ${OUT_FILE}"
78 | fi
79 | # execute nvprof and the executable and redirect its output to a file
80 | exec $NVPROF $ARGS &> "${OUT_FILE}"
81 | 


--------------------------------------------------------------------------------
/scripts/run_tests.bash:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | procs_per_node=-1
  4 | procs_per_side=""
  5 | test_script=""
  6 | 
  7 | positional_arg=0
  8 | 
  9 | ################################################################################
 10 | #
 11 | # Usage:
 12 | #     run_tests.bash [args to run_tests.bash] procs_per_side test_script
 13 | #
 14 | # Parse any args for this script beginning with - and consume them using shift;
 15 | # the remaining positional args are procs_per_side and test_script
 16 | #
 17 | # Examples:
 18 | #     run_tests.bash 2 focused_tests.bash
 19 | #       # Launch focused_tests.bash with 2x2x2 procs with default procs per node
 20 | #
 21 | #     run_tests.bash -ppn 4 2 focused_tests.bash
 22 | #       # Launch focused_tests.bash with 2x2x2 procs
 23 | #       #   using 4 procs per node
 24 | #
 25 | ################################################################################
 26 | while [ "$#" -gt 0 ]; do  # consume every command line argument in order
 27 | 
 28 |    if [[ "$1" =~ ^\-.* ]]; then  # option: starts with a dash
 29 | 
 30 |       if [[ "x$1" == "x-ppn" || "x$1" == "x--procs-per-node" ]]; then
 31 | 
 32 |          if [ "$#" -le 1 ]; then
 33 |             echo "missing argument to $1" 1>&2
 34 |             exit 1
 35 |          fi
 36 | 
 37 |          natural_re='^[1-9][0-9]*$'  # positive, no leading zero: 0 would divide by zero later and 08 is invalid octal for let
 38 |          if ! [[ "$2" =~ $natural_re ]]; then
 39 |             echo "invalid arguments $1 $2: argument to $1 must be a positive number" 1>&2
 40 |             exit 1
 41 |          fi
 42 | 
 43 |          procs_per_node="$2"
 44 |          shift  # drop the option name; its value is dropped by the shift at the loop end
 45 | 
 46 |       else
 47 | 
 48 |          echo "unknown arg $1" 1>&2
 49 |          exit 1
 50 | 
 51 |       fi
 52 | 
 53 |    else  # positional: first is procs_per_side, second is test_script
 54 | 
 55 |       if [[ "x$positional_arg" == "x0" ]]; then
 56 | 
 57 |          procs_per_side="$1"
 58 | 
 59 |       elif [[ "x$positional_arg" == "x1" ]]; then
 60 | 
 61 |          test_script="$1"
 62 | 
 63 |       else
 64 | 
 65 |          echo "Found extra positional arg $1" 1>&2
 66 |          exit 1
 67 | 
 68 |       fi
 69 | 
 70 |       let positional_arg=positional_arg+1
 71 |    fi
 72 | 
 73 |    shift
 74 | 
 75 | done
 76 | 
 77 | if [[ "x" == "x$procs_per_side" ]]; then
 78 |    echo "First positional arg procs_per_side not given" 1>&2
 79 |    exit 1
 80 | fi
 81 | if [[ "x" == "x$test_script" ]]; then
 82 |    echo "Second positional arg test_script not given" 1>&2
 83 |    exit 1
 84 | fi
 85 | if ! [[ "$procs_per_side" =~ ^[1-9][0-9]*$ ]]; then echo "procs_per_side must be a positive number: $procs_per_side" 1>&2; exit 1; fi
 86 | let procs=procs_per_side*procs_per_side*procs_per_side  # total procs for a cube with procs_per_side on each edge
 87 | 
 88 | if [ ! -f  "$test_script" ]; then
 89 |    echo "tests script $test_script not found" 1>&2
 90 |    exit 1
 91 | fi
 92 | 
 93 | # Choose a command to get nodes (kept as an array so each argument stays one word)
 94 | if [[ ! "x" == "x$SYS_TYPE" ]]; then
 95 |    if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then
 96 |       # Command used to get nodes on sierra systems
 97 | 
 98 |       if [[ "x-1" == "x$procs_per_node" ]]; then
 99 |          procs_per_node=4
100 |       fi
101 |       let nodes=(procs+procs_per_node-1)/procs_per_node
102 | 
103 |       # get_nodes=(bsub -nnodes "${nodes}" -core_isolation 2 -W 240 -G guests -Is -XF)
104 |       get_nodes=(lalloc "${nodes}" -W 240 --shared-launch)
105 | 
106 |    elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then
107 |       # Command used to get nodes on EA systems
108 | 
109 |       if [[ "x-1" == "x$procs_per_node" ]]; then
110 |          procs_per_node=4
111 |       fi
112 |       let nodes=(procs+procs_per_node-1)/procs_per_node
113 | 
114 |       get_nodes=(bsub -n "${procs}" -R "span[ptile=${procs_per_node}]" -W 240 -G guests -Is -XF)
115 | 
116 |    elif [[ "x$SYS_TYPE" =~ xtoss_4_x86_64_ib_cray ]]; then
117 |       # Command used to get nodes on ElCap EA systems
118 | 
119 |       if [[ "x-1" == "x$procs_per_node" ]]; then
120 |          procs_per_node=1
121 |       fi
122 |       let nodes=(procs+procs_per_node-1)/procs_per_node
123 | 
124 |       get_nodes=(salloc "-N${nodes}" -t 240 --exclusive)
125 | 
126 |    else
127 |       # Command used to get nodes on slurm scheduled systems
128 | 
129 |       if [[ "x-1" == "x$procs_per_node" ]]; then
130 |          procs_per_node=1
131 |       fi
132 |       let nodes=(procs+procs_per_node-1)/procs_per_node
133 | 
134 |       get_nodes=(salloc "-N${nodes}" -t 240 --exclusive)
135 | 
136 |    fi
137 | else
138 |    # Command used to get nodes on other systems
139 |    if [[ "x-1" == "x$procs_per_node" ]]; then
140 |       procs_per_node=1
141 |    fi
142 |    let nodes=(procs+procs_per_node-1)/procs_per_node
143 | 
144 |    # Don't know how to get nodes, defer to mpi in next script
145 |    get_nodes=()
146 | 
147 | fi
148 | 
149 | run_tests=("$test_script" "$nodes" "$procs" "$procs_per_side")  # the test script receives nodes, procs and procs_per_side
150 | 
151 | full_test=("${get_nodes[@]}" "${run_tests[@]}")  # allocation command (possibly empty) followed by the test command
152 | 
153 | echo "${full_test[*]}"
154 | time "${full_test[@]}"
155 | 


--------------------------------------------------------------------------------
/scripts/scale_tests.bash:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | nodes=$1
  4 | procs=$2
  5 | procs_per_side=$3
  6 | 
  7 | # extra arguments to comb (always starts with a space or is empty)
  8 | comb_xargs=""
  9 | 
 10 | # Choose a command to run mpi based on the system being used
 11 | if [[ ! "x" == "x$SYS_TYPE" ]]; then
 12 |    if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then
 13 |       # Command used to run mpi on sierra systems
 14 |       run_mpi="lrun -N$nodes -p$procs"
 15 |       # add arguments to turn on cuda aware mpi (optionally disable gpu direct)
 16 |       # run_mpi="${run_mpi} --smpiargs \"-gpu\""
 17 |       # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\""
 18 |       # comb_xargs="${comb_xargs} -cuda_aware_mpi"
 19 |    elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then
 20 |       # Command used to run mpi on EA systems
 21 |       run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind"
 22 |    else
 23 |       # Command used to run mpi on slurm scheduled systems
 24 |       run_mpi="srun -N$nodes -n$procs"
 25 |    fi
 26 | else
 27 |    # Command used to run mpi with mpirun
 28 |    # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php
 29 |    # Note: you may need to use additional options to get reasonable mpi behavior
 30 |    # --host=hostname0,hostname1,... https://www.open-mpi.org/faq/?category=running#mpirun-hostfile
 31 |    # --hostfile my_hosts            https://www.open-mpi.org/faq/?category=running#mpirun-host
 32 |    run_mpi="mpirun -np $procs"
 33 | 
 34 |    # Command used to run mpi with mpiexec
 35 |    # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html
 36 |    # run_mpi="mpiexec -n $procs"
 37 | fi
 38 | 
 39 | # Note: you may need to bind processes to cores to get reasonable openmp behavior
 40 | # Your scheduler may help with this
 41 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads
 42 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/
 43 | # Ex:
 44 | #   bash:
 45 | #     mpirun -np 1 bind_script comb
 46 | #   bind_script:
 47 | #     export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node
 48 | #     exec $@
 49 | 
 50 | # Comb executable or symlink
 51 | run_comb="$(pwd)/comb"
 52 | 
 53 | # Choose arguments for comb
 54 | # overall size of the grid
 55 | let size=procs_per_side*100
 56 | comb_args="${size}_${size}_${size}"
 57 | # divide the grid into a number of procs per side
 58 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}"
 59 | # set the grid to be periodic in each dimension
 60 | comb_args="${comb_args} -periodic 1_1_1"
 61 | # set the halo width or number of ghost zones
 62 | comb_args="${comb_args} -ghost 1_1_1"
 63 | # set number of grid variables
 64 | comb_args="${comb_args} -vars 3"
 65 | # set number of communication cycles
 66 | comb_args="${comb_args} -cycles 100"
 67 | # set cutoff between large and small message packing/unpacking kernels
 68 | comb_args="${comb_args} -comm cutoff 250"
 69 | # set the number of omp threads per process
 70 | comb_args="${comb_args} -omp_threads 10"
 71 | # enable all execution tests
 72 | comb_args="${comb_args} -exec enable all"
 73 | # enable all memory tests
 74 | comb_args="${comb_args} -memory enable all"
 75 | # enable all communication tests
 76 | comb_args="${comb_args} -comm enable all"
 77 | # disable mpi_type execution tests (MPI Packing)
 78 | # comb_args="${comb_args} -exec disable mpi_type"
 79 | 
 80 | # add extra arguments for features enabled outside of the comb args block
 81 | comb_args="${comb_args}${comb_xargs}"
 82 | 
 83 | # set up arguments for a variety of communication methods
 84 | wait_all_method="-comm post_recv wait_all -comm post_send wait_all -comm wait_recv wait_all -comm wait_send wait_all"
 85 | wait_some_method="-comm post_recv wait_some -comm post_send wait_some -comm wait_recv wait_some -comm wait_send wait_some"
 86 | wait_any_method="-comm post_recv wait_any -comm post_send wait_any -comm wait_recv wait_any -comm wait_send wait_any"
 87 | 
 88 | test_all_method="-comm post_recv wait_all -comm post_send test_all -comm wait_recv wait_all -comm wait_send wait_all"
 89 | test_some_method="-comm post_recv wait_some -comm post_send test_some -comm wait_recv wait_some -comm wait_send wait_some"
 90 | test_any_method="-comm post_recv wait_any -comm post_send test_any -comm wait_recv wait_any -comm wait_send wait_any"
 91 | 
 92 | # set up the base command to run a test
 93 | # use sep_out.bash to separate each rank's output
 94 | run_test_base="${run_mpi} ${run_comb}"
 95 | 
 96 | # for each communication method
 97 | for comm_method in "${wait_all_method}" "${wait_some_method}" "${wait_any_method}" "${test_all_method}" "${test_some_method}" "${test_any_method}"; do
 98 | 
 99 |    # Run a test with this comm method
100 |    echo "${run_test_base} ${comm_method} ${comb_args}"
101 |    ${run_test_base} ${comm_method} ${comb_args}
102 | 
103 | done
104 | 
105 | echo "done"
106 | 


--------------------------------------------------------------------------------
/scripts/sep_out.bash:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # separates the output of each mpi rank into a different file
 4 | 
 5 | ARGS="$@"
 6 | ARGS_UNDERSCORE="$(sed s/\ /_/g <<<$ARGS)"
 7 | ARGS_UNDERSCORE="$(sed s-/-@-g <<<$ARGS_UNDERSCORE)"
 8 | ARGS_UNDERSCORE="$(echo $ARGS_UNDERSCORE | cut -c -192)"
 9 | 
10 | # attempt to find the environment variable with the mpi rank of this process
11 | if [[ ! "x" == "x$JSM_NAMESPACE_RANK" ]]; then
12 |    RANK=${JSM_NAMESPACE_RANK}
13 | elif [[ ! "x" == "x$SLURM_PROCID" ]]; then
14 |    RANK=${SLURM_PROCID}
15 | elif [[ ! "x" == "x$OMPI_COMM_WORLD_RANK" ]]; then
16 |    RANK=${OMPI_COMM_WORLD_RANK}
17 | elif [[ ! "x" == "x$MPIRUN_RANK" ]]; then
18 |    RANK=${MPIRUN_RANK}
19 | else
20 |    echo "sep_out.bash Could not find mpirank" 1>&2
21 |    exit 1
22 | fi
23 | 
24 | # use args and rank to make file name
25 | OUT_FILE="sepout.${ARGS_UNDERSCORE}.${RANK}"
26 | if [ -f "$OUT_FILE" ]; then
27 |    echo "File already exists $OUT_FILE" 1>&2
28 |    exit 1
29 | fi
30 | 
31 | # print the command to be executed for the 0th rank
32 | if [[ "x0" == "x$RANK" ]]; then
33 |    echo "$ARGS &> $OUT_FILE"
34 | fi
35 | # execute the executable and redirect its output to a file (name quoted so it is never split or globbed)
36 | exec $ARGS &> "$OUT_FILE"
37 | 


--------------------------------------------------------------------------------
/scripts/ubuntu-builds/ubuntu_clang.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
18 | BUILD_SUFFIX=ubuntu-clang
19 | 
20 | rm -rf build_${BUILD_SUFFIX} 2>/dev/null  # start from a clean build directory
21 | mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} || exit 1  # never run cmake from the wrong directory
22 | 
23 | # module load cmake/3.14.5
24 | 
25 | cmake \
26 |   -DCMAKE_BUILD_TYPE=Release \
27 |   -DCMAKE_C_COMPILER=/usr/bin/clang \
28 |   -DCMAKE_CXX_COMPILER=/usr/bin/clang++ \
29 |   -C ../host-configs/ubuntu-builds/clang_X.cmake \
30 |   -DENABLE_OPENMP=ON \
31 |   -DENABLE_CUDA=OFF \
32 |   -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
33 |   "$@" \
34 |   ..
35 | 


--------------------------------------------------------------------------------
/scripts/ubuntu-builds/ubuntu_gcc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
18 | BUILD_SUFFIX=ubuntu-gcc
19 | 
20 | rm -rf build_${BUILD_SUFFIX} 2>/dev/null  # start from a clean build directory
21 | mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} || exit 1  # never run cmake from the wrong directory
22 | 
23 | # module load cmake/3.14.5
24 | 
25 | cmake \
26 |   -DCMAKE_BUILD_TYPE=Release \
27 |   -DCMAKE_C_COMPILER=/usr/bin/gcc \
28 |   -DCMAKE_CXX_COMPILER=/usr/bin/g++ \
29 |   -C ../host-configs/ubuntu-builds/gcc_X.cmake \
30 |   -DENABLE_OPENMP=ON \
31 |   -DENABLE_CUDA=OFF \
32 |   -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
33 |   "$@" \
34 |   ..
35 | 


--------------------------------------------------------------------------------
/scripts/ubuntu-builds/ubuntu_hipcc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
18 | BUILD_SUFFIX=ubuntu-hipcc
19 | 
20 | rm -rf build_${BUILD_SUFFIX} 2>/dev/null  # start from a clean build directory; silence errors, not stdout
21 | mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} || exit 1  # never run cmake from the wrong directory
22 | 
23 | cmake \
24 |   -DCMAKE_BUILD_TYPE=Debug \
25 |   -DCMAKE_C_COMPILER=/usr/bin/gcc \
26 |   -DCMAKE_CXX_COMPILER=/usr/bin/g++ \
27 |   -C ../host-configs/ubuntu-builds/hip.cmake \
28 |   -DENABLE_OPENMP=OFF \
29 |   -DENABLE_CUDA=OFF \
30 |   -DENABLE_HIP=ON \
31 |   -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
32 |   "$@" \
33 |   ..
34 | 


--------------------------------------------------------------------------------
/scripts/ubuntu-builds/ubuntu_nvcc10_gcc8.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ##############################################################################
 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 5 | ##
 6 | ## Produced at the Lawrence Livermore National Laboratory
 7 | ##
 8 | ## LLNL-CODE-758885
 9 | ##
10 | ## All rights reserved.
11 | ##
12 | ## This file is part of Comb.
13 | ##
14 | ## For details, see https://github.com/LLNL/Comb
15 | ## Please also see the LICENSE file for MIT license.
16 | ##############################################################################
17 | 
# Suffix shared by the build and install directory names of this configuration.
BUILD_SUFFIX=ubuntu-nvcc10-gcc8

# Always start from an empty build tree.
rm -rf "build_${BUILD_SUFFIX}" >/dev/null

# Stop here if the build directory cannot be created or entered; otherwise
# cmake below would run from the current directory and treat its parent
# directory ("..") as the source tree.
mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1

# Configure a CUDA + OpenMP build using gcc-8 as the host compiler and the
# ubuntu nvcc/gcc host-config. Extra command line arguments ("$@") are
# forwarded to cmake after the settings listed here.
cmake \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_C_COMPILER=/usr/bin/gcc-8 \
  -DCMAKE_CXX_COMPILER=/usr/bin/g++-8 \
  -C ../host-configs/ubuntu-builds/nvcc_gcc_X.cmake \
  -DENABLE_OPENMP=ON \
  -DENABLE_CUDA=ON \
  -DCMAKE_CUDA_COMPILER=/usr/bin/nvcc \
  -DCUDA_ARCH=sm_70 \
  -DCMAKE_INSTALL_PREFIX="../install_${BUILD_SUFFIX}" \
  "$@" \
  ..
35 | 


--------------------------------------------------------------------------------
/src/do_cycles.cpp.in:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////////////////////////////////////////////////
 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | //
 4 | // Produced at the Lawrence Livermore National Laboratory
 5 | //
 6 | // LLNL-CODE-758885
 7 | //
 8 | // All rights reserved.
 9 | //
10 | // This file is part of Comb.
11 | //
12 | // For details, see https://github.com/LLNL/Comb
13 | // Please also see the LICENSE file for MIT license.
14 | //////////////////////////////////////////////////////////////////////////////
15 | 
16 | #include "comb.hpp"
17 | #include "comm_pol_@COMPOL@.hpp"
18 | #include "do_cycles.hpp"
19 | 
20 | namespace COMB {
21 | 
22 | // instantiate function templates required by do_cycles_allocators.hpp
23 | template void do_cycles<@COMPOL@_pol,
24 |                         ExecContext<@EXECMESH@_pol>,
25 |                         ExecContext<@EXECMANY@_pol>,
26 |                         ExecContext<@EXECFEW@_pol>>(
27 |     CommContext<@COMPOL@_pol>& con_comm_in,
28 |     CommInfo& comm_info, MeshInfo& info,
29 |     IdxT num_vars, IdxT ncycles,
30 |     ContextHolder<ExecContext<@EXECMESH@_pol>>& con_mesh_in, AllocatorInfo& aloc_mesh_in,
31 |     ContextHolder<ExecContext<@EXECMANY@_pol>>& con_many_in, AllocatorInfo& aloc_many_in,
32 |     ContextHolder<ExecContext<@EXECFEW@_pol>>& con_few_in,   AllocatorInfo& aloc_few_in,
33 |     Timer& tm, Timer& tm_total);
34 | 
35 | } // namespace COMB
36 | 


--------------------------------------------------------------------------------
/src/print_timer.cpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #include "config.hpp"
 17 | 
 18 | #include "comb.hpp"
 19 | 
 20 | #include "CommFactory.hpp"
 21 | 
 22 | #include <algorithm>
 23 | 
 24 | namespace COMB {
 25 | 
 26 | namespace detail {
 27 | 
 28 | void print_timer(CommInfo& comminfo, Timer& tm, const char* prefix, int max_name_len, bool print_header_row) {
 29 | 
 30 |   auto res = tm.getStats();
 31 | 
 32 |   double* sums = new double[res.size()];
 33 |   double* mins = new double[res.size()];
 34 |   double* maxs = new double[res.size()];
 35 |   long  * nums = new long  [res.size()];
 36 | 
 37 |   for (int i = 0; i < (int)res.size(); ++i) {
 38 |     sums[i] = res[i].sum;
 39 |     mins[i] = res[i].min;
 40 |     maxs[i] = res[i].max;
 41 |     nums[i] = res[i].num;
 42 |   }
 43 | 
 44 |   double* final_sums = nullptr;
 45 |   double* final_mins = nullptr;
 46 |   double* final_maxs = nullptr;
 47 |   long  * final_nums = nullptr;
 48 |   if (comminfo.rank == 0) {
 49 |     final_sums = new double[res.size()];
 50 |     final_mins = new double[res.size()];
 51 |     final_maxs = new double[res.size()];
 52 |     final_nums = new long  [res.size()];
 53 |   }
 54 | 
 55 | #ifdef COMB_ENABLE_MPI
 56 |   MPI_Reduce(sums, final_sums, res.size(), MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
 57 |   MPI_Reduce(mins, final_mins, res.size(), MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
 58 |   MPI_Reduce(maxs, final_maxs, res.size(), MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
 59 |   MPI_Reduce(nums, final_nums, res.size(), MPI_LONG,   MPI_SUM, 0, MPI_COMM_WORLD);
 60 | #else
 61 |   if (comminfo.rank == 0) {
 62 |     for (int i = 0; i < (int)res.size(); ++i) {
 63 |       final_sums[i] = sums[i];
 64 |       final_mins[i] = mins[i];
 65 |       final_maxs[i] = maxs[i];
 66 |       final_nums[i] = nums[i];
 67 |     }
 68 |   }
 69 | #endif
 70 | 
 71 |   if (comminfo.rank == 0) {
 72 | 
 73 |     for (int i = 0; i < (int)res.size(); ++i) {
 74 |       int padding = max_name_len - res[i].name.size();
 75 |       fgprintf(FileGroup::summary, "%s%s:%*s num %ld avg %.9f s min %.9f s max %.9f s\n",
 76 |                              prefix, res[i].name.c_str(), padding, "", final_nums[i], final_sums[i]/final_nums[i], final_mins[i], final_maxs[i]);
 77 |     }
 78 | 
 79 |     if (!res.empty() && print_header_row) {
 80 |       int padding = max_name_len - 0;
 81 |       fgprintf(FileGroup::summary_csv, "%s%s%*s, %9s, %11s, %11s, %11s\n",
 82 |                                prefix, "", padding, "", "number", "average(s)", "min(s)", "max(s)");
 83 |     }
 84 |     for (int i = 0; i < (int)res.size(); ++i) {
 85 |       int padding = max_name_len - res[i].name.size();
 86 |       fgprintf(FileGroup::summary_csv, "%s%s%*s, %9ld, %.9f, %.9f, %.9f\n",
 87 |                              prefix, res[i].name.c_str(), padding, "", final_nums[i], final_sums[i]/final_nums[i], final_mins[i], final_maxs[i]);
 88 |     }
 89 | 
 90 |     delete[] final_sums;
 91 |     delete[] final_mins;
 92 |     delete[] final_maxs;
 93 |     delete[] final_nums;
 94 |   }
 95 | 
 96 |   for (int i = 0; i < (int)res.size(); ++i) {
 97 |     int padding = max_name_len - res[i].name.size();
 98 |     fgprintf(FileGroup::proc, "%s%s:%*s num %ld avg %.9f s min %.9f s max %.9f s\n",
 99 |                         prefix, res[i].name.c_str(), padding, "", nums[i], sums[i]/nums[i], mins[i], maxs[i]);
100 |   }
101 | 
102 |   delete[] sums;
103 |   delete[] mins;
104 |   delete[] maxs;
105 |   delete[] nums;
106 | }
107 | 
108 | int get_max_name_len(Timer& tm)
109 | {
110 |   int max_name_len = 0;
111 | 
112 |   auto res = tm.getStats();
113 |   for (auto& stat : res) {
114 |     max_name_len = std::max(max_name_len, (int)stat.name.size());
115 |   }
116 | 
117 |   return max_name_len;
118 | }
119 | 
120 | } // namespace detail
121 | 
122 | void print_timer(CommInfo& comminfo, Timer& tm, const char* prefix)
123 | {
124 |   int max_name_len = detail::get_max_name_len(tm);
125 |   constexpr bool print_header_row = true;
126 | 
127 |   detail::print_timer(comminfo, tm, prefix, max_name_len, print_header_row);
128 | }
129 | 
130 | void print_timers(CommInfo& comminfo, Timer& tm0, Timer& tm1, const char* prefix)
131 | {
132 |   int max_name_len = detail::get_max_name_len(tm0);
133 |   max_name_len = std::max(detail::get_max_name_len(tm1), max_name_len);
134 |   constexpr bool print_header_row = true;
135 | 
136 |   detail::print_timer(comminfo, tm0, prefix, max_name_len, print_header_row);
137 |   detail::print_timer(comminfo, tm1, prefix, max_name_len, !print_header_row);
138 | }
139 | 
140 | void print_message_info(CommInfo& comminfo, MeshInfo& info,
141 |                         COMB::Allocator& aloc_unused,
142 |                         IdxT num_vars,
143 |                         bool print_packing_sizes,
144 |                         bool print_message_sizes)
145 | {
146 |   if (!(print_packing_sizes || print_message_sizes)) {
147 |     return;
148 |   }
149 | 
150 |   const char* prefix = "";
151 | 
152 |   if (print_packing_sizes) {
153 |     fgprintf(FileGroup::all, "%sprint message and packing sizes to proc file(s)\n",
154 |         prefix);
155 |   } else if (print_message_sizes) {
156 |     fgprintf(FileGroup::all, "%sprint message sizes to proc file(s)\n",
157 |         prefix);
158 |   }
159 | 
160 |   Range r0("print_message_info", Range::green);
161 | 
162 |   std::vector<MeshData> vars;
163 |   vars.reserve(num_vars);
164 | 
165 |   {
166 |     CommFactory factory(comminfo);
167 | 
168 |     for (IdxT i = 0; i < num_vars; ++i) {
169 | 
170 |       vars.push_back(MeshData(info, aloc_unused));
171 | 
172 |       factory.add_var(vars[i]);
173 |     }
174 | 
175 |     factory.print_message_info(print_packing_sizes, print_message_sizes);
176 |   }
177 | 
178 | }
179 | 
180 | } // namespace COMB
181 | 


--------------------------------------------------------------------------------
/src/test_cycles_gdsync.cpp:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////////////////////////////////////////////////
 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | //
 4 | // Produced at the Lawrence Livermore National Laboratory
 5 | //
 6 | // LLNL-CODE-758885
 7 | //
 8 | // All rights reserved.
 9 | //
10 | // This file is part of Comb.
11 | //
12 | // For details, see https://github.com/LLNL/Comb
13 | // Please also see the LICENSE file for MIT license.
14 | //////////////////////////////////////////////////////////////////////////////
15 | 
16 | #include "comb.hpp"
17 | 
18 | #ifdef COMB_ENABLE_GDSYNC
19 | 
20 | #include "comm_pol_gdsync.hpp"
21 | #include "do_cycles_allocators.hpp"
22 | 
23 | namespace COMB {
24 | 
25 | void test_cycles_gdsync(CommInfo& comminfo, MeshInfo& info,
26 |                         COMB::Executors& exec,
27 |                         COMB::Allocators& alloc,
28 |                         IdxT num_vars, IdxT ncycles, Timer& tm, Timer& tm_total)
29 | {
30 |   CommContext<gdsync_pol> con_comm{exec.base_cuda.get()};
31 | 
32 | #ifdef COMB_ENABLE_CUDA
33 |   AllocatorInfo& cpu_many_aloc = alloc.cuda_device;
34 |   AllocatorInfo& cpu_few_aloc  = alloc.cuda_device;
35 | 
36 |   AllocatorInfo& gpu_many_aloc = alloc.cuda_device;
37 |   AllocatorInfo& gpu_few_aloc  = alloc.cuda_device;
38 | #else
39 |   AllocatorInfo& cpu_many_aloc = alloc.invalid;
40 |   AllocatorInfo& cpu_few_aloc  = alloc.invalid;
41 | 
42 |   AllocatorInfo& gpu_many_aloc = alloc.invalid;
43 |   AllocatorInfo& gpu_few_aloc  = alloc.invalid;
44 | #endif
45 | 
46 |   do_cycles_allocators(con_comm,
47 |                        comminfo, info,
48 |                        exec,
49 |                        alloc,
50 |                        cpu_many_aloc, cpu_few_aloc,
51 |                        gpu_many_aloc, gpu_few_aloc,
52 |                        num_vars, ncycles, tm, tm_total);
53 | 
54 | }
55 | 
56 | } // namespace COMB
57 | 
58 | #endif // COMB_ENABLE_GDSYNC
59 | 


--------------------------------------------------------------------------------
/src/test_cycles_gpump.cpp:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////////////////////////////////////////////////
 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | //
 4 | // Produced at the Lawrence Livermore National Laboratory
 5 | //
 6 | // LLNL-CODE-758885
 7 | //
 8 | // All rights reserved.
 9 | //
10 | // This file is part of Comb.
11 | //
12 | // For details, see https://github.com/LLNL/Comb
13 | // Please also see the LICENSE file for MIT license.
14 | //////////////////////////////////////////////////////////////////////////////
15 | 
16 | #include "comb.hpp"
17 | 
18 | #ifdef COMB_ENABLE_GPUMP
19 | 
20 | #include "comm_pol_gpump.hpp"
21 | #include "do_cycles_allocators.hpp"
22 | 
23 | namespace COMB {
24 | 
25 | void test_cycles_gpump(CommInfo& comminfo, MeshInfo& info,
26 |                        COMB::Executors& exec,
27 |                        COMB::Allocators& alloc,
28 |                        IdxT num_vars, IdxT ncycles, Timer& tm, Timer& tm_total)
29 | {
30 |   CommContext<gpump_pol> con_comm{exec.base_cuda.get()};
31 | 
32 | #ifdef COMB_ENABLE_CUDA
33 |   AllocatorInfo& cpu_many_aloc = alloc.cuda_device;
34 |   AllocatorInfo& cpu_few_aloc  = alloc.cuda_device;
35 | 
36 |   AllocatorInfo& gpu_many_aloc = alloc.cuda_device;
37 |   AllocatorInfo& gpu_few_aloc  = alloc.cuda_device;
38 | #else
39 |   AllocatorInfo& cpu_many_aloc = alloc.invalid;
40 |   AllocatorInfo& cpu_few_aloc  = alloc.invalid;
41 | 
42 |   AllocatorInfo& gpu_many_aloc = alloc.invalid;
43 |   AllocatorInfo& gpu_few_aloc  = alloc.invalid;
44 | #endif
45 | 
46 |   do_cycles_allocators(con_comm,
47 |                        comminfo, info,
48 |                        exec,
49 |                        alloc,
50 |                        cpu_many_aloc, cpu_few_aloc,
51 |                        gpu_many_aloc, gpu_few_aloc,
52 |                        num_vars, ncycles, tm, tm_total);
53 | 
54 | }
55 | 
56 | } // namespace COMB
57 | 
58 | #endif // COMB_ENABLE_GPUMP
59 | 


--------------------------------------------------------------------------------
/src/test_cycles_mock.cpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #include "comb.hpp"
 17 | 
 18 | #include "comm_pol_mock.hpp"
 19 | #include "do_cycles_allocators.hpp"
 20 | 
 21 | namespace COMB {
 22 | 
 23 | void test_cycles_mock(CommInfo& comminfo, MeshInfo& info,
 24 |                       COMB::Executors& exec,
 25 |                       COMB::Allocators& alloc,
 26 |                       IdxT num_vars, IdxT ncycles, Timer& tm, Timer& tm_total)
 27 | {
 28 | #ifdef COMB_ENABLE_MPI
 29 |   CommContext<mock_pol> con_comm{exec.base_mpi.get()};
 30 | #else
 31 |   CommContext<mock_pol> con_comm{exec.base_cpu.get()};
 32 | #endif
 33 | 
 34 |   {
 35 |     // mock host memory tests
 36 |     AllocatorInfo& cpu_many_aloc = alloc.host;
 37 |     AllocatorInfo& cpu_few_aloc  = alloc.host;
 38 | 
 39 |   #if defined(COMB_ENABLE_CUDA)
 40 |     AllocatorInfo& gpu_many_aloc = alloc.cuda_hostpinned;
 41 |     AllocatorInfo& gpu_few_aloc  = alloc.cuda_hostpinned;
 42 |   #elif defined(COMB_ENABLE_HIP)
 43 |     AllocatorInfo& gpu_many_aloc = alloc.hip_hostpinned;
 44 |     AllocatorInfo& gpu_few_aloc  = alloc.hip_hostpinned;
 45 |   #else
 46 |     AllocatorInfo& gpu_many_aloc = alloc.invalid;
 47 |     AllocatorInfo& gpu_few_aloc  = alloc.invalid;
 48 |   #endif
 49 | 
 50 |     do_cycles_allocators(con_comm,
 51 |                          comminfo, info,
 52 |                          exec,
 53 |                          alloc,
 54 |                          cpu_many_aloc, cpu_few_aloc,
 55 |                          gpu_many_aloc, gpu_few_aloc,
 56 |                          num_vars, ncycles, tm, tm_total);
 57 |   }
 58 | 
 59 | #ifdef COMB_ENABLE_CUDA
 60 |   {
 61 |     // mock cuda memory tests
 62 |     AllocatorInfo& cpu_many_aloc = alloc.cuda_device;
 63 |     AllocatorInfo& cpu_few_aloc  = alloc.cuda_device;
 64 | 
 65 |     AllocatorInfo& gpu_many_aloc = alloc.cuda_device;
 66 |     AllocatorInfo& gpu_few_aloc  = alloc.cuda_device;
 67 | 
 68 |     do_cycles_allocators(con_comm,
 69 |                          comminfo, info,
 70 |                          exec,
 71 |                          alloc,
 72 |                          cpu_many_aloc, cpu_few_aloc,
 73 |                          gpu_many_aloc, gpu_few_aloc,
 74 |                          num_vars, ncycles, tm, tm_total);
 75 |   }
 76 | #endif
 77 | 
 78 | #ifdef COMB_ENABLE_HIP
 79 |   {
 80 |     // mock hip memory tests
 81 |     AllocatorInfo& cpu_many_aloc = alloc.hip_device;
 82 |     AllocatorInfo& cpu_few_aloc  = alloc.hip_device;
 83 | 
 84 |     AllocatorInfo& gpu_many_aloc = alloc.hip_device;
 85 |     AllocatorInfo& gpu_few_aloc  = alloc.hip_device;
 86 | 
 87 |     do_cycles_allocators(con_comm,
 88 |                          comminfo, info,
 89 |                          exec,
 90 |                          alloc,
 91 |                          cpu_many_aloc, cpu_few_aloc,
 92 |                          gpu_many_aloc, gpu_few_aloc,
 93 |                          num_vars, ncycles, tm, tm_total);
 94 |   }
 95 | #endif
 96 | 
 97 | }
 98 | 
 99 | } // namespace COMB
100 | 


--------------------------------------------------------------------------------
/src/test_cycles_mp.cpp:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////////////////////////////////////////////////
 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | //
 4 | // Produced at the Lawrence Livermore National Laboratory
 5 | //
 6 | // LLNL-CODE-758885
 7 | //
 8 | // All rights reserved.
 9 | //
10 | // This file is part of Comb.
11 | //
12 | // For details, see https://github.com/LLNL/Comb
13 | // Please also see the LICENSE file for MIT license.
14 | //////////////////////////////////////////////////////////////////////////////
15 | 
16 | #include "comb.hpp"
17 | 
18 | #ifdef COMB_ENABLE_MP
19 | 
20 | #include "comm_pol_mp.hpp"
21 | #include "do_cycles_allocators.hpp"
22 | 
23 | namespace COMB {
24 | 
25 | void test_cycles_mp(CommInfo& comminfo, MeshInfo& info,
26 |                        COMB::Executors& exec,
27 |                        COMB::Allocators& alloc,
28 |                        IdxT num_vars, IdxT ncycles, Timer& tm, Timer& tm_total)
29 | {
30 |   CommContext<mp_pol> con_comm{exec.base_cuda.get()};
31 | 
32 | #ifdef COMB_ENABLE_CUDA
33 |   AllocatorInfo& cpu_many_aloc = alloc.cuda_device;
34 |   AllocatorInfo& cpu_few_aloc  = alloc.cuda_device;
35 | 
36 |   AllocatorInfo& gpu_many_aloc = alloc.cuda_device;
37 |   AllocatorInfo& gpu_few_aloc  = alloc.cuda_device;
38 | #else
39 |   AllocatorInfo& cpu_many_aloc = alloc.invalid;
40 |   AllocatorInfo& cpu_few_aloc  = alloc.invalid;
41 | 
42 |   AllocatorInfo& gpu_many_aloc = alloc.invalid;
43 |   AllocatorInfo& gpu_few_aloc  = alloc.invalid;
44 | #endif
45 | 
46 |   do_cycles_allocators(con_comm,
47 |                        comminfo, info,
48 |                        exec,
49 |                        alloc,
50 |                        cpu_many_aloc, cpu_few_aloc,
51 |                        gpu_many_aloc, gpu_few_aloc,
52 |                        num_vars, ncycles, tm, tm_total);
53 | 
54 | }
55 | 
56 | } // namespace COMB
57 | 
58 | #endif // COMB_ENABLE_MP
59 | 


--------------------------------------------------------------------------------
/src/test_cycles_mpi.cpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #include "comb.hpp"
 17 | 
 18 | #ifdef COMB_ENABLE_MPI
 19 | 
 20 | #include "comm_pol_mpi.hpp"
 21 | #include "do_cycles_allocators.hpp"
 22 | 
 23 | namespace COMB {
 24 | 
 25 | void test_cycles_mpi(CommInfo& comminfo, MeshInfo& info,
 26 |                      COMB::Executors& exec,
 27 |                      COMB::Allocators& alloc,
 28 |                      IdxT num_vars, IdxT ncycles, Timer& tm, Timer& tm_total)
 29 | {
 30 |   CommContext<mpi_pol> con_comm{exec.base_mpi.get()};
 31 | 
 32 |   {
 33 |     // mpi host memory tests
 34 |     AllocatorInfo& cpu_many_aloc = alloc.host;
 35 |     AllocatorInfo& cpu_few_aloc  = alloc.host;
 36 | 
 37 | #if defined(COMB_ENABLE_CUDA)
 38 |     AllocatorInfo& gpu_many_aloc = alloc.cuda_hostpinned;
 39 |     AllocatorInfo& gpu_few_aloc  = alloc.cuda_hostpinned;
 40 | #elif defined(COMB_ENABLE_HIP)
 41 |     AllocatorInfo& gpu_many_aloc = alloc.hip_hostpinned;
 42 |     AllocatorInfo& gpu_few_aloc  = alloc.hip_hostpinned;
 43 | #else
 44 |     AllocatorInfo& gpu_many_aloc = alloc.invalid;
 45 |     AllocatorInfo& gpu_few_aloc  = alloc.invalid;
 46 | #endif
 47 | 
 48 |     do_cycles_allocators(con_comm,
 49 |                          comminfo, info,
 50 |                          exec,
 51 |                          alloc,
 52 |                          cpu_many_aloc, cpu_few_aloc,
 53 |                          gpu_many_aloc, gpu_few_aloc,
 54 |                          num_vars, ncycles, tm, tm_total);
 55 |   }
 56 | 
 57 | #ifdef COMB_ENABLE_CUDA
 58 |   {
 59 |     // mpi cuda memory tests
 60 |     AllocatorInfo& cpu_many_aloc = alloc.cuda_device;
 61 |     AllocatorInfo& cpu_few_aloc  = alloc.cuda_device;
 62 | 
 63 |     AllocatorInfo& gpu_many_aloc = alloc.cuda_device;
 64 |     AllocatorInfo& gpu_few_aloc  = alloc.cuda_device;
 65 | 
 66 |     do_cycles_allocators(con_comm,
 67 |                          comminfo, info,
 68 |                          exec,
 69 |                          alloc,
 70 |                          cpu_many_aloc, cpu_few_aloc,
 71 |                          gpu_many_aloc, gpu_few_aloc,
 72 |                          num_vars, ncycles, tm, tm_total);
 73 |   }
 74 | #endif
 75 | 
 76 | #ifdef COMB_ENABLE_HIP
 77 |   {
 78 |     // mpi hip memory tests
 79 |     AllocatorInfo& cpu_many_aloc = alloc.hip_device;
 80 |     AllocatorInfo& cpu_few_aloc  = alloc.hip_device;
 81 | 
 82 |     AllocatorInfo& gpu_many_aloc = alloc.hip_device;
 83 |     AllocatorInfo& gpu_few_aloc  = alloc.hip_device;
 84 | 
 85 |     do_cycles_allocators(con_comm,
 86 |                          comminfo, info,
 87 |                          exec,
 88 |                          alloc,
 89 |                          cpu_many_aloc, cpu_few_aloc,
 90 |                          gpu_many_aloc, gpu_few_aloc,
 91 |                          num_vars, ncycles, tm, tm_total);
 92 |   }
 93 | #endif
 94 | 
 95 | }
 96 | 
 97 | } // namespace COMB
 98 | 
 99 | #endif
100 | 


--------------------------------------------------------------------------------
/src/test_cycles_mpi_persistent.cpp:
--------------------------------------------------------------------------------
  1 | //////////////////////////////////////////////////////////////////////////////
  2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
  3 | //
  4 | // Produced at the Lawrence Livermore National Laboratory
  5 | //
  6 | // LLNL-CODE-758885
  7 | //
  8 | // All rights reserved.
  9 | //
 10 | // This file is part of Comb.
 11 | //
 12 | // For details, see https://github.com/LLNL/Comb
 13 | // Please also see the LICENSE file for MIT license.
 14 | //////////////////////////////////////////////////////////////////////////////
 15 | 
 16 | #include "comb.hpp"
 17 | 
 18 | #ifdef COMB_ENABLE_MPI
 19 | 
 20 | #include "comm_pol_mpi_persistent.hpp"
 21 | #include "do_cycles_allocators.hpp"
 22 | 
 23 | namespace COMB {
 24 | 
 25 | void test_cycles_mpi_persistent(CommInfo& comminfo, MeshInfo& info,
 26 |                      COMB::Executors& exec,
 27 |                      COMB::Allocators& alloc,
 28 |                      IdxT num_vars, IdxT ncycles, Timer& tm, Timer& tm_total)
 29 | {
 30 |   CommContext<mpi_persistent_pol> con_comm{exec.base_mpi.get()};
 31 | 
 32 |   {
 33 |     // mpi host memory tests
 34 |     AllocatorInfo& cpu_many_aloc = alloc.host;
 35 |     AllocatorInfo& cpu_few_aloc  = alloc.host;
 36 | 
 37 | #if defined(COMB_ENABLE_CUDA)
 38 |     AllocatorInfo& gpu_many_aloc = alloc.cuda_hostpinned;
 39 |     AllocatorInfo& gpu_few_aloc  = alloc.cuda_hostpinned;
 40 | #elif defined(COMB_ENABLE_HIP)
 41 |     AllocatorInfo& gpu_many_aloc = alloc.hip_hostpinned;
 42 |     AllocatorInfo& gpu_few_aloc  = alloc.hip_hostpinned;
 43 | #else
 44 |     AllocatorInfo& gpu_many_aloc = alloc.invalid;
 45 |     AllocatorInfo& gpu_few_aloc  = alloc.invalid;
 46 | #endif
 47 | 
 48 |     do_cycles_allocators(con_comm,
 49 |                          comminfo, info,
 50 |                          exec,
 51 |                          alloc,
 52 |                          cpu_many_aloc, cpu_few_aloc,
 53 |                          gpu_many_aloc, gpu_few_aloc,
 54 |                          num_vars, ncycles, tm, tm_total);
 55 |   }
 56 | 
 57 | #ifdef COMB_ENABLE_CUDA
 58 |   {
 59 |     // mpi cuda memory tests
 60 |     AllocatorInfo& cpu_many_aloc = alloc.cuda_device;
 61 |     AllocatorInfo& cpu_few_aloc  = alloc.cuda_device;
 62 | 
 63 |     AllocatorInfo& gpu_many_aloc = alloc.cuda_device;
 64 |     AllocatorInfo& gpu_few_aloc  = alloc.cuda_device;
 65 | 
 66 |     do_cycles_allocators(con_comm,
 67 |                          comminfo, info,
 68 |                          exec,
 69 |                          alloc,
 70 |                          cpu_many_aloc, cpu_few_aloc,
 71 |                          gpu_many_aloc, gpu_few_aloc,
 72 |                          num_vars, ncycles, tm, tm_total);
 73 |   }
 74 | #endif
 75 | 
 76 | #ifdef COMB_ENABLE_HIP
 77 |   {
 78 |     // mpi hip memory tests
 79 |     AllocatorInfo& cpu_many_aloc = alloc.hip_device;
 80 |     AllocatorInfo& cpu_few_aloc  = alloc.hip_device;
 81 | 
 82 |     AllocatorInfo& gpu_many_aloc = alloc.hip_device;
 83 |     AllocatorInfo& gpu_few_aloc  = alloc.hip_device;
 84 | 
 85 |     do_cycles_allocators(con_comm,
 86 |                          comminfo, info,
 87 |                          exec,
 88 |                          alloc,
 89 |                          cpu_many_aloc, cpu_few_aloc,
 90 |                          gpu_many_aloc, gpu_few_aloc,
 91 |                          num_vars, ncycles, tm, tm_total);
 92 |   }
 93 | #endif
 94 | 
 95 | }
 96 | 
 97 | } // namespace COMB
 98 | 
 99 | #endif
100 | 


--------------------------------------------------------------------------------
/src/test_cycles_umr.cpp:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////////////////////////////////////////////////
 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
 3 | //
 4 | // Produced at the Lawrence Livermore National Laboratory
 5 | //
 6 | // LLNL-CODE-758885
 7 | //
 8 | // All rights reserved.
 9 | //
10 | // This file is part of Comb.
11 | //
12 | // For details, see https://github.com/LLNL/Comb
13 | // Please also see the LICENSE file for MIT license.
14 | //////////////////////////////////////////////////////////////////////////////
15 | 
16 | #include "comb.hpp"
17 | 
18 | #ifdef COMB_ENABLE_UMR
19 | 
20 | #include "comm_pol_umr.hpp"
21 | #include "do_cycles_allocators.hpp"
22 | 
23 | namespace COMB {
24 | 
25 | void test_cycles_umr(CommInfo& comminfo, MeshInfo& info,
26 |                      COMB::Executors& exec,
27 |                      COMB::Allocators& alloc,
28 |                      IdxT num_vars, IdxT ncycles, Timer& tm, Timer& tm_total)
29 | {
30 |   CommContext<umr_pol> con_comm{exec.base_mpi.get()};
31 | 
32 |   AllocatorInfo& cpu_many_aloc = alloc.host;
33 |   AllocatorInfo& cpu_few_aloc  = alloc.host;
34 | 
35 | #if defined(COMB_ENABLE_CUDA)
36 |     AllocatorInfo& gpu_many_aloc = alloc.cuda_hostpinned;
37 |     AllocatorInfo& gpu_few_aloc  = alloc.cuda_hostpinned;
38 | #elif defined(COMB_ENABLE_HIP)
39 |     AllocatorInfo& gpu_many_aloc = alloc.hip_hostpinned;
40 |     AllocatorInfo& gpu_few_aloc  = alloc.hip_hostpinned;
41 | #else
42 |     AllocatorInfo& gpu_many_aloc = alloc.invalid;
43 |     AllocatorInfo& gpu_few_aloc  = alloc.invalid;
44 | #endif
45 | 
46 |   do_cycles_allocators(con_comm,
47 |                        comminfo, info,
48 |                        exec,
49 |                        alloc,
50 |                        cpu_many_aloc, cpu_few_aloc,
51 |                        gpu_many_aloc, gpu_few_aloc,
52 |                        num_vars, ncycles, tm, tm_total);
53 | 
54 | }
55 | 
56 | } // namespace COMB
57 | 
58 | #endif // COMB_ENABLE_UMR
59 | 


--------------------------------------------------------------------------------