├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── NOTICE ├── README.md ├── RELEASE ├── cmake ├── FindNvToolsExt.cmake ├── Findgdsync.cmake ├── Findgpump.cmake ├── Findmp.cmake ├── FindrocTX.cmake ├── Findumr.cmake ├── SetupBasics.cmake ├── SetupCombConfig.cmake ├── SetupCombOptions.cmake ├── SetupCompilers.cmake ├── SetupDependentOptions.cmake └── SetupPackages.cmake ├── host-configs ├── lc-builds │ ├── blueos │ │ ├── clang_X.cmake │ │ ├── gcc_X.cmake │ │ ├── nvcc_clang_X.cmake │ │ ├── nvcc_gcc_X.cmake │ │ ├── nvcc_xl_X.cmake │ │ ├── pgi_X.cmake │ │ └── xl_X.cmake │ ├── toss3 │ │ ├── clang_X.cmake │ │ ├── gcc_X.cmake │ │ ├── hip_4_link_X.cmake │ │ ├── hip_X.cmake │ │ ├── icpc_X_gcc7headers.cmake │ │ ├── icpc_X_gcc8headers.cmake │ │ └── pgi_X.cmake │ └── toss4 │ │ └── hip_X.cmake └── ubuntu-builds │ ├── clang_X.cmake │ ├── gcc_X.cmake │ ├── hip.cmake │ └── nvcc_gcc_X.cmake ├── include ├── Box3d.hpp ├── CommFactory.hpp ├── ExecContext.hpp ├── MeshData.hpp ├── MeshInfo.hpp ├── MessageBase.hpp ├── align.hpp ├── basic_mempool.hpp ├── comb.hpp ├── comm.hpp ├── comm_pol_gdsync.hpp ├── comm_pol_gpump.hpp ├── comm_pol_mock.hpp ├── comm_pol_mp.hpp ├── comm_pol_mpi.hpp ├── comm_pol_mpi_persistent.hpp ├── comm_pol_umr.hpp ├── comm_utils_gdsync.hpp ├── comm_utils_gpump.hpp ├── comm_utils_mp.hpp ├── comm_utils_mpi.hpp ├── comm_utils_umr.hpp ├── config.hpp.in ├── do_cycles.hpp ├── do_cycles_allocators.hpp ├── exec.hpp ├── exec_fused.hpp ├── exec_pol_cuda.hpp ├── exec_pol_cuda_graph.hpp ├── exec_pol_hip.hpp ├── exec_pol_mpi_type.hpp ├── exec_pol_omp.hpp ├── exec_pol_raja.hpp ├── exec_pol_seq.hpp ├── exec_utils.hpp ├── exec_utils_cuda.hpp ├── exec_utils_graph_launch.hpp ├── exec_utils_hip.hpp ├── memory.hpp ├── mutex.hpp ├── print.hpp └── profiling.hpp ├── scripts ├── basic_tests.bash ├── combine_output.lua ├── focused_cuda_graphs_tests.bash ├── focused_gdsync_tests.bash ├── focused_gpump_tests.bash ├── focused_mp_tests.bash ├── focused_mpi_type_tests.bash ├── 
focused_tests.bash ├── focused_umr_tests.bash ├── lc-builds │ ├── blueos_clang.sh │ ├── blueos_gcc.sh │ ├── blueos_nvcc_clang.sh │ ├── blueos_nvcc_gcc.sh │ ├── blueos_nvcc_xl.sh │ ├── blueos_pgi.sh │ ├── blueos_xl.sh │ ├── toss3_clang.sh │ ├── toss3_gcc.sh │ ├── toss3_hipcc.sh │ ├── toss3_icpc.sh │ ├── toss3_pgi.sh │ └── toss4_cray-mpich_amdclang.sh ├── mock_cuda_graphs_tests.bash ├── mock_tests.bash ├── run_nvprof.bash ├── run_tests.bash ├── scale_tests.bash ├── sep_out.bash └── ubuntu-builds │ ├── ubuntu_clang.sh │ ├── ubuntu_gcc.sh │ ├── ubuntu_hipcc.sh │ └── ubuntu_nvcc10_gcc8.sh └── src ├── CMakeLists.txt ├── comb.cpp ├── do_cycles.cpp.in ├── print.cpp ├── print_timer.cpp ├── test_copy.cpp ├── test_cycles_basic.cpp ├── test_cycles_gdsync.cpp ├── test_cycles_gpump.cpp ├── test_cycles_mock.cpp ├── test_cycles_mp.cpp ├── test_cycles_mpi.cpp ├── test_cycles_mpi_persistent.cpp ├── test_cycles_umr.cpp └── warmup.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | /build_* 2 | /install_* 3 | *.swp 4 | *~ 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tpl/blt"] 2 | path = blt 3 | url = https://github.com/LLNL/blt.git 4 | [submodule "tpl/RAJA"] 5 | path = tpl/RAJA 6 | url = https://github.com/LLNL/RAJA.git 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 |
16 | # Declare the minimum CMake version before anything else so that the policy
17 | # defaults it establishes are already in effect when project() runs.
18 | cmake_minimum_required(VERSION 3.14.5)
19 |
20 | cmake_policy(SET CMP0042 NEW)
21 | cmake_policy(SET CMP0048 NEW)
22 |
23 | if (APPLE)
24 |   cmake_policy(SET CMP0025 NEW)
25 | endif()
26 |
27 | include(CMakeDependentOption)
28 |
29 | set(COMB_VERSION_MAJOR 0)
30 | set(COMB_VERSION_MINOR 3)
31 | set(COMB_VERSION_PATCHLEVEL 1)
32 |
33 | set(COMB_LOADED "${COMB_VERSION_MAJOR}.${COMB_VERSION_MINOR}.${COMB_VERSION_PATCHLEVEL}")
34 |
35 | project(COMB LANGUAGES CXX C VERSION ${COMB_LOADED})
36 |
37 | set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
38 |
39 | include(cmake/SetupCombOptions.cmake)
40 |
41 | # Detect C++ standard and add appropriate flag _before_ loading BLT
42 | set(COMPILERS_KNOWN_TO_CMAKE33 AppleClang Clang GNU MSVC)
43 |
44 | include(CheckCXXCompilerFlag)
45 | if(NOT DEFINED BLT_CXX_STD)
46 |   # NOTE(review): CMAKE_CXX_KNOWN_FEATURES is documented as a global property,
47 |   # not a variable -- confirm the first two branches are actually reachable.
48 |   if("cxx_std_17" IN_LIST CMAKE_CXX_KNOWN_FEATURES)
49 |     set(BLT_CXX_STD c++17 CACHE STRING "Version of C++ standard")
50 |     message("Using C++ standard: ${BLT_CXX_STD}")
51 |   elseif("cxx_std_14" IN_LIST CMAKE_CXX_KNOWN_FEATURES)
52 |     set(BLT_CXX_STD c++14 CACHE STRING "Version of C++ standard")
53 |     message("Using C++ standard: ${BLT_CXX_STD}")
54 |   elseif("${CMAKE_CXX_COMPILER_ID}" IN_LIST COMPILERS_KNOWN_TO_CMAKE33)
55 |     set(BLT_CXX_STD c++14 CACHE STRING "Version of C++ standard")
56 |     message("Using C++ standard: ${BLT_CXX_STD}")
57 |   else() #cmake has no idea what to do, do it ourselves...
58 |     # Probe the compiler directly, newest standard first, and stop at the
59 |     # first -std= flag it accepts.
60 |     foreach(flag_var "c++17" "c++14")
61 |       # The result variable is also used as a cache entry and as a -D define
62 |       # in the test compile, so derive a plain identifier
63 |       # (e.g. COMPILER_SUPPORTS_c__17) instead of embedding "++" in its name.
64 |       string(MAKE_C_IDENTIFIER "COMPILER_SUPPORTS_${flag_var}" flag_supported)
65 |       check_cxx_compiler_flag("-std=${flag_var}" ${flag_supported})
66 |       if(${flag_supported})
67 |         set(BLT_CXX_STD ${flag_var} CACHE STRING "Version of C++ standard")
68 |         message("Using C++ standard: ${BLT_CXX_STD}")
69 |         break()
70 |       endif()
71 |     endforeach()
72 |   endif()
73 | else() #check BLT_CXX_STD is high enough by disallowing the only invalid option
74 |   # IN_LIST expects the *name* of a list variable on its right-hand side; a
75 |   # literal "a;b" string there is looked up as a variable and never matches.
76 |   set(COMB_UNSUPPORTED_CXX_STDS "c++98" "c++11")
77 |   if("${BLT_CXX_STD}" IN_LIST COMB_UNSUPPORTED_CXX_STDS)
78 |     message(FATAL_ERROR "RAJA requires minimum C++ standard of c++14")
79 |   endif()
80 | endif()
81 |
82 | set(CMAKE_CXX_EXTENSIONS OFF)
83 |
84 | # Load BLT unless a parent project has already done so. A user-supplied
85 | # BLT_SOURCE_DIR is validated; otherwise the bundled submodule is used.
86 | if (NOT BLT_LOADED)
87 |   if (DEFINED BLT_SOURCE_DIR)
88 |     if (NOT EXISTS ${BLT_SOURCE_DIR}/SetupBLT.cmake)
89 |       message(FATAL_ERROR "Given BLT_SOURCE_DIR does not contain SetupBLT.cmake")
90 |     endif()
91 |   else ()
92 |     set (BLT_SOURCE_DIR ${PROJECT_SOURCE_DIR}/blt CACHE PATH "")
93 |
94 |     if (NOT EXISTS ${BLT_SOURCE_DIR}/SetupBLT.cmake)
95 |       message(FATAL_ERROR "\
96 | The BLT submodule is not present. \
97 | If in git repository run the following two commands:\n \
98 | git submodule init\n \
99 | git submodule update")
100 |     endif ()
101 |   endif ()
102 |
103 |   include(${BLT_SOURCE_DIR}/SetupBLT.cmake)
104 | endif()
105 |
106 | # Setup options that depend on BLT
107 | include(cmake/SetupDependentOptions.cmake)
108 | # Setup basic CMake options
109 | include(cmake/SetupBasics.cmake)
110 | # Find third-party packages
111 | include(cmake/SetupPackages.cmake)
112 | # Setup vendor-specific compiler flags
113 | include(cmake/SetupCompilers.cmake)
114 | # Setup internal COMB configuration options
115 | include(cmake/SetupCombConfig.cmake)
116 |
117 | add_subdirectory(src)
118 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | This work was produced under the auspices of the U.S. Department of Energy by 2 | Lawrence Livermore National Laboratory under Contract DE-AC52-07NA27344. 3 | 4 | This work was prepared as an account of work sponsored by an agency of the 5 | United States Government. Neither the United States Government nor Lawrence 6 | Livermore National Security, LLC, nor any of their employees makes any warranty, 7 | expressed or implied, or assumes any legal liability or responsibility for the 8 | accuracy, completeness, or usefulness of any information, apparatus, product, or 9 | process disclosed, or represents that its use would not infringe privately owned 10 | rights. Reference herein to any specific commercial product, process, or service 11 | by trade name, trademark, manufacturer, or otherwise does not necessarily 12 | constitute or imply its endorsement, recommendation, or favoring by the United 13 | States Government or Lawrence Livermore National Security, LLC. The views and 14 | opinions of authors expressed herein do not necessarily state or reflect those 15 | of the United States Government or Lawrence Livermore National Security, LLC, 16 | and shall not be used for advertising or product endorsement purposes. 17 | -------------------------------------------------------------------------------- /RELEASE: -------------------------------------------------------------------------------- 1 | Comb Version 0.3 2 | 3 | Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 
4 | Produced at the Lawrence Livermore National Laboratory. 5 | All rights reserved. See details in Comb/LICENSE. 6 | 7 | Open Source – MIT Distribution 8 | LLNL-CODE-758885 9 | 10 | Created by Jason Burmark (burmark1@llnl.gov) 11 | 12 | Contributors: 13 | -------------------------------------------------------------------------------- /cmake/FindNvToolsExt.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 14 | ############################################################################## 15 | 16 | ############################################################################### 17 | # 18 | # Setup nvToolsExt 19 | # This file defines: 20 | # NVTOOLSEXT_FOUND - If nvToolsExt was found 21 | # NVTOOLSEXT_INCLUDE_DIRS - The nvToolsExt include directories 22 | # NVTOOLSEXT_LIBRARY - The nvToolsExt library 23 | 24 | # first Check for CUDA_TOOLKIT_ROOT_DIR 25 | if(NOT CUDA_TOOLKIT_ROOT_DIR) 26 | MESSAGE(FATAL_ERROR "Could not find NvToolsExt. 
NvToolsExt support needs explicit CUDA_TOOLKIT_ROOT_DIR") 27 | endif() 28 | 29 | #find includes 30 | find_path( NVTOOLSEXT_INCLUDE_DIRS nvToolsExt.h 31 | HINTS ${CUDA_TOOLKIT_ROOT_DIR}/include ) 32 | 33 | find_library( NVTOOLSEXT_LIBRARY NAMES nvToolsExt libnvToolsExt 34 | HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib ) 35 | 36 | 37 | include(FindPackageHandleStandardArgs) 38 | # handle the QUIETLY and REQUIRED arguments and set NVTOOLSEXT_FOUND to TRUE 39 | # if all listed variables are TRUE 40 | find_package_handle_standard_args(NVTOOLSEXT DEFAULT_MSG 41 | NVTOOLSEXT_INCLUDE_DIRS 42 | NVTOOLSEXT_LIBRARY ) 43 | -------------------------------------------------------------------------------- /cmake/Findgdsync.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | find_path(GDSYNC_PATH 17 | NAMES "lib/libgdsync.so" 18 | PATHS 19 | ENV GDSYNC_DIR 20 | /opt/ibm/spectrum_mpi/libgdsync 21 | DOC "Path to gdsync library") 22 | 23 | 24 | if(GDSYNC_PATH) 25 | message(STATUS "GDSYNC_PATH: ${GDSYNC_PATH}") 26 | set(GDSYNC_FOUND TRUE) 27 | set(GDSYNC_CXX_COMPILE_FLAGS -I${GDSYNC_PATH}/include) 28 | set(GDSYNC_INCLUDE_PATH ${GDSYNC_PATH}/include) 29 | set(GDSYNC_CXX_LINK_FLAGS -L${GDSYNC_PATH}/lib) 30 | set(GDSYNC_CXX_LIBRARIES ${GDSYNC_PATH}/lib/libgdsync.so) 31 | set(GDSYNC_ARCH ) 32 | else() 33 | set(GDSYNC_FOUND FALSE) 34 | message(WARNING "gdsync library not found") 35 | endif() 36 | -------------------------------------------------------------------------------- /cmake/Findgpump.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | find_path(GPUMP_PATH 17 | NAMES "lib/libgpump.so" 18 | PATHS 19 | ENV GPUMP_DIR 20 | /opt/ibm/spectrum_mpi/libgpump 21 | DOC "Path to gpump library") 22 | 23 | 24 | if(GPUMP_PATH) 25 | message(STATUS "GPUMP_PATH: ${GPUMP_PATH}") 26 | set(GPUMP_FOUND TRUE) 27 | set(GPUMP_CXX_COMPILE_FLAGS -I${GPUMP_PATH}/include) 28 | set(GPUMP_INCLUDE_PATH ${GPUMP_PATH}/include) 29 | set(GPUMP_CXX_LINK_FLAGS -L${GPUMP_PATH}/lib) 30 | set(GPUMP_CXX_LIBRARIES ${GPUMP_PATH}/lib/libgpump.so) 31 | set(GPUMP_ARCH ) 32 | else() 33 | set(GPUMP_FOUND FALSE) 34 | message(WARNING "gpump library not found") 35 | endif() 36 | -------------------------------------------------------------------------------- /cmake/Findmp.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | find_path(MP_PATH 17 | NAMES "lib/libmp.so" 18 | PATHS 19 | ENV MP_DIR 20 | /opt/ibm/spectrum_mpi/libmp 21 | DOC "Path to mp library") 22 | 23 | 24 | if(MP_PATH) 25 | message(STATUS "MP_PATH: ${MP_PATH}") 26 | set(MP_FOUND TRUE) 27 | set(MP_CXX_COMPILE_FLAGS -I${MP_PATH}/include) 28 | set(MP_INCLUDE_PATH ${MP_PATH}/include) 29 | set(MP_CXX_LINK_FLAGS -L${MP_PATH}/lib) 30 | set(MP_CXX_LIBRARIES ${MP_PATH}/lib/libmp.so) 31 | set(MP_ARCH ) 32 | else() 33 | set(MP_FOUND FALSE) 34 | message(WARNING "mp library not found") 35 | endif() 36 | -------------------------------------------------------------------------------- /cmake/FindrocTX.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | ############################################################################### 17 | # 18 | # Setup rocTX 19 | # This file defines: 20 | # ROCTX_FOUND - If rocTX was found 21 | # ROCTX_INCLUDE_DIRS - The rocTX include directories 22 | # ROCTX_LIBRARY - The rocTX library 23 | 24 | #find includes 25 | find_path( ROCTX_INCLUDE_DIRS 26 | NAMES roctx.h 27 | HINTS 28 | ${ROCTX_DIR}/include 29 | ${ROCTRACER_DIR}/include 30 | ${HIP_ROOT_DIR}/../roctracer/include 31 | ${HIP_ROOT_DIR}/../include ) 32 | 33 | find_library( ROCTX_LIBRARY 34 | NAMES roctx64 libroctx64 35 | HINTS 36 | ${ROCTX_DIR}/lib 37 | ${ROCTRACER_DIR}/lib 38 | ${HIP_ROOT_DIR}/../roctracer/lib 39 | ${HIP_ROOT_DIR}/../lib ) 40 | 41 | 42 | include(FindPackageHandleStandardArgs) 43 | # handle the QUIETLY and REQUIRED arguments and set ROCTX_FOUND to TRUE 44 | # if all listed variables are TRUE 45 | find_package_handle_standard_args(ROCTX DEFAULT_MSG 46 | ROCTX_INCLUDE_DIRS 47 | ROCTX_LIBRARY ) 48 | -------------------------------------------------------------------------------- /cmake/Findumr.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ##############################################################################
15 |
16 | find_path(UMR_PATH
17 |   NAMES "lib/libumr.so"
18 |   PATHS
19 |     ENV UMR_DIR
20 |     /opt/ibm/spectrum_mpi/libumr
21 |   DOC "Path to umr library")
22 |
23 |
24 | if(UMR_PATH)
25 |   message(STATUS "UMR_PATH: ${UMR_PATH}")
26 |   set(UMR_FOUND TRUE)
27 |   set(UMR_CXX_COMPILE_FLAGS -I${UMR_PATH}/include)
28 |   set(UMR_INCLUDE_PATH ${UMR_PATH}/include)
29 |   set(UMR_CXX_LINK_FLAGS -L${UMR_PATH}/lib)
30 |   set(UMR_CXX_LIBRARIES ${UMR_PATH}/lib/libumr.so)
31 |   set(UMR_ARCH )
32 | else()
33 |   set(UMR_FOUND FALSE)
34 |   message(WARNING "umr library not found")
35 | endif()
36 |
--------------------------------------------------------------------------------
/cmake/SetupBasics.cmake:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
3 | ##
4 | ## Produced at the Lawrence Livermore National Laboratory
5 | ##
6 | ## LLNL-CODE-758885
7 | ##
8 | ## All rights reserved.
9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 |
16 | # Default to a Release build when no build type was chosen.
17 | # CMAKE_BUILD_TYPE is only meaningful for single-config generators, so it is
18 | # left alone when the active generator is multi-config.
19 | get_property(COMB_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
20 | if(NOT COMB_GENERATOR_IS_MULTI_CONFIG AND NOT CMAKE_BUILD_TYPE)
21 |   # FORCE is needed here because an empty CMAKE_BUILD_TYPE entry may already
22 |   # exist in the cache; the guard above ensures a user choice is never replaced.
23 |   set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, \
24 | options are: Debug Release RelWithDebInfo" FORCE)
25 | endif()
26 |
--------------------------------------------------------------------------------
/cmake/SetupCombConfig.cmake:
--------------------------------------------------------------------------------
1 | ##############################################################################
2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 14 | ############################################################################## 15 | 16 | # Set up COMB_ENABLE prefixed options 17 | set(COMB_ENABLE_MPI ${ENABLE_MPI}) 18 | set(COMB_ENABLE_OPENMP ${ENABLE_OPENMP}) 19 | set(COMB_ENABLE_CUDA ${ENABLE_CUDA}) 20 | set(COMB_ENABLE_NV_TOOLS_EXT ${ENABLE_NV_TOOLS_EXT}) 21 | set(COMB_ENABLE_CLANG_CUDA ${ENABLE_CLANG_CUDA}) 22 | set(COMB_ENABLE_HIP ${ENABLE_HIP}) 23 | set(COMB_ENABLE_ROCTX ${ENABLE_ROCTX}) 24 | set(COMB_ENABLE_GDSYNC ${ENABLE_GDSYNC}) 25 | set(COMB_ENABLE_GPUMP ${ENABLE_GPUMP}) 26 | set(COMB_ENABLE_MP ${ENABLE_MP}) 27 | set(COMB_ENABLE_UMR ${ENABLE_UMR}) 28 | set(COMB_ENABLE_RAJA ${ENABLE_RAJA}) 29 | set(COMB_ENABLE_CALIPER ${ENABLE_CALIPER}) 30 | set(COMB_ENABLE_ADIAK ${ENABLE_ADIAK}) 31 | 32 | if (COMB_ENABLE_CUDA) 33 | if(CUDA_VERSION VERSION_GREATER_EQUAL 10) 34 | set(COMB_ENABLE_CUDA_GRAPH On) 35 | else() 36 | set(COMB_ENABLE_CUDA_GRAPH Off) 37 | endif() 38 | endif() 39 | 40 | set(COMB_CXX_COMPILER ${CMAKE_CXX_COMPILER}) 41 | set(COMB_CUDA_COMPILER ${CMAKE_CUDA_COMPILER}) 42 | set(COMB_HIP_COMPILER ${CMAKE_HIP_CLANG_COMPILER}) 43 | 44 | # Configure a header file with all the variables we found. 45 | configure_file(${PROJECT_SOURCE_DIR}/include/config.hpp.in 46 | ${PROJECT_BINARY_DIR}/include/config.hpp) 47 | -------------------------------------------------------------------------------- /cmake/SetupCombOptions.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 
3 | ##
4 | ## Produced at the Lawrence Livermore National Laboratory
5 | ##
6 | ## LLNL-CODE-758885
7 | ##
8 | ## All rights reserved.
9 | ##
10 | ## This file is part of Comb.
11 | ##
12 | ## For details, see https://github.com/LLNL/Comb
13 | ## Please also see the LICENSE file for MIT license.
14 | ##############################################################################
15 |
16 | # Enable MPI by default
17 | set(ENABLE_MPI On CACHE BOOL "Build MPI support")
18 |
19 | # Comb-specific build options (cache entries set without FORCE, so a value given with -D is kept)
20 | set(COMB_ENABLE_GDSYNC Off CACHE BOOL "Build GDSYNC support")
21 | set(COMB_ENABLE_GPUMP Off CACHE BOOL "Build GPUMP support")
22 | set(COMB_ENABLE_MP Off CACHE BOOL "Build MP support")
23 | set(COMB_ENABLE_UMR Off CACHE BOOL "Build UMR support")
24 | set(COMB_ENABLE_RAJA ON CACHE BOOL "Build RAJA support")
25 | set(COMB_ENABLE_CALIPER Off CACHE BOOL "Build Caliper support")
26 | set(COMB_ENABLE_ADIAK Off CACHE BOOL "Build Adiak support")
27 |
28 | option(COMB_ENABLE_LOG "Build logging support" Off)
29 |
30 | # Build options for libraries; the extras below all default to Off
31 | option(ENABLE_TESTS "Build tests" Off)
32 | option(ENABLE_REPRODUCERS "Build issue reproducers" Off)
33 | option(ENABLE_EXAMPLES "Build simple examples" Off)
34 | option(ENABLE_EXERCISES "Build exercises " Off)
35 | option(ENABLE_MODULES "Enable modules in supporting compilers (clang)" Off)
36 |
37 | if (ENABLE_CUDA)
38 |   # Comb requires separable compilation; this must be set before BLT is loaded
39 |   set(CUDA_SEPARABLE_COMPILATION ON CACHE BOOL "")
40 |   if (NOT DEFINED CUDA_ARCH)
41 |     message(STATUS "CUDA compute architecture set to Comb default sm_35 since it was not specified")
42 |     set(CUDA_ARCH "sm_35" CACHE STRING "Set CUDA_ARCH to Comb minimum supported" FORCE)
43 |   endif()
44 | endif()
45 |
46 | if (ENABLE_HIP)
47 |   # Separable compilation setting for HIP; the line below is commented out, so this block currently has no effect
48 |   # set(HIP_SEPARABLE_COMPILATION ON CACHE BOOL "")
49 | endif()
50 |
-------------------------------------------------------------------------------- /cmake/SetupCompilers.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 14 | ############################################################################## 15 | 16 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3" CACHE STRING "") 17 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3" CACHE STRING "") 18 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0" CACHE STRING "") 19 | 20 | if (CMAKE_CXX_COMPILER_ID MATCHES GNU) 21 | if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9) 22 | message(FATAL_ERROR "COMB requires GCC 4.9 or greater!") 23 | endif () 24 | endif() 25 | 26 | set(COMB_COMPILER "COMB_COMPILER_${CMAKE_CXX_COMPILER_ID}") 27 | 28 | if ( MSVC ) 29 | if (NOT BUILD_SHARED_LIBS) 30 | foreach(flag_var 31 | CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE 32 | CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) 33 | if(${flag_var} MATCHES "/MD") 34 | string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") 35 | endif(${flag_var} MATCHES "/MD") 36 | endforeach(flag_var) 37 | endif() 38 | endif() 39 | 40 | if (COMB_ENABLE_CUDA) 41 | set(CMAKE_CUDA_STANDARD 14) 42 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr -Xcudafe \"--display_error_number\"") 43 | 44 | if (NOT COMB_HOST_CONFIG_LOADED) 45 | set(CMAKE_CUDA_FLAGS_RELEASE "-O3") 46 | set(CMAKE_CUDA_FLAGS_DEBUG 
"-g -G -O0") 47 | set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os") 48 | set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O2") 49 | endif() 50 | endif() 51 | # end COMB_ENABLE_CUDA section 52 | 53 | if (COMB_ENABLE_HIP) 54 | 55 | set(CMAKE_HIP_STANDARD "14" CACHE STRING "Version of C++ standard for HIP Builds") 56 | 57 | endif() 58 | # end COMB_ENABLE_HIP section 59 | -------------------------------------------------------------------------------- /cmake/SetupDependentOptions.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 14 | ############################################################################## 15 | 16 | ## 17 | ## Here are the CMake dependent options in COMB. 
18 | ## 19 | 20 | cmake_dependent_option(COMB_ENABLE_MPI "Build MPI support" On "ENABLE_MPI" Off) 21 | cmake_dependent_option(COMB_ENABLE_OPENMP "Build OpenMP support" On "ENABLE_OPENMP" Off) 22 | cmake_dependent_option(COMB_ENABLE_CUDA "Build CUDA support" On "ENABLE_CUDA" Off) 23 | cmake_dependent_option(COMB_ENABLE_HIP "Build HIP support" On "ENABLE_HIP" Off) 24 | cmake_dependent_option(COMB_ENABLE_CLANG_CUDA "Build Clang CUDA support" On "ENABLE_CLANG_CUDA" Off) 25 | 26 | cmake_dependent_option(COMB_ENABLE_NV_TOOLS_EXT "Build NV_TOOLS_EXT support" On "COMB_ENABLE_CUDA" Off) 27 | cmake_dependent_option(COMB_ENABLE_ROCTX "Build ENABLE_ROCTX support" On "COMB_ENABLE_HIP" Off) 28 | -------------------------------------------------------------------------------- /host-configs/lc-builds/blueos/clang_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_CLANG" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-O3" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") 21 | 22 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 23 | -------------------------------------------------------------------------------- /host-configs/lc-builds/blueos/gcc_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_GNU" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -finline-functions -finline-limit=20000" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Ofast -g -finline-functions -finline-limit=20000" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") 21 | 22 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 23 | -------------------------------------------------------------------------------- /host-configs/lc-builds/blueos/nvcc_clang_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 
3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_CLANG" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-O3" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") 21 | 22 | set(HOST_OPT_FLAGS "-Xcompiler -O3 -Xcompiler -fopenmp") 23 | 24 | set(CMAKE_CUDA_FLAGS_RELEASE "-O3 ${HOST_OPT_FLAGS}" CACHE STRING "") 25 | set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0" CACHE STRING "") 26 | set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O3 ${HOST_OPT_FLAGS}" CACHE STRING "") 27 | 28 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 29 | -------------------------------------------------------------------------------- /host-configs/lc-builds/blueos/nvcc_gcc_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_GNU" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -finline-functions" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Ofast -g -finline-functions" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") 21 | 22 | set(HOST_OPT_FLAGS "-Xcompiler -O3 -Xcompiler -finline-functions -Xcompiler -fopenmp") 23 | 24 | set(CMAKE_CUDA_FLAGS_RELEASE "-O3 ${HOST_OPT_FLAGS}" CACHE STRING "") 25 | set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0" CACHE STRING "") 26 | set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O3 ${HOST_OPT_FLAGS}" CACHE STRING "") 27 | 28 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 29 | -------------------------------------------------------------------------------- /host-configs/lc-builds/blueos/nvcc_xl_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_XLC" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-O3" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g9" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -qsmp=omp:noopt" CACHE STRING "") 21 | set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,muldefs" CACHE STRING "") 22 | 23 | set(HOST_OPT_FLAGS "-Xcompiler -O3 -Xcompiler -qxlcompatmacros -Xcompiler -qalias=noansi -Xcompiler -qsmp=omp -Xcompiler -qhot -Xcompiler -qnoeh -Xcompiler -qsuppress=1500-029 -Xcompiler -qsuppress=1500-036") 24 | 25 | set(CMAKE_CUDA_FLAGS_RELEASE "-O3 ${HOST_OPT_FLAGS}" CACHE STRING "") 26 | set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0" CACHE STRING "") 27 | set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O3 ${HOST_OPT_FLAGS}" CACHE STRING "") 28 | 29 | # Suppressed XLC warnings: 30 | # - 1500-029 cannot inline 31 | # - 1500-036 nostrict optimizations may alter code semantics 32 | # (can be countered with -qstrict, with less optimization) 33 | 34 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 35 | -------------------------------------------------------------------------------- /host-configs/lc-builds/blueos/pgi_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_PGI" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fast -mp" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-fast -g -mp" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -mp" CACHE STRING "") 21 | 22 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 23 | -------------------------------------------------------------------------------- /host-configs/lc-builds/blueos/xl_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_XLC" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-O3 -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qsmp=omp -qhot -qnoeh -qsuppress=1500-029 -qsuppress=1500-036" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qsmp=omp -qhot -qnoeh -qsuppress=1500-029 -qsuppress=1500-036" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -qsmp=omp:noopt " CACHE STRING "") 21 | set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,muldefs" CACHE STRING "") 22 | 23 | # Suppressed XLC warnings: 24 | # - 1500-029 cannot inline 25 | # - 1500-036 nostrict optimizations may alter code semantics 26 | # (can be countered with -qstrict, with less optimization) 27 | 28 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 29 | 30 | -------------------------------------------------------------------------------- /host-configs/lc-builds/toss3/clang_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_CLANG" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-O3 -msse4.2 -funroll-loops -finline-functions" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -msse4.2 -funroll-loops -finline-functions" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") 21 | 22 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 23 | -------------------------------------------------------------------------------- /host-configs/lc-builds/toss3/gcc_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_GNU" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -finline-functions -finline-limit=20000" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Ofast -g -finline-functions -finline-limit=20000" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") 21 | 22 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 23 | -------------------------------------------------------------------------------- /host-configs/lc-builds/toss3/hip_4_link_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 
3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_CLANG" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-O2" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") 21 | # Placeholders for extra HIP/host flags; intentionally left unset (empty) here. 22 | set(HIP_COMMON_OPT_FLAGS ) 23 | set(HIP_COMMON_DEBUG_FLAGS) 24 | set(HOST_OPT_FLAGS) 25 | # When ROCM_ROOT_DIR is given, link the HIP/HSA runtimes explicitly. The value is a flag string, so it is cached as STRING (a PATH entry may be rewritten as an absolute path). 26 | if(DEFINED ROCM_ROOT_DIR) 27 | set(CMAKE_EXE_LINKER_FLAGS "-Wl,--disable-new-dtags -L${ROCM_ROOT_DIR}/hip/lib -L${ROCM_ROOT_DIR}/lib -L${ROCM_ROOT_DIR}/lib64 -Wl,-rpath,${ROCM_ROOT_DIR}/hip/lib:${ROCM_ROOT_DIR}/lib:${ROCM_ROOT_DIR}/lib64 -lamdhip64 -lhsakmt -lhsa-runtime64" CACHE STRING "") 28 | endif() 29 | # Select hipcc flags for the configured build type; COMB_HIPCC_FLAGS stays unset if CMAKE_BUILD_TYPE matches none of these. 30 | if(CMAKE_BUILD_TYPE MATCHES Release) 31 | set(COMB_HIPCC_FLAGS "-fPIC -O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}" CACHE STRING "") 32 | elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo) 33 | set(COMB_HIPCC_FLAGS "-fPIC -g -O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}" CACHE STRING "") 34 | elseif(CMAKE_BUILD_TYPE MATCHES Debug) 35 | set(COMB_HIPCC_FLAGS "-fPIC -g -O0 ${HIP_COMMON_DEBUG_FLAGS}" CACHE STRING "") 36 | endif() 37 | 38 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 39 | -------------------------------------------------------------------------------- /host-configs/lc-builds/toss3/hip_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_CLANG" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-O2" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") 21 | 22 | set(HIP_COMMON_OPT_FLAGS ) 23 | set(HIP_COMMON_DEBUG_FLAGS) 24 | set(HOST_OPT_FLAGS) 25 | 26 | if(CMAKE_BUILD_TYPE MATCHES Release) 27 | set(COMB_HIPCC_FLAGS "-fPIC -O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}" CACHE STRING "") 28 | elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo) 29 | set(COMB_HIPCC_FLAGS "-fPIC -g -O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}" CACHE STRING "") 30 | elseif(CMAKE_BUILD_TYPE MATCHES Debug) 31 | set(COMB_HIPCC_FLAGS "-fPIC -g -O0 ${HIP_COMMON_DEBUG_FLAGS}" CACHE STRING "") 32 | endif() 33 | 34 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 35 | -------------------------------------------------------------------------------- /host-configs/lc-builds/toss3/icpc_X_gcc7headers.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_ICC" CACHE STRING "") 17 | 18 | set(COMMON_FLAGS "-gxx-name=/usr/tce/packages/gcc/gcc-7.1.0/bin/g++") 19 | 20 | set(CMAKE_CXX_FLAGS_RELEASE "${COMMON_FLAGS} -O3 -fp-model source -unroll-aggressive -finline-functions -axCORE-AVX2 -diag-disable cpu-dispatch" CACHE STRING "") 21 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${COMMON_FLAGS} -O3 -g -fp-model source -unroll-aggressive -finline-functions -axCORE-AVX2 -diag-disable cpu-dispatch" CACHE STRING "") 22 | set(CMAKE_CXX_FLAGS_DEBUG "${COMMON_FLAGS} -O0 -g" CACHE STRING "") 23 | 24 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 25 | -------------------------------------------------------------------------------- /host-configs/lc-builds/toss3/icpc_X_gcc8headers.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_ICC" CACHE STRING "") 17 | 18 | set(COMMON_FLAGS "-gxx-name=/usr/tce/packages/gcc/gcc-8.1.0/bin/g++") 19 | 20 | set(CMAKE_CXX_FLAGS_RELEASE "${COMMON_FLAGS} -O3 -march=native -ansi-alias -axCORE-AVX2 -diag-disable cpu-dispatch" CACHE STRING "") 21 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${COMMON_FLAGS} -O3 -g -march=native -ansi-alias -axCORE-AVX2 -diag-disable cpu-dispatch" CACHE STRING "") 22 | set(CMAKE_CXX_FLAGS_DEBUG "${COMMON_FLAGS} -O0 -g" CACHE STRING "") 23 | 24 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 25 | -------------------------------------------------------------------------------- /host-configs/lc-builds/toss3/pgi_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_PGI" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fast -mp" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -fast -mp" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -mp" CACHE STRING "") 21 | 22 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 23 | -------------------------------------------------------------------------------- /host-configs/lc-builds/toss4/hip_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_CLANG" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-O2" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") 21 | 22 | set(HIP_COMMON_OPT_FLAGS ) 23 | set(HIP_COMMON_DEBUG_FLAGS) 24 | set(HOST_OPT_FLAGS) 25 | 26 | if(CMAKE_BUILD_TYPE MATCHES Release) 27 | set(COMB_HIPCC_FLAGS "-fPIC -O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}" CACHE STRING "") 28 | elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo) 29 | set(COMB_HIPCC_FLAGS "-fPIC -g -O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}" CACHE STRING "") 30 | elseif(CMAKE_BUILD_TYPE MATCHES Debug) 31 | set(COMB_HIPCC_FLAGS "-fPIC -g -O0 ${HIP_COMMON_DEBUG_FLAGS}" CACHE STRING "") 32 | endif() 33 | 34 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 35 | -------------------------------------------------------------------------------- /host-configs/ubuntu-builds/clang_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 
14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_CLANG" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-O3 -msse4.2 -funroll-loops -finline-functions" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -msse4.2 -funroll-loops -finline-functions" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") 21 | 22 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 23 | -------------------------------------------------------------------------------- /host-configs/ubuntu-builds/gcc_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_GNU" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -finline-functions -finline-limit=20000" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Ofast -g -finline-functions -finline-limit=20000" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") 21 | 22 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 23 | -------------------------------------------------------------------------------- /host-configs/ubuntu-builds/hip.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 
3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 14 | ############################################################################## 15 | 16 | set(HIP_ROOT_DIR "${ROCM_DIR}/hip" CACHE PATH "HIP ROOT directory path") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-O2" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") 21 | 22 | # set(HIP_COMMON_OPT_FLAGS "--amdgpu-target=gfx900") 23 | set(HIP_COMMON_OPT_FLAGS ) 24 | set(HIP_COMMON_DEBUG_FLAGS) 25 | set(HOST_OPT_FLAGS) 26 | 27 | if (ENABLE_OPENMP) 28 | set(HIP_COMMON_OPT_FLAGS "-fopenmp ${HIP_COMMON_OPT_FLAGS}") 29 | endif() 30 | 31 | if(CMAKE_BUILD_TYPE MATCHES Release) 32 | set(COMB_HIPCC_FLAGS "-O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}" CACHE STRING "") 33 | elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo) 34 | set(COMB_HIPCC_FLAGS "-g -O2 ${HIP_COMMON_OPT_FLAGS} ${HOST_OPT_FLAGS}" CACHE STRING "") 35 | elseif(CMAKE_BUILD_TYPE MATCHES Debug) 36 | set(COMB_HIPCC_FLAGS "-g -O0 ${HIP_COMMON_DEBUG_FLAGS}" CACHE STRING "") 37 | endif() 38 | 39 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 40 | -------------------------------------------------------------------------------- /host-configs/ubuntu-builds/nvcc_gcc_X.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | ## 4 | ## Produced at the Lawrence Livermore National Laboratory 5 | ## 6 | ## LLNL-CODE-758885 7 | ## 8 | ## All rights reserved. 9 | ## 10 | ## This file is part of Comb. 
11 | ## 12 | ## For details, see https://github.com/LLNL/Comb 13 | ## Please also see the LICENSE file for MIT license. 14 | ############################################################################## 15 | 16 | set(COMB_COMPILER "COMB_COMPILER_GNU" CACHE STRING "") 17 | 18 | set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -finline-functions" CACHE STRING "") 19 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Ofast -g -finline-functions" CACHE STRING "") 20 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") 21 | 22 | if(NOT DEFINED CUDA_ARCH) 23 | message(FATAL_ERROR "CUDA_ARCH NOT DEFINED") 24 | endif() 25 | 26 | set(COMB_NVCC_FLAGS "-restrict -arch ${CUDA_ARCH} --expt-extended-lambda" CACHE STRING "") 27 | set(COMB_NVCC_FLAGS_RELEASE "-O3" CACHE STRING "") 28 | set(COMB_NVCC_FLAGS_RELWITHDEBINFO "-O2 -g -lineinfo" CACHE STRING "") 29 | set(COMB_NVCC_FLAGS_MINSIZEREL "-Os" CACHE STRING "") 30 | set(COMB_NVCC_FLAGS_DEBUG "-O0 -g -G" CACHE STRING "") 31 | 32 | set(COMB_HOST_CONFIG_LOADED On CACHE BOOL "") 33 | -------------------------------------------------------------------------------- /include/MeshData.hpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 
14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #ifndef _MESHDATA_HPP 17 | #define _MESHDATA_HPP 18 | 19 | #include "config.hpp" 20 | 21 | #include "memory.hpp" 22 | #include "MeshInfo.hpp" 23 | // Lazily allocated buffer of info.totallen DataT elements obtained from aloc; aloc and info are held by reference. 24 | struct MeshData 25 | { 26 | COMB::Allocator& aloc; 27 | MeshInfo const& info; 28 | DataT* ptr; 29 | // Binds the mesh info and allocator; ptr starts as nullptr and nothing is allocated until allocate() is called. 30 | MeshData(MeshInfo const& meshinfo, COMB::Allocator& aloc_) 31 | : aloc(aloc_) 32 | , info(meshinfo) 33 | , ptr(nullptr) 34 | { 35 | 36 | } 37 | // Requests info.totallen*sizeof(DataT) bytes from aloc if no buffer is currently held; otherwise does nothing. 38 | void allocate() 39 | { 40 | if (ptr == nullptr) { 41 | ptr = (DataT*)aloc.allocate(info.totallen*sizeof(DataT)); 42 | } 43 | } 44 | // Equal when the allocator names match, the infos compare equal, and both objects hold the same buffer pointer. 45 | bool operator==(MeshData const& other) const 46 | { 47 | return aloc.name() == other.aloc.name() && 48 | info == other.info && 49 | ptr == other.ptr; 50 | } 51 | // Returns the raw buffer pointer (nullptr while no buffer is held). 52 | DataT* data() const 53 | { 54 | return ptr; 55 | } 56 | // Hands the buffer back to aloc and resets ptr to nullptr; a no-op when no buffer is held. 57 | void deallocate() 58 | { 59 | if (ptr != nullptr) { 60 | aloc.deallocate(ptr); 61 | ptr = nullptr; 62 | } 63 | } 64 | // Releases any held buffer. NOTE(review): no copy/move control is declared here, so a copied MeshData would deallocate the same ptr twice - confirm instances are never copied. 65 | ~MeshData() 66 | { 67 | deallocate(); 68 | } 69 | }; 70 | 71 | #endif // _MESHDATA_HPP 72 | 73 | -------------------------------------------------------------------------------- /include/align.hpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ****************************************************************************** 3 | * 4 | * \file 5 | * 6 | * \brief RAJA header file containing an implementation of std align. 7 | * 8 | ****************************************************************************** 9 | */ 10 | 11 | //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// 12 | // Copyright (c) 2016-18, Lawrence Livermore National Security, LLC. 13 | // 14 | // Produced at the Lawrence Livermore National Laboratory 15 | // 16 | // LLNL-CODE-689114 17 | // 18 | // All rights reserved. 19 | // 20 | // This file is part of RAJA. 21 | // 22 | // For details about use and distribution, please read RAJA/LICENSE.
23 | // 24 | //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// 25 | 26 | #ifndef COMBRAJA_ALIGN_HPP 27 | #define COMBRAJA_ALIGN_HPP 28 | 29 | //#include "RAJA/config.hpp" 30 | #include "config.hpp" 31 | 32 | #define COMBRAJA_INLINE inline 33 | namespace COMBRAJA 34 | { 35 | // align: rounds ptr up to the next multiple of alignment (mask trick, so alignment is assumed to be a power of two). If size bytes still fit in the remaining space, returns the aligned pointer, stores it in ptr and shrinks space by the padding; otherwise returns nullptr and leaves ptr and space untouched. 36 | //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// 37 | // Taken from libc++ 38 | // See libc++ license in docs/Licenses/libc++ License 39 | //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// 40 | COMBRAJA_INLINE 41 | void* align(size_t alignment, size_t size, void*& ptr, size_t& space) 42 | { 43 | void* r = nullptr; 44 | if (size <= space) { 45 | char* p1 = static_cast<char*>(ptr); 46 | char* p2 = reinterpret_cast<char*>( 47 | reinterpret_cast<size_t>(p1 + (alignment - 1)) & -alignment); 48 | size_t d = static_cast<size_t>(p2 - p1); 49 | if (d <= space - size) { 50 | r = p2; 51 | ptr = r; 52 | space -= d; 53 | } 54 | } 55 | return r; 56 | } 57 | 58 | } // end namespace COMBRAJA 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /include/comm_utils_gdsync.hpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license.
14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #ifndef _UTILS_GDSYNC_HPP 17 | #define _UTILS_GDSYNC_HPP 18 | 19 | #include "config.hpp" 20 | 21 | #ifdef COMB_ENABLE_GDSYNC 22 | 23 | #include 24 | 25 | #include 26 | #include 27 | 28 | #include "exec_utils.hpp" 29 | #include "comm_utils_mpi.hpp" 30 | 31 | namespace detail { 32 | 33 | namespace gdsync { 34 | 35 | inline struct ::gdsync* init(MPI_Comm mpi_comm) 36 | { 37 | // LOGPRINTF("gdsync_init rank(w%i)\n", MPI::Comm_rank(MPI_COMM_WORLD)); 38 | struct ::gdsync* g = gdsync_init(mpi_comm); 39 | // LOGPRINTF("gdsync_init rank(w%i) done -> %p\n", MPI::Comm_rank(MPI_COMM_WORLD), g); 40 | assert(g != nullptr); 41 | return g; 42 | } 43 | 44 | inline void term(struct ::gdsync* g) 45 | { 46 | // LOGPRINTF("gdsync_term(%p) rank(w%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD)); 47 | gdsync_term(g); 48 | } 49 | 50 | inline void connect_propose(struct ::gdsync* g, int target) 51 | { 52 | // LOGPRINTF("gdsync_connect_propose(%p) rank(w%i) %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), target); 53 | gdsync_connect_propose(g, target); 54 | } 55 | 56 | inline void connect_accept(struct ::gdsync* g, int target) 57 | { 58 | // LOGPRINTF("gdsync_connect_accept(%p) rank(w%i) %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), target); 59 | gdsync_connect_accept(g, target); 60 | } 61 | 62 | inline void disconnect(struct ::gdsync* g, int target) 63 | { 64 | // LOGPRINTF("gdsync_disconnect(%p) rank(w%i) %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), target); 65 | gdsync_disconnect(g, target); 66 | } 67 | 68 | inline struct ::ibv_mr* register_region(struct ::gdsync* g, void* ptr, size_t size) 69 | { 70 | // LOGPRINTF("gdsync_register_region(%p) rank(w%i) %p[%zu]\n", g, MPI::Comm_rank(MPI_COMM_WORLD), ptr, size); 71 | struct ::ibv_mr* mr = gdsync_register_region(g, ptr, size); 72 | // LOGPRINTF("gdsync_register_region(%p) rank(w%i) %p[%zu] done -> %p\n", g, MPI::Comm_rank(MPI_COMM_WORLD), ptr, size, mr); 73 | return 
mr; 74 | } 75 |
// Forwards to gdsync_deregister_region(g, mr).
inline void deregister_region(struct ::gdsync* g, struct ::ibv_mr* mr)
{
  // LOGPRINTF("gdsync_deregister_region(%p) rank(w%i) %p\n", g, MPI::Comm_rank(MPI_COMM_WORLD), mr);
  gdsync_deregister_region(g, mr);
}

// Forwards to gdsync_cork(g).
inline void cork(struct ::gdsync* g)
{
  // LOGPRINTF("gdsync_cork(%p) rank(w%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD));
  gdsync_cork(g);
}

// Forwards to gdsync_uncork(g, stream).
inline void uncork(struct ::gdsync* g, cudaStream_t stream)
{
  // LOGPRINTF("gdsync_uncork(%p) rank(w%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), (void*)stream);
  gdsync_uncork(g, stream);
}

// Forwards to gdsync_receive; buf_mr/offset/size are passed through unchanged.
inline void receive(struct ::gdsync* g, int src, struct ::ibv_mr* buf_mr, size_t offset, size_t size)
{
  // LOGPRINTF("gdsync_receive(%p) rank(w%i) %p+%zu[%zu] src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), buf_mr, offset, size, src);
  gdsync_receive(g, src, buf_mr, offset, size);
}

// Forwards to gdsync_stream_wait_recv_complete(g, src, stream).
inline void stream_wait_recv_complete(struct ::gdsync* g, int src, cudaStream_t stream)
{
  // LOGPRINTF("gdsync_stream_wait_recv_complete(%p) rank(w%i) src(%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src, (void*)stream);
  gdsync_stream_wait_recv_complete(g, src, stream);
}

// Forwards to gdsync_cpu_ack_recv(g, src).
inline void cpu_ack_recv(struct ::gdsync* g, int src)
{
  // LOGPRINTF("gdsync_cpu_ack_recv(%p) rank(w%i) src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src);
  gdsync_cpu_ack_recv(g, src);
}

// Returns the int produced by gdsync_is_receive_complete(g, src), unchanged.
inline int is_receive_complete(struct ::gdsync* g, int src)
{
  // LOGPRINTF("gdsync_is_receive_complete(%p) rank(w%i) src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src);
  int complete = gdsync_is_receive_complete(g, src);
  // LOGPRINTF("gdsync_is_receive_complete(%p) rank(w%i) src(%i) done -> %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src, complete);
  return complete;
}

// Forwards to gdsync_wait_receive_complete(g, src).
inline void wait_receive_complete(struct ::gdsync* g, int src)
{
  // LOGPRINTF("gdsync_wait_receive_complete(%p) rank(w%i) src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src);
  gdsync_wait_receive_complete(g, src);
}

// Forwards to gdsync_stream_send; note the wrapped call takes (dest, stream)
// before the buffer description, in the same order as this wrapper.
inline void stream_send(struct ::gdsync* g, int dest, cudaStream_t stream, struct ::ibv_mr* buf_mr, size_t offset, size_t size)
{
  // LOGPRINTF("gdsync_stream_send(%p) rank(w%i) %p+%zu[%zu] dst(%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), buf_mr, offset, size, dest, (void*)stream);
  gdsync_stream_send(g, dest, stream, buf_mr, offset, size);
}

// Forwards to gdsync_isend(g, dest, buf_mr, offset, size).
inline void isend(struct ::gdsync* g, int dest, struct ::ibv_mr* buf_mr, size_t offset, size_t size)
{
  // LOGPRINTF("gdsync_isend(%p) rank(w%i) %p+%zu[%zu] dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), buf_mr, offset, size, dest);
  gdsync_isend(g, dest, buf_mr, offset, size);
}

// Forwards to gdsync_stream_wait_send_complete(g, dest, stream).
inline void stream_wait_send_complete(struct ::gdsync* g, int dest, cudaStream_t stream)
{
  // LOGPRINTF("gdsync_stream_wait_send_complete(%p) rank(w%i) dst(%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest, stream);
  gdsync_stream_wait_send_complete(g, dest, stream);
}

// Forwards to gdsync_cpu_ack_isend(g, dest).
inline void cpu_ack_isend(struct ::gdsync* g, int dest)
{
  // LOGPRINTF("gdsync_cpu_ack_isend(%p) rank(w%i) dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest);
  gdsync_cpu_ack_isend(g, dest);
}

// Returns the int produced by gdsync_is_send_complete(g, dest), unchanged.
inline int is_send_complete(struct ::gdsync* g, int dest)
{
  // LOGPRINTF("gdsync_is_send_complete(%p) rank(w%i) dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest);
  int complete = gdsync_is_send_complete(g, dest);
  // LOGPRINTF("gdsync_is_send_complete(%p) rank(w%i) dst(%i) done -> %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest, complete);
  return complete;
}

// Forwards to gdsync_wait_send_complete(g, dest).
inline void wait_send_complete(struct ::gdsync* g, int dest)
{
  // LOGPRINTF("gdsync_wait_send_complete(%p) rank(w%i) dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest);
  gdsync_wait_send_complete(g, dest);
}

} // namespace gdsync

} // namespace detail

#endif // COMB_ENABLE_GDSYNC

#endif // _UTILS_GDSYNC_HPP


--------------------------------------------------------------------------------
/include/comm_utils_gpump.hpp:
--------------------------------------------------------------------------------
//////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
//
// Produced at the Lawrence Livermore National Laboratory
//
// LLNL-CODE-758885
//
// All rights reserved.
//
// This file is part of Comb.
//
// For details, see https://github.com/LLNL/Comb
// Please also see the LICENSE file for MIT license.
//////////////////////////////////////////////////////////////////////////////

#ifndef _UTILS_GPUMP_HPP
#define _UTILS_GPUMP_HPP

#include "config.hpp"

#ifdef COMB_ENABLE_GPUMP

// NOTE(review): the header names of the three bare #include directives below
// were lost when this text was extracted. The code in this file uses assert
// and the gpump_* C API, so presumably <cassert> and the gpump library header
// are among them; restore from upstream.
#include

#include
#include

#include "exec_utils.hpp"
#include "comm_utils_mpi.hpp"

namespace detail {

// Inline wrappers over the gpump C API. Each one forwards its arguments to
// the gpump_* function of the same name and keeps a commented-out LOGPRINTF
// trace next to the call.
namespace gpump {

// Calls gpump_init(mpi_comm), asserts that the returned pointer is non-null
// and returns it.
inline struct ::gpump* init(MPI_Comm mpi_comm)
{
  // LOGPRINTF("gpump_init rank(w%i)\n", MPI::Comm_rank(MPI_COMM_WORLD));
  struct ::gpump* g = gpump_init(mpi_comm);
  // LOGPRINTF("gpump_init rank(w%i) done -> %p\n", MPI::Comm_rank(MPI_COMM_WORLD), g);
  assert(g != nullptr);
  return g;
}

// Forwards to gpump_term(g).
inline void term(struct ::gpump* g)
{
  // LOGPRINTF("gpump_term(%p) rank(w%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD));
  gpump_term(g);
}

// Forwards to gpump_connect_propose(g, target).
inline void connect_propose(struct ::gpump* g, int target)
{
  // LOGPRINTF("gpump_connect_propose(%p) rank(w%i) %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), target);
  gpump_connect_propose(g, target);
}

// Forwards to gpump_connect_accept(g, target).
inline void connect_accept(struct ::gpump* g, int target)
{
  // LOGPRINTF("gpump_connect_accept(%p) rank(w%i) %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), target);
  gpump_connect_accept(g, target);
}

// Forwards to gpump_disconnect(g, target).
inline void disconnect(struct ::gpump* g, int target)
{
  // LOGPRINTF("gpump_disconnect(%p) rank(w%i) %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), target);
  gpump_disconnect(g, target);
}

// Returns the ibv_mr pointer produced by gpump_register_region(g, ptr, size).
// Unlike init(), the result is not checked here.
inline struct ::ibv_mr* register_region(struct ::gpump* g, void* ptr, size_t size)
{
  // LOGPRINTF("gpump_register_region(%p) rank(w%i) %p[%zu]\n", g, MPI::Comm_rank(MPI_COMM_WORLD), ptr, size);
  struct ::ibv_mr* mr = gpump_register_region(g, ptr, size);
  // LOGPRINTF("gpump_register_region(%p) rank(w%i) %p[%zu] done -> %p\n", g, MPI::Comm_rank(MPI_COMM_WORLD), ptr, size, mr);
  return mr;
}

// Forwards to gpump_deregister_region(g, mr).
inline void deregister_region(struct ::gpump* g, struct ::ibv_mr* mr)
{
  // LOGPRINTF("gpump_deregister_region(%p) rank(w%i) %p\n", g, MPI::Comm_rank(MPI_COMM_WORLD), mr);
  gpump_deregister_region(g, mr);
}

// Forwards to gpump_cork(g).
inline void cork(struct ::gpump* g)
{
  // LOGPRINTF("gpump_cork(%p) rank(w%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD));
  gpump_cork(g);
}

// Forwards to gpump_uncork(g, stream).
inline void uncork(struct ::gpump* g, cudaStream_t stream)
{
  // LOGPRINTF("gpump_uncork(%p) rank(w%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), (void*)stream);
  gpump_uncork(g, stream);
}

// Forwards to gpump_receive; buf_mr/offset/size are passed through unchanged.
inline void receive(struct ::gpump* g, int src, struct ::ibv_mr* buf_mr, size_t offset, size_t size)
{
  // LOGPRINTF("gpump_receive(%p) rank(w%i) %p+%zu[%zu] src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), buf_mr, offset, size, src);
  gpump_receive(g, src, buf_mr, offset, size);
}

// Forwards to gpump_stream_wait_recv_complete(g, src, stream).
inline void stream_wait_recv_complete(struct ::gpump* g, int src, cudaStream_t stream)
{
  // LOGPRINTF("gpump_stream_wait_recv_complete(%p) rank(w%i) src(%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src, (void*)stream);
  gpump_stream_wait_recv_complete(g, src, stream);
}

// Forwards to gpump_cpu_ack_recv(g, src).
inline void cpu_ack_recv(struct ::gpump* g, int src)
{
  // LOGPRINTF("gpump_cpu_ack_recv(%p) rank(w%i) src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src);
  gpump_cpu_ack_recv(g, src);
}

// Returns the int produced by gpump_is_receive_complete(g, src), unchanged.
inline int is_receive_complete(struct ::gpump* g, int src)
{
  // LOGPRINTF("gpump_is_receive_complete(%p) rank(w%i) src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src);
  int complete = gpump_is_receive_complete(g, src);
  // LOGPRINTF("gpump_is_receive_complete(%p) rank(w%i) src(%i) done -> %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src, complete);
  return complete;
}

// Forwards to gpump_wait_receive_complete(g, src).
inline void wait_receive_complete(struct ::gpump* g, int src)
{
  // LOGPRINTF("gpump_wait_receive_complete(%p) rank(w%i) src(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), src);
  gpump_wait_receive_complete(g, src);
}

// Forwards to gpump_stream_send; note the wrapped call takes (dest, stream)
// before the buffer description, in the same order as this wrapper.
inline void stream_send(struct ::gpump* g, int dest, cudaStream_t stream, struct ::ibv_mr* buf_mr, size_t offset, size_t size)
{
  // LOGPRINTF("gpump_stream_send(%p) rank(w%i) %p+%zu[%zu] dst(%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), buf_mr, offset, size, dest, (void*)stream);
  gpump_stream_send(g, dest, stream, buf_mr, offset, size);
}

// Forwards to gpump_isend(g, dest, buf_mr, offset, size).
inline void isend(struct ::gpump* g, int dest, struct ::ibv_mr* buf_mr, size_t offset, size_t size)
{
  // LOGPRINTF("gpump_isend(%p) rank(w%i) %p+%zu[%zu] dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), buf_mr, offset, size, dest);
  gpump_isend(g, dest, buf_mr, offset, size);
}

// Forwards to gpump_stream_wait_send_complete(g, dest, stream).
inline void stream_wait_send_complete(struct ::gpump* g, int dest, cudaStream_t stream)
{
  // LOGPRINTF("gpump_stream_wait_send_complete(%p) rank(w%i) dst(%i) stream(%p)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest, stream);
  gpump_stream_wait_send_complete(g, dest, stream);
}

// Forwards to gpump_cpu_ack_isend(g, dest).
inline void cpu_ack_isend(struct ::gpump* g, int dest)
{
  // LOGPRINTF("gpump_cpu_ack_isend(%p) rank(w%i) dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest);
  gpump_cpu_ack_isend(g, dest);
}

// Returns the int produced by gpump_is_send_complete(g, dest), unchanged.
inline int is_send_complete(struct ::gpump* g, int dest)
{
  // LOGPRINTF("gpump_is_send_complete(%p) rank(w%i) dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest);
  int complete = gpump_is_send_complete(g, dest);
  // LOGPRINTF("gpump_is_send_complete(%p) rank(w%i) dst(%i) done -> %i\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest, complete);
  return complete;
}

// Forwards to gpump_wait_send_complete(g, dest).
inline void wait_send_complete(struct ::gpump* g, int dest)
{
  // LOGPRINTF("gpump_wait_send_complete(%p) rank(w%i) dst(%i)\n", g, MPI::Comm_rank(MPI_COMM_WORLD), dest);
  gpump_wait_send_complete(g, dest);
}

} // namespace gpump

} // namespace detail

#endif // COMB_ENABLE_GPUMP

#endif // _UTILS_GPUMP_HPP


--------------------------------------------------------------------------------
/include/comm_utils_mp.hpp:
--------------------------------------------------------------------------------
//////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
//
// Produced at the Lawrence Livermore National Laboratory
//
// LLNL-CODE-758885
//
// All rights reserved.
//
// This file is part of Comb.
//
// For details, see https://github.com/LLNL/Comb
// Please also see the LICENSE file for MIT license.
14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #ifndef _UTILS_MP_HPP 17 | #define _UTILS_MP_HPP 18 | 19 | #include "config.hpp" 20 | 21 | #ifdef COMB_ENABLE_MP 22 | 23 | #include 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "exec_utils.hpp" 31 | #include "exec_utils_cuda.hpp" 32 | #include "comm_utils_mpi.hpp" 33 | 34 | namespace detail { 35 | 36 | namespace mp { 37 | 38 | inline void init(MPI_Comm mpi_comm) 39 | { 40 | // LOGPRINTF("mp_init rank(w%i)\n", MPI::Comm_rank(MPI_COMM_WORLD)); 41 | int nranks = MPI::Comm_size(mpi_comm); 42 | std::vector ranks(nranks); 43 | std::iota(ranks.begin(), ranks.end(), 0); 44 | int gpuid = COMB::detail::cuda::get_device(); 45 | auto ret = mp_init(mpi_comm, ranks.data(), nranks, MP_INIT_DEFAULT, gpuid); 46 | // LOGPRINTF("mp_init rank(w%i) done\n", MPI::Comm_rank(MPI_COMM_WORLD)); 47 | assert(ret == MP_SUCCESS); 48 | } 49 | 50 | inline void finalize() 51 | { 52 | // LOGPRINTF("mp_finalize() rank(w%i)\n", MPI::Comm_rank(MPI_COMM_WORLD)); 53 | mp_finalize(); 54 | } 55 | 56 | inline mp_reg_t register_(void* ptr, size_t size) 57 | { 58 | mp_reg_t reg; 59 | // LOGPRINTF("mp_register() rank(w%i) %p[%zu]\n", MPI::Comm_rank(MPI_COMM_WORLD), ptr, size); 60 | auto ret = mp_register(ptr, size, ®); 61 | // LOGPRINTF("mp_register() rank(w%i) %p[%zu] done -> %p\n", MPI::Comm_rank(MPI_COMM_WORLD), ptr, size, (void*)reg); 62 | assert(ret == MP_SUCCESS); 63 | return reg; 64 | } 65 | 66 | inline void deregister(mp_reg_t& reg) 67 | { 68 | // LOGPRINTF("mp_deregister() rank(w%i) %p\n", MPI::Comm_rank(MPI_COMM_WORLD), (void*)reg); 69 | auto ret = mp_deregister(®); 70 | assert(ret == MP_SUCCESS); 71 | reg = nullptr; 72 | } 73 | 74 | inline void irecv(void *buf, size_t size, int src, mp_reg_t* reg, mp_request_t *req) 75 | { 76 | // LOGPRINTF("mp_irecv() rank(w%i) %p[%zu] src(%i) reg(%p) req(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), buf, size, src, reg, req); 77 | auto ret = 
mp_irecv(buf, size, src, reg, req); 78 | assert(ret == MP_SUCCESS); 79 | } 80 | 81 | inline void isend(void *buf, size_t size, int src, mp_reg_t* reg, mp_request_t *req) 82 | { 83 | // LOGPRINTF("mp_isend() rank(w%i) %p[%zu] src(%i) reg(%p) req(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), buf, size, src, reg, req); 84 | auto ret = mp_isend(buf, size, src, reg, req); 85 | assert(ret == MP_SUCCESS); 86 | } 87 | 88 | inline void send_on_stream(void *buf, size_t size, int dst, mp_reg_t* reg, mp_request_t *req, cudaStream_t stream) 89 | { 90 | // LOGPRINTF("mp_send_on_stream() rank(w%i) %p[%zu] dst(%i) reg(%p) req(%p) stream(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), buf, size, dst, reg, req, (void*)stream); 91 | auto ret = mp_send_on_stream(buf, size, dst, reg, req, stream); 92 | assert(ret == MP_SUCCESS); 93 | } 94 | 95 | inline void isend_on_stream(void *buf, size_t size, int dst, mp_reg_t* reg, mp_request_t *req, cudaStream_t stream) 96 | { 97 | // LOGPRINTF("mp_isend_on_stream() rank(w%i) %p[%zu] dst(%i) reg(%p) req(%p) stream(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), buf, size, dst, reg, req, (void*)stream); 98 | auto ret = mp_isend_on_stream(buf, size, dst, reg, req, stream); 99 | assert(ret == MP_SUCCESS); 100 | } 101 | 102 | inline void wait_on_stream(mp_request_t *req, cudaStream_t stream) 103 | { 104 | // LOGPRINTF("mp_wait_on_stream() rank(w%i) req(%p) stream(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), req, (void*)stream); 105 | auto ret = mp_wait_on_stream(req, stream); 106 | assert(ret == MP_SUCCESS); 107 | } 108 | 109 | inline void wait_all_on_stream(size_t count, mp_request_t *req, cudaStream_t stream) 110 | { 111 | // LOGPRINTF("mp_wait_all_on_stream() rank(w%i) count (%zu) req(%p) stream(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), count, req, (void*)stream); 112 | auto ret = mp_wait_all_on_stream(count, req, stream); 113 | assert(ret == MP_SUCCESS); 114 | } 115 | 116 | inline void wait(mp_request_t *req) 117 | { 118 | // LOGPRINTF("mp_wait() rank(w%i) req(%p)\n", 
MPI::Comm_rank(MPI_COMM_WORLD), req); 119 | auto ret = mp_wait(req); 120 | assert(ret == MP_SUCCESS); 121 | } 122 | 123 | inline void wait_all(size_t count, mp_request_t *req) 124 | { 125 | // LOGPRINTF("mp_wait_all() rank(w%i) count (%zu) req(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), count, req); 126 | auto ret = mp_wait_all(count, req); 127 | assert(ret == MP_SUCCESS); 128 | } 129 | 130 | inline void progress_all(size_t count, mp_request_t *req) 131 | { 132 | // LOGPRINTF("mp_progress_all() rank(w%i) count (%zu) req(%p)\n", MPI::Comm_rank(MPI_COMM_WORLD), count, req); 133 | auto ret = mp_progress_all(count, req); 134 | assert(ret == MP_SUCCESS); 135 | } 136 | 137 | } // namespace mp 138 | 139 | } // namespace detail 140 | 141 | #endif // COMB_ENABLE_MP 142 | 143 | #endif // _UTILS_MP_HPP 144 | 145 | -------------------------------------------------------------------------------- /include/comm_utils_umr.hpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 
14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #ifndef _UTILS_UMR_HPP 17 | #define _UTILS_UMR_HPP 18 | 19 | #include "config.hpp" 20 | 21 | #ifdef COMB_ENABLE_UMR 22 | 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | namespace detail { 29 | 30 | namespace UMR { 31 | 32 | inline int Init_thread(int* argc, char***argv, int required) 33 | { 34 | int provided = required; 35 | // LOGPRINTF("UMR_Init_thread\n"); 36 | int ret = UMR_Init_thread(argc, argv, required, &provided); 37 | // LOGPRINTF("UMR_Init_thread done rank(w%i)\n", Comm_rank(UMR_COMM_WORLD)); 38 | assert(ret == UMR_SUCCESS); 39 | //assert(required == provided); 40 | return provided; 41 | } 42 | 43 | inline void Finalize() 44 | { 45 | // LOGPRINTF("UMR_Finalize\n"); 46 | int ret = UMR_Finalize(); 47 | assert(ret == UMR_SUCCESS); 48 | } 49 | 50 | inline void Irecv(void *buf, int count, UMR_Datatype umr_type, int src, int tag, UMR_Comm comm, UMR_Request *request) 51 | { 52 | // LOGPRINTF("UMR_Irecv rank(w%i) %p[%i] src(%i) tag(%i)\n", Comm_rank(UMR_COMM_WORLD), buf, count, src, tag); 53 | int ret = UMR_Irecv(buf, count, umr_type, src, tag, comm, request); 54 | assert(ret == UMR_SUCCESS); 55 | } 56 | 57 | inline void Isend(const void *buf, int count, UMR_Datatype umr_type, int dest, int tag, UMR_Comm comm, UMR_Request *request) 58 | { 59 | // LOGPRINTF("UMR_Isend rank(w%i) %p[%i] dst(%i) tag(%i)\n", Comm_rank(UMR_COMM_WORLD), buf, count, dest, tag); 60 | int ret = UMR_Isend(buf, count, umr_type, dest, tag, comm, request); 61 | assert(ret == UMR_SUCCESS); 62 | } 63 | 64 | inline void Wait(UMR_Request *request, UMR_Status *status) 65 | { 66 | // LOGPRINTF("UMR_Wait rank(w%i)\n", Comm_rank(UMR_COMM_WORLD)); 67 | int ret = UMR_Wait(request, status); 68 | assert(ret == UMR_SUCCESS); 69 | } 70 | 71 | inline bool Test(UMR_Request *request, UMR_Status *status) 72 | { 73 | int completed = 0; 74 | // LOGPRINTF("UMR_Test rank(w%i)\n", Comm_rank(UMR_COMM_WORLD)); 75 | 
int ret = UMR_Test(request, &completed, status); 76 | assert(ret == UMR_SUCCESS); 77 | return completed; 78 | } 79 | 80 | inline int Waitany(int count, UMR_Request *requests, UMR_Status *status) 81 | { 82 | int idx = -1; 83 | // LOGPRINTF("UMR_Waitany rank(w%i) count(%i)\n", Comm_rank(UMR_COMM_WORLD), count); 84 | int ret = UMR_Waitany(count, requests, &idx, status); 85 | assert(ret == UMR_SUCCESS); 86 | return idx; 87 | } 88 | 89 | inline int Testany(int count, UMR_Request *requests, UMR_Status *status) 90 | { 91 | int completed = 0; 92 | int indx = -1; 93 | // LOGPRINTF("UMR_Testany rank(w%i) count(%i)\n", Comm_rank(UMR_COMM_WORLD), count); 94 | int ret = UMR_Testany(count, requests, &indx, &completed, status); 95 | assert(ret == UMR_SUCCESS); 96 | return completed ? indx : -1; 97 | } 98 | 99 | inline int Waitsome(int incount, UMR_Request *requests, int* indcs, UMR_Status *statuses) 100 | { 101 | int outcount = 0; 102 | // LOGPRINTF("UMR_Waitsome rank(w%i) incount(%i)\n", Comm_rank(UMR_COMM_WORLD), incount); 103 | int ret = UMR_Waitsome(incount, requests, &outcount, indcs, statuses); 104 | assert(ret == UMR_SUCCESS); 105 | return outcount; 106 | } 107 | 108 | inline int Testsome(int incount, UMR_Request *requests, int* indcs, UMR_Status *statuses) 109 | { 110 | int outcount = 0; 111 | // LOGPRINTF("UMR_Testsome rank(w%i) incount(%i)\n", Comm_rank(UMR_COMM_WORLD), incount); 112 | int ret = UMR_Testsome(incount, requests, &outcount, indcs, statuses); 113 | assert(ret == UMR_SUCCESS); 114 | return outcount; 115 | } 116 | 117 | inline void Waitall(int count, UMR_Request *requests, UMR_Status *statuses) 118 | { 119 | // LOGPRINTF("UMR_Waitall rank(w%i) count(%i)\n", Comm_rank(UMR_COMM_WORLD), count); 120 | int ret = UMR_Waitall(count, requests, statuses); 121 | assert(ret == UMR_SUCCESS); 122 | } 123 | 124 | inline bool Testall(int count, UMR_Request *requests, UMR_Status *statuses) 125 | { 126 | int completed = 0; 127 | // LOGPRINTF("UMR_Testall rank(w%i) 
count(%i)\n", Comm_rank(UMR_COMM_WORLD), count); 128 | int ret = UMR_Testall(count, requests, &completed, statuses); 129 | assert(ret == UMR_SUCCESS); 130 | return completed; 131 | } 132 | 133 | } // namespace UMR 134 | 135 | } // namespace detail 136 | 137 | #endif // COMB_ENABLE_UMR 138 | 139 | #endif // _UTILS_UMR_HPP 140 | 141 | -------------------------------------------------------------------------------- /include/exec.hpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #ifndef _EXEC_HPP 17 | #define _EXEC_HPP 18 | 19 | #include "config.hpp" 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | #include "exec_utils.hpp" 28 | #include "memory.hpp" 29 | #include "ExecContext.hpp" 30 | 31 | #include "exec_fused.hpp" 32 | 33 | #include "exec_pol_seq.hpp" 34 | #include "exec_pol_omp.hpp" 35 | #include "exec_pol_cuda.hpp" 36 | #include "exec_pol_cuda_graph.hpp" 37 | #include "exec_pol_hip.hpp" 38 | #include "exec_pol_mpi_type.hpp" 39 | #include "exec_pol_raja.hpp" 40 | 41 | namespace COMB { 42 | 43 | template < typename my_context_type > 44 | struct ContextHolder 45 | { 46 | using context_type = my_context_type; 47 | 48 | bool m_available = false; 49 | 50 | bool available() const 51 | { 52 | return m_available; 53 | } 54 | 55 | template < typename ... Ts > 56 | void create(Ts&&... 
args) 57 | { 58 | destroy(); 59 | m_context = new context_type(std::forward(args)...); 60 | } 61 | 62 | context_type& get() 63 | { 64 | assert(m_context != nullptr); 65 | return *m_context; 66 | } 67 | 68 | void destroy() 69 | { 70 | if (m_context) { 71 | delete m_context; 72 | m_context = nullptr; 73 | } 74 | } 75 | 76 | ~ContextHolder() 77 | { 78 | destroy(); 79 | } 80 | 81 | private: 82 | context_type* m_context = nullptr; 83 | }; 84 | 85 | struct Executors 86 | { 87 | Executors() 88 | { } 89 | 90 | Executors(Executors const&) = delete; 91 | Executors(Executors &&) = delete; 92 | Executors& operator=(Executors const&) = delete; 93 | Executors& operator=(Executors &&) = delete; 94 | 95 | void create_executors(Allocators& alocs) 96 | { 97 | base_cpu.create(); 98 | #ifdef COMB_ENABLE_MPI 99 | base_mpi.create(); 100 | #endif 101 | #ifdef COMB_ENABLE_CUDA 102 | base_cuda.create(); 103 | #endif 104 | #ifdef COMB_ENABLE_HIP 105 | base_hip.create(); 106 | #endif 107 | #ifdef COMB_ENABLE_RAJA 108 | base_raja_cpu.create(); 109 | #ifdef COMB_ENABLE_CUDA 110 | base_raja_cuda.create(); 111 | #endif 112 | #ifdef COMB_ENABLE_HIP 113 | base_raja_hip.create(); 114 | #endif 115 | #endif 116 | 117 | seq.create(base_cpu.get(), alocs.host.allocator()); 118 | #ifdef COMB_ENABLE_OPENMP 119 | omp.create(base_cpu.get(), alocs.host.allocator()); 120 | #endif 121 | #ifdef COMB_ENABLE_CUDA 122 | cuda.create(base_cuda.get(), (alocs.access.use_device_preferred_for_cuda_util_aloc) ? alocs.cuda_managed_device_preferred_host_accessed.allocator() : alocs.cuda_hostpinned.allocator()); 123 | #endif 124 | #ifdef COMB_ENABLE_CUDA_GRAPH 125 | cuda_graph.create(base_cuda.get(), (alocs.access.use_device_preferred_for_cuda_util_aloc) ? alocs.cuda_managed_device_preferred_host_accessed.allocator() : alocs.cuda_hostpinned.allocator()); 126 | #endif 127 | #ifdef COMB_ENABLE_HIP 128 | hip.create(base_hip.get(), (alocs.access.use_device_for_hip_util_aloc) ? 
alocs.hip_device.allocator() : alocs.hip_hostpinned_coarse.allocator()); 129 | #endif 130 | #ifdef COMB_ENABLE_MPI 131 | mpi_type.create(base_mpi.get(), alocs.host.allocator()); 132 | #endif 133 | #ifdef COMB_ENABLE_RAJA 134 | raja_seq.create(base_raja_cpu.get(), alocs.host.allocator()); 135 | #ifdef COMB_ENABLE_OPENMP 136 | raja_omp.create(base_raja_cpu.get(), alocs.host.allocator()); 137 | #endif 138 | #ifdef COMB_ENABLE_CUDA 139 | raja_cuda.create(base_raja_cuda.get(), (alocs.access.use_device_preferred_for_cuda_util_aloc) ? alocs.cuda_managed_device_preferred_host_accessed.allocator() : alocs.cuda_hostpinned.allocator()); 140 | #endif 141 | #ifdef COMB_ENABLE_HIP 142 | raja_hip.create(base_raja_hip.get(), (alocs.access.use_device_for_hip_util_aloc) ? alocs.hip_device.allocator() : alocs.hip_hostpinned_coarse.allocator()); 143 | #endif 144 | #endif 145 | } 146 | 147 | ContextHolder base_cpu; 148 | #ifdef COMB_ENABLE_MPI 149 | ContextHolder base_mpi; 150 | #endif 151 | #ifdef COMB_ENABLE_CUDA 152 | ContextHolder base_cuda; 153 | #endif 154 | #ifdef COMB_ENABLE_HIP 155 | ContextHolder base_hip; 156 | #endif 157 | #ifdef COMB_ENABLE_RAJA 158 | ContextHolder> base_raja_cpu; 159 | #ifdef COMB_ENABLE_CUDA 160 | ContextHolder> base_raja_cuda; 161 | #endif 162 | #ifdef COMB_ENABLE_HIP 163 | ContextHolder> base_raja_hip; 164 | #endif 165 | #endif 166 | 167 | ContextHolder> seq; 168 | #ifdef COMB_ENABLE_OPENMP 169 | ContextHolder> omp; 170 | #endif 171 | #ifdef COMB_ENABLE_CUDA 172 | ContextHolder> cuda; 173 | #ifdef COMB_ENABLE_CUDA_GRAPH 174 | ContextHolder> cuda_graph; 175 | #endif 176 | #endif 177 | #ifdef COMB_ENABLE_HIP 178 | ContextHolder> hip; 179 | #endif 180 | #ifdef COMB_ENABLE_MPI 181 | ContextHolder> mpi_type; 182 | #endif 183 | #ifdef COMB_ENABLE_RAJA 184 | ContextHolder> raja_seq; 185 | #ifdef COMB_ENABLE_OPENMP 186 | ContextHolder> raja_omp; 187 | #endif 188 | #ifdef COMB_ENABLE_CUDA 189 | ContextHolder> raja_cuda; 190 | #endif 191 | #ifdef COMB_ENABLE_HIP 
192 | ContextHolder> raja_hip; 193 | #endif 194 | #endif 195 | }; 196 | 197 | } // namespace COMB 198 | 199 | #endif // _EXEC_HPP 200 | -------------------------------------------------------------------------------- /include/exec_pol_cuda_graph.hpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #ifndef _POL_CUDA_GRAPH_HPP 17 | #define _POL_CUDA_GRAPH_HPP 18 | 19 | #include "config.hpp" 20 | 21 | #include "memory.hpp" 22 | 23 | #ifdef COMB_ENABLE_CUDA_GRAPH 24 | #include "exec_utils_graph_launch.hpp" 25 | 26 | struct cuda_graph_pol { 27 | static const bool async = true; 28 | static const char* get_name() { return "cudaGraph"; } 29 | using event_type = cuda::graph_launch::event_type; 30 | using component_type = cuda::graph_launch::component; 31 | using group_type = cuda::graph_launch::group; 32 | }; 33 | 34 | template < > 35 | struct ExecContext : CudaContext 36 | { 37 | using pol = cuda_graph_pol; 38 | using event_type = typename pol::event_type; 39 | using component_type = typename pol::component_type; 40 | using group_type = typename pol::group_type; 41 | 42 | using base = CudaContext; 43 | 44 | COMB::Allocator& util_aloc; 45 | 46 | #ifdef COMB_GRAPH_KERNEL_LAUNCH_COMPONENT_STREAMS 47 | component_type m_component; 48 | #endif 49 | 50 | 51 | ExecContext(base const& b, COMB::Allocator& util_aloc_) 52 | : base(b) 53 | , util_aloc(util_aloc_) 54 | #ifdef 
COMB_GRAPH_KERNEL_LAUNCH_COMPONENT_STREAMS 55 | , m_component{base(*this)} 56 | #endif 57 | { } 58 | 59 | void ensure_waitable() 60 | { 61 | cuda::graph_launch::force_launch(base::stream_launch()); 62 | } 63 | 64 | template < typename context > 65 | void waitOn(context& con) 66 | { 67 | con.ensure_waitable(); 68 | base::waitOn(con); 69 | } 70 | 71 | void synchronize() 72 | { 73 | cuda::graph_launch::synchronize(base::stream_launch()); 74 | } 75 | 76 | group_type create_group() 77 | { 78 | return cuda::graph_launch::create_group(); 79 | } 80 | 81 | void start_group(group_type group) 82 | { 83 | cuda::graph_launch::set_active_group(group); 84 | } 85 | 86 | void finish_group(group_type) 87 | { 88 | cuda::graph_launch::force_launch(base::stream_launch()); 89 | } 90 | 91 | void destroy_group(group_type group) 92 | { 93 | cuda::graph_launch::destroy_group(group); 94 | } 95 | 96 | component_type create_component() 97 | { 98 | return component_type{}; 99 | } 100 | 101 | void start_component(group_type, component_type component) 102 | { 103 | #ifdef COMB_GRAPH_KERNEL_LAUNCH_COMPONENT_STREAMS 104 | m_component = component; 105 | m_component.m_con.waitOn(base(*this)); 106 | #endif 107 | } 108 | 109 | void finish_component(group_type, component_type component) 110 | { 111 | #ifdef COMB_GRAPH_KERNEL_LAUNCH_COMPONENT_STREAMS 112 | base::waitOn(component.m_con); 113 | m_component.m_con = base(*this); 114 | #endif 115 | } 116 | 117 | void destroy_component(component_type) 118 | { 119 | #ifdef COMB_GRAPH_KERNEL_LAUNCH_COMPONENT_STREAMS 120 | m_component.m_con = base(*this); 121 | #endif 122 | } 123 | 124 | event_type createEvent() 125 | { 126 | return cuda::graph_launch::createEvent(); 127 | } 128 | 129 | void recordEvent(event_type& event) 130 | { 131 | return cuda::graph_launch::recordEvent(event, base::stream()); 132 | } 133 | 134 | void finish_component_recordEvent(group_type group, component_type component, event_type& event) 135 | { 136 | finish_component(group, component); 
137 | recordEvent(event); 138 | } 139 | 140 | bool queryEvent(event_type& event) 141 | { 142 | return cuda::graph_launch::queryEvent(event); 143 | } 144 | 145 | void waitEvent(event_type& event) 146 | { 147 | cuda::graph_launch::waitEvent(event); 148 | } 149 | 150 | void destroyEvent(event_type& event) 151 | { 152 | cuda::graph_launch::destroyEvent(event); 153 | } 154 | 155 | template < typename body_type > 156 | void for_all(IdxT len, body_type&& body) 157 | { 158 | cuda::graph_launch::for_all(len, std::forward(body) 159 | #ifdef COMB_GRAPH_KERNEL_LAUNCH 160 | , m_component.m_con.stream_launch() 161 | #endif 162 | ); 163 | // m_component.m_con.synchronize(); 164 | } 165 | 166 | template < typename body_type > 167 | void for_all_2d(IdxT len0, IdxT len1, body_type&& body) 168 | { 169 | cuda::graph_launch::for_all_2d(len0, len1, std::forward(body) 170 | #ifdef COMB_GRAPH_KERNEL_LAUNCH 171 | , m_component.m_con.stream_launch() 172 | #endif 173 | ); 174 | // m_component.m_con.synchronize(); 175 | } 176 | 177 | template < typename body_type > 178 | void for_all_3d(IdxT len0, IdxT len1, IdxT len2, body_type&& body) 179 | { 180 | cuda::graph_launch::for_all_3d(len0, len1, len2, std::forward(body) 181 | #ifdef COMB_GRAPH_KERNEL_LAUNCH 182 | , m_component.m_con.stream_launch() 183 | #endif 184 | ); 185 | // m_component.m_con.synchronize(); 186 | } 187 | 188 | template < typename body_type > 189 | void fused(IdxT len_outer, IdxT len_inner, IdxT len_hint, body_type&& body_in) 190 | { 191 | COMB::ignore_unused(len_hint); 192 | for (IdxT i_outer = 0; i_outer < len_outer; ++i_outer) { 193 | auto body = body_in; 194 | body.set_outer(i_outer); 195 | for (IdxT i_inner = 0; i_inner < len_inner; ++i_inner) { 196 | body.set_inner(i_inner); 197 | cuda::graph_launch::for_all(body.len, body 198 | #ifdef COMB_GRAPH_KERNEL_LAUNCH 199 | , m_component.m_con.stream_launch() 200 | #endif 201 | ); 202 | } 203 | } 204 | // m_component.m_con.synchronize(); 205 | } 206 | 207 | }; 208 | 209 | 
#endif // COMB_ENABLE_CUDA_GRAPH 210 | 211 | #endif // _POL_CUDA_GRAPH_HPP 212 | -------------------------------------------------------------------------------- /include/exec_pol_mpi_type.hpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #ifndef _POL_MPI_TYPE_HPP 17 | #define _POL_MPI_TYPE_HPP 18 | 19 | #include "config.hpp" 20 | 21 | 22 | #include "memory.hpp" 23 | 24 | #ifdef COMB_ENABLE_MPI 25 | 26 | struct mpi_type_component 27 | { 28 | void* ptr = nullptr; 29 | }; 30 | 31 | struct mpi_type_group 32 | { 33 | void* ptr = nullptr; 34 | }; 35 | 36 | // execution policy indicating that message packing/unpacking should be done 37 | // in MPI using MPI_Types 38 | struct mpi_type_pol { 39 | static const bool async = false; 40 | static const char* get_name() { return "mpi_type"; } 41 | using event_type = int; 42 | using component_type = mpi_type_component; 43 | using group_type = mpi_type_group; 44 | }; 45 | 46 | template < > 47 | struct ExecContext : MPIContext 48 | { 49 | using pol = mpi_type_pol; 50 | using event_type = typename pol::event_type; 51 | using component_type = typename pol::component_type; 52 | using group_type = typename pol::group_type; 53 | 54 | using base = MPIContext; 55 | 56 | COMB::Allocator& util_aloc; 57 | 58 | 59 | ExecContext(base const& b, COMB::Allocator& util_aloc_) 60 | : base(b) 61 | , util_aloc(util_aloc_) 62 | { } 63 | 64 | void ensure_waitable() 65 | { 66 | 67 
| } 68 | 69 | template < typename context > 70 | void waitOn(context& con) 71 | { 72 | con.ensure_waitable(); 73 | base::waitOn(con); 74 | } 75 | 76 | // synchronization functions 77 | void synchronize() 78 | { 79 | } 80 | 81 | group_type create_group() 82 | { 83 | return group_type{}; 84 | } 85 | 86 | void start_group(group_type) 87 | { 88 | } 89 | 90 | void finish_group(group_type) 91 | { 92 | } 93 | 94 | void destroy_group(group_type) 95 | { 96 | 97 | } 98 | 99 | component_type create_component() 100 | { 101 | return component_type{}; 102 | } 103 | 104 | void start_component(group_type, component_type) 105 | { 106 | 107 | } 108 | 109 | void finish_component(group_type, component_type) 110 | { 111 | 112 | } 113 | 114 | void destroy_component(component_type) 115 | { 116 | 117 | } 118 | 119 | // event creation functions 120 | event_type createEvent() 121 | { 122 | return event_type{}; 123 | } 124 | 125 | // event record functions 126 | void recordEvent(event_type&) 127 | { 128 | } 129 | 130 | void finish_component_recordEvent(group_type group, component_type component, event_type& event) 131 | { 132 | finish_component(group, component); 133 | recordEvent(event); 134 | } 135 | 136 | // event query functions 137 | bool queryEvent(event_type&) 138 | { 139 | return true; 140 | } 141 | 142 | // event wait functions 143 | void waitEvent(event_type&) 144 | { 145 | } 146 | 147 | // event destroy functions 148 | void destroyEvent(event_type&) 149 | { 150 | } 151 | 152 | // template < typename body_type > 153 | // void for_all(IdxT len, body_type&& body) 154 | // { 155 | // COMB::ignore_unused(pol, len, body); 156 | // static_assert(false, "This method should never be used"); 157 | // } 158 | 159 | // template < typename body_type > 160 | // void for_all_2d(IdxT len0, IdxT len1, body_type&& body) 161 | // { 162 | // COMB::ignore_unused(pol, len0, len1, body); 163 | // static_assert(false, "This method should never be used"); 164 | // } 165 | 166 | // template < typename 
body_type > 167 | // void for_all_3d(IdxT len0, IdxT len1, IdxT len2, body_type&& body) 168 | // { 169 | // COMB::ignore_unused(pol, len0, len1, len2, body); 170 | // static_assert(false, "This method should never be used"); 171 | // } 172 | 173 | // template < typename body_type > 174 | // void fused(IdxT len_outer, IdxT len_inner, IdxT len_hint, body_type&& body_in) 175 | // { 176 | // COMB::ignore_unused(pol, len_outer, len_inner, body_in); 177 | // static_assert(false, "This method should never be used"); 178 | // } 179 | 180 | }; 181 | 182 | #endif 183 | 184 | #endif // _POL_MPI_TYPE_HPP 185 | -------------------------------------------------------------------------------- /include/exec_pol_seq.hpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 
14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #ifndef _POL_SEQ_HPP 17 | #define _POL_SEQ_HPP 18 | 19 | #include "config.hpp" 20 | 21 | #include "exec_utils.hpp" 22 | #include "memory.hpp" 23 | 24 | struct seq_component 25 | { 26 | void* ptr = nullptr; 27 | }; 28 | 29 | struct seq_group 30 | { 31 | void* ptr = nullptr; 32 | }; 33 | 34 | struct seq_pol { 35 | static const bool async = false; 36 | static const char* get_name() { return "seq"; } 37 | using event_type = int; 38 | using component_type = seq_component; 39 | using group_type = seq_group; 40 | }; 41 | 42 | template < > 43 | struct ExecContext : CPUContext 44 | { 45 | using pol = seq_pol; 46 | using event_type = typename pol::event_type; 47 | using component_type = typename pol::component_type; 48 | using group_type = typename pol::group_type; 49 | 50 | using base = CPUContext; 51 | 52 | COMB::Allocator& util_aloc; 53 | 54 | 55 | ExecContext(base const& b, COMB::Allocator& util_aloc_) 56 | : base(b) 57 | , util_aloc(util_aloc_) 58 | { } 59 | 60 | void ensure_waitable() 61 | { 62 | 63 | } 64 | 65 | template < typename context > 66 | void waitOn(context& con) 67 | { 68 | con.ensure_waitable(); 69 | base::waitOn(con); 70 | } 71 | 72 | // synchronization functions 73 | void synchronize() 74 | { 75 | } 76 | 77 | group_type create_group() 78 | { 79 | return group_type{}; 80 | } 81 | 82 | void start_group(group_type) 83 | { 84 | } 85 | 86 | void finish_group(group_type) 87 | { 88 | } 89 | 90 | void destroy_group(group_type) 91 | { 92 | 93 | } 94 | 95 | component_type create_component() 96 | { 97 | return component_type{}; 98 | } 99 | 100 | void start_component(group_type, component_type) 101 | { 102 | 103 | } 104 | 105 | void finish_component(group_type, component_type) 106 | { 107 | 108 | } 109 | 110 | void destroy_component(component_type) 111 | { 112 | 113 | } 114 | 115 | // event creation functions 116 | event_type createEvent() 117 | { 118 | return event_type{}; 
119 | } 120 | 121 | // event record functions 122 | void recordEvent(event_type&) 123 | { 124 | } 125 | 126 | void finish_component_recordEvent(group_type group, component_type component, event_type& event) 127 | { 128 | finish_component(group, component); 129 | recordEvent(event); 130 | } 131 | 132 | // event query functions 133 | bool queryEvent(event_type&) 134 | { 135 | return true; 136 | } 137 | 138 | // event wait functions 139 | void waitEvent(event_type&) 140 | { 141 | } 142 | 143 | // event destroy functions 144 | void destroyEvent(event_type&) 145 | { 146 | } 147 | 148 | // for_all functions 149 | template < typename body_type > 150 | void for_all(IdxT len, body_type&& body) 151 | { 152 | for(IdxT i0 = 0; i0 < len; ++i0) { 153 | body(i0); 154 | } 155 | // base::synchronize(); 156 | } 157 | 158 | template < typename body_type > 159 | void for_all_2d(IdxT len0, IdxT len1, body_type&& body) 160 | { 161 | for(IdxT i0 = 0; i0 < len0; ++i0) { 162 | for(IdxT i1 = 0; i1 < len1; ++i1) { 163 | body(i0, i1); 164 | } 165 | } 166 | // base::synchronize(); 167 | } 168 | 169 | template < typename body_type > 170 | void for_all_3d(IdxT len0, IdxT len1, IdxT len2, body_type&& body) 171 | { 172 | for(IdxT i0 = 0; i0 < len0; ++i0) { 173 | for(IdxT i1 = 0; i1 < len1; ++i1) { 174 | for(IdxT i2 = 0; i2 < len2; ++i2) { 175 | body(i0, i1, i2); 176 | } 177 | } 178 | } 179 | // base::synchronize(); 180 | } 181 | 182 | template < typename body_type > 183 | void fused(IdxT len_outer, IdxT len_inner, IdxT len_hint, body_type&& body_in) 184 | { 185 | COMB::ignore_unused(len_hint); 186 | for (IdxT i_outer = 0; i_outer < len_outer; ++i_outer) { 187 | auto body = body_in; 188 | body.set_outer(i_outer); 189 | for (IdxT i_inner = 0; i_inner < len_inner; ++i_inner) { 190 | body.set_inner(i_inner); 191 | for (IdxT i = 0; i < body.len; ++i) { 192 | body(i); 193 | } 194 | } 195 | } 196 | // base::synchronize(); 197 | } 198 | 199 | 200 | }; 201 | 202 | #endif // _POL_SEQ_HPP 203 | 
-------------------------------------------------------------------------------- /include/exec_utils.hpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #ifndef _UTILS_HPP 17 | #define _UTILS_HPP 18 | 19 | #include "config.hpp" 20 | 21 | #include "print.hpp" 22 | 23 | #include 24 | #include 25 | 26 | using IdxT = int; 27 | using LidxT = int; 28 | using DataT = double; 29 | 30 | 31 | namespace detail { 32 | 33 | // std::exchange 34 | // taken from https://en.cppreference.com/w/cpp/utility/exchange 35 | // license http://creativecommons.org/licenses/by-sa/3.0/ 36 | template < typename T, typename U = T > 37 | T exchange(T& obj, U&& new_value) 38 | { 39 | T old_value = std::move(obj); 40 | obj = std::forward(new_value); 41 | return old_value; 42 | } 43 | 44 | template < typename T, typename ... types > 45 | struct Count; 46 | 47 | template < typename T > 48 | struct Count { 49 | static const size_t value = 0; 50 | }; 51 | 52 | template < typename T, typename ... types > 53 | struct Count { 54 | static const size_t value = 1 + Count::value; 55 | }; 56 | 57 | template < typename T, typename T0, typename ... 
types > 58 | struct Count { 59 | static const size_t value = Count::value; 60 | }; 61 | 62 | struct indexer_kji { 63 | IdxT ijlen, ilen; 64 | indexer_kji(IdxT ijlen_, IdxT ilen_) : ijlen(ijlen_), ilen(ilen_) {} 65 | COMB_HOST COMB_DEVICE IdxT operator()(IdxT k, IdxT j, IdxT i) const { return i + j * ilen + k * ijlen; } 66 | }; 67 | struct indexer_ji { 68 | IdxT ilen; 69 | indexer_ji(IdxT ilen_) : ilen(ilen_) {} 70 | COMB_HOST COMB_DEVICE IdxT operator()(IdxT j, IdxT i) const { return i + j * ilen; } 71 | }; 72 | struct indexer_i { 73 | indexer_i() {} 74 | COMB_HOST COMB_DEVICE IdxT operator()(IdxT i) const { return i; } 75 | }; 76 | 77 | struct indexer_offset_kji { 78 | IdxT ijlen, ilen; 79 | IdxT imin, jmin, kmin; 80 | indexer_offset_kji(IdxT kmin_, IdxT jmin_, IdxT imin_, IdxT ijlen_, IdxT ilen_) 81 | : ijlen(ijlen_), ilen(ilen_) 82 | , imin(imin_), jmin(jmin_), kmin(kmin_) {} 83 | COMB_HOST COMB_DEVICE IdxT operator()(IdxT k, IdxT j, IdxT i) const { return (i+imin) + (j+jmin) * ilen + (k+kmin) * ijlen; } 84 | }; 85 | 86 | struct indexer_list_kji { 87 | LidxT const* indices; 88 | IdxT ijlen, ilen; 89 | indexer_list_kji(LidxT const* indices_, IdxT ijlen_, IdxT ilen_) : indices(indices_), ijlen(ijlen_), ilen(ilen_) {} 90 | COMB_HOST COMB_DEVICE IdxT operator()(IdxT k, IdxT j, IdxT i) const { return indices[i + j * ilen + k * ijlen]; } 91 | }; 92 | struct indexer_list_ji { 93 | LidxT const* indices; 94 | IdxT ilen; 95 | indexer_list_ji(LidxT const* indices_, IdxT ilen_) : indices(indices_), ilen(ilen_) {} 96 | COMB_HOST COMB_DEVICE IdxT operator()(IdxT j, IdxT i) const { return indices[i + j * ilen]; } 97 | }; 98 | struct indexer_list_i { 99 | LidxT const* indices; 100 | indexer_list_i(LidxT const* indices_) : indices(indices_) {} 101 | COMB_HOST COMB_DEVICE IdxT operator()(IdxT i) const { return indices[i]; } 102 | }; 103 | 104 | template < typename T_src, typename I_src, typename T_dst, typename I_dst > 105 | struct copy_idxr_idxr { 106 | T_src const* ptr_src; 107 
| T_dst* ptr_dst; 108 | I_src idxr_src; 109 | I_dst idxr_dst; 110 | copy_idxr_idxr(T_src const* const& ptr_src_, I_src const& idxr_src_, T_dst* const& ptr_dst_, I_dst const& idxr_dst_) : ptr_src(ptr_src_), ptr_dst(ptr_dst_), idxr_src(idxr_src_), idxr_dst(idxr_dst_) {} 111 | template < typename ... Ts > 112 | COMB_HOST COMB_DEVICE void operator()(Ts... args) const 113 | { 114 | IdxT dst_i = idxr_dst(args...); 115 | IdxT src_i = idxr_src(args...); 116 | // LOGPRINTF("copy_idxr_idxr %p[%i]{%f} = %p[%i]{%f} (%i)\n", 117 | // ptr_dst, dst_i, (double)ptr_dst[dst_i], 118 | // ptr_src, src_i, (double)ptr_src[src_i], args...); 119 | ptr_dst[dst_i] = ptr_src[src_i]; 120 | } 121 | }; 122 | 123 | template < typename T_src, typename I_src, typename T_dst, typename I_dst > 124 | copy_idxr_idxr make_copy_idxr_idxr(T_src* const& ptr_src, I_src const& idxr_src, T_dst* const& ptr_dst, I_dst const& idxr_dst) { 125 | return copy_idxr_idxr(ptr_src, idxr_src, ptr_dst, idxr_dst); 126 | } 127 | 128 | template < typename I_src, typename T_dst, typename I_dst > 129 | struct set_idxr_idxr { 130 | T_dst* ptr_dst; 131 | I_src idxr_src; 132 | I_dst idxr_dst; 133 | set_idxr_idxr(I_src const& idxr_src_, T_dst* const& ptr_dst_, I_dst const& idxr_dst_) 134 | : ptr_dst(ptr_dst_) 135 | , idxr_src(idxr_src_) 136 | , idxr_dst(idxr_dst_) 137 | { } 138 | template < typename ... Ts > 139 | COMB_HOST COMB_DEVICE void operator()(Ts... 
args) const 140 | { 141 | IdxT dst_i = idxr_dst(args...); 142 | IdxT src_i = idxr_src(args...); 143 | // LOGPRINTF("set_idxr_idxr %p[%i]{%f} = %i (%i %i %i)\n", 144 | // ptr_dst, dst_i, (double)ptr_dst[dst_i], 145 | // src_i, args...); 146 | ptr_dst[dst_i] = src_i; 147 | } 148 | }; 149 | 150 | template < typename I_src, typename T_dst, typename I_dst > 151 | set_idxr_idxr make_set_idxr_idxr(I_src const& idxr_src, T_dst* const& ptr_dst, I_dst const& idxr_dst) { 152 | return set_idxr_idxr(idxr_src, ptr_dst, idxr_dst); 153 | } 154 | 155 | } // namespace detail 156 | 157 | #endif // _UTILS_HPP 158 | 159 | -------------------------------------------------------------------------------- /include/exec_utils_cuda.hpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #ifndef _CUDA_UTILS_HPP 17 | #define _CUDA_UTILS_HPP 18 | 19 | #include "config.hpp" 20 | 21 | #ifdef COMB_ENABLE_CUDA 22 | 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | // #include 30 | 31 | namespace COMB { 32 | 33 | #define cudaCheck(...) 
::COMB::cudaCheckError(#__VA_ARGS__, __VA_ARGS__, __FILE__, __LINE__) 34 | 35 | inline void cudaCheckError(const char* str, cudaError_t code, const char* file, int line) 36 | { 37 | if (code != cudaSuccess) { 38 | fprintf(stderr, "Error performing %s; %s %s %s:%i\n", str, cudaGetErrorName(code), cudaGetErrorString(code), file, line); fflush(stderr); 39 | assert(0); 40 | // MPI_Abort(MPI_COMM_WORLD, 1); 41 | } 42 | } 43 | 44 | #define cudaCheckReady(...) ::COMB::cudaCheckReadyError(#__VA_ARGS__, __VA_ARGS__, __FILE__, __LINE__) 45 | 46 | inline bool cudaCheckReadyError(const char* str, cudaError_t code, const char* file, int line) 47 | { 48 | if (code == cudaSuccess) { 49 | return true; 50 | } else if (code != cudaErrorNotReady) { 51 | fprintf(stderr, "Error performing %s; %s %s %s:%i\n", str, cudaGetErrorName(code), cudaGetErrorString(code), file, line); fflush(stderr); 52 | assert(0); 53 | // MPI_Abort(MPI_COMM_WORLD, 1); 54 | } 55 | return false; 56 | } 57 | 58 | 59 | namespace detail { 60 | 61 | namespace cuda { 62 | 63 | inline int get_device_impl() { 64 | int d = -1; 65 | cudaCheck(cudaGetDevice(&d)); 66 | return d; 67 | } 68 | 69 | inline int get_device() { 70 | static int d = get_device_impl(); 71 | return d; 72 | } 73 | 74 | inline cudaDeviceProp get_properties_impl() { 75 | cudaDeviceProp p; 76 | cudaCheck(cudaGetDeviceProperties(&p, get_device())); 77 | return p; 78 | } 79 | 80 | inline cudaDeviceProp get_properties() { 81 | static cudaDeviceProp p = get_properties_impl(); 82 | return p; 83 | } 84 | 85 | inline int get_concurrent_managed_access() { 86 | static int accessible = 87 | #if defined(CUDART_VERSION) && CUDART_VERSION >= 8000 88 | get_properties().concurrentManagedAccess; 89 | #else 90 | false; 91 | #endif 92 | return accessible; 93 | } 94 | 95 | inline int get_host_accessible_from_device() { 96 | static int accessible = 97 | #if defined(CUDART_VERSION) && CUDART_VERSION >= 9000 98 | get_properties().pageableMemoryAccess; 99 | #else 100 | false; 
101 | #endif 102 | return accessible; 103 | } 104 | 105 | inline int get_device_accessible_from_host() { 106 | static int accessible = 107 | false; 108 | return accessible; 109 | } 110 | 111 | inline int get_num_sm() { 112 | static int num_sm = get_properties().multiProcessorCount; 113 | return num_sm; 114 | } 115 | 116 | inline int get_arch() { 117 | static int cuda_arch = 100*get_properties().major + 10*get_properties().minor; 118 | return cuda_arch; 119 | } 120 | 121 | } // namespace cuda 122 | 123 | } // namespace detail 124 | 125 | } // namespace COMB 126 | 127 | #endif 128 | 129 | #endif // _CUDA_UTILS_HPP 130 | -------------------------------------------------------------------------------- /include/exec_utils_hip.hpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #ifndef _HIP_UTILS_HPP 17 | #define _HIP_UTILS_HPP 18 | 19 | #include "config.hpp" 20 | 21 | #ifdef COMB_ENABLE_HIP 22 | 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | // #include 29 | 30 | namespace COMB { 31 | 32 | #define hipCheck(...) 
::COMB::hipCheckError(#__VA_ARGS__, __VA_ARGS__, __FILE__, __LINE__) 33 | 34 | inline void hipCheckError(const char* str, hipError_t code, const char* file, int line) 35 | { 36 | if (code != hipSuccess) { 37 | fprintf(stderr, "Error performing %s; %s %s %s:%i\n", str, hipGetErrorName(code), hipGetErrorString(code), file, line); fflush(stderr); 38 | assert(0); 39 | // MPI_Abort(MPI_COMM_WORLD, 1); 40 | } 41 | } 42 | 43 | #define hipCheckReady(...) ::COMB::hipCheckReadyError(#__VA_ARGS__, __VA_ARGS__, __FILE__, __LINE__) 44 | 45 | inline bool hipCheckReadyError(const char* str, hipError_t code, const char* file, int line) 46 | { 47 | if (code == hipSuccess) { 48 | return true; 49 | } else if (code != hipErrorNotReady) { 50 | fprintf(stderr, "Error performing %s; %s %s %s:%i\n", str, hipGetErrorName(code), hipGetErrorString(code), file, line); fflush(stderr); 51 | assert(0); 52 | // MPI_Abort(MPI_COMM_WORLD, 1); 53 | } 54 | return false; 55 | } 56 | 57 | 58 | namespace detail { 59 | 60 | namespace hip { 61 | 62 | inline int get_device_impl() { 63 | int d = -1; 64 | hipCheck(hipGetDevice(&d)); 65 | return d; 66 | } 67 | 68 | inline int get_device() { 69 | static int d = get_device_impl(); 70 | return d; 71 | } 72 | 73 | inline hipDeviceProp_t get_properties_impl() { 74 | hipDeviceProp_t p; 75 | hipCheck(hipGetDeviceProperties(&p, get_device())); 76 | return p; 77 | } 78 | 79 | inline hipDeviceProp_t get_properties() { 80 | static hipDeviceProp_t p = get_properties_impl(); 81 | return p; 82 | } 83 | 84 | inline int get_concurrent_managed_access() { 85 | static int accessible = 86 | true; 87 | return accessible; 88 | } 89 | 90 | inline int get_host_accessible_from_device() { 91 | static int accessible = 92 | false; 93 | return accessible; 94 | } 95 | 96 | inline int get_device_accessible_from_host() { 97 | static int accessible = 98 | true; 99 | return accessible; 100 | } 101 | 102 | inline int get_num_cu() { 103 | static int num_cu = 
get_properties().multiProcessorCount; 104 | return num_cu; 105 | } 106 | 107 | inline int get_arch() { 108 | static int hip_arch = 100*get_properties().major + 10*get_properties().minor; 109 | return hip_arch; 110 | } 111 | 112 | } // namespace hip 113 | 114 | } // namespace detail 115 | 116 | } // namespace COMB 117 | 118 | #endif 119 | 120 | #endif // _HIP_UTILS_HPP 121 | -------------------------------------------------------------------------------- /include/mutex.hpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ****************************************************************************** 3 | * 4 | * \file 5 | * 6 | * \brief Header file providing functionality similar to std mutex header. 7 | * 8 | ****************************************************************************** 9 | */ 10 | 11 | //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// 12 | // Copyright (c) 2016-18, Lawrence Livermore National Security, LLC. 13 | // 14 | // Produced at the Lawrence Livermore National Laboratory 15 | // 16 | // LLNL-CODE-689114 17 | // 18 | // All rights reserved. 19 | // 20 | // This file is part of RAJA. 21 | // 22 | // For details about use and distribution, please read RAJA/LICENSE. 23 | // 24 | //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// 25 | 26 | #ifndef COMBRAJA_util_mutex_HPP 27 | #define COMBRAJA_util_mutex_HPP 28 | 29 | //#include "RAJA/config.hpp" 30 | #include "config.hpp" 31 | 32 | #if defined(COMB_ENABLE_OPENMP) 33 | #include 34 | #endif 35 | 36 | namespace COMBRAJA 37 | { 38 | 39 | #if defined(COMB_ENABLE_OPENMP) 40 | namespace omp 41 | { 42 | 43 | //! 
class wrapping omp_lock_t with std::mutex interface 44 | class mutex 45 | { 46 | public: 47 | using native_handle_type = omp_lock_t; 48 | 49 | mutex() { omp_init_lock(&m_lock); } 50 | 51 | mutex(const mutex&) = delete; 52 | mutex(mutex&&) = delete; 53 | mutex& operator=(const mutex&) = delete; 54 | mutex& operator=(mutex&&) = delete; 55 | 56 | void lock() { omp_set_lock(&m_lock); } 57 | 58 | bool try_lock() { return omp_test_lock(&m_lock) != 0; } 59 | 60 | void unlock() { omp_unset_lock(&m_lock); } 61 | 62 | native_handle_type& native_handle() { return m_lock; } 63 | 64 | ~mutex() { omp_destroy_lock(&m_lock); } 65 | 66 | private: 67 | native_handle_type m_lock; 68 | }; 69 | 70 | } // namespace omp 71 | #endif // closing endif for if defined(COMB_ENABLE_OPENMP) 72 | 73 | //! class providing functionality of std::lock_guard 74 | template 75 | class lock_guard 76 | { 77 | public: 78 | explicit lock_guard(mutex_type& m) : m_mutex(m) { m_mutex.lock(); } 79 | 80 | lock_guard(const lock_guard&) = delete; 81 | lock_guard(lock_guard&&) = delete; 82 | lock_guard& operator=(const lock_guard&) = delete; 83 | lock_guard& operator=(lock_guard&&) = delete; 84 | 85 | ~lock_guard() { m_mutex.unlock(); } 86 | 87 | private: 88 | mutex_type& m_mutex; 89 | }; 90 | 91 | } // namespace COMBRAJA 92 | 93 | #endif // closing endif for header file include guard 94 | -------------------------------------------------------------------------------- /include/print.hpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 
14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #ifndef _PRINT_HPP 17 | #define _PRINT_HPP 18 | 19 | #include "config.hpp" 20 | 21 | #include 22 | 23 | 24 | #define COMB_SERIALIZE_HELPER(a) #a 25 | #define COMB_SERIALIZE(a) COMB_SERIALIZE_HELPER(a) 26 | 27 | enum struct FileGroup 28 | { out_any // stdout, any proc 29 | , out_master // stdout, rank 0 only 30 | , err_any // stderr, any proc 31 | , err_master // stderr, rank 0 only 32 | , proc // per process file, any proc 33 | , summary // per run summary file, rank 0 only 34 | , summary_csv // per run summary csv file, rank 0 only 35 | , all // out_master, proc, summary, summary_csv 36 | }; 37 | 38 | extern int mpi_rank; 39 | extern FILE* comb_out_file; 40 | extern FILE* comb_err_file; 41 | extern FILE* comb_proc_file; 42 | extern FILE* comb_summary_file; 43 | 44 | extern void comb_setup_files(); 45 | extern void comb_teardown_files(); 46 | 47 | extern void fgprintf(FileGroup fg, const char* fmt, ...); 48 | extern void print_proc_memory_stats(); 49 | 50 | #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) 51 | #define FFLUSH(f) static_cast(0) 52 | #else 53 | #define FFLUSH(f) fflush(f) 54 | #endif 55 | 56 | #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) 57 | #define FGPRINTF(fg, ...) printf(__VA_ARGS__) 58 | #else 59 | #define FGPRINTF(fg, ...) fgprintf(fg, __VA_ARGS__) 60 | #endif 61 | 62 | #ifdef COMB_ENABLE_LOG 63 | #define LOGPRINTF(...) FGPRINTF(FileGroup::proc, __VA_ARGS__) 64 | #else 65 | #define LOGPRINTF(...) 
do { COMB::ignore_unused(__VA_ARGS__); } while(0) 66 | #endif 67 | 68 | #endif // _PRINT_HPP 69 | 70 | -------------------------------------------------------------------------------- /scripts/basic_tests.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nodes=$1 4 | procs=$2 5 | procs_per_side=$3 6 | 7 | # Choose a command to run mpi based on the system being used 8 | if [[ ! "x" == "x$SYS_TYPE" ]]; then 9 | if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then 10 | # Command used to run mpi on sierra systems 11 | run_mpi="lrun -N$nodes -p$procs" 12 | # add arguments to turn on cuda aware mpi (optionally disable gpu direct) 13 | # run_mpi="${run_mpi} --smpiargs \"-gpu\"" 14 | # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\"" 15 | elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then 16 | # Command used to run mpi on EA systems 17 | run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind" 18 | else 19 | # Command used to run mpi on slurm scheduled systems 20 | run_mpi="srun -N$nodes -n$procs" 21 | fi 22 | else 23 | # Command used to run mpi with mpirun 24 | # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php 25 | # Note: you may need to use additional options to get reasonable mpi behavior 26 | # --host=hostname0,hostname1,... 
https://www.open-mpi.org/faq/?category=running#mpirun-hostfile 27 | # --hostfile my_hosts https://www.open-mpi.org/faq/?category=running#mpirun-host 28 | run_mpi="mpirun -np $procs" 29 | 30 | # Command used to run mpi with mpiexec 31 | # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html 32 | # run_mpi="mpiexec -n $procs" 33 | fi 34 | 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior 36 | # Your scheduler may help with this 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/ 39 | # Ex: 40 | # bash: 41 | # mpirun -np 1 bind_script comb 42 | # bind_script: 43 | # export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node 44 | # exec $@ 45 | 46 | # Comb executable or symlink 47 | run_comb="$(pwd)/comb" 48 | 49 | # Choose arguments for comb 50 | # elements on one side of the cube for each process 51 | elems_per_procs_per_side=100 # 180 52 | # overall size of the grid 53 | let size=procs_per_side*elems_per_procs_per_side 54 | comb_args="${size}_${size}_${size}" 55 | # divide the grid into a number of procs per side 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}" 57 | # set the grid to be periodic in each dimension 58 | comb_args="${comb_args} -periodic 1_1_1" 59 | # set the halo width or number of ghost zones 60 | comb_args="${comb_args} -ghost 1_1_1" 61 | # set number of grid variables 62 | comb_args="${comb_args} -vars 3" 63 | # set number of communication cycles 64 | comb_args="${comb_args} -cycles 25" # 100 65 | # set cutoff between large and small message packing/unpacking kernels 66 | comb_args="${comb_args} -comm cutoff 250" 67 | # set the number of omp threads per process 68 | comb_args="${comb_args} -omp_threads 10" 69 | # enable tests passing cuda device or managed memory to 
mpi 70 | # comb_args="${comb_args} -cuda_aware_mpi" 71 | # enable basic execution test (disables all others) 72 | comb_args="${comb_args} -basic_only" 73 | 74 | # set up the base command to run a test 75 | # use sep_out.bash to separate each rank's output 76 | run_test_base="${run_mpi} ${run_comb}" 77 | 78 | # Run a test with this comm method 79 | echo "${run_test_base} ${comb_args}" 80 | ${run_test_base} ${comb_args} 81 | 82 | echo "done" 83 | -------------------------------------------------------------------------------- /scripts/focused_cuda_graphs_tests.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nodes=$1 4 | procs=$2 5 | procs_per_side=$3 6 | 7 | # Choose a command to run mpi based on the system being used 8 | if [[ ! "x" == "x$SYS_TYPE" ]]; then 9 | if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then 10 | # Command used to run mpi on sierra systems 11 | run_mpi="lrun -N$nodes -p$procs" 12 | # add arguments to turn on cuda aware mpi (optionally disable gpu direct) 13 | # run_mpi="${run_mpi} --smpiargs \"-gpu\"" 14 | # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\"" 15 | elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then 16 | # Command used to run mpi on EA systems 17 | run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind" 18 | else 19 | # Command used to run mpi on slurm scheduled systems 20 | run_mpi="srun -N$nodes -n$procs" 21 | fi 22 | else 23 | # Command used to run mpi with mpirun 24 | # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php 25 | # Note: you may need to use additional options to get reasonable mpi behavior 26 | # --host=hostname0,hostname1,... 
https://www.open-mpi.org/faq/?category=running#mpirun-host 27 | # --hostfile my_hosts https://www.open-mpi.org/faq/?category=running#mpirun-hostfile 28 | run_mpi="mpirun -np $procs" 29 | 30 | # Command used to run mpi with mpiexec 31 | # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html 32 | # run_mpi="mpiexec -n $procs" 33 | fi 34 | 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior 36 | # Your scheduler may help with this 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/ 39 | # Ex: 40 | # bash: 41 | # mpirun -np 1 bind_script comb 42 | # bind_script: 43 | # export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node 44 | # exec $@ 45 | 46 | # Comb executable or symlink 47 | run_comb="$(pwd)/comb" 48 | 49 | # Choose arguments for comb 50 | # elements on one side of the cube for each process 51 | elems_per_procs_per_side=100 # 180 52 | # overall size of the grid 53 | let size=procs_per_side*elems_per_procs_per_side 54 | comb_args="${size}_${size}_${size}" 55 | # divide the grid into a number of procs per side 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}" 57 | # set the grid to be periodic in each dimension 58 | comb_args="${comb_args} -periodic 1_1_1" 59 | # set the halo width or number of ghost zones 60 | comb_args="${comb_args} -ghost 1_1_1" 61 | # set number of grid variables 62 | comb_args="${comb_args} -vars 3" 63 | # set number of communication cycles 64 | comb_args="${comb_args} -cycles 25" # 100 65 | # set cutoff between large and small message packing/unpacking kernels 66 | comb_args="${comb_args} -comm cutoff 250" 67 | # set the number of omp threads per process 68 | comb_args="${comb_args} -omp_threads 10" 69 | # enable tests passing cuda device or managed memory to
mpi 70 | # comb_args="${comb_args} -cuda_aware_mpi" 71 | # disable seq execution tests 72 | comb_args="${comb_args} -exec disable seq" 73 | # enable cuda execution tests 74 | comb_args="${comb_args} -exec enable cuda" 75 | # enable cuda graph execution tests 76 | comb_args="${comb_args} -exec enable cuda_graph" 77 | # disable host memory tests 78 | comb_args="${comb_args} -memory disable host" 79 | # enable cuda managed memory tests 80 | comb_args="${comb_args} -memory enable cuda_managed" 81 | # enable mock communication tests 82 | comb_args="${comb_args} -comm enable mock" 83 | # enable mpi communication tests 84 | comb_args="${comb_args} -comm enable mpi" 85 | 86 | # set up arguments for the communication method (wait_all for every post/wait phase) 87 | wait_all_method="-comm post_recv wait_all -comm post_send wait_all -comm wait_recv wait_all -comm wait_send wait_all" 88 | 89 | # set up the base command to run a test 90 | # use sep_out.bash to separate each rank's output 91 | run_test_base="${run_mpi} ${run_comb}" 92 | 93 | # for each communication method 94 | for comm_method in "${wait_all_method}"; do 95 | 96 | # Run a test with this comm method 97 | echo "${run_test_base} ${comm_method} ${comb_args}" 98 | ${run_test_base} ${comm_method} ${comb_args} 99 | 100 | done 101 | 102 | echo "done" 103 | -------------------------------------------------------------------------------- /scripts/focused_gdsync_tests.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nodes=$1 4 | procs=$2 5 | procs_per_side=$3 6 | 7 | # Choose a command to run mpi based on the system being used 8 | if [[ !
"x" == "x$SYS_TYPE" ]]; then 9 | if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then 10 | # Command used to run mpi on sierra systems 11 | run_mpi="lrun -N$nodes -p$procs" 12 | # add arguments to turn on cuda aware mpi (optionally disable gpu direct) 13 | # run_mpi="${run_mpi} --smpiargs \"-gpu\"" 14 | # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\"" 15 | elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then 16 | # Command used to run mpi on EA systems 17 | run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind" 18 | else 19 | # Command used to run mpi on slurm scheduled systems 20 | run_mpi="srun -N$nodes -n$procs" 21 | fi 22 | else 23 | # Command used to run mpi with mpirun 24 | # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php 25 | # Note: you may need to use additional options to get reasonable mpi behavior 26 | # --host=hostname0,hostname1,... https://www.open-mpi.org/faq/?category=running#mpirun-host 27 | # --hostfile my_hosts https://www.open-mpi.org/faq/?category=running#mpirun-hostfile 28 | run_mpi="mpirun -np $procs" 29 | 30 | # Command used to run mpi with mpiexec 31 | # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html 32 | # run_mpi="mpiexec -n $procs" 33 | fi 34 | 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior 36 | # Your scheduler may help with this 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/ 39 | # Ex: 40 | # bash: 41 | # mpirun -np 1 bind_script comb 42 | # bind_script: 43 | # export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node 44 | # exec $@ 45 | 46 | # Comb executable or symlink 47 | run_comb="$(pwd)/comb" 48 | 49 | # Choose arguments for comb 50 | # elements on one side of the cube for each process 51 | elems_per_procs_per_side=100 # 180 52 | # overall size of the grid 53 | let
size=procs_per_side*elems_per_procs_per_side 54 | comb_args="${size}_${size}_${size}" 55 | # divide the grid into a number of procs per side 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}" 57 | # set the grid to be periodic in each dimension 58 | comb_args="${comb_args} -periodic 1_1_1" 59 | # set the halo width or number of ghost zones 60 | comb_args="${comb_args} -ghost 1_1_1" 61 | # set number of grid variables 62 | comb_args="${comb_args} -vars 3" 63 | # set number of communication cycles 64 | comb_args="${comb_args} -cycles 25" # 100 65 | # set cutoff between large and small message packing/unpacking kernels 66 | comb_args="${comb_args} -comm cutoff 250" 67 | # set the number of omp threads per process 68 | comb_args="${comb_args} -omp_threads 10" 69 | # enable tests passing cuda device or managed memory to mpi 70 | # comb_args="${comb_args} -cuda_aware_mpi" 71 | # disable seq execution tests 72 | comb_args="${comb_args} -exec disable seq" 73 | # enable cuda execution tests 74 | comb_args="${comb_args} -exec enable cuda" 75 | # disable host memory tests 76 | comb_args="${comb_args} -memory disable host" 77 | # enable cuda managed memory tests 78 | comb_args="${comb_args} -memory enable cuda_managed" 79 | # enable mock communication tests 80 | comb_args="${comb_args} -comm enable mock" 81 | # enable mpi communication tests 82 | comb_args="${comb_args} -comm enable mpi" 83 | # enable gdsync communication tests 84 | comb_args="${comb_args} -comm enable gdsync" 85 | 86 | # set up arguments for the communication method (wait_all for every post/wait phase) 87 | wait_all_method="-comm post_recv wait_all -comm post_send wait_all -comm wait_recv wait_all -comm wait_send wait_all" 88 | 89 | # set up the base command to run a test 90 | # use sep_out.bash to separate each rank's output 91 | run_test_base="${run_mpi} ${run_comb}" 92 | 93 | # for each communication method 94 | for comm_method in "${wait_all_method}"; do 95 | 96 | # Run a test with this comm method 97 |
echo "${run_test_base} ${comm_method} ${comb_args}" 98 | ${run_test_base} ${comm_method} ${comb_args} 99 | 100 | done 101 | 102 | echo "done" 103 | -------------------------------------------------------------------------------- /scripts/focused_gpump_tests.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nodes=$1 4 | procs=$2 5 | procs_per_side=$3 6 | 7 | # Choose a command to run mpi based on the system being used 8 | if [[ ! "x" == "x$SYS_TYPE" ]]; then 9 | if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then 10 | # Command used to run mpi on sierra systems 11 | run_mpi="lrun -N$nodes -p$procs" 12 | # add arguments to turn on cuda aware mpi (optionally disable gpu direct) 13 | # run_mpi="${run_mpi} --smpiargs \"-gpu\"" 14 | # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\"" 15 | elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then 16 | # Command used to run mpi on EA systems 17 | run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind" 18 | else 19 | # Command used to run mpi on slurm scheduled systems 20 | run_mpi="srun -N$nodes -n$procs" 21 | fi 22 | else 23 | # Command used to run mpi with mpirun 24 | # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php 25 | # Note: you may need to use additional options to get reasonable mpi behavior 26 | # --host=hostname0,hostname1,... 
https://www.open-mpi.org/faq/?category=running#mpirun-host 27 | # --hostfile my_hosts https://www.open-mpi.org/faq/?category=running#mpirun-hostfile 28 | run_mpi="mpirun -np $procs" 29 | 30 | # Command used to run mpi with mpiexec 31 | # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html 32 | # run_mpi="mpiexec -n $procs" 33 | fi 34 | 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior 36 | # Your scheduler may help with this 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/ 39 | # Ex: 40 | # bash: 41 | # mpirun -np 1 bind_script comb 42 | # bind_script: 43 | # export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node 44 | # exec $@ 45 | 46 | # Comb executable or symlink 47 | run_comb="$(pwd)/comb" 48 | 49 | # Choose arguments for comb 50 | # elements on one side of the cube for each process 51 | elems_per_procs_per_side=100 # 180 52 | # overall size of the grid 53 | let size=procs_per_side*elems_per_procs_per_side 54 | comb_args="${size}_${size}_${size}" 55 | # divide the grid into a number of procs per side 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}" 57 | # set the grid to be periodic in each dimension 58 | comb_args="${comb_args} -periodic 1_1_1" 59 | # set the halo width or number of ghost zones 60 | comb_args="${comb_args} -ghost 1_1_1" 61 | # set number of grid variables 62 | comb_args="${comb_args} -vars 3" 63 | # set number of communication cycles 64 | comb_args="${comb_args} -cycles 25" # 100 65 | # set cutoff between large and small message packing/unpacking kernels 66 | comb_args="${comb_args} -comm cutoff 250" 67 | # set the number of omp threads per process 68 | comb_args="${comb_args} -omp_threads 10" 69 | # enable tests passing cuda device or managed memory to
mpi 70 | # comb_args="${comb_args} -cuda_aware_mpi" 71 | # disable seq execution tests 72 | comb_args="${comb_args} -exec disable seq" 73 | # enable cuda execution tests 74 | comb_args="${comb_args} -exec enable cuda" 75 | # disable host memory tests 76 | comb_args="${comb_args} -memory disable host" 77 | # enable cuda managed memory tests 78 | comb_args="${comb_args} -memory enable cuda_managed" 79 | # enable mock communication tests 80 | comb_args="${comb_args} -comm enable mock" 81 | # enable mpi communication tests 82 | comb_args="${comb_args} -comm enable mpi" 83 | # enable gpump communication tests 84 | comb_args="${comb_args} -comm enable gpump" 85 | 86 | # set up arguments for the communication method (wait_all for every post/wait phase) 87 | wait_all_method="-comm post_recv wait_all -comm post_send wait_all -comm wait_recv wait_all -comm wait_send wait_all" 88 | 89 | # set up the base command to run a test 90 | # use sep_out.bash to separate each rank's output 91 | run_test_base="${run_mpi} ${run_comb}" 92 | 93 | # for each communication method 94 | for comm_method in "${wait_all_method}"; do 95 | 96 | # Run a test with this comm method 97 | echo "${run_test_base} ${comm_method} ${comb_args}" 98 | ${run_test_base} ${comm_method} ${comb_args} 99 | 100 | done 101 | 102 | echo "done" 103 | -------------------------------------------------------------------------------- /scripts/focused_mp_tests.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nodes=$1 4 | procs=$2 5 | procs_per_side=$3 6 | 7 | # Choose a command to run mpi based on the system being used 8 | if [[ !
"x" == "x$SYS_TYPE" ]]; then 9 | if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then 10 | # Command used to run mpi on sierra systems 11 | run_mpi="lrun -N$nodes -p$procs" 12 | # add arguments to turn on cuda aware mpi (optionally disable gpu direct) 13 | # run_mpi="${run_mpi} --smpiargs \"-gpu\"" 14 | # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\"" 15 | elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then 16 | # Command used to run mpi on EA systems 17 | run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind" 18 | else 19 | # Command used to run mpi on slurm scheduled systems 20 | run_mpi="srun -N$nodes -n$procs" 21 | fi 22 | else 23 | # Command used to run mpi with mpirun 24 | # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php 25 | # Note: you may need to use additional options to get reasonable mpi behavior 26 | # --host=hostname0,hostname1,... https://www.open-mpi.org/faq/?category=running#mpirun-host 27 | # --hostfile my_hosts https://www.open-mpi.org/faq/?category=running#mpirun-hostfile 28 | run_mpi="mpirun -np $procs" 29 | 30 | # Command used to run mpi with mpiexec 31 | # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html 32 | # run_mpi="mpiexec -n $procs" 33 | fi 34 | 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior 36 | # Your scheduler may help with this 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/ 39 | # Ex: 40 | # bash: 41 | # mpirun -np 1 bind_script comb 42 | # bind_script: 43 | # export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node 44 | # exec $@ 45 | 46 | # Comb executable or symlink 47 | run_comb="$(pwd)/comb" 48 | 49 | # Choose arguments for comb 50 | # elements on one side of the cube for each process 51 | elems_per_procs_per_side=100 # 180 52 | # overall size of the grid 53 | let
size=procs_per_side*elems_per_procs_per_side 54 | comb_args="${size}_${size}_${size}" 55 | # divide the grid into a number of procs per side 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}" 57 | # set the grid to be periodic in each dimension 58 | comb_args="${comb_args} -periodic 1_1_1" 59 | # set the halo width or number of ghost zones 60 | comb_args="${comb_args} -ghost 1_1_1" 61 | # set number of grid variables 62 | comb_args="${comb_args} -vars 3" 63 | # set number of communication cycles 64 | comb_args="${comb_args} -cycles 25" # 100 65 | # set cutoff between large and small message packing/unpacking kernels 66 | comb_args="${comb_args} -comm cutoff 250" 67 | # set the number of omp threads per process 68 | comb_args="${comb_args} -omp_threads 10" 69 | # enable tests passing cuda device or managed memory to mpi 70 | # comb_args="${comb_args} -cuda_aware_mpi" 71 | # disable seq execution tests 72 | comb_args="${comb_args} -exec disable seq" 73 | # enable cuda execution tests 74 | comb_args="${comb_args} -exec enable cuda" 75 | # disable host memory tests 76 | comb_args="${comb_args} -memory disable host" 77 | # enable cuda managed memory tests 78 | comb_args="${comb_args} -memory enable cuda_managed" 79 | # enable mock communication tests 80 | comb_args="${comb_args} -comm enable mock" 81 | # enable mpi communication tests 82 | comb_args="${comb_args} -comm enable mpi" 83 | # enable mp communication tests 84 | comb_args="${comb_args} -comm enable mp" 85 | 86 | # set up arguments for the communication method (wait_all for every post/wait phase) 87 | wait_all_method="-comm post_recv wait_all -comm post_send wait_all -comm wait_recv wait_all -comm wait_send wait_all" 88 | 89 | # set up the base command to run a test 90 | # use sep_out.bash to separate each rank's output 91 | run_test_base="${run_mpi} ${run_comb}" 92 | 93 | # for each communication method 94 | for comm_method in "${wait_all_method}"; do 95 | 96 | # Run a test with this comm method 97 | echo
"${run_test_base} ${comm_method} ${comb_args}" 98 | ${run_test_base} ${comm_method} ${comb_args} 99 | 100 | done 101 | 102 | echo "done" 103 | -------------------------------------------------------------------------------- /scripts/focused_mpi_type_tests.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nodes=$1 4 | procs=$2 5 | procs_per_side=$3 6 | 7 | # Choose a command to run mpi based on the system being used 8 | if [[ ! "x" == "x$SYS_TYPE" ]]; then 9 | if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then 10 | # Command used to run mpi on sierra systems 11 | run_mpi="lrun -N$nodes -p$procs" 12 | # add arguments to turn on cuda aware mpi (optionally disable gpu direct) 13 | run_mpi="${run_mpi} --smpiargs \"-gpu\"" 14 | # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\"" 15 | elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then 16 | # Command used to run mpi on EA systems 17 | run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind" 18 | else 19 | # Command used to run mpi on slurm scheduled systems 20 | run_mpi="srun -N$nodes -n$procs" 21 | fi 22 | else 23 | # Command used to run mpi with mpirun 24 | # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php 25 | # Note: you may need to use additional options to get reasonable mpi behavior 26 | # --host=hostname0,hostname1,... 
https://www.open-mpi.org/faq/?category=running#mpirun-host 27 | # --hostfile my_hosts https://www.open-mpi.org/faq/?category=running#mpirun-hostfile 28 | run_mpi="mpirun -np $procs" 29 | 30 | # Command used to run mpi with mpiexec 31 | # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html 32 | # run_mpi="mpiexec -n $procs" 33 | fi 34 | 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior 36 | # Your scheduler may help with this 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/ 39 | # Ex: 40 | # bash: 41 | # mpirun -np 1 bind_script comb 42 | # bind_script: 43 | # export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node 44 | # exec $@ 45 | 46 | # Comb executable or symlink 47 | run_comb="$(pwd)/comb" 48 | 49 | # Choose arguments for comb 50 | # elements on one side of the cube for each process 51 | elems_per_procs_per_side=100 # 180 52 | # overall size of the grid 53 | let size=procs_per_side*elems_per_procs_per_side 54 | comb_args="${size}_${size}_${size}" 55 | # divide the grid into a number of procs per side 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}" 57 | # set the grid to be periodic in each dimension 58 | comb_args="${comb_args} -periodic 1_1_1" 59 | # set the halo width or number of ghost zones 60 | comb_args="${comb_args} -ghost 1_1_1" 61 | # set number of grid variables 62 | comb_args="${comb_args} -vars 3" 63 | # set number of communication cycles 64 | comb_args="${comb_args} -cycles 25" # 100 65 | # set cutoff between large and small message packing/unpacking kernels 66 | comb_args="${comb_args} -comm cutoff 250" 67 | # set the number of omp threads per process 68 | comb_args="${comb_args} -omp_threads 10" 69 | # enable tests passing cuda device or managed memory to
mpi 70 | comb_args="${comb_args} -cuda_aware_mpi" 71 | # disable seq execution tests 72 | comb_args="${comb_args} -exec disable seq" 73 | # enable cuda execution tests 74 | comb_args="${comb_args} -exec enable cuda" 75 | # enable mpi_type execution tests 76 | comb_args="${comb_args} -exec enable mpi_type" 77 | # disable host memory tests 78 | comb_args="${comb_args} -memory disable host" 79 | # enable cuda device memory tests 80 | comb_args="${comb_args} -memory enable cuda_device" 81 | # enable cuda managed memory tests 82 | comb_args="${comb_args} -memory enable cuda_managed" 83 | # enable mock communication tests 84 | comb_args="${comb_args} -comm enable mock" 85 | # enable mpi communication tests 86 | comb_args="${comb_args} -comm enable mpi" 87 | 88 | # set up arguments for communication method 89 | wait_any_method="-comm post_recv wait_any -comm post_send wait_any -comm wait_recv wait_any -comm wait_send wait_any" 90 | 91 | # set up the base command to run a test 92 | # use sep_out.bash to separate each rank's output 93 | run_test_base="${run_mpi} ${run_comb}" 94 | 95 | # for each communication method 96 | for comm_method in "${wait_any_method}"; do 97 | 98 | # Run a test with this comm method 99 | echo "${run_test_base} ${comm_method} ${comb_args}" 100 | ${run_test_base} ${comm_method} ${comb_args} 101 | 102 | done 103 | 104 | echo "done" 105 | -------------------------------------------------------------------------------- /scripts/focused_tests.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nodes=$1 4 | procs=$2 5 | procs_per_side=$3 6 | 7 | # Choose a command to run mpi based on the system being used 8 | if [[ !
"x" == "x$SYS_TYPE" ]]; then 9 | if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then 10 | # Command used to run mpi on sierra systems 11 | run_mpi="lrun -N$nodes -p$procs" 12 | # add arguments to turn on cuda aware mpi (optionally disable gpu direct) 13 | # run_mpi="${run_mpi} --smpiargs \"-gpu\"" 14 | # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\"" 15 | elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then 16 | # Command used to run mpi on EA systems 17 | run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind" 18 | else 19 | # Command used to run mpi on slurm scheduled systems 20 | run_mpi="srun -N$nodes -n$procs" 21 | fi 22 | else 23 | # Command used to run mpi with mpirun 24 | # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php 25 | # Note: you may need to use additional options to get reasonable mpi behavior 26 | # --host=hostname0,hostname1,... https://www.open-mpi.org/faq/?category=running#mpirun-host 27 | # --hostfile my_hosts https://www.open-mpi.org/faq/?category=running#mpirun-hostfile 28 | run_mpi="mpirun -np $procs" 29 | 30 | # Command used to run mpi with mpiexec 31 | # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html 32 | # run_mpi="mpiexec -n $procs" 33 | fi 34 | 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior 36 | # Your scheduler may help with this 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/ 39 | # Ex: 40 | # bash: 41 | # mpirun -np 1 bind_script comb 42 | # bind_script: 43 | # export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node 44 | # exec $@ 45 | 46 | # Comb executable or symlink 47 | run_comb="$(pwd)/comb" 48 | 49 | # Choose arguments for comb 50 | # elements on one side of the cube for each process 51 | elems_per_procs_per_side=100 # 180 52 | # overall size of the grid 53 | let
size=procs_per_side*elems_per_procs_per_side 54 | comb_args="${size}_${size}_${size}" 55 | # divide the grid into a number of procs per side 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}" 57 | # set the grid to be periodic in each dimension 58 | comb_args="${comb_args} -periodic 1_1_1" 59 | # set the halo width or number of ghost zones 60 | comb_args="${comb_args} -ghost 1_1_1" 61 | # set number of grid variables 62 | comb_args="${comb_args} -vars 3" 63 | # set number of communication cycles 64 | comb_args="${comb_args} -cycles 25" # 100 65 | # set cutoff between large and small message packing/unpacking kernels 66 | comb_args="${comb_args} -comm cutoff 250" 67 | # set the number of omp threads per process 68 | comb_args="${comb_args} -omp_threads 10" 69 | # enable tests passing cuda device or managed memory to mpi 70 | # comb_args="${comb_args} -cuda_aware_mpi" 71 | # disable seq execution tests 72 | comb_args="${comb_args} -exec disable seq" 73 | # enable cuda execution tests 74 | comb_args="${comb_args} -exec enable cuda" 75 | # disable host memory tests 76 | comb_args="${comb_args} -memory disable host" 77 | # enable cuda managed memory tests 78 | comb_args="${comb_args} -memory enable cuda_managed" 79 | # enable mock communication tests 80 | comb_args="${comb_args} -comm enable mock" 81 | # enable mpi communication tests 82 | comb_args="${comb_args} -comm enable mpi" 83 | 84 | # set up arguments for communication method 85 | wait_any_method="-comm post_recv wait_any -comm post_send wait_any -comm wait_recv wait_any -comm wait_send wait_any" 86 | 87 | # set up the base command to run a test 88 | # use sep_out.bash to separate each rank's output 89 | run_test_base="${run_mpi} ${run_comb}" 90 | 91 | # for each communication method 92 | for comm_method in "${wait_any_method}"; do 93 | 94 | # Run a test with this comm method 95 | echo "${run_test_base} ${comm_method} ${comb_args}" 96 | ${run_test_base} ${comm_method} 
${comb_args} 97 | 98 | done 99 | 100 | echo "done" 101 | -------------------------------------------------------------------------------- /scripts/focused_umr_tests.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nodes=$1 4 | procs=$2 5 | procs_per_side=$3 6 | 7 | # Choose a command to run mpi based on the system being used 8 | if [[ ! "x" == "x$SYS_TYPE" ]]; then 9 | if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then 10 | # Command used to run mpi on sierra systems 11 | run_mpi="lrun -N$nodes -p$procs" 12 | # add arguments to turn on cuda aware mpi (optionally disable gpu direct) 13 | # run_mpi="${run_mpi} --smpiargs \"-gpu\"" 14 | # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\"" 15 | elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then 16 | # Command used to run mpi on EA systems 17 | run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind" 18 | else 19 | # Command used to run mpi on slurm scheduled systems 20 | run_mpi="srun -N$nodes -n$procs" 21 | fi 22 | else 23 | # Command used to run mpi with mpirun 24 | # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php 25 | # Note: you may need to use additional options to get reasonable mpi behavior 26 | # --host=hostname0,hostname1,... 
https://www.open-mpi.org/faq/?category=running#mpirun-host 27 | # --hostfile my_hosts https://www.open-mpi.org/faq/?category=running#mpirun-hostfile 28 | run_mpi="mpirun -np $procs" 29 | 30 | # Command used to run mpi with mpiexec 31 | # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html 32 | # run_mpi="mpiexec -n $procs" 33 | fi 34 | 35 | # Note: you may need to bind processes to cores to get reasonable openmp behavior 36 | # Your scheduler may help with this 37 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads 38 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/ 39 | # Ex: 40 | # bash: 41 | # mpirun -np 1 bind_script comb 42 | # bind_script: 43 | # export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node 44 | # exec $@ 45 | 46 | # Comb executable or symlink 47 | run_comb="$(pwd)/comb" 48 | 49 | # Choose arguments for comb 50 | # elements on one side of the cube for each process 51 | elems_per_procs_per_side=100 # 180 52 | # overall size of the grid 53 | let size=procs_per_side*elems_per_procs_per_side 54 | comb_args="${size}_${size}_${size}" 55 | # divide the grid into a number of procs per side 56 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}" 57 | # set the grid to be periodic in each dimension 58 | comb_args="${comb_args} -periodic 1_1_1" 59 | # set the halo width or number of ghost zones 60 | comb_args="${comb_args} -ghost 1_1_1" 61 | # set number of grid variables 62 | comb_args="${comb_args} -vars 3" 63 | # set number of communication cycles 64 | comb_args="${comb_args} -cycles 25" # 100 65 | # set cutoff between large and small message packing/unpacking kernels 66 | comb_args="${comb_args} -comm cutoff 250" 67 | # set the number of omp threads per process 68 | comb_args="${comb_args} -omp_threads 10" 69 | # enable tests passing cuda device or managed memory to
mpi 70 | # comb_args="${comb_args} -cuda_aware_mpi" 71 | # disable seq execution tests 72 | comb_args="${comb_args} -exec disable seq" 73 | # enable cuda execution tests 74 | comb_args="${comb_args} -exec enable cuda" 75 | # disable host memory tests 76 | comb_args="${comb_args} -memory disable host" 77 | # enable cuda managed memory tests 78 | comb_args="${comb_args} -memory enable cuda_managed" 79 | # enable mock communication tests 80 | comb_args="${comb_args} -comm enable mock" 81 | # enable mpi communication tests 82 | comb_args="${comb_args} -comm enable mpi" 83 | # enable umr communication tests 84 | comb_args="${comb_args} -comm enable umr" 85 | 86 | # set up arguments for communication method 87 | wait_any_method="-comm post_recv wait_any -comm post_send wait_any -comm wait_recv wait_any -comm wait_send wait_any" 88 | 89 | # set up the base command to run a test 90 | # use sep_out.bash to separate each rank's output 91 | run_test_base="${run_mpi} ${run_comb}" 92 | 93 | # for each communication method 94 | for comm_method in "${wait_any_method}"; do 95 | 96 | # Run a test with this comm method 97 | echo "${run_test_base} ${comm_method} ${comb_args}" 98 | ${run_test_base} ${comm_method} ${comb_args} 99 | 100 | done 101 | 102 | echo "done" 103 | -------------------------------------------------------------------------------- /scripts/lc-builds/blueos_clang.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ############################################################################## 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 5 | ## 6 | ## Produced at the Lawrence Livermore National Laboratory 7 | ## 8 | ## LLNL-CODE-758885 9 | ## 10 | ## All rights reserved. 11 | ## 12 | ## This file is part of Comb. 13 | ## 14 | ## For details, see https://github.com/LLNL/Comb 15 | ## Please also see the LICENSE file for MIT license. 
16 | ############################################################################## 17 | 18 | if [ "$1" == "" ]; then 19 | echo 20 | echo "You must pass a compiler version number to script. For example," 21 | echo " blueos_clang.sh 11.0.1" 22 | echo " -or - " 23 | echo " blueos_clang.sh ibm-10.0.1-gcc-8.3.1" 24 | exit 1 # usage error: report failure to the caller 25 | fi 26 | 27 | COMP_VER=$1 28 | shift 1 29 | 30 | BUILD_SUFFIX=lc_blueos-clang-${COMP_VER} 31 | 32 | echo 33 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it" 34 | echo "Configuration extra arguments:" 35 | echo " $@" 36 | echo 37 | 38 | rm -rf build_${BUILD_SUFFIX} 2>/dev/null 39 | mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} || exit 1 # never configure outside the build directory 40 | 41 | mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . && cd .. 42 | 43 | module load cmake/3.14.5 44 | 45 | cmake \ 46 | -DCMAKE_BUILD_TYPE=Release \ 47 | -DMPI_CXX_COMPILER=/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-clang-${COMP_VER}/bin/mpiclang++ \ 48 | -DCMAKE_CXX_COMPILER=/usr/tce/packages/clang/clang-${COMP_VER}/bin/clang++ \ 49 | -C ../host-configs/lc-builds/blueos/clang_X.cmake \ 50 | -DENABLE_MPI=On \ 51 | -DENABLE_OPENMP=On \ 52 | -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ 53 | "$@" \ 54 | .. 55 | -------------------------------------------------------------------------------- /scripts/lc-builds/blueos_gcc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ############################################################################## 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 5 | ## 6 | ## Produced at the Lawrence Livermore National Laboratory 7 | ## 8 | ## LLNL-CODE-758885 9 | ## 10 | ## All rights reserved. 11 | ## 12 | ## This file is part of Comb. 13 | ## 14 | ## For details, see https://github.com/LLNL/Comb 15 | ## Please also see the LICENSE file for MIT license.
16 | ############################################################################## 17 | 18 | if [ "$1" == "" ]; then 19 | echo 20 | echo "You must pass a compiler version number to script. For example," 21 | echo " blueos_gcc.sh 8.3.1" 22 | exit 1 # usage error: report failure to the caller 23 | fi 24 | 25 | COMP_VER=$1 26 | shift 1 27 | 28 | BUILD_SUFFIX=lc_blueos-gcc-${COMP_VER} 29 | 30 | echo 31 | echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it" 32 | echo "Configuration extra arguments:" 33 | echo " $@" 34 | echo 35 | 36 | rm -rf build_${BUILD_SUFFIX} 2>/dev/null 37 | mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} || exit 1 # never configure outside the build directory 38 | 39 | mkdir scripts && cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . && cd .. 40 | 41 | module load cmake/3.14.5 42 | 43 | cmake \ 44 | -DCMAKE_BUILD_TYPE=Release \ 45 | -DMPI_CXX_COMPILER=/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-gcc-${COMP_VER}/bin/mpig++ \ 46 | -DCMAKE_CXX_COMPILER=/usr/tce/packages/gcc/gcc-${COMP_VER}/bin/g++ \ 47 | -C ../host-configs/lc-builds/blueos/gcc_X.cmake \ 48 | -DENABLE_MPI=On \ 49 | -DENABLE_OPENMP=On \ 50 | -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ 51 | "$@" \ 52 | .. 53 | -------------------------------------------------------------------------------- /scripts/lc-builds/blueos_nvcc_clang.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ############################################################################## 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 5 | ## 6 | ## Produced at the Lawrence Livermore National Laboratory 7 | ## 8 | ## LLNL-CODE-758885 9 | ## 10 | ## All rights reserved. 11 | ## 12 | ## This file is part of Comb. 13 | ## 14 | ## For details, see https://github.com/LLNL/Comb 15 | ## Please also see the LICENSE file for MIT license.
##############################################################################

# Configure a Comb build for nvcc with a clang host compiler on LC blueos.
# Usage: blueos_nvcc_clang.sh <nvcc version> <CUDA arch> <clang version> [extra cmake arguments...]

# All three positional arguments are required; print usage and fail otherwise.
if [[ $# -lt 3 ]]; then
  echo
  echo "You must pass 3 arguments to the script (in this order): "
  echo " 1) compiler version number for nvcc"
  echo " 2) CUDA compute architecture"
  echo " 3) compiler version number for clang. "
  echo
  echo "For example: "
  echo " blueos_nvcc_clang.sh 10.2.89 sm_70 10.0.1"
  exit 1
fi

COMP_NVCC_VER=$1
COMP_ARCH=$2
COMP_CLANG_VER=$3
shift 3

BUILD_SUFFIX=lc_blueos-nvcc${COMP_NVCC_VER}-${COMP_ARCH}-clang${COMP_CLANG_VER}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $*"
echo

# Recreate the build tree from scratch (silencing rm's diagnostics); stop if
# it cannot be created or entered so nothing below runs in the wrong directory.
rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1

# Link the test scripts and the comb binary location into scripts/. The
# subshell keeps the working directory in the build tree even if a link fails.
mkdir scripts && ( cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )

module load cmake/3.14.5

cmake \
  -DCMAKE_BUILD_TYPE=Release \
  "-DMPI_CXX_COMPILER=/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-clang-${COMP_CLANG_VER}/bin/mpiclang++" \
  "-DCMAKE_CXX_COMPILER=/usr/tce/packages/clang/clang-${COMP_CLANG_VER}/bin/clang++" \
  -DBLT_CXX_STD=c++14 \
  -C ../host-configs/lc-builds/blueos/nvcc_clang_X.cmake \
  -DENABLE_MPI=On \
  -DENABLE_OPENMP=On \
  -DENABLE_CUDA=On \
  "-DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}" \
  "-DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}/bin/nvcc" \
  "-DCUDA_ARCH=${COMP_ARCH}" \
  "-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}" \
  "$@" \
  ..

--------------------------------------------------------------------------------
/scripts/lc-builds/blueos_nvcc_gcc.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

##############################################################################
## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
##
## Produced at the Lawrence Livermore National Laboratory
##
## LLNL-CODE-758885
##
## All rights reserved.
##
## This file is part of Comb.
##
## For details, see https://github.com/LLNL/Comb
## Please also see the LICENSE file for MIT license.
##############################################################################

# Configure a Comb build for nvcc with a gcc host compiler on LC blueos.
# Usage: blueos_nvcc_gcc.sh <nvcc version> <CUDA arch> <gcc version> [extra cmake arguments...]

# All three positional arguments are required; print usage and fail otherwise.
if [[ $# -lt 3 ]]; then
  echo
  echo "You must pass 3 arguments to the script (in this order): "
  echo " 1) compiler version number for nvcc"
  echo " 2) CUDA compute architecture"
  echo " 3) compiler version number for gcc. "
  echo
  echo "For example: "
  echo " blueos_nvcc_gcc.sh 10.2.89 sm_70 8.3.1"
  exit 1
fi

COMP_NVCC_VER=$1
COMP_ARCH=$2
COMP_GCC_VER=$3
shift 3

BUILD_SUFFIX=lc_blueos-nvcc${COMP_NVCC_VER}-${COMP_ARCH}-gcc${COMP_GCC_VER}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $*"
echo

# Recreate the build tree from scratch (silencing rm's diagnostics); stop if
# it cannot be created or entered so nothing below runs in the wrong directory.
rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1

# Link the test scripts and the comb binary location into scripts/. The
# subshell keeps the working directory in the build tree even if a link fails.
mkdir scripts && ( cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )

module load cmake/3.14.5

cmake \
  -DCMAKE_BUILD_TYPE=Release \
  -DMPI_CXX_COMPILER=/opt/openmpi/4.0/gnu/bin/mpiCC \
  -DMPI_C_COMPILER=/opt/openmpi/4.0/gnu/bin/mpicc \
  "-DCMAKE_CXX_COMPILER=/usr/tce/packages/gcc/gcc-${COMP_GCC_VER}/bin/g++" \
  "-DCMAKE_C_COMPILER=/usr/tce/packages/gcc/gcc-${COMP_GCC_VER}/bin/gcc" \
  -DBLT_CXX_STD=c++14 \
  -C ../host-configs/lc-builds/blueos/nvcc_gcc_X.cmake \
  -DENABLE_MPI=On \
  -DENABLE_OPENMP=On \
  -DENABLE_CUDA=On \
  "-DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}" \
  "-DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}/bin/nvcc" \
  "-DCUDA_ARCH=${COMP_ARCH}" \
  "-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}" \
  "$@" \
  ..

--------------------------------------------------------------------------------
/scripts/lc-builds/blueos_nvcc_xl.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

##############################################################################
## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
##
## Produced at the Lawrence Livermore National Laboratory
##
## LLNL-CODE-758885
##
## All rights reserved.
##
## This file is part of Comb.
##
## For details, see https://github.com/LLNL/Comb
## Please also see the LICENSE file for MIT license.
##############################################################################

# Configure a Comb build for nvcc with an xl host compiler on LC blueos.
# Usage: blueos_nvcc_xl.sh <nvcc version> <CUDA arch> <xl version> [extra cmake arguments...]

# All three positional arguments are required; print usage and fail otherwise.
if [[ $# -lt 3 ]]; then
  echo
  echo "You must pass 3 arguments to the script (in this order): "
  echo " 1) compiler version number for nvcc"
  echo " 2) CUDA compute architecture"
  echo " 3) compiler version number for xl. "
  echo
  echo "For example: "
  echo " blueos_nvcc_xl.sh 11.1.1 sm_70 2021.03.31"
  exit 1
fi

COMP_NVCC_VER=$1
COMP_ARCH=$2
COMP_XL_VER=$3
shift 3

BUILD_SUFFIX=lc_blueos-nvcc${COMP_NVCC_VER}-${COMP_ARCH}-xl${COMP_XL_VER}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $*"
echo

# Recreate the build tree from scratch (silencing rm's diagnostics); stop if
# it cannot be created or entered so nothing below runs in the wrong directory.
rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1

# Link the test scripts and the comb binary location into scripts/. The
# subshell keeps the working directory in the build tree even if a link fails.
mkdir scripts && ( cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )

module load cmake/3.14.5

cmake \
  -DCMAKE_BUILD_TYPE=Release \
  "-DMPI_CXX_COMPILER=/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-xl-${COMP_XL_VER}/bin/mpixlc++_r" \
  "-DCMAKE_CXX_COMPILER=/usr/tce/packages/xl/xl-${COMP_XL_VER}/bin/xlc++_r" \
  -DBLT_CXX_STD=c++14 \
  -C ../host-configs/lc-builds/blueos/nvcc_xl_X.cmake \
  -DENABLE_MPI=On \
  -DENABLE_OPENMP=On \
  -DENABLE_CUDA=On \
  "-DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}" \
  "-DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}/bin/nvcc" \
  "-DCUDA_ARCH=${COMP_ARCH}" \
  "-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}" \
  "$@" \
  ..

--------------------------------------------------------------------------------
/scripts/lc-builds/blueos_pgi.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

##############################################################################
## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
##
## Produced at the Lawrence Livermore National Laboratory
##
## LLNL-CODE-758885
##
## All rights reserved.
##
## This file is part of Comb.
##
## For details, see https://github.com/LLNL/Comb
## Please also see the LICENSE file for MIT license.
##############################################################################

# Configure a Comb build for pgi on LC blueos systems.
# Usage: blueos_pgi.sh <pgi version> [extra cmake arguments...]

# A compiler version is mandatory; print usage and fail when it is missing.
if [[ -z "${1:-}" ]]; then
  echo
  echo "You must pass a compiler version number to script. For example,"
  echo " blueos_pgi.sh 21.1"
  exit 1
fi

COMP_VER=$1
shift 1

BUILD_SUFFIX=lc_blueos-pgi-${COMP_VER}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $*"
echo

# Recreate the build tree from scratch; stop if it cannot be created or
# entered so nothing below runs in the wrong directory.
rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1

# Link the test scripts and the comb binary location into scripts/. The
# subshell keeps the working directory in the build tree even if a link fails.
mkdir scripts && ( cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )

module load cmake/3.14.5

cmake \
  -DCMAKE_BUILD_TYPE=Release \
  "-DMPI_CXX_COMPILER=/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-pgi-${COMP_VER}/bin/mpipgc++" \
  "-DMPI_C_COMPILER=/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-pgi-${COMP_VER}/bin/mpipgcc" \
  "-DCMAKE_CXX_COMPILER=/usr/tce/packages/pgi/pgi-${COMP_VER}/bin/pgc++" \
  "-DCMAKE_C_COMPILER=/usr/tce/packages/pgi/pgi-${COMP_VER}/bin/pgcc" \
  -C ../host-configs/lc-builds/blueos/pgi_X.cmake \
  -DENABLE_MPI=On \
  -DENABLE_OPENMP=On \
  "-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}" \
  "$@" \
  ..

--------------------------------------------------------------------------------
/scripts/lc-builds/blueos_xl.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

##############################################################################
## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
##
## Produced at the Lawrence Livermore National Laboratory
##
## LLNL-CODE-758885
##
## All rights reserved.
##
## This file is part of Comb.
##
## For details, see https://github.com/LLNL/Comb
## Please also see the LICENSE file for MIT license.
##############################################################################

# Configure a Comb build for xl on LC blueos systems.
# Usage: blueos_xl.sh <xl version> [extra cmake arguments...]

# A compiler version is mandatory; print usage and fail when it is missing.
if [[ -z "${1:-}" ]]; then
  echo
  echo "You must pass a compiler version number to script. For example,"
  echo " blueos_xl.sh 2021.03.31"
  exit 1
fi

COMP_VER=$1
shift 1

BUILD_SUFFIX=lc_blueos-xl-${COMP_VER}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $*"
echo

# Recreate the build tree from scratch; stop if it cannot be created or
# entered so nothing below runs in the wrong directory.
rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1

# Link the test scripts and the comb binary location into scripts/. The
# subshell keeps the working directory in the build tree even if a link fails.
mkdir scripts && ( cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )

module load cmake/3.14.5

cmake \
  -DCMAKE_BUILD_TYPE=Release \
  "-DMPI_CXX_COMPILER=/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-xl-${COMP_VER}/bin/mpixlc++_r" \
  "-DCMAKE_CXX_COMPILER=/usr/tce/packages/xl/xl-${COMP_VER}/bin/xlc++_r" \
  -DBLT_CXX_STD=c++14 \
  -C ../host-configs/lc-builds/blueos/xl_X.cmake \
  -DENABLE_MPI=On \
  -DENABLE_OPENMP=On \
  "-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}" \
  "$@" \
  ..

--------------------------------------------------------------------------------
/scripts/lc-builds/toss3_clang.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

##############################################################################
## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
##
## Produced at the Lawrence Livermore National Laboratory
##
## LLNL-CODE-758885
##
## All rights reserved.
##
## This file is part of Comb.
##
## For details, see https://github.com/LLNL/Comb
## Please also see the LICENSE file for MIT license.
##############################################################################

# Configure a Comb build for clang on LC toss3 systems.
# Usage: toss3_clang.sh <clang version> [extra cmake arguments...]

# A compiler version is mandatory; print usage and fail when it is missing.
if [[ -z "${1:-}" ]]; then
  echo
  echo "You must pass a compiler version number to script. For example,"
  echo " toss3_clang.sh 10.0.1"
  exit 1
fi

COMP_VER=$1
shift 1

BUILD_SUFFIX=lc_toss3-clang-${COMP_VER}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $*"
echo

# Recreate the build tree from scratch; stop if it cannot be created or
# entered so nothing below runs in the wrong directory.
rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1

# Link the test scripts and the comb binary location into scripts/. The
# subshell keeps the working directory in the build tree even if a link fails.
mkdir scripts && ( cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )

module load cmake/3.14.5

cmake \
  -DCMAKE_BUILD_TYPE=Release \
  "-DMPI_CXX_COMPILER=/usr/tce/packages/mvapich2/mvapich2-2.3-clang-${COMP_VER}/bin/mpic++" \
  "-DMPI_C_COMPILER=/usr/tce/packages/mvapich2/mvapich2-2.3-clang-${COMP_VER}/bin/mpicc" \
  "-DCMAKE_CXX_COMPILER=/usr/tce/packages/clang/clang-${COMP_VER}/bin/clang++" \
  "-DCMAKE_C_COMPILER=/usr/tce/packages/clang/clang-${COMP_VER}/bin/clang" \
  -C ../host-configs/lc-builds/toss3/clang_X.cmake \
  -DENABLE_MPI=On \
  -DENABLE_OPENMP=On \
  "-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}" \
  "$@" \
  ..

--------------------------------------------------------------------------------
/scripts/lc-builds/toss3_gcc.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

##############################################################################
## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
##
## Produced at the Lawrence Livermore National Laboratory
##
## LLNL-CODE-758885
##
## All rights reserved.
##
## This file is part of Comb.
##
## For details, see https://github.com/LLNL/Comb
## Please also see the LICENSE file for MIT license.
##############################################################################

# Configure a Comb build for gcc on LC toss3 systems.
# Usage: toss3_gcc.sh <gcc version> [extra cmake arguments...]

# A compiler version is mandatory; print usage and fail when it is missing.
if [[ -z "${1:-}" ]]; then
  echo
  echo "You must pass a compiler version number to script. For example,"
  echo " toss3_gcc.sh 8.3.1"
  exit 1
fi

COMP_VER=$1
shift 1

BUILD_SUFFIX=lc_toss3-gcc-${COMP_VER}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $*"
echo

# Recreate the build tree from scratch; stop if it cannot be created or
# entered so nothing below runs in the wrong directory.
rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1

# Link the test scripts and the comb binary location into scripts/. The
# subshell keeps the working directory in the build tree even if a link fails.
mkdir scripts && ( cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )

module load cmake/3.14.5

cmake \
  -DCMAKE_BUILD_TYPE=Release \
  "-DMPI_CXX_COMPILER=/usr/tce/packages/mvapich2/mvapich2-2.3-gcc-${COMP_VER}/bin/mpic++" \
  "-DMPI_C_COMPILER=/usr/tce/packages/mvapich2/mvapich2-2.3-gcc-${COMP_VER}/bin/mpicc" \
  "-DCMAKE_CXX_COMPILER=/usr/tce/packages/gcc/gcc-${COMP_VER}/bin/g++" \
  "-DCMAKE_C_COMPILER=/usr/tce/packages/gcc/gcc-${COMP_VER}/bin/gcc" \
  -C ../host-configs/lc-builds/toss3/gcc_X.cmake \
  -DENABLE_MPI=On \
  -DENABLE_OPENMP=On \
  "-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}" \
  "$@" \
  ..

--------------------------------------------------------------------------------
/scripts/lc-builds/toss3_hipcc.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

##############################################################################
## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
##
## Produced at the Lawrence Livermore National Laboratory
##
## LLNL-CODE-758885
##
## All rights reserved.
##
## This file is part of Comb.
##
## For details, see https://github.com/LLNL/Comb
## Please also see the LICENSE file for MIT license.
##############################################################################

# Configure a Comb HIP build with the rocm clang compilers on LC toss3.
# Usage: toss3_hipcc.sh <rocm version> <HIP arch> [extra cmake arguments...]

# Both positional arguments are required; print usage and fail otherwise.
if [[ $# -lt 2 ]]; then
  echo
  echo "You must pass 2 or more arguments to the script (in this order): "
  echo " 1) compiler version number"
  echo " 2) HIP compute architecture"
  echo " 3...) optional arguments to cmake"
  echo
  echo "For example: "
  echo " toss3_hipcc.sh 4.1.0 gfx906"
  echo " toss3_hipcc.sh 4.1.0 gfx906 -DBLT_CXX_STD=c++11"
  exit 1
fi

COMP_VER=$1
COMP_ARCH=$2
shift 2

# NOTE(review): HIP_CLANG_FLAGS is assembled here but never handed to cmake;
# the invocation below passes only --offload-arch via HIP_HIPCC_FLAGS, so the
# extra 4.5.x flag has no effect. Confirm which cmake variable was intended.
HIP_CLANG_FLAGS="--offload-arch=${COMP_ARCH}"
HOSTCONFIG="hip_X"

# Pick the host-config by rocm version; the first matching pattern wins, so
# 4.5.x must be tested before the general 4.x case.
case "${COMP_VER}" in
  4.5.*)
    HIP_CLANG_FLAGS="${HIP_CLANG_FLAGS} -mllvm -amdgpu-fixed-function-abi=1"
    HOSTCONFIG="hip_4_link_X"
    ;;
  4.*)
    HOSTCONFIG="hip_4_link_X"
    ;;
  3.*)
    HOSTCONFIG="hip_X"
    ;;
  *)
    echo "Unknown hip version, using ${HOSTCONFIG} host-config"
    ;;
esac

BUILD_SUFFIX=lc_toss3-hipcc-${COMP_VER}-${COMP_ARCH}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $*"
echo

# Recreate the build tree from scratch (silencing rm's diagnostics); stop if
# it cannot be created or entered so nothing below runs in the wrong directory.
rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1

# Link the test scripts and the comb binary location into scripts/. The
# subshell keeps the working directory in the build tree even if a link fails.
mkdir scripts && ( cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )

module load cmake/3.14.5

# unload rocm to avoid configuration problems where the loaded rocm and COMP_VER
# are inconsistent causing the rocprim from the module to be used unexpectedly
module unload rocm


cmake \
  -DCMAKE_BUILD_TYPE=Release \
  "-DMPI_CXX_COMPILER=/usr/tce/packages/cray-mpich-tce/cray-mpich-8.1.14-rocmcc-${COMP_VER}/bin/mpicxx" \
  "-DMPI_C_COMPILER=/usr/tce/packages/cray-mpich-tce/cray-mpich-8.1.14-rocmcc-${COMP_VER}/bin/mpicc" \
  -DROCM_ROOT_DIR="/opt/rocm-${COMP_VER}" \
  -DHIP_ROOT_DIR="/opt/rocm-${COMP_VER}/hip" \
  "-DHIP_CLANG_PATH=/opt/rocm-${COMP_VER}/llvm/bin" \
  "-DCMAKE_C_COMPILER=/opt/rocm-${COMP_VER}/llvm/bin/clang" \
  "-DCMAKE_CXX_COMPILER=/opt/rocm-${COMP_VER}/llvm/bin/clang++" \
  "-DHIP_HIPCC_FLAGS=--offload-arch=${COMP_ARCH}" \
  -C "../host-configs/lc-builds/toss3/${HOSTCONFIG}.cmake" \
  -DENABLE_MPI=On \
  -DENABLE_HIP=On \
  -DENABLE_OPENMP=Off \
  -DENABLE_CUDA=Off \
  "-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}" \
  "$@" \
  ..

# Post-configure instructions for the user.
echo
echo "***********************************************************************"
echo
echo "cd into directory build_${BUILD_SUFFIX} and run make to build Comb"
echo
echo " Please note that you have to have a consistent build environment"
echo " when you make Comb as cmake may reconfigure; unload the rocm module"
echo " or load the appropriate rocm module (${COMP_VER}) when building."
echo
echo " module unload rocm"
echo " srun -n1 make"
echo
echo " Also note that libmodules.so is in the cce install. You may have to"
echo " add that to your LD_LIBRARY_PATH to run."
echo
# The dollar sign is escaped so the hint shows a command the user can paste,
# not the value LD_LIBRARY_PATH happens to have while this script runs.
echo " export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/cray/pe/cce/13.0.2/cce-clang/x86_64/lib:/opt/cray/pe/cce/13.0.2/cce/x86_64/lib"
echo " srun -n1 ./bin/comb"
echo
echo "***********************************************************************"

--------------------------------------------------------------------------------
/scripts/lc-builds/toss3_icpc.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

##############################################################################
## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
##
## Produced at the Lawrence Livermore National Laboratory
##
## LLNL-CODE-758885
##
## All rights reserved.
##
## This file is part of Comb.
##
## For details, see https://github.com/LLNL/Comb
## Please also see the LICENSE file for MIT license.
##############################################################################

# Configure a Comb build for the Intel compilers on LC toss3 systems.
# Usage: toss3_icpc.sh <intel version> [extra cmake arguments...]

# A compiler version is mandatory; print usage and fail when it is missing.
if [[ -z "${1:-}" ]]; then
  echo
  echo "You must pass a compiler version number to script. For example,"
  echo " toss3_icpc.sh 19.1.0"
  exit 1
fi

COMP_VER=$1
shift 1

# Major version is everything before the first dot, so single-digit majors
# parse as integers too instead of yielding a fragment such as "9.".
COMP_MAJOR_VER=${COMP_VER%%.*}
GCC_HEADER_VER=7
USE_TBB=On

# Versions newer than 18 use the gcc 8 headers host-config.
if [[ "${COMP_MAJOR_VER}" -gt 18 ]]; then
  GCC_HEADER_VER=8
fi

# TBB is switched off for versions older than 18.
if [[ "${COMP_MAJOR_VER}" -lt 18 ]]; then
  USE_TBB=Off
fi

BUILD_SUFFIX=lc_toss3-icpc-${COMP_VER}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $*"
echo

# Recreate the build tree from scratch; stop if it cannot be created or
# entered so nothing below runs in the wrong directory.
rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1

# Link the test scripts and the comb binary location into scripts/. The
# subshell keeps the working directory in the build tree even if a link fails.
mkdir scripts && ( cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )

module load cmake/3.14.5

cmake \
  -DCMAKE_BUILD_TYPE=Release \
  "-DMPI_CXX_COMPILER=/usr/tce/packages/mvapich2/mvapich2-2.3-intel-${COMP_VER}/bin/mpic++" \
  "-DMPI_C_COMPILER=/usr/tce/packages/mvapich2/mvapich2-2.3-intel-${COMP_VER}/bin/mpicc" \
  "-DCMAKE_CXX_COMPILER=/usr/tce/packages/intel/intel-${COMP_VER}/bin/icpc" \
  "-DCMAKE_C_COMPILER=/usr/tce/packages/intel/intel-${COMP_VER}/bin/icc" \
  -DBLT_CXX_STD=c++14 \
  -C "../host-configs/lc-builds/toss3/icpc_X_gcc${GCC_HEADER_VER}headers.cmake" \
  -DENABLE_MPI=On \
  -DENABLE_OPENMP=On \
  "-DENABLE_TBB=${USE_TBB}" \
  "-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}" \
  "$@" \
  ..

--------------------------------------------------------------------------------
/scripts/lc-builds/toss3_pgi.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

##############################################################################
## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
##
## Produced at the Lawrence Livermore National Laboratory
##
## LLNL-CODE-758885
##
## All rights reserved.
##
## This file is part of Comb.
##
## For details, see https://github.com/LLNL/Comb
## Please also see the LICENSE file for MIT license.
##############################################################################

# Configure a Comb build for pgi on LC toss3 systems.
# Usage: toss3_pgi.sh <pgi version> [extra cmake arguments...]

# A compiler version is mandatory; print usage and fail when it is missing.
if [[ -z "${1:-}" ]]; then
  echo
  echo "You must pass a compiler version number to script. For example,"
  echo " toss3_pgi.sh 20.1"
  exit 1
fi

COMP_VER=$1
shift 1

BUILD_SUFFIX=lc_toss3-pgi-${COMP_VER}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $*"
echo

# Recreate the build tree from scratch; stop if it cannot be created or
# entered so nothing below runs in the wrong directory.
rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1

# Link the test scripts and the comb binary location into scripts/. The
# subshell keeps the working directory in the build tree even if a link fails.
mkdir scripts && ( cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )

module load cmake/3.14.5

cmake \
  -DCMAKE_BUILD_TYPE=Release \
  "-DMPI_CXX_COMPILER=/usr/tce/packages/mvapich2/mvapich2-2.3-pgi-${COMP_VER}/bin/mpic++" \
  "-DMPI_C_COMPILER=/usr/tce/packages/mvapich2/mvapich2-2.3-pgi-${COMP_VER}/bin/mpicc" \
  "-DCMAKE_CXX_COMPILER=/usr/tce/packages/pgi/pgi-${COMP_VER}/bin/pgc++" \
  "-DCMAKE_C_COMPILER=/usr/tce/packages/pgi/pgi-${COMP_VER}/bin/pgcc" \
  -C ../host-configs/lc-builds/toss3/pgi_X.cmake \
  -DENABLE_MPI=On \
  -DENABLE_OPENMP=On \
  "-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}" \
  "$@" \
  ..

--------------------------------------------------------------------------------
/scripts/lc-builds/toss4_cray-mpich_amdclang.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

##############################################################################
## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
##
## Produced at the Lawrence Livermore National Laboratory
##
## LLNL-CODE-758885
##
## All rights reserved.
##
## This file is part of Comb.
##
## For details, see https://github.com/LLNL/Comb
## Please also see the LICENSE file for MIT license.
##############################################################################

# Configure a Comb HIP build with amdclang and cray-mpich on LC toss4.
# Usage: toss4_cray-mpich_amdclang.sh <cray-mpich version> <rocm version> <HIP arch> [extra cmake arguments...]

# Three positional arguments are consumed below (MPI_VER, COMP_VER, COMP_ARCH),
# so anything fewer is a usage error; print usage and fail.
if [[ $# -lt 3 ]]; then
  echo
  echo "You must pass 3 or more arguments to the script (in this order): "
  echo " 1) cray-mpich compiler version number"
  echo " 2) compiler version number"
  echo " 3) HIP compute architecture"
  echo " 4...) optional arguments to cmake"
  echo
  echo "For example: "
  echo " toss4_cray-mpich_amdclang.sh 8.1.15 5.1.0 gfx906"
  echo " toss4_cray-mpich_amdclang.sh 8.1.15 5.1.0 gfx906 -DBLT_CXX_STD=c++17"
  exit 1
fi

MPI_VER=$1
COMP_VER=$2
COMP_ARCH=$3
shift 3

MY_HIP_ARCH_FLAGS="--offload-arch=${COMP_ARCH}"
HOSTCONFIG="hip_X"

# Pick the host-config by rocm version; the first matching pattern wins, so
# 4.5.x must be tested before the general 4.x case.
# NOTE(review): the repository tree lists only hip_X.cmake under
# host-configs/lc-builds/toss4, so the hip_4_link_X selections look like they
# would make cmake's -C fail — confirm.
case "${COMP_VER}" in
  4.5.*)
    ##HIP_CLANG_FLAGS="${MY_HIP_ARCH_FLAGS} -mllvm -amdgpu-fixed-function-abi=1"
    HOSTCONFIG="hip_4_link_X"
    ;;
  4.*)
    HOSTCONFIG="hip_4_link_X"
    ;;
  3.*)
    HOSTCONFIG="hip_X"
    ;;
  *)
    echo "Unknown hip version, using ${HOSTCONFIG} host-config"
    ;;
esac

BUILD_SUFFIX=lc_toss4-cray-mpich-${MPI_VER}-amdclang-${COMP_VER}-${COMP_ARCH}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $*"
echo

# Recreate the build tree from scratch (silencing rm's diagnostics); stop if
# it cannot be created or entered so nothing below runs in the wrong directory.
rm -rf "build_${BUILD_SUFFIX}" 2>/dev/null
mkdir "build_${BUILD_SUFFIX}" && cd "build_${BUILD_SUFFIX}" || exit 1

# Link the test scripts and the comb binary location into scripts/. The
# subshell keeps the working directory in the build tree even if a link fails.
mkdir scripts && ( cd scripts && ln -s ../../scripts/*.bash . && ln -s ../bin/comb . )

module load cmake/3.14.5

# unload rocm to avoid configuration problems where the loaded rocm and COMP_VER
# are inconsistent causing the rocprim from the module to be used unexpectedly
module unload rocm


# NOTE(review): CMAKE_HIP_ARCHITECTURES receives the full
# "--offload-arch=<arch>" flag string; CMake documents that variable as a list
# of architecture names, so the bare ${COMP_ARCH} may be what is wanted — confirm.
cmake \
  -DCMAKE_BUILD_TYPE=Release \
  "-DMPI_CXX_COMPILER=/usr/tce/packages/cray-mpich-tce/cray-mpich-${MPI_VER}-rocmcc-${COMP_VER}/bin/mpicxx" \
  "-DMPI_C_COMPILER=/usr/tce/packages/cray-mpich-tce/cray-mpich-${MPI_VER}-rocmcc-${COMP_VER}/bin/mpicc" \
  -DROCM_ROOT_DIR="/opt/rocm-${COMP_VER}" \
  -DHIP_ROOT_DIR="/opt/rocm-${COMP_VER}/hip" \
  "-DHIP_CLANG_PATH=/opt/rocm-${COMP_VER}/llvm/bin" \
  "-DCMAKE_C_COMPILER=/opt/rocm-${COMP_VER}/llvm/bin/amdclang" \
  "-DCMAKE_CXX_COMPILER=/opt/rocm-${COMP_VER}/llvm/bin/amdclang++" \
  -DCMAKE_HIP_ARCHITECTURES="${MY_HIP_ARCH_FLAGS}" \
  -C "../host-configs/lc-builds/toss4/${HOSTCONFIG}.cmake" \
  -DENABLE_MPI=On \
  -DENABLE_HIP=On \
  -DENABLE_OPENMP=Off \
  -DENABLE_CUDA=Off \
  "-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}" \
  "$@" \
  ..

# Post-configure instructions for the user.
echo
echo "***********************************************************************"
echo
echo "cd into directory build_${BUILD_SUFFIX} and run make to build Comb"
echo
echo " Please note that you have to have a consistent build environment"
echo " when you make Comb as cmake may reconfigure; unload the rocm module"
echo " or load the appropriate rocm module (${COMP_VER}) when building."
echo
echo " module unload rocm"
echo " srun -n1 make"
echo
echo " Also note that libmodules.so is in the cce install. You may have to"
echo " add that to your LD_LIBRARY_PATH to run."
107 | echo 108 | echo " export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/opt/cray/pe/cce/13.0.2/cce-clang/x86_64/lib:/opt/cray/pe/cce/13.0.2/cce/x86_64/lib:/opt/rocm-5.1.0/llvm/lib" 109 | echo " srun -n1 ./bin/comb" 110 | echo 111 | echo "***********************************************************************" 112 | -------------------------------------------------------------------------------- /scripts/mock_cuda_graphs_tests.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script should only be used with comb built without mpi 4 | 5 | # Note: you may need to bind processes to cores to get reasonable openmp behavior 6 | # Your scheduler may help with this 7 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads 8 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/ 9 | # Ex: 10 | # bash: 11 | # mpirun -np 1 bind_script comb 12 | # bind_script: 13 | # export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node 14 | # exec $@ 15 | 16 | # Comb executable or symlink 17 | run_comb="$(pwd)/comb" 18 | 19 | if [ ! 
-x "${run_comb}" ]; then 20 | echo "comb executable not found at ${run_comb}" 21 | exit 1 22 | fi 23 | 24 | # Choose arguments for comb 25 | # elements on one side of the cube for each process 26 | elems_per_procs_per_side=100 # 50 100 200 27 | # overall size of the grid 28 | let size=elems_per_procs_per_side 29 | comb_args="${size}_${size}_${size}" 30 | # divide the grid into a number of procs per side 31 | comb_args="${comb_args} -divide 1_1_1" 32 | # set the grid to be periodic in each dimension 33 | comb_args="${comb_args} -periodic 1_1_1" 34 | # set the halo width or number of ghost zones 35 | comb_args="${comb_args} -ghost 1_1_1" 36 | # set number of grid variables 37 | comb_args="${comb_args} -vars 3" 38 | # set number of communication cycles 39 | comb_args="${comb_args} -cycles 25" # 100 40 | # set cutoff between large and small message packing/unpacking kernels 41 | comb_args="${comb_args} -comm cutoff 250" 42 | # set the number of omp threads per process 43 | comb_args="${comb_args} -omp_threads 1" 44 | # disable seq execution tests 45 | comb_args="${comb_args} -exec disable seq" 46 | # enable cuda execution tests 47 | comb_args="${comb_args} -exec enable cuda" 48 | # enable cuda graph execution tests 49 | comb_args="${comb_args} -exec enable cuda_graph" 50 | # disable host memory tests 51 | comb_args="${comb_args} -memory disable host" 52 | # enable cuda managed memory tests 53 | comb_args="${comb_args} -memory enable cuda_managed" 54 | # enable mock communication tests 55 | comb_args="${comb_args} -comm enable mock" 56 | # disable mpi communication tests 57 | comb_args="${comb_args} -comm disable mpi" 58 | # disable fusing packs per variable per message, pack each boundary separately even those in the same message 59 | comb_args="${comb_args} -comm disallow per_message_pack_fusing" 60 | # disable fusing packs per message group, pack each message separately 61 | # comb_args="${comb_args} -comm disallow message_group_pack_fusing" 62 | # use device 
preferred memory instead of host pinned memory for device utility allocations, used by fused kernels 63 | # comb_args="${comb_args} -use_device_preferred_for_cuda_util_aloc" 64 | 65 | 66 | # set up arguments for communication method 67 | wait_any_method="-comm post_recv wait_all -comm post_send wait_all -comm wait_recv wait_all -comm wait_send wait_all" 68 | 69 | # set up the base command to run a test 70 | # use sep_out.bash to separate each rank's output 71 | run_test_base="${run_comb}" 72 | 73 | # for each communication method 74 | for comm_method in "${wait_any_method}"; do 75 | 76 | # Run a test with this comm method 77 | echo "${run_test_base} ${comm_method} ${comb_args}" 78 | ${run_test_base} ${comm_method} ${comb_args} 79 | 80 | done 81 | 82 | echo "done" 83 | -------------------------------------------------------------------------------- /scripts/mock_tests.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script should only be used with comb built without mpi 4 | 5 | # Note: you may need to bind processes to cores to get reasonable openmp behavior 6 | # Your scheduler may help with this 7 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads 8 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/ 9 | # Ex: 10 | # bash: 11 | # mpirun -np 1 bind_script comb 12 | # bind_script: 13 | # export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node 14 | # exec $@ 15 | 16 | # Comb executable or symlink 17 | run_comb="$(pwd)/comb" 18 | 19 | if [ ! 
-x "${run_comb}" ]; then 20 | echo "comb executable not found at ${run_comb}" 21 | exit 1 22 | fi 23 | 24 | # Choose arguments for comb 25 | # elements on one side of the cube for each process 26 | elems_per_procs_per_side=100 # 50 100 200 27 | # overall size of the grid 28 | let size=elems_per_procs_per_side 29 | comb_args="${size}_${size}_${size}" 30 | # divide the grid into a number of procs per side 31 | comb_args="${comb_args} -divide 1_1_1" 32 | # set the grid to be periodic in each dimension 33 | comb_args="${comb_args} -periodic 1_1_1" 34 | # set the halo width or number of ghost zones 35 | comb_args="${comb_args} -ghost 1_1_1" 36 | # set number of grid variables 37 | comb_args="${comb_args} -vars 3" 38 | # set number of communication cycles 39 | comb_args="${comb_args} -cycles 25" # 100 40 | # set cutoff between large and small message packing/unpacking kernels 41 | comb_args="${comb_args} -comm cutoff 250" 42 | # set the number of omp threads per process 43 | comb_args="${comb_args} -omp_threads 1" 44 | # enable seq execution tests 45 | comb_args="${comb_args} -exec enable seq" 46 | # enable host memory tests 47 | comb_args="${comb_args} -memory enable host" 48 | # enable mock communication tests 49 | comb_args="${comb_args} -comm enable mock" 50 | # disable mpi communication tests 51 | comb_args="${comb_args} -comm disable mpi" 52 | # disable fusing packs per variable per message 53 | comb_args="${comb_args} -comm disallow per_message_pack_fusing" 54 | 55 | 56 | # set up arguments for communication method 57 | wait_any_method="-comm post_recv wait_all -comm post_send wait_all -comm wait_recv wait_all -comm wait_send wait_all" 58 | 59 | # set up the base command to run a test 60 | # use sep_out.bash to separate each rank's output 61 | run_test_base="${run_comb}" 62 | 63 | # for each communication method 64 | for comm_method in "${wait_any_method}"; do 65 | 66 | # Run a test with this comm method 67 | echo "${run_test_base} ${comm_method} ${comb_args}"
68 | ${run_test_base} ${comm_method} ${comb_args} 69 | 70 | done 71 | 72 | echo "done" 73 | -------------------------------------------------------------------------------- /scripts/run_nvprof.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # runs nvprof on each rank separately creating a .nvprof output file 4 | # separates the output of each mpi rank into a different file 5 | 6 | ARGS="$@" 7 | ARGS_UNDERSCORE="$(sed s/\ /_/g <<<$ARGS)" 8 | ARGS_UNDERSCORE="$(sed s-/-@-g <<<$ARGS_UNDERSCORE)" 9 | ARGS_UNDERSCORE="$(echo $ARGS_UNDERSCORE | cut -c -192)" 10 | 11 | # find the environment variable with the mpi rank of this process 12 | if [[ ! "x" == "x$JSM_NAMESPACE_RANK" ]]; then 13 | RANK=${JSM_NAMESPACE_RANK} 14 | RANK_VAR="JSM_NAMESPACE_RANK" 15 | elif [[ ! "x" == "x$OMPI_COMM_WORLD_RANK" ]]; then 16 | RANK=${OMPI_COMM_WORLD_RANK} 17 | RANK_VAR="OMPI_COMM_WORLD_RANK" 18 | elif [[ ! "x" == "x$MPIRUN_RANK" ]]; then 19 | RANK=${MPIRUN_RANK} 20 | RANK_VAR="MPIRUN_RANK" 21 | else 22 | echo "Could not find mpirank" 1>&2 23 | exit 1 24 | fi 25 | 26 | # attempt to find the name of the node this mpi rank is running on 27 | if [[ ! "x" == "x$nodename" ]]; then 28 | NODE="$nodename" 29 | NODE_VAR="nodename" 30 | elif [[ ! "x" == "x$SLURMD_NODENAME" ]]; then 31 | NODE="$SLURMD_NODENAME" 32 | NODE_VAR="SLURMD_NODENAME" 33 | elif [[ ! "x" == "x$LCSCHEDCLUSTER" ]]; then 34 | NODE="$LCSCHEDCLUSTER" 35 | NODE_VAR="LCSCHEDCLUSTER" 36 | fi 37 | 38 | # create an identifier for this process using its rank 39 | PROC_NAME="${RANK}" 40 | PROC_NAME_VAR="%q{${RANK_VAR}}" 41 | 42 | # add the nodename to the process identifier if available 43 | if [[ ! 
"x" == "x$NODE_VAR" ]]; then 44 | PROC_NAME="${PROC_NAME}_${NODE}" 45 | PROC_NAME_VAR="${PROC_NAME_VAR}_%q{${NODE_VAR}}" 46 | fi 47 | 48 | # use args and rank to make file name 49 | OUT_FILE_NVPROF="runnvprof.${ARGS_UNDERSCORE}.${PROC_NAME_VAR}" 50 | OUT_FILE="runnvprof.${ARGS_UNDERSCORE}.${PROC_NAME}" 51 | if [ -f "$OUT_FILE" ]; then 52 | echo "File already exists $OUT_FILE" 1>&2 53 | exit 1 54 | fi 55 | 56 | # options to pass to nvprof 57 | NVPROF_OPTS="-o ${OUT_FILE_NVPROF}.nvprof" 58 | # NVPROF_OPTS="$NVPROF_OPTS --profile-from-start off" 59 | # NVPROF_OPTS="$NVPROF_OPTS -f" 60 | # NVPROF_OPTS="$NVPROF_OPTS --process-name \"MPI Rank ${PROC_NAME_VAR}\"" 61 | # NVPROF_OPTS="$NVPROF_OPTS --system-profiling on" 62 | # NVPROF_OPTS="$NVPROF_OPTS --demangling on" 63 | # NVPROF_OPTS="$NVPROF_OPTS --cpu-profiling off" 64 | # NVPROF_OPTS="$NVPROF_OPTS --unified-memory-profiling per-process-device" 65 | # NVPROF_OPTS="$NVPROF_OPTS --cpu-thread-tracing on" 66 | 67 | # find nvprof 68 | NVPROF="$(which nvprof)" 69 | if [ ! 
-f "$NVPROF" ]; then 70 | echo "Could not find $NVPROF" 1>&2 71 | exit 1 72 | fi 73 | NVPROF="$NVPROF $NVPROF_OPTS" 74 | 75 | # print the command to be executed for the 0th rank 76 | if [[ "x0" == "x$RANK" ]]; then 77 | echo "$NVPROF $ARGS &> ${OUT_FILE}" 78 | fi 79 | # execute nvprof and the executable and redirect its output to a file 80 | exec $NVPROF $ARGS &> "${OUT_FILE}" 81 | -------------------------------------------------------------------------------- /scripts/run_tests.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | procs_per_node=-1 4 | procs_per_side="" 5 | test_script="" 6 | 7 | positional_arg=0 8 | 9 | ################################################################################ 10 | # 11 | # Usage: 12 | # run_tests.bash [args to run_tests.bash] procs_per_side test_script 13 | # 14 | # Parse any args for this script beginning with - and consume them using shift 15 | # the remaining positional args are procs_per_side and test_script 16 | # 17 | # Examples: 18 | # run_tests.bash 2 focused_tests.bash 19 | # # Launch focused_tests.bash with 2x2x2 procs with default procs per node 20 | # 21 | # run_tests.bash -ppn 4 2 focused_tests.bash 22 | # # Launch focused_tests.bash with 2x2x2 procs 23 | # # and 4 procs per node 24 | # 25 | ################################################################################ 26 | while [ "$#" -gt 0 ]; do 27 | 28 | if [[ "$1" =~ ^\-.* ]]; then 29 | 30 | if [[ "x$1" == "x-ppn" || "x$1" == "x--procs-per-node" ]]; then 31 | 32 | if [ "$#" -le 1 ]; then 33 | echo "missing argument to $1" 1>&2 34 | exit 1 35 | fi 36 | 37 | natural_re='^[0-9]+$' 38 | if !
[[ "$2" =~ $natural_re ]]; then 39 | echo "invalid arguments $1 $2: argument to $1 must be a number" 1>&2 40 | exit 1 41 | fi 42 | 43 | procs_per_node="$2" 44 | shift 45 | 46 | else 47 | 48 | echo "unknown arg $1" 1>&2 49 | exit 1 50 | 51 | fi 52 | 53 | else 54 | 55 | if [[ "x$positional_arg" == "x0" ]]; then 56 | 57 | procs_per_side="$1" 58 | 59 | elif [[ "x$positional_arg" == "x1" ]]; then 60 | 61 | test_script="$1" 62 | 63 | else 64 | 65 | echo "Found extra positional arg $1" 1>&2 66 | exit 1 67 | 68 | fi 69 | 70 | let positional_arg=positional_arg+1 71 | fi 72 | 73 | shift 74 | 75 | done 76 | 77 | if [[ "x" == "x$procs_per_side" ]]; then 78 | echo "First positional arg procs_per_side not given" 1>&2 79 | exit 1 80 | fi 81 | if [[ "x" == "x$test_script" ]]; then 82 | echo "Second positional arg test_script not given" 1>&2 83 | exit 1 84 | fi 85 | 86 | let procs=procs_per_side*procs_per_side*procs_per_side 87 | 88 | if [ ! -f "$test_script" ]; then 89 | echo "tests script $test_script not found" 90 | exit 1 91 | fi 92 | 93 | # Choose a command to get nodes 94 | if [[ ! 
"x" == "x$SYS_TYPE" ]]; then 95 | if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then 96 | # Command used to get nodes on sierra systems 97 | 98 | if [[ "x-1" == "x$procs_per_node" ]]; then 99 | procs_per_node=4 100 | fi 101 | let nodes=(procs+procs_per_node-1)/procs_per_node 102 | 103 | # get_nodes="bsub -nnodes ${nodes} -core_isolation 2 -W 240 -G guests -Is -XF" 104 | get_nodes="lalloc ${nodes} -W 240 --shared-launch" 105 | 106 | elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then 107 | # Command used to get nodes on EA systems 108 | 109 | if [[ "x-1" == "x$procs_per_node" ]]; then 110 | procs_per_node=4 111 | fi 112 | let nodes=(procs+procs_per_node-1)/procs_per_node 113 | 114 | get_nodes="bsub -n ${procs} -R \"span[ptile=${procs_per_node}]\" -W 240 -G guests -Is -XF" 115 | 116 | elif [[ "x$SYS_TYPE" =~ xtoss_4_x86_64_ib_cray ]]; then 117 | # Command used to get nodes on ElCap EA systems 118 | 119 | if [[ "x-1" == "x$procs_per_node" ]]; then 120 | procs_per_node=1 121 | fi 122 | let nodes=(procs+procs_per_node-1)/procs_per_node 123 | 124 | get_nodes="salloc -N${nodes} -t 240 --exclusive" 125 | 126 | else 127 | # Command used to get nodes on slurm scheduled systems 128 | 129 | if [[ "x-1" == "x$procs_per_node" ]]; then 130 | procs_per_node=1 131 | fi 132 | let nodes=(procs+procs_per_node-1)/procs_per_node 133 | 134 | get_nodes="salloc -N${nodes} -t 240 --exclusive" 135 | 136 | fi 137 | else 138 | # Command used to get nodes on other systems 139 | if [[ "x-1" == "x$procs_per_node" ]]; then 140 | procs_per_node=1 141 | fi 142 | let nodes=(procs+procs_per_node-1)/procs_per_node 143 | 144 | # Don't know how to get nodes, defer to mpi in next script 145 | get_nodes="" 146 | 147 | fi 148 | 149 | run_tests="$test_script $nodes $procs $procs_per_side" 150 | 151 | full_test="${get_nodes} ${run_tests}" 152 | 153 | echo "${full_test}" 154 | time ${full_test} 155 | -------------------------------------------------------------------------------- /scripts/scale_tests.bash: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nodes=$1 4 | procs=$2 5 | procs_per_side=$3 6 | 7 | # extra arguments to comb (always starts with a space or is empty) 8 | comb_xargs="" 9 | 10 | # Choose a command to run mpi based on the system being used 11 | if [[ ! "x" == "x$SYS_TYPE" ]]; then 12 | if [[ "x$SYS_TYPE" =~ xblueos.*_p9 ]]; then 13 | # Command used to run mpi on sierra systems 14 | run_mpi="lrun -N$nodes -p$procs" 15 | # add arguments to turn on cuda aware mpi (optionally disable gpu direct) 16 | # run_mpi="${run_mpi} --smpiargs \"-gpu\"" 17 | # run_mpi="${run_mpi} --smpiargs \"-gpu -disable_gdr\"" 18 | # comb_xargs="${comb_xargs} -cuda_aware_mpi" 19 | elif [[ "x$SYS_TYPE" =~ xblueos.* ]]; then 20 | # Command used to run mpi on EA systems 21 | run_mpi="mpirun -np $procs /usr/tcetmp/bin/mpibind" 22 | else 23 | # Command used to run mpi on slurm scheduled systems 24 | run_mpi="srun -N$nodes -n$procs" 25 | fi 26 | else 27 | # Command used to run mpi with mpirun 28 | # https://www.open-mpi.org/doc/v2.0/man1/mpirun.1.php 29 | # Note: you may need to use additional options to get reasonable mpi behavior 30 | # --host=hostname0,hostname1,... 
https://www.open-mpi.org/faq/?category=running#mpirun-host 31 | # --hostfile my_hosts https://www.open-mpi.org/faq/?category=running#mpirun-hostfile 32 | run_mpi="mpirun -np $procs" 33 | 34 | # Command used to run mpi with mpiexec 35 | # https://www.mpich.org/static/docs/v3.1/www1/mpiexec.html 36 | # run_mpi="mpiexec -n $procs" 37 | fi 38 | 39 | # Note: you may need to bind processes to cores to get reasonable openmp behavior 40 | # Your scheduler may help with this 41 | # Otherwise you may need to set environment variables for each proc to bind it to cores/threads 42 | # http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/ 43 | # Ex: 44 | # bash: 45 | # mpirun -np 1 bind_script comb 46 | # bind_script: 47 | # export OMP_PLACES={0,2} # this depends on the local rank of the process if running more than one process per node 48 | # exec $@ 49 | 50 | # Comb executable or symlink 51 | run_comb="$(pwd)/comb" 52 | 53 | # Choose arguments for comb 54 | # overall size of the grid 55 | let size=procs_per_side*100 56 | comb_args="${size}_${size}_${size}" 57 | # divide the grid into a number of procs per side 58 | comb_args="${comb_args} -divide ${procs_per_side}_${procs_per_side}_${procs_per_side}" 59 | # set the grid to be periodic in each dimension 60 | comb_args="${comb_args} -periodic 1_1_1" 61 | # set the halo width or number of ghost zones 62 | comb_args="${comb_args} -ghost 1_1_1" 63 | # set number of grid variables 64 | comb_args="${comb_args} -vars 3" 65 | # set number of communication cycles 66 | comb_args="${comb_args} -cycles 100" 67 | # set cutoff between large and small message packing/unpacking kernels 68 | comb_args="${comb_args} -comm cutoff 250" 69 | # set the number of omp threads per process 70 | comb_args="${comb_args} -omp_threads 10" 71 | # enable all execution tests 72 | comb_args="${comb_args} -exec enable all" 73 | # enable all memory tests 74 | comb_args="${comb_args} -memory enable all" 75 | # enable all
communication tests 76 | comb_args="${comb_args} -comm enable all" 77 | # disable mpi_type execution tests (MPI Packing) 78 | # comb_args="${comb_args} -exec disable mpi_type" 79 | 80 | # add extra arguments for features enabled outside of the comb args block 81 | comb_args="${comb_args}${comb_xargs}" 82 | 83 | # set up arguments for a variety of communication methods 84 | wait_all_method="-comm post_recv wait_all -comm post_send wait_all -comm wait_recv wait_all -comm wait_send wait_all" 85 | wait_some_method="-comm post_recv wait_some -comm post_send wait_some -comm wait_recv wait_some -comm wait_send wait_some" 86 | wait_any_method="-comm post_recv wait_any -comm post_send wait_any -comm wait_recv wait_any -comm wait_send wait_any" 87 | 88 | test_all_method="-comm post_recv wait_all -comm post_send test_all -comm wait_recv wait_all -comm wait_send wait_all" 89 | test_some_method="-comm post_recv wait_some -comm post_send test_some -comm wait_recv wait_some -comm wait_send wait_some" 90 | test_any_method="-comm post_recv wait_any -comm post_send test_any -comm wait_recv wait_any -comm wait_send wait_any" 91 | 92 | # set up the base command to run a test 93 | # use sep_out.bash to separate each rank's output 94 | run_test_base="${run_mpi} ${run_comb}" 95 | 96 | # for each communication method 97 | for comm_method in "${wait_all_method}" "${wait_some_method}" "${wait_any_method}" "${test_all_method}" "${test_some_method}" "${test_any_method}"; do 98 | 99 | # Run a test with this comm method 100 | echo "${run_test_base} ${comm_method} ${comb_args}" 101 | ${run_test_base} ${comm_method} ${comb_args} 102 | 103 | done 104 | 105 | echo "done" 106 | -------------------------------------------------------------------------------- /scripts/sep_out.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # separates the output of each mpi rank into a different file 4 | 5 | ARGS="$@" 6 | ARGS_UNDERSCORE="$(sed s/\ /_/g 
<<<$ARGS)" 7 | ARGS_UNDERSCORE="$(sed s-/-@-g <<<$ARGS_UNDERSCORE)" 8 | ARGS_UNDERSCORE="$(echo $ARGS_UNDERSCORE | cut -c -192)" 9 | 10 | # attempt to find the environment variable with the mpi rank of this process 11 | if [[ ! "x" == "x$JSM_NAMESPACE_RANK" ]]; then 12 | RANK=${JSM_NAMESPACE_RANK} 13 | elif [[ ! "x" == "x$SLURM_PROCID" ]]; then 14 | RANK=${SLURM_PROCID} 15 | elif [[ ! "x" == "x$OMPI_COMM_WORLD_RANK" ]]; then 16 | RANK=${OMPI_COMM_WORLD_RANK} 17 | elif [[ ! "x" == "x$MPIRUN_RANK" ]]; then 18 | RANK=${MPIRUN_RANK} 19 | else 20 | echo "sep_out.bash Could not find mpirank" 1>&2 21 | exit 1 22 | fi 23 | 24 | # use args and rank to make file name 25 | OUT_FILE="sepout.${ARGS_UNDERSCORE}.${RANK}" 26 | if [ -f "$OUT_FILE" ]; then 27 | echo "File already exists $OUT_FILE" 1>&2 28 | exit 1 29 | fi 30 | 31 | # print the command to be executed for the 0th rank 32 | if [[ "x0" == "x$RANK" ]]; then 33 | echo "$ARGS &> $OUT_FILE" 34 | fi 35 | # execute the executable and redirect its output to a file 36 | exec $ARGS &> $OUT_FILE 37 | -------------------------------------------------------------------------------- /scripts/ubuntu-builds/ubuntu_clang.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ############################################################################## 4 | ## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 5 | ## 6 | ## Produced at the Lawrence Livermore National Laboratory 7 | ## 8 | ## LLNL-CODE-758885 9 | ## 10 | ## All rights reserved. 11 | ## 12 | ## This file is part of Comb. 13 | ## 14 | ## For details, see https://github.com/LLNL/Comb 15 | ## Please also see the LICENSE file for MIT license. 
##############################################################################

# Suffix shared by the build and install directory names of this configuration.
BUILD_SUFFIX=ubuntu-clang

# Start from an empty build directory. Stop if it cannot be created or entered:
# otherwise cmake below would run in the caller's directory with ".." as source.
rm -rf build_${BUILD_SUFFIX} 2>/dev/null
mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} || exit 1

# module load cmake/3.14.5

# Configure with the clang host-config; any arguments given to this script are
# forwarded to cmake ahead of the source directory.
cmake \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_C_COMPILER=/usr/bin/clang \
  -DCMAKE_CXX_COMPILER=/usr/bin/clang++ \
  -C ../host-configs/ubuntu-builds/clang_X.cmake \
  -DENABLE_OPENMP=ON \
  -DENABLE_CUDA=OFF \
  -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
  "$@" \
  ..

--------------------------------------------------------------------------------
/scripts/ubuntu-builds/ubuntu_gcc.sh:
--------------------------------------------------------------------------------
#!/bin/bash

##############################################################################
## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
##
## Produced at the Lawrence Livermore National Laboratory
##
## LLNL-CODE-758885
##
## All rights reserved.
##
## This file is part of Comb.
##
## For details, see https://github.com/LLNL/Comb
## Please also see the LICENSE file for MIT license.
##############################################################################

# Suffix shared by the build and install directory names of this configuration.
BUILD_SUFFIX=ubuntu-gcc

# Start from an empty build directory. Stop if it cannot be created or entered:
# otherwise cmake below would run in the caller's directory with ".." as source.
rm -rf build_${BUILD_SUFFIX} 2>/dev/null
mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} || exit 1

# module load cmake/3.14.5

# Configure with the gcc host-config; any arguments given to this script are
# forwarded to cmake ahead of the source directory.
cmake \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_C_COMPILER=/usr/bin/gcc \
  -DCMAKE_CXX_COMPILER=/usr/bin/g++ \
  -C ../host-configs/ubuntu-builds/gcc_X.cmake \
  -DENABLE_OPENMP=ON \
  -DENABLE_CUDA=OFF \
  -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
  "$@" \
  ..
--------------------------------------------------------------------------------
/scripts/ubuntu-builds/ubuntu_hipcc.sh:
--------------------------------------------------------------------------------
#!/bin/bash

##############################################################################
## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
##
## Produced at the Lawrence Livermore National Laboratory
##
## LLNL-CODE-758885
##
## All rights reserved.
##
## This file is part of Comb.
##
## For details, see https://github.com/LLNL/Comb
## Please also see the LICENSE file for MIT license.
##############################################################################

# Suffix shared by the build and install directory names of this configuration.
BUILD_SUFFIX=ubuntu-hipcc

# Start from an empty build directory. rm reports problems on stderr, so that is
# the stream to silence. Stop if the directory cannot be created or entered:
# otherwise cmake below would run in the caller's directory with ".." as source.
rm -rf build_${BUILD_SUFFIX} 2>/dev/null
mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} || exit 1

# Configure a Debug HIP build with the hip host-config; any arguments given to
# this script are forwarded to cmake ahead of the source directory.
cmake \
  -DCMAKE_BUILD_TYPE=Debug \
  -DCMAKE_C_COMPILER=/usr/bin/gcc \
  -DCMAKE_CXX_COMPILER=/usr/bin/g++ \
  -C ../host-configs/ubuntu-builds/hip.cmake \
  -DENABLE_OPENMP=OFF \
  -DENABLE_CUDA=OFF \
  -DENABLE_HIP=ON \
  -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
  "$@" \
  ..

--------------------------------------------------------------------------------
/scripts/ubuntu-builds/ubuntu_nvcc10_gcc8.sh:
--------------------------------------------------------------------------------
#!/bin/bash

##############################################################################
## Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC.
##
## Produced at the Lawrence Livermore National Laboratory
##
## LLNL-CODE-758885
##
## All rights reserved.
##
## This file is part of Comb.
##
## For details, see https://github.com/LLNL/Comb
## Please also see the LICENSE file for MIT license.
16 | ############################################################################## 17 | 18 | BUILD_SUFFIX=ubuntu-nvcc10-gcc8 19 | 20 | rm -rf build_${BUILD_SUFFIX} >/dev/null 21 | mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} 22 | 23 | cmake \ 24 | -DCMAKE_BUILD_TYPE=Release \ 25 | -DCMAKE_C_COMPILER=/usr/bin/gcc-8 \ 26 | -DCMAKE_CXX_COMPILER=/usr/bin/g++-8 \ 27 | -C ../host-configs/ubuntu-builds/nvcc_gcc_X.cmake \ 28 | -DENABLE_OPENMP=ON \ 29 | -DENABLE_CUDA=ON \ 30 | -DCMAKE_CUDA_COMPILER=/usr/bin/nvcc \ 31 | -DCUDA_ARCH=sm_70 \ 32 | -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ 33 | "$@" \ 34 | .. 35 | -------------------------------------------------------------------------------- /src/do_cycles.cpp.in: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 
14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #include "comb.hpp" 17 | #include "comm_pol_@COMPOL@.hpp" 18 | #include "do_cycles.hpp" 19 | 20 | namespace COMB { 21 | 22 | // instantiate function templates required by do_cycles_allocators.hpp 23 | template void do_cycles<@COMPOL@_pol, 24 | ExecContext<@EXECMESH@_pol>, 25 | ExecContext<@EXECMANY@_pol>, 26 | ExecContext<@EXECFEW@_pol>>( 27 | CommContext<@COMPOL@_pol>& con_comm_in, 28 | CommInfo& comm_info, MeshInfo& info, 29 | IdxT num_vars, IdxT ncycles, 30 | ContextHolder>& con_mesh_in, AllocatorInfo& aloc_mesh_in, 31 | ContextHolder>& con_many_in, AllocatorInfo& aloc_many_in, 32 | ContextHolder>& con_few_in, AllocatorInfo& aloc_few_in, 33 | Timer& tm, Timer& tm_total); 34 | 35 | } // namespace COMB 36 | -------------------------------------------------------------------------------- /src/print_timer.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 
14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #include "config.hpp" 17 | 18 | #include "comb.hpp" 19 | 20 | #include "CommFactory.hpp" 21 | 22 | #include 23 | 24 | namespace COMB { 25 | 26 | namespace detail { 27 | 28 | void print_timer(CommInfo& comminfo, Timer& tm, const char* prefix, int max_name_len, bool print_header_row) { 29 | 30 | auto res = tm.getStats(); 31 | 32 | double* sums = new double[res.size()]; 33 | double* mins = new double[res.size()]; 34 | double* maxs = new double[res.size()]; 35 | long * nums = new long [res.size()]; 36 | 37 | for (int i = 0; i < (int)res.size(); ++i) { 38 | sums[i] = res[i].sum; 39 | mins[i] = res[i].min; 40 | maxs[i] = res[i].max; 41 | nums[i] = res[i].num; 42 | } 43 | 44 | double* final_sums = nullptr; 45 | double* final_mins = nullptr; 46 | double* final_maxs = nullptr; 47 | long * final_nums = nullptr; 48 | if (comminfo.rank == 0) { 49 | final_sums = new double[res.size()]; 50 | final_mins = new double[res.size()]; 51 | final_maxs = new double[res.size()]; 52 | final_nums = new long [res.size()]; 53 | } 54 | 55 | #ifdef COMB_ENABLE_MPI 56 | MPI_Reduce(sums, final_sums, res.size(), MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); 57 | MPI_Reduce(mins, final_mins, res.size(), MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); 58 | MPI_Reduce(maxs, final_maxs, res.size(), MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); 59 | MPI_Reduce(nums, final_nums, res.size(), MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD); 60 | #else 61 | if (comminfo.rank == 0) { 62 | for (int i = 0; i < (int)res.size(); ++i) { 63 | final_sums[i] = sums[i]; 64 | final_mins[i] = mins[i]; 65 | final_maxs[i] = maxs[i]; 66 | final_nums[i] = nums[i]; 67 | } 68 | } 69 | #endif 70 | 71 | if (comminfo.rank == 0) { 72 | 73 | for (int i = 0; i < (int)res.size(); ++i) { 74 | int padding = max_name_len - res[i].name.size(); 75 | fgprintf(FileGroup::summary, "%s%s:%*s num %ld avg %.9f s min %.9f s max %.9f s\n", 76 | prefix, res[i].name.c_str(), 
padding, "", final_nums[i], final_sums[i]/final_nums[i], final_mins[i], final_maxs[i]); 77 | } 78 | 79 | if (!res.empty() && print_header_row) { 80 | int padding = max_name_len - 0; 81 | fgprintf(FileGroup::summary_csv, "%s%s%*s, %9s, %11s, %11s, %11s\n", 82 | prefix, "", padding, "", "number", "average(s)", "min(s)", "max(s)"); 83 | } 84 | for (int i = 0; i < (int)res.size(); ++i) { 85 | int padding = max_name_len - res[i].name.size(); 86 | fgprintf(FileGroup::summary_csv, "%s%s%*s, %9ld, %.9f, %.9f, %.9f\n", 87 | prefix, res[i].name.c_str(), padding, "", final_nums[i], final_sums[i]/final_nums[i], final_mins[i], final_maxs[i]); 88 | } 89 | 90 | delete[] final_sums; 91 | delete[] final_mins; 92 | delete[] final_maxs; 93 | delete[] final_nums; 94 | } 95 | 96 | for (int i = 0; i < (int)res.size(); ++i) { 97 | int padding = max_name_len - res[i].name.size(); 98 | fgprintf(FileGroup::proc, "%s%s:%*s num %ld avg %.9f s min %.9f s max %.9f s\n", 99 | prefix, res[i].name.c_str(), padding, "", nums[i], sums[i]/nums[i], mins[i], maxs[i]); 100 | } 101 | 102 | delete[] sums; 103 | delete[] mins; 104 | delete[] maxs; 105 | delete[] nums; 106 | } 107 | 108 | int get_max_name_len(Timer& tm) 109 | { 110 | int max_name_len = 0; 111 | 112 | auto res = tm.getStats(); 113 | for (auto& stat : res) { 114 | max_name_len = std::max(max_name_len, (int)stat.name.size()); 115 | } 116 | 117 | return max_name_len; 118 | } 119 | 120 | } // namespace detail 121 | 122 | void print_timer(CommInfo& comminfo, Timer& tm, const char* prefix) 123 | { 124 | int max_name_len = detail::get_max_name_len(tm); 125 | constexpr bool print_header_row = true; 126 | 127 | detail::print_timer(comminfo, tm, prefix, max_name_len, print_header_row); 128 | } 129 | 130 | void print_timers(CommInfo& comminfo, Timer& tm0, Timer& tm1, const char* prefix) 131 | { 132 | int max_name_len = detail::get_max_name_len(tm0); 133 | max_name_len = std::max(detail::get_max_name_len(tm1), max_name_len); 134 | constexpr bool 
print_header_row = true; 135 | 136 | detail::print_timer(comminfo, tm0, prefix, max_name_len, print_header_row); 137 | detail::print_timer(comminfo, tm1, prefix, max_name_len, !print_header_row); 138 | } 139 | 140 | void print_message_info(CommInfo& comminfo, MeshInfo& info, 141 | COMB::Allocator& aloc_unused, 142 | IdxT num_vars, 143 | bool print_packing_sizes, 144 | bool print_message_sizes) 145 | { 146 | if (!(print_packing_sizes || print_message_sizes)) { 147 | return; 148 | } 149 | 150 | const char* prefix = ""; 151 | 152 | if (print_packing_sizes) { 153 | fgprintf(FileGroup::all, "%sprint message and packing sizes to proc file(s)\n", 154 | prefix); 155 | } else if (print_message_sizes) { 156 | fgprintf(FileGroup::all, "%sprint message sizes to proc file(s)\n", 157 | prefix); 158 | } 159 | 160 | Range r0("print_message_info", Range::green); 161 | 162 | std::vector vars; 163 | vars.reserve(num_vars); 164 | 165 | { 166 | CommFactory factory(comminfo); 167 | 168 | for (IdxT i = 0; i < num_vars; ++i) { 169 | 170 | vars.push_back(MeshData(info, aloc_unused)); 171 | 172 | factory.add_var(vars[i]); 173 | } 174 | 175 | factory.print_message_info(print_packing_sizes, print_message_sizes); 176 | } 177 | 178 | } 179 | 180 | } // namespace COMB 181 | -------------------------------------------------------------------------------- /src/test_cycles_gdsync.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 
14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #include "comb.hpp" 17 | 18 | #ifdef COMB_ENABLE_GDSYNC 19 | 20 | #include "comm_pol_gdsync.hpp" 21 | #include "do_cycles_allocators.hpp" 22 | 23 | namespace COMB { 24 | 25 | void test_cycles_gdsync(CommInfo& comminfo, MeshInfo& info, 26 | COMB::Executors& exec, 27 | COMB::Allocators& alloc, 28 | IdxT num_vars, IdxT ncycles, Timer& tm, Timer& tm_total) 29 | { 30 | CommContext con_comm{exec.base_cuda.get()}; 31 | 32 | #ifdef COMB_ENABLE_CUDA 33 | AllocatorInfo& cpu_many_aloc = alloc.cuda_device; 34 | AllocatorInfo& cpu_few_aloc = alloc.cuda_device; 35 | 36 | AllocatorInfo& gpu_many_aloc = alloc.cuda_device; 37 | AllocatorInfo& gpu_few_aloc = alloc.cuda_device; 38 | #else 39 | AllocatorInfo& cpu_many_aloc = alloc.invalid; 40 | AllocatorInfo& cpu_few_aloc = alloc.invalid; 41 | 42 | AllocatorInfo& gpu_many_aloc = alloc.invalid; 43 | AllocatorInfo& gpu_few_aloc = alloc.invalid; 44 | #endif 45 | 46 | do_cycles_allocators(con_comm, 47 | comminfo, info, 48 | exec, 49 | alloc, 50 | cpu_many_aloc, cpu_few_aloc, 51 | gpu_many_aloc, gpu_few_aloc, 52 | num_vars, ncycles, tm, tm_total); 53 | 54 | } 55 | 56 | } // namespace COMB 57 | 58 | #endif // COMB_ENABLE_GDSYNC 59 | -------------------------------------------------------------------------------- /src/test_cycles_gpump.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 
14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #include "comb.hpp" 17 | 18 | #ifdef COMB_ENABLE_GPUMP 19 | 20 | #include "comm_pol_gpump.hpp" 21 | #include "do_cycles_allocators.hpp" 22 | 23 | namespace COMB { 24 | 25 | void test_cycles_gpump(CommInfo& comminfo, MeshInfo& info, 26 | COMB::Executors& exec, 27 | COMB::Allocators& alloc, 28 | IdxT num_vars, IdxT ncycles, Timer& tm, Timer& tm_total) 29 | { 30 | CommContext con_comm{exec.base_cuda.get()}; 31 | 32 | #ifdef COMB_ENABLE_CUDA 33 | AllocatorInfo& cpu_many_aloc = alloc.cuda_device; 34 | AllocatorInfo& cpu_few_aloc = alloc.cuda_device; 35 | 36 | AllocatorInfo& gpu_many_aloc = alloc.cuda_device; 37 | AllocatorInfo& gpu_few_aloc = alloc.cuda_device; 38 | #else 39 | AllocatorInfo& cpu_many_aloc = alloc.invalid; 40 | AllocatorInfo& cpu_few_aloc = alloc.invalid; 41 | 42 | AllocatorInfo& gpu_many_aloc = alloc.invalid; 43 | AllocatorInfo& gpu_few_aloc = alloc.invalid; 44 | #endif 45 | 46 | do_cycles_allocators(con_comm, 47 | comminfo, info, 48 | exec, 49 | alloc, 50 | cpu_many_aloc, cpu_few_aloc, 51 | gpu_many_aloc, gpu_few_aloc, 52 | num_vars, ncycles, tm, tm_total); 53 | 54 | } 55 | 56 | } // namespace COMB 57 | 58 | #endif // COMB_ENABLE_GPUMP 59 | -------------------------------------------------------------------------------- /src/test_cycles_mock.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 
14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #include "comb.hpp" 17 | 18 | #include "comm_pol_mock.hpp" 19 | #include "do_cycles_allocators.hpp" 20 | 21 | namespace COMB { 22 | 23 | void test_cycles_mock(CommInfo& comminfo, MeshInfo& info, 24 | COMB::Executors& exec, 25 | COMB::Allocators& alloc, 26 | IdxT num_vars, IdxT ncycles, Timer& tm, Timer& tm_total) 27 | { 28 | #ifdef COMB_ENABLE_MPI 29 | CommContext con_comm{exec.base_mpi.get()}; 30 | #else 31 | CommContext con_comm{exec.base_cpu.get()}; 32 | #endif 33 | 34 | { 35 | // mock host memory tests 36 | AllocatorInfo& cpu_many_aloc = alloc.host; 37 | AllocatorInfo& cpu_few_aloc = alloc.host; 38 | 39 | #if defined(COMB_ENABLE_CUDA) 40 | AllocatorInfo& gpu_many_aloc = alloc.cuda_hostpinned; 41 | AllocatorInfo& gpu_few_aloc = alloc.cuda_hostpinned; 42 | #elif defined(COMB_ENABLE_HIP) 43 | AllocatorInfo& gpu_many_aloc = alloc.hip_hostpinned; 44 | AllocatorInfo& gpu_few_aloc = alloc.hip_hostpinned; 45 | #else 46 | AllocatorInfo& gpu_many_aloc = alloc.invalid; 47 | AllocatorInfo& gpu_few_aloc = alloc.invalid; 48 | #endif 49 | 50 | do_cycles_allocators(con_comm, 51 | comminfo, info, 52 | exec, 53 | alloc, 54 | cpu_many_aloc, cpu_few_aloc, 55 | gpu_many_aloc, gpu_few_aloc, 56 | num_vars, ncycles, tm, tm_total); 57 | } 58 | 59 | #ifdef COMB_ENABLE_CUDA 60 | { 61 | // mock cuda memory tests 62 | AllocatorInfo& cpu_many_aloc = alloc.cuda_device; 63 | AllocatorInfo& cpu_few_aloc = alloc.cuda_device; 64 | 65 | AllocatorInfo& gpu_many_aloc = alloc.cuda_device; 66 | AllocatorInfo& gpu_few_aloc = alloc.cuda_device; 67 | 68 | do_cycles_allocators(con_comm, 69 | comminfo, info, 70 | exec, 71 | alloc, 72 | cpu_many_aloc, cpu_few_aloc, 73 | gpu_many_aloc, gpu_few_aloc, 74 | num_vars, ncycles, tm, tm_total); 75 | } 76 | #endif 77 | 78 | #ifdef COMB_ENABLE_HIP 79 | { 80 | // mock hip memory tests 81 | AllocatorInfo& cpu_many_aloc = alloc.hip_device; 82 | AllocatorInfo& cpu_few_aloc = 
alloc.hip_device; 83 | 84 | AllocatorInfo& gpu_many_aloc = alloc.hip_device; 85 | AllocatorInfo& gpu_few_aloc = alloc.hip_device; 86 | 87 | do_cycles_allocators(con_comm, 88 | comminfo, info, 89 | exec, 90 | alloc, 91 | cpu_many_aloc, cpu_few_aloc, 92 | gpu_many_aloc, gpu_few_aloc, 93 | num_vars, ncycles, tm, tm_total); 94 | } 95 | #endif 96 | 97 | } 98 | 99 | } // namespace COMB 100 | -------------------------------------------------------------------------------- /src/test_cycles_mp.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 
14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #include "comb.hpp" 17 | 18 | #ifdef COMB_ENABLE_MP 19 | 20 | #include "comm_pol_mp.hpp" 21 | #include "do_cycles_allocators.hpp" 22 | 23 | namespace COMB { 24 | 25 | void test_cycles_mp(CommInfo& comminfo, MeshInfo& info, 26 | COMB::Executors& exec, 27 | COMB::Allocators& alloc, 28 | IdxT num_vars, IdxT ncycles, Timer& tm, Timer& tm_total) 29 | { 30 | CommContext con_comm{exec.base_cuda.get()}; 31 | 32 | #ifdef COMB_ENABLE_CUDA 33 | AllocatorInfo& cpu_many_aloc = alloc.cuda_device; 34 | AllocatorInfo& cpu_few_aloc = alloc.cuda_device; 35 | 36 | AllocatorInfo& gpu_many_aloc = alloc.cuda_device; 37 | AllocatorInfo& gpu_few_aloc = alloc.cuda_device; 38 | #else 39 | AllocatorInfo& cpu_many_aloc = alloc.invalid; 40 | AllocatorInfo& cpu_few_aloc = alloc.invalid; 41 | 42 | AllocatorInfo& gpu_many_aloc = alloc.invalid; 43 | AllocatorInfo& gpu_few_aloc = alloc.invalid; 44 | #endif 45 | 46 | do_cycles_allocators(con_comm, 47 | comminfo, info, 48 | exec, 49 | alloc, 50 | cpu_many_aloc, cpu_few_aloc, 51 | gpu_many_aloc, gpu_few_aloc, 52 | num_vars, ncycles, tm, tm_total); 53 | 54 | } 55 | 56 | } // namespace COMB 57 | 58 | #endif // COMB_ENABLE_MP 59 | -------------------------------------------------------------------------------- /src/test_cycles_mpi.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 
14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #include "comb.hpp" 17 | 18 | #ifdef COMB_ENABLE_MPI 19 | 20 | #include "comm_pol_mpi.hpp" 21 | #include "do_cycles_allocators.hpp" 22 | 23 | namespace COMB { 24 | 25 | void test_cycles_mpi(CommInfo& comminfo, MeshInfo& info, 26 | COMB::Executors& exec, 27 | COMB::Allocators& alloc, 28 | IdxT num_vars, IdxT ncycles, Timer& tm, Timer& tm_total) 29 | { 30 | CommContext con_comm{exec.base_mpi.get()}; 31 | 32 | { 33 | // mpi host memory tests 34 | AllocatorInfo& cpu_many_aloc = alloc.host; 35 | AllocatorInfo& cpu_few_aloc = alloc.host; 36 | 37 | #if defined(COMB_ENABLE_CUDA) 38 | AllocatorInfo& gpu_many_aloc = alloc.cuda_hostpinned; 39 | AllocatorInfo& gpu_few_aloc = alloc.cuda_hostpinned; 40 | #elif defined(COMB_ENABLE_HIP) 41 | AllocatorInfo& gpu_many_aloc = alloc.hip_hostpinned; 42 | AllocatorInfo& gpu_few_aloc = alloc.hip_hostpinned; 43 | #else 44 | AllocatorInfo& gpu_many_aloc = alloc.invalid; 45 | AllocatorInfo& gpu_few_aloc = alloc.invalid; 46 | #endif 47 | 48 | do_cycles_allocators(con_comm, 49 | comminfo, info, 50 | exec, 51 | alloc, 52 | cpu_many_aloc, cpu_few_aloc, 53 | gpu_many_aloc, gpu_few_aloc, 54 | num_vars, ncycles, tm, tm_total); 55 | } 56 | 57 | #ifdef COMB_ENABLE_CUDA 58 | { 59 | // mpi cuda memory tests 60 | AllocatorInfo& cpu_many_aloc = alloc.cuda_device; 61 | AllocatorInfo& cpu_few_aloc = alloc.cuda_device; 62 | 63 | AllocatorInfo& gpu_many_aloc = alloc.cuda_device; 64 | AllocatorInfo& gpu_few_aloc = alloc.cuda_device; 65 | 66 | do_cycles_allocators(con_comm, 67 | comminfo, info, 68 | exec, 69 | alloc, 70 | cpu_many_aloc, cpu_few_aloc, 71 | gpu_many_aloc, gpu_few_aloc, 72 | num_vars, ncycles, tm, tm_total); 73 | } 74 | #endif 75 | 76 | #ifdef COMB_ENABLE_HIP 77 | { 78 | // mpi hip memory tests 79 | AllocatorInfo& cpu_many_aloc = alloc.hip_device; 80 | AllocatorInfo& cpu_few_aloc = alloc.hip_device; 81 | 82 | AllocatorInfo& gpu_many_aloc = 
alloc.hip_device; 83 | AllocatorInfo& gpu_few_aloc = alloc.hip_device; 84 | 85 | do_cycles_allocators(con_comm, 86 | comminfo, info, 87 | exec, 88 | alloc, 89 | cpu_many_aloc, cpu_few_aloc, 90 | gpu_many_aloc, gpu_few_aloc, 91 | num_vars, ncycles, tm, tm_total); 92 | } 93 | #endif 94 | 95 | } 96 | 97 | } // namespace COMB 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /src/test_cycles_mpi_persistent.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #include "comb.hpp" 17 | 18 | #ifdef COMB_ENABLE_MPI 19 | 20 | #include "comm_pol_mpi_persistent.hpp" 21 | #include "do_cycles_allocators.hpp" 22 | 23 | namespace COMB { 24 | 25 | void test_cycles_mpi_persistent(CommInfo& comminfo, MeshInfo& info, 26 | COMB::Executors& exec, 27 | COMB::Allocators& alloc, 28 | IdxT num_vars, IdxT ncycles, Timer& tm, Timer& tm_total) 29 | { 30 | CommContext con_comm{exec.base_mpi.get()}; 31 | 32 | { 33 | // mpi host memory tests 34 | AllocatorInfo& cpu_many_aloc = alloc.host; 35 | AllocatorInfo& cpu_few_aloc = alloc.host; 36 | 37 | #if defined(COMB_ENABLE_CUDA) 38 | AllocatorInfo& gpu_many_aloc = alloc.cuda_hostpinned; 39 | AllocatorInfo& gpu_few_aloc = alloc.cuda_hostpinned; 40 | #elif defined(COMB_ENABLE_HIP) 41 | AllocatorInfo& gpu_many_aloc = alloc.hip_hostpinned; 42 | AllocatorInfo& gpu_few_aloc = alloc.hip_hostpinned; 43 | #else 44 | AllocatorInfo& 
gpu_many_aloc = alloc.invalid; 45 | AllocatorInfo& gpu_few_aloc = alloc.invalid; 46 | #endif 47 | 48 | do_cycles_allocators(con_comm, 49 | comminfo, info, 50 | exec, 51 | alloc, 52 | cpu_many_aloc, cpu_few_aloc, 53 | gpu_many_aloc, gpu_few_aloc, 54 | num_vars, ncycles, tm, tm_total); 55 | } 56 | 57 | #ifdef COMB_ENABLE_CUDA 58 | { 59 | // mpi cuda memory tests 60 | AllocatorInfo& cpu_many_aloc = alloc.cuda_device; 61 | AllocatorInfo& cpu_few_aloc = alloc.cuda_device; 62 | 63 | AllocatorInfo& gpu_many_aloc = alloc.cuda_device; 64 | AllocatorInfo& gpu_few_aloc = alloc.cuda_device; 65 | 66 | do_cycles_allocators(con_comm, 67 | comminfo, info, 68 | exec, 69 | alloc, 70 | cpu_many_aloc, cpu_few_aloc, 71 | gpu_many_aloc, gpu_few_aloc, 72 | num_vars, ncycles, tm, tm_total); 73 | } 74 | #endif 75 | 76 | #ifdef COMB_ENABLE_HIP 77 | { 78 | // mpi hip memory tests 79 | AllocatorInfo& cpu_many_aloc = alloc.hip_device; 80 | AllocatorInfo& cpu_few_aloc = alloc.hip_device; 81 | 82 | AllocatorInfo& gpu_many_aloc = alloc.hip_device; 83 | AllocatorInfo& gpu_few_aloc = alloc.hip_device; 84 | 85 | do_cycles_allocators(con_comm, 86 | comminfo, info, 87 | exec, 88 | alloc, 89 | cpu_many_aloc, cpu_few_aloc, 90 | gpu_many_aloc, gpu_few_aloc, 91 | num_vars, ncycles, tm, tm_total); 92 | } 93 | #endif 94 | 95 | } 96 | 97 | } // namespace COMB 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /src/test_cycles_umr.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2018-2022, Lawrence Livermore National Security, LLC. 3 | // 4 | // Produced at the Lawrence Livermore National Laboratory 5 | // 6 | // LLNL-CODE-758885 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is part of Comb. 
11 | // 12 | // For details, see https://github.com/LLNL/Comb 13 | // Please also see the LICENSE file for MIT license. 14 | ////////////////////////////////////////////////////////////////////////////// 15 | 16 | #include "comb.hpp" 17 | 18 | #ifdef COMB_ENABLE_UMR 19 | 20 | #include "comm_pol_umr.hpp" 21 | #include "do_cycles_allocators.hpp" 22 | 23 | namespace COMB { 24 | 25 | void test_cycles_umr(CommInfo& comminfo, MeshInfo& info, 26 | COMB::Executors& exec, 27 | COMB::Allocators& alloc, 28 | IdxT num_vars, IdxT ncycles, Timer& tm, Timer& tm_total) 29 | { 30 | CommContext con_comm{exec.base_mpi.get()}; 31 | 32 | AllocatorInfo& cpu_many_aloc = alloc.host; 33 | AllocatorInfo& cpu_few_aloc = alloc.host; 34 | 35 | #if defined(COMB_ENABLE_CUDA) 36 | AllocatorInfo& gpu_many_aloc = alloc.cuda_hostpinned; 37 | AllocatorInfo& gpu_few_aloc = alloc.cuda_hostpinned; 38 | #elif defined(COMB_ENABLE_HIP) 39 | AllocatorInfo& gpu_many_aloc = alloc.hip_hostpinned; 40 | AllocatorInfo& gpu_few_aloc = alloc.hip_hostpinned; 41 | #else 42 | AllocatorInfo& gpu_many_aloc = alloc.invalid; 43 | AllocatorInfo& gpu_few_aloc = alloc.invalid; 44 | #endif 45 | 46 | do_cycles_allocators(con_comm, 47 | comminfo, info, 48 | exec, 49 | alloc, 50 | cpu_many_aloc, cpu_few_aloc, 51 | gpu_many_aloc, gpu_few_aloc, 52 | num_vars, ncycles, tm, tm_total); 53 | 54 | } 55 | 56 | } // namespace COMB 57 | 58 | #endif // COMB_ENABLE_UMR 59 | --------------------------------------------------------------------------------