├── CHANGES ├── COPYRIGHT ├── Makefile.am ├── Makefile.in ├── README ├── aclocal.m4 ├── compile ├── config.guess ├── config.sub ├── configure ├── configure.ac ├── depcomp ├── get_local_rank ├── install-sh ├── ltmain.sh ├── missing ├── mpi ├── Makefile.am ├── Makefile.in ├── collective │ ├── Makefile.am │ ├── Makefile.in │ ├── kernel.cu │ ├── osu_allgather.c │ ├── osu_allgatherv.c │ ├── osu_allreduce.c │ ├── osu_alltoall.c │ ├── osu_alltoallv.c │ ├── osu_barrier.c │ ├── osu_bcast.c │ ├── osu_coll.c │ ├── osu_coll.h │ ├── osu_gather.c │ ├── osu_gatherv.c │ ├── osu_iallgather.c │ ├── osu_iallgatherv.c │ ├── osu_ialltoall.c │ ├── osu_ialltoallv.c │ ├── osu_ialltoallw.c │ ├── osu_ibarrier.c │ ├── osu_ibcast.c │ ├── osu_igather.c │ ├── osu_igatherv.c │ ├── osu_iscatter.c │ ├── osu_iscatterv.c │ ├── osu_reduce.c │ ├── osu_reduce_scatter.c │ ├── osu_scatter.c │ └── osu_scatterv.c ├── one-sided │ ├── Makefile.am │ ├── Makefile.in │ ├── osu_1sc.c │ ├── osu_1sc.h │ ├── osu_acc_latency.c │ ├── osu_cas_latency.c │ ├── osu_fop_latency.c │ ├── osu_get_acc_latency.c │ ├── osu_get_bw.c │ ├── osu_get_latency.c │ ├── osu_put_bibw.c │ ├── osu_put_bw.c │ └── osu_put_latency.c ├── pt2pt │ ├── Makefile.am │ ├── Makefile.in │ ├── osu_bibw.c │ ├── osu_bw.c │ ├── osu_latency.c │ ├── osu_latency_mt.c │ ├── osu_mbw_mr.c │ ├── osu_multi_lat.c │ ├── osu_pt2pt.c │ └── osu_pt2pt.h └── startup │ ├── Makefile.am │ ├── Makefile.in │ ├── osu_hello.c │ └── osu_init.c ├── openshmem ├── Makefile.am ├── Makefile.in ├── osu_coll.h ├── osu_common.c ├── osu_common.h ├── osu_oshm_atomics.c ├── osu_oshm_barrier.c ├── osu_oshm_broadcast.c ├── osu_oshm_collect.c ├── osu_oshm_fcollect.c ├── osu_oshm_get.c ├── osu_oshm_put.c ├── osu_oshm_put_mr.c └── osu_oshm_reduce.c ├── upc ├── Makefile.am ├── Makefile.in ├── osu_coll.h ├── osu_common.c ├── osu_common.h ├── osu_upc_all_barrier.c ├── osu_upc_all_broadcast.c ├── osu_upc_all_exchange.c ├── osu_upc_all_gather.c ├── osu_upc_all_gather_all.c ├── osu_upc_all_reduce.c 
├── osu_upc_all_scatter.c ├── osu_upc_memget.c └── osu_upc_memput.c └── upcxx ├── Makefile.am ├── Makefile.in ├── osu_coll.h ├── osu_common.c ├── osu_common.h ├── osu_upcxx_allgather.cpp ├── osu_upcxx_alltoall.cpp ├── osu_upcxx_async_copy_get.cpp ├── osu_upcxx_async_copy_put.cpp ├── osu_upcxx_bcast.cpp ├── osu_upcxx_gather.cpp ├── osu_upcxx_reduce.cpp └── osu_upcxx_scatter.cpp /COPYRIGHT: -------------------------------------------------------------------------------- 1 | COPYRIGHT 2 | 3 | Copyright (c) 2001-2016, The Ohio State University. All rights 4 | reserved. 5 | 6 | The OMB (OSU Micro Benchmarks) software package is developed by the team 7 | members of The Ohio State University's Network-Based Computing Laboratory 8 | (NBCL), headed by Professor Dhabaleswar K. (DK) Panda. 9 | 10 | Contact: 11 | Prof. Dhabaleswar K. (DK) Panda 12 | Dept. of Computer Science and Engineering 13 | The Ohio State University 14 | 2015 Neil Avenue 15 | Columbus, OH - 43210-1277 16 | Tel: (614)-292-5199; Fax: (614)-292-2911 17 | E-mail:panda@cse.ohio-state.edu 18 | 19 | This program is available under BSD licensing. 20 | 21 | Redistribution and use in source and binary forms, with or without 22 | modification, are permitted provided that the following conditions are 23 | met: 24 | 25 | (1) Redistributions of source code must retain the above copyright 26 | notice, this list of conditions and the following disclaimer. 27 | 28 | (2) Redistributions in binary form must reproduce the above copyright 29 | notice, this list of conditions and the following disclaimer in the 30 | documentation and/or other materials provided with the distribution. 31 | 32 | (3) Neither the name of The Ohio State University nor the names of 33 | their contributors may be used to endorse or promote products derived 34 | from this software without specific prior written permission. 
35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 37 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 38 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 39 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 40 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 41 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 42 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 43 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 44 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 45 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 46 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 47 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = 2 | 3 | if CUDA 4 | dist_pkglibexec_SCRIPTS = get_local_rank 5 | endif 6 | 7 | if MPI 8 | SUBDIRS += mpi 9 | endif 10 | 11 | if OSHM 12 | SUBDIRS += openshmem 13 | endif 14 | 15 | if UPC 16 | SUBDIRS += upc 17 | endif 18 | 19 | if UPCXX 20 | SUBDIRS += upcxx 21 | endif 22 | 23 | EXTRA_DIST = README CHANGES COPYRIGHT 24 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | # -*- Autoconf -*- 2 | # Process this file with autoconf to produce a configure script. 
3 | 4 | AC_PREREQ([2.59]) 5 | AC_INIT([OSU-Micro-Benchmarks], [5.3], [mvapich-discuss@cse.ohio-state.edu]) 6 | AC_CONFIG_SRCDIR([mpi/pt2pt/osu_latency.c]) 7 | 8 | AM_INIT_AUTOMAKE([foreign]) 9 | LT_INIT 10 | 11 | AC_ARG_ENABLE([openacc], 12 | [AS_HELP_STRING([--enable-openacc], 13 | [Enable OpenACC benchmarks]) 14 | ], 15 | [], 16 | [enable_openacc=no]) 17 | 18 | AC_ARG_ENABLE([cuda], 19 | [AS_HELP_STRING([--enable-cuda], 20 | [Enable CUDA benchmarks (default is no). Specify 21 | --enable-cuda=basic to enable basic cuda support 22 | without using cuda kernel support for 23 | non-blocking collectives]) 24 | ], 25 | [], 26 | [enable_cuda=no]) 27 | 28 | AC_ARG_WITH([cuda], 29 | [AS_HELP_STRING([--with-cuda=@<:@CUDA installation path@:>@], 30 | [Provide path to CUDA installation]) 31 | ], 32 | [AS_CASE([$with_cuda], 33 | [yes|no], [], 34 | [CPPFLAGS="-I$with_cuda/include $CPPFLAGS" 35 | LDFLAGS="-L$with_cuda/lib64 -Wl,-rpath=$with_cuda/lib64 -L$with_cuda/lib -Wl,-rpath=$with_cuda/lib $LDFLAGS"]) 36 | ]) 37 | 38 | AC_ARG_WITH([cuda-include], 39 | [AS_HELP_STRING([--with-cuda-include=@<:@CUDA include path@:>@], 40 | [Provide path to CUDA include files]) 41 | ], 42 | [AS_CASE([$with_cuda_include], 43 | [yes|no], [], 44 | [CPPFLAGS="-I$with_cuda_include $CPPFLAGS"]) 45 | ]) 46 | 47 | AC_ARG_WITH([cuda-libpath], 48 | [AS_HELP_STRING([--with-cuda-libpath=@<:@CUDA library path@:>@], 49 | [Provide path to CUDA library files]) 50 | ], 51 | [AS_CASE([$with_cuda_libpath], 52 | [yes|no], [], 53 | [LDFLAGS="-L$with_cuda_libpath -Wl,-rpath=$with_cuda_libpath $LDFLAGS"]) 54 | ]) 55 | 56 | # Checks for programs. 57 | AC_PROG_CC([mpicc oshcc upcc upc++]) 58 | 59 | # Checks for mpicxx used for compiling kernel.cu in nbc benchmarks and/or the 60 | # upc++ compiler for upcxx benchmarks 61 | AC_PROG_CXX([mpicxx upc++]) 62 | 63 | # Checks for libraries. 64 | AC_SEARCH_LIBS([sqrt], [m]) 65 | AC_SEARCH_LIBS([pthread_create], [pthread]) 66 | 67 | # Checks for header files. 
68 | AC_CHECK_HEADERS([stdlib.h string.h sys/time.h unistd.h]) 69 | 70 | # Checks for typedefs, structures, and compiler characteristics. 71 | AC_C_INLINE 72 | 73 | # Checks for library functions. 74 | AC_CHECK_FUNCS([getpagesize gettimeofday memset sqrt]) 75 | 76 | AS_IF([test "x$enable_embedded" = xyes], [ 77 | AS_IF([test x"$enable_mpi3" = xyes], [mpi3_library=true]) 78 | AS_IF([test x"$enable_mpi2" = xyes], [mpi2_library=true]) 79 | AS_IF([test x"$enable_mpi" = xyes], [mpi_library=true]) 80 | AS_IF([test x"$enable_oshm" = xyes], [oshm_library=true]) 81 | AS_IF([test x"$enable_upc" = xyes], [upc_compiler=true]) 82 | AS_IF([test x"$enable_upcxx" = xyes], [upcxx_compiler=true]) 83 | ], [ 84 | AC_CHECK_FUNC([MPI_Init], [mpi_library=true]) 85 | AC_CHECK_FUNC([MPI_Accumulate], [mpi2_library=true]) 86 | AC_CHECK_FUNC([MPI_Get_accumulate], [mpi3_library=true]) 87 | AC_CHECK_FUNC([shmem_barrier_all], [oshm_library=true]) 88 | AC_CHECK_FUNC([upc_memput], [upc_compiler=true]) 89 | AC_CHECK_DECL([upcxx_alltoall], [upcxx_compiler=true], [], 90 | [#include <upcxx.h>]) 91 | ]) 92 | 93 | AM_CONDITIONAL([EMBEDDED_BUILD], [test x"$enable_embedded" = xyes]) 94 | AM_CONDITIONAL([BUILD_PROFILING_LIB], [test x"$with_plib" = xyes]) 95 | AC_SUBST([PMPILIBNAME], [$PMPILIBNAME]) 96 | AC_SUBST([MPILIBNAME], [$MPILIBNAME]) 97 | 98 | AS_IF([test "x$enable_openacc" = xyes], [ 99 | AC_CHECK_HEADERS([openacc.h], [], 100 | [AC_MSG_ERROR([cannot include openacc.h])]) 101 | AC_DEFINE([_ENABLE_OPENACC_], [1], [Enable OpenACC]) 102 | ]) 103 | 104 | AS_CASE([$enable_cuda], 105 | [yes], [build_cuda_kernels=yes; build_cuda=yes], 106 | [basic], [build_cuda=yes]) 107 | 108 | AS_IF([test "x$build_cuda" = xyes], [ 109 | AC_SEARCH_LIBS([cuPointerGetAttribute], [cuda], [], 110 | [AC_MSG_ERROR([cannot link with -lcuda])]) 111 | AC_SEARCH_LIBS([cudaFree], [cudart], [], 112 | [AC_MSG_ERROR([cannot link with -lcudart])]) 113 | AC_CHECK_HEADERS([cuda.h], [], 114 | [AC_MSG_ERROR([cannot include cuda.h])]) 115 | 
AC_DEFINE([_ENABLE_CUDA_], [1], [Enable CUDA]) 116 | ]) 117 | 118 | AS_IF([test "x$build_cuda_kernels" = xyes], [ 119 | AC_DEFINE([_ENABLE_CUDA_KERNEL_], [1], [Enable CUDA Kernel]) 120 | ]) 121 | 122 | AM_CONDITIONAL([MPI2_LIBRARY], [test x$mpi2_library = xtrue]) 123 | AM_CONDITIONAL([MPI3_LIBRARY], [test x$mpi3_library = xtrue]) 124 | AM_CONDITIONAL([CUDA], [test x$build_cuda = xyes]) 125 | AM_CONDITIONAL([CUDA_KERNELS], [test x$build_cuda_kernels = xyes]) 126 | AM_CONDITIONAL([OPENACC], [test x$enable_openacc = xyes]) 127 | AM_CONDITIONAL([OSHM], [test x$oshm_library = xtrue]) 128 | AM_CONDITIONAL([MPI], [test x$mpi_library = xtrue]) 129 | AM_CONDITIONAL([UPC], [test x$upc_compiler = xtrue]) 130 | AM_CONDITIONAL([UPCXX], [test x$upcxx_compiler = xtrue]) 131 | 132 | AC_DEFINE([FIELD_WIDTH], [18], [Width of field used to report numbers]) 133 | AC_DEFINE([FLOAT_PRECISION], [2], [Precision of reported numbers]) 134 | 135 | AC_CONFIG_FILES([Makefile mpi/Makefile mpi/pt2pt/Makefile mpi/startup/Makefile 136 | mpi/one-sided/Makefile mpi/collective/Makefile 137 | openshmem/Makefile upc/Makefile upcxx/Makefile]) 138 | AC_OUTPUT 139 | -------------------------------------------------------------------------------- /get_local_rank: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LOCAL_RANK=$MV2_COMM_WORLD_LOCAL_RANK 4 | exec $* 5 | -------------------------------------------------------------------------------- /mpi/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = pt2pt collective startup 2 | 3 | if MPI2_LIBRARY 4 | SUBDIRS += one-sided 5 | endif 6 | -------------------------------------------------------------------------------- /mpi/collective/Makefile.am: -------------------------------------------------------------------------------- 1 | NVCC = nvcc 2 | NVCFLAGS = -cuda -maxrregcount 32 3 | SUFFIXES = .cu .cpp 4 | .cu.cpp: 5 | $(NVCC) $(NVCFLAGS) 
$(INCLUDES) $(CPPFLAGS) --output-file $@ $< 6 | 7 | collectivedir = $(pkglibexecdir)/mpi/collective 8 | collective_PROGRAMS = osu_alltoallv osu_allgatherv osu_scatterv osu_gatherv osu_reduce_scatter osu_barrier osu_reduce osu_allreduce osu_alltoall osu_bcast osu_gather osu_allgather osu_scatter osu_iallgather osu_ibcast osu_ialltoall osu_ibarrier osu_igather osu_iscatter osu_iscatterv osu_igatherv osu_iallgatherv osu_ialltoallv osu_ialltoallw 9 | 10 | osu_alltoallv_SOURCES = osu_alltoallv.c osu_coll.c osu_coll.h 11 | osu_allgatherv_SOURCES = osu_allgatherv.c osu_coll.c osu_coll.h 12 | osu_scatterv_SOURCES = osu_scatterv.c osu_coll.c osu_coll.h 13 | osu_gather_SOURCES = osu_gather.c osu_coll.c osu_coll.h 14 | osu_gatherv_SOURCES = osu_gatherv.c osu_coll.c osu_coll.h 15 | osu_reduce_scatter_SOURCES = osu_reduce_scatter.c osu_coll.c osu_coll.h 16 | osu_barrier_SOURCES = osu_barrier.c osu_coll.c osu_coll.h 17 | osu_reduce_SOURCES = osu_reduce.c osu_coll.c osu_coll.h 18 | osu_allreduce_SOURCES = osu_allreduce.c osu_coll.c osu_coll.h 19 | osu_bcast_SOURCES = osu_bcast.c osu_coll.c osu_coll.h 20 | osu_alltoall_SOURCES = osu_alltoall.c osu_coll.c osu_coll.h 21 | osu_ialltoall_SOURCES = osu_ialltoall.c osu_coll.c osu_coll.h 22 | osu_ialltoallv_SOURCES = osu_ialltoallv.c osu_coll.c osu_coll.h 23 | osu_ialltoallw_SOURCES = osu_ialltoallw.c osu_coll.c osu_coll.h 24 | osu_ibarrier_SOURCES = osu_ibarrier.c osu_coll.c osu_coll.h 25 | osu_ibcast_SOURCES = osu_ibcast.c osu_coll.c osu_coll.h 26 | osu_igather_SOURCES = osu_igather.c osu_coll.c osu_coll.h 27 | osu_igatherv_SOURCES = osu_igatherv.c osu_coll.c osu_coll.h 28 | osu_allgather_SOURCES = osu_allgather.c osu_coll.c osu_coll.h 29 | osu_iallgather_SOURCES = osu_iallgather.c osu_coll.c osu_coll.h 30 | osu_iallgatherv_SOURCES = osu_iallgatherv.c osu_coll.c osu_coll.h 31 | osu_scatter_SOURCES = osu_scatter.c osu_coll.c osu_coll.h 32 | osu_iscatter_SOURCES = osu_iscatter.c osu_coll.c osu_coll.h 33 | osu_iscatterv_SOURCES = 
osu_iscatterv.c osu_coll.c osu_coll.h 34 | 35 | if CUDA_KERNELS 36 | osu_alltoall_SOURCES += kernel.cu 37 | osu_alltoallv_SOURCES += kernel.cu 38 | osu_allgather_SOURCES += kernel.cu 39 | osu_allgatherv_SOURCES += kernel.cu 40 | osu_barrier_SOURCES += kernel.cu 41 | osu_bcast_SOURCES += kernel.cu 42 | osu_scatter_SOURCES += kernel.cu 43 | osu_scatterv_SOURCES += kernel.cu 44 | osu_gather_SOURCES += kernel.cu 45 | osu_gatherv_SOURCES += kernel.cu 46 | osu_allreduce_SOURCES += kernel.cu 47 | osu_reduce_SOURCES += kernel.cu 48 | osu_reduce_scatter_SOURCES += kernel.cu 49 | osu_ialltoall_SOURCES += kernel.cu 50 | osu_ialltoallv_SOURCES += kernel.cu 51 | osu_ialltoallw_SOURCES += kernel.cu 52 | osu_iallgather_SOURCES += kernel.cu 53 | osu_iallgatherv_SOURCES += kernel.cu 54 | osu_ibarrier_SOURCES += kernel.cu 55 | osu_ibcast_SOURCES += kernel.cu 56 | osu_iscatter_SOURCES += kernel.cu 57 | osu_iscatterv_SOURCES += kernel.cu 58 | osu_igather_SOURCES += kernel.cu 59 | osu_igatherv_SOURCES += kernel.cu 60 | endif 61 | 62 | if EMBEDDED_BUILD 63 | AM_LDFLAGS = 64 | AM_CPPFLAGS = -I$(top_builddir)/../src/include \ 65 | -I${top_srcdir}/../src/include 66 | if BUILD_PROFILING_LIB 67 | AM_LDFLAGS += $(top_builddir)/../lib/lib@PMPILIBNAME@.la 68 | endif 69 | AM_LDFLAGS += $(top_builddir)/../lib/lib@MPILIBNAME@.la 70 | endif 71 | 72 | if OPENACC 73 | AM_CFLAGS = -acc 74 | endif 75 | -------------------------------------------------------------------------------- /mpi/collective/kernel.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 
9 | */ 10 | 11 | __global__ 12 | void compute_kernel(float a, float * x, float * y, int N) 13 | { 14 | int i = blockIdx.x * blockDim.x + threadIdx.x; 15 | 16 | int count = 0; 17 | 18 | if (i < N) { 19 | for(count=0; count < (N/8); count++) { 20 | y[i] = a * x[i] + y[i]; 21 | } 22 | } 23 | } 24 | 25 | extern "C" 26 | void 27 | call_kernel(float a, float * d_x, float * d_y, int N, cudaStream_t * stream) 28 | { 29 | compute_kernel<<<(N+255)/256, 256, 0, *stream>>>(a, d_x, d_y, N); 30 | } 31 | 32 | 33 | -------------------------------------------------------------------------------- /mpi/collective/osu_allgather.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Allgather Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | #include "osu_coll.h" 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int i, numprocs, rank, size; 16 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 17 | double timer=0.0; 18 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 19 | char *sendbuf, *recvbuf; 20 | int po_ret; 21 | size_t bufsize; 22 | 23 | set_header(HEADER); 24 | set_benchmark_name("osu_allgather"); 25 | enable_accel_support(); 26 | po_ret = process_options(argc, argv); 27 | 28 | if (po_okay == po_ret && none != options.accel) { 29 | if (init_accel()) { 30 | fprintf(stderr, "Error initializing device\n"); 31 | exit(EXIT_FAILURE); 32 | } 33 | } 34 | 35 | MPI_Init(&argc, &argv); 36 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 37 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 38 | 39 | switch (po_ret) { 40 | case po_bad_usage: 41 | print_bad_usage_message(rank); 42 | MPI_Finalize(); 43 | exit(EXIT_FAILURE); 44 | case po_help_message: 45 | print_help_message(rank); 46 | MPI_Finalize(); 47 | exit(EXIT_SUCCESS); 48 | case po_version_message: 49 | print_version_message(rank); 50 | MPI_Finalize(); 51 | exit(EXIT_SUCCESS); 52 | case po_okay: 53 | break; 54 | } 55 | 56 | if(numprocs < 2) { 57 | if (rank == 0) { 58 | fprintf(stderr, "This test requires at least two processes\n"); 59 | } 60 | 61 | MPI_Finalize(); 62 | exit(EXIT_FAILURE); 63 | } 64 | 65 | if ((options.max_message_size * numprocs) > options.max_mem_limit) { 66 | options.max_message_size = options.max_mem_limit / numprocs; 67 | } 68 | 69 | if (allocate_buffer((void**)&sendbuf, options.max_message_size, options.accel)) { 70 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 71 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 72 | } 73 | set_buffer(sendbuf, options.accel, 1, options.max_message_size); 74 | 75 | bufsize = options.max_message_size * numprocs; 76 | if (allocate_buffer((void**)&recvbuf, bufsize, 77 | options.accel)) { 78 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 79 | 
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 80 | } 81 | set_buffer(recvbuf, options.accel, 0, bufsize); 82 | 83 | print_preamble(rank); 84 | 85 | for(size=options.min_message_size; size <= options.max_message_size; size *= 2) { 86 | 87 | if(size > LARGE_MESSAGE_SIZE) { 88 | options.skip = options.skip_large; 89 | options.iterations = options.iterations_large; 90 | } 91 | 92 | MPI_Barrier(MPI_COMM_WORLD); 93 | timer=0.0; 94 | for(i=0; i < options.iterations + options.skip ; i++) { 95 | t_start = MPI_Wtime(); 96 | MPI_Allgather( sendbuf, size, MPI_CHAR, 97 | recvbuf, size, MPI_CHAR, MPI_COMM_WORLD ); 98 | 99 | t_stop = MPI_Wtime(); 100 | 101 | if(i >= options.skip) { 102 | timer+= t_stop-t_start; 103 | } 104 | MPI_Barrier(MPI_COMM_WORLD); 105 | 106 | } 107 | 108 | MPI_Barrier(MPI_COMM_WORLD); 109 | 110 | latency = (double)(timer * 1e6) / options.iterations; 111 | 112 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 113 | MPI_COMM_WORLD); 114 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 115 | MPI_COMM_WORLD); 116 | MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 117 | MPI_COMM_WORLD); 118 | avg_time = avg_time/numprocs; 119 | 120 | print_stats(rank, size, avg_time, min_time, max_time); 121 | MPI_Barrier(MPI_COMM_WORLD); 122 | } 123 | 124 | free_buffer(sendbuf, options.accel); 125 | free_buffer(recvbuf, options.accel); 126 | 127 | MPI_Finalize(); 128 | 129 | if (none != options.accel) { 130 | if (cleanup_accel()) { 131 | fprintf(stderr, "Error cleaning up device\n"); 132 | exit(EXIT_FAILURE); 133 | } 134 | } 135 | 136 | return EXIT_SUCCESS; 137 | } 138 | /* vi: set sw=4 sts=4 tw=80: */ 139 | -------------------------------------------------------------------------------- /mpi/collective/osu_allgatherv.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Allgatherv Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The 
Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | #include "osu_coll.h" 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int i, numprocs, rank, size, disp; 16 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 17 | double timer=0.0; 18 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 19 | char *sendbuf, *recvbuf; 20 | int *rdispls=NULL, *recvcounts=NULL; 21 | int po_ret; 22 | size_t bufsize; 23 | 24 | set_header(HEADER); 25 | set_benchmark_name("osu_allgather"); 26 | enable_accel_support(); 27 | po_ret = process_options(argc, argv); 28 | 29 | if (po_okay == po_ret && none != options.accel) { 30 | if (init_accel()) { 31 | fprintf(stderr, "Error initializing device\n"); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | 36 | MPI_Init(&argc, &argv); 37 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 38 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 39 | 40 | switch (po_ret) { 41 | case po_bad_usage: 42 | print_bad_usage_message(rank); 43 | MPI_Finalize(); 44 | exit(EXIT_FAILURE); 45 | case po_help_message: 46 | print_help_message(rank); 47 | MPI_Finalize(); 48 | exit(EXIT_SUCCESS); 49 | case po_version_message: 50 | print_version_message(rank); 51 | MPI_Finalize(); 52 | exit(EXIT_SUCCESS); 53 | case po_okay: 54 | break; 55 | } 56 | 57 | if(numprocs < 2) { 58 | if (rank == 0) { 59 | fprintf(stderr, "This test requires at least two processes\n"); 60 | } 61 | 62 | MPI_Finalize(); 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | if ((options.max_message_size * numprocs) > options.max_mem_limit) { 67 | options.max_message_size = options.max_mem_limit / numprocs; 68 | } 69 | 70 | if (allocate_buffer((void**)&recvcounts, numprocs*sizeof(int), none)) { 71 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 72 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 73 | } 74 | if 
(allocate_buffer((void**)&rdispls, numprocs*sizeof(int), none)) { 75 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 76 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 77 | } 78 | 79 | if (allocate_buffer((void**)&sendbuf, options.max_message_size, options.accel)) { 80 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 81 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 82 | } 83 | set_buffer(sendbuf, options.accel, 1, options.max_message_size); 84 | 85 | bufsize = options.max_message_size * numprocs; 86 | if (allocate_buffer((void**)&recvbuf, bufsize, 87 | options.accel)) { 88 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 89 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 90 | } 91 | set_buffer(recvbuf, options.accel, 0, bufsize); 92 | 93 | print_preamble(rank); 94 | 95 | for(size=options.min_message_size; size <= options.max_message_size; size *= 2) { 96 | if(size > LARGE_MESSAGE_SIZE) { 97 | options.skip = options.skip_large; 98 | options.iterations = options.iterations_large; 99 | } 100 | 101 | MPI_Barrier(MPI_COMM_WORLD); 102 | 103 | disp =0; 104 | for ( i = 0; i < numprocs; i++) { 105 | recvcounts[i] = size; 106 | rdispls[i] = disp; 107 | disp += size; 108 | } 109 | 110 | MPI_Barrier(MPI_COMM_WORLD); 111 | timer=0.0; 112 | for(i=0; i < options.iterations + options.skip ; i++) { 113 | 114 | t_start = MPI_Wtime(); 115 | 116 | MPI_Allgatherv(sendbuf, size, MPI_CHAR, recvbuf, recvcounts, rdispls, MPI_CHAR, MPI_COMM_WORLD); 117 | 118 | t_stop = MPI_Wtime(); 119 | 120 | if(i >= options.skip) { 121 | timer+= t_stop-t_start; 122 | } 123 | MPI_Barrier(MPI_COMM_WORLD); 124 | 125 | } 126 | 127 | MPI_Barrier(MPI_COMM_WORLD); 128 | 129 | latency = (double)(timer * 1e6) / options.iterations; 130 | 131 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 132 | MPI_COMM_WORLD); 133 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 134 | MPI_COMM_WORLD); 135 | MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 
136 | MPI_COMM_WORLD); 137 | avg_time = avg_time/numprocs; 138 | 139 | print_stats(rank, size, avg_time, min_time, max_time); 140 | MPI_Barrier(MPI_COMM_WORLD); 141 | } 142 | 143 | free_buffer(rdispls, none); 144 | free_buffer(recvcounts, none); 145 | free_buffer(sendbuf, options.accel); 146 | free_buffer(recvbuf, options.accel); 147 | 148 | MPI_Finalize(); 149 | 150 | if (none != options.accel) { 151 | if (cleanup_accel()) { 152 | fprintf(stderr, "Error cleaning up device\n"); 153 | exit(EXIT_FAILURE); 154 | } 155 | } 156 | 157 | return EXIT_SUCCESS; 158 | } 159 | /* vi: set sw=4 sts=4 tw=80: */ 160 | -------------------------------------------------------------------------------- /mpi/collective/osu_allreduce.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Allreduce Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | #include "osu_coll.h" 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int i, numprocs, rank, size; 16 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 17 | double timer=0.0; 18 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 19 | float *sendbuf, *recvbuf; 20 | int po_ret; 21 | size_t bufsize; 22 | 23 | set_header(HEADER); 24 | set_benchmark_name("osu_allreduce"); 25 | enable_accel_support(); 26 | po_ret = process_options(argc, argv); 27 | 28 | if (po_okay == po_ret && none != options.accel) { 29 | if (init_accel()) { 30 | fprintf(stderr, "Error initializing device\n"); 31 | exit(EXIT_FAILURE); 32 | } 33 | } 34 | 35 | MPI_Init(&argc, &argv); 36 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 37 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 38 | 39 | switch (po_ret) { 40 | case po_bad_usage: 41 | print_bad_usage_message(rank); 42 | MPI_Finalize(); 43 | exit(EXIT_FAILURE); 44 | case po_help_message: 45 | print_help_message(rank); 46 | MPI_Finalize(); 47 | exit(EXIT_SUCCESS); 48 | case po_version_message: 49 | print_version_message(rank); 50 | MPI_Finalize(); 51 | exit(EXIT_SUCCESS); 52 | case po_okay: 53 | break; 54 | } 55 | 56 | if(numprocs < 2) { 57 | if (rank == 0) { 58 | fprintf(stderr, "This test requires at least two processes\n"); 59 | } 60 | 61 | MPI_Finalize(); 62 | exit(EXIT_FAILURE); 63 | } 64 | 65 | if (options.max_message_size > options.max_mem_limit) { 66 | options.max_message_size = options.max_mem_limit; 67 | } 68 | 69 | options.min_message_size /= sizeof(float); 70 | if (options.min_message_size < DEFAULT_MIN_MESSAGE_SIZE) { 71 | options.min_message_size = DEFAULT_MIN_MESSAGE_SIZE; 72 | } 73 | 74 | bufsize = sizeof(float)*(options.max_message_size/sizeof(float)); 75 | if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) { 76 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 77 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 78 | } 79 | set_buffer(sendbuf, options.accel, 1, bufsize); 80 | 81 | bufsize = 
sizeof(float)*(options.max_message_size/sizeof(float)); 82 | if (allocate_buffer((void**)&recvbuf, bufsize, options.accel)) { 83 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 84 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 85 | } 86 | set_buffer(recvbuf, options.accel, 0, bufsize); 87 | 88 | print_preamble(rank); 89 | 90 | for(size=options.min_message_size; size*sizeof(float) <= options.max_message_size; size *= 2) { 91 | 92 | if(size > LARGE_MESSAGE_SIZE) { 93 | options.skip = options.skip_large; 94 | options.iterations = options.iterations_large; 95 | } 96 | 97 | MPI_Barrier(MPI_COMM_WORLD); 98 | 99 | timer=0.0; 100 | for(i=0; i < options.iterations + options.skip ; i++) { 101 | t_start = MPI_Wtime(); 102 | MPI_Allreduce(sendbuf, recvbuf, size, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD ); 103 | t_stop=MPI_Wtime(); 104 | if(i>=options.skip){ 105 | 106 | timer+=t_stop-t_start; 107 | } 108 | MPI_Barrier(MPI_COMM_WORLD); 109 | } 110 | latency = (double)(timer * 1e6) / options.iterations; 111 | 112 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 113 | MPI_COMM_WORLD); 114 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 115 | MPI_COMM_WORLD); 116 | MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 117 | MPI_COMM_WORLD); 118 | avg_time = avg_time/numprocs; 119 | 120 | print_stats(rank, size * sizeof(float), avg_time, min_time, max_time); 121 | MPI_Barrier(MPI_COMM_WORLD); 122 | } 123 | 124 | free_buffer(sendbuf, options.accel); 125 | free_buffer(recvbuf, options.accel); 126 | 127 | MPI_Finalize(); 128 | 129 | if (none != options.accel) { 130 | if (cleanup_accel()) { 131 | fprintf(stderr, "Error cleaning up device\n"); 132 | exit(EXIT_FAILURE); 133 | } 134 | } 135 | 136 | return EXIT_SUCCESS; 137 | } 138 | -------------------------------------------------------------------------------- /mpi/collective/osu_alltoall.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU 
MPI%s All-to-All Personalized Exchange Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | #include "osu_coll.h" 12 | 13 | int 14 | main (int argc, char *argv[]) 15 | { 16 | int i, numprocs, rank, size; 17 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 18 | double timer=0.0; 19 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 20 | char * sendbuf = NULL, * recvbuf = NULL; 21 | int po_ret; 22 | size_t bufsize; 23 | 24 | set_header(HEADER); 25 | set_benchmark_name("osu_alltoall"); 26 | enable_accel_support(); 27 | po_ret = process_options(argc, argv); 28 | 29 | if (po_okay == po_ret && none != options.accel) { 30 | if (init_accel()) { 31 | fprintf(stderr, "Error initializing device\n"); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | 36 | MPI_Init(&argc, &argv); 37 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 38 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 39 | 40 | switch (po_ret) { 41 | case po_bad_usage: 42 | print_bad_usage_message(rank); 43 | MPI_Finalize(); 44 | exit(EXIT_FAILURE); 45 | case po_help_message: 46 | print_help_message(rank); 47 | MPI_Finalize(); 48 | exit(EXIT_SUCCESS); 49 | case po_version_message: 50 | print_version_message(rank); 51 | MPI_Finalize(); 52 | exit(EXIT_SUCCESS); 53 | case po_okay: 54 | break; 55 | } 56 | 57 | if(numprocs < 2) { 58 | if (rank == 0) { 59 | fprintf(stderr, "This test requires at least two processes\n"); 60 | } 61 | 62 | MPI_Finalize(); 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | if ((options.max_message_size * numprocs) > options.max_mem_limit) { 67 | options.max_message_size = options.max_mem_limit / numprocs; 68 | } 69 | 70 | bufsize = options.max_message_size * numprocs; 71 | 72 | if (allocate_buffer((void**)&sendbuf, 
bufsize, options.accel)) { 73 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 74 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 75 | } 76 | 77 | set_buffer(sendbuf, options.accel, 1, bufsize); 78 | 79 | if (allocate_buffer((void**)&recvbuf, options.max_message_size * numprocs, 80 | options.accel)) { 81 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 82 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 83 | } 84 | 85 | set_buffer(recvbuf, options.accel, 0, bufsize); 86 | print_preamble(rank); 87 | 88 | for(size=options.min_message_size; size <= options.max_message_size; size *= 2) { 89 | if (size > LARGE_MESSAGE_SIZE) { 90 | options.skip = options.skip_large; 91 | options.iterations = options.iterations_large; 92 | } 93 | 94 | MPI_Barrier(MPI_COMM_WORLD); 95 | timer=0.0; 96 | 97 | for (i=0; i < options.iterations + options.skip ; i++) { 98 | t_start = MPI_Wtime(); 99 | MPI_Alltoall(sendbuf, size, MPI_CHAR, recvbuf, size, MPI_CHAR, 100 | MPI_COMM_WORLD); 101 | t_stop = MPI_Wtime(); 102 | 103 | if (i >= options.skip) { 104 | timer+=t_stop-t_start; 105 | } 106 | MPI_Barrier(MPI_COMM_WORLD); 107 | } 108 | latency = (double)(timer * 1e6) / options.iterations; 109 | 110 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 111 | MPI_COMM_WORLD); 112 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 113 | MPI_COMM_WORLD); 114 | MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 115 | MPI_COMM_WORLD); 116 | avg_time = avg_time/numprocs; 117 | 118 | print_stats(rank, size, avg_time, min_time, max_time); 119 | MPI_Barrier(MPI_COMM_WORLD); 120 | } 121 | 122 | free_buffer(sendbuf, options.accel); 123 | free_buffer(recvbuf, options.accel); 124 | 125 | MPI_Finalize(); 126 | 127 | if (none != options.accel) { 128 | if (cleanup_accel()) { 129 | fprintf(stderr, "Error cleaning up device\n"); 130 | exit(EXIT_FAILURE); 131 | } 132 | } 133 | 134 | return EXIT_SUCCESS; 135 | } 136 | 137 | /* vi: set sw=4 sts=4 tw=80: */ 138 | 
--------------------------------------------------------------------------------
/mpi/collective/osu_alltoallv.c:
--------------------------------------------------------------------------------
#define BENCHMARK "OSU MPI%s All-to-Allv Personalized Exchange Latency Test"
/*
 * Copyright (C) 2002-2016 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */
#include "osu_coll.h"

/*
 * Measures MPI_Alltoallv latency.  Counts and displacements are uniform
 * (every rank exchanges `size` bytes with every peer), so this mirrors
 * osu_alltoall but exercises the vector interface.
 */
int main(int argc, char *argv[])
{
    int i = 0, rank = 0, size, numprocs, disp;
    double latency=0.0, t_start = 0.0, t_stop = 0.0;
    double timer=0.0;
    double avg_time = 0.0, max_time = 0.0, min_time = 0.0;
    char *sendbuf=NULL, *recvbuf=NULL;
    int *rdispls=NULL, *recvcounts=NULL, *sdispls=NULL, *sendcounts=NULL;
    int po_ret;
    size_t bufsize;

    set_header(HEADER);
    set_benchmark_name("osu_alltoallv");
    enable_accel_support();
    po_ret = process_options(argc, argv);

    /* Device (accelerator) setup happens before MPI_Init when requested. */
    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            MPI_Finalize();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
    }

    if(numprocs < 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    /* Each rank holds numprocs segments per buffer; keep the total
     * allocation within the configured memory limit. */
    if ((options.max_message_size * numprocs) > options.max_mem_limit) {
        options.max_message_size = options.max_mem_limit / numprocs;
    }

    /* Count/displacement vectors always live in host memory (`none`),
     * regardless of where the data buffers are placed. */
    if (allocate_buffer((void**)&recvcounts, numprocs*sizeof(int), none)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    if (allocate_buffer((void**)&sendcounts, numprocs*sizeof(int), none)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    if (allocate_buffer((void**)&rdispls, numprocs*sizeof(int), none)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    if (allocate_buffer((void**)&sdispls, numprocs*sizeof(int), none)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    bufsize = options.max_message_size * numprocs;
    if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(sendbuf, options.accel, 1, bufsize);

    if (allocate_buffer((void**)&recvbuf, bufsize,
                options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(recvbuf, options.accel, 0, bufsize);

    print_preamble(rank);

    MPI_Barrier(MPI_COMM_WORLD);

    for(size=options.min_message_size; size <= options.max_message_size; size *= 2) {
        if(size > LARGE_MESSAGE_SIZE) {
            options.skip = options.skip_large;
            options.iterations = options.iterations_large;
        }

        /* Uniform vectors: `size` bytes to/from every peer, packed
         * contiguously at multiples of `size`. */
        disp =0;
        for ( i = 0; i < numprocs; i++) {
            recvcounts[i] = size;
            sendcounts[i] = size;
            rdispls[i] = disp;
            sdispls[i] = disp;
            disp += size;

        }

        MPI_Barrier(MPI_COMM_WORLD);

        timer=0.0;
        /* Timed loop; the first options.skip iterations are warm-up and
         * are excluded from the measurement. */
        for(i = 0; i < options.iterations + options.skip; i++) {
            t_start = MPI_Wtime();

            MPI_Alltoallv(sendbuf, sendcounts, sdispls, MPI_CHAR, recvbuf, recvcounts, rdispls, MPI_CHAR,
                    MPI_COMM_WORLD);

            t_stop = MPI_Wtime();

            if(i>=options.skip)
            {
                timer+=t_stop-t_start;
            }
            MPI_Barrier(MPI_COMM_WORLD);
        }

        latency = (double)(timer * 1e6) / options.iterations;

        /* Aggregate per-rank latency into min/max/avg on rank 0. */
        MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0,
                MPI_COMM_WORLD);
        avg_time = avg_time/numprocs;

        print_stats(rank, size, avg_time, min_time, max_time);

        MPI_Barrier(MPI_COMM_WORLD);
    }

    free_buffer(rdispls, none);
    free_buffer(sdispls, none);
    free_buffer(recvcounts, none);
    free_buffer(sendcounts, none);
    free_buffer(sendbuf, options.accel);
    free_buffer(recvbuf, options.accel);

    MPI_Finalize();

    if (none != options.accel) {
        if (cleanup_accel()) {
            fprintf(stderr, "Error cleaning up device\n");
            exit(EXIT_FAILURE);
        }
    }

    return EXIT_SUCCESS;
}

/* vi: set sw=4 sts=4 tw=80: */
--------------------------------------------------------------------------------
/mpi/collective/osu_barrier.c:
--------------------------------------------------------------------------------
#define BENCHMARK "OSU MPI%s Barrier Latency Test"
/*
 * Copyright (C) 2002-2016 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K.
Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */

#include "osu_coll.h"

/*
 * Measures the average latency of MPI_Barrier across all ranks.  There is
 * no payload, so a single (size-less) result line is printed.
 */
int main(int argc, char *argv[])
{
    int i = 0, rank;
    int numprocs;
    double avg_time = 0.0, max_time = 0.0, min_time = 0.0;
    double latency = 0.0, t_start = 0.0, t_stop = 0.0;
    double timer=0.0;
    int po_ret;

    set_header(HEADER);
    set_benchmark_name("osu_barrier");
    enable_accel_support();
    po_ret = process_options(argc, argv);

    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

    /* Barrier has no message size; suppress the size column in output. */
    options.show_size = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            MPI_Finalize();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
    }

    if(numprocs < 2) {
        if(rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();

        return EXIT_FAILURE;
    }

    print_preamble(rank);

    /* Single measurement; use the "large message" iteration counts. */
    options.skip = options.skip_large;
    options.iterations = options.iterations_large;
    timer = 0.0;

    /* Timed loop; the first options.skip iterations are warm-up. */
    for(i=0; i < options.iterations + options.skip ; i++) {
        t_start = MPI_Wtime();
        MPI_Barrier(MPI_COMM_WORLD);
        t_stop = MPI_Wtime();

        if(i>=options.skip){
            timer+=t_stop-t_start;
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    latency = (timer * 1e6) / options.iterations;

    /* Aggregate per-rank latency into min/max/avg on rank 0. */
    MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0,
            MPI_COMM_WORLD);
    MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0,
            MPI_COMM_WORLD);
    MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0,
            MPI_COMM_WORLD);
    avg_time = avg_time/numprocs;

    print_stats(rank, 0, avg_time, min_time, max_time);
    MPI_Finalize();

    return EXIT_SUCCESS;
}

/* vi: set sw=4 sts=4 tw=80: */
--------------------------------------------------------------------------------
/mpi/collective/osu_bcast.c:
--------------------------------------------------------------------------------
#define BENCHMARK "OSU MPI%s Broadcast Latency Test"
/*
 * Copyright (C) 2002-2016 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */
#include "osu_coll.h"

/*
 * Measures MPI_Bcast latency from root rank 0 for power-of-two message
 * sizes; a single shared buffer serves as both source and destination.
 */
int main(int argc, char *argv[])
{
    int i = 0, rank, size;
    int numprocs;
    double avg_time = 0.0, max_time = 0.0, min_time = 0.0;
    double latency = 0.0, t_start = 0.0, t_stop = 0.0;
    double timer=0.0;
    char *buffer=NULL;
    int po_ret;

    set_header(HEADER);
    set_benchmark_name("osu_bcast");
    enable_accel_support();
    po_ret = process_options(argc, argv);

    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            MPI_Finalize();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
    }

    if(numprocs < 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    /* Broadcast needs a single buffer per rank; no numprocs scaling. */
    if (options.max_message_size > options.max_mem_limit) {
        options.max_message_size = options.max_mem_limit;
    }

    if (allocate_buffer((void**)&buffer, options.max_message_size, options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(buffer, options.accel, 1, options.max_message_size);

    print_preamble(rank);

    for(size=options.min_message_size; size <= options.max_message_size; size *= 2) {
        if(size > LARGE_MESSAGE_SIZE) {
            options.skip = options.skip_large;
            options.iterations = options.iterations_large;
        }

        timer=0.0;
        /* Timed loop; the first options.skip iterations are warm-up. */
        for(i=0; i < options.iterations + options.skip ; i++) {
            t_start = MPI_Wtime();
            MPI_Bcast(buffer, size, MPI_CHAR, 0, MPI_COMM_WORLD);
            t_stop = MPI_Wtime();

            if(i>=options.skip){
                timer+=t_stop-t_start;
            }
            MPI_Barrier(MPI_COMM_WORLD);

        }

        MPI_Barrier(MPI_COMM_WORLD);

        latency = (timer * 1e6) / options.iterations;

        /* Aggregate per-rank latency into min/max/avg on rank 0. */
        MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0,
                MPI_COMM_WORLD);
        avg_time = avg_time/numprocs;

        print_stats(rank, size, avg_time, min_time, max_time);
    }

    free_buffer(buffer, options.accel);

    MPI_Finalize();

    if (none != options.accel) {
        if (cleanup_accel()) {
            fprintf(stderr, "Error cleaning up device\n");
            exit(EXIT_FAILURE);
        }
    }

    return EXIT_SUCCESS;
}

/* vi: set sw=4 sts=4 tw=80: */
--------------------------------------------------------------------------------
/mpi/collective/osu_gather.c:
--------------------------------------------------------------------------------
#define BENCHMARK "OSU MPI%s Gather Latency Test"
/*
 * Copyright (C) 2002-2016 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */
#include "osu_coll.h"

/*
 * Measures MPI_Gather latency: every rank sends `size` bytes to rank 0,
 * which collects numprocs contiguous segments into recvbuf.
 */
int
main (int argc, char *argv[])
{
    int i, numprocs, rank, size;
    double latency = 0.0, t_start = 0.0, t_stop = 0.0;
    double timer=0.0;
    double avg_time = 0.0, max_time = 0.0, min_time = 0.0;
    char * sendbuf = NULL, * recvbuf = NULL;
    int po_ret;
    size_t bufsize;

    set_header(HEADER);
    set_benchmark_name("osu_gather");
    enable_accel_support();
    po_ret = process_options(argc, argv);

    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            MPI_Finalize();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
    }

    if(numprocs < 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    /* Root stores numprocs segments; bound the aggregate allocation. */
    if ((options.max_message_size * numprocs) > options.max_mem_limit) {
        options.max_message_size = options.max_mem_limit / numprocs;
    }

    /* Only the root rank needs the aggregate receive buffer. */
    if (0 == rank) {
        bufsize = options.max_message_size * numprocs;
        if (allocate_buffer((void**)&recvbuf, bufsize, options.accel)) {
            fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
        set_buffer(recvbuf, options.accel, 1, bufsize);
    }

    if (allocate_buffer((void**)&sendbuf, options.max_message_size,
                options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(sendbuf, options.accel, 0, options.max_message_size);

    print_preamble(rank);

    for(size=options.min_message_size; size <= options.max_message_size; size *= 2) {
        if (size > LARGE_MESSAGE_SIZE) {
            options.skip = options.skip_large;
            options.iterations = options.iterations_large;
        }

        MPI_Barrier(MPI_COMM_WORLD);
        timer=0.0;

        /* Timed loop; the first options.skip iterations are warm-up. */
        for (i=0; i < options.iterations + options.skip ; i++) {
            t_start = MPI_Wtime();
            MPI_Gather(sendbuf, size, MPI_CHAR, recvbuf, size, MPI_CHAR, 0,
                    MPI_COMM_WORLD);
            t_stop = MPI_Wtime();

            if (i >= options.skip) {
                timer+=t_stop-t_start;
            }
            MPI_Barrier(MPI_COMM_WORLD);
        }
        latency = (double)(timer * 1e6) / options.iterations;

        /* Aggregate per-rank latency into min/max/avg on rank 0. */
        MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0,
                MPI_COMM_WORLD);
        avg_time = avg_time/numprocs;

        print_stats(rank, size, avg_time, min_time, max_time);
        MPI_Barrier(MPI_COMM_WORLD);
    }

    if (0 == rank) {
        free_buffer(recvbuf, options.accel);
    }
    free_buffer(sendbuf, options.accel);

    MPI_Finalize();

    if (none != options.accel) {
        if (cleanup_accel()) {
            fprintf(stderr, "Error cleaning up device\n");
            exit(EXIT_FAILURE);
        }
    }

    return EXIT_SUCCESS;
}

/* vi: set sw=4 sts=4 tw=80: */
--------------------------------------------------------------------------------
/mpi/collective/osu_gatherv.c:
--------------------------------------------------------------------------------
#define BENCHMARK "OSU MPI%s Gatherv Latency Test"
/*
 * Copyright (C) 2002-2016 the Network-Based Computing Laboratory
 *
(NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | #include "osu_coll.h" 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int i, numprocs, rank, size, disp; 16 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 17 | double timer=0.0; 18 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 19 | char *sendbuf, *recvbuf; 20 | int *rdispls, *recvcounts; 21 | int po_ret; 22 | size_t bufsize; 23 | 24 | set_header(HEADER); 25 | set_benchmark_name("osu_gatherv"); 26 | enable_accel_support(); 27 | po_ret = process_options(argc, argv); 28 | 29 | if (po_okay == po_ret && none != options.accel) { 30 | if (init_accel()) { 31 | fprintf(stderr, "Error initializing device\n"); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | 36 | MPI_Init(&argc, &argv); 37 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 38 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 39 | 40 | switch (po_ret) { 41 | case po_bad_usage: 42 | print_bad_usage_message(rank); 43 | MPI_Finalize(); 44 | exit(EXIT_FAILURE); 45 | case po_help_message: 46 | print_help_message(rank); 47 | MPI_Finalize(); 48 | exit(EXIT_SUCCESS); 49 | case po_version_message: 50 | print_version_message(rank); 51 | MPI_Finalize(); 52 | exit(EXIT_SUCCESS); 53 | case po_okay: 54 | break; 55 | } 56 | 57 | if(numprocs < 2) { 58 | if (rank == 0) { 59 | fprintf(stderr, "This test requires at least two processes\n"); 60 | } 61 | 62 | MPI_Finalize(); 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | if ((options.max_message_size * numprocs) > options.max_mem_limit) { 67 | options.max_message_size = options.max_mem_limit / numprocs; 68 | } 69 | 70 | if (0 == rank) { 71 | if (allocate_buffer((void**)&recvcounts, numprocs*sizeof(int), none)) { 72 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 73 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 74 | } 
75 | if (allocate_buffer((void**)&rdispls, numprocs*sizeof(int), none)) { 76 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 77 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 78 | } 79 | 80 | bufsize = options.max_message_size * numprocs; 81 | if (allocate_buffer((void**)&recvbuf, bufsize, options.accel)) { 82 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 83 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 84 | } 85 | set_buffer(recvbuf, options.accel, 1, bufsize); 86 | } 87 | 88 | if (allocate_buffer((void**)&sendbuf, options.max_message_size, 89 | options.accel)) { 90 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 91 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 92 | } 93 | set_buffer(sendbuf, options.accel, 0, options.max_message_size); 94 | 95 | print_preamble(rank); 96 | 97 | for(size=options.min_message_size; size <= options.max_message_size; size *= 2) { 98 | 99 | if(size > LARGE_MESSAGE_SIZE) { 100 | options.skip = options.skip_large; 101 | options.iterations = options.iterations_large; 102 | } 103 | 104 | MPI_Barrier(MPI_COMM_WORLD); 105 | 106 | if (0 == rank) { 107 | disp =0; 108 | for ( i = 0; i < numprocs; i++) { 109 | recvcounts[i] = size; 110 | rdispls[i] = disp; 111 | disp += size; 112 | } 113 | } 114 | 115 | MPI_Barrier(MPI_COMM_WORLD); 116 | timer=0.0; 117 | for(i=0; i < options.iterations + options.skip ; i++) { 118 | 119 | t_start = MPI_Wtime(); 120 | 121 | MPI_Gatherv(sendbuf, size, MPI_CHAR, recvbuf, recvcounts, rdispls, MPI_CHAR, 0, MPI_COMM_WORLD); 122 | 123 | t_stop = MPI_Wtime(); 124 | 125 | if(i >= options.skip) { 126 | timer+= t_stop-t_start; 127 | } 128 | MPI_Barrier(MPI_COMM_WORLD); 129 | 130 | } 131 | 132 | MPI_Barrier(MPI_COMM_WORLD); 133 | 134 | latency = (double)(timer * 1e6) / options.iterations; 135 | 136 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 137 | MPI_COMM_WORLD); 138 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 139 | MPI_COMM_WORLD); 140 | 
MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 141 | MPI_COMM_WORLD); 142 | avg_time = avg_time/numprocs; 143 | 144 | print_stats(rank, size, avg_time, min_time, max_time); 145 | MPI_Barrier(MPI_COMM_WORLD); 146 | } 147 | 148 | if (0 == rank) { 149 | free_buffer(rdispls, none); 150 | free_buffer(recvcounts, none); 151 | free_buffer(recvbuf, options.accel); 152 | } 153 | free_buffer(sendbuf, options.accel); 154 | 155 | MPI_Finalize(); 156 | 157 | if (none != options.accel) { 158 | if (cleanup_accel()) { 159 | fprintf(stderr, "Error cleaning up device\n"); 160 | exit(EXIT_FAILURE); 161 | } 162 | } 163 | 164 | return EXIT_SUCCESS; 165 | } 166 | /* vi: set sw=4 sts=4 tw=80: */ 167 | -------------------------------------------------------------------------------- /mpi/collective/osu_ibarrier.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Non-blocking Barrier Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include "osu_coll.h" 13 | 14 | int main(int argc, char *argv[]) 15 | { 16 | int i = 0, rank, size = 0; 17 | int numprocs; 18 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 19 | double test_time = 0.0, test_total = 0.0; 20 | double tcomp = 0.0, tcomp_total=0.0, latency_in_secs=0.0; 21 | double wait_time = 0.0, init_time = 0.0; 22 | double init_total = 0.0, wait_total = 0.0; 23 | double timer = 0.0; 24 | int po_ret; 25 | 26 | set_header(HEADER); 27 | set_benchmark_name("osu_ibarrier"); 28 | enable_accel_support(); 29 | po_ret = process_options(argc, argv); 30 | 31 | if (po_okay == po_ret && none != options.accel) { 32 | if (init_accel()) { 33 | fprintf(stderr, "Error initializing device\n"); 34 | exit(EXIT_FAILURE); 35 | } 36 | } 37 | 38 | options.show_size = 0; 39 | 40 | MPI_Init(&argc, &argv); 41 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 42 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 43 | MPI_Request request; 44 | MPI_Status status; 45 | 46 | switch (po_ret) { 47 | case po_bad_usage: 48 | print_bad_usage_message(rank); 49 | MPI_Finalize(); 50 | exit(EXIT_FAILURE); 51 | case po_help_message: 52 | print_help_message(rank); 53 | MPI_Finalize(); 54 | exit(EXIT_SUCCESS); 55 | case po_version_message: 56 | print_version_message(rank); 57 | MPI_Finalize(); 58 | exit(EXIT_SUCCESS); 59 | case po_okay: 60 | break; 61 | } 62 | 63 | if(numprocs < 2) { 64 | if(rank == 0) { 65 | fprintf(stderr, "This test requires at least two processes\n"); 66 | } 67 | 68 | MPI_Finalize(); 69 | 70 | return EXIT_FAILURE; 71 | } 72 | 73 | print_preamble_nbc(rank); 74 | 75 | options.skip = options.skip_large; 76 | options.iterations = iterations_large; 77 | timer = 0.0; 78 | 79 | for(i=0; i < options.iterations + options.skip ; i++) { 80 | t_start = MPI_Wtime(); 81 | MPI_Ibarrier(MPI_COMM_WORLD, &request); 82 | MPI_Wait(&request,&status); 83 | t_stop = MPI_Wtime(); 84 | 85 | if(i>=options.skip){ 86 | timer+=t_stop-t_start; 87 | } 88 | } 89 | 90 | MPI_Barrier(MPI_COMM_WORLD); 
91 | 92 | latency = (timer * 1e6) / options.iterations; 93 | 94 | /* Comm. latency in seconds, fed to dummy_compute */ 95 | latency_in_secs = timer/options.iterations; 96 | 97 | init_arrays(latency_in_secs); 98 | 99 | MPI_Barrier(MPI_COMM_WORLD); 100 | 101 | timer = 0.0; tcomp_total = 0; tcomp = 0; 102 | init_total = 0.0; wait_total = 0.0; 103 | test_time = 0.0, test_total = 0.0; 104 | 105 | for(i=0; i < options.iterations + options.skip ; i++) { 106 | t_start = MPI_Wtime(); 107 | 108 | init_time = MPI_Wtime(); 109 | MPI_Ibarrier(MPI_COMM_WORLD, &request); 110 | init_time = MPI_Wtime() - init_time; 111 | 112 | tcomp = MPI_Wtime(); 113 | test_time = dummy_compute(latency_in_secs, &request); 114 | tcomp = MPI_Wtime() - tcomp; 115 | 116 | wait_time = MPI_Wtime(); 117 | MPI_Wait(&request,&status); 118 | wait_time = MPI_Wtime() - wait_time; 119 | 120 | t_stop = MPI_Wtime(); 121 | 122 | if(i>=options.skip){ 123 | timer += t_stop-t_start; 124 | tcomp_total += tcomp; 125 | test_total += test_time; 126 | init_total += init_time; 127 | wait_total += wait_time; 128 | } 129 | MPI_Barrier(MPI_COMM_WORLD); 130 | } 131 | 132 | MPI_Barrier (MPI_COMM_WORLD); 133 | 134 | calculate_and_print_stats(rank, size, numprocs, 135 | timer, latency, 136 | test_total, tcomp_total, 137 | wait_total, init_total); 138 | 139 | MPI_Finalize(); 140 | 141 | return EXIT_SUCCESS; 142 | } 143 | 144 | /* vi: set sw=4 sts=4 tw=80: */ 145 | -------------------------------------------------------------------------------- /mpi/collective/osu_ibcast.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Non-Blocking Broadcast Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. 
Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */

#include "osu_coll.h"

/*
 * Measures MPI_Ibcast latency and the communication/computation overlap:
 * per message size, phase 1 times Ibcast+Wait alone; phase 2 interleaves
 * dummy_compute() between initiation and completion.
 */
int main(int argc, char *argv[])
{
    int i = 0, rank, size;
    int numprocs;
    double test_time = 0.0, test_total = 0.0;
    double latency = 0.0, t_start = 0.0, t_stop = 0.0;
    double tcomp = 0.0, tcomp_total=0.0, latency_in_secs=0.0;
    double timer=0.0;
    double wait_time = 0.0, init_time = 0.0;
    double init_total = 0.0, wait_total = 0.0;
    char *buffer=NULL;
    int po_ret;

    set_header(HEADER);
    set_benchmark_name("osu_ibcast");
    enable_accel_support();
    po_ret = process_options(argc, argv);

    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Request request;
    MPI_Status status;

    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            MPI_Finalize();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
    }

    if(numprocs < 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    /* Broadcast needs a single buffer per rank; no numprocs scaling. */
    if (options.max_message_size > options.max_mem_limit) {
        options.max_message_size = options.max_mem_limit;
    }

    if (allocate_buffer((void**)&buffer, options.max_message_size, options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    /* Root seeds the payload (1s); receivers start zeroed. */
    if(rank==0)
        set_buffer(buffer, options.accel, 1, options.max_message_size);
    else
        set_buffer(buffer, options.accel, 0, options.max_message_size);

    print_preamble_nbc(rank);

    for(size=options.min_message_size; size <= options.max_message_size; size *= 2) {
        if(size > LARGE_MESSAGE_SIZE) {
            options.skip = options.skip_large;
            options.iterations = options.iterations_large;
        }

        timer = 0.0;

        /* Phase 1: pure non-blocking broadcast latency (Ibcast + Wait);
         * first options.skip iterations are warm-up. */
        for(i=0; i < options.iterations + options.skip ; i++) {
            t_start = MPI_Wtime();
            MPI_Ibcast(buffer, size, MPI_CHAR, 0, MPI_COMM_WORLD, &request);
            MPI_Wait(&request,&status);

            t_stop = MPI_Wtime();

            if(i>=options.skip){
                timer += t_stop-t_start;
            }
            MPI_Barrier(MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);

        latency = (timer * 1e6) / options.iterations;

        /* Comm. latency in seconds, fed to dummy_compute */
        latency_in_secs = timer/options.iterations;

        init_arrays(latency_in_secs);

        MPI_Barrier(MPI_COMM_WORLD);

        timer = 0.0; tcomp_total = 0; tcomp = 0;
        init_total = 0.0; wait_total = 0.0;
        test_time = 0.0, test_total = 0.0;

        /* Phase 2: overlap test -- compute while the broadcast
         * progresses, timing init/compute/wait segments separately. */
        for(i=0; i < options.iterations + options.skip ; i++) {
            t_start = MPI_Wtime();
            init_time = MPI_Wtime();
            MPI_Ibcast(buffer, size, MPI_CHAR, 0, MPI_COMM_WORLD, &request);
            init_time = MPI_Wtime() - init_time;

            tcomp = MPI_Wtime();
            test_time = dummy_compute(latency_in_secs, &request);
            tcomp = MPI_Wtime() - tcomp;

            wait_time = MPI_Wtime();
            MPI_Wait(&request,&status);
            wait_time = MPI_Wtime() - wait_time;

            t_stop = MPI_Wtime();

            if(i>=options.skip){
                timer += t_stop-t_start;
                tcomp_total += tcomp;
                init_total += init_time;
                test_total += test_time;
                wait_total += wait_time;
            }

            MPI_Barrier(MPI_COMM_WORLD);
        }

        MPI_Barrier (MPI_COMM_WORLD);

        calculate_and_print_stats(rank, size, numprocs,
                                  timer, latency,
                                  test_total, tcomp_total,
                                  wait_total, init_total);
    }

    free_buffer(buffer, options.accel);

    MPI_Finalize();

    if (none != options.accel) {
        if (cleanup_accel()) {
            fprintf(stderr, "Error cleaning up device\n");
            exit(EXIT_FAILURE);
        }
    }

    return EXIT_SUCCESS;
}

/* vi: set sw=4 sts=4 tw=80: */
--------------------------------------------------------------------------------
/mpi/collective/osu_reduce.c:
--------------------------------------------------------------------------------
#define BENCHMARK "OSU MPI%s Reduce Latency Test"
/*
 * Copyright (C) 2002-2016 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */
#include "osu_coll.h"

/*
 * Measures MPI_Reduce (MPI_SUM on MPI_FLOAT, root 0) latency.  Note that
 * `size` counts float elements internally; printed sizes are in bytes.
 */
int main(int argc, char *argv[])
{
    int i, numprocs, rank, size;
    double latency = 0.0, t_start = 0.0, t_stop = 0.0;
    double timer=0.0;
    double avg_time = 0.0, max_time = 0.0, min_time = 0.0;
    float *sendbuf, *recvbuf;
    int po_ret;
    size_t bufsize;

    set_header(HEADER);
    set_benchmark_name("osu_reduce");
    enable_accel_support();
    po_ret = process_options(argc, argv);

    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            MPI_Finalize();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
    }

    if(numprocs < 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    if (options.max_message_size > options.max_mem_limit) {
        options.max_message_size = options.max_mem_limit;
    }

    /* Convert the byte-based minimum to a float element count. */
    options.min_message_size /= sizeof(float);
    if (options.min_message_size < DEFAULT_MIN_MESSAGE_SIZE) {
        options.min_message_size = DEFAULT_MIN_MESSAGE_SIZE;
    }

    /* Round the buffer size down to a whole number of floats. */
    bufsize = sizeof(float)*(options.max_message_size/sizeof(float));
    if (allocate_buffer((void**)&recvbuf, bufsize,
                options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(recvbuf, options.accel, 1, bufsize);

    bufsize = sizeof(float)*(options.max_message_size/sizeof(float));
    if (allocate_buffer((void**)&sendbuf, bufsize,
                options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(sendbuf, options.accel, 0, bufsize);

    print_preamble(rank);

    /* Loop over element counts; terminate on the byte-size bound. */
    for(size=options.min_message_size; size*sizeof(float) <= options.max_message_size; size *= 2) {

        if(size > LARGE_MESSAGE_SIZE) {
            options.skip = options.skip_large;
            options.iterations = options.iterations_large;
        }

        MPI_Barrier(MPI_COMM_WORLD);

        timer=0.0;
        /* Timed loop; the first options.skip iterations are warm-up. */
        for(i=0; i < options.iterations + options.skip ; i++) {
            t_start = MPI_Wtime();

            MPI_Reduce(sendbuf, recvbuf, size, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD );
            t_stop=MPI_Wtime();
            if(i>=options.skip){

                timer+=t_stop-t_start;
            }
            MPI_Barrier(MPI_COMM_WORLD);
        }
        latency = (double)(timer * 1e6) / options.iterations;

        /* Aggregate per-rank latency into min/max/avg on rank 0. */
        MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0,
                MPI_COMM_WORLD);
        avg_time = avg_time/numprocs;

        /* Report the size in bytes, not elements. */
        print_stats(rank, size * sizeof(float), avg_time, min_time, max_time);
        MPI_Barrier(MPI_COMM_WORLD);
    }

    free_buffer(recvbuf, options.accel);
    free_buffer(sendbuf, options.accel);

    MPI_Finalize();

    if (none != options.accel) {
        if (cleanup_accel()) {
            fprintf(stderr, "Error cleaning up device\n");
            exit(EXIT_FAILURE);
        }
    }

    return EXIT_SUCCESS;
}

--------------------------------------------------------------------------------
/mpi/collective/osu_reduce_scatter.c:
--------------------------------------------------------------------------------
#define BENCHMARK "OSU MPI%s Reduce_scatter Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | #include "osu_coll.h" 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int i, numprocs, rank, size; 16 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 17 | double timer=0.0; 18 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 19 | float *sendbuf, *recvbuf; 20 | int *recvcounts; 21 | int po_ret; 22 | size_t bufsize; 23 | 24 | set_header(HEADER); 25 | set_benchmark_name("osu_scatter"); 26 | enable_accel_support(); 27 | po_ret = process_options(argc, argv); 28 | 29 | if (po_okay == po_ret && none != options.accel) { 30 | if (init_accel()) { 31 | fprintf(stderr, "Error initializing device\n"); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | 36 | MPI_Init(&argc, &argv); 37 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 38 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 39 | 40 | switch (po_ret) { 41 | case po_bad_usage: 42 | print_bad_usage_message(rank); 43 | MPI_Finalize(); 44 | exit(EXIT_FAILURE); 45 | case po_help_message: 46 | print_help_message(rank); 47 | MPI_Finalize(); 48 | exit(EXIT_SUCCESS); 49 | case po_version_message: 50 | print_version_message(rank); 51 | MPI_Finalize(); 52 | exit(EXIT_SUCCESS); 53 | case po_okay: 54 | break; 55 | } 56 | 57 | if(numprocs < 2) { 58 | if (rank == 0) { 59 | fprintf(stderr, "This test requires at least two processes\n"); 60 | } 61 | 62 | MPI_Finalize(); 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | if (options.max_message_size > options.max_mem_limit) { 67 | options.max_message_size = options.max_mem_limit; 68 | } 69 | 70 | options.min_message_size /= sizeof(float); 71 | if (options.min_message_size < DEFAULT_MIN_MESSAGE_SIZE) { 72 | 
options.min_message_size = DEFAULT_MIN_MESSAGE_SIZE; 73 | } 74 | 75 | if (allocate_buffer((void**)&recvcounts, numprocs*sizeof(int), none)) { 76 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 77 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 78 | } 79 | 80 | bufsize = sizeof(float)*(options.max_message_size/sizeof(float)); 81 | if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) { 82 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 83 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 84 | } 85 | set_buffer(sendbuf, options.accel, 1, bufsize); 86 | 87 | bufsize = sizeof(float)*((options.max_message_size/numprocs + 1)/sizeof(float)); 88 | if (allocate_buffer((void**)&recvbuf, bufsize, 89 | options.accel)) { 90 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 91 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 92 | } 93 | set_buffer(recvbuf, options.accel, 0, bufsize); 94 | 95 | print_preamble(rank); 96 | 97 | for(size=options.min_message_size; size*sizeof(float) <= options.max_message_size; size *= 2) { 98 | 99 | if(size > LARGE_MESSAGE_SIZE) { 100 | options.skip = options.skip_large; 101 | options.iterations = options.iterations_large; 102 | } 103 | 104 | int portion=0, remainder=0; 105 | portion=size/numprocs; 106 | remainder=size%numprocs; 107 | 108 | for (i=0; i=options.skip){ 130 | 131 | timer+=t_stop-t_start; 132 | } 133 | MPI_Barrier(MPI_COMM_WORLD); 134 | } 135 | latency = (double)(timer * 1e6) / options.iterations; 136 | 137 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 138 | MPI_COMM_WORLD); 139 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 140 | MPI_COMM_WORLD); 141 | MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 142 | MPI_COMM_WORLD); 143 | avg_time = avg_time/numprocs; 144 | 145 | print_stats(rank, size * sizeof(float), avg_time, min_time, max_time); 146 | MPI_Barrier(MPI_COMM_WORLD); 147 | } 148 | 149 | free_buffer(recvcounts, none); 150 | free_buffer(sendbuf, 
options.accel); 151 | free_buffer(recvbuf, options.accel); 152 | 153 | MPI_Finalize(); 154 | 155 | if (none != options.accel) { 156 | if (cleanup_accel()) { 157 | fprintf(stderr, "Error cleaning up device\n"); 158 | exit(EXIT_FAILURE); 159 | } 160 | } 161 | 162 | return EXIT_SUCCESS; 163 | } 164 | -------------------------------------------------------------------------------- /mpi/collective/osu_scatter.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Scatter Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | #include "osu_coll.h" 12 | 13 | int 14 | main (int argc, char *argv[]) 15 | { 16 | int i, numprocs, rank, size; 17 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 18 | double timer=0.0; 19 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 20 | char * sendbuf = NULL, * recvbuf = NULL; 21 | int po_ret; 22 | size_t bufsize; 23 | 24 | set_header(HEADER); 25 | set_benchmark_name("osu_scatter"); 26 | enable_accel_support(); 27 | po_ret = process_options(argc, argv); 28 | 29 | if (po_okay == po_ret && none != options.accel) { 30 | if (init_accel()) { 31 | fprintf(stderr, "Error initializing device\n"); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | 36 | MPI_Init(&argc, &argv); 37 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 38 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 39 | 40 | switch (po_ret) { 41 | case po_bad_usage: 42 | print_bad_usage_message(rank); 43 | MPI_Finalize(); 44 | exit(EXIT_FAILURE); 45 | case po_help_message: 46 | print_help_message(rank); 47 | MPI_Finalize(); 48 | exit(EXIT_SUCCESS); 49 | case po_version_message: 50 | print_version_message(rank); 51 | MPI_Finalize(); 52 | 
exit(EXIT_SUCCESS); 53 | case po_okay: 54 | break; 55 | } 56 | 57 | if(numprocs < 2) { 58 | if (rank == 0) { 59 | fprintf(stderr, "This test requires at least two processes\n"); 60 | } 61 | 62 | MPI_Finalize(); 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | if ((options.max_message_size * numprocs) > options.max_mem_limit) { 67 | options.max_message_size = options.max_mem_limit / numprocs; 68 | } 69 | 70 | if (0 == rank) { 71 | bufsize = options.max_message_size * numprocs; 72 | if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) { 73 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 74 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 75 | } 76 | set_buffer(sendbuf, options.accel, 1, bufsize); 77 | } 78 | 79 | if (allocate_buffer((void**)&recvbuf, options.max_message_size, 80 | options.accel)) { 81 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 82 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 83 | } 84 | set_buffer(recvbuf, options.accel, 0, options.max_message_size); 85 | 86 | print_preamble(rank); 87 | 88 | for(size=options.min_message_size; size <= options.max_message_size; size *= 2) { 89 | if (size > LARGE_MESSAGE_SIZE) { 90 | options.skip = options.skip_large; 91 | options.iterations = options.iterations_large; 92 | } 93 | 94 | MPI_Barrier(MPI_COMM_WORLD); 95 | timer=0.0; 96 | 97 | for (i=0; i < options.iterations + options.skip ; i++) { 98 | t_start = MPI_Wtime(); 99 | MPI_Scatter(sendbuf, size, MPI_CHAR, recvbuf, size, MPI_CHAR, 0, 100 | MPI_COMM_WORLD); 101 | t_stop = MPI_Wtime(); 102 | 103 | if (i >= options.skip) { 104 | timer+=t_stop-t_start; 105 | } 106 | MPI_Barrier(MPI_COMM_WORLD); 107 | } 108 | latency = (double)(timer * 1e6) / options.iterations; 109 | 110 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 111 | MPI_COMM_WORLD); 112 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 113 | MPI_COMM_WORLD); 114 | MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 115 | MPI_COMM_WORLD); 116 | 
avg_time = avg_time/numprocs; 117 | 118 | print_stats(rank, size, avg_time, min_time, max_time); 119 | MPI_Barrier(MPI_COMM_WORLD); 120 | } 121 | 122 | if (0 == rank) { 123 | free_buffer(sendbuf, options.accel); 124 | } 125 | free_buffer(recvbuf, options.accel); 126 | 127 | MPI_Finalize(); 128 | 129 | if (none != options.accel) { 130 | if (cleanup_accel()) { 131 | fprintf(stderr, "Error cleaning up device\n"); 132 | exit(EXIT_FAILURE); 133 | } 134 | } 135 | 136 | return EXIT_SUCCESS; 137 | } 138 | 139 | /* vi: set sw=4 sts=4 tw=80: */ 140 | -------------------------------------------------------------------------------- /mpi/collective/osu_scatterv.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Scatterv Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | #include "osu_coll.h" 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int i, numprocs, rank, size, disp; 16 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 17 | double timer=0.0; 18 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 19 | char *sendbuf, *recvbuf; 20 | int *sdispls=NULL, *sendcounts=NULL; 21 | int po_ret; 22 | size_t bufsize; 23 | 24 | set_header(HEADER); 25 | set_benchmark_name("osu_scatterv"); 26 | enable_accel_support(); 27 | po_ret = process_options(argc, argv); 28 | 29 | if (po_okay == po_ret && none != options.accel) { 30 | if (init_accel()) { 31 | fprintf(stderr, "Error initializing device\n"); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | 36 | MPI_Init(&argc, &argv); 37 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 38 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 39 | 40 | switch (po_ret) { 41 | case po_bad_usage: 42 | print_bad_usage_message(rank); 43 | MPI_Finalize(); 44 | exit(EXIT_FAILURE); 45 | case po_help_message: 46 | print_help_message(rank); 47 | MPI_Finalize(); 48 | exit(EXIT_SUCCESS); 49 | case po_version_message: 50 | print_version_message(rank); 51 | MPI_Finalize(); 52 | exit(EXIT_SUCCESS); 53 | case po_okay: 54 | break; 55 | } 56 | 57 | if(numprocs < 2) { 58 | if (rank == 0) { 59 | fprintf(stderr, "This test requires at least two processes\n"); 60 | } 61 | 62 | MPI_Finalize(); 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | if ((options.max_message_size * numprocs) > options.max_mem_limit) { 67 | options.max_message_size = options.max_mem_limit / numprocs; 68 | } 69 | 70 | if (0 == rank) { 71 | if (allocate_buffer((void**)&sendcounts, numprocs*sizeof(int), none)) { 72 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 73 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 74 | } 75 | if (allocate_buffer((void**)&sdispls, numprocs*sizeof(int), none)) { 76 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 77 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 78 | } 79 | 80 | bufsize = 
options.max_message_size * numprocs; 81 | if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) { 82 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 83 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 84 | } 85 | set_buffer(sendbuf, options.accel, 1, bufsize); 86 | } 87 | 88 | if (allocate_buffer((void**)&recvbuf, options.max_message_size, 89 | options.accel)) { 90 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 91 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 92 | } 93 | set_buffer(recvbuf, options.accel, 0, options.max_message_size); 94 | 95 | print_preamble(rank); 96 | 97 | for(size=options.min_message_size; size <= options.max_message_size; size *= 2) { 98 | 99 | if(size > LARGE_MESSAGE_SIZE) { 100 | options.skip = options.skip_large; 101 | options.iterations = options.iterations_large; 102 | } 103 | 104 | MPI_Barrier(MPI_COMM_WORLD); 105 | 106 | if (0 == rank) { 107 | disp =0; 108 | for ( i = 0; i < numprocs; i++) { 109 | sendcounts[i] = size; 110 | sdispls[i] = disp; 111 | disp += size; 112 | } 113 | } 114 | 115 | MPI_Barrier(MPI_COMM_WORLD); 116 | 117 | timer=0.0; 118 | 119 | for(i=0; i < options.iterations + options.skip ; i++) { 120 | 121 | t_start = MPI_Wtime(); 122 | MPI_Scatterv(sendbuf, sendcounts, sdispls, MPI_CHAR, recvbuf, 123 | size, MPI_CHAR, 0, MPI_COMM_WORLD); 124 | 125 | t_stop = MPI_Wtime(); 126 | if(i >= options.skip) { 127 | timer+=t_stop-t_start; 128 | } 129 | MPI_Barrier(MPI_COMM_WORLD); 130 | } 131 | latency = (double)(timer * 1e6) / options.iterations; 132 | 133 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 134 | MPI_COMM_WORLD); 135 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 136 | MPI_COMM_WORLD); 137 | MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 138 | MPI_COMM_WORLD); 139 | avg_time = avg_time/numprocs; 140 | 141 | print_stats(rank, size, avg_time, min_time, max_time); 142 | MPI_Barrier(MPI_COMM_WORLD); 143 | } 144 | 145 | if (0 == rank) { 146 | 
free_buffer(sendcounts, none); 147 | free_buffer(sdispls, none); 148 | free_buffer(sendbuf, options.accel); 149 | } 150 | free_buffer(recvbuf, options.accel); 151 | 152 | MPI_Finalize(); 153 | 154 | if (none != options.accel) { 155 | if (cleanup_accel()) { 156 | fprintf(stderr, "Error cleaning up device\n"); 157 | exit(EXIT_FAILURE); 158 | } 159 | } 160 | 161 | return EXIT_SUCCESS; 162 | } 163 | 164 | /* vi: set sw=4 sts=4 tw=80: */ 165 | -------------------------------------------------------------------------------- /mpi/one-sided/Makefile.am: -------------------------------------------------------------------------------- 1 | one_sideddir = $(pkglibexecdir)/mpi/one-sided 2 | one_sided_PROGRAMS = osu_acc_latency osu_get_bw osu_get_latency osu_put_bibw osu_put_bw osu_put_latency 3 | 4 | if MPI3_LIBRARY 5 | one_sided_PROGRAMS += osu_get_acc_latency osu_fop_latency osu_cas_latency 6 | endif 7 | 8 | osu_put_latency_SOURCES = osu_put_latency.c osu_1sc.c osu_1sc.h 9 | osu_put_bw_SOURCES = osu_put_bw.c osu_1sc.c osu_1sc.h 10 | osu_put_bibw_SOURCES = osu_put_bibw.c osu_1sc.c osu_1sc.h 11 | osu_get_latency_SOURCES = osu_get_latency.c osu_1sc.c osu_1sc.h 12 | osu_get_bw_SOURCES = osu_get_bw.c osu_1sc.c osu_1sc.h 13 | osu_acc_latency_SOURCES = osu_acc_latency.c osu_1sc.c osu_1sc.h 14 | osu_fop_latency_SOURCES = osu_fop_latency.c osu_1sc.c osu_1sc.h 15 | osu_cas_latency_SOURCES = osu_cas_latency.c osu_1sc.c osu_1sc.h 16 | 17 | if EMBEDDED_BUILD 18 | AM_LDFLAGS = 19 | AM_CPPFLAGS = -I$(top_builddir)/../src/include \ 20 | -I${top_srcdir}/../src/include 21 | if BUILD_PROFILING_LIB 22 | AM_LDFLAGS += $(top_builddir)/../lib/lib@PMPILIBNAME@.la 23 | endif 24 | AM_LDFLAGS += $(top_builddir)/../lib/lib@MPILIBNAME@.la 25 | endif 26 | 27 | if OPENACC 28 | AM_CFLAGS = -acc 29 | endif 30 | -------------------------------------------------------------------------------- /mpi/one-sided/osu_1sc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Copyright (C) 2003-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #ifdef _ENABLE_OPENACC_ 20 | #include 21 | #endif 22 | 23 | #ifdef _ENABLE_CUDA_ 24 | #include 25 | #include 26 | #endif 27 | 28 | #define MAX_ALIGNMENT 65536 29 | 30 | #ifndef FIELD_WIDTH 31 | # define FIELD_WIDTH 20 32 | #endif 33 | 34 | #ifndef FLOAT_PRECISION 35 | # define FLOAT_PRECISION 2 36 | #endif 37 | 38 | #define CHECK(stmt) \ 39 | do { \ 40 | int errno = (stmt); \ 41 | if (0 != errno) { \ 42 | fprintf(stderr, "[%s:%d] function call failed with %d \n",\ 43 | __FILE__, __LINE__, errno); \ 44 | exit(EXIT_FAILURE); \ 45 | } \ 46 | assert(0 == errno); \ 47 | } while (0) 48 | 49 | #define MPI_CHECK(stmt) \ 50 | do { \ 51 | int mpi_errno = (stmt); \ 52 | if (MPI_SUCCESS != mpi_errno) { \ 53 | fprintf(stderr, "[%s:%d] MPI call failed with %d \n", \ 54 | __FILE__, __LINE__,mpi_errno); \ 55 | exit(EXIT_FAILURE); \ 56 | } \ 57 | assert(MPI_SUCCESS == mpi_errno); \ 58 | } while (0) 59 | 60 | #ifdef _ENABLE_CUDA_ 61 | # define CUDA_ENABLED 1 62 | #else 63 | # define CUDA_ENABLED 0 64 | #endif 65 | 66 | #ifdef _ENABLE_OPENACC_ 67 | # define OPENACC_ENABLED 1 68 | #else 69 | # define OPENACC_ENABLED 0 70 | #endif 71 | 72 | /*structures, enumerators and such*/ 73 | /* Window creation */ 74 | typedef enum { 75 | WIN_CREATE=0, 76 | #if MPI_VERSION >= 3 77 | WIN_ALLOCATE, 78 | WIN_DYNAMIC 79 | #endif 80 | } WINDOW; 81 | 82 | /* Synchronization */ 83 | typedef enum { 84 | LOCK=0, 85 | PSCW, 86 | FENCE, 87 | #if MPI_VERSION >= 3 88 | FLUSH, 89 | FLUSH_LOCAL, 90 | LOCK_ALL, 91 | #endif 92 | } SYNC; 93 | 94 | enum po_ret_type { 95 | po_cuda_not_avail, 
96 | po_openacc_not_avail, 97 | po_bad_usage, 98 | po_help_message, 99 | po_okay, 100 | }; 101 | 102 | enum accel_type { 103 | none, 104 | cuda, 105 | openacc 106 | }; 107 | 108 | enum options_type { 109 | all_sync, 110 | active_sync 111 | }; 112 | 113 | struct options_t { 114 | char rank0; 115 | char rank1; 116 | enum accel_type accel; 117 | int loop; 118 | int loop_large; 119 | int skip; 120 | int skip_large; 121 | }; 122 | 123 | extern struct options_t options; 124 | 125 | /*variables*/ 126 | extern char const *win_info[20]; 127 | extern char const *sync_info[20]; 128 | 129 | #ifdef _ENABLE_CUDA_ 130 | extern CUcontext cuContext; 131 | #endif 132 | 133 | extern MPI_Aint disp_remote; 134 | extern MPI_Aint disp_local; 135 | 136 | /*function declarations*/ 137 | void usage (int, char const *); 138 | int process_options (int, char **, WINDOW*, SYNC*, int); 139 | void allocate_memory(int, char *, char *, char **, char **, 140 | char **win_base, int, WINDOW, MPI_Win *); 141 | void free_memory (void *, void *, MPI_Win, int); 142 | void allocate_atomic_memory(int, char *, char *, char *, 143 | char *, char **, char **, char **, char **, 144 | char **win_base, int, WINDOW, MPI_Win *); 145 | void free_atomic_memory (void *, void *, void *, void *, MPI_Win, int); 146 | int init_accel (); 147 | int cleanup_accel (); 148 | -------------------------------------------------------------------------------- /mpi/pt2pt/Makefile.am: -------------------------------------------------------------------------------- 1 | pt2ptdir = $(pkglibexecdir)/mpi/pt2pt 2 | pt2pt_PROGRAMS = osu_bibw osu_bw osu_latency osu_mbw_mr osu_multi_lat 3 | 4 | osu_bw_SOURCES = osu_bw.c osu_pt2pt.c osu_pt2pt.h 5 | osu_bibw_SOURCES = osu_bibw.c osu_pt2pt.c osu_pt2pt.h 6 | osu_latency_SOURCES = osu_latency.c osu_pt2pt.c osu_pt2pt.h 7 | osu_multi_lat_SOURCES = osu_multi_lat.c osu_pt2pt.c osu_pt2pt.h 8 | osu_latency_mt_SOURCES = osu_latency_mt.c osu_pt2pt.c osu_pt2pt.h 9 | 10 | if MPI2_LIBRARY 11 | pt2pt_PROGRAMS 
+= osu_latency_mt 12 | endif 13 | 14 | if EMBEDDED_BUILD 15 | AM_LDFLAGS = 16 | AM_CPPFLAGS = -I$(top_builddir)/../src/include \ 17 | -I${top_srcdir}/../src/include 18 | if BUILD_PROFILING_LIB 19 | AM_LDFLAGS += $(top_builddir)/../lib/lib@PMPILIBNAME@.la 20 | endif 21 | AM_LDFLAGS += $(top_builddir)/../lib/lib@MPILIBNAME@.la 22 | endif 23 | 24 | if OPENACC 25 | AM_CFLAGS = -acc 26 | endif 27 | 28 | -------------------------------------------------------------------------------- /mpi/pt2pt/osu_bibw.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Bi-Directional Bandwidth Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | #include 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int myid, numprocs, i, j; 16 | int size; 17 | char *s_buf, *r_buf; 18 | double t_start = 0.0, t_end = 0.0, t = 0.0; 19 | int window_size = 64; 20 | int po_ret = process_options(argc, argv, BW); 21 | 22 | if (po_okay == po_ret && none != options.accel) { 23 | if (init_accel()) { 24 | fprintf(stderr, "Error initializing device\n"); 25 | exit(EXIT_FAILURE); 26 | } 27 | } 28 | set_header(HEADER); 29 | 30 | MPI_Init(&argc, &argv); 31 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 32 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 33 | 34 | if (0 == myid) { 35 | switch (po_ret) { 36 | case po_cuda_not_avail: 37 | fprintf(stderr, "CUDA support not enabled. Please recompile " 38 | "benchmark with CUDA support.\n"); 39 | break; 40 | case po_openacc_not_avail: 41 | fprintf(stderr, "OPENACC support not enabled. 
Please " 42 | "recompile benchmark with OPENACC support.\n"); 43 | break; 44 | case po_bad_usage: 45 | case po_help_message: 46 | usage("osu_bibw"); 47 | break; 48 | } 49 | } 50 | 51 | switch (po_ret) { 52 | case po_cuda_not_avail: 53 | case po_openacc_not_avail: 54 | case po_bad_usage: 55 | MPI_Finalize(); 56 | exit(EXIT_FAILURE); 57 | case po_help_message: 58 | MPI_Finalize(); 59 | exit(EXIT_SUCCESS); 60 | case po_okay: 61 | break; 62 | } 63 | 64 | if(numprocs != 2) { 65 | if(myid == 0) { 66 | fprintf(stderr, "This test requires exactly two processes\n"); 67 | } 68 | 69 | MPI_Finalize(); 70 | exit(EXIT_FAILURE); 71 | } 72 | 73 | if (allocate_memory(&s_buf, &r_buf, myid)) { 74 | /* Error allocating memory */ 75 | MPI_Finalize(); 76 | exit(EXIT_FAILURE); 77 | } 78 | 79 | print_header(myid, BW); 80 | 81 | /* Bi-Directional Bandwidth test */ 82 | for(size = 1; size <= MAX_MSG_SIZE; size *= 2) { 83 | /* touch the data */ 84 | touch_data(s_buf, r_buf, myid, size); 85 | 86 | if(size > LARGE_MESSAGE_SIZE) { 87 | options.loop = options.loop_large; 88 | options.skip = options.skip_large; 89 | window_size = WINDOW_SIZE_LARGE; 90 | } 91 | 92 | if(myid == 0) { 93 | for(i = 0; i < options.loop + options.skip; i++) { 94 | if(i == options.skip) { 95 | t_start = MPI_Wtime(); 96 | } 97 | 98 | for(j = 0; j < window_size; j++) { 99 | MPI_Irecv(r_buf, size, MPI_CHAR, 1, 10, MPI_COMM_WORLD, 100 | recv_request + j); 101 | } 102 | 103 | for(j = 0; j < window_size; j++) { 104 | MPI_Isend(s_buf, size, MPI_CHAR, 1, 100, MPI_COMM_WORLD, 105 | send_request + j); 106 | } 107 | 108 | MPI_Waitall(window_size, send_request, reqstat); 109 | MPI_Waitall(window_size, recv_request, reqstat); 110 | } 111 | 112 | t_end = MPI_Wtime(); 113 | t = t_end - t_start; 114 | 115 | } 116 | 117 | else if(myid == 1) { 118 | for(i = 0; i < options.loop + options.skip; i++) { 119 | for(j = 0; j < window_size; j++) { 120 | MPI_Irecv(r_buf, size, MPI_CHAR, 0, 100, MPI_COMM_WORLD, 121 | recv_request + j); 122 | } 123 
| 124 | for (j = 0; j < window_size; j++) { 125 | MPI_Isend(s_buf, size, MPI_CHAR, 0, 10, MPI_COMM_WORLD, 126 | send_request + j); 127 | } 128 | 129 | MPI_Waitall(window_size, send_request, reqstat); 130 | MPI_Waitall(window_size, recv_request, reqstat); 131 | } 132 | } 133 | 134 | if(myid == 0) { 135 | double tmp = size / 1e6 * options.loop * window_size * 2; 136 | 137 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 138 | FLOAT_PRECISION, tmp / t); 139 | fflush(stdout); 140 | } 141 | } 142 | 143 | free_memory(s_buf, r_buf, myid); 144 | MPI_Finalize(); 145 | 146 | if (none != options.accel) { 147 | if (cleanup_accel()) { 148 | fprintf(stderr, "Error cleaning up device\n"); 149 | exit(EXIT_FAILURE); 150 | } 151 | } 152 | 153 | return EXIT_SUCCESS; 154 | } 155 | 156 | 157 | -------------------------------------------------------------------------------- /mpi/pt2pt/osu_bw.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Bandwidth Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | 14 | int 15 | main (int argc, char *argv[]) 16 | { 17 | int myid, numprocs, i, j; 18 | int size; 19 | char *s_buf, *r_buf; 20 | double t_start = 0.0, t_end = 0.0, t = 0.0; 21 | int window_size = 64; 22 | int po_ret = process_options(argc, argv, BW); 23 | 24 | if (po_okay == po_ret && none != options.accel) { 25 | if (init_accel()) { 26 | fprintf(stderr, "Error initializing device\n"); 27 | exit(EXIT_FAILURE); 28 | } 29 | } 30 | 31 | set_header(HEADER); 32 | 33 | MPI_Init(&argc, &argv); 34 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 35 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 36 | 37 | if (0 == myid) { 38 | switch (po_ret) { 39 | case po_cuda_not_avail: 40 | fprintf(stderr, "CUDA support not enabled. Please recompile " 41 | "benchmark with CUDA support.\n"); 42 | break; 43 | case po_openacc_not_avail: 44 | fprintf(stderr, "OPENACC support not enabled. Please " 45 | "recompile benchmark with OPENACC support.\n"); 46 | break; 47 | case po_bad_usage: 48 | case po_help_message: 49 | usage("osu_bw"); 50 | break; 51 | } 52 | } 53 | 54 | switch (po_ret) { 55 | case po_cuda_not_avail: 56 | case po_openacc_not_avail: 57 | case po_bad_usage: 58 | MPI_Finalize(); 59 | exit(EXIT_FAILURE); 60 | case po_help_message: 61 | MPI_Finalize(); 62 | exit(EXIT_SUCCESS); 63 | case po_okay: 64 | break; 65 | } 66 | 67 | if(numprocs != 2) { 68 | if(myid == 0) { 69 | fprintf(stderr, "This test requires exactly two processes\n"); 70 | } 71 | 72 | MPI_Finalize(); 73 | exit(EXIT_FAILURE); 74 | } 75 | 76 | if (allocate_memory(&s_buf, &r_buf, myid)) { 77 | /* Error allocating memory */ 78 | MPI_Finalize(); 79 | exit(EXIT_FAILURE); 80 | } 81 | 82 | print_header(myid, BW); 83 | 84 | /* Bandwidth test */ 85 | for(size = 1; size <= MAX_MSG_SIZE; size *= 2) { 86 | touch_data(s_buf, r_buf, myid, size); 87 | 88 | if(size > LARGE_MESSAGE_SIZE) { 89 | options.loop = options.loop_large; 90 | options.skip = options.skip_large; 91 | window_size = WINDOW_SIZE_LARGE; 92 | } 93 | 
94 | if(myid == 0) { 95 | for(i = 0; i < options.loop + options.skip; i++) { 96 | if(i == options.skip) { 97 | t_start = MPI_Wtime(); 98 | } 99 | 100 | for(j = 0; j < window_size; j++) { 101 | MPI_Isend(s_buf, size, MPI_CHAR, 1, 100, MPI_COMM_WORLD, 102 | request + j); 103 | } 104 | 105 | MPI_Waitall(window_size, request, reqstat); 106 | MPI_Recv(r_buf, 4, MPI_CHAR, 1, 101, MPI_COMM_WORLD, 107 | &reqstat[0]); 108 | } 109 | 110 | t_end = MPI_Wtime(); 111 | t = t_end - t_start; 112 | } 113 | 114 | else if(myid == 1) { 115 | for(i = 0; i < options.loop + options.skip; i++) { 116 | for(j = 0; j < window_size; j++) { 117 | MPI_Irecv(r_buf, size, MPI_CHAR, 0, 100, MPI_COMM_WORLD, 118 | request + j); 119 | } 120 | 121 | MPI_Waitall(window_size, request, reqstat); 122 | MPI_Send(s_buf, 4, MPI_CHAR, 0, 101, MPI_COMM_WORLD); 123 | } 124 | } 125 | 126 | if(myid == 0) { 127 | double tmp = size / 1e6 * options.loop * window_size; 128 | 129 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 130 | FLOAT_PRECISION, tmp / t); 131 | fflush(stdout); 132 | } 133 | } 134 | 135 | free_memory(s_buf, r_buf, myid); 136 | MPI_Finalize(); 137 | 138 | if (none != options.accel) { 139 | if (cleanup_accel()) { 140 | fprintf(stderr, "Error cleaning up device\n"); 141 | exit(EXIT_FAILURE); 142 | } 143 | } 144 | 145 | return EXIT_SUCCESS; 146 | } 147 | -------------------------------------------------------------------------------- /mpi/pt2pt/osu_latency.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | #include 12 | 13 | int 14 | main (int argc, char *argv[]) 15 | { 16 | int myid, numprocs, i; 17 | int size; 18 | MPI_Status reqstat; 19 | char *s_buf, *r_buf; 20 | double t_start = 0.0, t_end = 0.0; 21 | int po_ret = process_options(argc, argv, LAT); 22 | 23 | if (po_okay == po_ret && none != options.accel) { 24 | if (init_accel()) { 25 | fprintf(stderr, "Error initializing device\n"); 26 | exit(EXIT_FAILURE); 27 | } 28 | } 29 | 30 | set_header(HEADER); 31 | 32 | MPI_Init(&argc, &argv); 33 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 34 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 35 | 36 | if (0 == myid) { 37 | switch (po_ret) { 38 | case po_cuda_not_avail: 39 | fprintf(stderr, "CUDA support not enabled. Please recompile " 40 | "benchmark with CUDA support.\n"); 41 | break; 42 | case po_openacc_not_avail: 43 | fprintf(stderr, "OPENACC support not enabled. Please " 44 | "recompile benchmark with OPENACC support.\n"); 45 | break; 46 | case po_bad_usage: 47 | case po_help_message: 48 | usage("osu_latency"); 49 | break; 50 | } 51 | } 52 | 53 | switch (po_ret) { 54 | case po_cuda_not_avail: 55 | case po_openacc_not_avail: 56 | case po_bad_usage: 57 | MPI_Finalize(); 58 | exit(EXIT_FAILURE); 59 | case po_help_message: 60 | MPI_Finalize(); 61 | exit(EXIT_SUCCESS); 62 | case po_okay: 63 | break; 64 | } 65 | 66 | if(numprocs != 2) { 67 | if(myid == 0) { 68 | fprintf(stderr, "This test requires exactly two processes\n"); 69 | } 70 | 71 | MPI_Finalize(); 72 | exit(EXIT_FAILURE); 73 | } 74 | 75 | if (allocate_memory(&s_buf, &r_buf, myid)) { 76 | /* Error allocating memory */ 77 | MPI_Finalize(); 78 | exit(EXIT_FAILURE); 79 | } 80 | 81 | print_header(myid, LAT); 82 | 83 | 84 | /* Latency test */ 85 | for(size = 0; size <= MAX_MSG_SIZE; size = (size ? 
size * 2 : 1)) { 86 | touch_data(s_buf, r_buf, myid, size); 87 | 88 | if(size > LARGE_MESSAGE_SIZE) { 89 | options.loop = options.loop_large; 90 | options.skip = options.skip_large; 91 | } 92 | 93 | MPI_Barrier(MPI_COMM_WORLD); 94 | 95 | if(myid == 0) { 96 | for(i = 0; i < options.loop + options.skip; i++) { 97 | if(i == options.skip) t_start = MPI_Wtime(); 98 | 99 | MPI_Send(s_buf, size, MPI_CHAR, 1, 1, MPI_COMM_WORLD); 100 | MPI_Recv(r_buf, size, MPI_CHAR, 1, 1, MPI_COMM_WORLD, &reqstat); 101 | } 102 | 103 | t_end = MPI_Wtime(); 104 | } 105 | 106 | else if(myid == 1) { 107 | for(i = 0; i < options.loop + options.skip; i++) { 108 | MPI_Recv(r_buf, size, MPI_CHAR, 0, 1, MPI_COMM_WORLD, &reqstat); 109 | MPI_Send(s_buf, size, MPI_CHAR, 0, 1, MPI_COMM_WORLD); 110 | } 111 | } 112 | 113 | if(myid == 0) { 114 | double latency = (t_end - t_start) * 1e6 / (2.0 * options.loop); 115 | 116 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 117 | FLOAT_PRECISION, latency); 118 | fflush(stdout); 119 | } 120 | } 121 | 122 | free_memory(s_buf, r_buf, myid); 123 | MPI_Finalize(); 124 | 125 | if (none != options.accel) { 126 | if (cleanup_accel()) { 127 | fprintf(stderr, "Error cleaning up device\n"); 128 | exit(EXIT_FAILURE); 129 | } 130 | } 131 | 132 | return EXIT_SUCCESS; 133 | } 134 | 135 | -------------------------------------------------------------------------------- /mpi/pt2pt/osu_multi_lat.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI Multi Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | 14 | #define MAX_MSG_SIZE (1<<22) 15 | #define MAX_STEPS (22+1) 16 | 17 | char *s_buf, *r_buf; 18 | 19 | static void multi_latency(int rank, int pairs); 20 | 21 | int main(int argc, char* argv[]) 22 | { 23 | unsigned long align_size = sysconf(_SC_PAGESIZE); 24 | int rank, nprocs; 25 | int pairs; 26 | 27 | int po_ret = process_options(argc, argv, LAT); 28 | 29 | if (po_okay == po_ret && none != options.accel) { 30 | if (init_accel()) { 31 | fprintf(stderr, "Error initializing device\n"); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | set_header(HEADER); 36 | MPI_Init(&argc, &argv); 37 | 38 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 39 | MPI_Comm_size(MPI_COMM_WORLD, &nprocs); 40 | 41 | pairs = nprocs/2; 42 | 43 | if (0 == rank) { 44 | switch (po_ret) { 45 | case po_cuda_not_avail: 46 | fprintf(stderr, "CUDA support not enabled. Please recompile " 47 | "benchmark with CUDA support.\n"); 48 | break; 49 | case po_openacc_not_avail: 50 | fprintf(stderr, "OPENACC support not enabled. 
Please " 51 | "recompile benchmark with OPENACC support.\n"); 52 | break; 53 | case po_bad_usage: 54 | case po_help_message: 55 | usage("osu_multi_lat"); 56 | break; 57 | } 58 | } 59 | 60 | switch (po_ret) { 61 | case po_cuda_not_avail: 62 | case po_openacc_not_avail: 63 | case po_bad_usage: 64 | MPI_Finalize(); 65 | exit(EXIT_FAILURE); 66 | case po_help_message: 67 | MPI_Finalize(); 68 | exit(EXIT_SUCCESS); 69 | case po_okay: 70 | break; 71 | } 72 | 73 | if (posix_memalign((void**)&s_buf, align_size, MAX_MSG_SIZE)) { 74 | fprintf(stderr, "Error allocating host memory\n"); 75 | return EXIT_FAILURE; 76 | } 77 | 78 | if (posix_memalign((void**)&r_buf, align_size, MAX_MSG_SIZE)) { 79 | fprintf(stderr, "Error allocating host memory\n"); 80 | return EXIT_FAILURE; 81 | } 82 | 83 | memset(s_buf, 0, MAX_MSG_SIZE); 84 | memset(r_buf, 0, MAX_MSG_SIZE); 85 | 86 | if(rank == 0) { 87 | fprintf(stdout, HEADER); 88 | fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)"); 89 | fflush(stdout); 90 | } 91 | 92 | MPI_Barrier(MPI_COMM_WORLD); 93 | 94 | multi_latency(rank, pairs); 95 | 96 | MPI_Barrier(MPI_COMM_WORLD); 97 | 98 | MPI_Finalize(); 99 | 100 | free(r_buf); 101 | free(s_buf); 102 | 103 | return EXIT_SUCCESS; 104 | } 105 | 106 | static void multi_latency(int rank, int pairs) 107 | { 108 | int size, partner; 109 | int i; 110 | double t_start = 0.0, t_end = 0.0, 111 | latency = 0.0, total_lat = 0.0, 112 | avg_lat = 0.0; 113 | 114 | MPI_Status reqstat; 115 | 116 | 117 | for(size = 0; size <= MAX_MSG_SIZE; size = (size ? 
size * 2 : 1)) { 118 | 119 | MPI_Barrier(MPI_COMM_WORLD); 120 | 121 | if(size > LARGE_MESSAGE_SIZE) { 122 | options.loop = options.loop_large; 123 | options.skip = options.skip_large; 124 | } else { 125 | options.loop = options.loop; 126 | options.skip = options.skip; 127 | } 128 | 129 | if (rank < pairs) { 130 | partner = rank + pairs; 131 | 132 | for (i = 0; i < options.loop + options.skip; i++) { 133 | 134 | if (i == options.skip) { 135 | t_start = MPI_Wtime(); 136 | MPI_Barrier(MPI_COMM_WORLD); 137 | } 138 | 139 | MPI_Send(s_buf, size, MPI_CHAR, partner, 1, MPI_COMM_WORLD); 140 | MPI_Recv(r_buf, size, MPI_CHAR, partner, 1, MPI_COMM_WORLD, 141 | &reqstat); 142 | } 143 | 144 | t_end = MPI_Wtime(); 145 | 146 | } else { 147 | partner = rank - pairs; 148 | 149 | for (i = 0; i < options.loop + options.skip; i++) { 150 | 151 | if (i == options.skip) { 152 | t_start = MPI_Wtime(); 153 | MPI_Barrier(MPI_COMM_WORLD); 154 | } 155 | 156 | MPI_Recv(r_buf, size, MPI_CHAR, partner, 1, MPI_COMM_WORLD, 157 | &reqstat); 158 | MPI_Send(s_buf, size, MPI_CHAR, partner, 1, MPI_COMM_WORLD); 159 | } 160 | 161 | t_end = MPI_Wtime(); 162 | } 163 | 164 | latency = (t_end - t_start) * 1.0e6 / (2.0 * options.loop); 165 | 166 | MPI_Reduce(&latency, &total_lat, 1, MPI_DOUBLE, MPI_SUM, 0, 167 | MPI_COMM_WORLD); 168 | 169 | avg_lat = total_lat/(double) (pairs * 2); 170 | 171 | if(0 == rank) { 172 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 173 | FLOAT_PRECISION, avg_lat); 174 | fflush(stdout); 175 | } 176 | } 177 | } 178 | 179 | /* vi: set sw=4 sts=4 tw=80: */ 180 | -------------------------------------------------------------------------------- /mpi/pt2pt/osu_pt2pt.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. 
Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 9 | */ 10 | #ifndef OSU_PT2PT_H 11 | #define OSU_PT2PT_H 1 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #ifdef _ENABLE_CUDA_ 22 | #include "cuda.h" 23 | #include "cuda_runtime.h" 24 | #endif 25 | 26 | #ifdef _ENABLE_OPENACC_ 27 | #include 28 | #endif 29 | 30 | #ifdef PACKAGE_VERSION 31 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 32 | #else 33 | # define HEADER "# " BENCHMARK "\n" 34 | #endif 35 | 36 | #ifndef FIELD_WIDTH 37 | # define FIELD_WIDTH 20 38 | #endif 39 | 40 | #ifndef FLOAT_PRECISION 41 | # define FLOAT_PRECISION 2 42 | #endif 43 | 44 | #define MAX_REQ_NUM 1000 45 | 46 | #define MAX_MSG_SIZE (1<<22) 47 | #define MYBUFSIZE (MAX_MSG_SIZE) 48 | 49 | #define WINDOW_SIZE_LARGE 64 50 | #define LARGE_MESSAGE_SIZE 8192 51 | 52 | #ifdef _ENABLE_OPENACC_ 53 | # define OPENACC_ENABLED 1 54 | #else 55 | # define OPENACC_ENABLED 0 56 | #endif 57 | 58 | #ifdef _ENABLE_CUDA_ 59 | # define CUDA_ENABLED 1 60 | #else 61 | # define CUDA_ENABLED 0 62 | #endif 63 | 64 | extern MPI_Request request[MAX_REQ_NUM]; 65 | extern MPI_Status reqstat[MAX_REQ_NUM]; 66 | extern MPI_Request send_request[MAX_REQ_NUM]; 67 | extern MPI_Request recv_request[MAX_REQ_NUM]; 68 | 69 | #ifdef _ENABLE_CUDA_ 70 | extern CUcontext cuContext; 71 | #endif 72 | 73 | #define BW 0 74 | #define LAT 1 75 | 76 | #define BW_LOOP_SMALL 100 77 | #define BW_SKIP_SMALL 10 78 | #define BW_LOOP_LARGE 20 79 | #define BW_SKIP_LARGE 2 80 | 81 | #define LAT_LOOP_SMALL 10000 82 | #define LAT_SKIP_SMALL 100 83 | #define LAT_LOOP_LARGE 1000 84 | #define LAT_SKIP_LARGE 10 85 | 86 | enum po_ret_type { 87 | po_cuda_not_avail, 88 | po_openacc_not_avail, 89 | po_bad_usage, 90 | po_help_message, 91 | po_okay, 92 | }; 93 | 94 | enum accel_type { 95 | none, 96 | cuda, 97 | 
openacc 98 | }; 99 | 100 | struct options_t { 101 | char src; 102 | char dst; 103 | enum accel_type accel; 104 | int loop; 105 | int loop_large; 106 | int skip; 107 | int skip_large; 108 | char managedSend; 109 | char managedRecv; 110 | }; 111 | 112 | extern struct options_t options; 113 | 114 | void usage (char const *); 115 | int process_options (int argc, char *argv[], int type); 116 | int allocate_memory (char **sbuf, char **rbuf, int rank); 117 | void print_header (int rank, int type); 118 | void touch_data (void *sbuf, void *rbuf, int rank, size_t size); 119 | void free_memory (void *sbuf, void *rbuf, int rank); 120 | int init_accel (void); 121 | int cleanup_accel (void); 122 | 123 | void set_header (const char * header); 124 | 125 | #endif 126 | -------------------------------------------------------------------------------- /mpi/startup/Makefile.am: -------------------------------------------------------------------------------- 1 | startupdir = $(pkglibexecdir)/mpi/startup 2 | startup_PROGRAMS = osu_init osu_hello 3 | 4 | if EMBEDDED_BUILD 5 | AM_LDFLAGS = 6 | AM_CPPFLAGS = -I$(top_builddir)/../src/include \ 7 | -I${top_srcdir}/../src/include 8 | if BUILD_PROFILING_LIB 9 | AM_LDFLAGS += $(top_builddir)/../lib/lib@PMPILIBNAME@.la 10 | endif 11 | AM_LDFLAGS += $(top_builddir)/../lib/lib@MPILIBNAME@.la 12 | endif 13 | 14 | -------------------------------------------------------------------------------- /mpi/startup/osu_hello.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 
9 | */ 10 | #include 11 | 12 | int main(int argc, char **argv) { 13 | MPI_Init(&argc, &argv); 14 | MPI_Finalize(); 15 | return 0; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /mpi/startup/osu_init.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 9 | */ 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | int 16 | main (int argc, char *argv[]) 17 | { 18 | int myid, numprocs; 19 | struct timespec tp_before, tp_after; 20 | long duration = 0, min, max, avg; 21 | 22 | clock_gettime(CLOCK_REALTIME, &tp_before); 23 | MPI_Init(&argc, &argv); 24 | clock_gettime(CLOCK_REALTIME, &tp_after); 25 | 26 | duration = (tp_after.tv_sec - tp_before.tv_sec) * 1e3; 27 | duration += (tp_after.tv_nsec - tp_before.tv_nsec) / 1e6; 28 | 29 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 30 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 31 | 32 | MPI_Reduce(&duration, &min, 1, MPI_LONG, MPI_MIN, 0, MPI_COMM_WORLD); 33 | MPI_Reduce(&duration, &max, 1, MPI_LONG, MPI_MAX, 0, MPI_COMM_WORLD); 34 | MPI_Reduce(&duration, &avg, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD); 35 | avg = avg/numprocs; 36 | 37 | if(myid == 0) { 38 | printf("nprocs: %d, min: %ld, max: %ld, avg: %ld\n", numprocs, min, max, avg); 39 | } 40 | 41 | MPI_Finalize(); 42 | 43 | return EXIT_SUCCESS; 44 | } 45 | 46 | -------------------------------------------------------------------------------- /openshmem/Makefile.am: -------------------------------------------------------------------------------- 1 | openshmemdir = $(pkglibexecdir)/openshmem 2 | openshmem_PROGRAMS = osu_oshm_get osu_oshm_put osu_oshm_put_mr osu_oshm_atomics 
osu_oshm_barrier osu_oshm_broadcast osu_oshm_fcollect osu_oshm_collect osu_oshm_reduce 3 | 4 | osu_oshm_get_SOURCES = osu_oshm_get.c osu_common.c osu_common.h 5 | osu_oshm_put_SOURCES = osu_oshm_put.c osu_common.c osu_common.h 6 | osu_oshm_put_mr_SOURCES = osu_oshm_put_mr.c osu_common.c osu_common.h 7 | osu_oshm_atomics_SOURCES = osu_oshm_atomics.c osu_common.c osu_common.h 8 | osu_oshm_barrier_SOURCES = osu_oshm_barrier.c osu_common.c osu_common.h osu_coll.h 9 | osu_oshm_broadcast_SOURCES = osu_oshm_broadcast.c osu_common.c osu_common.h osu_coll.h 10 | osu_oshm_collect_SOURCES = osu_oshm_collect.c osu_common.c osu_common.h osu_coll.h 11 | osu_oshm_fcollect_SOURCES = osu_oshm_fcollect.c osu_common.c osu_common.h osu_coll.h 12 | osu_oshm_reduce_SOURCES = osu_oshm_reduce.c osu_common.c osu_common.h osu_coll.h 13 | 14 | -------------------------------------------------------------------------------- /openshmem/osu_common.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | int64_t getMicrosecondTimeStamp() 17 | { 18 | int64_t retval; 19 | struct timeval tv; 20 | if (gettimeofday(&tv, NULL)) { 21 | perror("gettimeofday"); 22 | abort(); 23 | } 24 | retval = ((int64_t)tv.tv_sec) * 1000000 + tv.tv_usec; 25 | return retval; 26 | } 27 | -------------------------------------------------------------------------------- /openshmem/osu_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 
4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 9 | */ 10 | #ifndef _OSU_COMMON_H_ 11 | #define _OSU_COMMON_H_ 12 | 13 | #define TIME() getMicrosecondTimeStamp() 14 | int64_t getMicrosecondTimeStamp(); 15 | 16 | #endif /* _OSU_COMMON_H */ 17 | -------------------------------------------------------------------------------- /openshmem/osu_oshm_barrier.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU OpenSHMEM Barrier Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | #include "osu_coll.h" 19 | 20 | long pSyncBarrier1[_SHMEM_BARRIER_SYNC_SIZE]; 21 | long pSyncBarrier2[_SHMEM_BARRIER_SYNC_SIZE]; 22 | long pSyncRed1[_SHMEM_REDUCE_SYNC_SIZE]; 23 | long pSyncRed2[_SHMEM_REDUCE_SYNC_SIZE]; 24 | 25 | double pWrk1[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 26 | double pWrk2[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 27 | 28 | int main(int argc, char *argv[]) 29 | { 30 | int i = 0, rank; 31 | int skip, numprocs; 32 | static double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 33 | static double latency = 0.0; 34 | int64_t t_start = 0, t_stop = 0, timer=0; 35 | int full = 0, t; 36 | 37 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed1[t] = _SHMEM_SYNC_VALUE; 38 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed2[t] = _SHMEM_SYNC_VALUE; 39 | for ( t = 0; t < _SHMEM_BARRIER_SYNC_SIZE; t += 1) pSyncBarrier1[t] = _SHMEM_SYNC_VALUE; 40 | for ( t = 0; t < 
_SHMEM_BARRIER_SYNC_SIZE; t += 1) pSyncBarrier2[t] = _SHMEM_SYNC_VALUE; 41 | 42 | start_pes(0); 43 | rank = _my_pe(); 44 | numprocs = _num_pes(); 45 | 46 | if (process_args(argc, argv, rank, NULL, &full)) { 47 | return EXIT_SUCCESS; 48 | } 49 | 50 | if(numprocs < 2) { 51 | if(rank == 0) { 52 | fprintf(stderr, "This test requires at least two processes\n"); 53 | } 54 | return EXIT_FAILURE; 55 | } 56 | 57 | print_header(rank, full); 58 | 59 | skip = SKIP_LARGE; 60 | iterations = iterations_large; 61 | timer=0; 62 | 63 | for(i=0; i < iterations + skip ; i++) { 64 | t_start = TIME(); 65 | if(i%2) 66 | shmem_barrier(0, 0, numprocs, pSyncBarrier1); 67 | else 68 | shmem_barrier(0, 0, numprocs, pSyncBarrier2); 69 | t_stop = TIME(); 70 | 71 | if(i>=skip){ 72 | timer+=t_stop-t_start; 73 | } 74 | } 75 | 76 | shmem_barrier_all(); 77 | 78 | latency = (1.0 * timer) / iterations; 79 | shmem_double_min_to_all(&min_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 80 | shmem_double_max_to_all(&max_time, &latency, 1, 0, 0, numprocs, pWrk2, pSyncRed2); 81 | shmem_double_sum_to_all(&avg_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 82 | 83 | avg_time = avg_time/numprocs; 84 | print_data(rank, full, 0, avg_time, min_time, max_time, iterations); 85 | 86 | return EXIT_SUCCESS; 87 | } 88 | 89 | /* vi: set sw=4 sts=4 tw=80: */ 90 | -------------------------------------------------------------------------------- /openshmem/osu_oshm_broadcast.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU OpenSHMEM Broadcast Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include "osu_common.h" 17 | #include "osu_coll.h" 18 | #include 19 | 20 | long pSyncBcast1[_SHMEM_BCAST_SYNC_SIZE]; 21 | long pSyncBcast2[_SHMEM_BCAST_SYNC_SIZE]; 22 | long pSyncRed1[_SHMEM_REDUCE_SYNC_SIZE]; 23 | long pSyncRed2[_SHMEM_REDUCE_SYNC_SIZE]; 24 | 25 | double pWrk1[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 26 | double pWrk2[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 27 | 28 | int main(int argc, char *argv[]) 29 | { 30 | int i = 0, rank, size; 31 | int skip, numprocs; 32 | static double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 33 | static double latency = 0.0; 34 | int64_t t_start = 0, t_stop = 0, timer=0; 35 | char *buffer=NULL; 36 | int max_msg_size = 1048576, full = 0; 37 | int t; 38 | 39 | for ( t = 0; t < _SHMEM_BCAST_SYNC_SIZE; t += 1) pSyncBcast1[t] = _SHMEM_SYNC_VALUE; 40 | for ( t = 0; t < _SHMEM_BCAST_SYNC_SIZE; t += 1) pSyncBcast2[t] = _SHMEM_SYNC_VALUE; 41 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed1[t] = _SHMEM_SYNC_VALUE; 42 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed2[t] = _SHMEM_SYNC_VALUE; 43 | 44 | start_pes(0); 45 | rank = _my_pe(); 46 | numprocs = _num_pes(); 47 | 48 | if (process_args(argc, argv, rank, &max_msg_size, &full)) { 49 | return 0; 50 | } 51 | 52 | if(numprocs < 2) { 53 | if(rank == 0) { 54 | fprintf(stderr, "This test requires at least two processes\n"); 55 | } 56 | return -1; 57 | } 58 | print_header(rank, full); 59 | 60 | buffer = shmalloc(max_msg_size * sizeof(char)); 61 | if(NULL == buffer) { 62 | fprintf(stderr, "malloc failed.\n"); 63 | exit(1); 64 | } 65 | 66 | memset(buffer,1, max_msg_size); 67 | 68 | for(size=1; size <=max_msg_size/sizeof(uint32_t); size *= 2) { 69 | if(size > LARGE_MESSAGE_SIZE) { 70 | skip = SKIP_LARGE; 71 | iterations = iterations_large; 72 | } 73 | else { 74 | skip = SKIP; 75 | } 76 | 77 | timer=0; 78 | for(i=0; i < iterations + skip ; i++) { 79 | t_start = TIME(); 80 | if(i%2) 81 | 
shmem_broadcast32(buffer, buffer, size, 0, 0, 0, numprocs, pSyncBcast1); 82 | else 83 | shmem_broadcast32(buffer, buffer, size, 0, 0, 0, numprocs, pSyncBcast2); 84 | t_stop = TIME(); 85 | 86 | if(i>=skip){ 87 | timer+=t_stop-t_start; 88 | } 89 | shmem_barrier_all(); 90 | } 91 | shmem_barrier_all(); 92 | latency = (1.0 * timer) / iterations; 93 | 94 | shmem_double_min_to_all(&min_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 95 | shmem_double_max_to_all(&max_time, &latency, 1, 0, 0, numprocs, pWrk2, pSyncRed2); 96 | shmem_double_sum_to_all(&avg_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 97 | avg_time = avg_time/numprocs; 98 | 99 | print_data(rank, full, size*sizeof(uint32_t), avg_time, min_time, max_time, iterations); 100 | } 101 | 102 | shfree(buffer); 103 | return EXIT_SUCCESS; 104 | } 105 | 106 | /* vi: set sw=4 sts=4 tw=80: */ 107 | -------------------------------------------------------------------------------- /openshmem/osu_oshm_collect.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU OpenSHMEM Collect Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | */ 8 | 9 | /* 10 | This program is available under BSD licensing. 11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions are 14 | met: 15 | 16 | (1) Redistributions of source code must retain the above copyright 17 | notice, this list of conditions and the following disclaimer. 18 | 19 | (2) Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 
22 | 23 | (3) Neither the name of The Ohio State University nor the names of 24 | their contributors may be used to endorse or promote products derived 25 | from this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
38 | 39 | */ 40 | 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include "osu_common.h" 46 | #include "osu_coll.h" 47 | #include 48 | 49 | long pSyncCollect1[_SHMEM_COLLECT_SYNC_SIZE]; 50 | long pSyncCollect2[_SHMEM_COLLECT_SYNC_SIZE]; 51 | long pSyncRed1[_SHMEM_REDUCE_SYNC_SIZE]; 52 | long pSyncRed2[_SHMEM_REDUCE_SYNC_SIZE]; 53 | 54 | double pWrk1[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 55 | double pWrk2[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 56 | 57 | int main(int argc, char *argv[]) 58 | { 59 | int i, numprocs, rank, size; 60 | unsigned long align_size = sysconf(_SC_PAGESIZE); 61 | int skip; 62 | static double latency = 0.0; 63 | int64_t t_start = 0, t_stop = 0, timer=0; 64 | static double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 65 | char *recvbuff, *sendbuff; 66 | int max_msg_size = 1048576, full = 0, t; 67 | uint64_t requested_mem_limit = 0; 68 | 69 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed1[t] = _SHMEM_SYNC_VALUE; 70 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed2[t] = _SHMEM_SYNC_VALUE; 71 | for ( t = 0; t < _SHMEM_COLLECT_SYNC_SIZE; t += 1) pSyncCollect1[t] = _SHMEM_SYNC_VALUE; 72 | for ( t = 0; t < _SHMEM_COLLECT_SYNC_SIZE; t += 1) pSyncCollect2[t] = _SHMEM_SYNC_VALUE; 73 | 74 | start_pes(0); 75 | rank = _my_pe(); 76 | numprocs = _num_pes(); 77 | 78 | if (process_args(argc, argv, rank, &max_msg_size, &full)) { 79 | return 0; 80 | } 81 | 82 | if(numprocs < 2) { 83 | if(rank == 0) { 84 | fprintf(stderr, "This test requires at least two processes\n"); 85 | } 86 | return -1; 87 | } 88 | 89 | requested_mem_limit = (uint64_t) (max_msg_size) * numprocs; 90 | if( requested_mem_limit > max_mem_limit) { 91 | max_msg_size = max_mem_limit/numprocs; 92 | } 93 | 94 | print_header(rank, full); 95 | 96 | recvbuff = (char *)shmemalign(align_size, sizeof(char) * max_msg_size 97 | * numprocs); 98 | if (NULL == recvbuff) { 99 | fprintf(stderr, "shmemalign failed.\n"); 100 | exit(1); 101 | } 102 | 103 | sendbuff = (char 
*)shmemalign(align_size, sizeof(char) * max_msg_size); 104 | if (NULL == sendbuff) { 105 | fprintf(stderr, "shmemalign failed.\n"); 106 | exit(1); 107 | } 108 | 109 | memset(recvbuff, 1, max_msg_size*numprocs); 110 | memset(sendbuff, 0, max_msg_size); 111 | 112 | for(size=1; size <= max_msg_size/sizeof(uint32_t); size *= 2) { 113 | 114 | if(size > LARGE_MESSAGE_SIZE) { 115 | skip = SKIP_LARGE; 116 | iterations = iterations_large; 117 | } else { 118 | skip = SKIP; 119 | } 120 | 121 | shmem_barrier_all(); 122 | 123 | timer=0; 124 | for(i=0; i < iterations + skip ; i++) { 125 | t_start = TIME(); 126 | if(i%2) 127 | shmem_collect32(recvbuff, sendbuff, size, 0, 0, numprocs, pSyncCollect1); 128 | else 129 | shmem_collect32(recvbuff, sendbuff, size, 0, 0, numprocs, pSyncCollect2); 130 | t_stop = TIME(); 131 | 132 | if(i >= skip) { 133 | timer+= t_stop-t_start; 134 | } 135 | shmem_barrier_all(); 136 | } 137 | 138 | shmem_barrier_all(); 139 | latency = (double)(timer * 1.0) / iterations; 140 | 141 | shmem_double_min_to_all(&min_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 142 | shmem_double_max_to_all(&max_time, &latency, 1, 0, 0, numprocs, pWrk2, pSyncRed2); 143 | shmem_double_sum_to_all(&avg_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 144 | avg_time = avg_time/numprocs; 145 | 146 | print_data(rank, full, size*sizeof(uint32_t), avg_time, min_time, max_time, iterations); 147 | } 148 | 149 | shmem_barrier_all(); 150 | shfree(recvbuff); 151 | shfree(sendbuff); 152 | 153 | return EXIT_SUCCESS; 154 | } 155 | 156 | /* vi: set sw=4 sts=4 tw=80: */ 157 | -------------------------------------------------------------------------------- /openshmem/osu_oshm_fcollect.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU OpenSHMEM FCollect Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. 
Panda (panda@cse.ohio-state.edu) 7 | */ 8 | 9 | /* 10 | This program is available under BSD licensing. 11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions are 14 | met: 15 | 16 | (1) Redistributions of source code must retain the above copyright 17 | notice, this list of conditions and the following disclaimer. 18 | 19 | (2) Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | (3) Neither the name of The Ohio State University nor the names of 24 | their contributors may be used to endorse or promote products derived 25 | from this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
38 | 39 | */ 40 | 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include "osu_common.h" 46 | #include "osu_coll.h" 47 | #include 48 | 49 | long pSyncCollect1[_SHMEM_COLLECT_SYNC_SIZE]; 50 | long pSyncCollect2[_SHMEM_COLLECT_SYNC_SIZE]; 51 | long pSyncRed1[_SHMEM_REDUCE_SYNC_SIZE]; 52 | long pSyncRed2[_SHMEM_REDUCE_SYNC_SIZE]; 53 | 54 | double pWrk1[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 55 | double pWrk2[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 56 | 57 | int main(int argc, char *argv[]) 58 | { 59 | int i, numprocs, rank, size; 60 | unsigned long align_size = sysconf(_SC_PAGESIZE); 61 | int skip; 62 | static double latency = 0.0; 63 | int64_t t_start = 0, t_stop = 0, timer=0; 64 | static double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 65 | char *recvbuff, *sendbuff; 66 | int max_msg_size = 1048576, full = 0, t; 67 | uint64_t requested_mem_limit = 0; 68 | 69 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed1[t] = _SHMEM_SYNC_VALUE; 70 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed2[t] = _SHMEM_SYNC_VALUE; 71 | for ( t = 0; t < _SHMEM_COLLECT_SYNC_SIZE; t += 1) pSyncCollect1[t] = _SHMEM_SYNC_VALUE; 72 | for ( t = 0; t < _SHMEM_COLLECT_SYNC_SIZE; t += 1) pSyncCollect2[t] = _SHMEM_SYNC_VALUE; 73 | 74 | start_pes(0); 75 | rank = _my_pe(); 76 | numprocs = _num_pes(); 77 | 78 | if (process_args(argc, argv, rank, &max_msg_size, &full)) { 79 | return 0; 80 | } 81 | 82 | if(numprocs < 2) { 83 | if(rank == 0) { 84 | fprintf(stderr, "This test requires at least two processes\n"); 85 | } 86 | return -1; 87 | } 88 | 89 | requested_mem_limit = (uint64_t) (max_msg_size) * numprocs; 90 | if( requested_mem_limit > max_mem_limit) { 91 | max_msg_size = max_mem_limit/numprocs; 92 | } 93 | 94 | print_header(rank, full); 95 | 96 | recvbuff = (char *)shmemalign(align_size, sizeof(char) * max_msg_size 97 | * numprocs); 98 | if (NULL == recvbuff) { 99 | fprintf(stderr, "shmemalign failed.\n"); 100 | exit(1); 101 | } 102 | 103 | sendbuff = (char 
*)shmemalign(align_size, sizeof(char) * max_msg_size); 104 | if (NULL == sendbuff) { 105 | fprintf(stderr, "shmemalign failed.\n"); 106 | exit(1); 107 | } 108 | 109 | memset(recvbuff, 1, max_msg_size*numprocs); 110 | memset(sendbuff, 0, max_msg_size); 111 | 112 | for(size=1; size <= max_msg_size/sizeof(uint32_t); size *= 2) { 113 | 114 | if(size > LARGE_MESSAGE_SIZE) { 115 | skip = SKIP_LARGE; 116 | iterations = iterations_large; 117 | } else { 118 | skip = SKIP; 119 | } 120 | 121 | shmem_barrier_all(); 122 | 123 | timer=0; 124 | for(i=0; i < iterations + skip ; i++) { 125 | t_start = TIME(); 126 | if(i%2) 127 | shmem_fcollect32(recvbuff, sendbuff, size, 0, 0, numprocs, pSyncCollect1); 128 | else 129 | shmem_fcollect32(recvbuff, sendbuff, size, 0, 0, numprocs, pSyncCollect2); 130 | t_stop = TIME(); 131 | 132 | if(i >= skip) { 133 | timer+= t_stop-t_start; 134 | } 135 | shmem_barrier_all(); 136 | } 137 | 138 | shmem_barrier_all(); 139 | 140 | latency = (double)(timer * 1.0) / iterations; 141 | shmem_double_min_to_all(&min_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 142 | shmem_double_max_to_all(&max_time, &latency, 1, 0, 0, numprocs, pWrk2, pSyncRed2); 143 | shmem_double_sum_to_all(&avg_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 144 | avg_time = avg_time/numprocs; 145 | 146 | print_data(rank, full, size*sizeof(uint32_t), avg_time, min_time, max_time, iterations); 147 | } 148 | 149 | shmem_barrier_all(); 150 | shfree(recvbuff); 151 | shfree(sendbuff); 152 | 153 | return EXIT_SUCCESS; 154 | } 155 | 156 | /* vi: set sw=4 sts=4 tw=80: */ 157 | 158 | -------------------------------------------------------------------------------- /openshmem/osu_oshm_get.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU OpenSHMEM Get Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. 
Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | 19 | #define MESSAGE_ALIGNMENT 64 20 | #define MAX_MSG_SIZE (1<<20) 21 | #define MYBUFSIZE (MAX_MSG_SIZE + MESSAGE_ALIGNMENT) 22 | 23 | char s_buf_original[MYBUFSIZE]; 24 | char r_buf_original[MYBUFSIZE]; 25 | 26 | int skip = 1000; 27 | int loop = 10000; 28 | int skip_large = 10; 29 | int loop_large = 100; 30 | int large_message_size = 8192; 31 | 32 | #ifdef PACKAGE_VERSION 33 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 34 | #else 35 | # define HEADER "# " BENCHMARK "\n" 36 | #endif 37 | 38 | #ifndef FIELD_WIDTH 39 | # define FIELD_WIDTH 20 40 | #endif 41 | 42 | #ifndef FLOAT_PRECISION 43 | # define FLOAT_PRECISION 2 44 | #endif 45 | 46 | static void usage(int myid) 47 | { 48 | if(myid == 0) { 49 | fprintf(stderr, "Invalid arguments. 
Usage: \n"); 50 | } 51 | } 52 | 53 | int main(int argc, char *argv[]) 54 | { 55 | int myid, numprocs, i; 56 | int size; 57 | char *s_buf, *r_buf; 58 | char *s_buf_heap, *r_buf_heap; 59 | int align_size; 60 | int64_t t_start = 0, t_end = 0; 61 | int use_heap = 0; //default uses global 62 | 63 | start_pes(0); 64 | myid = _my_pe(); 65 | numprocs = _num_pes(); 66 | 67 | if(numprocs != 2) { 68 | if(myid == 0) { 69 | fprintf(stderr, "This test requires exactly two processes\n"); 70 | } 71 | 72 | return EXIT_FAILURE; 73 | } 74 | 75 | if(argc != 2) { 76 | usage(myid); 77 | 78 | return EXIT_FAILURE; 79 | } 80 | 81 | if(0 == strncmp(argv[1], "heap", strlen("heap"))){ 82 | use_heap = 1; 83 | } else if(0 == strncmp(argv[1], "global", strlen("global"))){ 84 | use_heap = 0; 85 | } else { 86 | usage(myid); 87 | return EXIT_FAILURE; 88 | } 89 | 90 | align_size = MESSAGE_ALIGNMENT; 91 | 92 | /**************Allocating Memory*********************/ 93 | 94 | if(use_heap){ 95 | 96 | s_buf_heap = shmalloc(MYBUFSIZE); 97 | r_buf_heap = shmalloc(MYBUFSIZE); 98 | 99 | s_buf = 100 | (char *) (((unsigned long) s_buf_heap + (align_size - 1)) / 101 | align_size * align_size); 102 | 103 | r_buf = 104 | (char *) (((unsigned long) r_buf_heap + (align_size - 1)) / 105 | align_size * align_size); 106 | } else { 107 | 108 | s_buf = 109 | (char *) (((unsigned long) s_buf_original + (align_size - 1)) / 110 | align_size * align_size); 111 | 112 | r_buf = 113 | (char *) (((unsigned long) r_buf_original + (align_size - 1)) / 114 | align_size * align_size); 115 | } 116 | 117 | /**************Memory Allocation Done*********************/ 118 | 119 | if(myid == 0) { 120 | fprintf(stdout, HEADER); 121 | fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)"); 122 | fflush(stdout); 123 | } 124 | 125 | for(size = 1; size <= MAX_MSG_SIZE; size = (size ? 
size * 2 : 1)) { 126 | 127 | /* touch the data */ 128 | for(i = 0; i < size; i++) { 129 | s_buf[i] = 'a'; 130 | r_buf[i] = 'b'; 131 | } 132 | 133 | if(size > large_message_size) { 134 | loop = loop_large = 100; 135 | skip = skip_large = 0; 136 | } 137 | 138 | shmem_barrier_all(); 139 | 140 | if(myid == 0) 141 | { 142 | for(i = 0; i < loop + skip; i++) { 143 | if(i == skip) t_start = TIME(); 144 | 145 | shmem_getmem(r_buf, s_buf, size, 1); 146 | } 147 | 148 | t_end = TIME(); 149 | } 150 | shmem_barrier_all(); 151 | 152 | if(myid == 0) { 153 | double latency = (1.0 * (t_end-t_start)) / loop; 154 | 155 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 156 | FLOAT_PRECISION, latency); 157 | fflush(stdout); 158 | } 159 | } 160 | 161 | shmem_barrier_all(); 162 | 163 | if(use_heap){ 164 | shfree(s_buf_heap); 165 | shfree(r_buf_heap); 166 | } 167 | 168 | shmem_barrier_all(); 169 | return EXIT_SUCCESS; 170 | } 171 | 172 | /* vi: set sw=4 sts=4 tw=80: */ 173 | -------------------------------------------------------------------------------- /openshmem/osu_oshm_put.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU OpenSHMEM Put Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | 19 | #define MESSAGE_ALIGNMENT 64 20 | #define MAX_MSG_SIZE (1<<20) 21 | #define MYBUFSIZE (MAX_MSG_SIZE + MESSAGE_ALIGNMENT) 22 | 23 | char s_buf_original[MYBUFSIZE]; 24 | char r_buf_original[MYBUFSIZE]; 25 | 26 | int skip = 1000; 27 | int loop = 10000; 28 | int skip_large = 10; 29 | int loop_large = 100; 30 | int large_message_size = 8192; 31 | 32 | #ifdef PACKAGE_VERSION 33 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 34 | #else 35 | # define HEADER "# " BENCHMARK "\n" 36 | #endif 37 | 38 | #ifndef FIELD_WIDTH 39 | # define FIELD_WIDTH 20 40 | #endif 41 | 42 | #ifndef FLOAT_PRECISION 43 | # define FLOAT_PRECISION 2 44 | #endif 45 | 46 | static void usage(int myid) 47 | { 48 | if(myid == 0) { 49 | fprintf(stderr, "Invalid arguments. Usage: \n"); 50 | } 51 | } 52 | 53 | int main(int argc, char *argv[]) 54 | { 55 | int myid, numprocs, i; 56 | int size; 57 | char *s_buf, *r_buf; 58 | char *s_buf_heap, *r_buf_heap; 59 | int align_size; 60 | int64_t t_start = 0, t_end = 0; 61 | int use_heap = 0; //default uses global 62 | 63 | start_pes(0); 64 | myid = _my_pe(); 65 | numprocs = _num_pes(); 66 | 67 | if(numprocs != 2) { 68 | if(myid == 0) { 69 | fprintf(stderr, "This test requires exactly two processes\n"); 70 | } 71 | 72 | return EXIT_FAILURE; 73 | } 74 | 75 | if(argc != 2) { 76 | usage(myid); 77 | 78 | return EXIT_FAILURE; 79 | } 80 | 81 | if(0 == strncmp(argv[1], "heap", strlen("heap"))){ 82 | use_heap = 1; 83 | } else if(0 == strncmp(argv[1], "global", strlen("global"))){ 84 | use_heap = 0; 85 | } else { 86 | usage(myid); 87 | return EXIT_FAILURE; 88 | } 89 | 90 | align_size = MESSAGE_ALIGNMENT; 91 | 92 | /**************Allocating Memory*********************/ 93 | 94 | if(use_heap){ 95 | 96 | s_buf_heap = shmalloc(MYBUFSIZE); 97 | r_buf_heap = shmalloc(MYBUFSIZE); 98 | 99 | s_buf = 100 | (char *) (((unsigned long) s_buf_heap + 
(align_size - 1)) / 101 | align_size * align_size); 102 | 103 | r_buf = 104 | (char *) (((unsigned long) r_buf_heap + (align_size - 1)) / 105 | align_size * align_size); 106 | } else { 107 | 108 | s_buf = 109 | (char *) (((unsigned long) s_buf_original + (align_size - 1)) / 110 | align_size * align_size); 111 | 112 | r_buf = 113 | (char *) (((unsigned long) r_buf_original + (align_size - 1)) / 114 | align_size * align_size); 115 | } 116 | 117 | /**************Memory Allocation Done*********************/ 118 | 119 | if(myid == 0) { 120 | fprintf(stdout, HEADER); 121 | fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)"); 122 | fflush(stdout); 123 | } 124 | 125 | for(size = 1; size <= MAX_MSG_SIZE; size = (size ? size * 2 : 1)) { 126 | 127 | /* touch the data */ 128 | for(i = 0; i < size; i++) { 129 | s_buf[i] = 'a'; 130 | r_buf[i] = 'b'; 131 | } 132 | 133 | if(size > large_message_size) { 134 | loop = loop_large = 100; 135 | skip = skip_large = 0; 136 | } 137 | 138 | shmem_barrier_all(); 139 | 140 | if(myid == 0) 141 | { 142 | for(i = 0; i < loop + skip; i++) { 143 | if(i == skip) t_start = TIME(); 144 | 145 | shmem_putmem(r_buf, s_buf, size, 1); 146 | shmem_quiet(); 147 | } 148 | 149 | t_end = TIME(); 150 | } 151 | shmem_barrier_all(); 152 | 153 | if(myid == 0) { 154 | double latency = (1.0 * (t_end-t_start)) / loop; 155 | 156 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 157 | FLOAT_PRECISION, latency); 158 | fflush(stdout); 159 | } 160 | } 161 | 162 | shmem_barrier_all(); 163 | 164 | if(use_heap){ 165 | shfree(s_buf_heap); 166 | shfree(r_buf_heap); 167 | } 168 | 169 | shmem_barrier_all(); 170 | return EXIT_SUCCESS; 171 | } 172 | 173 | /* vi: set sw=4 sts=4 tw=80: */ 174 | -------------------------------------------------------------------------------- /openshmem/osu_oshm_reduce.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU OpenSHMEM Reduce Latency Test" 2 | /* 3 | * 
Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | */ 8 | 9 | /* 10 | This program is available under BSD licensing. 11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions are 14 | met: 15 | 16 | (1) Redistributions of source code must retain the above copyright 17 | notice, this list of conditions and the following disclaimer. 18 | 19 | (2) Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | (3) Neither the name of The Ohio State University nor the names of 24 | their contributors may be used to endorse or promote products derived 25 | from this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
38 | 39 | */ 40 | 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include "osu_common.h" 46 | #include "osu_coll.h" 47 | #include 48 | 49 | long pSyncRed1[_SHMEM_REDUCE_SYNC_SIZE]; 50 | long pSyncRed2[_SHMEM_REDUCE_SYNC_SIZE]; 51 | 52 | double pWrk1[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 53 | double pWrk2[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 54 | 55 | int main(int argc, char *argv[]) 56 | { 57 | int i, numprocs, rank, size; 58 | unsigned long align_size = sysconf(_SC_PAGESIZE); 59 | int skip; 60 | static double latency = 0.0; 61 | int64_t t_start = 0, t_stop = 0, timer=0; 62 | static double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 63 | float *sendbuf, *recvbuf; 64 | int max_msg_size = 1048576, full = 0, t; 65 | 66 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed1[t] = _SHMEM_SYNC_VALUE; 67 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed2[t] = _SHMEM_SYNC_VALUE; 68 | 69 | start_pes(0); 70 | rank = _my_pe(); 71 | numprocs = _num_pes(); 72 | 73 | if (process_args(argc, argv, rank, &max_msg_size, &full)) { 74 | return EXIT_SUCCESS; 75 | } 76 | 77 | if(numprocs < 2) { 78 | if(rank == 0) { 79 | fprintf(stderr, "This test requires at least two processes\n"); 80 | } 81 | return EXIT_FAILURE; 82 | } 83 | 84 | int nreduce = max_msg_size/sizeof(float); 85 | float *pWrkF1 = shmalloc(MAX(nreduce/2+1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE)); 86 | float *pWrkF2 = shmalloc(MAX(nreduce/2+1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE)); 87 | 88 | print_header(rank, full); 89 | 90 | recvbuf = (float *)shmemalign(align_size, max_msg_size); 91 | if (NULL == recvbuf) { 92 | fprintf(stderr, "shmemalign failed.\n"); 93 | exit(1); 94 | } 95 | 96 | sendbuf = (float *)shmemalign(align_size, max_msg_size); 97 | if (NULL == sendbuf) { 98 | fprintf(stderr, "shmemalign failed.\n"); 99 | exit(1); 100 | } 101 | 102 | memset(sendbuf, 1, max_msg_size); 103 | memset(recvbuf, 0, max_msg_size); 104 | 105 | for(size=1; size*sizeof(float)<= max_msg_size; size *= 2) { 106 | 107 | if(size 
> LARGE_MESSAGE_SIZE) { 108 | skip = SKIP_LARGE; 109 | iterations = iterations_large; 110 | } else { 111 | skip = SKIP; 112 | } 113 | 114 | shmem_barrier_all(); 115 | 116 | timer=0; 117 | for(i=0; i < iterations + skip ; i++) { 118 | t_start = TIME(); 119 | 120 | if(i%2) 121 | shmem_float_sum_to_all(recvbuf, sendbuf, size, 0, 0, numprocs, pWrkF1, pSyncRed1); 122 | else 123 | shmem_float_sum_to_all(recvbuf, sendbuf, size, 0, 0, numprocs, pWrkF2, pSyncRed2); 124 | 125 | t_stop=TIME(); 126 | 127 | if(i>=skip){ 128 | timer+=t_stop-t_start; 129 | } 130 | shmem_barrier_all(); 131 | } 132 | 133 | latency = (double)(timer * 1.0) / iterations; 134 | shmem_double_min_to_all(&min_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 135 | shmem_double_max_to_all(&max_time, &latency, 1, 0, 0, numprocs, pWrk2, pSyncRed2); 136 | shmem_double_sum_to_all(&avg_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 137 | avg_time = avg_time/numprocs; 138 | 139 | print_data(rank, full, sizeof(float)*size, avg_time, min_time, max_time, iterations); 140 | shmem_barrier_all(); 141 | } 142 | 143 | shmem_barrier_all(); 144 | 145 | shfree(pWrkF1); 146 | shfree(pWrkF2); 147 | 148 | shfree(recvbuf); 149 | shfree(sendbuf); 150 | 151 | return EXIT_SUCCESS; 152 | } 153 | 154 | /* vi: set sw=4 sts=4 tw=80: */ 155 | 156 | -------------------------------------------------------------------------------- /upc/Makefile.am: -------------------------------------------------------------------------------- 1 | upcdir = $(pkglibexecdir)/upc 2 | upc_PROGRAMS = osu_upc_memget osu_upc_memput osu_upc_all_barrier \ 3 | osu_upc_all_broadcast osu_upc_all_exchange \ 4 | osu_upc_all_gather_all osu_upc_all_gather osu_upc_all_reduce \ 5 | osu_upc_all_scatter 6 | 7 | osu_upc_all_barrier_SOURCES = osu_upc_all_barrier.c osu_common.c osu_common.h osu_coll.h 8 | osu_upc_all_broadcast_SOURCES = osu_upc_all_broadcast.c osu_common.c osu_common.h osu_coll.h 9 | osu_upc_all_exchange_SOURCES = osu_upc_all_exchange.c 
osu_common.c osu_common.h osu_coll.h 10 | osu_upc_all_gather_SOURCES = osu_upc_all_gather.c osu_common.c osu_common.h osu_coll.h 11 | osu_upc_all_gather_all_SOURCES = osu_upc_all_gather_all.c osu_common.c osu_common.h osu_coll.h 12 | osu_upc_all_reduce_SOURCES = osu_upc_all_reduce.c osu_common.c osu_common.h osu_coll.h 13 | osu_upc_all_scatter_SOURCES = osu_upc_all_scatter.c osu_common.c osu_common.h osu_coll.h 14 | -------------------------------------------------------------------------------- /upc/osu_common.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | int64_t getMicrosecondTimeStamp() 17 | { 18 | int64_t retval; 19 | struct timeval tv; 20 | if (gettimeofday(&tv, NULL)) { 21 | perror("gettimeofday"); 22 | abort(); 23 | } 24 | retval = ((int64_t)tv.tv_sec) * 1000000 + tv.tv_usec; 25 | return retval; 26 | } 27 | -------------------------------------------------------------------------------- /upc/osu_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 
9 | */ 10 | #ifndef _OSU_COMMON_H_ 11 | #define _OSU_COMMON_H_ 12 | 13 | #define TIME() getMicrosecondTimeStamp() 14 | int64_t getMicrosecondTimeStamp(); 15 | 16 | #endif /* _OSU_COMMON_H */ 17 | -------------------------------------------------------------------------------- /upc/osu_upc_all_barrier.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC Barrier Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include "osu_coll.h" 16 | #include "osu_common.h" 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #ifdef PACKAGE_VERSION 23 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 24 | #else 25 | # define HEADER "# " BENCHMARK "\n" 26 | #endif 27 | 28 | #define SYNC_MODE (UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC) 29 | 30 | shared double avg_time, max_time, min_time; 31 | shared double latency[THREADS]; 32 | 33 | int main(int argc, char *argv[]) 34 | { 35 | int i = 0; 36 | int skip; 37 | int64_t t_start = 0, t_stop = 0, timer=0; 38 | int full = 0; 39 | 40 | if (process_args(argc, argv, MYTHREAD, NULL, &full, HEADER)) { 41 | return 0; 42 | } 43 | 44 | if(THREADS < 2) { 45 | if(MYTHREAD == 0) { 46 | fprintf(stderr, "This test requires at least two processes\n"); 47 | } 48 | return -1; 49 | } 50 | print_header(HEADER, MYTHREAD, full); 51 | upc_barrier; 52 | 53 | skip = SKIP; 54 | timer=0; 55 | for(i=0; i < iterations + skip ; i++) { 56 | t_start = TIME(); 57 | upc_barrier; 58 | t_stop = TIME(); 59 | 60 | if(i>=skip){ 61 | timer+=t_stop-t_start; 62 | } 63 | } 64 | upc_barrier; 65 | latency[MYTHREAD] = (1.0 * timer) / iterations; 66 | 67 | 
upc_all_reduceD(&min_time, latency, UPC_MIN, THREADS, 1, NULL, SYNC_MODE); 68 | upc_all_reduceD(&max_time, latency, UPC_MAX, THREADS, 1, NULL, SYNC_MODE); 69 | upc_all_reduceD(&avg_time, latency, UPC_ADD, THREADS, 1, NULL, SYNC_MODE); 70 | if(!MYTHREAD) 71 | avg_time = avg_time/THREADS; 72 | 73 | print_data(MYTHREAD, full, 0, avg_time, min_time, max_time, iterations); 74 | upc_barrier; 75 | return EXIT_SUCCESS; 76 | } 77 | 78 | /* vi: set sw=4 sts=4 tw=80: */ 79 | -------------------------------------------------------------------------------- /upc/osu_upc_all_broadcast.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC Broadcast Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | #include 19 | 20 | #ifdef PACKAGE_VERSION 21 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 22 | #else 23 | # define HEADER "# " BENCHMARK "\n" 24 | #endif 25 | #include "osu_coll.h" 26 | 27 | #define SYNC_MODE (UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC) 28 | 29 | shared [] char *src; 30 | shared char *dst; 31 | 32 | shared double avg_time, max_time, min_time; 33 | shared double latency[THREADS]; 34 | 35 | int main(int argc, char *argv[]) 36 | { 37 | int i = 0, size; 38 | int skip; 39 | int64_t t_start = 0, t_stop = 0, timer=0; 40 | int max_msg_size = 1<<20, full = 0; 41 | 42 | if (process_args(argc, argv, MYTHREAD, &max_msg_size, &full, HEADER)) { 43 | return 0; 44 | } 45 | 46 | if(THREADS < 2) { 47 | if(MYTHREAD == 0) { 48 | fprintf(stderr, "This test requires at least two processes\n"); 49 | } 50 | return -1; 51 | } 52 | 53 | print_header(HEADER, MYTHREAD, full); 54 | 55 | src = upc_all_alloc(1, max_msg_size*sizeof(char)); 56 | dst = upc_all_alloc(THREADS, max_msg_size*sizeof(char)); 57 | 58 | if(NULL == src || NULL == dst) { 59 | fprintf(stderr, "malloc failed.\n"); 60 | exit(1); 61 | } 62 | 63 | for(size=1; size <=max_msg_size; size *= 2) { 64 | if(size > LARGE_MESSAGE_SIZE) { 65 | skip = SKIP_LARGE; 66 | iterations = iterations_large; 67 | } 68 | else { 69 | skip = SKIP; 70 | } 71 | 72 | timer=0; 73 | for(i=0; i < iterations + skip ; i++) { 74 | t_start = TIME(); 75 | upc_all_broadcast(dst, src, size, SYNC_MODE ); 76 | t_stop = TIME(); 77 | 78 | if(i>=skip){ 79 | timer+=t_stop-t_start; 80 | } 81 | upc_barrier; 82 | } 83 | upc_barrier; 84 | latency[MYTHREAD] = (1.0 * timer) / iterations; 85 | 86 | upc_all_reduceD(&min_time, latency, UPC_MIN, THREADS, 1, NULL, SYNC_MODE); 87 | upc_all_reduceD(&max_time, latency, UPC_MAX, THREADS, 1, NULL, SYNC_MODE); 88 | upc_all_reduceD(&avg_time, latency, UPC_ADD, THREADS, 1, NULL, SYNC_MODE); 89 | 
if(!MYTHREAD) 90 | avg_time = avg_time/THREADS; 91 | 92 | print_data(MYTHREAD, full, size*sizeof(char), avg_time, min_time, max_time, iterations); 93 | } 94 | 95 | return EXIT_SUCCESS; 96 | } 97 | 98 | /* vi: set sw=4 sts=4 tw=80: */ 99 | -------------------------------------------------------------------------------- /upc/osu_upc_all_exchange.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC Exchange Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | #include "osu_coll.h" 19 | #include 20 | 21 | #ifdef PACKAGE_VERSION 22 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 23 | #else 24 | # define HEADER "# " BENCHMARK "\n" 25 | #endif 26 | 27 | #define SYNC_MODE (UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC) 28 | 29 | shared char *src, *dst; 30 | 31 | shared double avg_time, max_time, min_time; 32 | shared double latency[THREADS]; 33 | 34 | int main(int argc, char *argv[]) 35 | { 36 | int i = 0, size; 37 | int skip; 38 | int64_t t_start = 0, t_stop = 0, timer=0; 39 | int max_msg_size = 1<<20, full = 0; 40 | 41 | if (process_args(argc, argv, MYTHREAD, &max_msg_size, &full, HEADER)) { 42 | return 0; 43 | } 44 | 45 | if(THREADS < 2) { 46 | if(MYTHREAD == 0) { 47 | fprintf(stderr, "This test requires at least two processes\n"); 48 | } 49 | return -1; 50 | } 51 | print_header(HEADER, MYTHREAD, full); 52 | 53 | src = upc_all_alloc(THREADS*THREADS, max_msg_size*sizeof(char)); 54 | dst = upc_all_alloc(THREADS*THREADS, max_msg_size*sizeof(char)); 55 | upc_barrier; 56 | 57 | if(NULL == src || NULL == dst) { 58 | 
fprintf(stderr, "malloc failed.\n"); 59 | exit(1); 60 | } 61 | 62 | for(size=1; size <=max_msg_size; size *= 2) { 63 | if(size > LARGE_MESSAGE_SIZE) { 64 | skip = SKIP_LARGE; 65 | iterations = iterations_large; 66 | } 67 | else { 68 | skip = SKIP; 69 | } 70 | 71 | timer=0; 72 | for(i=0; i < iterations + skip ; i++) { 73 | t_start = TIME(); 74 | upc_all_exchange(dst, src, size, SYNC_MODE); 75 | t_stop = TIME(); 76 | 77 | if(i>=skip){ 78 | timer+=t_stop-t_start; 79 | } 80 | upc_barrier; 81 | } 82 | upc_barrier; 83 | latency[MYTHREAD] = (1.0 * timer) / iterations; 84 | 85 | upc_all_reduceD(&min_time, latency, UPC_MIN, THREADS, 1, NULL, SYNC_MODE); 86 | upc_all_reduceD(&max_time, latency, UPC_MAX, THREADS, 1, NULL, SYNC_MODE); 87 | upc_all_reduceD(&avg_time, latency, UPC_ADD, THREADS, 1, NULL, SYNC_MODE); 88 | if(!MYTHREAD) 89 | avg_time = avg_time/THREADS; 90 | 91 | print_data(MYTHREAD, full, size*sizeof(char), avg_time, min_time, max_time, iterations); 92 | } 93 | 94 | upc_barrier; 95 | return EXIT_SUCCESS; 96 | } 97 | 98 | /* vi: set sw=4 sts=4 tw=80: */ 99 | -------------------------------------------------------------------------------- /upc/osu_upc_all_gather.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC Gather Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | #include "osu_coll.h" 19 | #include 20 | 21 | #ifdef PACKAGE_VERSION 22 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 23 | #else 24 | # define HEADER "# " BENCHMARK "\n" 25 | #endif 26 | 27 | #define SYNC_MODE (UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC) 28 | 29 | shared char *src, *dst; 30 | 31 | shared double avg_time, max_time, min_time; 32 | shared double latency[THREADS]; 33 | 34 | int main(int argc, char *argv[]) 35 | { 36 | int i = 0, size; 37 | int skip; 38 | int64_t t_start = 0, t_stop = 0, timer=0; 39 | int max_msg_size = 1<<20, full = 0; 40 | 41 | if (process_args(argc, argv, MYTHREAD, &max_msg_size, &full, HEADER)) { 42 | return 0; 43 | } 44 | 45 | if(THREADS < 2) { 46 | if(MYTHREAD == 0) { 47 | fprintf(stderr, "This test requires at least two processes\n"); 48 | } 49 | return -1; 50 | } 51 | print_header(HEADER, MYTHREAD, full); 52 | 53 | src = upc_all_alloc(THREADS, max_msg_size*sizeof(char)); 54 | dst = upc_all_alloc(1, THREADS*max_msg_size*sizeof(char)); 55 | upc_barrier; 56 | 57 | if(NULL == src || NULL == dst) { 58 | fprintf(stderr, "malloc failed.\n"); 59 | exit(1); 60 | } 61 | 62 | for(size=1; size <=max_msg_size; size *= 2) { 63 | if(size > LARGE_MESSAGE_SIZE) { 64 | skip = SKIP_LARGE; 65 | iterations = iterations_large; 66 | } 67 | else { 68 | skip = SKIP; 69 | } 70 | 71 | timer=0; 72 | for(i=0; i < iterations + skip ; i++) { 73 | t_start = TIME(); 74 | upc_all_gather(dst, src, size, SYNC_MODE ); 75 | t_stop = TIME(); 76 | 77 | if(i>=skip){ 78 | timer+=t_stop-t_start; 79 | } 80 | upc_barrier; 81 | } 82 | upc_barrier; 83 | latency[MYTHREAD] = (1.0 * timer) / iterations; 84 | 85 | upc_all_reduceD(&min_time, latency, UPC_MIN, THREADS, 1, NULL, SYNC_MODE); 86 | upc_all_reduceD(&max_time, latency, UPC_MAX, THREADS, 1, NULL, SYNC_MODE); 87 | upc_all_reduceD(&avg_time, latency, UPC_ADD, THREADS, 1, NULL, SYNC_MODE); 88 | 
if(!MYTHREAD) 89 | avg_time = avg_time/THREADS; 90 | 91 | print_data(MYTHREAD, full, size*sizeof(char), avg_time, min_time, max_time, iterations); 92 | } 93 | 94 | return EXIT_SUCCESS; 95 | } 96 | 97 | /* vi: set sw=4 sts=4 tw=80: */ 98 | -------------------------------------------------------------------------------- /upc/osu_upc_all_gather_all.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC GatherAll Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | #include "osu_coll.h" 19 | #include 20 | 21 | #ifdef PACKAGE_VERSION 22 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 23 | #else 24 | # define HEADER "# " BENCHMARK "\n" 25 | #endif 26 | 27 | #define SYNC_MODE (UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC) 28 | 29 | shared char *src, *dst; 30 | 31 | shared double avg_time, max_time, min_time; 32 | shared double latency[THREADS]; 33 | 34 | int main(int argc, char *argv[]) 35 | { 36 | int i = 0, size; 37 | int skip; 38 | int64_t t_start = 0, t_stop = 0, timer=0; 39 | int max_msg_size = 1<<20, full = 0; 40 | 41 | if (process_args(argc, argv, MYTHREAD, &max_msg_size, &full, HEADER)) { 42 | return 0; 43 | } 44 | 45 | if(THREADS < 2) { 46 | if(MYTHREAD == 0) { 47 | fprintf(stderr, "This test requires at least two processes\n"); 48 | } 49 | return -1; 50 | } 51 | print_header(HEADER, MYTHREAD, full); 52 | 53 | src = upc_all_alloc(THREADS, max_msg_size*sizeof(char)); 54 | dst = upc_all_alloc(THREADS*THREADS, max_msg_size*sizeof(char)); 55 | upc_barrier; 56 | 57 | if(NULL == src || NULL == dst) { 58 | 
fprintf(stderr, "malloc failed.\n"); 59 | exit(1); 60 | } 61 | 62 | for(size=1; size <=max_msg_size; size *= 2) { 63 | if(size > LARGE_MESSAGE_SIZE) { 64 | skip = SKIP_LARGE; 65 | iterations = iterations_large; 66 | } 67 | else { 68 | skip = SKIP; 69 | } 70 | 71 | timer=0; 72 | for(i=0; i < iterations + skip ; i++) { 73 | t_start = TIME(); 74 | upc_all_gather_all(dst, src, size, SYNC_MODE ); 75 | t_stop = TIME(); 76 | 77 | if(i>=skip){ 78 | timer+=t_stop-t_start; 79 | } 80 | upc_barrier; 81 | } 82 | upc_barrier; 83 | latency[MYTHREAD] = (1.0 * timer) / iterations; 84 | 85 | upc_all_reduceD(&min_time, latency, UPC_MIN, THREADS, 1, NULL, SYNC_MODE); 86 | upc_all_reduceD(&max_time, latency, UPC_MAX, THREADS, 1, NULL, SYNC_MODE); 87 | upc_all_reduceD(&avg_time, latency, UPC_ADD, THREADS, 1, NULL, SYNC_MODE); 88 | if(!MYTHREAD) 89 | avg_time = avg_time/THREADS; 90 | 91 | print_data(MYTHREAD, full, size*sizeof(char), avg_time, min_time, max_time, iterations); 92 | } 93 | 94 | upc_barrier; 95 | return EXIT_SUCCESS; 96 | } 97 | 98 | /* vi: set sw=4 sts=4 tw=80: */ 99 | -------------------------------------------------------------------------------- /upc/osu_upc_all_reduce.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC Reduce Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | #include "osu_coll.h" 19 | #include 20 | 21 | #ifdef PACKAGE_VERSION 22 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 23 | #else 24 | # define HEADER "# " BENCHMARK "\n" 25 | #endif 26 | 27 | #define SYNC_MODE (UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC) 28 | 29 | shared char *src; 30 | shared double avg_time, max_time, min_time; 31 | shared double latency[THREADS]; 32 | shared char dst; 33 | 34 | int main(int argc, char *argv[]) 35 | { 36 | int i = 0, size; 37 | int skip; 38 | int64_t t_start = 0, t_stop = 0, timer=0; 39 | int max_msg_size = 1<<20, full = 0; 40 | 41 | if (process_args(argc, argv, MYTHREAD, &max_msg_size, &full, HEADER)) { 42 | return 0; 43 | } 44 | 45 | if(THREADS < 2) { 46 | if(MYTHREAD == 0) { 47 | fprintf(stderr, "This test requires at least two processes\n"); 48 | } 49 | return -1; 50 | } 51 | print_header(HEADER, MYTHREAD, full); 52 | 53 | src = upc_all_alloc(THREADS, max_msg_size*sizeof(char)); 54 | 55 | if(NULL == src) { 56 | fprintf(stderr, "malloc failed.\n"); 57 | exit(1); 58 | } 59 | 60 | for(size=1; size <=max_msg_size; size *= 2) { 61 | if(size > LARGE_MESSAGE_SIZE) { 62 | skip = SKIP_LARGE; 63 | iterations = iterations_large; 64 | } 65 | else { 66 | skip = SKIP; 67 | } 68 | 69 | timer=0; 70 | for(i=0; i < iterations + skip ; i++) { 71 | upc_barrier; 72 | t_start = TIME(); 73 | upc_all_reduceC(&dst, src, UPC_MAX, size * THREADS, size, NULL, SYNC_MODE); 74 | t_stop = TIME(); 75 | 76 | if(i>=skip){ 77 | timer+=t_stop-t_start; 78 | } 79 | upc_barrier; 80 | } 81 | upc_barrier; 82 | latency[MYTHREAD] = (1.0 * timer) / iterations; 83 | 84 | upc_all_reduceD(&min_time, latency, UPC_MIN, THREADS, 1, NULL, SYNC_MODE); 85 | upc_all_reduceD(&max_time, latency, UPC_MAX, THREADS, 1, NULL, SYNC_MODE); 86 | upc_all_reduceD(&avg_time, latency, UPC_ADD, THREADS, 1, NULL, SYNC_MODE); 87 | if(!MYTHREAD) 88 | avg_time = avg_time/THREADS; 
89 | 90 | print_data(MYTHREAD, full, size*sizeof(char), avg_time, min_time, max_time, iterations); 91 | } 92 | 93 | return EXIT_SUCCESS; 94 | } 95 | 96 | /* vi: set sw=4 sts=4 tw=80: */ 97 | -------------------------------------------------------------------------------- /upc/osu_upc_all_scatter.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC Scatter Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | #include "osu_coll.h" 19 | #include 20 | 21 | #ifdef PACKAGE_VERSION 22 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 23 | #else 24 | # define HEADER "# " BENCHMARK "\n" 25 | #endif 26 | 27 | #define SYNC_MODE (UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC) 28 | 29 | shared char *dst, *src; 30 | 31 | shared double avg_time, max_time, min_time; 32 | shared double latency[THREADS]; 33 | 34 | int main(int argc, char *argv[]) 35 | { 36 | int i = 0, size; 37 | int skip; 38 | int64_t t_start = 0, t_stop = 0, timer=0; 39 | int max_msg_size = 1<<20, full = 0; 40 | 41 | if (process_args(argc, argv, MYTHREAD, &max_msg_size, &full, HEADER)) { 42 | return 0; 43 | } 44 | 45 | if(THREADS < 2) { 46 | if(MYTHREAD == 0) { 47 | fprintf(stderr, "This test requires at least two processes\n"); 48 | } 49 | return -1; 50 | } 51 | print_header(HEADER, MYTHREAD, full); 52 | 53 | dst = upc_all_alloc(THREADS, max_msg_size*sizeof(char)); 54 | src = upc_all_alloc(1, THREADS*max_msg_size*sizeof(char)); 55 | 56 | if(NULL == dst || NULL == src) { 57 | fprintf(stderr, "malloc failed.\n"); 58 | exit(1); 59 | } 60 | 61 | for(size=1; size 
<=max_msg_size; size *= 2) { 62 | if(size > LARGE_MESSAGE_SIZE) { 63 | skip = SKIP_LARGE; 64 | iterations = iterations_large; 65 | } 66 | else { 67 | skip = SKIP; 68 | } 69 | 70 | timer=0; 71 | for(i=0; i < iterations + skip ; i++) { 72 | t_start = TIME(); 73 | 74 | upc_all_scatter(dst, src, size, SYNC_MODE ); 75 | t_stop = TIME(); 76 | 77 | if(i>=skip){ 78 | timer+=t_stop-t_start; 79 | } 80 | upc_barrier; 81 | } 82 | upc_barrier; 83 | latency[MYTHREAD] = (1.0 * timer) / iterations; 84 | 85 | upc_all_reduceD(&min_time, latency, UPC_MIN, THREADS, 1, NULL, SYNC_MODE); 86 | upc_all_reduceD(&max_time, latency, UPC_MAX, THREADS, 1, NULL, SYNC_MODE); 87 | upc_all_reduceD(&avg_time, latency, UPC_ADD, THREADS, 1, NULL, SYNC_MODE); 88 | if(!MYTHREAD) 89 | avg_time = avg_time/THREADS; 90 | 91 | print_data(MYTHREAD, full, size*sizeof(char), avg_time, min_time, max_time, iterations); 92 | } 93 | 94 | return EXIT_SUCCESS; 95 | } 96 | 97 | /* vi: set sw=4 sts=4 tw=80: */ 98 | -------------------------------------------------------------------------------- /upc/osu_upc_memget.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC MEMGET Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #define MAX_MSG_SIZE (1<<22) 17 | #define SKIP_LARGE 10 18 | #define LOOP_LARGE 100 19 | #define LARGE_MESSAGE_SIZE 8192 20 | 21 | int skip = 1000; 22 | int loop = 10000; 23 | 24 | #ifdef PACKAGE_VERSION 25 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 26 | #else 27 | # define HEADER "# " BENCHMARK "\n" 28 | #endif 29 | 30 | #ifndef FIELD_WIDTH 31 | # define FIELD_WIDTH 20 32 | #endif 33 | 34 | #ifndef FLOAT_PRECISION 35 | # define FLOAT_PRECISION 2 36 | #endif 37 | 38 | void wtime(double *t) 39 | { 40 | static int sec = -1; 41 | struct timeval tv; 42 | gettimeofday(&tv, (void *)0); 43 | if (sec < 0) sec = tv.tv_sec; 44 | *t = (tv.tv_sec - sec)*1.0e+6 + tv.tv_usec; 45 | } 46 | 47 | int main(int argc, char **argv) 48 | { 49 | int iters=0; 50 | double t_start, t_end; 51 | int peerid = (MYTHREAD+1)%THREADS; 52 | int iamsender = 0; 53 | int i; 54 | 55 | if( THREADS == 1 ) { 56 | if(MYTHREAD == 0) { 57 | fprintf(stderr, "This test requires at least two UPC threads\n"); 58 | } 59 | return 0; 60 | } 61 | 62 | if ( MYTHREAD < THREADS/2 ) 63 | iamsender = 1; 64 | 65 | shared char *data = upc_all_alloc(THREADS, MAX_MSG_SIZE*2); 66 | shared [] char *remote = (shared [] char *)(data + peerid); 67 | char *local = ((char *)(data+MYTHREAD)) + MAX_MSG_SIZE; 68 | 69 | if ( !MYTHREAD ) { 70 | fprintf(stdout, HEADER); 71 | fprintf(stdout, "# [ pairs: %d ]\n", THREADS/2); 72 | fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)"); 73 | fflush(stdout); 74 | } 75 | 76 | for (int size = 1; size <= MAX_MSG_SIZE; size*=2) { 77 | 78 | if ( iamsender ) 79 | for(i = 0; i < size; i++) { 80 | local[i] = 'a'; 81 | } 82 | else 83 | for(i = 0; i < size; i++) { 84 | local[i] = 'b'; 85 | } 86 | 87 | upc_barrier; 88 | 89 | if(size > LARGE_MESSAGE_SIZE) { 90 | loop = LOOP_LARGE; 91 | skip = SKIP_LARGE; 92 | } 93 | 94 | if( iamsender ) 95 | { 96 | for ( i = 0; i < loop + skip; i++) { 97 | if(i == skip) { 98 
| upc_barrier; 99 | wtime(&t_start); 100 | } 101 | 102 | upc_memget(local, remote, size); 103 | } 104 | 105 | upc_barrier; 106 | 107 | wtime(&t_end); 108 | if( !MYTHREAD ) 109 | { 110 | double latency = (t_end - t_start)/(1.0 * loop); 111 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 112 | FLOAT_PRECISION, latency); 113 | fflush(stdout); 114 | } 115 | } else 116 | { 117 | upc_barrier; 118 | upc_barrier; 119 | } 120 | 121 | } 122 | return 0; 123 | } 124 | -------------------------------------------------------------------------------- /upc/osu_upc_memput.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC MEMPUT Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #define MAX_MSG_SIZE (1<<22) 18 | #define SKIP_LARGE 10 19 | #define LOOP_LARGE 100 20 | #define LARGE_MESSAGE_SIZE 8192 21 | 22 | int skip = 1000; 23 | int loop = 10000; 24 | 25 | #ifdef PACKAGE_VERSION 26 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 27 | #else 28 | # define HEADER "# " BENCHMARK "\n" 29 | #endif 30 | 31 | #ifndef FIELD_WIDTH 32 | # define FIELD_WIDTH 20 33 | #endif 34 | 35 | #ifndef FLOAT_PRECISION 36 | # define FLOAT_PRECISION 2 37 | #endif 38 | 39 | 40 | void wtime(double *t) 41 | { 42 | static int sec = -1; 43 | struct timeval tv; 44 | gettimeofday(&tv, (void *)0); 45 | if (sec < 0) sec = tv.tv_sec; 46 | *t = (tv.tv_sec - sec)*1.0e+6 + tv.tv_usec; 47 | } 48 | 49 | int main(int argc, char **argv) 50 | { 51 | int iters=0; 52 | double t_start, t_end; 53 | int peerid = (MYTHREAD+1)%THREADS; 54 | int iamsender = 0; 55 | int i; 56 | 57 | if( THREADS == 1 ) { 58 | if(MYTHREAD == 0) { 59 | fprintf(stderr, "This test requires at least two UPC threads\n"); 60 | } 61 | return 0; 62 | } 63 | 64 | if ( MYTHREAD < THREADS/2 ) 65 | iamsender = 1; 66 | 67 | shared char *data = upc_all_alloc(THREADS, MAX_MSG_SIZE*2); 68 | shared [] char *remote = (shared [] char *)(data + peerid); 69 | char *local = ((char *)(data+MYTHREAD)) + MAX_MSG_SIZE; 70 | 71 | if ( !MYTHREAD ) { 72 | fprintf(stdout, HEADER); 73 | fprintf(stdout, "# [ pairs: %d ]\n", THREADS/2); 74 | fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)"); 75 | fflush(stdout); 76 | } 77 | 78 | for (int size = 1; size <= MAX_MSG_SIZE; size*=2) { 79 | 80 | if ( iamsender ) 81 | for(i = 0; i < size; i++) { 82 | local[i] = 'a'; 83 | } 84 | else 85 | for(i = 0; i < size; i++) { 86 | local[i] = 'b'; 87 | } 88 | 89 | upc_barrier; 90 | 91 | if(size > LARGE_MESSAGE_SIZE) { 92 | loop = LOOP_LARGE; 93 | skip = SKIP_LARGE; 94 | } 95 | 96 | if( iamsender ) 97 | { 98 | for (i = 0; i < loop + skip; i++) { 99 | 
if(i == skip) { 100 | upc_barrier; 101 | wtime(&t_start); 102 | } 103 | 104 | upc_memput(remote, local, size); 105 | upc_fence; 106 | } 107 | 108 | upc_barrier; 109 | 110 | wtime(&t_end); 111 | if( !MYTHREAD ) 112 | { 113 | double latency = (t_end - t_start)/(1.0 * loop); 114 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 115 | FLOAT_PRECISION, latency); 116 | fflush(stdout); 117 | } 118 | } else 119 | { 120 | upc_barrier; 121 | upc_barrier; 122 | } 123 | 124 | } 125 | return 0; 126 | } 127 | -------------------------------------------------------------------------------- /upcxx/Makefile.am: -------------------------------------------------------------------------------- 1 | upcdir = $(pkglibexecdir)/upcxx 2 | upc_PROGRAMS = osu_upcxx_allgather osu_upcxx_alltoall osu_upcxx_bcast \ 3 | osu_upcxx_gather osu_upcxx_reduce osu_upcxx_scatter \ 4 | osu_upcxx_async_copy_get osu_upcxx_async_copy_put 5 | 6 | osu_upcxx_allgather_SOURCES = osu_upcxx_allgather.cpp osu_coll.h \ 7 | osu_common.c osu_common.h 8 | osu_upcxx_alltoall_SOURCES = osu_upcxx_alltoall.cpp osu_coll.h \ 9 | osu_common.c osu_common.h 10 | osu_upcxx_bcast_SOURCES = osu_upcxx_bcast.cpp osu_coll.h \ 11 | osu_common.c osu_common.h 12 | osu_upcxx_gather_SOURCES = osu_upcxx_gather.cpp osu_coll.h \ 13 | osu_common.c osu_common.h 14 | osu_upcxx_reduce_SOURCES = osu_upcxx_reduce.cpp osu_coll.h \ 15 | osu_common.c osu_common.h 16 | osu_upcxx_scatter_SOURCES = osu_upcxx_scatter.cpp osu_coll.h \ 17 | osu_common.c osu_common.h 18 | osu_upcxx_async_copy_get_SOURCES = osu_upcxx_async_copy_get.cpp \ 19 | osu_common.c osu_common.h 20 | osu_upcxx_async_copy_put_SOURCES = osu_upcxx_async_copy_put.cpp \ 21 | osu_common.c osu_common.h 22 | -------------------------------------------------------------------------------- /upcxx/osu_common.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio 
/* (continuation) State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */
#include <stdio.h>            /* NOTE(review): include targets were stripped  */
#include <stdlib.h>           /* from this dump; reconstructed — confirm.     */
#include <string.h>
#include <sys/time.h>

#ifdef __cplusplus
extern "C" double getMicrosecondTimeStamp (void);
#endif /* #ifdef __cplusplus */

/*
 * Return the current wall-clock time in microseconds since the epoch,
 * as a double.  Aborts the process if gettimeofday() fails.
 */
double
getMicrosecondTimeStamp (void)
{
    double retval;
    struct timeval tv;

    if (gettimeofday(&tv, NULL)) {
        perror("gettimeofday");
        abort();
    }

    retval = tv.tv_sec * (double)1e6 + tv.tv_usec;

    return retval;
}

/* --------------------------- /upcxx/osu_common.h --------------------------- */

/*
 * Copyright (C) 2002-2015 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */
/*
 * FIX: the original guard '_OSU_COMMON_H_' is a reserved identifier
 * (leading underscore + uppercase, C11 7.1.3) and its #endif comment
 * said '_OSU_COMMON_H' (mismatch).  Renamed to OSU_COMMON_H; the guard
 * is only referenced inside this header, so the change is local.
 */
#ifndef OSU_COMMON_H
#define OSU_COMMON_H

/* HEADER expands BENCHMARK, which each benchmark defines before including us */
#ifdef PACKAGE_VERSION
#   define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n"
#else
#   define HEADER "# " BENCHMARK "\n"
#endif

#ifndef FIELD_WIDTH
#   define FIELD_WIDTH 20
#endif

#ifndef FLOAT_PRECISION
#   define FLOAT_PRECISION 2
#endif

#define TIME() getMicrosecondTimeStamp()

#ifdef __cplusplus
extern "C" {
#endif /* #ifdef __cplusplus */

/* FIX: '(void)' makes this a proper prototype matching the definition */
double getMicrosecondTimeStamp(void);

#ifdef __cplusplus
}
#endif /* #ifdef __cplusplus */

#endif /* OSU_COMMON_H */

/* ----------------------- /upcxx/osu_upcxx_allgather.cpp -------------------- */

#define BENCHMARK "OSU UPC++ AllGather Latency Test"
/*
 * Copyright (C) 2002-2015 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 * NOTE(review): comment continues on the next (untouched) dump line.
 */
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #define root 0 19 | #define VERIFY 0 20 | 21 | using namespace std; 22 | using namespace upcxx; 23 | 24 | int 25 | main (int argc, char *argv[]) 26 | { 27 | init(&argc, &argv); 28 | 29 | global_ptr src; 30 | global_ptr dst; 31 | global_ptr time_src; 32 | global_ptr time_dst; 33 | 34 | double avg_time, max_time, min_time; 35 | int i = 0, size; 36 | int skip; 37 | int64_t t_start = 0, t_stop = 0, timer=0; 38 | int max_msg_size = 1<<20, full = 0; 39 | 40 | if (process_args(argc, argv, myrank(), &max_msg_size, &full, HEADER)) { 41 | return 0; 42 | } 43 | 44 | if (ranks() < 2) { 45 | if (myrank() == 0) { 46 | fprintf(stderr, "This test requires at least two processes\n"); 47 | } 48 | return -1; 49 | } 50 | 51 | src = allocate (myrank(), max_msg_size*sizeof(char)); 52 | dst = allocate (myrank(), max_msg_size*sizeof(char)*ranks()); 53 | 54 | assert(src != NULL); 55 | assert(dst != NULL); 56 | 57 | time_src = allocate (myrank(), 1); 58 | time_dst = allocate (root, 1); 59 | 60 | assert(time_src != NULL); 61 | assert(time_dst != NULL); 62 | 63 | /* 64 | * put a barrier since allocate is non-blocking in upc++ 65 | */ 66 | barrier(); 67 | 68 | print_header(HEADER, myrank(), full); 69 | 70 | for (size=1; size <=max_msg_size; size *= 2) { 71 | if (size > LARGE_MESSAGE_SIZE) { 72 | skip = SKIP_LARGE; 73 | iterations = iterations_large; 74 | } else { 75 | skip = SKIP; 76 | } 77 | 78 | timer=0; 79 | for(i=0; i < iterations + skip ; i++) { 80 | //t_start = TIME(); 81 | t_start = getMicrosecondTimeStamp(); 82 | 83 | upcxx_allgather((char *)src, (char *)dst, size*sizeof(char)); 84 | t_stop = getMicrosecondTimeStamp(); 85 | 86 | if(i>=skip){ 87 | timer+=t_stop-t_start; 88 | } 89 | barrier(); 90 | } 91 | 92 | barrier(); 93 | 94 | double* lsrc = (double *)time_src; 95 | lsrc[0] = (1.0 * timer) / iterations; 96 | 97 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 98 | UPCXX_MAX, 
UPCXX_DOUBLE); 99 | if (myrank()==root) { 100 | double* ldst = (double *)time_dst; 101 | max_time = ldst[0]; 102 | } 103 | 104 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 105 | UPCXX_MIN, UPCXX_DOUBLE); 106 | if (myrank()==root) { 107 | double* ldst = (double *)time_dst; 108 | min_time = ldst[0]; 109 | } 110 | 111 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 112 | UPCXX_SUM, UPCXX_DOUBLE); 113 | if (myrank()==root) { 114 | double* ldst = (double *)time_dst; 115 | avg_time = ldst[0]/ranks(); 116 | } 117 | 118 | barrier (); 119 | 120 | print_data(myrank(), full, size*sizeof(char), avg_time, min_time, 121 | max_time, iterations); 122 | } 123 | 124 | deallocate(src); 125 | deallocate(dst); 126 | deallocate(time_src); 127 | deallocate(time_dst); 128 | 129 | finalize(); 130 | 131 | return EXIT_SUCCESS; 132 | } 133 | 134 | /* vi: set sw=4 sts=4 tw=80: */ 135 | -------------------------------------------------------------------------------- /upcxx/osu_upcxx_alltoall.cpp: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC++ AlltoAll Latency Test" 2 | /* 3 | * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #define root 0 19 | #define VERIFY 0 20 | 21 | using namespace std; 22 | using namespace upcxx; 23 | 24 | int 25 | main (int argc, char *argv[]) 26 | { 27 | init(&argc, &argv); 28 | 29 | global_ptr src; 30 | global_ptr dst; 31 | global_ptr time_src; 32 | global_ptr time_dst; 33 | 34 | double avg_time, max_time, min_time; 35 | int i = 0, size; 36 | int skip; 37 | int64_t t_start = 0, t_stop = 0, timer=0; 38 | int max_msg_size = 1<<20, full = 0; 39 | 40 | if (process_args(argc, argv, myrank(), &max_msg_size, &full, HEADER)) { 41 | return 0; 42 | } 43 | 44 | if (ranks() < 2) { 45 | if (myrank() == 0) { 46 | fprintf(stderr, "This test requires at least two processes\n"); 47 | } 48 | return -1; 49 | } 50 | 51 | src = allocate (myrank(), max_msg_size*sizeof(char)*ranks()); 52 | dst = allocate (myrank(), max_msg_size*sizeof(char)*ranks()); 53 | 54 | assert(src != NULL); 55 | assert(dst != NULL); 56 | 57 | time_src = allocate (myrank(), 1); 58 | time_dst = allocate (root, 1); 59 | 60 | assert(time_src != NULL); 61 | assert(time_dst != NULL); 62 | 63 | /* 64 | * put a barrier since allocate is non-blocking in upc++ 65 | */ 66 | barrier(); 67 | 68 | print_header(HEADER, myrank(), full); 69 | 70 | for (size=1; size <=max_msg_size; size *= 2) { 71 | if (size > LARGE_MESSAGE_SIZE) { 72 | skip = SKIP_LARGE; 73 | iterations = iterations_large; 74 | } else { 75 | skip = SKIP; 76 | } 77 | 78 | timer=0; 79 | for (i=0; i < iterations + skip ; i++) { 80 | t_start = getMicrosecondTimeStamp(); 81 | upcxx_alltoall((char *)src, (char *)dst, size*sizeof(char)); 82 | t_stop = getMicrosecondTimeStamp(); 83 | 84 | if (i>=skip) { 85 | timer+=t_stop-t_start; 86 | } 87 | barrier(); 88 | } 89 | 90 | barrier(); 91 | 92 | double* lsrc = (double *)time_src; 93 | lsrc[0] = (1.0 * timer) / iterations; 94 | 95 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 96 | UPCXX_MAX, UPCXX_DOUBLE); 97 | if 
(myrank()==root) { 98 | double* ldst = (double *)time_dst; 99 | max_time = ldst[0]; 100 | } 101 | 102 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 103 | UPCXX_MIN, UPCXX_DOUBLE); 104 | if (myrank()==root) { 105 | double* ldst = (double *)time_dst; 106 | min_time = ldst[0]; 107 | } 108 | 109 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 110 | UPCXX_SUM, UPCXX_DOUBLE); 111 | if (myrank()==root) { 112 | double* ldst = (double *)time_dst; 113 | avg_time = ldst[0]/ranks(); 114 | } 115 | 116 | barrier (); 117 | 118 | print_data(myrank(), full, size*sizeof(char), avg_time, min_time, 119 | max_time, iterations); 120 | } 121 | 122 | deallocate(src); 123 | deallocate(dst); 124 | deallocate(time_src); 125 | deallocate(time_dst); 126 | 127 | finalize(); 128 | 129 | return EXIT_SUCCESS; 130 | } 131 | 132 | /* vi: set sw=4 sts=4 tw=80: */ 133 | -------------------------------------------------------------------------------- /upcxx/osu_upcxx_async_copy_get.cpp: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC++ Async Copy (Get) Test" 2 | /* 3 | * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace upcxx; 19 | 20 | #define VERIFY 0 21 | #define MAX_MSG_SIZE (1<<22) 22 | #define SKIP_LARGE 10 23 | #define LOOP_LARGE 100 24 | #define LARGE_MESSAGE_SIZE 8192 25 | 26 | int skip = 1000; 27 | int loop = 10000; 28 | 29 | int 30 | main (int argc, char **argv) 31 | { 32 | init(&argc, &argv); 33 | 34 | int iters=0; 35 | double t_start, t_end; 36 | int peerid = (myrank()+1)%ranks(); 37 | int iamsender = 0; 38 | int i; 39 | 40 | if (ranks() == 1) { 41 | if (myrank() == 0) { 42 | fprintf(stderr, "This test requires at least two UPC threads\n"); 43 | } 44 | return 0; 45 | } 46 | 47 | if (myrank() < ranks()/2) { 48 | iamsender = 1; 49 | } 50 | 51 | shared_array, 1> data_ptrs (ranks()); 52 | 53 | /* 54 | * allocate memory to each global pointer. 55 | */ 56 | data_ptrs[myrank()] = allocate(myrank(), sizeof(char) 57 | * MAX_MSG_SIZE); 58 | 59 | /* 60 | * put a barrier since allocate is non-blocking in upc++ 61 | */ 62 | barrier(); 63 | 64 | /* 65 | * my peer's pointer from where I will memget. 66 | */ 67 | global_ptr remote = data_ptrs[peerid]; 68 | 69 | /* 70 | * cast my global pointer to a local pointer. 
71 | */ 72 | global_ptr local = (global_ptr)data_ptrs[myrank()]; 73 | 74 | barrier(); 75 | 76 | if (!myrank()) { 77 | fprintf(stdout, HEADER); 78 | fprintf(stdout, "# [ pairs: %d ]\n", ranks()/2); 79 | fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)"); 80 | fflush(stdout); 81 | } 82 | 83 | for (int size = 1; size <= MAX_MSG_SIZE; size*=2) { 84 | if (iamsender) { 85 | for (i = 0; i < size; i++) { 86 | char *lptr = (char *)local; 87 | lptr[i] = 'a'; 88 | } 89 | } else { 90 | for (i = 0; i < size; i++) { 91 | char *lptr = (char *)local; 92 | lptr[i] = 'b'; 93 | } 94 | } 95 | 96 | barrier(); 97 | 98 | if (size > LARGE_MESSAGE_SIZE) { 99 | loop = LOOP_LARGE; 100 | skip = SKIP_LARGE; 101 | } 102 | 103 | if (iamsender) { 104 | for ( i = 0; i < loop + skip; i++) { 105 | if (i == skip) { 106 | barrier(); 107 | t_start = getMicrosecondTimeStamp(); 108 | } 109 | 110 | async_copy(remote, local, size); 111 | } 112 | 113 | async_wait(); 114 | barrier(); 115 | 116 | t_end = getMicrosecondTimeStamp(); 117 | if (!myrank()) { 118 | double latency = (t_end - t_start)/(1.0 * loop); 119 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 120 | FLOAT_PRECISION, latency); 121 | fflush(stdout); 122 | } 123 | } else { 124 | barrier(); 125 | barrier(); 126 | } 127 | } 128 | 129 | if (VERIFY) { 130 | if (iamsender) { 131 | /* 132 | * my local and my remote ptr should have same data 133 | */ 134 | char *lptr = (char *)local; 135 | for (int i = 0; i < MIN(20, MAX_MSG_SIZE); i++) { 136 | printf("sender_rank():%d --- lptr[%d]=%c , rptr[%d]=%c \n", 137 | myrank(), i, lptr[i], i, (char)remote[i]); 138 | 139 | } 140 | } 141 | } 142 | 143 | deallocate(local); 144 | barrier(); 145 | finalize(); 146 | 147 | return 0; 148 | } 149 | -------------------------------------------------------------------------------- /upcxx/osu_upcxx_async_copy_put.cpp: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC++ Async Copy (Put) 
Test" 2 | /* 3 | * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace upcxx; 19 | 20 | #define VERIFY 0 21 | #define MAX_MSG_SIZE (1<<22) 22 | #define SKIP_LARGE 10 23 | #define LOOP_LARGE 100 24 | #define LARGE_MESSAGE_SIZE 8192 25 | 26 | int skip = 1000; 27 | int loop = 10000; 28 | 29 | int 30 | main (int argc, char **argv) 31 | { 32 | init(&argc, &argv); 33 | 34 | int iters=0; 35 | double t_start, t_end; 36 | int peerid = (ranks()+1)%ranks(); 37 | int iamsender = 0; 38 | int i; 39 | 40 | if (ranks() == 1) { 41 | if (myrank() == 0) { 42 | fprintf(stderr, "This test requires at least two UPC threads\n"); 43 | } 44 | return 0; 45 | } 46 | 47 | if (myrank() < ranks()/2) { 48 | iamsender = 1; 49 | } 50 | 51 | /* 52 | * a shared array of global pointers. 53 | */ 54 | shared_array, 1> data_ptrs (ranks()); 55 | 56 | /* 57 | * allocate memory to each global pointer. 58 | */ 59 | data_ptrs[myrank()] = allocate(myrank(), sizeof(char) * MAX_MSG_SIZE); 60 | 61 | /* 62 | * put a barrier since allocate is non-blocking in upc++ 63 | */ 64 | barrier(); 65 | 66 | /* 67 | * my peer's pointer from where I will memput. 68 | */ 69 | global_ptr remote = data_ptrs[peerid]; 70 | 71 | /* 72 | * cast my global pointer to a local pointer. 
73 | */ 74 | global_ptr local = data_ptrs[myrank()]; 75 | 76 | barrier(); 77 | 78 | if (!myrank()) { 79 | fprintf(stdout, HEADER); 80 | fprintf(stdout, "# [ pairs: %d ]\n", ranks()/2); 81 | fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, 82 | "Latency (us)"); 83 | fflush(stdout); 84 | } 85 | 86 | for (int size = 1; size <= MAX_MSG_SIZE; size*=2) { 87 | if (iamsender) { 88 | for(i = 0; i < size; i++) { 89 | char *lptr = (char *)local; 90 | lptr[i] = 'a'; 91 | } 92 | } else { 93 | for(i = 0; i < size; i++) { 94 | char *lptr = (char *)local; 95 | lptr[i] = 'b'; 96 | } 97 | } 98 | 99 | barrier(); 100 | 101 | if (size > LARGE_MESSAGE_SIZE) { 102 | loop = LOOP_LARGE; 103 | skip = SKIP_LARGE; 104 | } 105 | 106 | if (iamsender) { 107 | for (i = 0; i < loop + skip; i++) { 108 | if(i == skip) { 109 | barrier(); 110 | t_start = getMicrosecondTimeStamp(); 111 | } 112 | 113 | async_copy(local, remote, size); 114 | } 115 | async_wait(); 116 | 117 | barrier(); 118 | 119 | t_end = getMicrosecondTimeStamp(); 120 | 121 | if (!myrank()) { 122 | double latency = (t_end - t_start)/(1.0 * loop); 123 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 124 | FLOAT_PRECISION, latency); 125 | fflush(stdout); 126 | } 127 | } else { 128 | barrier(); 129 | barrier(); 130 | } 131 | } 132 | 133 | if (VERIFY) { 134 | if (iamsender) { 135 | /* 136 | * my local and my remote ptr should have same data 137 | */ 138 | char *lptr = (char *)local; 139 | for (int i = 0; i < MIN(20, MAX_MSG_SIZE); i++) { 140 | printf ("sender_rank():%d --- lptr[%d]=%c , rptr[%d]=%c \n", 141 | myrank(), i, lptr[i], i, (char)remote[i]); 142 | } 143 | } 144 | } 145 | 146 | deallocate(local); 147 | barrier(); 148 | finalize(); 149 | 150 | return 0; 151 | } 152 | -------------------------------------------------------------------------------- /upcxx/osu_upcxx_bcast.cpp: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC++ Broadcast Latency Test" 2 | /* 3 
| * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace std; 19 | using namespace upcxx; 20 | 21 | #define root 0 22 | #define VERIFY 0 23 | 24 | int 25 | main (int argc, char *argv[]) 26 | { 27 | init(&argc, &argv); 28 | 29 | global_ptr src; 30 | global_ptr dst; 31 | global_ptr time_src; 32 | global_ptr time_dst; 33 | 34 | double avg_time, max_time, min_time; 35 | int i = 0, size; 36 | int skip; 37 | int64_t t_start = 0, t_stop = 0, timer=0; 38 | int max_msg_size = 1<<20, full = 0; 39 | 40 | if (process_args(argc, argv, myrank(), &max_msg_size, &full, HEADER)) { 41 | return 0; 42 | } 43 | 44 | if (ranks() < 2) { 45 | if (myrank() == 0) { 46 | fprintf(stderr, "This test requires at least two processes\n"); 47 | } 48 | return -1; 49 | } 50 | 51 | src = allocate (root, max_msg_size*sizeof(char)); 52 | dst = allocate (myrank(), max_msg_size*sizeof(char)); 53 | 54 | assert(src != NULL); 55 | assert(dst != NULL); 56 | 57 | time_src = allocate (myrank(), 1); 58 | time_dst = allocate (root, 1); 59 | 60 | assert(time_src != NULL); 61 | assert(time_dst != NULL); 62 | 63 | /* 64 | * put a barrier since allocate is non-blocking in upc++ 65 | */ 66 | barrier(); 67 | 68 | print_header(HEADER, myrank(), full); 69 | 70 | for (size=1; size <=max_msg_size; size *= 2) { 71 | if (size > LARGE_MESSAGE_SIZE) { 72 | skip = SKIP_LARGE; 73 | iterations = iterations_large; 74 | } else { 75 | skip = SKIP; 76 | } 77 | 78 | timer=0; 79 | for (i=0; i < iterations + skip ; i++) { 80 | t_start = getMicrosecondTimeStamp(); 81 | upcxx_bcast((char *)src, (char *)dst, size*sizeof(char), root); 82 | t_stop = getMicrosecondTimeStamp(); 83 
| 84 | if (i>=skip) { 85 | timer+=t_stop-t_start; 86 | } 87 | barrier(); 88 | } 89 | 90 | barrier(); 91 | 92 | double* lsrc = (double *)time_src; 93 | lsrc[0] = (1.0 * timer) / iterations; 94 | 95 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 96 | UPCXX_MAX, UPCXX_DOUBLE); 97 | if (myrank()==root) { 98 | double* ldst = (double *)time_dst; 99 | max_time = ldst[0]; 100 | } 101 | 102 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 103 | UPCXX_MIN, UPCXX_DOUBLE); 104 | if (myrank()==root) { 105 | double* ldst = (double *)time_dst; 106 | min_time = ldst[0]; 107 | } 108 | 109 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 110 | UPCXX_SUM, UPCXX_DOUBLE); 111 | if (myrank()==root) { 112 | double* ldst = (double *)time_dst; 113 | avg_time = ldst[0]/ranks(); 114 | } 115 | 116 | barrier(); 117 | 118 | print_data(myrank(), full, size*sizeof(char), avg_time, min_time, 119 | max_time, iterations); 120 | } 121 | 122 | deallocate(src); 123 | deallocate(dst); 124 | deallocate(time_src); 125 | deallocate(time_dst); 126 | 127 | finalize(); 128 | 129 | return EXIT_SUCCESS; 130 | } 131 | 132 | /* vi: set sw=4 sts=4 tw=80: */ 133 | -------------------------------------------------------------------------------- /upcxx/osu_upcxx_gather.cpp: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC++ Gather Latency Test" 2 | /* 3 | * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace std; 19 | using namespace upcxx; 20 | 21 | #define root 0 22 | #define VERIFY 1 23 | 24 | shared_lock sl; 25 | 26 | int 27 | main (int argc, char *argv[]) 28 | { 29 | init(&argc, &argv); 30 | 31 | global_ptr src; 32 | global_ptr dst; 33 | global_ptr time_src; 34 | global_ptr time_dst; 35 | 36 | double avg_time, max_time, min_time; 37 | int i = 0, size; 38 | int skip; 39 | int64_t t_start = 0, t_stop = 0, timer=0; 40 | int max_msg_size = 1<<20, full = 0; 41 | 42 | if (process_args(argc, argv, myrank(), &max_msg_size, &full, HEADER)) { 43 | return 0; 44 | } 45 | 46 | if(ranks() < 2) { 47 | if(myrank() == 0) { 48 | fprintf(stderr, "This test requires at least two processes\n"); 49 | } 50 | return -1; 51 | } 52 | 53 | src = allocate (myrank(), max_msg_size*sizeof(char)); 54 | dst = allocate (root, max_msg_size*sizeof(char)*ranks()); 55 | 56 | assert(src != NULL); 57 | assert(dst != NULL); 58 | 59 | time_src = allocate (myrank(), 1); 60 | time_dst = allocate (root, 1); 61 | 62 | assert(time_src != NULL); 63 | assert(time_dst != NULL); 64 | 65 | /* 66 | * put a barrier since allocate is non-blocking in upc++ 67 | */ 68 | barrier(); 69 | 70 | print_header(HEADER, myrank(), full); 71 | 72 | for (size=1; size <=max_msg_size; size *= 2) { 73 | if (size > LARGE_MESSAGE_SIZE) { 74 | skip = SKIP_LARGE; 75 | iterations = iterations_large; 76 | } else { 77 | skip = SKIP; 78 | } 79 | 80 | timer=0; 81 | for(i=0; i < iterations + skip ; i++) { 82 | t_start = getMicrosecondTimeStamp(); 83 | upcxx_gather((char *)src, (char *)dst, size*sizeof(char), root); 84 | t_stop = getMicrosecondTimeStamp(); 85 | 86 | if (i>=skip) { 87 | timer+=t_stop-t_start; 88 | } 89 | barrier(); 90 | } 91 | 92 | barrier(); 93 | 94 | double* lsrc = (double *)time_src; 95 | lsrc[0] = (1.0 * timer) / iterations; 96 | 97 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 98 | UPCXX_MAX, 
UPCXX_DOUBLE); 99 | if (myrank()==root) { 100 | double* ldst = (double *)time_dst; 101 | max_time = ldst[0]; 102 | } 103 | 104 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 105 | UPCXX_MIN, UPCXX_DOUBLE); 106 | if (myrank()==root) { 107 | double* ldst = (double *)time_dst; 108 | min_time = ldst[0]; 109 | } 110 | 111 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 112 | UPCXX_SUM, UPCXX_DOUBLE); 113 | if (myrank()==root) { 114 | double* ldst = (double *)time_dst; 115 | avg_time = ldst[0]/ranks(); 116 | } 117 | 118 | barrier(); 119 | 120 | print_data(myrank(), full, size*sizeof(char), avg_time, min_time, 121 | max_time, iterations); 122 | } 123 | 124 | deallocate(src); 125 | deallocate(dst); 126 | deallocate(time_src); 127 | deallocate(time_dst); 128 | 129 | finalize(); 130 | 131 | return EXIT_SUCCESS; 132 | } 133 | 134 | /* vi: set sw=4 sts=4 tw=80: */ 135 | -------------------------------------------------------------------------------- /upcxx/osu_upcxx_reduce.cpp: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC++ Reduce Latency Test" 2 | /* 3 | * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace std; 19 | using namespace upcxx; 20 | 21 | #define root 0 22 | #define VERIFY 0 23 | 24 | int 25 | main (int argc, char *argv[]) 26 | { 27 | init(&argc, &argv); 28 | 29 | global_ptr src; 30 | global_ptr dst; 31 | global_ptr time_src; 32 | global_ptr time_dst; 33 | 34 | double avg_time, max_time, min_time; 35 | int i = 0, size; 36 | int skip; 37 | int64_t t_start = 0, t_stop = 0, timer=0; 38 | int max_msg_size = 1<<20, full = 0; 39 | 40 | if (process_args(argc, argv, myrank(), &max_msg_size, &full, HEADER)) { 41 | return 0; 42 | } 43 | 44 | if (ranks() < 2) { 45 | if (myrank() == 0) { 46 | fprintf(stderr, "This test requires at least two processes\n"); 47 | } 48 | return -1; 49 | } 50 | 51 | src = allocate (myrank(), max_msg_size*sizeof(char)); 52 | dst = allocate (root, max_msg_size*sizeof(char)); 53 | 54 | assert(src != NULL); 55 | assert(dst != NULL); 56 | 57 | time_src = allocate (myrank(), 1); 58 | time_dst = allocate (root, 1); 59 | 60 | assert(time_src != NULL); 61 | assert(time_dst != NULL); 62 | 63 | /* 64 | * put a barrier since allocate is non-blocking in upc++ 65 | */ 66 | barrier(); 67 | 68 | print_header(HEADER, myrank(), full); 69 | 70 | for (size=1; size <=max_msg_size; size *= 2) { 71 | if (size > LARGE_MESSAGE_SIZE) { 72 | skip = SKIP_LARGE; 73 | iterations = iterations_large; 74 | } else { 75 | skip = SKIP; 76 | } 77 | 78 | timer=0; 79 | for (i=0; i < iterations + skip ; i++) { 80 | t_start = getMicrosecondTimeStamp(); 81 | upcxx_reduce((char *)src, (char *)dst, size*sizeof(char), 82 | root, UPCXX_SUM, UPCXX_CHAR); 83 | t_stop = getMicrosecondTimeStamp(); 84 | 85 | if (i>=skip){ 86 | timer+=t_stop-t_start; 87 | } 88 | barrier(); 89 | } 90 | 91 | barrier(); 92 | 93 | double* lsrc = (double *)time_src; 94 | lsrc[0] = (1.0 * timer) / iterations; 95 | 96 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 97 | UPCXX_MAX, 
UPCXX_DOUBLE); 98 | if (myrank()==root) { 99 | double* ldst = (double *)time_dst; 100 | max_time = ldst[0]; 101 | } 102 | 103 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 104 | UPCXX_MIN, UPCXX_DOUBLE); 105 | if (myrank()==root) { 106 | double* ldst = (double *)time_dst; 107 | min_time = ldst[0]; 108 | } 109 | 110 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 111 | UPCXX_SUM, UPCXX_DOUBLE); 112 | if (myrank()==root) { 113 | double* ldst = (double *)time_dst; 114 | avg_time = ldst[0]/ranks(); 115 | } 116 | 117 | barrier(); 118 | 119 | print_data(myrank(), full, size*sizeof(char), avg_time, min_time, 120 | max_time, iterations); 121 | } 122 | 123 | deallocate(src); 124 | deallocate(dst); 125 | deallocate(time_src); 126 | deallocate(time_dst); 127 | 128 | finalize(); 129 | 130 | return EXIT_SUCCESS; 131 | } 132 | 133 | /* vi: set sw=4 sts=4 tw=80: */ 134 | -------------------------------------------------------------------------------- /upcxx/osu_upcxx_scatter.cpp: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC++ Scatter Latency Test" 2 | /* 3 | * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace std; 19 | using namespace upcxx; 20 | 21 | #define root 0 22 | #define VERIFY 0 23 | 24 | int 25 | main (int argc, char *argv[]) 26 | { 27 | init(&argc, &argv); 28 | 29 | global_ptr src; 30 | global_ptr dst; 31 | global_ptr time_src; 32 | global_ptr time_dst; 33 | 34 | double avg_time, max_time, min_time; 35 | int i = 0, size; 36 | int skip; 37 | int64_t t_start = 0, t_stop = 0, timer=0; 38 | int max_msg_size = 1<<20, full = 0; 39 | 40 | if (process_args(argc, argv, myrank(), &max_msg_size, &full, HEADER)) { 41 | return 0; 42 | } 43 | 44 | if (ranks() < 2) { 45 | if (myrank() == 0) { 46 | fprintf(stderr, "This test requires at least two processes\n"); 47 | } 48 | return -1; 49 | } 50 | 51 | src = allocate (root, max_msg_size*sizeof(char)*ranks()); 52 | dst = allocate (myrank(), max_msg_size*sizeof(char)); 53 | 54 | assert(src != NULL); 55 | assert(dst != NULL); 56 | 57 | time_src = allocate (myrank(), 1); //for each node's local result 58 | time_dst = allocate (root, 1); //for reduction result on root 59 | 60 | assert(time_src != NULL); 61 | assert(time_dst != NULL); 62 | 63 | /* 64 | * put a barrier since allocate is non-blocking in upc++ 65 | */ 66 | barrier(); 67 | 68 | print_header(HEADER, myrank(), full); 69 | 70 | for (size=1; size <=max_msg_size; size *= 2) { 71 | if (size > LARGE_MESSAGE_SIZE) { 72 | skip = SKIP_LARGE; 73 | iterations = iterations_large; 74 | } else { 75 | skip = SKIP; 76 | } 77 | 78 | timer=0; 79 | for (i=0; i < iterations + skip ; i++) { 80 | t_start = getMicrosecondTimeStamp(); 81 | upcxx_scatter((char *)src, (char *)dst, size*sizeof(char), root); 82 | t_stop = getMicrosecondTimeStamp(); 83 | 84 | if (i>=skip) { 85 | timer+=t_stop-t_start; 86 | } 87 | barrier(); 88 | } 89 | 90 | barrier(); 91 | 92 | double* lsrc = (double *)time_src; 93 | lsrc[0] = (1.0 * timer) / iterations; 94 | 95 | upcxx_reduce((double *)time_src, (double 
*)time_dst, 1, root, 96 | UPCXX_MAX, UPCXX_DOUBLE); 97 | if (myrank()==root) { 98 | double* ldst = (double *)time_dst; 99 | max_time = ldst[0]; 100 | } 101 | 102 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 103 | UPCXX_MIN, UPCXX_DOUBLE); 104 | if (myrank()==root) { 105 | double* ldst = (double *)time_dst; 106 | min_time = ldst[0]; 107 | } 108 | 109 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 110 | UPCXX_SUM, UPCXX_DOUBLE); 111 | if (myrank()==root) { 112 | double* ldst = (double *)time_dst; 113 | avg_time = ldst[0]/ranks(); 114 | } 115 | 116 | barrier(); 117 | 118 | print_data(myrank(), full, size*sizeof(char), avg_time, min_time, 119 | max_time, iterations); 120 | } 121 | 122 | deallocate(src); 123 | deallocate(dst); 124 | deallocate(time_src); 125 | deallocate(time_dst); 126 | 127 | finalize(); 128 | 129 | return EXIT_SUCCESS; 130 | } 131 | 132 | /* vi: set sw=4 sts=4 tw=80: */ 133 | --------------------------------------------------------------------------------