├── CHANGES ├── COPYRIGHT ├── Makefile.am ├── Makefile.in ├── README ├── aclocal.m4 ├── compile ├── config.guess ├── config.sub ├── configure ├── configure.ac ├── depcomp ├── get_local_rank ├── install-sh ├── ltmain.sh ├── missing ├── mpi ├── Makefile.am ├── Makefile.in ├── collective │ ├── Makefile.am │ ├── Makefile.in │ ├── kernel.cu │ ├── osu_allgather.c │ ├── osu_allgatherv.c │ ├── osu_allreduce.c │ ├── osu_alltoall.c │ ├── osu_alltoallv.c │ ├── osu_barrier.c │ ├── osu_bcast.c │ ├── osu_coll.c │ ├── osu_coll.h │ ├── osu_gather.c │ ├── osu_gatherv.c │ ├── osu_iallgather.c │ ├── osu_iallgatherv.c │ ├── osu_ialltoall.c │ ├── osu_ialltoallv.c │ ├── osu_ialltoallw.c │ ├── osu_ibarrier.c │ ├── osu_ibcast.c │ ├── osu_igather.c │ ├── osu_igatherv.c │ ├── osu_iscatter.c │ ├── osu_iscatterv.c │ ├── osu_reduce.c │ ├── osu_reduce_scatter.c │ ├── osu_scatter.c │ └── osu_scatterv.c ├── one-sided │ ├── Makefile.am │ ├── Makefile.in │ ├── osu_1sc.c │ ├── osu_1sc.h │ ├── osu_acc_latency.c │ ├── osu_cas_latency.c │ ├── osu_fop_latency.c │ ├── osu_get_acc_latency.c │ ├── osu_get_bw.c │ ├── osu_get_latency.c │ ├── osu_put_bibw.c │ ├── osu_put_bw.c │ └── osu_put_latency.c ├── pt2pt │ ├── Makefile.am │ ├── Makefile.in │ ├── osu_bibw.c │ ├── osu_bw.c │ ├── osu_latency.c │ ├── osu_latency_mt.c │ ├── osu_mbw_mr.c │ ├── osu_multi_lat.c │ ├── osu_pt2pt.c │ └── osu_pt2pt.h └── startup │ ├── Makefile.am │ ├── Makefile.in │ ├── osu_hello.c │ └── osu_init.c ├── openshmem ├── Makefile.am ├── Makefile.in ├── osu_coll.h ├── osu_common.c ├── osu_common.h ├── osu_oshm_atomics.c ├── osu_oshm_barrier.c ├── osu_oshm_broadcast.c ├── osu_oshm_collect.c ├── osu_oshm_fcollect.c ├── osu_oshm_get.c ├── osu_oshm_put.c ├── osu_oshm_put_mr.c └── osu_oshm_reduce.c ├── upc ├── Makefile.am ├── Makefile.in ├── osu_coll.h ├── osu_common.c ├── osu_common.h ├── osu_upc_all_barrier.c ├── osu_upc_all_broadcast.c ├── osu_upc_all_exchange.c ├── osu_upc_all_gather.c ├── osu_upc_all_gather_all.c ├── osu_upc_all_reduce.c 
├── osu_upc_all_scatter.c ├── osu_upc_memget.c └── osu_upc_memput.c └── upcxx ├── Makefile.am ├── Makefile.in ├── osu_coll.h ├── osu_common.c ├── osu_common.h ├── osu_upcxx_allgather.cpp ├── osu_upcxx_alltoall.cpp ├── osu_upcxx_async_copy_get.cpp ├── osu_upcxx_async_copy_put.cpp ├── osu_upcxx_bcast.cpp ├── osu_upcxx_gather.cpp ├── osu_upcxx_reduce.cpp └── osu_upcxx_scatter.cpp /COPYRIGHT: -------------------------------------------------------------------------------- 1 | COPYRIGHT 2 | 3 | Copyright (c) 2001-2016, The Ohio State University. All rights 4 | reserved. 5 | 6 | The OMB (OSU Micro Benchmarks) software package is developed by the team 7 | members of The Ohio State University's Network-Based Computing Laboratory 8 | (NBCL), headed by Professor Dhabaleswar K. (DK) Panda. 9 | 10 | Contact: 11 | Prof. Dhabaleswar K. (DK) Panda 12 | Dept. of Computer Science and Engineering 13 | The Ohio State University 14 | 2015 Neil Avenue 15 | Columbus, OH - 43210-1277 16 | Tel: (614)-292-5199; Fax: (614)-292-2911 17 | E-mail:panda@cse.ohio-state.edu 18 | 19 | This program is available under BSD licensing. 20 | 21 | Redistribution and use in source and binary forms, with or without 22 | modification, are permitted provided that the following conditions are 23 | met: 24 | 25 | (1) Redistributions of source code must retain the above copyright 26 | notice, this list of conditions and the following disclaimer. 27 | 28 | (2) Redistributions in binary form must reproduce the above copyright 29 | notice, this list of conditions and the following disclaimer in the 30 | documentation and/or other materials provided with the distribution. 31 | 32 | (3) Neither the name of The Ohio State University nor the names of 33 | their contributors may be used to endorse or promote products derived 34 | from this software without specific prior written permission. 
35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 37 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 38 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 39 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 40 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 41 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 42 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 43 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 44 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 45 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 46 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 47 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = 2 | 3 | if CUDA 4 | dist_pkglibexec_SCRIPTS = get_local_rank 5 | endif 6 | 7 | if MPI 8 | SUBDIRS += mpi 9 | endif 10 | 11 | if OSHM 12 | SUBDIRS += openshmem 13 | endif 14 | 15 | if UPC 16 | SUBDIRS += upc 17 | endif 18 | 19 | if UPCXX 20 | SUBDIRS += upcxx 21 | endif 22 | 23 | EXTRA_DIST = README CHANGES COPYRIGHT 24 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | # -*- Autoconf -*- 2 | # Process this file with autoconf to produce a configure script. 
3 | 4 | AC_PREREQ([2.59]) 5 | AC_INIT([OSU-Micro-Benchmarks], [5.3], [mvapich-discuss@cse.ohio-state.edu]) 6 | AC_CONFIG_SRCDIR([mpi/pt2pt/osu_latency.c]) 7 | 8 | AM_INIT_AUTOMAKE([foreign]) 9 | LT_INIT 10 | 11 | AC_ARG_ENABLE([openacc], 12 | [AS_HELP_STRING([--enable-openacc], 13 | [Enable OpenACC benchmarks]) 14 | ], 15 | [], 16 | [enable_openacc=no]) 17 | 18 | AC_ARG_ENABLE([cuda], 19 | [AS_HELP_STRING([--enable-cuda], 20 | [Enable CUDA benchmarks (default is no). Specify 21 | --enable-cuda=basic to enable basic cuda support 22 | without using cuda kernel support for 23 | non-blocking collectives]) 24 | ], 25 | [], 26 | [enable_cuda=no]) 27 | 28 | AC_ARG_WITH([cuda], 29 | [AS_HELP_STRING([--with-cuda=@<:@CUDA installation path@:>@], 30 | [Provide path to CUDA installation]) 31 | ], 32 | [AS_CASE([$with_cuda], 33 | [yes|no], [], 34 | [CPPFLAGS="-I$with_cuda/include $CPPFLAGS" 35 | LDFLAGS="-L$with_cuda/lib64 -Wl,-rpath=$with_cuda/lib64 -L$with_cuda/lib -Wl,-rpath=$with_cuda/lib $LDFLAGS"]) 36 | ]) 37 | 38 | AC_ARG_WITH([cuda-include], 39 | [AS_HELP_STRING([--with-cuda-include=@<:@CUDA include path@:>@], 40 | [Provide path to CUDA include files]) 41 | ], 42 | [AS_CASE([$with_cuda_include], 43 | [yes|no], [], 44 | [CPPFLAGS="-I$with_cuda_include $CPPFLAGS"]) 45 | ]) 46 | 47 | AC_ARG_WITH([cuda-libpath], 48 | [AS_HELP_STRING([--with-cuda-libpath=@<:@CUDA library path@:>@], 49 | [Provide path to CUDA library files]) 50 | ], 51 | [AS_CASE([$with_cuda_libpath], 52 | [yes|no], [], 53 | [LDFLAGS="-L$with_cuda_libpath -Wl,-rpath=$with_cuda_libpath $LDFLAGS"]) 54 | ]) 55 | 56 | # Checks for programs. 57 | AC_PROG_CC([mpicc oshcc upcc upc++]) 58 | 59 | # Checks for mpicxx used for compiling kernel.cu in nbc benchmarks and/or the 60 | # upc++ compiler for upcxx benchmarks 61 | AC_PROG_CXX([mpicxx upc++]) 62 | 63 | # Checks for libraries. 64 | AC_SEARCH_LIBS([sqrt], [m]) 65 | AC_SEARCH_LIBS([pthread_create], [pthread]) 66 | 67 | # Checks for header files. 
68 | AC_CHECK_HEADERS([stdlib.h string.h sys/time.h unistd.h]) 69 | 70 | # Checks for typedefs, structures, and compiler characteristics. 71 | AC_C_INLINE 72 | 73 | # Checks for library functions. 74 | AC_CHECK_FUNCS([getpagesize gettimeofday memset sqrt]) 75 | 76 | AS_IF([test "x$enable_embedded" = xyes], [ 77 | AS_IF([test x"$enable_mpi3" = xyes], [mpi3_library=true]) 78 | AS_IF([test x"$enable_mpi2" = xyes], [mpi2_library=true]) 79 | AS_IF([test x"$enable_mpi" = xyes], [mpi_library=true]) 80 | AS_IF([test x"$enable_oshm" = xyes], [oshm_library=true]) 81 | AS_IF([test x"$enable_upc" = xyes], [upc_compiler=true]) 82 | AS_IF([test x"$enable_upcxx" = xyes], [upcxx_compiler=true]) 83 | ], [ 84 | AC_CHECK_FUNC([MPI_Init], [mpi_library=true]) 85 | AC_CHECK_FUNC([MPI_Accumulate], [mpi2_library=true]) 86 | AC_CHECK_FUNC([MPI_Get_accumulate], [mpi3_library=true]) 87 | AC_CHECK_FUNC([shmem_barrier_all], [oshm_library=true]) 88 | AC_CHECK_FUNC([upc_memput], [upc_compiler=true]) 89 | AC_CHECK_DECL([upcxx_alltoall], [upcxx_compiler=true], [], 90 | [#include <upcxx.h>]) 91 | ]) 92 | 93 | AM_CONDITIONAL([EMBEDDED_BUILD], [test x"$enable_embedded" = xyes]) 94 | AM_CONDITIONAL([BUILD_PROFILING_LIB], [test x"$with_plib" = xyes]) 95 | AC_SUBST([PMPILIBNAME], [$PMPILIBNAME]) 96 | AC_SUBST([MPILIBNAME], [$MPILIBNAME]) 97 | 98 | AS_IF([test "x$enable_openacc" = xyes], [ 99 | AC_CHECK_HEADERS([openacc.h], [], 100 | [AC_MSG_ERROR([cannot include openacc.h])]) 101 | AC_DEFINE([_ENABLE_OPENACC_], [1], [Enable OpenACC]) 102 | ]) 103 | 104 | AS_CASE([$enable_cuda], 105 | [yes], [build_cuda_kernels=yes; build_cuda=yes], 106 | [basic], [build_cuda=yes]) 107 | 108 | AS_IF([test "x$build_cuda" = xyes], [ 109 | AC_SEARCH_LIBS([cuPointerGetAttribute], [cuda], [], 110 | [AC_MSG_ERROR([cannot link with -lcuda])]) 111 | AC_SEARCH_LIBS([cudaFree], [cudart], [], 112 | [AC_MSG_ERROR([cannot link with -lcudart])]) 113 | AC_CHECK_HEADERS([cuda.h], [], 114 | [AC_MSG_ERROR([cannot include cuda.h])]) 115 | 
AC_DEFINE([_ENABLE_CUDA_], [1], [Enable CUDA]) 116 | ]) 117 | 118 | AS_IF([test "x$build_cuda_kernels" = xyes], [ 119 | AC_DEFINE([_ENABLE_CUDA_KERNEL_], [1], [Enable CUDA Kernel]) 120 | ]) 121 | 122 | AM_CONDITIONAL([MPI2_LIBRARY], [test x$mpi2_library = xtrue]) 123 | AM_CONDITIONAL([MPI3_LIBRARY], [test x$mpi3_library = xtrue]) 124 | AM_CONDITIONAL([CUDA], [test x$build_cuda = xyes]) 125 | AM_CONDITIONAL([CUDA_KERNELS], [test x$build_cuda_kernels = xyes]) 126 | AM_CONDITIONAL([OPENACC], [test x$enable_openacc = xyes]) 127 | AM_CONDITIONAL([OSHM], [test x$oshm_library = xtrue]) 128 | AM_CONDITIONAL([MPI], [test x$mpi_library = xtrue]) 129 | AM_CONDITIONAL([UPC], [test x$upc_compiler = xtrue]) 130 | AM_CONDITIONAL([UPCXX], [test x$upcxx_compiler = xtrue]) 131 | 132 | AC_DEFINE([FIELD_WIDTH], [18], [Width of field used to report numbers]) 133 | AC_DEFINE([FLOAT_PRECISION], [2], [Precision of reported numbers]) 134 | 135 | AC_CONFIG_FILES([Makefile mpi/Makefile mpi/pt2pt/Makefile mpi/startup/Makefile 136 | mpi/one-sided/Makefile mpi/collective/Makefile 137 | openshmem/Makefile upc/Makefile upcxx/Makefile]) 138 | AC_OUTPUT 139 | -------------------------------------------------------------------------------- /get_local_rank: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LOCAL_RANK=$MV2_COMM_WORLD_LOCAL_RANK 4 | exec $* 5 | -------------------------------------------------------------------------------- /mpi/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = pt2pt collective startup 2 | 3 | if MPI2_LIBRARY 4 | SUBDIRS += one-sided 5 | endif 6 | -------------------------------------------------------------------------------- /mpi/collective/Makefile.am: -------------------------------------------------------------------------------- 1 | NVCC = nvcc 2 | NVCFLAGS = -cuda -maxrregcount 32 3 | SUFFIXES = .cu .cpp 4 | .cu.cpp: 5 | $(NVCC) $(NVCFLAGS) 
$(INCLUDES) $(CPPFLAGS) --output-file $@ $< 6 | 7 | collectivedir = $(pkglibexecdir)/mpi/collective 8 | collective_PROGRAMS = osu_alltoallv osu_allgatherv osu_scatterv osu_gatherv osu_reduce_scatter osu_barrier osu_reduce osu_allreduce osu_alltoall osu_bcast osu_gather osu_allgather osu_scatter osu_iallgather osu_ibcast osu_ialltoall osu_ibarrier osu_igather osu_iscatter osu_iscatterv osu_igatherv osu_iallgatherv osu_ialltoallv osu_ialltoallw 9 | 10 | osu_alltoallv_SOURCES = osu_alltoallv.c osu_coll.c osu_coll.h 11 | osu_allgatherv_SOURCES = osu_allgatherv.c osu_coll.c osu_coll.h 12 | osu_scatterv_SOURCES = osu_scatterv.c osu_coll.c osu_coll.h 13 | osu_gather_SOURCES = osu_gather.c osu_coll.c osu_coll.h 14 | osu_gatherv_SOURCES = osu_gatherv.c osu_coll.c osu_coll.h 15 | osu_reduce_scatter_SOURCES = osu_reduce_scatter.c osu_coll.c osu_coll.h 16 | osu_barrier_SOURCES = osu_barrier.c osu_coll.c osu_coll.h 17 | osu_reduce_SOURCES = osu_reduce.c osu_coll.c osu_coll.h 18 | osu_allreduce_SOURCES = osu_allreduce.c osu_coll.c osu_coll.h 19 | osu_bcast_SOURCES = osu_bcast.c osu_coll.c osu_coll.h 20 | osu_alltoall_SOURCES = osu_alltoall.c osu_coll.c osu_coll.h 21 | osu_ialltoall_SOURCES = osu_ialltoall.c osu_coll.c osu_coll.h 22 | osu_ialltoallv_SOURCES = osu_ialltoallv.c osu_coll.c osu_coll.h 23 | osu_ialltoallw_SOURCES = osu_ialltoallw.c osu_coll.c osu_coll.h 24 | osu_ibarrier_SOURCES = osu_ibarrier.c osu_coll.c osu_coll.h 25 | osu_ibcast_SOURCES = osu_ibcast.c osu_coll.c osu_coll.h 26 | osu_igather_SOURCES = osu_igather.c osu_coll.c osu_coll.h 27 | osu_igatherv_SOURCES = osu_igatherv.c osu_coll.c osu_coll.h 28 | osu_allgather_SOURCES = osu_allgather.c osu_coll.c osu_coll.h 29 | osu_iallgather_SOURCES = osu_iallgather.c osu_coll.c osu_coll.h 30 | osu_iallgatherv_SOURCES = osu_iallgatherv.c osu_coll.c osu_coll.h 31 | osu_scatter_SOURCES = osu_scatter.c osu_coll.c osu_coll.h 32 | osu_iscatter_SOURCES = osu_iscatter.c osu_coll.c osu_coll.h 33 | osu_iscatterv_SOURCES = 
osu_iscatterv.c osu_coll.c osu_coll.h 34 | 35 | if CUDA_KERNELS 36 | osu_alltoall_SOURCES += kernel.cu 37 | osu_alltoallv_SOURCES += kernel.cu 38 | osu_allgather_SOURCES += kernel.cu 39 | osu_allgatherv_SOURCES += kernel.cu 40 | osu_barrier_SOURCES += kernel.cu 41 | osu_bcast_SOURCES += kernel.cu 42 | osu_scatter_SOURCES += kernel.cu 43 | osu_scatterv_SOURCES += kernel.cu 44 | osu_gather_SOURCES += kernel.cu 45 | osu_gatherv_SOURCES += kernel.cu 46 | osu_allreduce_SOURCES += kernel.cu 47 | osu_reduce_SOURCES += kernel.cu 48 | osu_reduce_scatter_SOURCES += kernel.cu 49 | osu_ialltoall_SOURCES += kernel.cu 50 | osu_ialltoallv_SOURCES += kernel.cu 51 | osu_ialltoallw_SOURCES += kernel.cu 52 | osu_iallgather_SOURCES += kernel.cu 53 | osu_iallgatherv_SOURCES += kernel.cu 54 | osu_ibarrier_SOURCES += kernel.cu 55 | osu_ibcast_SOURCES += kernel.cu 56 | osu_iscatter_SOURCES += kernel.cu 57 | osu_iscatterv_SOURCES += kernel.cu 58 | osu_igather_SOURCES += kernel.cu 59 | osu_igatherv_SOURCES += kernel.cu 60 | endif 61 | 62 | if EMBEDDED_BUILD 63 | AM_LDFLAGS = 64 | AM_CPPFLAGS = -I$(top_builddir)/../src/include \ 65 | -I${top_srcdir}/../src/include 66 | if BUILD_PROFILING_LIB 67 | AM_LDFLAGS += $(top_builddir)/../lib/lib@PMPILIBNAME@.la 68 | endif 69 | AM_LDFLAGS += $(top_builddir)/../lib/lib@MPILIBNAME@.la 70 | endif 71 | 72 | if OPENACC 73 | AM_CFLAGS = -acc 74 | endif 75 | -------------------------------------------------------------------------------- /mpi/collective/kernel.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 
9 | */ 10 | 11 | __global__ 12 | void compute_kernel(float a, float * x, float * y, int N) 13 | { 14 | int i = blockIdx.x * blockDim.x + threadIdx.x; 15 | 16 | int count = 0; 17 | 18 | if (i < N) { 19 | for(count=0; count < (N/8); count++) { 20 | y[i] = a * x[i] + y[i]; 21 | } 22 | } 23 | } 24 | 25 | extern "C" 26 | void 27 | call_kernel(float a, float * d_x, float * d_y, int N, cudaStream_t * stream) 28 | { 29 | compute_kernel<<<(N+255)/256, 256, 0, *stream>>>(a, d_x, d_y, N); 30 | } 31 | 32 | 33 | -------------------------------------------------------------------------------- /mpi/collective/osu_allgather.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Allgather Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | #include "osu_coll.h" 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int i, numprocs, rank, size; 16 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 17 | double timer=0.0; 18 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 19 | char *sendbuf, *recvbuf; 20 | int po_ret; 21 | size_t bufsize; 22 | 23 | set_header(HEADER); 24 | set_benchmark_name("osu_allgather"); 25 | enable_accel_support(); 26 | po_ret = process_options(argc, argv); 27 | 28 | if (po_okay == po_ret && none != options.accel) { 29 | if (init_accel()) { 30 | fprintf(stderr, "Error initializing device\n"); 31 | exit(EXIT_FAILURE); 32 | } 33 | } 34 | 35 | MPI_Init(&argc, &argv); 36 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 37 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 38 | 39 | switch (po_ret) { 40 | case po_bad_usage: 41 | print_bad_usage_message(rank); 42 | MPI_Finalize(); 43 | exit(EXIT_FAILURE); 44 | case po_help_message: 45 | print_help_message(rank); 46 | MPI_Finalize(); 47 | exit(EXIT_SUCCESS); 48 | case po_version_message: 49 | print_version_message(rank); 50 | MPI_Finalize(); 51 | exit(EXIT_SUCCESS); 52 | case po_okay: 53 | break; 54 | } 55 | 56 | if(numprocs < 2) { 57 | if (rank == 0) { 58 | fprintf(stderr, "This test requires at least two processes\n"); 59 | } 60 | 61 | MPI_Finalize(); 62 | exit(EXIT_FAILURE); 63 | } 64 | 65 | if ((options.max_message_size * numprocs) > options.max_mem_limit) { 66 | options.max_message_size = options.max_mem_limit / numprocs; 67 | } 68 | 69 | if (allocate_buffer((void**)&sendbuf, options.max_message_size, options.accel)) { 70 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 71 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 72 | } 73 | set_buffer(sendbuf, options.accel, 1, options.max_message_size); 74 | 75 | bufsize = options.max_message_size * numprocs; 76 | if (allocate_buffer((void**)&recvbuf, bufsize, 77 | options.accel)) { 78 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 79 | 
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 80 | } 81 | set_buffer(recvbuf, options.accel, 0, bufsize); 82 | 83 | print_preamble(rank); 84 | 85 | for(size=options.min_message_size; size <= options.max_message_size; size *= 2) { 86 | 87 | if(size > LARGE_MESSAGE_SIZE) { 88 | options.skip = options.skip_large; 89 | options.iterations = options.iterations_large; 90 | } 91 | 92 | MPI_Barrier(MPI_COMM_WORLD); 93 | timer=0.0; 94 | for(i=0; i < options.iterations + options.skip ; i++) { 95 | t_start = MPI_Wtime(); 96 | MPI_Allgather( sendbuf, size, MPI_CHAR, 97 | recvbuf, size, MPI_CHAR, MPI_COMM_WORLD ); 98 | 99 | t_stop = MPI_Wtime(); 100 | 101 | if(i >= options.skip) { 102 | timer+= t_stop-t_start; 103 | } 104 | MPI_Barrier(MPI_COMM_WORLD); 105 | 106 | } 107 | 108 | MPI_Barrier(MPI_COMM_WORLD); 109 | 110 | latency = (double)(timer * 1e6) / options.iterations; 111 | 112 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 113 | MPI_COMM_WORLD); 114 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 115 | MPI_COMM_WORLD); 116 | MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 117 | MPI_COMM_WORLD); 118 | avg_time = avg_time/numprocs; 119 | 120 | print_stats(rank, size, avg_time, min_time, max_time); 121 | MPI_Barrier(MPI_COMM_WORLD); 122 | } 123 | 124 | free_buffer(sendbuf, options.accel); 125 | free_buffer(recvbuf, options.accel); 126 | 127 | MPI_Finalize(); 128 | 129 | if (none != options.accel) { 130 | if (cleanup_accel()) { 131 | fprintf(stderr, "Error cleaning up device\n"); 132 | exit(EXIT_FAILURE); 133 | } 134 | } 135 | 136 | return EXIT_SUCCESS; 137 | } 138 | /* vi: set sw=4 sts=4 tw=80: */ 139 | -------------------------------------------------------------------------------- /mpi/collective/osu_allgatherv.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Allgatherv Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The 
Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | #include "osu_coll.h" 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int i, numprocs, rank, size, disp; 16 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 17 | double timer=0.0; 18 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 19 | char *sendbuf, *recvbuf; 20 | int *rdispls=NULL, *recvcounts=NULL; 21 | int po_ret; 22 | size_t bufsize; 23 | 24 | set_header(HEADER); 25 | set_benchmark_name("osu_allgather"); 26 | enable_accel_support(); 27 | po_ret = process_options(argc, argv); 28 | 29 | if (po_okay == po_ret && none != options.accel) { 30 | if (init_accel()) { 31 | fprintf(stderr, "Error initializing device\n"); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | 36 | MPI_Init(&argc, &argv); 37 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 38 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 39 | 40 | switch (po_ret) { 41 | case po_bad_usage: 42 | print_bad_usage_message(rank); 43 | MPI_Finalize(); 44 | exit(EXIT_FAILURE); 45 | case po_help_message: 46 | print_help_message(rank); 47 | MPI_Finalize(); 48 | exit(EXIT_SUCCESS); 49 | case po_version_message: 50 | print_version_message(rank); 51 | MPI_Finalize(); 52 | exit(EXIT_SUCCESS); 53 | case po_okay: 54 | break; 55 | } 56 | 57 | if(numprocs < 2) { 58 | if (rank == 0) { 59 | fprintf(stderr, "This test requires at least two processes\n"); 60 | } 61 | 62 | MPI_Finalize(); 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | if ((options.max_message_size * numprocs) > options.max_mem_limit) { 67 | options.max_message_size = options.max_mem_limit / numprocs; 68 | } 69 | 70 | if (allocate_buffer((void**)&recvcounts, numprocs*sizeof(int), none)) { 71 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 72 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 73 | } 74 | if 
(allocate_buffer((void**)&rdispls, numprocs*sizeof(int), none)) { 75 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 76 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 77 | } 78 | 79 | if (allocate_buffer((void**)&sendbuf, options.max_message_size, options.accel)) { 80 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 81 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 82 | } 83 | set_buffer(sendbuf, options.accel, 1, options.max_message_size); 84 | 85 | bufsize = options.max_message_size * numprocs; 86 | if (allocate_buffer((void**)&recvbuf, bufsize, 87 | options.accel)) { 88 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 89 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 90 | } 91 | set_buffer(recvbuf, options.accel, 0, bufsize); 92 | 93 | print_preamble(rank); 94 | 95 | for(size=options.min_message_size; size <= options.max_message_size; size *= 2) { 96 | if(size > LARGE_MESSAGE_SIZE) { 97 | options.skip = options.skip_large; 98 | options.iterations = options.iterations_large; 99 | } 100 | 101 | MPI_Barrier(MPI_COMM_WORLD); 102 | 103 | disp =0; 104 | for ( i = 0; i < numprocs; i++) { 105 | recvcounts[i] = size; 106 | rdispls[i] = disp; 107 | disp += size; 108 | } 109 | 110 | MPI_Barrier(MPI_COMM_WORLD); 111 | timer=0.0; 112 | for(i=0; i < options.iterations + options.skip ; i++) { 113 | 114 | t_start = MPI_Wtime(); 115 | 116 | MPI_Allgatherv(sendbuf, size, MPI_CHAR, recvbuf, recvcounts, rdispls, MPI_CHAR, MPI_COMM_WORLD); 117 | 118 | t_stop = MPI_Wtime(); 119 | 120 | if(i >= options.skip) { 121 | timer+= t_stop-t_start; 122 | } 123 | MPI_Barrier(MPI_COMM_WORLD); 124 | 125 | } 126 | 127 | MPI_Barrier(MPI_COMM_WORLD); 128 | 129 | latency = (double)(timer * 1e6) / options.iterations; 130 | 131 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 132 | MPI_COMM_WORLD); 133 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 134 | MPI_COMM_WORLD); 135 | MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 
136 | MPI_COMM_WORLD); 137 | avg_time = avg_time/numprocs; 138 | 139 | print_stats(rank, size, avg_time, min_time, max_time); 140 | MPI_Barrier(MPI_COMM_WORLD); 141 | } 142 | 143 | free_buffer(rdispls, none); 144 | free_buffer(recvcounts, none); 145 | free_buffer(sendbuf, options.accel); 146 | free_buffer(recvbuf, options.accel); 147 | 148 | MPI_Finalize(); 149 | 150 | if (none != options.accel) { 151 | if (cleanup_accel()) { 152 | fprintf(stderr, "Error cleaning up device\n"); 153 | exit(EXIT_FAILURE); 154 | } 155 | } 156 | 157 | return EXIT_SUCCESS; 158 | } 159 | /* vi: set sw=4 sts=4 tw=80: */ 160 | -------------------------------------------------------------------------------- /mpi/collective/osu_allreduce.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Allreduce Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | #include "osu_coll.h" 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int i, numprocs, rank, size; 16 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 17 | double timer=0.0; 18 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 19 | float *sendbuf, *recvbuf; 20 | int po_ret; 21 | size_t bufsize; 22 | 23 | set_header(HEADER); 24 | set_benchmark_name("osu_allreduce"); 25 | enable_accel_support(); 26 | po_ret = process_options(argc, argv); 27 | 28 | if (po_okay == po_ret && none != options.accel) { 29 | if (init_accel()) { 30 | fprintf(stderr, "Error initializing device\n"); 31 | exit(EXIT_FAILURE); 32 | } 33 | } 34 | 35 | MPI_Init(&argc, &argv); 36 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 37 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 38 | 39 | switch (po_ret) { 40 | case po_bad_usage: 41 | print_bad_usage_message(rank); 42 | MPI_Finalize(); 43 | exit(EXIT_FAILURE); 44 | case po_help_message: 45 | print_help_message(rank); 46 | MPI_Finalize(); 47 | exit(EXIT_SUCCESS); 48 | case po_version_message: 49 | print_version_message(rank); 50 | MPI_Finalize(); 51 | exit(EXIT_SUCCESS); 52 | case po_okay: 53 | break; 54 | } 55 | 56 | if(numprocs < 2) { 57 | if (rank == 0) { 58 | fprintf(stderr, "This test requires at least two processes\n"); 59 | } 60 | 61 | MPI_Finalize(); 62 | exit(EXIT_FAILURE); 63 | } 64 | 65 | if (options.max_message_size > options.max_mem_limit) { 66 | options.max_message_size = options.max_mem_limit; 67 | } 68 | 69 | options.min_message_size /= sizeof(float); 70 | if (options.min_message_size < DEFAULT_MIN_MESSAGE_SIZE) { 71 | options.min_message_size = DEFAULT_MIN_MESSAGE_SIZE; 72 | } 73 | 74 | bufsize = sizeof(float)*(options.max_message_size/sizeof(float)); 75 | if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) { 76 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 77 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 78 | } 79 | set_buffer(sendbuf, options.accel, 1, bufsize); 80 | 81 | bufsize = 
sizeof(float)*(options.max_message_size/sizeof(float)); 82 | if (allocate_buffer((void**)&recvbuf, bufsize, options.accel)) { 83 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 84 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 85 | } 86 | set_buffer(recvbuf, options.accel, 0, bufsize); 87 | 88 | print_preamble(rank); 89 | 90 | for(size=options.min_message_size; size*sizeof(float) <= options.max_message_size; size *= 2) { 91 | 92 | if(size > LARGE_MESSAGE_SIZE) { 93 | options.skip = options.skip_large; 94 | options.iterations = options.iterations_large; 95 | } 96 | 97 | MPI_Barrier(MPI_COMM_WORLD); 98 | 99 | timer=0.0; 100 | for(i=0; i < options.iterations + options.skip ; i++) { 101 | t_start = MPI_Wtime(); 102 | MPI_Allreduce(sendbuf, recvbuf, size, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD ); 103 | t_stop=MPI_Wtime(); 104 | if(i>=options.skip){ 105 | 106 | timer+=t_stop-t_start; 107 | } 108 | MPI_Barrier(MPI_COMM_WORLD); 109 | } 110 | latency = (double)(timer * 1e6) / options.iterations; 111 | 112 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 113 | MPI_COMM_WORLD); 114 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 115 | MPI_COMM_WORLD); 116 | MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 117 | MPI_COMM_WORLD); 118 | avg_time = avg_time/numprocs; 119 | 120 | print_stats(rank, size * sizeof(float), avg_time, min_time, max_time); 121 | MPI_Barrier(MPI_COMM_WORLD); 122 | } 123 | 124 | free_buffer(sendbuf, options.accel); 125 | free_buffer(recvbuf, options.accel); 126 | 127 | MPI_Finalize(); 128 | 129 | if (none != options.accel) { 130 | if (cleanup_accel()) { 131 | fprintf(stderr, "Error cleaning up device\n"); 132 | exit(EXIT_FAILURE); 133 | } 134 | } 135 | 136 | return EXIT_SUCCESS; 137 | } 138 | -------------------------------------------------------------------------------- /mpi/collective/osu_alltoall.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU 
MPI%s All-to-All Personalized Exchange Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | #include "osu_coll.h" 12 | 13 | int 14 | main (int argc, char *argv[]) 15 | { 16 | int i, numprocs, rank, size; 17 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 18 | double timer=0.0; 19 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 20 | char * sendbuf = NULL, * recvbuf = NULL; 21 | int po_ret; 22 | size_t bufsize; 23 | 24 | set_header(HEADER); 25 | set_benchmark_name("osu_alltoall"); 26 | enable_accel_support(); 27 | po_ret = process_options(argc, argv); 28 | 29 | if (po_okay == po_ret && none != options.accel) { 30 | if (init_accel()) { 31 | fprintf(stderr, "Error initializing device\n"); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | 36 | MPI_Init(&argc, &argv); 37 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 38 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 39 | 40 | switch (po_ret) { 41 | case po_bad_usage: 42 | print_bad_usage_message(rank); 43 | MPI_Finalize(); 44 | exit(EXIT_FAILURE); 45 | case po_help_message: 46 | print_help_message(rank); 47 | MPI_Finalize(); 48 | exit(EXIT_SUCCESS); 49 | case po_version_message: 50 | print_version_message(rank); 51 | MPI_Finalize(); 52 | exit(EXIT_SUCCESS); 53 | case po_okay: 54 | break; 55 | } 56 | 57 | if(numprocs < 2) { 58 | if (rank == 0) { 59 | fprintf(stderr, "This test requires at least two processes\n"); 60 | } 61 | 62 | MPI_Finalize(); 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | if ((options.max_message_size * numprocs) > options.max_mem_limit) { 67 | options.max_message_size = options.max_mem_limit / numprocs; 68 | } 69 | 70 | bufsize = options.max_message_size * numprocs; 71 | 72 | if (allocate_buffer((void**)&sendbuf, 
bufsize, options.accel)) { 73 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 74 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 75 | } 76 | 77 | set_buffer(sendbuf, options.accel, 1, bufsize); 78 | 79 | if (allocate_buffer((void**)&recvbuf, options.max_message_size * numprocs, 80 | options.accel)) { 81 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 82 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 83 | } 84 | 85 | set_buffer(recvbuf, options.accel, 0, bufsize); 86 | print_preamble(rank); 87 | 88 | for(size=options.min_message_size; size <= options.max_message_size; size *= 2) { 89 | if (size > LARGE_MESSAGE_SIZE) { 90 | options.skip = options.skip_large; 91 | options.iterations = options.iterations_large; 92 | } 93 | 94 | MPI_Barrier(MPI_COMM_WORLD); 95 | timer=0.0; 96 | 97 | for (i=0; i < options.iterations + options.skip ; i++) { 98 | t_start = MPI_Wtime(); 99 | MPI_Alltoall(sendbuf, size, MPI_CHAR, recvbuf, size, MPI_CHAR, 100 | MPI_COMM_WORLD); 101 | t_stop = MPI_Wtime(); 102 | 103 | if (i >= options.skip) { 104 | timer+=t_stop-t_start; 105 | } 106 | MPI_Barrier(MPI_COMM_WORLD); 107 | } 108 | latency = (double)(timer * 1e6) / options.iterations; 109 | 110 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 111 | MPI_COMM_WORLD); 112 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 113 | MPI_COMM_WORLD); 114 | MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 115 | MPI_COMM_WORLD); 116 | avg_time = avg_time/numprocs; 117 | 118 | print_stats(rank, size, avg_time, min_time, max_time); 119 | MPI_Barrier(MPI_COMM_WORLD); 120 | } 121 | 122 | free_buffer(sendbuf, options.accel); 123 | free_buffer(recvbuf, options.accel); 124 | 125 | MPI_Finalize(); 126 | 127 | if (none != options.accel) { 128 | if (cleanup_accel()) { 129 | fprintf(stderr, "Error cleaning up device\n"); 130 | exit(EXIT_FAILURE); 131 | } 132 | } 133 | 134 | return EXIT_SUCCESS; 135 | } 136 | 137 | /* vi: set sw=4 sts=4 tw=80: */ 138 | 
--------------------------------------------------------------------------------
/mpi/collective/osu_alltoallv.c:
--------------------------------------------------------------------------------
#define BENCHMARK "OSU MPI%s All-to-Allv Personalized Exchange Latency Test"
/*
 * Copyright (C) 2002-2016 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */
#include "osu_coll.h"

/*
 * Measures MPI_Alltoallv latency.  Counts and displacements are uniform
 * (every rank exchanges `size` bytes with every peer), so this mirrors
 * osu_alltoall but exercises the vector interface.
 */
int main(int argc, char *argv[])
{
    int i = 0, rank = 0, size, numprocs, disp;
    double latency=0.0, t_start = 0.0, t_stop = 0.0;
    double timer=0.0;
    double avg_time = 0.0, max_time = 0.0, min_time = 0.0;
    char *sendbuf=NULL, *recvbuf=NULL;
    int *rdispls=NULL, *recvcounts=NULL, *sdispls=NULL, *sendcounts=NULL;
    int po_ret;
    size_t bufsize;

    set_header(HEADER);
    set_benchmark_name("osu_alltoallv");
    enable_accel_support();
    po_ret = process_options(argc, argv);

    /* Device (accelerator) setup happens before MPI_Init when requested. */
    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            MPI_Finalize();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
    }

    if(numprocs < 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    /* Each rank holds numprocs segments per buffer; keep the total
     * allocation within the configured memory limit. */
    if ((options.max_message_size * numprocs) > options.max_mem_limit) {
        options.max_message_size = options.max_mem_limit / numprocs;
    }

    /* Count/displacement vectors always live in host memory (`none`),
     * regardless of where the data buffers are placed. */
    if (allocate_buffer((void**)&recvcounts, numprocs*sizeof(int), none)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    if (allocate_buffer((void**)&sendcounts, numprocs*sizeof(int), none)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    if (allocate_buffer((void**)&rdispls, numprocs*sizeof(int), none)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    if (allocate_buffer((void**)&sdispls, numprocs*sizeof(int), none)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    bufsize = options.max_message_size * numprocs;
    if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(sendbuf, options.accel, 1, bufsize);

    if (allocate_buffer((void**)&recvbuf, bufsize,
                options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(recvbuf, options.accel, 0, bufsize);

    print_preamble(rank);

    MPI_Barrier(MPI_COMM_WORLD);

    for(size=options.min_message_size; size <= options.max_message_size; size *= 2) {
        if(size > LARGE_MESSAGE_SIZE) {
            options.skip = options.skip_large;
            options.iterations = options.iterations_large;
        }

        /* Uniform vectors: `size` bytes to/from every peer, packed
         * contiguously at multiples of `size`. */
        disp =0;
        for ( i = 0; i < numprocs; i++) {
            recvcounts[i] = size;
            sendcounts[i] = size;
            rdispls[i] = disp;
            sdispls[i] = disp;
            disp += size;

        }

        MPI_Barrier(MPI_COMM_WORLD);

        timer=0.0;
        /* Timed loop; the first options.skip iterations are warm-up and
         * are excluded from the measurement. */
        for(i = 0; i < options.iterations + options.skip; i++) {
            t_start = MPI_Wtime();

            MPI_Alltoallv(sendbuf, sendcounts, sdispls, MPI_CHAR, recvbuf, recvcounts, rdispls, MPI_CHAR,
                    MPI_COMM_WORLD);

            t_stop = MPI_Wtime();

            if(i>=options.skip)
            {
                timer+=t_stop-t_start;
            }
            MPI_Barrier(MPI_COMM_WORLD);
        }

        latency = (double)(timer * 1e6) / options.iterations;

        /* Aggregate per-rank latency into min/max/avg on rank 0. */
        MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0,
                MPI_COMM_WORLD);
        avg_time = avg_time/numprocs;

        print_stats(rank, size, avg_time, min_time, max_time);

        MPI_Barrier(MPI_COMM_WORLD);
    }

    free_buffer(rdispls, none);
    free_buffer(sdispls, none);
    free_buffer(recvcounts, none);
    free_buffer(sendcounts, none);
    free_buffer(sendbuf, options.accel);
    free_buffer(recvbuf, options.accel);

    MPI_Finalize();

    if (none != options.accel) {
        if (cleanup_accel()) {
            fprintf(stderr, "Error cleaning up device\n");
            exit(EXIT_FAILURE);
        }
    }

    return EXIT_SUCCESS;
}

/* vi: set sw=4 sts=4 tw=80: */
--------------------------------------------------------------------------------
/mpi/collective/osu_barrier.c:
--------------------------------------------------------------------------------
#define BENCHMARK "OSU MPI%s Barrier Latency Test"
/*
 * Copyright (C) 2002-2016 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K.
Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */

#include "osu_coll.h"

/*
 * Measures the average latency of MPI_Barrier across all ranks.  There is
 * no payload, so a single (size-less) result line is printed.
 */
int main(int argc, char *argv[])
{
    int i = 0, rank;
    int numprocs;
    double avg_time = 0.0, max_time = 0.0, min_time = 0.0;
    double latency = 0.0, t_start = 0.0, t_stop = 0.0;
    double timer=0.0;
    int po_ret;

    set_header(HEADER);
    set_benchmark_name("osu_barrier");
    enable_accel_support();
    po_ret = process_options(argc, argv);

    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

    /* Barrier has no message size; suppress the size column in output. */
    options.show_size = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            MPI_Finalize();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
    }

    if(numprocs < 2) {
        if(rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();

        return EXIT_FAILURE;
    }

    print_preamble(rank);

    /* Single measurement; use the "large message" iteration counts. */
    options.skip = options.skip_large;
    options.iterations = options.iterations_large;
    timer = 0.0;

    /* Timed loop; the first options.skip iterations are warm-up. */
    for(i=0; i < options.iterations + options.skip ; i++) {
        t_start = MPI_Wtime();
        MPI_Barrier(MPI_COMM_WORLD);
        t_stop = MPI_Wtime();

        if(i>=options.skip){
            timer+=t_stop-t_start;
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    latency = (timer * 1e6) / options.iterations;

    /* Aggregate per-rank latency into min/max/avg on rank 0. */
    MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0,
            MPI_COMM_WORLD);
    MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0,
            MPI_COMM_WORLD);
    MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0,
            MPI_COMM_WORLD);
    avg_time = avg_time/numprocs;

    print_stats(rank, 0, avg_time, min_time, max_time);
    MPI_Finalize();

    return EXIT_SUCCESS;
}

/* vi: set sw=4 sts=4 tw=80: */
--------------------------------------------------------------------------------
/mpi/collective/osu_bcast.c:
--------------------------------------------------------------------------------
#define BENCHMARK "OSU MPI%s Broadcast Latency Test"
/*
 * Copyright (C) 2002-2016 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */
#include "osu_coll.h"

/*
 * Measures MPI_Bcast latency from root rank 0 for power-of-two message
 * sizes; a single shared buffer serves as both source and destination.
 */
int main(int argc, char *argv[])
{
    int i = 0, rank, size;
    int numprocs;
    double avg_time = 0.0, max_time = 0.0, min_time = 0.0;
    double latency = 0.0, t_start = 0.0, t_stop = 0.0;
    double timer=0.0;
    char *buffer=NULL;
    int po_ret;

    set_header(HEADER);
    set_benchmark_name("osu_bcast");
    enable_accel_support();
    po_ret = process_options(argc, argv);

    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            MPI_Finalize();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
    }

    if(numprocs < 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    /* Broadcast needs a single buffer per rank; no numprocs scaling. */
    if (options.max_message_size > options.max_mem_limit) {
        options.max_message_size = options.max_mem_limit;
    }

    if (allocate_buffer((void**)&buffer, options.max_message_size, options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(buffer, options.accel, 1, options.max_message_size);

    print_preamble(rank);

    for(size=options.min_message_size; size <= options.max_message_size; size *= 2) {
        if(size > LARGE_MESSAGE_SIZE) {
            options.skip = options.skip_large;
            options.iterations = options.iterations_large;
        }

        timer=0.0;
        /* Timed loop; the first options.skip iterations are warm-up. */
        for(i=0; i < options.iterations + options.skip ; i++) {
            t_start = MPI_Wtime();
            MPI_Bcast(buffer, size, MPI_CHAR, 0, MPI_COMM_WORLD);
            t_stop = MPI_Wtime();

            if(i>=options.skip){
                timer+=t_stop-t_start;
            }
            MPI_Barrier(MPI_COMM_WORLD);

        }

        MPI_Barrier(MPI_COMM_WORLD);

        latency = (timer * 1e6) / options.iterations;

        /* Aggregate per-rank latency into min/max/avg on rank 0. */
        MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0,
                MPI_COMM_WORLD);
        avg_time = avg_time/numprocs;

        print_stats(rank, size, avg_time, min_time, max_time);
    }

    free_buffer(buffer, options.accel);

    MPI_Finalize();

    if (none != options.accel) {
        if (cleanup_accel()) {
            fprintf(stderr, "Error cleaning up device\n");
            exit(EXIT_FAILURE);
        }
    }

    return EXIT_SUCCESS;
}

/* vi: set sw=4 sts=4 tw=80: */
--------------------------------------------------------------------------------
/mpi/collective/osu_gather.c:
--------------------------------------------------------------------------------
#define BENCHMARK "OSU MPI%s Gather Latency Test"
/*
 * Copyright (C) 2002-2016 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */
#include "osu_coll.h"

/*
 * Measures MPI_Gather latency: every rank sends `size` bytes to rank 0,
 * which collects numprocs contiguous segments into recvbuf.
 */
int
main (int argc, char *argv[])
{
    int i, numprocs, rank, size;
    double latency = 0.0, t_start = 0.0, t_stop = 0.0;
    double timer=0.0;
    double avg_time = 0.0, max_time = 0.0, min_time = 0.0;
    char * sendbuf = NULL, * recvbuf = NULL;
    int po_ret;
    size_t bufsize;

    set_header(HEADER);
    set_benchmark_name("osu_gather");
    enable_accel_support();
    po_ret = process_options(argc, argv);

    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            MPI_Finalize();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
    }

    if(numprocs < 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    /* Root stores numprocs segments; bound the aggregate allocation. */
    if ((options.max_message_size * numprocs) > options.max_mem_limit) {
        options.max_message_size = options.max_mem_limit / numprocs;
    }

    /* Only the root rank needs the aggregate receive buffer. */
    if (0 == rank) {
        bufsize = options.max_message_size * numprocs;
        if (allocate_buffer((void**)&recvbuf, bufsize, options.accel)) {
            fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
        set_buffer(recvbuf, options.accel, 1, bufsize);
    }

    if (allocate_buffer((void**)&sendbuf, options.max_message_size,
                options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(sendbuf, options.accel, 0, options.max_message_size);

    print_preamble(rank);

    for(size=options.min_message_size; size <= options.max_message_size; size *= 2) {
        if (size > LARGE_MESSAGE_SIZE) {
            options.skip = options.skip_large;
            options.iterations = options.iterations_large;
        }

        MPI_Barrier(MPI_COMM_WORLD);
        timer=0.0;

        /* Timed loop; the first options.skip iterations are warm-up. */
        for (i=0; i < options.iterations + options.skip ; i++) {
            t_start = MPI_Wtime();
            MPI_Gather(sendbuf, size, MPI_CHAR, recvbuf, size, MPI_CHAR, 0,
                    MPI_COMM_WORLD);
            t_stop = MPI_Wtime();

            if (i >= options.skip) {
                timer+=t_stop-t_start;
            }
            MPI_Barrier(MPI_COMM_WORLD);
        }
        latency = (double)(timer * 1e6) / options.iterations;

        /* Aggregate per-rank latency into min/max/avg on rank 0. */
        MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0,
                MPI_COMM_WORLD);
        avg_time = avg_time/numprocs;

        print_stats(rank, size, avg_time, min_time, max_time);
        MPI_Barrier(MPI_COMM_WORLD);
    }

    if (0 == rank) {
        free_buffer(recvbuf, options.accel);
    }
    free_buffer(sendbuf, options.accel);

    MPI_Finalize();

    if (none != options.accel) {
        if (cleanup_accel()) {
            fprintf(stderr, "Error cleaning up device\n");
            exit(EXIT_FAILURE);
        }
    }

    return EXIT_SUCCESS;
}

/* vi: set sw=4 sts=4 tw=80: */
--------------------------------------------------------------------------------
/mpi/collective/osu_gatherv.c:
--------------------------------------------------------------------------------
#define BENCHMARK "OSU MPI%s Gatherv Latency Test"
/*
 * Copyright (C) 2002-2016 the Network-Based Computing Laboratory
 *
(NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | #include "osu_coll.h" 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int i, numprocs, rank, size, disp; 16 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 17 | double timer=0.0; 18 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 19 | char *sendbuf, *recvbuf; 20 | int *rdispls, *recvcounts; 21 | int po_ret; 22 | size_t bufsize; 23 | 24 | set_header(HEADER); 25 | set_benchmark_name("osu_gatherv"); 26 | enable_accel_support(); 27 | po_ret = process_options(argc, argv); 28 | 29 | if (po_okay == po_ret && none != options.accel) { 30 | if (init_accel()) { 31 | fprintf(stderr, "Error initializing device\n"); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | 36 | MPI_Init(&argc, &argv); 37 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 38 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 39 | 40 | switch (po_ret) { 41 | case po_bad_usage: 42 | print_bad_usage_message(rank); 43 | MPI_Finalize(); 44 | exit(EXIT_FAILURE); 45 | case po_help_message: 46 | print_help_message(rank); 47 | MPI_Finalize(); 48 | exit(EXIT_SUCCESS); 49 | case po_version_message: 50 | print_version_message(rank); 51 | MPI_Finalize(); 52 | exit(EXIT_SUCCESS); 53 | case po_okay: 54 | break; 55 | } 56 | 57 | if(numprocs < 2) { 58 | if (rank == 0) { 59 | fprintf(stderr, "This test requires at least two processes\n"); 60 | } 61 | 62 | MPI_Finalize(); 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | if ((options.max_message_size * numprocs) > options.max_mem_limit) { 67 | options.max_message_size = options.max_mem_limit / numprocs; 68 | } 69 | 70 | if (0 == rank) { 71 | if (allocate_buffer((void**)&recvcounts, numprocs*sizeof(int), none)) { 72 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 73 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 74 | } 
75 | if (allocate_buffer((void**)&rdispls, numprocs*sizeof(int), none)) { 76 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 77 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 78 | } 79 | 80 | bufsize = options.max_message_size * numprocs; 81 | if (allocate_buffer((void**)&recvbuf, bufsize, options.accel)) { 82 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 83 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 84 | } 85 | set_buffer(recvbuf, options.accel, 1, bufsize); 86 | } 87 | 88 | if (allocate_buffer((void**)&sendbuf, options.max_message_size, 89 | options.accel)) { 90 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 91 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 92 | } 93 | set_buffer(sendbuf, options.accel, 0, options.max_message_size); 94 | 95 | print_preamble(rank); 96 | 97 | for(size=options.min_message_size; size <= options.max_message_size; size *= 2) { 98 | 99 | if(size > LARGE_MESSAGE_SIZE) { 100 | options.skip = options.skip_large; 101 | options.iterations = options.iterations_large; 102 | } 103 | 104 | MPI_Barrier(MPI_COMM_WORLD); 105 | 106 | if (0 == rank) { 107 | disp =0; 108 | for ( i = 0; i < numprocs; i++) { 109 | recvcounts[i] = size; 110 | rdispls[i] = disp; 111 | disp += size; 112 | } 113 | } 114 | 115 | MPI_Barrier(MPI_COMM_WORLD); 116 | timer=0.0; 117 | for(i=0; i < options.iterations + options.skip ; i++) { 118 | 119 | t_start = MPI_Wtime(); 120 | 121 | MPI_Gatherv(sendbuf, size, MPI_CHAR, recvbuf, recvcounts, rdispls, MPI_CHAR, 0, MPI_COMM_WORLD); 122 | 123 | t_stop = MPI_Wtime(); 124 | 125 | if(i >= options.skip) { 126 | timer+= t_stop-t_start; 127 | } 128 | MPI_Barrier(MPI_COMM_WORLD); 129 | 130 | } 131 | 132 | MPI_Barrier(MPI_COMM_WORLD); 133 | 134 | latency = (double)(timer * 1e6) / options.iterations; 135 | 136 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 137 | MPI_COMM_WORLD); 138 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 139 | MPI_COMM_WORLD); 140 | 
MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 141 | MPI_COMM_WORLD); 142 | avg_time = avg_time/numprocs; 143 | 144 | print_stats(rank, size, avg_time, min_time, max_time); 145 | MPI_Barrier(MPI_COMM_WORLD); 146 | } 147 | 148 | if (0 == rank) { 149 | free_buffer(rdispls, none); 150 | free_buffer(recvcounts, none); 151 | free_buffer(recvbuf, options.accel); 152 | } 153 | free_buffer(sendbuf, options.accel); 154 | 155 | MPI_Finalize(); 156 | 157 | if (none != options.accel) { 158 | if (cleanup_accel()) { 159 | fprintf(stderr, "Error cleaning up device\n"); 160 | exit(EXIT_FAILURE); 161 | } 162 | } 163 | 164 | return EXIT_SUCCESS; 165 | } 166 | /* vi: set sw=4 sts=4 tw=80: */ 167 | -------------------------------------------------------------------------------- /mpi/collective/osu_ibarrier.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Non-blocking Barrier Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include "osu_coll.h" 13 | 14 | int main(int argc, char *argv[]) 15 | { 16 | int i = 0, rank, size = 0; 17 | int numprocs; 18 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 19 | double test_time = 0.0, test_total = 0.0; 20 | double tcomp = 0.0, tcomp_total=0.0, latency_in_secs=0.0; 21 | double wait_time = 0.0, init_time = 0.0; 22 | double init_total = 0.0, wait_total = 0.0; 23 | double timer = 0.0; 24 | int po_ret; 25 | 26 | set_header(HEADER); 27 | set_benchmark_name("osu_ibarrier"); 28 | enable_accel_support(); 29 | po_ret = process_options(argc, argv); 30 | 31 | if (po_okay == po_ret && none != options.accel) { 32 | if (init_accel()) { 33 | fprintf(stderr, "Error initializing device\n"); 34 | exit(EXIT_FAILURE); 35 | } 36 | } 37 | 38 | options.show_size = 0; 39 | 40 | MPI_Init(&argc, &argv); 41 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 42 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 43 | MPI_Request request; 44 | MPI_Status status; 45 | 46 | switch (po_ret) { 47 | case po_bad_usage: 48 | print_bad_usage_message(rank); 49 | MPI_Finalize(); 50 | exit(EXIT_FAILURE); 51 | case po_help_message: 52 | print_help_message(rank); 53 | MPI_Finalize(); 54 | exit(EXIT_SUCCESS); 55 | case po_version_message: 56 | print_version_message(rank); 57 | MPI_Finalize(); 58 | exit(EXIT_SUCCESS); 59 | case po_okay: 60 | break; 61 | } 62 | 63 | if(numprocs < 2) { 64 | if(rank == 0) { 65 | fprintf(stderr, "This test requires at least two processes\n"); 66 | } 67 | 68 | MPI_Finalize(); 69 | 70 | return EXIT_FAILURE; 71 | } 72 | 73 | print_preamble_nbc(rank); 74 | 75 | options.skip = options.skip_large; 76 | options.iterations = iterations_large; 77 | timer = 0.0; 78 | 79 | for(i=0; i < options.iterations + options.skip ; i++) { 80 | t_start = MPI_Wtime(); 81 | MPI_Ibarrier(MPI_COMM_WORLD, &request); 82 | MPI_Wait(&request,&status); 83 | t_stop = MPI_Wtime(); 84 | 85 | if(i>=options.skip){ 86 | timer+=t_stop-t_start; 87 | } 88 | } 89 | 90 | MPI_Barrier(MPI_COMM_WORLD); 
91 | 92 | latency = (timer * 1e6) / options.iterations; 93 | 94 | /* Comm. latency in seconds, fed to dummy_compute */ 95 | latency_in_secs = timer/options.iterations; 96 | 97 | init_arrays(latency_in_secs); 98 | 99 | MPI_Barrier(MPI_COMM_WORLD); 100 | 101 | timer = 0.0; tcomp_total = 0; tcomp = 0; 102 | init_total = 0.0; wait_total = 0.0; 103 | test_time = 0.0, test_total = 0.0; 104 | 105 | for(i=0; i < options.iterations + options.skip ; i++) { 106 | t_start = MPI_Wtime(); 107 | 108 | init_time = MPI_Wtime(); 109 | MPI_Ibarrier(MPI_COMM_WORLD, &request); 110 | init_time = MPI_Wtime() - init_time; 111 | 112 | tcomp = MPI_Wtime(); 113 | test_time = dummy_compute(latency_in_secs, &request); 114 | tcomp = MPI_Wtime() - tcomp; 115 | 116 | wait_time = MPI_Wtime(); 117 | MPI_Wait(&request,&status); 118 | wait_time = MPI_Wtime() - wait_time; 119 | 120 | t_stop = MPI_Wtime(); 121 | 122 | if(i>=options.skip){ 123 | timer += t_stop-t_start; 124 | tcomp_total += tcomp; 125 | test_total += test_time; 126 | init_total += init_time; 127 | wait_total += wait_time; 128 | } 129 | MPI_Barrier(MPI_COMM_WORLD); 130 | } 131 | 132 | MPI_Barrier (MPI_COMM_WORLD); 133 | 134 | calculate_and_print_stats(rank, size, numprocs, 135 | timer, latency, 136 | test_total, tcomp_total, 137 | wait_total, init_total); 138 | 139 | MPI_Finalize(); 140 | 141 | return EXIT_SUCCESS; 142 | } 143 | 144 | /* vi: set sw=4 sts=4 tw=80: */ 145 | -------------------------------------------------------------------------------- /mpi/collective/osu_ibcast.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Non-Blocking Broadcast Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. 
Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */

#include "osu_coll.h"

/*
 * Measures MPI_Ibcast latency and the communication/computation overlap:
 * per message size, phase 1 times Ibcast+Wait alone; phase 2 interleaves
 * dummy_compute() between initiation and completion.
 */
int main(int argc, char *argv[])
{
    int i = 0, rank, size;
    int numprocs;
    double test_time = 0.0, test_total = 0.0;
    double latency = 0.0, t_start = 0.0, t_stop = 0.0;
    double tcomp = 0.0, tcomp_total=0.0, latency_in_secs=0.0;
    double timer=0.0;
    double wait_time = 0.0, init_time = 0.0;
    double init_total = 0.0, wait_total = 0.0;
    char *buffer=NULL;
    int po_ret;

    set_header(HEADER);
    set_benchmark_name("osu_ibcast");
    enable_accel_support();
    po_ret = process_options(argc, argv);

    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Request request;
    MPI_Status status;

    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            MPI_Finalize();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
    }

    if(numprocs < 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    /* Broadcast needs a single buffer per rank; no numprocs scaling. */
    if (options.max_message_size > options.max_mem_limit) {
        options.max_message_size = options.max_mem_limit;
    }

    if (allocate_buffer((void**)&buffer, options.max_message_size, options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    /* Root seeds the payload (1s); receivers start zeroed. */
    if(rank==0)
        set_buffer(buffer, options.accel, 1, options.max_message_size);
    else
        set_buffer(buffer, options.accel, 0, options.max_message_size);

    print_preamble_nbc(rank);

    for(size=options.min_message_size; size <= options.max_message_size; size *= 2) {
        if(size > LARGE_MESSAGE_SIZE) {
            options.skip = options.skip_large;
            options.iterations = options.iterations_large;
        }

        timer = 0.0;

        /* Phase 1: pure non-blocking broadcast latency (Ibcast + Wait);
         * first options.skip iterations are warm-up. */
        for(i=0; i < options.iterations + options.skip ; i++) {
            t_start = MPI_Wtime();
            MPI_Ibcast(buffer, size, MPI_CHAR, 0, MPI_COMM_WORLD, &request);
            MPI_Wait(&request,&status);

            t_stop = MPI_Wtime();

            if(i>=options.skip){
                timer += t_stop-t_start;
            }
            MPI_Barrier(MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);

        latency = (timer * 1e6) / options.iterations;

        /* Comm. latency in seconds, fed to dummy_compute */
        latency_in_secs = timer/options.iterations;

        init_arrays(latency_in_secs);

        MPI_Barrier(MPI_COMM_WORLD);

        timer = 0.0; tcomp_total = 0; tcomp = 0;
        init_total = 0.0; wait_total = 0.0;
        test_time = 0.0, test_total = 0.0;

        /* Phase 2: overlap test -- compute while the broadcast
         * progresses, timing init/compute/wait segments separately. */
        for(i=0; i < options.iterations + options.skip ; i++) {
            t_start = MPI_Wtime();
            init_time = MPI_Wtime();
            MPI_Ibcast(buffer, size, MPI_CHAR, 0, MPI_COMM_WORLD, &request);
            init_time = MPI_Wtime() - init_time;

            tcomp = MPI_Wtime();
            test_time = dummy_compute(latency_in_secs, &request);
            tcomp = MPI_Wtime() - tcomp;

            wait_time = MPI_Wtime();
            MPI_Wait(&request,&status);
            wait_time = MPI_Wtime() - wait_time;

            t_stop = MPI_Wtime();

            if(i>=options.skip){
                timer += t_stop-t_start;
                tcomp_total += tcomp;
                init_total += init_time;
                test_total += test_time;
                wait_total += wait_time;
            }

            MPI_Barrier(MPI_COMM_WORLD);
        }

        MPI_Barrier (MPI_COMM_WORLD);

        calculate_and_print_stats(rank, size, numprocs,
                                  timer, latency,
                                  test_total, tcomp_total,
                                  wait_total, init_total);
    }

    free_buffer(buffer, options.accel);

    MPI_Finalize();

    if (none != options.accel) {
        if (cleanup_accel()) {
            fprintf(stderr, "Error cleaning up device\n");
            exit(EXIT_FAILURE);
        }
    }

    return EXIT_SUCCESS;
}

/* vi: set sw=4 sts=4 tw=80: */
--------------------------------------------------------------------------------
/mpi/collective/osu_reduce.c:
--------------------------------------------------------------------------------
#define BENCHMARK "OSU MPI%s Reduce Latency Test"
/*
 * Copyright (C) 2002-2016 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */
#include "osu_coll.h"

/*
 * Measures MPI_Reduce (MPI_SUM on MPI_FLOAT, root 0) latency.  Note that
 * `size` counts float elements internally; printed sizes are in bytes.
 */
int main(int argc, char *argv[])
{
    int i, numprocs, rank, size;
    double latency = 0.0, t_start = 0.0, t_stop = 0.0;
    double timer=0.0;
    double avg_time = 0.0, max_time = 0.0, min_time = 0.0;
    float *sendbuf, *recvbuf;
    int po_ret;
    size_t bufsize;

    set_header(HEADER);
    set_benchmark_name("osu_reduce");
    enable_accel_support();
    po_ret = process_options(argc, argv);

    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            MPI_Finalize();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
    }

    if(numprocs < 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    if (options.max_message_size > options.max_mem_limit) {
        options.max_message_size = options.max_mem_limit;
    }

    /* Convert the byte-based minimum to a float element count. */
    options.min_message_size /= sizeof(float);
    if (options.min_message_size < DEFAULT_MIN_MESSAGE_SIZE) {
        options.min_message_size = DEFAULT_MIN_MESSAGE_SIZE;
    }

    /* Round the buffer size down to a whole number of floats. */
    bufsize = sizeof(float)*(options.max_message_size/sizeof(float));
    if (allocate_buffer((void**)&recvbuf, bufsize,
                options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(recvbuf, options.accel, 1, bufsize);

    bufsize = sizeof(float)*(options.max_message_size/sizeof(float));
    if (allocate_buffer((void**)&sendbuf, bufsize,
                options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(sendbuf, options.accel, 0, bufsize);

    print_preamble(rank);

    /* Loop over element counts; terminate on the byte-size bound. */
    for(size=options.min_message_size; size*sizeof(float) <= options.max_message_size; size *= 2) {

        if(size > LARGE_MESSAGE_SIZE) {
            options.skip = options.skip_large;
            options.iterations = options.iterations_large;
        }

        MPI_Barrier(MPI_COMM_WORLD);

        timer=0.0;
        /* Timed loop; the first options.skip iterations are warm-up. */
        for(i=0; i < options.iterations + options.skip ; i++) {
            t_start = MPI_Wtime();

            MPI_Reduce(sendbuf, recvbuf, size, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD );
            t_stop=MPI_Wtime();
            if(i>=options.skip){

                timer+=t_stop-t_start;
            }
            MPI_Barrier(MPI_COMM_WORLD);
        }
        latency = (double)(timer * 1e6) / options.iterations;

        /* Aggregate per-rank latency into min/max/avg on rank 0. */
        MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0,
                MPI_COMM_WORLD);
        avg_time = avg_time/numprocs;

        /* Report the size in bytes, not elements. */
        print_stats(rank, size * sizeof(float), avg_time, min_time, max_time);
        MPI_Barrier(MPI_COMM_WORLD);
    }

    free_buffer(recvbuf, options.accel);
    free_buffer(sendbuf, options.accel);

    MPI_Finalize();

    if (none != options.accel) {
        if (cleanup_accel()) {
            fprintf(stderr, "Error cleaning up device\n");
            exit(EXIT_FAILURE);
        }
    }

    return EXIT_SUCCESS;
}

--------------------------------------------------------------------------------
/mpi/collective/osu_reduce_scatter.c:
--------------------------------------------------------------------------------
#define BENCHMARK "OSU MPI%s Reduce_scatter Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | #include "osu_coll.h" 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int i, numprocs, rank, size; 16 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 17 | double timer=0.0; 18 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 19 | float *sendbuf, *recvbuf; 20 | int *recvcounts; 21 | int po_ret; 22 | size_t bufsize; 23 | 24 | set_header(HEADER); 25 | set_benchmark_name("osu_scatter"); 26 | enable_accel_support(); 27 | po_ret = process_options(argc, argv); 28 | 29 | if (po_okay == po_ret && none != options.accel) { 30 | if (init_accel()) { 31 | fprintf(stderr, "Error initializing device\n"); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | 36 | MPI_Init(&argc, &argv); 37 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 38 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 39 | 40 | switch (po_ret) { 41 | case po_bad_usage: 42 | print_bad_usage_message(rank); 43 | MPI_Finalize(); 44 | exit(EXIT_FAILURE); 45 | case po_help_message: 46 | print_help_message(rank); 47 | MPI_Finalize(); 48 | exit(EXIT_SUCCESS); 49 | case po_version_message: 50 | print_version_message(rank); 51 | MPI_Finalize(); 52 | exit(EXIT_SUCCESS); 53 | case po_okay: 54 | break; 55 | } 56 | 57 | if(numprocs < 2) { 58 | if (rank == 0) { 59 | fprintf(stderr, "This test requires at least two processes\n"); 60 | } 61 | 62 | MPI_Finalize(); 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | if (options.max_message_size > options.max_mem_limit) { 67 | options.max_message_size = options.max_mem_limit; 68 | } 69 | 70 | options.min_message_size /= sizeof(float); 71 | if (options.min_message_size < DEFAULT_MIN_MESSAGE_SIZE) { 72 | 
options.min_message_size = DEFAULT_MIN_MESSAGE_SIZE; 73 | } 74 | 75 | if (allocate_buffer((void**)&recvcounts, numprocs*sizeof(int), none)) { 76 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 77 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 78 | } 79 | 80 | bufsize = sizeof(float)*(options.max_message_size/sizeof(float)); 81 | if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) { 82 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 83 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 84 | } 85 | set_buffer(sendbuf, options.accel, 1, bufsize); 86 | 87 | bufsize = sizeof(float)*((options.max_message_size/numprocs + 1)/sizeof(float)); 88 | if (allocate_buffer((void**)&recvbuf, bufsize, 89 | options.accel)) { 90 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 91 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 92 | } 93 | set_buffer(recvbuf, options.accel, 0, bufsize); 94 | 95 | print_preamble(rank); 96 | 97 | for(size=options.min_message_size; size*sizeof(float) <= options.max_message_size; size *= 2) { 98 | 99 | if(size > LARGE_MESSAGE_SIZE) { 100 | options.skip = options.skip_large; 101 | options.iterations = options.iterations_large; 102 | } 103 | 104 | int portion=0, remainder=0; 105 | portion=size/numprocs; 106 | remainder=size%numprocs; 107 | 108 | for (i=0; i=options.skip){ 130 | 131 | timer+=t_stop-t_start; 132 | } 133 | MPI_Barrier(MPI_COMM_WORLD); 134 | } 135 | latency = (double)(timer * 1e6) / options.iterations; 136 | 137 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 138 | MPI_COMM_WORLD); 139 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 140 | MPI_COMM_WORLD); 141 | MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 142 | MPI_COMM_WORLD); 143 | avg_time = avg_time/numprocs; 144 | 145 | print_stats(rank, size * sizeof(float), avg_time, min_time, max_time); 146 | MPI_Barrier(MPI_COMM_WORLD); 147 | } 148 | 149 | free_buffer(recvcounts, none); 150 | free_buffer(sendbuf, 
options.accel); 151 | free_buffer(recvbuf, options.accel); 152 | 153 | MPI_Finalize(); 154 | 155 | if (none != options.accel) { 156 | if (cleanup_accel()) { 157 | fprintf(stderr, "Error cleaning up device\n"); 158 | exit(EXIT_FAILURE); 159 | } 160 | } 161 | 162 | return EXIT_SUCCESS; 163 | } 164 | -------------------------------------------------------------------------------- /mpi/collective/osu_scatter.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Scatter Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | #include "osu_coll.h" 12 | 13 | int 14 | main (int argc, char *argv[]) 15 | { 16 | int i, numprocs, rank, size; 17 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 18 | double timer=0.0; 19 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 20 | char * sendbuf = NULL, * recvbuf = NULL; 21 | int po_ret; 22 | size_t bufsize; 23 | 24 | set_header(HEADER); 25 | set_benchmark_name("osu_scatter"); 26 | enable_accel_support(); 27 | po_ret = process_options(argc, argv); 28 | 29 | if (po_okay == po_ret && none != options.accel) { 30 | if (init_accel()) { 31 | fprintf(stderr, "Error initializing device\n"); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | 36 | MPI_Init(&argc, &argv); 37 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 38 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 39 | 40 | switch (po_ret) { 41 | case po_bad_usage: 42 | print_bad_usage_message(rank); 43 | MPI_Finalize(); 44 | exit(EXIT_FAILURE); 45 | case po_help_message: 46 | print_help_message(rank); 47 | MPI_Finalize(); 48 | exit(EXIT_SUCCESS); 49 | case po_version_message: 50 | print_version_message(rank); 51 | MPI_Finalize(); 52 | 
exit(EXIT_SUCCESS); 53 | case po_okay: 54 | break; 55 | } 56 | 57 | if(numprocs < 2) { 58 | if (rank == 0) { 59 | fprintf(stderr, "This test requires at least two processes\n"); 60 | } 61 | 62 | MPI_Finalize(); 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | if ((options.max_message_size * numprocs) > options.max_mem_limit) { 67 | options.max_message_size = options.max_mem_limit / numprocs; 68 | } 69 | 70 | if (0 == rank) { 71 | bufsize = options.max_message_size * numprocs; 72 | if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) { 73 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 74 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 75 | } 76 | set_buffer(sendbuf, options.accel, 1, bufsize); 77 | } 78 | 79 | if (allocate_buffer((void**)&recvbuf, options.max_message_size, 80 | options.accel)) { 81 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 82 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 83 | } 84 | set_buffer(recvbuf, options.accel, 0, options.max_message_size); 85 | 86 | print_preamble(rank); 87 | 88 | for(size=options.min_message_size; size <= options.max_message_size; size *= 2) { 89 | if (size > LARGE_MESSAGE_SIZE) { 90 | options.skip = options.skip_large; 91 | options.iterations = options.iterations_large; 92 | } 93 | 94 | MPI_Barrier(MPI_COMM_WORLD); 95 | timer=0.0; 96 | 97 | for (i=0; i < options.iterations + options.skip ; i++) { 98 | t_start = MPI_Wtime(); 99 | MPI_Scatter(sendbuf, size, MPI_CHAR, recvbuf, size, MPI_CHAR, 0, 100 | MPI_COMM_WORLD); 101 | t_stop = MPI_Wtime(); 102 | 103 | if (i >= options.skip) { 104 | timer+=t_stop-t_start; 105 | } 106 | MPI_Barrier(MPI_COMM_WORLD); 107 | } 108 | latency = (double)(timer * 1e6) / options.iterations; 109 | 110 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 111 | MPI_COMM_WORLD); 112 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 113 | MPI_COMM_WORLD); 114 | MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 115 | MPI_COMM_WORLD); 116 | 
avg_time = avg_time/numprocs; 117 | 118 | print_stats(rank, size, avg_time, min_time, max_time); 119 | MPI_Barrier(MPI_COMM_WORLD); 120 | } 121 | 122 | if (0 == rank) { 123 | free_buffer(sendbuf, options.accel); 124 | } 125 | free_buffer(recvbuf, options.accel); 126 | 127 | MPI_Finalize(); 128 | 129 | if (none != options.accel) { 130 | if (cleanup_accel()) { 131 | fprintf(stderr, "Error cleaning up device\n"); 132 | exit(EXIT_FAILURE); 133 | } 134 | } 135 | 136 | return EXIT_SUCCESS; 137 | } 138 | 139 | /* vi: set sw=4 sts=4 tw=80: */ 140 | -------------------------------------------------------------------------------- /mpi/collective/osu_scatterv.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Scatterv Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | #include "osu_coll.h" 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int i, numprocs, rank, size, disp; 16 | double latency = 0.0, t_start = 0.0, t_stop = 0.0; 17 | double timer=0.0; 18 | double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 19 | char *sendbuf, *recvbuf; 20 | int *sdispls=NULL, *sendcounts=NULL; 21 | int po_ret; 22 | size_t bufsize; 23 | 24 | set_header(HEADER); 25 | set_benchmark_name("osu_scatterv"); 26 | enable_accel_support(); 27 | po_ret = process_options(argc, argv); 28 | 29 | if (po_okay == po_ret && none != options.accel) { 30 | if (init_accel()) { 31 | fprintf(stderr, "Error initializing device\n"); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | 36 | MPI_Init(&argc, &argv); 37 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 38 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 39 | 40 | switch (po_ret) { 41 | case po_bad_usage: 42 | print_bad_usage_message(rank); 43 | MPI_Finalize(); 44 | exit(EXIT_FAILURE); 45 | case po_help_message: 46 | print_help_message(rank); 47 | MPI_Finalize(); 48 | exit(EXIT_SUCCESS); 49 | case po_version_message: 50 | print_version_message(rank); 51 | MPI_Finalize(); 52 | exit(EXIT_SUCCESS); 53 | case po_okay: 54 | break; 55 | } 56 | 57 | if(numprocs < 2) { 58 | if (rank == 0) { 59 | fprintf(stderr, "This test requires at least two processes\n"); 60 | } 61 | 62 | MPI_Finalize(); 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | if ((options.max_message_size * numprocs) > options.max_mem_limit) { 67 | options.max_message_size = options.max_mem_limit / numprocs; 68 | } 69 | 70 | if (0 == rank) { 71 | if (allocate_buffer((void**)&sendcounts, numprocs*sizeof(int), none)) { 72 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 73 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 74 | } 75 | if (allocate_buffer((void**)&sdispls, numprocs*sizeof(int), none)) { 76 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 77 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 78 | } 79 | 80 | bufsize = 
options.max_message_size * numprocs; 81 | if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) { 82 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 83 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 84 | } 85 | set_buffer(sendbuf, options.accel, 1, bufsize); 86 | } 87 | 88 | if (allocate_buffer((void**)&recvbuf, options.max_message_size, 89 | options.accel)) { 90 | fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank); 91 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 92 | } 93 | set_buffer(recvbuf, options.accel, 0, options.max_message_size); 94 | 95 | print_preamble(rank); 96 | 97 | for(size=options.min_message_size; size <= options.max_message_size; size *= 2) { 98 | 99 | if(size > LARGE_MESSAGE_SIZE) { 100 | options.skip = options.skip_large; 101 | options.iterations = options.iterations_large; 102 | } 103 | 104 | MPI_Barrier(MPI_COMM_WORLD); 105 | 106 | if (0 == rank) { 107 | disp =0; 108 | for ( i = 0; i < numprocs; i++) { 109 | sendcounts[i] = size; 110 | sdispls[i] = disp; 111 | disp += size; 112 | } 113 | } 114 | 115 | MPI_Barrier(MPI_COMM_WORLD); 116 | 117 | timer=0.0; 118 | 119 | for(i=0; i < options.iterations + options.skip ; i++) { 120 | 121 | t_start = MPI_Wtime(); 122 | MPI_Scatterv(sendbuf, sendcounts, sdispls, MPI_CHAR, recvbuf, 123 | size, MPI_CHAR, 0, MPI_COMM_WORLD); 124 | 125 | t_stop = MPI_Wtime(); 126 | if(i >= options.skip) { 127 | timer+=t_stop-t_start; 128 | } 129 | MPI_Barrier(MPI_COMM_WORLD); 130 | } 131 | latency = (double)(timer * 1e6) / options.iterations; 132 | 133 | MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, 134 | MPI_COMM_WORLD); 135 | MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, 136 | MPI_COMM_WORLD); 137 | MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, 138 | MPI_COMM_WORLD); 139 | avg_time = avg_time/numprocs; 140 | 141 | print_stats(rank, size, avg_time, min_time, max_time); 142 | MPI_Barrier(MPI_COMM_WORLD); 143 | } 144 | 145 | if (0 == rank) { 146 | 
free_buffer(sendcounts, none); 147 | free_buffer(sdispls, none); 148 | free_buffer(sendbuf, options.accel); 149 | } 150 | free_buffer(recvbuf, options.accel); 151 | 152 | MPI_Finalize(); 153 | 154 | if (none != options.accel) { 155 | if (cleanup_accel()) { 156 | fprintf(stderr, "Error cleaning up device\n"); 157 | exit(EXIT_FAILURE); 158 | } 159 | } 160 | 161 | return EXIT_SUCCESS; 162 | } 163 | 164 | /* vi: set sw=4 sts=4 tw=80: */ 165 | -------------------------------------------------------------------------------- /mpi/one-sided/Makefile.am: -------------------------------------------------------------------------------- 1 | one_sideddir = $(pkglibexecdir)/mpi/one-sided 2 | one_sided_PROGRAMS = osu_acc_latency osu_get_bw osu_get_latency osu_put_bibw osu_put_bw osu_put_latency 3 | 4 | if MPI3_LIBRARY 5 | one_sided_PROGRAMS += osu_get_acc_latency osu_fop_latency osu_cas_latency 6 | endif 7 | 8 | osu_put_latency_SOURCES = osu_put_latency.c osu_1sc.c osu_1sc.h 9 | osu_put_bw_SOURCES = osu_put_bw.c osu_1sc.c osu_1sc.h 10 | osu_put_bibw_SOURCES = osu_put_bibw.c osu_1sc.c osu_1sc.h 11 | osu_get_latency_SOURCES = osu_get_latency.c osu_1sc.c osu_1sc.h 12 | osu_get_bw_SOURCES = osu_get_bw.c osu_1sc.c osu_1sc.h 13 | osu_acc_latency_SOURCES = osu_acc_latency.c osu_1sc.c osu_1sc.h 14 | osu_fop_latency_SOURCES = osu_fop_latency.c osu_1sc.c osu_1sc.h 15 | osu_cas_latency_SOURCES = osu_cas_latency.c osu_1sc.c osu_1sc.h 16 | 17 | if EMBEDDED_BUILD 18 | AM_LDFLAGS = 19 | AM_CPPFLAGS = -I$(top_builddir)/../src/include \ 20 | -I${top_srcdir}/../src/include 21 | if BUILD_PROFILING_LIB 22 | AM_LDFLAGS += $(top_builddir)/../lib/lib@PMPILIBNAME@.la 23 | endif 24 | AM_LDFLAGS += $(top_builddir)/../lib/lib@MPILIBNAME@.la 25 | endif 26 | 27 | if OPENACC 28 | AM_CFLAGS = -acc 29 | endif 30 | -------------------------------------------------------------------------------- /mpi/one-sided/osu_1sc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Copyright (C) 2003-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #ifdef _ENABLE_OPENACC_ 20 | #include 21 | #endif 22 | 23 | #ifdef _ENABLE_CUDA_ 24 | #include 25 | #include 26 | #endif 27 | 28 | #define MAX_ALIGNMENT 65536 29 | 30 | #ifndef FIELD_WIDTH 31 | # define FIELD_WIDTH 20 32 | #endif 33 | 34 | #ifndef FLOAT_PRECISION 35 | # define FLOAT_PRECISION 2 36 | #endif 37 | 38 | #define CHECK(stmt) \ 39 | do { \ 40 | int errno = (stmt); \ 41 | if (0 != errno) { \ 42 | fprintf(stderr, "[%s:%d] function call failed with %d \n",\ 43 | __FILE__, __LINE__, errno); \ 44 | exit(EXIT_FAILURE); \ 45 | } \ 46 | assert(0 == errno); \ 47 | } while (0) 48 | 49 | #define MPI_CHECK(stmt) \ 50 | do { \ 51 | int mpi_errno = (stmt); \ 52 | if (MPI_SUCCESS != mpi_errno) { \ 53 | fprintf(stderr, "[%s:%d] MPI call failed with %d \n", \ 54 | __FILE__, __LINE__,mpi_errno); \ 55 | exit(EXIT_FAILURE); \ 56 | } \ 57 | assert(MPI_SUCCESS == mpi_errno); \ 58 | } while (0) 59 | 60 | #ifdef _ENABLE_CUDA_ 61 | # define CUDA_ENABLED 1 62 | #else 63 | # define CUDA_ENABLED 0 64 | #endif 65 | 66 | #ifdef _ENABLE_OPENACC_ 67 | # define OPENACC_ENABLED 1 68 | #else 69 | # define OPENACC_ENABLED 0 70 | #endif 71 | 72 | /*structures, enumerators and such*/ 73 | /* Window creation */ 74 | typedef enum { 75 | WIN_CREATE=0, 76 | #if MPI_VERSION >= 3 77 | WIN_ALLOCATE, 78 | WIN_DYNAMIC 79 | #endif 80 | } WINDOW; 81 | 82 | /* Synchronization */ 83 | typedef enum { 84 | LOCK=0, 85 | PSCW, 86 | FENCE, 87 | #if MPI_VERSION >= 3 88 | FLUSH, 89 | FLUSH_LOCAL, 90 | LOCK_ALL, 91 | #endif 92 | } SYNC; 93 | 94 | enum po_ret_type { 95 | po_cuda_not_avail, 
96 | po_openacc_not_avail, 97 | po_bad_usage, 98 | po_help_message, 99 | po_okay, 100 | }; 101 | 102 | enum accel_type { 103 | none, 104 | cuda, 105 | openacc 106 | }; 107 | 108 | enum options_type { 109 | all_sync, 110 | active_sync 111 | }; 112 | 113 | struct options_t { 114 | char rank0; 115 | char rank1; 116 | enum accel_type accel; 117 | int loop; 118 | int loop_large; 119 | int skip; 120 | int skip_large; 121 | }; 122 | 123 | extern struct options_t options; 124 | 125 | /*variables*/ 126 | extern char const *win_info[20]; 127 | extern char const *sync_info[20]; 128 | 129 | #ifdef _ENABLE_CUDA_ 130 | extern CUcontext cuContext; 131 | #endif 132 | 133 | extern MPI_Aint disp_remote; 134 | extern MPI_Aint disp_local; 135 | 136 | /*function declarations*/ 137 | void usage (int, char const *); 138 | int process_options (int, char **, WINDOW*, SYNC*, int); 139 | void allocate_memory(int, char *, char *, char **, char **, 140 | char **win_base, int, WINDOW, MPI_Win *); 141 | void free_memory (void *, void *, MPI_Win, int); 142 | void allocate_atomic_memory(int, char *, char *, char *, 143 | char *, char **, char **, char **, char **, 144 | char **win_base, int, WINDOW, MPI_Win *); 145 | void free_atomic_memory (void *, void *, void *, void *, MPI_Win, int); 146 | int init_accel (); 147 | int cleanup_accel (); 148 | -------------------------------------------------------------------------------- /mpi/pt2pt/Makefile.am: -------------------------------------------------------------------------------- 1 | pt2ptdir = $(pkglibexecdir)/mpi/pt2pt 2 | pt2pt_PROGRAMS = osu_bibw osu_bw osu_latency osu_mbw_mr osu_multi_lat 3 | 4 | osu_bw_SOURCES = osu_bw.c osu_pt2pt.c osu_pt2pt.h 5 | osu_bibw_SOURCES = osu_bibw.c osu_pt2pt.c osu_pt2pt.h 6 | osu_latency_SOURCES = osu_latency.c osu_pt2pt.c osu_pt2pt.h 7 | osu_multi_lat_SOURCES = osu_multi_lat.c osu_pt2pt.c osu_pt2pt.h 8 | osu_latency_mt_SOURCES = osu_latency_mt.c osu_pt2pt.c osu_pt2pt.h 9 | 10 | if MPI2_LIBRARY 11 | pt2pt_PROGRAMS 
+= osu_latency_mt 12 | endif 13 | 14 | if EMBEDDED_BUILD 15 | AM_LDFLAGS = 16 | AM_CPPFLAGS = -I$(top_builddir)/../src/include \ 17 | -I${top_srcdir}/../src/include 18 | if BUILD_PROFILING_LIB 19 | AM_LDFLAGS += $(top_builddir)/../lib/lib@PMPILIBNAME@.la 20 | endif 21 | AM_LDFLAGS += $(top_builddir)/../lib/lib@MPILIBNAME@.la 22 | endif 23 | 24 | if OPENACC 25 | AM_CFLAGS = -acc 26 | endif 27 | 28 | -------------------------------------------------------------------------------- /mpi/pt2pt/osu_bibw.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Bi-Directional Bandwidth Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | #include 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int myid, numprocs, i, j; 16 | int size; 17 | char *s_buf, *r_buf; 18 | double t_start = 0.0, t_end = 0.0, t = 0.0; 19 | int window_size = 64; 20 | int po_ret = process_options(argc, argv, BW); 21 | 22 | if (po_okay == po_ret && none != options.accel) { 23 | if (init_accel()) { 24 | fprintf(stderr, "Error initializing device\n"); 25 | exit(EXIT_FAILURE); 26 | } 27 | } 28 | set_header(HEADER); 29 | 30 | MPI_Init(&argc, &argv); 31 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 32 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 33 | 34 | if (0 == myid) { 35 | switch (po_ret) { 36 | case po_cuda_not_avail: 37 | fprintf(stderr, "CUDA support not enabled. Please recompile " 38 | "benchmark with CUDA support.\n"); 39 | break; 40 | case po_openacc_not_avail: 41 | fprintf(stderr, "OPENACC support not enabled. 
Please " 42 | "recompile benchmark with OPENACC support.\n"); 43 | break; 44 | case po_bad_usage: 45 | case po_help_message: 46 | usage("osu_bibw"); 47 | break; 48 | } 49 | } 50 | 51 | switch (po_ret) { 52 | case po_cuda_not_avail: 53 | case po_openacc_not_avail: 54 | case po_bad_usage: 55 | MPI_Finalize(); 56 | exit(EXIT_FAILURE); 57 | case po_help_message: 58 | MPI_Finalize(); 59 | exit(EXIT_SUCCESS); 60 | case po_okay: 61 | break; 62 | } 63 | 64 | if(numprocs != 2) { 65 | if(myid == 0) { 66 | fprintf(stderr, "This test requires exactly two processes\n"); 67 | } 68 | 69 | MPI_Finalize(); 70 | exit(EXIT_FAILURE); 71 | } 72 | 73 | if (allocate_memory(&s_buf, &r_buf, myid)) { 74 | /* Error allocating memory */ 75 | MPI_Finalize(); 76 | exit(EXIT_FAILURE); 77 | } 78 | 79 | print_header(myid, BW); 80 | 81 | /* Bi-Directional Bandwidth test */ 82 | for(size = 1; size <= MAX_MSG_SIZE; size *= 2) { 83 | /* touch the data */ 84 | touch_data(s_buf, r_buf, myid, size); 85 | 86 | if(size > LARGE_MESSAGE_SIZE) { 87 | options.loop = options.loop_large; 88 | options.skip = options.skip_large; 89 | window_size = WINDOW_SIZE_LARGE; 90 | } 91 | 92 | if(myid == 0) { 93 | for(i = 0; i < options.loop + options.skip; i++) { 94 | if(i == options.skip) { 95 | t_start = MPI_Wtime(); 96 | } 97 | 98 | for(j = 0; j < window_size; j++) { 99 | MPI_Irecv(r_buf, size, MPI_CHAR, 1, 10, MPI_COMM_WORLD, 100 | recv_request + j); 101 | } 102 | 103 | for(j = 0; j < window_size; j++) { 104 | MPI_Isend(s_buf, size, MPI_CHAR, 1, 100, MPI_COMM_WORLD, 105 | send_request + j); 106 | } 107 | 108 | MPI_Waitall(window_size, send_request, reqstat); 109 | MPI_Waitall(window_size, recv_request, reqstat); 110 | } 111 | 112 | t_end = MPI_Wtime(); 113 | t = t_end - t_start; 114 | 115 | } 116 | 117 | else if(myid == 1) { 118 | for(i = 0; i < options.loop + options.skip; i++) { 119 | for(j = 0; j < window_size; j++) { 120 | MPI_Irecv(r_buf, size, MPI_CHAR, 0, 100, MPI_COMM_WORLD, 121 | recv_request + j); 122 | } 123 
| 124 | for (j = 0; j < window_size; j++) { 125 | MPI_Isend(s_buf, size, MPI_CHAR, 0, 10, MPI_COMM_WORLD, 126 | send_request + j); 127 | } 128 | 129 | MPI_Waitall(window_size, send_request, reqstat); 130 | MPI_Waitall(window_size, recv_request, reqstat); 131 | } 132 | } 133 | 134 | if(myid == 0) { 135 | double tmp = size / 1e6 * options.loop * window_size * 2; 136 | 137 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 138 | FLOAT_PRECISION, tmp / t); 139 | fflush(stdout); 140 | } 141 | } 142 | 143 | free_memory(s_buf, r_buf, myid); 144 | MPI_Finalize(); 145 | 146 | if (none != options.accel) { 147 | if (cleanup_accel()) { 148 | fprintf(stderr, "Error cleaning up device\n"); 149 | exit(EXIT_FAILURE); 150 | } 151 | } 152 | 153 | return EXIT_SUCCESS; 154 | } 155 | 156 | 157 | -------------------------------------------------------------------------------- /mpi/pt2pt/osu_bw.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Bandwidth Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | 14 | int 15 | main (int argc, char *argv[]) 16 | { 17 | int myid, numprocs, i, j; 18 | int size; 19 | char *s_buf, *r_buf; 20 | double t_start = 0.0, t_end = 0.0, t = 0.0; 21 | int window_size = 64; 22 | int po_ret = process_options(argc, argv, BW); 23 | 24 | if (po_okay == po_ret && none != options.accel) { 25 | if (init_accel()) { 26 | fprintf(stderr, "Error initializing device\n"); 27 | exit(EXIT_FAILURE); 28 | } 29 | } 30 | 31 | set_header(HEADER); 32 | 33 | MPI_Init(&argc, &argv); 34 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 35 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 36 | 37 | if (0 == myid) { 38 | switch (po_ret) { 39 | case po_cuda_not_avail: 40 | fprintf(stderr, "CUDA support not enabled. Please recompile " 41 | "benchmark with CUDA support.\n"); 42 | break; 43 | case po_openacc_not_avail: 44 | fprintf(stderr, "OPENACC support not enabled. Please " 45 | "recompile benchmark with OPENACC support.\n"); 46 | break; 47 | case po_bad_usage: 48 | case po_help_message: 49 | usage("osu_bw"); 50 | break; 51 | } 52 | } 53 | 54 | switch (po_ret) { 55 | case po_cuda_not_avail: 56 | case po_openacc_not_avail: 57 | case po_bad_usage: 58 | MPI_Finalize(); 59 | exit(EXIT_FAILURE); 60 | case po_help_message: 61 | MPI_Finalize(); 62 | exit(EXIT_SUCCESS); 63 | case po_okay: 64 | break; 65 | } 66 | 67 | if(numprocs != 2) { 68 | if(myid == 0) { 69 | fprintf(stderr, "This test requires exactly two processes\n"); 70 | } 71 | 72 | MPI_Finalize(); 73 | exit(EXIT_FAILURE); 74 | } 75 | 76 | if (allocate_memory(&s_buf, &r_buf, myid)) { 77 | /* Error allocating memory */ 78 | MPI_Finalize(); 79 | exit(EXIT_FAILURE); 80 | } 81 | 82 | print_header(myid, BW); 83 | 84 | /* Bandwidth test */ 85 | for(size = 1; size <= MAX_MSG_SIZE; size *= 2) { 86 | touch_data(s_buf, r_buf, myid, size); 87 | 88 | if(size > LARGE_MESSAGE_SIZE) { 89 | options.loop = options.loop_large; 90 | options.skip = options.skip_large; 91 | window_size = WINDOW_SIZE_LARGE; 92 | } 93 | 
94 | if(myid == 0) { 95 | for(i = 0; i < options.loop + options.skip; i++) { 96 | if(i == options.skip) { 97 | t_start = MPI_Wtime(); 98 | } 99 | 100 | for(j = 0; j < window_size; j++) { 101 | MPI_Isend(s_buf, size, MPI_CHAR, 1, 100, MPI_COMM_WORLD, 102 | request + j); 103 | } 104 | 105 | MPI_Waitall(window_size, request, reqstat); 106 | MPI_Recv(r_buf, 4, MPI_CHAR, 1, 101, MPI_COMM_WORLD, 107 | &reqstat[0]); 108 | } 109 | 110 | t_end = MPI_Wtime(); 111 | t = t_end - t_start; 112 | } 113 | 114 | else if(myid == 1) { 115 | for(i = 0; i < options.loop + options.skip; i++) { 116 | for(j = 0; j < window_size; j++) { 117 | MPI_Irecv(r_buf, size, MPI_CHAR, 0, 100, MPI_COMM_WORLD, 118 | request + j); 119 | } 120 | 121 | MPI_Waitall(window_size, request, reqstat); 122 | MPI_Send(s_buf, 4, MPI_CHAR, 0, 101, MPI_COMM_WORLD); 123 | } 124 | } 125 | 126 | if(myid == 0) { 127 | double tmp = size / 1e6 * options.loop * window_size; 128 | 129 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 130 | FLOAT_PRECISION, tmp / t); 131 | fflush(stdout); 132 | } 133 | } 134 | 135 | free_memory(s_buf, r_buf, myid); 136 | MPI_Finalize(); 137 | 138 | if (none != options.accel) { 139 | if (cleanup_accel()) { 140 | fprintf(stderr, "Error cleaning up device\n"); 141 | exit(EXIT_FAILURE); 142 | } 143 | } 144 | 145 | return EXIT_SUCCESS; 146 | } 147 | -------------------------------------------------------------------------------- /mpi/pt2pt/osu_latency.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI%s Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | #include 12 | 13 | int 14 | main (int argc, char *argv[]) 15 | { 16 | int myid, numprocs, i; 17 | int size; 18 | MPI_Status reqstat; 19 | char *s_buf, *r_buf; 20 | double t_start = 0.0, t_end = 0.0; 21 | int po_ret = process_options(argc, argv, LAT); 22 | 23 | if (po_okay == po_ret && none != options.accel) { 24 | if (init_accel()) { 25 | fprintf(stderr, "Error initializing device\n"); 26 | exit(EXIT_FAILURE); 27 | } 28 | } 29 | 30 | set_header(HEADER); 31 | 32 | MPI_Init(&argc, &argv); 33 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 34 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 35 | 36 | if (0 == myid) { 37 | switch (po_ret) { 38 | case po_cuda_not_avail: 39 | fprintf(stderr, "CUDA support not enabled. Please recompile " 40 | "benchmark with CUDA support.\n"); 41 | break; 42 | case po_openacc_not_avail: 43 | fprintf(stderr, "OPENACC support not enabled. Please " 44 | "recompile benchmark with OPENACC support.\n"); 45 | break; 46 | case po_bad_usage: 47 | case po_help_message: 48 | usage("osu_latency"); 49 | break; 50 | } 51 | } 52 | 53 | switch (po_ret) { 54 | case po_cuda_not_avail: 55 | case po_openacc_not_avail: 56 | case po_bad_usage: 57 | MPI_Finalize(); 58 | exit(EXIT_FAILURE); 59 | case po_help_message: 60 | MPI_Finalize(); 61 | exit(EXIT_SUCCESS); 62 | case po_okay: 63 | break; 64 | } 65 | 66 | if(numprocs != 2) { 67 | if(myid == 0) { 68 | fprintf(stderr, "This test requires exactly two processes\n"); 69 | } 70 | 71 | MPI_Finalize(); 72 | exit(EXIT_FAILURE); 73 | } 74 | 75 | if (allocate_memory(&s_buf, &r_buf, myid)) { 76 | /* Error allocating memory */ 77 | MPI_Finalize(); 78 | exit(EXIT_FAILURE); 79 | } 80 | 81 | print_header(myid, LAT); 82 | 83 | 84 | /* Latency test */ 85 | for(size = 0; size <= MAX_MSG_SIZE; size = (size ? 
size * 2 : 1)) { 86 | touch_data(s_buf, r_buf, myid, size); 87 | 88 | if(size > LARGE_MESSAGE_SIZE) { 89 | options.loop = options.loop_large; 90 | options.skip = options.skip_large; 91 | } 92 | 93 | MPI_Barrier(MPI_COMM_WORLD); 94 | 95 | if(myid == 0) { 96 | for(i = 0; i < options.loop + options.skip; i++) { 97 | if(i == options.skip) t_start = MPI_Wtime(); 98 | 99 | MPI_Send(s_buf, size, MPI_CHAR, 1, 1, MPI_COMM_WORLD); 100 | MPI_Recv(r_buf, size, MPI_CHAR, 1, 1, MPI_COMM_WORLD, &reqstat); 101 | } 102 | 103 | t_end = MPI_Wtime(); 104 | } 105 | 106 | else if(myid == 1) { 107 | for(i = 0; i < options.loop + options.skip; i++) { 108 | MPI_Recv(r_buf, size, MPI_CHAR, 0, 1, MPI_COMM_WORLD, &reqstat); 109 | MPI_Send(s_buf, size, MPI_CHAR, 0, 1, MPI_COMM_WORLD); 110 | } 111 | } 112 | 113 | if(myid == 0) { 114 | double latency = (t_end - t_start) * 1e6 / (2.0 * options.loop); 115 | 116 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 117 | FLOAT_PRECISION, latency); 118 | fflush(stdout); 119 | } 120 | } 121 | 122 | free_memory(s_buf, r_buf, myid); 123 | MPI_Finalize(); 124 | 125 | if (none != options.accel) { 126 | if (cleanup_accel()) { 127 | fprintf(stderr, "Error cleaning up device\n"); 128 | exit(EXIT_FAILURE); 129 | } 130 | } 131 | 132 | return EXIT_SUCCESS; 133 | } 134 | 135 | -------------------------------------------------------------------------------- /mpi/pt2pt/osu_multi_lat.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU MPI Multi Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | 14 | #define MAX_MSG_SIZE (1<<22) 15 | #define MAX_STEPS (22+1) 16 | 17 | char *s_buf, *r_buf; 18 | 19 | static void multi_latency(int rank, int pairs); 20 | 21 | int main(int argc, char* argv[]) 22 | { 23 | unsigned long align_size = sysconf(_SC_PAGESIZE); 24 | int rank, nprocs; 25 | int pairs; 26 | 27 | int po_ret = process_options(argc, argv, LAT); 28 | 29 | if (po_okay == po_ret && none != options.accel) { 30 | if (init_accel()) { 31 | fprintf(stderr, "Error initializing device\n"); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | set_header(HEADER); 36 | MPI_Init(&argc, &argv); 37 | 38 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 39 | MPI_Comm_size(MPI_COMM_WORLD, &nprocs); 40 | 41 | pairs = nprocs/2; 42 | 43 | if (0 == rank) { 44 | switch (po_ret) { 45 | case po_cuda_not_avail: 46 | fprintf(stderr, "CUDA support not enabled. Please recompile " 47 | "benchmark with CUDA support.\n"); 48 | break; 49 | case po_openacc_not_avail: 50 | fprintf(stderr, "OPENACC support not enabled. 
Please " 51 | "recompile benchmark with OPENACC support.\n"); 52 | break; 53 | case po_bad_usage: 54 | case po_help_message: 55 | usage("osu_multi_lat"); 56 | break; 57 | } 58 | } 59 | 60 | switch (po_ret) { 61 | case po_cuda_not_avail: 62 | case po_openacc_not_avail: 63 | case po_bad_usage: 64 | MPI_Finalize(); 65 | exit(EXIT_FAILURE); 66 | case po_help_message: 67 | MPI_Finalize(); 68 | exit(EXIT_SUCCESS); 69 | case po_okay: 70 | break; 71 | } 72 | 73 | if (posix_memalign((void**)&s_buf, align_size, MAX_MSG_SIZE)) { 74 | fprintf(stderr, "Error allocating host memory\n"); 75 | return EXIT_FAILURE; 76 | } 77 | 78 | if (posix_memalign((void**)&r_buf, align_size, MAX_MSG_SIZE)) { 79 | fprintf(stderr, "Error allocating host memory\n"); 80 | return EXIT_FAILURE; 81 | } 82 | 83 | memset(s_buf, 0, MAX_MSG_SIZE); 84 | memset(r_buf, 0, MAX_MSG_SIZE); 85 | 86 | if(rank == 0) { 87 | fprintf(stdout, HEADER); 88 | fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)"); 89 | fflush(stdout); 90 | } 91 | 92 | MPI_Barrier(MPI_COMM_WORLD); 93 | 94 | multi_latency(rank, pairs); 95 | 96 | MPI_Barrier(MPI_COMM_WORLD); 97 | 98 | MPI_Finalize(); 99 | 100 | free(r_buf); 101 | free(s_buf); 102 | 103 | return EXIT_SUCCESS; 104 | } 105 | 106 | static void multi_latency(int rank, int pairs) 107 | { 108 | int size, partner; 109 | int i; 110 | double t_start = 0.0, t_end = 0.0, 111 | latency = 0.0, total_lat = 0.0, 112 | avg_lat = 0.0; 113 | 114 | MPI_Status reqstat; 115 | 116 | 117 | for(size = 0; size <= MAX_MSG_SIZE; size = (size ? 
size * 2 : 1)) { 118 | 119 | MPI_Barrier(MPI_COMM_WORLD); 120 | 121 | if(size > LARGE_MESSAGE_SIZE) { 122 | options.loop = options.loop_large; 123 | options.skip = options.skip_large; 124 | } else { 125 | options.loop = options.loop; 126 | options.skip = options.skip; 127 | } 128 | 129 | if (rank < pairs) { 130 | partner = rank + pairs; 131 | 132 | for (i = 0; i < options.loop + options.skip; i++) { 133 | 134 | if (i == options.skip) { 135 | t_start = MPI_Wtime(); 136 | MPI_Barrier(MPI_COMM_WORLD); 137 | } 138 | 139 | MPI_Send(s_buf, size, MPI_CHAR, partner, 1, MPI_COMM_WORLD); 140 | MPI_Recv(r_buf, size, MPI_CHAR, partner, 1, MPI_COMM_WORLD, 141 | &reqstat); 142 | } 143 | 144 | t_end = MPI_Wtime(); 145 | 146 | } else { 147 | partner = rank - pairs; 148 | 149 | for (i = 0; i < options.loop + options.skip; i++) { 150 | 151 | if (i == options.skip) { 152 | t_start = MPI_Wtime(); 153 | MPI_Barrier(MPI_COMM_WORLD); 154 | } 155 | 156 | MPI_Recv(r_buf, size, MPI_CHAR, partner, 1, MPI_COMM_WORLD, 157 | &reqstat); 158 | MPI_Send(s_buf, size, MPI_CHAR, partner, 1, MPI_COMM_WORLD); 159 | } 160 | 161 | t_end = MPI_Wtime(); 162 | } 163 | 164 | latency = (t_end - t_start) * 1.0e6 / (2.0 * options.loop); 165 | 166 | MPI_Reduce(&latency, &total_lat, 1, MPI_DOUBLE, MPI_SUM, 0, 167 | MPI_COMM_WORLD); 168 | 169 | avg_lat = total_lat/(double) (pairs * 2); 170 | 171 | if(0 == rank) { 172 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 173 | FLOAT_PRECISION, avg_lat); 174 | fflush(stdout); 175 | } 176 | } 177 | } 178 | 179 | /* vi: set sw=4 sts=4 tw=80: */ 180 | -------------------------------------------------------------------------------- /mpi/pt2pt/osu_pt2pt.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. 
Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 9 | */ 10 | #ifndef OSU_PT2PT_H 11 | #define OSU_PT2PT_H 1 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #ifdef _ENABLE_CUDA_ 22 | #include "cuda.h" 23 | #include "cuda_runtime.h" 24 | #endif 25 | 26 | #ifdef _ENABLE_OPENACC_ 27 | #include 28 | #endif 29 | 30 | #ifdef PACKAGE_VERSION 31 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 32 | #else 33 | # define HEADER "# " BENCHMARK "\n" 34 | #endif 35 | 36 | #ifndef FIELD_WIDTH 37 | # define FIELD_WIDTH 20 38 | #endif 39 | 40 | #ifndef FLOAT_PRECISION 41 | # define FLOAT_PRECISION 2 42 | #endif 43 | 44 | #define MAX_REQ_NUM 1000 45 | 46 | #define MAX_MSG_SIZE (1<<22) 47 | #define MYBUFSIZE (MAX_MSG_SIZE) 48 | 49 | #define WINDOW_SIZE_LARGE 64 50 | #define LARGE_MESSAGE_SIZE 8192 51 | 52 | #ifdef _ENABLE_OPENACC_ 53 | # define OPENACC_ENABLED 1 54 | #else 55 | # define OPENACC_ENABLED 0 56 | #endif 57 | 58 | #ifdef _ENABLE_CUDA_ 59 | # define CUDA_ENABLED 1 60 | #else 61 | # define CUDA_ENABLED 0 62 | #endif 63 | 64 | extern MPI_Request request[MAX_REQ_NUM]; 65 | extern MPI_Status reqstat[MAX_REQ_NUM]; 66 | extern MPI_Request send_request[MAX_REQ_NUM]; 67 | extern MPI_Request recv_request[MAX_REQ_NUM]; 68 | 69 | #ifdef _ENABLE_CUDA_ 70 | extern CUcontext cuContext; 71 | #endif 72 | 73 | #define BW 0 74 | #define LAT 1 75 | 76 | #define BW_LOOP_SMALL 100 77 | #define BW_SKIP_SMALL 10 78 | #define BW_LOOP_LARGE 20 79 | #define BW_SKIP_LARGE 2 80 | 81 | #define LAT_LOOP_SMALL 10000 82 | #define LAT_SKIP_SMALL 100 83 | #define LAT_LOOP_LARGE 1000 84 | #define LAT_SKIP_LARGE 10 85 | 86 | enum po_ret_type { 87 | po_cuda_not_avail, 88 | po_openacc_not_avail, 89 | po_bad_usage, 90 | po_help_message, 91 | po_okay, 92 | }; 93 | 94 | enum accel_type { 95 | none, 96 | cuda, 97 | 
openacc 98 | }; 99 | 100 | struct options_t { 101 | char src; 102 | char dst; 103 | enum accel_type accel; 104 | int loop; 105 | int loop_large; 106 | int skip; 107 | int skip_large; 108 | char managedSend; 109 | char managedRecv; 110 | }; 111 | 112 | extern struct options_t options; 113 | 114 | void usage (char const *); 115 | int process_options (int argc, char *argv[], int type); 116 | int allocate_memory (char **sbuf, char **rbuf, int rank); 117 | void print_header (int rank, int type); 118 | void touch_data (void *sbuf, void *rbuf, int rank, size_t size); 119 | void free_memory (void *sbuf, void *rbuf, int rank); 120 | int init_accel (void); 121 | int cleanup_accel (void); 122 | 123 | void set_header (const char * header); 124 | 125 | #endif 126 | -------------------------------------------------------------------------------- /mpi/startup/Makefile.am: -------------------------------------------------------------------------------- 1 | startupdir = $(pkglibexecdir)/mpi/startup 2 | startup_PROGRAMS = osu_init osu_hello 3 | 4 | if EMBEDDED_BUILD 5 | AM_LDFLAGS = 6 | AM_CPPFLAGS = -I$(top_builddir)/../src/include \ 7 | -I${top_srcdir}/../src/include 8 | if BUILD_PROFILING_LIB 9 | AM_LDFLAGS += $(top_builddir)/../lib/lib@PMPILIBNAME@.la 10 | endif 11 | AM_LDFLAGS += $(top_builddir)/../lib/lib@MPILIBNAME@.la 12 | endif 13 | 14 | -------------------------------------------------------------------------------- /mpi/startup/osu_hello.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 
9 | */ 10 | #include 11 | 12 | int main(int argc, char **argv) { 13 | MPI_Init(&argc, &argv); 14 | MPI_Finalize(); 15 | return 0; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /mpi/startup/osu_init.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 9 | */ 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | int 16 | main (int argc, char *argv[]) 17 | { 18 | int myid, numprocs; 19 | struct timespec tp_before, tp_after; 20 | long duration = 0, min, max, avg; 21 | 22 | clock_gettime(CLOCK_REALTIME, &tp_before); 23 | MPI_Init(&argc, &argv); 24 | clock_gettime(CLOCK_REALTIME, &tp_after); 25 | 26 | duration = (tp_after.tv_sec - tp_before.tv_sec) * 1e3; 27 | duration += (tp_after.tv_nsec - tp_before.tv_nsec) / 1e6; 28 | 29 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 30 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 31 | 32 | MPI_Reduce(&duration, &min, 1, MPI_LONG, MPI_MIN, 0, MPI_COMM_WORLD); 33 | MPI_Reduce(&duration, &max, 1, MPI_LONG, MPI_MAX, 0, MPI_COMM_WORLD); 34 | MPI_Reduce(&duration, &avg, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD); 35 | avg = avg/numprocs; 36 | 37 | if(myid == 0) { 38 | printf("nprocs: %d, min: %ld, max: %ld, avg: %ld\n", numprocs, min, max, avg); 39 | } 40 | 41 | MPI_Finalize(); 42 | 43 | return EXIT_SUCCESS; 44 | } 45 | 46 | -------------------------------------------------------------------------------- /openshmem/Makefile.am: -------------------------------------------------------------------------------- 1 | openshmemdir = $(pkglibexecdir)/openshmem 2 | openshmem_PROGRAMS = osu_oshm_get osu_oshm_put osu_oshm_put_mr osu_oshm_atomics 
osu_oshm_barrier osu_oshm_broadcast osu_oshm_fcollect osu_oshm_collect osu_oshm_reduce 3 | 4 | osu_oshm_get_SOURCES = osu_oshm_get.c osu_common.c osu_common.h 5 | osu_oshm_put_SOURCES = osu_oshm_put.c osu_common.c osu_common.h 6 | osu_oshm_put_mr_SOURCES = osu_oshm_put_mr.c osu_common.c osu_common.h 7 | osu_oshm_atomics_SOURCES = osu_oshm_atomics.c osu_common.c osu_common.h 8 | osu_oshm_barrier_SOURCES = osu_oshm_barrier.c osu_common.c osu_common.h osu_coll.h 9 | osu_oshm_broadcast_SOURCES = osu_oshm_broadcast.c osu_common.c osu_common.h osu_coll.h 10 | osu_oshm_collect_SOURCES = osu_oshm_collect.c osu_common.c osu_common.h osu_coll.h 11 | osu_oshm_fcollect_SOURCES = osu_oshm_fcollect.c osu_common.c osu_common.h osu_coll.h 12 | osu_oshm_reduce_SOURCES = osu_oshm_reduce.c osu_common.c osu_common.h osu_coll.h 13 | 14 | -------------------------------------------------------------------------------- /openshmem/osu_common.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | int64_t getMicrosecondTimeStamp() 17 | { 18 | int64_t retval; 19 | struct timeval tv; 20 | if (gettimeofday(&tv, NULL)) { 21 | perror("gettimeofday"); 22 | abort(); 23 | } 24 | retval = ((int64_t)tv.tv_sec) * 1000000 + tv.tv_usec; 25 | return retval; 26 | } 27 | -------------------------------------------------------------------------------- /openshmem/osu_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 
4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 9 | */ 10 | #ifndef _OSU_COMMON_H_ 11 | #define _OSU_COMMON_H_ 12 | 13 | #define TIME() getMicrosecondTimeStamp() 14 | int64_t getMicrosecondTimeStamp(); 15 | 16 | #endif /* _OSU_COMMON_H */ 17 | -------------------------------------------------------------------------------- /openshmem/osu_oshm_barrier.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU OpenSHMEM Barrier Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | #include "osu_coll.h" 19 | 20 | long pSyncBarrier1[_SHMEM_BARRIER_SYNC_SIZE]; 21 | long pSyncBarrier2[_SHMEM_BARRIER_SYNC_SIZE]; 22 | long pSyncRed1[_SHMEM_REDUCE_SYNC_SIZE]; 23 | long pSyncRed2[_SHMEM_REDUCE_SYNC_SIZE]; 24 | 25 | double pWrk1[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 26 | double pWrk2[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 27 | 28 | int main(int argc, char *argv[]) 29 | { 30 | int i = 0, rank; 31 | int skip, numprocs; 32 | static double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 33 | static double latency = 0.0; 34 | int64_t t_start = 0, t_stop = 0, timer=0; 35 | int full = 0, t; 36 | 37 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed1[t] = _SHMEM_SYNC_VALUE; 38 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed2[t] = _SHMEM_SYNC_VALUE; 39 | for ( t = 0; t < _SHMEM_BARRIER_SYNC_SIZE; t += 1) pSyncBarrier1[t] = _SHMEM_SYNC_VALUE; 40 | for ( t = 0; t < 
_SHMEM_BARRIER_SYNC_SIZE; t += 1) pSyncBarrier2[t] = _SHMEM_SYNC_VALUE; 41 | 42 | start_pes(0); 43 | rank = _my_pe(); 44 | numprocs = _num_pes(); 45 | 46 | if (process_args(argc, argv, rank, NULL, &full)) { 47 | return EXIT_SUCCESS; 48 | } 49 | 50 | if(numprocs < 2) { 51 | if(rank == 0) { 52 | fprintf(stderr, "This test requires at least two processes\n"); 53 | } 54 | return EXIT_FAILURE; 55 | } 56 | 57 | print_header(rank, full); 58 | 59 | skip = SKIP_LARGE; 60 | iterations = iterations_large; 61 | timer=0; 62 | 63 | for(i=0; i < iterations + skip ; i++) { 64 | t_start = TIME(); 65 | if(i%2) 66 | shmem_barrier(0, 0, numprocs, pSyncBarrier1); 67 | else 68 | shmem_barrier(0, 0, numprocs, pSyncBarrier2); 69 | t_stop = TIME(); 70 | 71 | if(i>=skip){ 72 | timer+=t_stop-t_start; 73 | } 74 | } 75 | 76 | shmem_barrier_all(); 77 | 78 | latency = (1.0 * timer) / iterations; 79 | shmem_double_min_to_all(&min_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 80 | shmem_double_max_to_all(&max_time, &latency, 1, 0, 0, numprocs, pWrk2, pSyncRed2); 81 | shmem_double_sum_to_all(&avg_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 82 | 83 | avg_time = avg_time/numprocs; 84 | print_data(rank, full, 0, avg_time, min_time, max_time, iterations); 85 | 86 | return EXIT_SUCCESS; 87 | } 88 | 89 | /* vi: set sw=4 sts=4 tw=80: */ 90 | -------------------------------------------------------------------------------- /openshmem/osu_oshm_broadcast.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU OpenSHMEM Broadcast Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include "osu_common.h" 17 | #include "osu_coll.h" 18 | #include 19 | 20 | long pSyncBcast1[_SHMEM_BCAST_SYNC_SIZE]; 21 | long pSyncBcast2[_SHMEM_BCAST_SYNC_SIZE]; 22 | long pSyncRed1[_SHMEM_REDUCE_SYNC_SIZE]; 23 | long pSyncRed2[_SHMEM_REDUCE_SYNC_SIZE]; 24 | 25 | double pWrk1[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 26 | double pWrk2[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 27 | 28 | int main(int argc, char *argv[]) 29 | { 30 | int i = 0, rank, size; 31 | int skip, numprocs; 32 | static double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 33 | static double latency = 0.0; 34 | int64_t t_start = 0, t_stop = 0, timer=0; 35 | char *buffer=NULL; 36 | int max_msg_size = 1048576, full = 0; 37 | int t; 38 | 39 | for ( t = 0; t < _SHMEM_BCAST_SYNC_SIZE; t += 1) pSyncBcast1[t] = _SHMEM_SYNC_VALUE; 40 | for ( t = 0; t < _SHMEM_BCAST_SYNC_SIZE; t += 1) pSyncBcast2[t] = _SHMEM_SYNC_VALUE; 41 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed1[t] = _SHMEM_SYNC_VALUE; 42 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed2[t] = _SHMEM_SYNC_VALUE; 43 | 44 | start_pes(0); 45 | rank = _my_pe(); 46 | numprocs = _num_pes(); 47 | 48 | if (process_args(argc, argv, rank, &max_msg_size, &full)) { 49 | return 0; 50 | } 51 | 52 | if(numprocs < 2) { 53 | if(rank == 0) { 54 | fprintf(stderr, "This test requires at least two processes\n"); 55 | } 56 | return -1; 57 | } 58 | print_header(rank, full); 59 | 60 | buffer = shmalloc(max_msg_size * sizeof(char)); 61 | if(NULL == buffer) { 62 | fprintf(stderr, "malloc failed.\n"); 63 | exit(1); 64 | } 65 | 66 | memset(buffer,1, max_msg_size); 67 | 68 | for(size=1; size <=max_msg_size/sizeof(uint32_t); size *= 2) { 69 | if(size > LARGE_MESSAGE_SIZE) { 70 | skip = SKIP_LARGE; 71 | iterations = iterations_large; 72 | } 73 | else { 74 | skip = SKIP; 75 | } 76 | 77 | timer=0; 78 | for(i=0; i < iterations + skip ; i++) { 79 | t_start = TIME(); 80 | if(i%2) 81 | 
shmem_broadcast32(buffer, buffer, size, 0, 0, 0, numprocs, pSyncBcast1); 82 | else 83 | shmem_broadcast32(buffer, buffer, size, 0, 0, 0, numprocs, pSyncBcast2); 84 | t_stop = TIME(); 85 | 86 | if(i>=skip){ 87 | timer+=t_stop-t_start; 88 | } 89 | shmem_barrier_all(); 90 | } 91 | shmem_barrier_all(); 92 | latency = (1.0 * timer) / iterations; 93 | 94 | shmem_double_min_to_all(&min_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 95 | shmem_double_max_to_all(&max_time, &latency, 1, 0, 0, numprocs, pWrk2, pSyncRed2); 96 | shmem_double_sum_to_all(&avg_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 97 | avg_time = avg_time/numprocs; 98 | 99 | print_data(rank, full, size*sizeof(uint32_t), avg_time, min_time, max_time, iterations); 100 | } 101 | 102 | shfree(buffer); 103 | return EXIT_SUCCESS; 104 | } 105 | 106 | /* vi: set sw=4 sts=4 tw=80: */ 107 | -------------------------------------------------------------------------------- /openshmem/osu_oshm_collect.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU OpenSHMEM Collect Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | */ 8 | 9 | /* 10 | This program is available under BSD licensing. 11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions are 14 | met: 15 | 16 | (1) Redistributions of source code must retain the above copyright 17 | notice, this list of conditions and the following disclaimer. 18 | 19 | (2) Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 
22 | 23 | (3) Neither the name of The Ohio State University nor the names of 24 | their contributors may be used to endorse or promote products derived 25 | from this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
38 | 39 | */ 40 | 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include "osu_common.h" 46 | #include "osu_coll.h" 47 | #include 48 | 49 | long pSyncCollect1[_SHMEM_COLLECT_SYNC_SIZE]; 50 | long pSyncCollect2[_SHMEM_COLLECT_SYNC_SIZE]; 51 | long pSyncRed1[_SHMEM_REDUCE_SYNC_SIZE]; 52 | long pSyncRed2[_SHMEM_REDUCE_SYNC_SIZE]; 53 | 54 | double pWrk1[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 55 | double pWrk2[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 56 | 57 | int main(int argc, char *argv[]) 58 | { 59 | int i, numprocs, rank, size; 60 | unsigned long align_size = sysconf(_SC_PAGESIZE); 61 | int skip; 62 | static double latency = 0.0; 63 | int64_t t_start = 0, t_stop = 0, timer=0; 64 | static double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 65 | char *recvbuff, *sendbuff; 66 | int max_msg_size = 1048576, full = 0, t; 67 | uint64_t requested_mem_limit = 0; 68 | 69 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed1[t] = _SHMEM_SYNC_VALUE; 70 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed2[t] = _SHMEM_SYNC_VALUE; 71 | for ( t = 0; t < _SHMEM_COLLECT_SYNC_SIZE; t += 1) pSyncCollect1[t] = _SHMEM_SYNC_VALUE; 72 | for ( t = 0; t < _SHMEM_COLLECT_SYNC_SIZE; t += 1) pSyncCollect2[t] = _SHMEM_SYNC_VALUE; 73 | 74 | start_pes(0); 75 | rank = _my_pe(); 76 | numprocs = _num_pes(); 77 | 78 | if (process_args(argc, argv, rank, &max_msg_size, &full)) { 79 | return 0; 80 | } 81 | 82 | if(numprocs < 2) { 83 | if(rank == 0) { 84 | fprintf(stderr, "This test requires at least two processes\n"); 85 | } 86 | return -1; 87 | } 88 | 89 | requested_mem_limit = (uint64_t) (max_msg_size) * numprocs; 90 | if( requested_mem_limit > max_mem_limit) { 91 | max_msg_size = max_mem_limit/numprocs; 92 | } 93 | 94 | print_header(rank, full); 95 | 96 | recvbuff = (char *)shmemalign(align_size, sizeof(char) * max_msg_size 97 | * numprocs); 98 | if (NULL == recvbuff) { 99 | fprintf(stderr, "shmemalign failed.\n"); 100 | exit(1); 101 | } 102 | 103 | sendbuff = (char 
*)shmemalign(align_size, sizeof(char) * max_msg_size); 104 | if (NULL == sendbuff) { 105 | fprintf(stderr, "shmemalign failed.\n"); 106 | exit(1); 107 | } 108 | 109 | memset(recvbuff, 1, max_msg_size*numprocs); 110 | memset(sendbuff, 0, max_msg_size); 111 | 112 | for(size=1; size <= max_msg_size/sizeof(uint32_t); size *= 2) { 113 | 114 | if(size > LARGE_MESSAGE_SIZE) { 115 | skip = SKIP_LARGE; 116 | iterations = iterations_large; 117 | } else { 118 | skip = SKIP; 119 | } 120 | 121 | shmem_barrier_all(); 122 | 123 | timer=0; 124 | for(i=0; i < iterations + skip ; i++) { 125 | t_start = TIME(); 126 | if(i%2) 127 | shmem_collect32(recvbuff, sendbuff, size, 0, 0, numprocs, pSyncCollect1); 128 | else 129 | shmem_collect32(recvbuff, sendbuff, size, 0, 0, numprocs, pSyncCollect2); 130 | t_stop = TIME(); 131 | 132 | if(i >= skip) { 133 | timer+= t_stop-t_start; 134 | } 135 | shmem_barrier_all(); 136 | } 137 | 138 | shmem_barrier_all(); 139 | latency = (double)(timer * 1.0) / iterations; 140 | 141 | shmem_double_min_to_all(&min_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 142 | shmem_double_max_to_all(&max_time, &latency, 1, 0, 0, numprocs, pWrk2, pSyncRed2); 143 | shmem_double_sum_to_all(&avg_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 144 | avg_time = avg_time/numprocs; 145 | 146 | print_data(rank, full, size*sizeof(uint32_t), avg_time, min_time, max_time, iterations); 147 | } 148 | 149 | shmem_barrier_all(); 150 | shfree(recvbuff); 151 | shfree(sendbuff); 152 | 153 | return EXIT_SUCCESS; 154 | } 155 | 156 | /* vi: set sw=4 sts=4 tw=80: */ 157 | -------------------------------------------------------------------------------- /openshmem/osu_oshm_fcollect.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU OpenSHMEM FCollect Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. 
Panda (panda@cse.ohio-state.edu) 7 | */ 8 | 9 | /* 10 | This program is available under BSD licensing. 11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions are 14 | met: 15 | 16 | (1) Redistributions of source code must retain the above copyright 17 | notice, this list of conditions and the following disclaimer. 18 | 19 | (2) Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | (3) Neither the name of The Ohio State University nor the names of 24 | their contributors may be used to endorse or promote products derived 25 | from this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
38 | 39 | */ 40 | 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include "osu_common.h" 46 | #include "osu_coll.h" 47 | #include 48 | 49 | long pSyncCollect1[_SHMEM_COLLECT_SYNC_SIZE]; 50 | long pSyncCollect2[_SHMEM_COLLECT_SYNC_SIZE]; 51 | long pSyncRed1[_SHMEM_REDUCE_SYNC_SIZE]; 52 | long pSyncRed2[_SHMEM_REDUCE_SYNC_SIZE]; 53 | 54 | double pWrk1[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 55 | double pWrk2[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 56 | 57 | int main(int argc, char *argv[]) 58 | { 59 | int i, numprocs, rank, size; 60 | unsigned long align_size = sysconf(_SC_PAGESIZE); 61 | int skip; 62 | static double latency = 0.0; 63 | int64_t t_start = 0, t_stop = 0, timer=0; 64 | static double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 65 | char *recvbuff, *sendbuff; 66 | int max_msg_size = 1048576, full = 0, t; 67 | uint64_t requested_mem_limit = 0; 68 | 69 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed1[t] = _SHMEM_SYNC_VALUE; 70 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed2[t] = _SHMEM_SYNC_VALUE; 71 | for ( t = 0; t < _SHMEM_COLLECT_SYNC_SIZE; t += 1) pSyncCollect1[t] = _SHMEM_SYNC_VALUE; 72 | for ( t = 0; t < _SHMEM_COLLECT_SYNC_SIZE; t += 1) pSyncCollect2[t] = _SHMEM_SYNC_VALUE; 73 | 74 | start_pes(0); 75 | rank = _my_pe(); 76 | numprocs = _num_pes(); 77 | 78 | if (process_args(argc, argv, rank, &max_msg_size, &full)) { 79 | return 0; 80 | } 81 | 82 | if(numprocs < 2) { 83 | if(rank == 0) { 84 | fprintf(stderr, "This test requires at least two processes\n"); 85 | } 86 | return -1; 87 | } 88 | 89 | requested_mem_limit = (uint64_t) (max_msg_size) * numprocs; 90 | if( requested_mem_limit > max_mem_limit) { 91 | max_msg_size = max_mem_limit/numprocs; 92 | } 93 | 94 | print_header(rank, full); 95 | 96 | recvbuff = (char *)shmemalign(align_size, sizeof(char) * max_msg_size 97 | * numprocs); 98 | if (NULL == recvbuff) { 99 | fprintf(stderr, "shmemalign failed.\n"); 100 | exit(1); 101 | } 102 | 103 | sendbuff = (char 
*)shmemalign(align_size, sizeof(char) * max_msg_size); 104 | if (NULL == sendbuff) { 105 | fprintf(stderr, "shmemalign failed.\n"); 106 | exit(1); 107 | } 108 | 109 | memset(recvbuff, 1, max_msg_size*numprocs); 110 | memset(sendbuff, 0, max_msg_size); 111 | 112 | for(size=1; size <= max_msg_size/sizeof(uint32_t); size *= 2) { 113 | 114 | if(size > LARGE_MESSAGE_SIZE) { 115 | skip = SKIP_LARGE; 116 | iterations = iterations_large; 117 | } else { 118 | skip = SKIP; 119 | } 120 | 121 | shmem_barrier_all(); 122 | 123 | timer=0; 124 | for(i=0; i < iterations + skip ; i++) { 125 | t_start = TIME(); 126 | if(i%2) 127 | shmem_fcollect32(recvbuff, sendbuff, size, 0, 0, numprocs, pSyncCollect1); 128 | else 129 | shmem_fcollect32(recvbuff, sendbuff, size, 0, 0, numprocs, pSyncCollect2); 130 | t_stop = TIME(); 131 | 132 | if(i >= skip) { 133 | timer+= t_stop-t_start; 134 | } 135 | shmem_barrier_all(); 136 | } 137 | 138 | shmem_barrier_all(); 139 | 140 | latency = (double)(timer * 1.0) / iterations; 141 | shmem_double_min_to_all(&min_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 142 | shmem_double_max_to_all(&max_time, &latency, 1, 0, 0, numprocs, pWrk2, pSyncRed2); 143 | shmem_double_sum_to_all(&avg_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 144 | avg_time = avg_time/numprocs; 145 | 146 | print_data(rank, full, size*sizeof(uint32_t), avg_time, min_time, max_time, iterations); 147 | } 148 | 149 | shmem_barrier_all(); 150 | shfree(recvbuff); 151 | shfree(sendbuff); 152 | 153 | return EXIT_SUCCESS; 154 | } 155 | 156 | /* vi: set sw=4 sts=4 tw=80: */ 157 | 158 | -------------------------------------------------------------------------------- /openshmem/osu_oshm_get.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU OpenSHMEM Get Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. 
Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | 19 | #define MESSAGE_ALIGNMENT 64 20 | #define MAX_MSG_SIZE (1<<20) 21 | #define MYBUFSIZE (MAX_MSG_SIZE + MESSAGE_ALIGNMENT) 22 | 23 | char s_buf_original[MYBUFSIZE]; 24 | char r_buf_original[MYBUFSIZE]; 25 | 26 | int skip = 1000; 27 | int loop = 10000; 28 | int skip_large = 10; 29 | int loop_large = 100; 30 | int large_message_size = 8192; 31 | 32 | #ifdef PACKAGE_VERSION 33 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 34 | #else 35 | # define HEADER "# " BENCHMARK "\n" 36 | #endif 37 | 38 | #ifndef FIELD_WIDTH 39 | # define FIELD_WIDTH 20 40 | #endif 41 | 42 | #ifndef FLOAT_PRECISION 43 | # define FLOAT_PRECISION 2 44 | #endif 45 | 46 | static void usage(int myid) 47 | { 48 | if(myid == 0) { 49 | fprintf(stderr, "Invalid arguments. 
Usage: \n"); 50 | } 51 | } 52 | 53 | int main(int argc, char *argv[]) 54 | { 55 | int myid, numprocs, i; 56 | int size; 57 | char *s_buf, *r_buf; 58 | char *s_buf_heap, *r_buf_heap; 59 | int align_size; 60 | int64_t t_start = 0, t_end = 0; 61 | int use_heap = 0; //default uses global 62 | 63 | start_pes(0); 64 | myid = _my_pe(); 65 | numprocs = _num_pes(); 66 | 67 | if(numprocs != 2) { 68 | if(myid == 0) { 69 | fprintf(stderr, "This test requires exactly two processes\n"); 70 | } 71 | 72 | return EXIT_FAILURE; 73 | } 74 | 75 | if(argc != 2) { 76 | usage(myid); 77 | 78 | return EXIT_FAILURE; 79 | } 80 | 81 | if(0 == strncmp(argv[1], "heap", strlen("heap"))){ 82 | use_heap = 1; 83 | } else if(0 == strncmp(argv[1], "global", strlen("global"))){ 84 | use_heap = 0; 85 | } else { 86 | usage(myid); 87 | return EXIT_FAILURE; 88 | } 89 | 90 | align_size = MESSAGE_ALIGNMENT; 91 | 92 | /**************Allocating Memory*********************/ 93 | 94 | if(use_heap){ 95 | 96 | s_buf_heap = shmalloc(MYBUFSIZE); 97 | r_buf_heap = shmalloc(MYBUFSIZE); 98 | 99 | s_buf = 100 | (char *) (((unsigned long) s_buf_heap + (align_size - 1)) / 101 | align_size * align_size); 102 | 103 | r_buf = 104 | (char *) (((unsigned long) r_buf_heap + (align_size - 1)) / 105 | align_size * align_size); 106 | } else { 107 | 108 | s_buf = 109 | (char *) (((unsigned long) s_buf_original + (align_size - 1)) / 110 | align_size * align_size); 111 | 112 | r_buf = 113 | (char *) (((unsigned long) r_buf_original + (align_size - 1)) / 114 | align_size * align_size); 115 | } 116 | 117 | /**************Memory Allocation Done*********************/ 118 | 119 | if(myid == 0) { 120 | fprintf(stdout, HEADER); 121 | fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)"); 122 | fflush(stdout); 123 | } 124 | 125 | for(size = 1; size <= MAX_MSG_SIZE; size = (size ? 
size * 2 : 1)) { 126 | 127 | /* touch the data */ 128 | for(i = 0; i < size; i++) { 129 | s_buf[i] = 'a'; 130 | r_buf[i] = 'b'; 131 | } 132 | 133 | if(size > large_message_size) { 134 | loop = loop_large = 100; 135 | skip = skip_large = 0; 136 | } 137 | 138 | shmem_barrier_all(); 139 | 140 | if(myid == 0) 141 | { 142 | for(i = 0; i < loop + skip; i++) { 143 | if(i == skip) t_start = TIME(); 144 | 145 | shmem_getmem(r_buf, s_buf, size, 1); 146 | } 147 | 148 | t_end = TIME(); 149 | } 150 | shmem_barrier_all(); 151 | 152 | if(myid == 0) { 153 | double latency = (1.0 * (t_end-t_start)) / loop; 154 | 155 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 156 | FLOAT_PRECISION, latency); 157 | fflush(stdout); 158 | } 159 | } 160 | 161 | shmem_barrier_all(); 162 | 163 | if(use_heap){ 164 | shfree(s_buf_heap); 165 | shfree(r_buf_heap); 166 | } 167 | 168 | shmem_barrier_all(); 169 | return EXIT_SUCCESS; 170 | } 171 | 172 | /* vi: set sw=4 sts=4 tw=80: */ 173 | -------------------------------------------------------------------------------- /openshmem/osu_oshm_put.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU OpenSHMEM Put Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | 19 | #define MESSAGE_ALIGNMENT 64 20 | #define MAX_MSG_SIZE (1<<20) 21 | #define MYBUFSIZE (MAX_MSG_SIZE + MESSAGE_ALIGNMENT) 22 | 23 | char s_buf_original[MYBUFSIZE]; 24 | char r_buf_original[MYBUFSIZE]; 25 | 26 | int skip = 1000; 27 | int loop = 10000; 28 | int skip_large = 10; 29 | int loop_large = 100; 30 | int large_message_size = 8192; 31 | 32 | #ifdef PACKAGE_VERSION 33 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 34 | #else 35 | # define HEADER "# " BENCHMARK "\n" 36 | #endif 37 | 38 | #ifndef FIELD_WIDTH 39 | # define FIELD_WIDTH 20 40 | #endif 41 | 42 | #ifndef FLOAT_PRECISION 43 | # define FLOAT_PRECISION 2 44 | #endif 45 | 46 | static void usage(int myid) 47 | { 48 | if(myid == 0) { 49 | fprintf(stderr, "Invalid arguments. Usage: \n"); 50 | } 51 | } 52 | 53 | int main(int argc, char *argv[]) 54 | { 55 | int myid, numprocs, i; 56 | int size; 57 | char *s_buf, *r_buf; 58 | char *s_buf_heap, *r_buf_heap; 59 | int align_size; 60 | int64_t t_start = 0, t_end = 0; 61 | int use_heap = 0; //default uses global 62 | 63 | start_pes(0); 64 | myid = _my_pe(); 65 | numprocs = _num_pes(); 66 | 67 | if(numprocs != 2) { 68 | if(myid == 0) { 69 | fprintf(stderr, "This test requires exactly two processes\n"); 70 | } 71 | 72 | return EXIT_FAILURE; 73 | } 74 | 75 | if(argc != 2) { 76 | usage(myid); 77 | 78 | return EXIT_FAILURE; 79 | } 80 | 81 | if(0 == strncmp(argv[1], "heap", strlen("heap"))){ 82 | use_heap = 1; 83 | } else if(0 == strncmp(argv[1], "global", strlen("global"))){ 84 | use_heap = 0; 85 | } else { 86 | usage(myid); 87 | return EXIT_FAILURE; 88 | } 89 | 90 | align_size = MESSAGE_ALIGNMENT; 91 | 92 | /**************Allocating Memory*********************/ 93 | 94 | if(use_heap){ 95 | 96 | s_buf_heap = shmalloc(MYBUFSIZE); 97 | r_buf_heap = shmalloc(MYBUFSIZE); 98 | 99 | s_buf = 100 | (char *) (((unsigned long) s_buf_heap + 
(align_size - 1)) / 101 | align_size * align_size); 102 | 103 | r_buf = 104 | (char *) (((unsigned long) r_buf_heap + (align_size - 1)) / 105 | align_size * align_size); 106 | } else { 107 | 108 | s_buf = 109 | (char *) (((unsigned long) s_buf_original + (align_size - 1)) / 110 | align_size * align_size); 111 | 112 | r_buf = 113 | (char *) (((unsigned long) r_buf_original + (align_size - 1)) / 114 | align_size * align_size); 115 | } 116 | 117 | /**************Memory Allocation Done*********************/ 118 | 119 | if(myid == 0) { 120 | fprintf(stdout, HEADER); 121 | fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)"); 122 | fflush(stdout); 123 | } 124 | 125 | for(size = 1; size <= MAX_MSG_SIZE; size = (size ? size * 2 : 1)) { 126 | 127 | /* touch the data */ 128 | for(i = 0; i < size; i++) { 129 | s_buf[i] = 'a'; 130 | r_buf[i] = 'b'; 131 | } 132 | 133 | if(size > large_message_size) { 134 | loop = loop_large = 100; 135 | skip = skip_large = 0; 136 | } 137 | 138 | shmem_barrier_all(); 139 | 140 | if(myid == 0) 141 | { 142 | for(i = 0; i < loop + skip; i++) { 143 | if(i == skip) t_start = TIME(); 144 | 145 | shmem_putmem(r_buf, s_buf, size, 1); 146 | shmem_quiet(); 147 | } 148 | 149 | t_end = TIME(); 150 | } 151 | shmem_barrier_all(); 152 | 153 | if(myid == 0) { 154 | double latency = (1.0 * (t_end-t_start)) / loop; 155 | 156 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 157 | FLOAT_PRECISION, latency); 158 | fflush(stdout); 159 | } 160 | } 161 | 162 | shmem_barrier_all(); 163 | 164 | if(use_heap){ 165 | shfree(s_buf_heap); 166 | shfree(r_buf_heap); 167 | } 168 | 169 | shmem_barrier_all(); 170 | return EXIT_SUCCESS; 171 | } 172 | 173 | /* vi: set sw=4 sts=4 tw=80: */ 174 | -------------------------------------------------------------------------------- /openshmem/osu_oshm_reduce.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU OpenSHMEM Reduce Latency Test" 2 | /* 3 | * 
Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | */ 8 | 9 | /* 10 | This program is available under BSD licensing. 11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions are 14 | met: 15 | 16 | (1) Redistributions of source code must retain the above copyright 17 | notice, this list of conditions and the following disclaimer. 18 | 19 | (2) Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | (3) Neither the name of The Ohio State University nor the names of 24 | their contributors may be used to endorse or promote products derived 25 | from this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
38 | 39 | */ 40 | 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include "osu_common.h" 46 | #include "osu_coll.h" 47 | #include 48 | 49 | long pSyncRed1[_SHMEM_REDUCE_SYNC_SIZE]; 50 | long pSyncRed2[_SHMEM_REDUCE_SYNC_SIZE]; 51 | 52 | double pWrk1[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 53 | double pWrk2[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; 54 | 55 | int main(int argc, char *argv[]) 56 | { 57 | int i, numprocs, rank, size; 58 | unsigned long align_size = sysconf(_SC_PAGESIZE); 59 | int skip; 60 | static double latency = 0.0; 61 | int64_t t_start = 0, t_stop = 0, timer=0; 62 | static double avg_time = 0.0, max_time = 0.0, min_time = 0.0; 63 | float *sendbuf, *recvbuf; 64 | int max_msg_size = 1048576, full = 0, t; 65 | 66 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed1[t] = _SHMEM_SYNC_VALUE; 67 | for ( t = 0; t < _SHMEM_REDUCE_SYNC_SIZE; t += 1) pSyncRed2[t] = _SHMEM_SYNC_VALUE; 68 | 69 | start_pes(0); 70 | rank = _my_pe(); 71 | numprocs = _num_pes(); 72 | 73 | if (process_args(argc, argv, rank, &max_msg_size, &full)) { 74 | return EXIT_SUCCESS; 75 | } 76 | 77 | if(numprocs < 2) { 78 | if(rank == 0) { 79 | fprintf(stderr, "This test requires at least two processes\n"); 80 | } 81 | return EXIT_FAILURE; 82 | } 83 | 84 | int nreduce = max_msg_size/sizeof(float); 85 | float *pWrkF1 = shmalloc(MAX(nreduce/2+1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE)); 86 | float *pWrkF2 = shmalloc(MAX(nreduce/2+1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE)); 87 | 88 | print_header(rank, full); 89 | 90 | recvbuf = (float *)shmemalign(align_size, max_msg_size); 91 | if (NULL == recvbuf) { 92 | fprintf(stderr, "shmemalign failed.\n"); 93 | exit(1); 94 | } 95 | 96 | sendbuf = (float *)shmemalign(align_size, max_msg_size); 97 | if (NULL == sendbuf) { 98 | fprintf(stderr, "shmemalign failed.\n"); 99 | exit(1); 100 | } 101 | 102 | memset(sendbuf, 1, max_msg_size); 103 | memset(recvbuf, 0, max_msg_size); 104 | 105 | for(size=1; size*sizeof(float)<= max_msg_size; size *= 2) { 106 | 107 | if(size 
> LARGE_MESSAGE_SIZE) { 108 | skip = SKIP_LARGE; 109 | iterations = iterations_large; 110 | } else { 111 | skip = SKIP; 112 | } 113 | 114 | shmem_barrier_all(); 115 | 116 | timer=0; 117 | for(i=0; i < iterations + skip ; i++) { 118 | t_start = TIME(); 119 | 120 | if(i%2) 121 | shmem_float_sum_to_all(recvbuf, sendbuf, size, 0, 0, numprocs, pWrkF1, pSyncRed1); 122 | else 123 | shmem_float_sum_to_all(recvbuf, sendbuf, size, 0, 0, numprocs, pWrkF2, pSyncRed2); 124 | 125 | t_stop=TIME(); 126 | 127 | if(i>=skip){ 128 | timer+=t_stop-t_start; 129 | } 130 | shmem_barrier_all(); 131 | } 132 | 133 | latency = (double)(timer * 1.0) / iterations; 134 | shmem_double_min_to_all(&min_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 135 | shmem_double_max_to_all(&max_time, &latency, 1, 0, 0, numprocs, pWrk2, pSyncRed2); 136 | shmem_double_sum_to_all(&avg_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1); 137 | avg_time = avg_time/numprocs; 138 | 139 | print_data(rank, full, sizeof(float)*size, avg_time, min_time, max_time, iterations); 140 | shmem_barrier_all(); 141 | } 142 | 143 | shmem_barrier_all(); 144 | 145 | shfree(pWrkF1); 146 | shfree(pWrkF2); 147 | 148 | shfree(recvbuf); 149 | shfree(sendbuf); 150 | 151 | return EXIT_SUCCESS; 152 | } 153 | 154 | /* vi: set sw=4 sts=4 tw=80: */ 155 | 156 | -------------------------------------------------------------------------------- /upc/Makefile.am: -------------------------------------------------------------------------------- 1 | upcdir = $(pkglibexecdir)/upc 2 | upc_PROGRAMS = osu_upc_memget osu_upc_memput osu_upc_all_barrier \ 3 | osu_upc_all_broadcast osu_upc_all_exchange \ 4 | osu_upc_all_gather_all osu_upc_all_gather osu_upc_all_reduce \ 5 | osu_upc_all_scatter 6 | 7 | osu_upc_all_barrier_SOURCES = osu_upc_all_barrier.c osu_common.c osu_common.h osu_coll.h 8 | osu_upc_all_broadcast_SOURCES = osu_upc_all_broadcast.c osu_common.c osu_common.h osu_coll.h 9 | osu_upc_all_exchange_SOURCES = osu_upc_all_exchange.c 
osu_common.c osu_common.h osu_coll.h 10 | osu_upc_all_gather_SOURCES = osu_upc_all_gather.c osu_common.c osu_common.h osu_coll.h 11 | osu_upc_all_gather_all_SOURCES = osu_upc_all_gather_all.c osu_common.c osu_common.h osu_coll.h 12 | osu_upc_all_reduce_SOURCES = osu_upc_all_reduce.c osu_common.c osu_common.h osu_coll.h 13 | osu_upc_all_scatter_SOURCES = osu_upc_all_scatter.c osu_common.c osu_common.h osu_coll.h 14 | -------------------------------------------------------------------------------- /upc/osu_common.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | int64_t getMicrosecondTimeStamp() 17 | { 18 | int64_t retval; 19 | struct timeval tv; 20 | if (gettimeofday(&tv, NULL)) { 21 | perror("gettimeofday"); 22 | abort(); 23 | } 24 | retval = ((int64_t)tv.tv_sec) * 1000000 + tv.tv_usec; 25 | return retval; 26 | } 27 | -------------------------------------------------------------------------------- /upc/osu_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio State University. 4 | * 5 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 6 | * 7 | * For detailed copyright and licensing information, please refer to the 8 | * copyright file COPYRIGHT in the top level OMB directory. 
9 | */ 10 | #ifndef _OSU_COMMON_H_ 11 | #define _OSU_COMMON_H_ 12 | 13 | #define TIME() getMicrosecondTimeStamp() 14 | int64_t getMicrosecondTimeStamp(); 15 | 16 | #endif /* _OSU_COMMON_H */ 17 | -------------------------------------------------------------------------------- /upc/osu_upc_all_barrier.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC Barrier Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include "osu_coll.h" 16 | #include "osu_common.h" 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #ifdef PACKAGE_VERSION 23 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 24 | #else 25 | # define HEADER "# " BENCHMARK "\n" 26 | #endif 27 | 28 | #define SYNC_MODE (UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC) 29 | 30 | shared double avg_time, max_time, min_time; 31 | shared double latency[THREADS]; 32 | 33 | int main(int argc, char *argv[]) 34 | { 35 | int i = 0; 36 | int skip; 37 | int64_t t_start = 0, t_stop = 0, timer=0; 38 | int full = 0; 39 | 40 | if (process_args(argc, argv, MYTHREAD, NULL, &full, HEADER)) { 41 | return 0; 42 | } 43 | 44 | if(THREADS < 2) { 45 | if(MYTHREAD == 0) { 46 | fprintf(stderr, "This test requires at least two processes\n"); 47 | } 48 | return -1; 49 | } 50 | print_header(HEADER, MYTHREAD, full); 51 | upc_barrier; 52 | 53 | skip = SKIP; 54 | timer=0; 55 | for(i=0; i < iterations + skip ; i++) { 56 | t_start = TIME(); 57 | upc_barrier; 58 | t_stop = TIME(); 59 | 60 | if(i>=skip){ 61 | timer+=t_stop-t_start; 62 | } 63 | } 64 | upc_barrier; 65 | latency[MYTHREAD] = (1.0 * timer) / iterations; 66 | 67 | 
upc_all_reduceD(&min_time, latency, UPC_MIN, THREADS, 1, NULL, SYNC_MODE); 68 | upc_all_reduceD(&max_time, latency, UPC_MAX, THREADS, 1, NULL, SYNC_MODE); 69 | upc_all_reduceD(&avg_time, latency, UPC_ADD, THREADS, 1, NULL, SYNC_MODE); 70 | if(!MYTHREAD) 71 | avg_time = avg_time/THREADS; 72 | 73 | print_data(MYTHREAD, full, 0, avg_time, min_time, max_time, iterations); 74 | upc_barrier; 75 | return EXIT_SUCCESS; 76 | } 77 | 78 | /* vi: set sw=4 sts=4 tw=80: */ 79 | -------------------------------------------------------------------------------- /upc/osu_upc_all_broadcast.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC Broadcast Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | #include 19 | 20 | #ifdef PACKAGE_VERSION 21 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 22 | #else 23 | # define HEADER "# " BENCHMARK "\n" 24 | #endif 25 | #include "osu_coll.h" 26 | 27 | #define SYNC_MODE (UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC) 28 | 29 | shared [] char *src; 30 | shared char *dst; 31 | 32 | shared double avg_time, max_time, min_time; 33 | shared double latency[THREADS]; 34 | 35 | int main(int argc, char *argv[]) 36 | { 37 | int i = 0, size; 38 | int skip; 39 | int64_t t_start = 0, t_stop = 0, timer=0; 40 | int max_msg_size = 1<<20, full = 0; 41 | 42 | if (process_args(argc, argv, MYTHREAD, &max_msg_size, &full, HEADER)) { 43 | return 0; 44 | } 45 | 46 | if(THREADS < 2) { 47 | if(MYTHREAD == 0) { 48 | fprintf(stderr, "This test requires at least two processes\n"); 49 | } 50 | return -1; 51 | } 52 | 53 | print_header(HEADER, MYTHREAD, full); 54 | 55 | src = upc_all_alloc(1, max_msg_size*sizeof(char)); 56 | dst = upc_all_alloc(THREADS, max_msg_size*sizeof(char)); 57 | 58 | if(NULL == src || NULL == dst) { 59 | fprintf(stderr, "malloc failed.\n"); 60 | exit(1); 61 | } 62 | 63 | for(size=1; size <=max_msg_size; size *= 2) { 64 | if(size > LARGE_MESSAGE_SIZE) { 65 | skip = SKIP_LARGE; 66 | iterations = iterations_large; 67 | } 68 | else { 69 | skip = SKIP; 70 | } 71 | 72 | timer=0; 73 | for(i=0; i < iterations + skip ; i++) { 74 | t_start = TIME(); 75 | upc_all_broadcast(dst, src, size, SYNC_MODE ); 76 | t_stop = TIME(); 77 | 78 | if(i>=skip){ 79 | timer+=t_stop-t_start; 80 | } 81 | upc_barrier; 82 | } 83 | upc_barrier; 84 | latency[MYTHREAD] = (1.0 * timer) / iterations; 85 | 86 | upc_all_reduceD(&min_time, latency, UPC_MIN, THREADS, 1, NULL, SYNC_MODE); 87 | upc_all_reduceD(&max_time, latency, UPC_MAX, THREADS, 1, NULL, SYNC_MODE); 88 | upc_all_reduceD(&avg_time, latency, UPC_ADD, THREADS, 1, NULL, SYNC_MODE); 89 | 
if(!MYTHREAD) 90 | avg_time = avg_time/THREADS; 91 | 92 | print_data(MYTHREAD, full, size*sizeof(char), avg_time, min_time, max_time, iterations); 93 | } 94 | 95 | return EXIT_SUCCESS; 96 | } 97 | 98 | /* vi: set sw=4 sts=4 tw=80: */ 99 | -------------------------------------------------------------------------------- /upc/osu_upc_all_exchange.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC Exchange Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | #include "osu_coll.h" 19 | #include 20 | 21 | #ifdef PACKAGE_VERSION 22 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 23 | #else 24 | # define HEADER "# " BENCHMARK "\n" 25 | #endif 26 | 27 | #define SYNC_MODE (UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC) 28 | 29 | shared char *src, *dst; 30 | 31 | shared double avg_time, max_time, min_time; 32 | shared double latency[THREADS]; 33 | 34 | int main(int argc, char *argv[]) 35 | { 36 | int i = 0, size; 37 | int skip; 38 | int64_t t_start = 0, t_stop = 0, timer=0; 39 | int max_msg_size = 1<<20, full = 0; 40 | 41 | if (process_args(argc, argv, MYTHREAD, &max_msg_size, &full, HEADER)) { 42 | return 0; 43 | } 44 | 45 | if(THREADS < 2) { 46 | if(MYTHREAD == 0) { 47 | fprintf(stderr, "This test requires at least two processes\n"); 48 | } 49 | return -1; 50 | } 51 | print_header(HEADER, MYTHREAD, full); 52 | 53 | src = upc_all_alloc(THREADS*THREADS, max_msg_size*sizeof(char)); 54 | dst = upc_all_alloc(THREADS*THREADS, max_msg_size*sizeof(char)); 55 | upc_barrier; 56 | 57 | if(NULL == src || NULL == dst) { 58 | 
fprintf(stderr, "malloc failed.\n"); 59 | exit(1); 60 | } 61 | 62 | for(size=1; size <=max_msg_size; size *= 2) { 63 | if(size > LARGE_MESSAGE_SIZE) { 64 | skip = SKIP_LARGE; 65 | iterations = iterations_large; 66 | } 67 | else { 68 | skip = SKIP; 69 | } 70 | 71 | timer=0; 72 | for(i=0; i < iterations + skip ; i++) { 73 | t_start = TIME(); 74 | upc_all_exchange(dst, src, size, SYNC_MODE); 75 | t_stop = TIME(); 76 | 77 | if(i>=skip){ 78 | timer+=t_stop-t_start; 79 | } 80 | upc_barrier; 81 | } 82 | upc_barrier; 83 | latency[MYTHREAD] = (1.0 * timer) / iterations; 84 | 85 | upc_all_reduceD(&min_time, latency, UPC_MIN, THREADS, 1, NULL, SYNC_MODE); 86 | upc_all_reduceD(&max_time, latency, UPC_MAX, THREADS, 1, NULL, SYNC_MODE); 87 | upc_all_reduceD(&avg_time, latency, UPC_ADD, THREADS, 1, NULL, SYNC_MODE); 88 | if(!MYTHREAD) 89 | avg_time = avg_time/THREADS; 90 | 91 | print_data(MYTHREAD, full, size*sizeof(char), avg_time, min_time, max_time, iterations); 92 | } 93 | 94 | upc_barrier; 95 | return EXIT_SUCCESS; 96 | } 97 | 98 | /* vi: set sw=4 sts=4 tw=80: */ 99 | -------------------------------------------------------------------------------- /upc/osu_upc_all_gather.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC Gather Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | #include "osu_coll.h" 19 | #include 20 | 21 | #ifdef PACKAGE_VERSION 22 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 23 | #else 24 | # define HEADER "# " BENCHMARK "\n" 25 | #endif 26 | 27 | #define SYNC_MODE (UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC) 28 | 29 | shared char *src, *dst; 30 | 31 | shared double avg_time, max_time, min_time; 32 | shared double latency[THREADS]; 33 | 34 | int main(int argc, char *argv[]) 35 | { 36 | int i = 0, size; 37 | int skip; 38 | int64_t t_start = 0, t_stop = 0, timer=0; 39 | int max_msg_size = 1<<20, full = 0; 40 | 41 | if (process_args(argc, argv, MYTHREAD, &max_msg_size, &full, HEADER)) { 42 | return 0; 43 | } 44 | 45 | if(THREADS < 2) { 46 | if(MYTHREAD == 0) { 47 | fprintf(stderr, "This test requires at least two processes\n"); 48 | } 49 | return -1; 50 | } 51 | print_header(HEADER, MYTHREAD, full); 52 | 53 | src = upc_all_alloc(THREADS, max_msg_size*sizeof(char)); 54 | dst = upc_all_alloc(1, THREADS*max_msg_size*sizeof(char)); 55 | upc_barrier; 56 | 57 | if(NULL == src || NULL == dst) { 58 | fprintf(stderr, "malloc failed.\n"); 59 | exit(1); 60 | } 61 | 62 | for(size=1; size <=max_msg_size; size *= 2) { 63 | if(size > LARGE_MESSAGE_SIZE) { 64 | skip = SKIP_LARGE; 65 | iterations = iterations_large; 66 | } 67 | else { 68 | skip = SKIP; 69 | } 70 | 71 | timer=0; 72 | for(i=0; i < iterations + skip ; i++) { 73 | t_start = TIME(); 74 | upc_all_gather(dst, src, size, SYNC_MODE ); 75 | t_stop = TIME(); 76 | 77 | if(i>=skip){ 78 | timer+=t_stop-t_start; 79 | } 80 | upc_barrier; 81 | } 82 | upc_barrier; 83 | latency[MYTHREAD] = (1.0 * timer) / iterations; 84 | 85 | upc_all_reduceD(&min_time, latency, UPC_MIN, THREADS, 1, NULL, SYNC_MODE); 86 | upc_all_reduceD(&max_time, latency, UPC_MAX, THREADS, 1, NULL, SYNC_MODE); 87 | upc_all_reduceD(&avg_time, latency, UPC_ADD, THREADS, 1, NULL, SYNC_MODE); 88 | 
if(!MYTHREAD) 89 | avg_time = avg_time/THREADS; 90 | 91 | print_data(MYTHREAD, full, size*sizeof(char), avg_time, min_time, max_time, iterations); 92 | } 93 | 94 | return EXIT_SUCCESS; 95 | } 96 | 97 | /* vi: set sw=4 sts=4 tw=80: */ 98 | -------------------------------------------------------------------------------- /upc/osu_upc_all_gather_all.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC GatherAll Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | #include "osu_coll.h" 19 | #include 20 | 21 | #ifdef PACKAGE_VERSION 22 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 23 | #else 24 | # define HEADER "# " BENCHMARK "\n" 25 | #endif 26 | 27 | #define SYNC_MODE (UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC) 28 | 29 | shared char *src, *dst; 30 | 31 | shared double avg_time, max_time, min_time; 32 | shared double latency[THREADS]; 33 | 34 | int main(int argc, char *argv[]) 35 | { 36 | int i = 0, size; 37 | int skip; 38 | int64_t t_start = 0, t_stop = 0, timer=0; 39 | int max_msg_size = 1<<20, full = 0; 40 | 41 | if (process_args(argc, argv, MYTHREAD, &max_msg_size, &full, HEADER)) { 42 | return 0; 43 | } 44 | 45 | if(THREADS < 2) { 46 | if(MYTHREAD == 0) { 47 | fprintf(stderr, "This test requires at least two processes\n"); 48 | } 49 | return -1; 50 | } 51 | print_header(HEADER, MYTHREAD, full); 52 | 53 | src = upc_all_alloc(THREADS, max_msg_size*sizeof(char)); 54 | dst = upc_all_alloc(THREADS*THREADS, max_msg_size*sizeof(char)); 55 | upc_barrier; 56 | 57 | if(NULL == src || NULL == dst) { 58 | 
fprintf(stderr, "malloc failed.\n"); 59 | exit(1); 60 | } 61 | 62 | for(size=1; size <=max_msg_size; size *= 2) { 63 | if(size > LARGE_MESSAGE_SIZE) { 64 | skip = SKIP_LARGE; 65 | iterations = iterations_large; 66 | } 67 | else { 68 | skip = SKIP; 69 | } 70 | 71 | timer=0; 72 | for(i=0; i < iterations + skip ; i++) { 73 | t_start = TIME(); 74 | upc_all_gather_all(dst, src, size, SYNC_MODE ); 75 | t_stop = TIME(); 76 | 77 | if(i>=skip){ 78 | timer+=t_stop-t_start; 79 | } 80 | upc_barrier; 81 | } 82 | upc_barrier; 83 | latency[MYTHREAD] = (1.0 * timer) / iterations; 84 | 85 | upc_all_reduceD(&min_time, latency, UPC_MIN, THREADS, 1, NULL, SYNC_MODE); 86 | upc_all_reduceD(&max_time, latency, UPC_MAX, THREADS, 1, NULL, SYNC_MODE); 87 | upc_all_reduceD(&avg_time, latency, UPC_ADD, THREADS, 1, NULL, SYNC_MODE); 88 | if(!MYTHREAD) 89 | avg_time = avg_time/THREADS; 90 | 91 | print_data(MYTHREAD, full, size*sizeof(char), avg_time, min_time, max_time, iterations); 92 | } 93 | 94 | upc_barrier; 95 | return EXIT_SUCCESS; 96 | } 97 | 98 | /* vi: set sw=4 sts=4 tw=80: */ 99 | -------------------------------------------------------------------------------- /upc/osu_upc_all_reduce.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC Reduce Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | #include "osu_coll.h" 19 | #include 20 | 21 | #ifdef PACKAGE_VERSION 22 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 23 | #else 24 | # define HEADER "# " BENCHMARK "\n" 25 | #endif 26 | 27 | #define SYNC_MODE (UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC) 28 | 29 | shared char *src; 30 | shared double avg_time, max_time, min_time; 31 | shared double latency[THREADS]; 32 | shared char dst; 33 | 34 | int main(int argc, char *argv[]) 35 | { 36 | int i = 0, size; 37 | int skip; 38 | int64_t t_start = 0, t_stop = 0, timer=0; 39 | int max_msg_size = 1<<20, full = 0; 40 | 41 | if (process_args(argc, argv, MYTHREAD, &max_msg_size, &full, HEADER)) { 42 | return 0; 43 | } 44 | 45 | if(THREADS < 2) { 46 | if(MYTHREAD == 0) { 47 | fprintf(stderr, "This test requires at least two processes\n"); 48 | } 49 | return -1; 50 | } 51 | print_header(HEADER, MYTHREAD, full); 52 | 53 | src = upc_all_alloc(THREADS, max_msg_size*sizeof(char)); 54 | 55 | if(NULL == src) { 56 | fprintf(stderr, "malloc failed.\n"); 57 | exit(1); 58 | } 59 | 60 | for(size=1; size <=max_msg_size; size *= 2) { 61 | if(size > LARGE_MESSAGE_SIZE) { 62 | skip = SKIP_LARGE; 63 | iterations = iterations_large; 64 | } 65 | else { 66 | skip = SKIP; 67 | } 68 | 69 | timer=0; 70 | for(i=0; i < iterations + skip ; i++) { 71 | upc_barrier; 72 | t_start = TIME(); 73 | upc_all_reduceC(&dst, src, UPC_MAX, size * THREADS, size, NULL, SYNC_MODE); 74 | t_stop = TIME(); 75 | 76 | if(i>=skip){ 77 | timer+=t_stop-t_start; 78 | } 79 | upc_barrier; 80 | } 81 | upc_barrier; 82 | latency[MYTHREAD] = (1.0 * timer) / iterations; 83 | 84 | upc_all_reduceD(&min_time, latency, UPC_MIN, THREADS, 1, NULL, SYNC_MODE); 85 | upc_all_reduceD(&max_time, latency, UPC_MAX, THREADS, 1, NULL, SYNC_MODE); 86 | upc_all_reduceD(&avg_time, latency, UPC_ADD, THREADS, 1, NULL, SYNC_MODE); 87 | if(!MYTHREAD) 88 | avg_time = avg_time/THREADS; 
89 | 90 | print_data(MYTHREAD, full, size*sizeof(char), avg_time, min_time, max_time, iterations); 91 | } 92 | 93 | return EXIT_SUCCESS; 94 | } 95 | 96 | /* vi: set sw=4 sts=4 tw=80: */ 97 | -------------------------------------------------------------------------------- /upc/osu_upc_all_scatter.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC Scatter Latency Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "osu_common.h" 18 | #include "osu_coll.h" 19 | #include 20 | 21 | #ifdef PACKAGE_VERSION 22 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 23 | #else 24 | # define HEADER "# " BENCHMARK "\n" 25 | #endif 26 | 27 | #define SYNC_MODE (UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC) 28 | 29 | shared char *dst, *src; 30 | 31 | shared double avg_time, max_time, min_time; 32 | shared double latency[THREADS]; 33 | 34 | int main(int argc, char *argv[]) 35 | { 36 | int i = 0, size; 37 | int skip; 38 | int64_t t_start = 0, t_stop = 0, timer=0; 39 | int max_msg_size = 1<<20, full = 0; 40 | 41 | if (process_args(argc, argv, MYTHREAD, &max_msg_size, &full, HEADER)) { 42 | return 0; 43 | } 44 | 45 | if(THREADS < 2) { 46 | if(MYTHREAD == 0) { 47 | fprintf(stderr, "This test requires at least two processes\n"); 48 | } 49 | return -1; 50 | } 51 | print_header(HEADER, MYTHREAD, full); 52 | 53 | dst = upc_all_alloc(THREADS, max_msg_size*sizeof(char)); 54 | src = upc_all_alloc(1, THREADS*max_msg_size*sizeof(char)); 55 | 56 | if(NULL == dst || NULL == src) { 57 | fprintf(stderr, "malloc failed.\n"); 58 | exit(1); 59 | } 60 | 61 | for(size=1; size 
<=max_msg_size; size *= 2) { 62 | if(size > LARGE_MESSAGE_SIZE) { 63 | skip = SKIP_LARGE; 64 | iterations = iterations_large; 65 | } 66 | else { 67 | skip = SKIP; 68 | } 69 | 70 | timer=0; 71 | for(i=0; i < iterations + skip ; i++) { 72 | t_start = TIME(); 73 | 74 | upc_all_scatter(dst, src, size, SYNC_MODE ); 75 | t_stop = TIME(); 76 | 77 | if(i>=skip){ 78 | timer+=t_stop-t_start; 79 | } 80 | upc_barrier; 81 | } 82 | upc_barrier; 83 | latency[MYTHREAD] = (1.0 * timer) / iterations; 84 | 85 | upc_all_reduceD(&min_time, latency, UPC_MIN, THREADS, 1, NULL, SYNC_MODE); 86 | upc_all_reduceD(&max_time, latency, UPC_MAX, THREADS, 1, NULL, SYNC_MODE); 87 | upc_all_reduceD(&avg_time, latency, UPC_ADD, THREADS, 1, NULL, SYNC_MODE); 88 | if(!MYTHREAD) 89 | avg_time = avg_time/THREADS; 90 | 91 | print_data(MYTHREAD, full, size*sizeof(char), avg_time, min_time, max_time, iterations); 92 | } 93 | 94 | return EXIT_SUCCESS; 95 | } 96 | 97 | /* vi: set sw=4 sts=4 tw=80: */ 98 | -------------------------------------------------------------------------------- /upc/osu_upc_memget.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC MEMGET Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #define MAX_MSG_SIZE (1<<22) 17 | #define SKIP_LARGE 10 18 | #define LOOP_LARGE 100 19 | #define LARGE_MESSAGE_SIZE 8192 20 | 21 | int skip = 1000; 22 | int loop = 10000; 23 | 24 | #ifdef PACKAGE_VERSION 25 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 26 | #else 27 | # define HEADER "# " BENCHMARK "\n" 28 | #endif 29 | 30 | #ifndef FIELD_WIDTH 31 | # define FIELD_WIDTH 20 32 | #endif 33 | 34 | #ifndef FLOAT_PRECISION 35 | # define FLOAT_PRECISION 2 36 | #endif 37 | 38 | void wtime(double *t) 39 | { 40 | static int sec = -1; 41 | struct timeval tv; 42 | gettimeofday(&tv, (void *)0); 43 | if (sec < 0) sec = tv.tv_sec; 44 | *t = (tv.tv_sec - sec)*1.0e+6 + tv.tv_usec; 45 | } 46 | 47 | int main(int argc, char **argv) 48 | { 49 | int iters=0; 50 | double t_start, t_end; 51 | int peerid = (MYTHREAD+1)%THREADS; 52 | int iamsender = 0; 53 | int i; 54 | 55 | if( THREADS == 1 ) { 56 | if(MYTHREAD == 0) { 57 | fprintf(stderr, "This test requires at least two UPC threads\n"); 58 | } 59 | return 0; 60 | } 61 | 62 | if ( MYTHREAD < THREADS/2 ) 63 | iamsender = 1; 64 | 65 | shared char *data = upc_all_alloc(THREADS, MAX_MSG_SIZE*2); 66 | shared [] char *remote = (shared [] char *)(data + peerid); 67 | char *local = ((char *)(data+MYTHREAD)) + MAX_MSG_SIZE; 68 | 69 | if ( !MYTHREAD ) { 70 | fprintf(stdout, HEADER); 71 | fprintf(stdout, "# [ pairs: %d ]\n", THREADS/2); 72 | fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)"); 73 | fflush(stdout); 74 | } 75 | 76 | for (int size = 1; size <= MAX_MSG_SIZE; size*=2) { 77 | 78 | if ( iamsender ) 79 | for(i = 0; i < size; i++) { 80 | local[i] = 'a'; 81 | } 82 | else 83 | for(i = 0; i < size; i++) { 84 | local[i] = 'b'; 85 | } 86 | 87 | upc_barrier; 88 | 89 | if(size > LARGE_MESSAGE_SIZE) { 90 | loop = LOOP_LARGE; 91 | skip = SKIP_LARGE; 92 | } 93 | 94 | if( iamsender ) 95 | { 96 | for ( i = 0; i < loop + skip; i++) { 97 | if(i == skip) { 98 
| upc_barrier; 99 | wtime(&t_start); 100 | } 101 | 102 | upc_memget(local, remote, size); 103 | } 104 | 105 | upc_barrier; 106 | 107 | wtime(&t_end); 108 | if( !MYTHREAD ) 109 | { 110 | double latency = (t_end - t_start)/(1.0 * loop); 111 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 112 | FLOAT_PRECISION, latency); 113 | fflush(stdout); 114 | } 115 | } else 116 | { 117 | upc_barrier; 118 | upc_barrier; 119 | } 120 | 121 | } 122 | return 0; 123 | } 124 | -------------------------------------------------------------------------------- /upc/osu_upc_memput.c: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC MEMPUT Test" 2 | /* 3 | * Copyright (C) 2002-2016 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #define MAX_MSG_SIZE (1<<22) 18 | #define SKIP_LARGE 10 19 | #define LOOP_LARGE 100 20 | #define LARGE_MESSAGE_SIZE 8192 21 | 22 | int skip = 1000; 23 | int loop = 10000; 24 | 25 | #ifdef PACKAGE_VERSION 26 | # define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n" 27 | #else 28 | # define HEADER "# " BENCHMARK "\n" 29 | #endif 30 | 31 | #ifndef FIELD_WIDTH 32 | # define FIELD_WIDTH 20 33 | #endif 34 | 35 | #ifndef FLOAT_PRECISION 36 | # define FLOAT_PRECISION 2 37 | #endif 38 | 39 | 40 | void wtime(double *t) 41 | { 42 | static int sec = -1; 43 | struct timeval tv; 44 | gettimeofday(&tv, (void *)0); 45 | if (sec < 0) sec = tv.tv_sec; 46 | *t = (tv.tv_sec - sec)*1.0e+6 + tv.tv_usec; 47 | } 48 | 49 | int main(int argc, char **argv) 50 | { 51 | int iters=0; 52 | double t_start, t_end; 53 | int peerid = (MYTHREAD+1)%THREADS; 54 | int iamsender = 0; 55 | int i; 56 | 57 | if( THREADS == 1 ) { 58 | if(MYTHREAD == 0) { 59 | fprintf(stderr, "This test requires at least two UPC threads\n"); 60 | } 61 | return 0; 62 | } 63 | 64 | if ( MYTHREAD < THREADS/2 ) 65 | iamsender = 1; 66 | 67 | shared char *data = upc_all_alloc(THREADS, MAX_MSG_SIZE*2); 68 | shared [] char *remote = (shared [] char *)(data + peerid); 69 | char *local = ((char *)(data+MYTHREAD)) + MAX_MSG_SIZE; 70 | 71 | if ( !MYTHREAD ) { 72 | fprintf(stdout, HEADER); 73 | fprintf(stdout, "# [ pairs: %d ]\n", THREADS/2); 74 | fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)"); 75 | fflush(stdout); 76 | } 77 | 78 | for (int size = 1; size <= MAX_MSG_SIZE; size*=2) { 79 | 80 | if ( iamsender ) 81 | for(i = 0; i < size; i++) { 82 | local[i] = 'a'; 83 | } 84 | else 85 | for(i = 0; i < size; i++) { 86 | local[i] = 'b'; 87 | } 88 | 89 | upc_barrier; 90 | 91 | if(size > LARGE_MESSAGE_SIZE) { 92 | loop = LOOP_LARGE; 93 | skip = SKIP_LARGE; 94 | } 95 | 96 | if( iamsender ) 97 | { 98 | for (i = 0; i < loop + skip; i++) { 99 | 
if(i == skip) { 100 | upc_barrier; 101 | wtime(&t_start); 102 | } 103 | 104 | upc_memput(remote, local, size); 105 | upc_fence; 106 | } 107 | 108 | upc_barrier; 109 | 110 | wtime(&t_end); 111 | if( !MYTHREAD ) 112 | { 113 | double latency = (t_end - t_start)/(1.0 * loop); 114 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 115 | FLOAT_PRECISION, latency); 116 | fflush(stdout); 117 | } 118 | } else 119 | { 120 | upc_barrier; 121 | upc_barrier; 122 | } 123 | 124 | } 125 | return 0; 126 | } 127 | -------------------------------------------------------------------------------- /upcxx/Makefile.am: -------------------------------------------------------------------------------- 1 | upcdir = $(pkglibexecdir)/upcxx 2 | upc_PROGRAMS = osu_upcxx_allgather osu_upcxx_alltoall osu_upcxx_bcast \ 3 | osu_upcxx_gather osu_upcxx_reduce osu_upcxx_scatter \ 4 | osu_upcxx_async_copy_get osu_upcxx_async_copy_put 5 | 6 | osu_upcxx_allgather_SOURCES = osu_upcxx_allgather.cpp osu_coll.h \ 7 | osu_common.c osu_common.h 8 | osu_upcxx_alltoall_SOURCES = osu_upcxx_alltoall.cpp osu_coll.h \ 9 | osu_common.c osu_common.h 10 | osu_upcxx_bcast_SOURCES = osu_upcxx_bcast.cpp osu_coll.h \ 11 | osu_common.c osu_common.h 12 | osu_upcxx_gather_SOURCES = osu_upcxx_gather.cpp osu_coll.h \ 13 | osu_common.c osu_common.h 14 | osu_upcxx_reduce_SOURCES = osu_upcxx_reduce.cpp osu_coll.h \ 15 | osu_common.c osu_common.h 16 | osu_upcxx_scatter_SOURCES = osu_upcxx_scatter.cpp osu_coll.h \ 17 | osu_common.c osu_common.h 18 | osu_upcxx_async_copy_get_SOURCES = osu_upcxx_async_copy_get.cpp \ 19 | osu_common.c osu_common.h 20 | osu_upcxx_async_copy_put_SOURCES = osu_upcxx_async_copy_put.cpp \ 21 | osu_common.c osu_common.h 22 | -------------------------------------------------------------------------------- /upcxx/osu_common.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 3 | * (NBCL), The Ohio 
/* (continuation) State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */
#include <stdio.h>            /* NOTE(review): include targets were stripped  */
#include <stdlib.h>           /* from this dump; reconstructed — confirm.     */
#include <string.h>
#include <sys/time.h>

#ifdef __cplusplus
extern "C" double getMicrosecondTimeStamp (void);
#endif /* #ifdef __cplusplus */

/*
 * Return the current wall-clock time in microseconds since the epoch,
 * as a double.  Aborts the process if gettimeofday() fails.
 */
double
getMicrosecondTimeStamp (void)
{
    double retval;
    struct timeval tv;

    if (gettimeofday(&tv, NULL)) {
        perror("gettimeofday");
        abort();
    }

    retval = tv.tv_sec * (double)1e6 + tv.tv_usec;

    return retval;
}

/* --------------------------- /upcxx/osu_common.h --------------------------- */

/*
 * Copyright (C) 2002-2015 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 */
/*
 * FIX: the original guard '_OSU_COMMON_H_' is a reserved identifier
 * (leading underscore + uppercase, C11 7.1.3) and its #endif comment
 * said '_OSU_COMMON_H' (mismatch).  Renamed to OSU_COMMON_H; the guard
 * is only referenced inside this header, so the change is local.
 */
#ifndef OSU_COMMON_H
#define OSU_COMMON_H

/* HEADER expands BENCHMARK, which each benchmark defines before including us */
#ifdef PACKAGE_VERSION
#   define HEADER "# " BENCHMARK " v" PACKAGE_VERSION "\n"
#else
#   define HEADER "# " BENCHMARK "\n"
#endif

#ifndef FIELD_WIDTH
#   define FIELD_WIDTH 20
#endif

#ifndef FLOAT_PRECISION
#   define FLOAT_PRECISION 2
#endif

#define TIME() getMicrosecondTimeStamp()

#ifdef __cplusplus
extern "C" {
#endif /* #ifdef __cplusplus */

/* FIX: '(void)' makes this a proper prototype matching the definition */
double getMicrosecondTimeStamp(void);

#ifdef __cplusplus
}
#endif /* #ifdef __cplusplus */

#endif /* OSU_COMMON_H */

/* ----------------------- /upcxx/osu_upcxx_allgather.cpp -------------------- */

#define BENCHMARK "OSU UPC++ AllGather Latency Test"
/*
 * Copyright (C) 2002-2015 the Network-Based Computing Laboratory
 * (NBCL), The Ohio State University.
 *
 * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu)
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level OMB directory.
 * NOTE(review): comment continues on the next (untouched) dump line.
 */
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #define root 0 19 | #define VERIFY 0 20 | 21 | using namespace std; 22 | using namespace upcxx; 23 | 24 | int 25 | main (int argc, char *argv[]) 26 | { 27 | init(&argc, &argv); 28 | 29 | global_ptr src; 30 | global_ptr dst; 31 | global_ptr time_src; 32 | global_ptr time_dst; 33 | 34 | double avg_time, max_time, min_time; 35 | int i = 0, size; 36 | int skip; 37 | int64_t t_start = 0, t_stop = 0, timer=0; 38 | int max_msg_size = 1<<20, full = 0; 39 | 40 | if (process_args(argc, argv, myrank(), &max_msg_size, &full, HEADER)) { 41 | return 0; 42 | } 43 | 44 | if (ranks() < 2) { 45 | if (myrank() == 0) { 46 | fprintf(stderr, "This test requires at least two processes\n"); 47 | } 48 | return -1; 49 | } 50 | 51 | src = allocate (myrank(), max_msg_size*sizeof(char)); 52 | dst = allocate (myrank(), max_msg_size*sizeof(char)*ranks()); 53 | 54 | assert(src != NULL); 55 | assert(dst != NULL); 56 | 57 | time_src = allocate (myrank(), 1); 58 | time_dst = allocate (root, 1); 59 | 60 | assert(time_src != NULL); 61 | assert(time_dst != NULL); 62 | 63 | /* 64 | * put a barrier since allocate is non-blocking in upc++ 65 | */ 66 | barrier(); 67 | 68 | print_header(HEADER, myrank(), full); 69 | 70 | for (size=1; size <=max_msg_size; size *= 2) { 71 | if (size > LARGE_MESSAGE_SIZE) { 72 | skip = SKIP_LARGE; 73 | iterations = iterations_large; 74 | } else { 75 | skip = SKIP; 76 | } 77 | 78 | timer=0; 79 | for(i=0; i < iterations + skip ; i++) { 80 | //t_start = TIME(); 81 | t_start = getMicrosecondTimeStamp(); 82 | 83 | upcxx_allgather((char *)src, (char *)dst, size*sizeof(char)); 84 | t_stop = getMicrosecondTimeStamp(); 85 | 86 | if(i>=skip){ 87 | timer+=t_stop-t_start; 88 | } 89 | barrier(); 90 | } 91 | 92 | barrier(); 93 | 94 | double* lsrc = (double *)time_src; 95 | lsrc[0] = (1.0 * timer) / iterations; 96 | 97 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 98 | UPCXX_MAX, 
UPCXX_DOUBLE); 99 | if (myrank()==root) { 100 | double* ldst = (double *)time_dst; 101 | max_time = ldst[0]; 102 | } 103 | 104 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 105 | UPCXX_MIN, UPCXX_DOUBLE); 106 | if (myrank()==root) { 107 | double* ldst = (double *)time_dst; 108 | min_time = ldst[0]; 109 | } 110 | 111 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 112 | UPCXX_SUM, UPCXX_DOUBLE); 113 | if (myrank()==root) { 114 | double* ldst = (double *)time_dst; 115 | avg_time = ldst[0]/ranks(); 116 | } 117 | 118 | barrier (); 119 | 120 | print_data(myrank(), full, size*sizeof(char), avg_time, min_time, 121 | max_time, iterations); 122 | } 123 | 124 | deallocate(src); 125 | deallocate(dst); 126 | deallocate(time_src); 127 | deallocate(time_dst); 128 | 129 | finalize(); 130 | 131 | return EXIT_SUCCESS; 132 | } 133 | 134 | /* vi: set sw=4 sts=4 tw=80: */ 135 | -------------------------------------------------------------------------------- /upcxx/osu_upcxx_alltoall.cpp: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC++ AlltoAll Latency Test" 2 | /* 3 | * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #define root 0 19 | #define VERIFY 0 20 | 21 | using namespace std; 22 | using namespace upcxx; 23 | 24 | int 25 | main (int argc, char *argv[]) 26 | { 27 | init(&argc, &argv); 28 | 29 | global_ptr src; 30 | global_ptr dst; 31 | global_ptr time_src; 32 | global_ptr time_dst; 33 | 34 | double avg_time, max_time, min_time; 35 | int i = 0, size; 36 | int skip; 37 | int64_t t_start = 0, t_stop = 0, timer=0; 38 | int max_msg_size = 1<<20, full = 0; 39 | 40 | if (process_args(argc, argv, myrank(), &max_msg_size, &full, HEADER)) { 41 | return 0; 42 | } 43 | 44 | if (ranks() < 2) { 45 | if (myrank() == 0) { 46 | fprintf(stderr, "This test requires at least two processes\n"); 47 | } 48 | return -1; 49 | } 50 | 51 | src = allocate (myrank(), max_msg_size*sizeof(char)*ranks()); 52 | dst = allocate (myrank(), max_msg_size*sizeof(char)*ranks()); 53 | 54 | assert(src != NULL); 55 | assert(dst != NULL); 56 | 57 | time_src = allocate (myrank(), 1); 58 | time_dst = allocate (root, 1); 59 | 60 | assert(time_src != NULL); 61 | assert(time_dst != NULL); 62 | 63 | /* 64 | * put a barrier since allocate is non-blocking in upc++ 65 | */ 66 | barrier(); 67 | 68 | print_header(HEADER, myrank(), full); 69 | 70 | for (size=1; size <=max_msg_size; size *= 2) { 71 | if (size > LARGE_MESSAGE_SIZE) { 72 | skip = SKIP_LARGE; 73 | iterations = iterations_large; 74 | } else { 75 | skip = SKIP; 76 | } 77 | 78 | timer=0; 79 | for (i=0; i < iterations + skip ; i++) { 80 | t_start = getMicrosecondTimeStamp(); 81 | upcxx_alltoall((char *)src, (char *)dst, size*sizeof(char)); 82 | t_stop = getMicrosecondTimeStamp(); 83 | 84 | if (i>=skip) { 85 | timer+=t_stop-t_start; 86 | } 87 | barrier(); 88 | } 89 | 90 | barrier(); 91 | 92 | double* lsrc = (double *)time_src; 93 | lsrc[0] = (1.0 * timer) / iterations; 94 | 95 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 96 | UPCXX_MAX, UPCXX_DOUBLE); 97 | if 
(myrank()==root) { 98 | double* ldst = (double *)time_dst; 99 | max_time = ldst[0]; 100 | } 101 | 102 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 103 | UPCXX_MIN, UPCXX_DOUBLE); 104 | if (myrank()==root) { 105 | double* ldst = (double *)time_dst; 106 | min_time = ldst[0]; 107 | } 108 | 109 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 110 | UPCXX_SUM, UPCXX_DOUBLE); 111 | if (myrank()==root) { 112 | double* ldst = (double *)time_dst; 113 | avg_time = ldst[0]/ranks(); 114 | } 115 | 116 | barrier (); 117 | 118 | print_data(myrank(), full, size*sizeof(char), avg_time, min_time, 119 | max_time, iterations); 120 | } 121 | 122 | deallocate(src); 123 | deallocate(dst); 124 | deallocate(time_src); 125 | deallocate(time_dst); 126 | 127 | finalize(); 128 | 129 | return EXIT_SUCCESS; 130 | } 131 | 132 | /* vi: set sw=4 sts=4 tw=80: */ 133 | -------------------------------------------------------------------------------- /upcxx/osu_upcxx_async_copy_get.cpp: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC++ Async Copy (Get) Test" 2 | /* 3 | * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace upcxx; 19 | 20 | #define VERIFY 0 21 | #define MAX_MSG_SIZE (1<<22) 22 | #define SKIP_LARGE 10 23 | #define LOOP_LARGE 100 24 | #define LARGE_MESSAGE_SIZE 8192 25 | 26 | int skip = 1000; 27 | int loop = 10000; 28 | 29 | int 30 | main (int argc, char **argv) 31 | { 32 | init(&argc, &argv); 33 | 34 | int iters=0; 35 | double t_start, t_end; 36 | int peerid = (myrank()+1)%ranks(); 37 | int iamsender = 0; 38 | int i; 39 | 40 | if (ranks() == 1) { 41 | if (myrank() == 0) { 42 | fprintf(stderr, "This test requires at least two UPC threads\n"); 43 | } 44 | return 0; 45 | } 46 | 47 | if (myrank() < ranks()/2) { 48 | iamsender = 1; 49 | } 50 | 51 | shared_array, 1> data_ptrs (ranks()); 52 | 53 | /* 54 | * allocate memory to each global pointer. 55 | */ 56 | data_ptrs[myrank()] = allocate(myrank(), sizeof(char) 57 | * MAX_MSG_SIZE); 58 | 59 | /* 60 | * put a barrier since allocate is non-blocking in upc++ 61 | */ 62 | barrier(); 63 | 64 | /* 65 | * my peer's pointer from where I will memget. 66 | */ 67 | global_ptr remote = data_ptrs[peerid]; 68 | 69 | /* 70 | * cast my global pointer to a local pointer. 
71 | */ 72 | global_ptr local = (global_ptr)data_ptrs[myrank()]; 73 | 74 | barrier(); 75 | 76 | if (!myrank()) { 77 | fprintf(stdout, HEADER); 78 | fprintf(stdout, "# [ pairs: %d ]\n", ranks()/2); 79 | fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)"); 80 | fflush(stdout); 81 | } 82 | 83 | for (int size = 1; size <= MAX_MSG_SIZE; size*=2) { 84 | if (iamsender) { 85 | for (i = 0; i < size; i++) { 86 | char *lptr = (char *)local; 87 | lptr[i] = 'a'; 88 | } 89 | } else { 90 | for (i = 0; i < size; i++) { 91 | char *lptr = (char *)local; 92 | lptr[i] = 'b'; 93 | } 94 | } 95 | 96 | barrier(); 97 | 98 | if (size > LARGE_MESSAGE_SIZE) { 99 | loop = LOOP_LARGE; 100 | skip = SKIP_LARGE; 101 | } 102 | 103 | if (iamsender) { 104 | for ( i = 0; i < loop + skip; i++) { 105 | if (i == skip) { 106 | barrier(); 107 | t_start = getMicrosecondTimeStamp(); 108 | } 109 | 110 | async_copy(remote, local, size); 111 | } 112 | 113 | async_wait(); 114 | barrier(); 115 | 116 | t_end = getMicrosecondTimeStamp(); 117 | if (!myrank()) { 118 | double latency = (t_end - t_start)/(1.0 * loop); 119 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 120 | FLOAT_PRECISION, latency); 121 | fflush(stdout); 122 | } 123 | } else { 124 | barrier(); 125 | barrier(); 126 | } 127 | } 128 | 129 | if (VERIFY) { 130 | if (iamsender) { 131 | /* 132 | * my local and my remote ptr should have same data 133 | */ 134 | char *lptr = (char *)local; 135 | for (int i = 0; i < MIN(20, MAX_MSG_SIZE); i++) { 136 | printf("sender_rank():%d --- lptr[%d]=%c , rptr[%d]=%c \n", 137 | myrank(), i, lptr[i], i, (char)remote[i]); 138 | 139 | } 140 | } 141 | } 142 | 143 | deallocate(local); 144 | barrier(); 145 | finalize(); 146 | 147 | return 0; 148 | } 149 | -------------------------------------------------------------------------------- /upcxx/osu_upcxx_async_copy_put.cpp: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC++ Async Copy (Put) 
Test" 2 | /* 3 | * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace upcxx; 19 | 20 | #define VERIFY 0 21 | #define MAX_MSG_SIZE (1<<22) 22 | #define SKIP_LARGE 10 23 | #define LOOP_LARGE 100 24 | #define LARGE_MESSAGE_SIZE 8192 25 | 26 | int skip = 1000; 27 | int loop = 10000; 28 | 29 | int 30 | main (int argc, char **argv) 31 | { 32 | init(&argc, &argv); 33 | 34 | int iters=0; 35 | double t_start, t_end; 36 | int peerid = (ranks()+1)%ranks(); 37 | int iamsender = 0; 38 | int i; 39 | 40 | if (ranks() == 1) { 41 | if (myrank() == 0) { 42 | fprintf(stderr, "This test requires at least two UPC threads\n"); 43 | } 44 | return 0; 45 | } 46 | 47 | if (myrank() < ranks()/2) { 48 | iamsender = 1; 49 | } 50 | 51 | /* 52 | * a shared array of global pointers. 53 | */ 54 | shared_array, 1> data_ptrs (ranks()); 55 | 56 | /* 57 | * allocate memory to each global pointer. 58 | */ 59 | data_ptrs[myrank()] = allocate(myrank(), sizeof(char) * MAX_MSG_SIZE); 60 | 61 | /* 62 | * put a barrier since allocate is non-blocking in upc++ 63 | */ 64 | barrier(); 65 | 66 | /* 67 | * my peer's pointer from where I will memput. 68 | */ 69 | global_ptr remote = data_ptrs[peerid]; 70 | 71 | /* 72 | * cast my global pointer to a local pointer. 
73 | */ 74 | global_ptr local = data_ptrs[myrank()]; 75 | 76 | barrier(); 77 | 78 | if (!myrank()) { 79 | fprintf(stdout, HEADER); 80 | fprintf(stdout, "# [ pairs: %d ]\n", ranks()/2); 81 | fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, 82 | "Latency (us)"); 83 | fflush(stdout); 84 | } 85 | 86 | for (int size = 1; size <= MAX_MSG_SIZE; size*=2) { 87 | if (iamsender) { 88 | for(i = 0; i < size; i++) { 89 | char *lptr = (char *)local; 90 | lptr[i] = 'a'; 91 | } 92 | } else { 93 | for(i = 0; i < size; i++) { 94 | char *lptr = (char *)local; 95 | lptr[i] = 'b'; 96 | } 97 | } 98 | 99 | barrier(); 100 | 101 | if (size > LARGE_MESSAGE_SIZE) { 102 | loop = LOOP_LARGE; 103 | skip = SKIP_LARGE; 104 | } 105 | 106 | if (iamsender) { 107 | for (i = 0; i < loop + skip; i++) { 108 | if(i == skip) { 109 | barrier(); 110 | t_start = getMicrosecondTimeStamp(); 111 | } 112 | 113 | async_copy(local, remote, size); 114 | } 115 | async_wait(); 116 | 117 | barrier(); 118 | 119 | t_end = getMicrosecondTimeStamp(); 120 | 121 | if (!myrank()) { 122 | double latency = (t_end - t_start)/(1.0 * loop); 123 | fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, 124 | FLOAT_PRECISION, latency); 125 | fflush(stdout); 126 | } 127 | } else { 128 | barrier(); 129 | barrier(); 130 | } 131 | } 132 | 133 | if (VERIFY) { 134 | if (iamsender) { 135 | /* 136 | * my local and my remote ptr should have same data 137 | */ 138 | char *lptr = (char *)local; 139 | for (int i = 0; i < MIN(20, MAX_MSG_SIZE); i++) { 140 | printf ("sender_rank():%d --- lptr[%d]=%c , rptr[%d]=%c \n", 141 | myrank(), i, lptr[i], i, (char)remote[i]); 142 | } 143 | } 144 | } 145 | 146 | deallocate(local); 147 | barrier(); 148 | finalize(); 149 | 150 | return 0; 151 | } 152 | -------------------------------------------------------------------------------- /upcxx/osu_upcxx_bcast.cpp: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC++ Broadcast Latency Test" 2 | /* 3 
| * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace std; 19 | using namespace upcxx; 20 | 21 | #define root 0 22 | #define VERIFY 0 23 | 24 | int 25 | main (int argc, char *argv[]) 26 | { 27 | init(&argc, &argv); 28 | 29 | global_ptr src; 30 | global_ptr dst; 31 | global_ptr time_src; 32 | global_ptr time_dst; 33 | 34 | double avg_time, max_time, min_time; 35 | int i = 0, size; 36 | int skip; 37 | int64_t t_start = 0, t_stop = 0, timer=0; 38 | int max_msg_size = 1<<20, full = 0; 39 | 40 | if (process_args(argc, argv, myrank(), &max_msg_size, &full, HEADER)) { 41 | return 0; 42 | } 43 | 44 | if (ranks() < 2) { 45 | if (myrank() == 0) { 46 | fprintf(stderr, "This test requires at least two processes\n"); 47 | } 48 | return -1; 49 | } 50 | 51 | src = allocate (root, max_msg_size*sizeof(char)); 52 | dst = allocate (myrank(), max_msg_size*sizeof(char)); 53 | 54 | assert(src != NULL); 55 | assert(dst != NULL); 56 | 57 | time_src = allocate (myrank(), 1); 58 | time_dst = allocate (root, 1); 59 | 60 | assert(time_src != NULL); 61 | assert(time_dst != NULL); 62 | 63 | /* 64 | * put a barrier since allocate is non-blocking in upc++ 65 | */ 66 | barrier(); 67 | 68 | print_header(HEADER, myrank(), full); 69 | 70 | for (size=1; size <=max_msg_size; size *= 2) { 71 | if (size > LARGE_MESSAGE_SIZE) { 72 | skip = SKIP_LARGE; 73 | iterations = iterations_large; 74 | } else { 75 | skip = SKIP; 76 | } 77 | 78 | timer=0; 79 | for (i=0; i < iterations + skip ; i++) { 80 | t_start = getMicrosecondTimeStamp(); 81 | upcxx_bcast((char *)src, (char *)dst, size*sizeof(char), root); 82 | t_stop = getMicrosecondTimeStamp(); 83 
| 84 | if (i>=skip) { 85 | timer+=t_stop-t_start; 86 | } 87 | barrier(); 88 | } 89 | 90 | barrier(); 91 | 92 | double* lsrc = (double *)time_src; 93 | lsrc[0] = (1.0 * timer) / iterations; 94 | 95 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 96 | UPCXX_MAX, UPCXX_DOUBLE); 97 | if (myrank()==root) { 98 | double* ldst = (double *)time_dst; 99 | max_time = ldst[0]; 100 | } 101 | 102 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 103 | UPCXX_MIN, UPCXX_DOUBLE); 104 | if (myrank()==root) { 105 | double* ldst = (double *)time_dst; 106 | min_time = ldst[0]; 107 | } 108 | 109 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 110 | UPCXX_SUM, UPCXX_DOUBLE); 111 | if (myrank()==root) { 112 | double* ldst = (double *)time_dst; 113 | avg_time = ldst[0]/ranks(); 114 | } 115 | 116 | barrier(); 117 | 118 | print_data(myrank(), full, size*sizeof(char), avg_time, min_time, 119 | max_time, iterations); 120 | } 121 | 122 | deallocate(src); 123 | deallocate(dst); 124 | deallocate(time_src); 125 | deallocate(time_dst); 126 | 127 | finalize(); 128 | 129 | return EXIT_SUCCESS; 130 | } 131 | 132 | /* vi: set sw=4 sts=4 tw=80: */ 133 | -------------------------------------------------------------------------------- /upcxx/osu_upcxx_gather.cpp: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC++ Gather Latency Test" 2 | /* 3 | * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace std; 19 | using namespace upcxx; 20 | 21 | #define root 0 22 | #define VERIFY 1 23 | 24 | shared_lock sl; 25 | 26 | int 27 | main (int argc, char *argv[]) 28 | { 29 | init(&argc, &argv); 30 | 31 | global_ptr src; 32 | global_ptr dst; 33 | global_ptr time_src; 34 | global_ptr time_dst; 35 | 36 | double avg_time, max_time, min_time; 37 | int i = 0, size; 38 | int skip; 39 | int64_t t_start = 0, t_stop = 0, timer=0; 40 | int max_msg_size = 1<<20, full = 0; 41 | 42 | if (process_args(argc, argv, myrank(), &max_msg_size, &full, HEADER)) { 43 | return 0; 44 | } 45 | 46 | if(ranks() < 2) { 47 | if(myrank() == 0) { 48 | fprintf(stderr, "This test requires at least two processes\n"); 49 | } 50 | return -1; 51 | } 52 | 53 | src = allocate (myrank(), max_msg_size*sizeof(char)); 54 | dst = allocate (root, max_msg_size*sizeof(char)*ranks()); 55 | 56 | assert(src != NULL); 57 | assert(dst != NULL); 58 | 59 | time_src = allocate (myrank(), 1); 60 | time_dst = allocate (root, 1); 61 | 62 | assert(time_src != NULL); 63 | assert(time_dst != NULL); 64 | 65 | /* 66 | * put a barrier since allocate is non-blocking in upc++ 67 | */ 68 | barrier(); 69 | 70 | print_header(HEADER, myrank(), full); 71 | 72 | for (size=1; size <=max_msg_size; size *= 2) { 73 | if (size > LARGE_MESSAGE_SIZE) { 74 | skip = SKIP_LARGE; 75 | iterations = iterations_large; 76 | } else { 77 | skip = SKIP; 78 | } 79 | 80 | timer=0; 81 | for(i=0; i < iterations + skip ; i++) { 82 | t_start = getMicrosecondTimeStamp(); 83 | upcxx_gather((char *)src, (char *)dst, size*sizeof(char), root); 84 | t_stop = getMicrosecondTimeStamp(); 85 | 86 | if (i>=skip) { 87 | timer+=t_stop-t_start; 88 | } 89 | barrier(); 90 | } 91 | 92 | barrier(); 93 | 94 | double* lsrc = (double *)time_src; 95 | lsrc[0] = (1.0 * timer) / iterations; 96 | 97 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 98 | UPCXX_MAX, 
UPCXX_DOUBLE); 99 | if (myrank()==root) { 100 | double* ldst = (double *)time_dst; 101 | max_time = ldst[0]; 102 | } 103 | 104 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 105 | UPCXX_MIN, UPCXX_DOUBLE); 106 | if (myrank()==root) { 107 | double* ldst = (double *)time_dst; 108 | min_time = ldst[0]; 109 | } 110 | 111 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 112 | UPCXX_SUM, UPCXX_DOUBLE); 113 | if (myrank()==root) { 114 | double* ldst = (double *)time_dst; 115 | avg_time = ldst[0]/ranks(); 116 | } 117 | 118 | barrier(); 119 | 120 | print_data(myrank(), full, size*sizeof(char), avg_time, min_time, 121 | max_time, iterations); 122 | } 123 | 124 | deallocate(src); 125 | deallocate(dst); 126 | deallocate(time_src); 127 | deallocate(time_dst); 128 | 129 | finalize(); 130 | 131 | return EXIT_SUCCESS; 132 | } 133 | 134 | /* vi: set sw=4 sts=4 tw=80: */ 135 | -------------------------------------------------------------------------------- /upcxx/osu_upcxx_reduce.cpp: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC++ Reduce Latency Test" 2 | /* 3 | * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace std; 19 | using namespace upcxx; 20 | 21 | #define root 0 22 | #define VERIFY 0 23 | 24 | int 25 | main (int argc, char *argv[]) 26 | { 27 | init(&argc, &argv); 28 | 29 | global_ptr src; 30 | global_ptr dst; 31 | global_ptr time_src; 32 | global_ptr time_dst; 33 | 34 | double avg_time, max_time, min_time; 35 | int i = 0, size; 36 | int skip; 37 | int64_t t_start = 0, t_stop = 0, timer=0; 38 | int max_msg_size = 1<<20, full = 0; 39 | 40 | if (process_args(argc, argv, myrank(), &max_msg_size, &full, HEADER)) { 41 | return 0; 42 | } 43 | 44 | if (ranks() < 2) { 45 | if (myrank() == 0) { 46 | fprintf(stderr, "This test requires at least two processes\n"); 47 | } 48 | return -1; 49 | } 50 | 51 | src = allocate (myrank(), max_msg_size*sizeof(char)); 52 | dst = allocate (root, max_msg_size*sizeof(char)); 53 | 54 | assert(src != NULL); 55 | assert(dst != NULL); 56 | 57 | time_src = allocate (myrank(), 1); 58 | time_dst = allocate (root, 1); 59 | 60 | assert(time_src != NULL); 61 | assert(time_dst != NULL); 62 | 63 | /* 64 | * put a barrier since allocate is non-blocking in upc++ 65 | */ 66 | barrier(); 67 | 68 | print_header(HEADER, myrank(), full); 69 | 70 | for (size=1; size <=max_msg_size; size *= 2) { 71 | if (size > LARGE_MESSAGE_SIZE) { 72 | skip = SKIP_LARGE; 73 | iterations = iterations_large; 74 | } else { 75 | skip = SKIP; 76 | } 77 | 78 | timer=0; 79 | for (i=0; i < iterations + skip ; i++) { 80 | t_start = getMicrosecondTimeStamp(); 81 | upcxx_reduce((char *)src, (char *)dst, size*sizeof(char), 82 | root, UPCXX_SUM, UPCXX_CHAR); 83 | t_stop = getMicrosecondTimeStamp(); 84 | 85 | if (i>=skip){ 86 | timer+=t_stop-t_start; 87 | } 88 | barrier(); 89 | } 90 | 91 | barrier(); 92 | 93 | double* lsrc = (double *)time_src; 94 | lsrc[0] = (1.0 * timer) / iterations; 95 | 96 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 97 | UPCXX_MAX, 
UPCXX_DOUBLE); 98 | if (myrank()==root) { 99 | double* ldst = (double *)time_dst; 100 | max_time = ldst[0]; 101 | } 102 | 103 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 104 | UPCXX_MIN, UPCXX_DOUBLE); 105 | if (myrank()==root) { 106 | double* ldst = (double *)time_dst; 107 | min_time = ldst[0]; 108 | } 109 | 110 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 111 | UPCXX_SUM, UPCXX_DOUBLE); 112 | if (myrank()==root) { 113 | double* ldst = (double *)time_dst; 114 | avg_time = ldst[0]/ranks(); 115 | } 116 | 117 | barrier(); 118 | 119 | print_data(myrank(), full, size*sizeof(char), avg_time, min_time, 120 | max_time, iterations); 121 | } 122 | 123 | deallocate(src); 124 | deallocate(dst); 125 | deallocate(time_src); 126 | deallocate(time_dst); 127 | 128 | finalize(); 129 | 130 | return EXIT_SUCCESS; 131 | } 132 | 133 | /* vi: set sw=4 sts=4 tw=80: */ 134 | -------------------------------------------------------------------------------- /upcxx/osu_upcxx_scatter.cpp: -------------------------------------------------------------------------------- 1 | #define BENCHMARK "OSU UPC++ Scatter Latency Test" 2 | /* 3 | * Copyright (C) 2002-2015 the Network-Based Computing Laboratory 4 | * (NBCL), The Ohio State University. 5 | * 6 | * Contact: Dr. D. K. Panda (panda@cse.ohio-state.edu) 7 | * 8 | * For detailed copyright and licensing information, please refer to the 9 | * copyright file COPYRIGHT in the top level OMB directory. 
10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace std; 19 | using namespace upcxx; 20 | 21 | #define root 0 22 | #define VERIFY 0 23 | 24 | int 25 | main (int argc, char *argv[]) 26 | { 27 | init(&argc, &argv); 28 | 29 | global_ptr src; 30 | global_ptr dst; 31 | global_ptr time_src; 32 | global_ptr time_dst; 33 | 34 | double avg_time, max_time, min_time; 35 | int i = 0, size; 36 | int skip; 37 | int64_t t_start = 0, t_stop = 0, timer=0; 38 | int max_msg_size = 1<<20, full = 0; 39 | 40 | if (process_args(argc, argv, myrank(), &max_msg_size, &full, HEADER)) { 41 | return 0; 42 | } 43 | 44 | if (ranks() < 2) { 45 | if (myrank() == 0) { 46 | fprintf(stderr, "This test requires at least two processes\n"); 47 | } 48 | return -1; 49 | } 50 | 51 | src = allocate (root, max_msg_size*sizeof(char)*ranks()); 52 | dst = allocate (myrank(), max_msg_size*sizeof(char)); 53 | 54 | assert(src != NULL); 55 | assert(dst != NULL); 56 | 57 | time_src = allocate (myrank(), 1); //for each node's local result 58 | time_dst = allocate (root, 1); //for reduction result on root 59 | 60 | assert(time_src != NULL); 61 | assert(time_dst != NULL); 62 | 63 | /* 64 | * put a barrier since allocate is non-blocking in upc++ 65 | */ 66 | barrier(); 67 | 68 | print_header(HEADER, myrank(), full); 69 | 70 | for (size=1; size <=max_msg_size; size *= 2) { 71 | if (size > LARGE_MESSAGE_SIZE) { 72 | skip = SKIP_LARGE; 73 | iterations = iterations_large; 74 | } else { 75 | skip = SKIP; 76 | } 77 | 78 | timer=0; 79 | for (i=0; i < iterations + skip ; i++) { 80 | t_start = getMicrosecondTimeStamp(); 81 | upcxx_scatter((char *)src, (char *)dst, size*sizeof(char), root); 82 | t_stop = getMicrosecondTimeStamp(); 83 | 84 | if (i>=skip) { 85 | timer+=t_stop-t_start; 86 | } 87 | barrier(); 88 | } 89 | 90 | barrier(); 91 | 92 | double* lsrc = (double *)time_src; 93 | lsrc[0] = (1.0 * timer) / iterations; 94 | 95 | upcxx_reduce((double *)time_src, (double 
*)time_dst, 1, root, 96 | UPCXX_MAX, UPCXX_DOUBLE); 97 | if (myrank()==root) { 98 | double* ldst = (double *)time_dst; 99 | max_time = ldst[0]; 100 | } 101 | 102 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 103 | UPCXX_MIN, UPCXX_DOUBLE); 104 | if (myrank()==root) { 105 | double* ldst = (double *)time_dst; 106 | min_time = ldst[0]; 107 | } 108 | 109 | upcxx_reduce((double *)time_src, (double *)time_dst, 1, root, 110 | UPCXX_SUM, UPCXX_DOUBLE); 111 | if (myrank()==root) { 112 | double* ldst = (double *)time_dst; 113 | avg_time = ldst[0]/ranks(); 114 | } 115 | 116 | barrier(); 117 | 118 | print_data(myrank(), full, size*sizeof(char), avg_time, min_time, 119 | max_time, iterations); 120 | } 121 | 122 | deallocate(src); 123 | deallocate(dst); 124 | deallocate(time_src); 125 | deallocate(time_dst); 126 | 127 | finalize(); 128 | 129 | return EXIT_SUCCESS; 130 | } 131 | 132 | /* vi: set sw=4 sts=4 tw=80: */ 133 | --------------------------------------------------------------------------------