├── Makefile.am
├── src
    ├── opencl
    │   ├── level2
    │   │   ├── Makefile.am
    │   │   └── s3d
    │   │   │   ├── Makefile.am
    │   │   │   ├── epmpi
    │   │   │       └── Makefile.am
    │   │   │   └── rdwdot2.cl
    │   ├── Makefile.am
    │   ├── level1
    │   │   ├── Makefile.am
    │   │   ├── sort
    │   │   │   ├── Sort.h
    │   │   │   ├── Makefile.am
    │   │   │   └── epmpi
    │   │   │   │   └── Makefile.am
    │   │   ├── stencil2d
    │   │   │   ├── OpenCLStencilFactory.h
    │   │   │   ├── Makefile.am
    │   │   │   ├── README.txt
    │   │   │   ├── tpmpi
    │   │   │   │   └── Makefile.am
    │   │   │   ├── OpenCLStencilFactory.cpp
    │   │   │   ├── MPIOpenCLStencilFactory.h
    │   │   │   ├── CommonOpenCLStencilFactory.h
    │   │   │   ├── MPIOpenCLStencil.h
    │   │   │   ├── OpenCLStencil.h
    │   │   │   └── CommonOpenCLStencilFactory.cpp
    │   │   ├── md
    │   │   │   ├── MD.h
    │   │   │   ├── Makefile.am
    │   │   │   ├── epmpi
    │   │   │   │   └── Makefile.am
    │   │   │   └── md.cl
    │   │   ├── md5hash
    │   │   │   ├── Makefile.am
    │   │   │   └── epmpi
    │   │   │   │   └── Makefile.am
    │   │   ├── triad
    │   │   │   ├── Makefile.am
    │   │   │   └── epmpi
    │   │   │   │   └── Makefile.am
    │   │   ├── gemm
    │   │   │   ├── Makefile.am
    │   │   │   └── epmpi
    │   │   │   │   └── Makefile.am
    │   │   ├── scan
    │   │   │   ├── Makefile.am
    │   │   │   ├── epmpi
    │   │   │   │   └── Makefile.am
    │   │   │   ├── tpmpi
    │   │   │   │   └── Makefile.am
    │   │   │   └── TPScan.h
    │   │   ├── fft
    │   │   │   ├── Makefile.am
    │   │   │   ├── epmpi
    │   │   │   │   └── Makefile.am
    │   │   │   └── fftlib.h
    │   │   ├── spmv
    │   │   │   ├── Makefile.am
    │   │   │   └── epmpi
    │   │   │   │   └── Makefile.am
    │   │   ├── reduction
    │   │   │   ├── Makefile.am
    │   │   │   ├── epmpi
    │   │   │   │   └── Makefile.am
    │   │   │   ├── tpmpi
    │   │   │   │   └── Makefile.am
    │   │   │   └── reduction.cl
    │   │   └── bfs
    │   │   │   ├── Makefile.am
    │   │   │   ├── epmpi
    │   │   │       └── Makefile.am
    │   │   │   └── bfs_iiit.cl
    │   ├── common
    │   │   ├── Makefile.am
    │   │   ├── PMSMemMgmt.h
    │   │   ├── OpenCLNodePlatformContainer.h
    │   │   └── OpenCLPlatform.h
    │   └── level0
    │   │   ├── Makefile.am
    │   │   └── epmpi
    │   │       └── Makefile.am
    ├── cuda
    │   ├── Makefile.am
    │   ├── level2
    │   │   ├── Makefile.am
    │   │   ├── qtclustering
    │   │   │   ├── tuningParameters.h
    │   │   │   ├── libdata.h
    │   │   │   ├── qtc_common.h
    │   │   │   ├── qtclib.h
    │   │   │   ├── comm.h
    │   │   │   ├── Makefile.am
    │   │   │   ├── tpmpi
    │   │   │   │   └── Makefile.am
    │   │   │   └── comm.cpp
    │   │   └── s3d
    │   │   │   ├── Makefile.am
    │   │   │   ├── epmpi
    │   │   │       └── Makefile.am
    │   │   │   └── gr_base.h
    │   ├── level1
    │   │   ├── Makefile.am
    │   │   ├── neuralnet
    │   │   │   ├── nn_data.zip
    │   │   │   ├── epmpi
    │   │   │   │   └── Makefile.am
    │   │   │   └── Makefile.am
    │   │   ├── spmv
    │   │   │   ├── Spmv.h
    │   │   │   ├── Makefile.am
    │   │   │   └── epmpi
    │   │   │   │   └── Makefile.am
    │   │   ├── scan
    │   │   │   ├── Scan.h
    │   │   │   ├── Makefile.am
    │   │   │   ├── epmpi
    │   │   │   │   └── Makefile.am
    │   │   │   ├── tpmpi
    │   │   │   │   └── Makefile.am
    │   │   │   ├── TPScan.h
    │   │   │   └── tpScanLaunchKernel.cu
    │   │   ├── stencil2d
    │   │   │   ├── CUDAStencilFactory.h
    │   │   │   ├── Makefile.am
    │   │   │   ├── CUDAStencil.cpp
    │   │   │   ├── tpmpi
    │   │   │   │   └── Makefile.am
    │   │   │   ├── MPICUDAStencilFactory.h
    │   │   │   ├── CUDAStencil.h
    │   │   │   ├── CommonCUDAStencilFactory.h
    │   │   │   ├── CUDAStencilFactory.cpp
    │   │   │   └── MPICUDAStencil.h
    │   │   ├── md
    │   │   │   ├── MD.h
    │   │   │   ├── Makefile.am
    │   │   │   └── epmpi
    │   │   │   │   └── Makefile.am
    │   │   ├── triad
    │   │   │   ├── Makefile.am
    │   │   │   └── epmpi
    │   │   │   │   └── Makefile.am
    │   │   ├── md5hash
    │   │   │   ├── Makefile.am
    │   │   │   └── epmpi
    │   │   │   │   └── Makefile.am
    │   │   ├── sort
    │   │   │   ├── Makefile.am
    │   │   │   ├── epmpi
    │   │   │   │   └── Makefile.am
    │   │   │   ├── Sort.h
    │   │   │   └── sort_kernel.h
    │   │   ├── bfs
    │   │   │   ├── Makefile.am
    │   │   │   └── epmpi
    │   │   │   │   └── Makefile.am
    │   │   ├── fft
    │   │   │   ├── Makefile.am
    │   │   │   ├── epmpi
    │   │   │   │   └── Makefile.am
    │   │   │   └── fftlib.h
    │   │   ├── reduction
    │   │   │   ├── Makefile.am
    │   │   │   ├── epmpi
    │   │   │   │   └── Makefile.am
    │   │   │   ├── tpmpi
    │   │   │   │   └── Makefile.am
    │   │   │   ├── TPReduction.h
    │   │   │   └── tpRedLaunchKernel.cu
    │   │   └── gemm
    │   │   │   ├── Makefile.am
    │   │   │   └── epmpi
    │   │   │       └── Makefile.am
    │   ├── common
    │   │   ├── CUDAPMSMemMgr.h
    │   │   ├── PMSMemMgmt.h
    │   │   ├── support.h
    │   │   └── cudacommon.h
    │   └── level0
    │   │   ├── Makefile.am
    │   │   └── epmpi
    │   │       └── Makefile.am
    ├── mpi
    │   ├── Makefile.am
    │   ├── contention
    │   │   ├── Makefile.am
    │   │   ├── README
    │   │   ├── cuda
    │   │   │   └── Makefile.am
    │   │   └── opencl
    │   │   │   └── Makefile.am
    │   ├── contention-mt
    │   │   ├── Makefile.am
    │   │   ├── README
    │   │   ├── cuda
    │   │   │   └── Makefile.am
    │   │   └── opencl
    │   │   │   └── Makefile.am
    │   └── common
    │   │   ├── Makefile.am
    │   │   ├── NodeInfo.cpp
    │   │   ├── GetMPIType.h
    │   │   ├── RandomPairs.h
    │   │   ├── MPIHostStencilFactory.h
    │   │   ├── MPIHostStencil.h
    │   │   ├── MPIStencilUtil.h
    │   │   ├── MPIHostStencilFactory.cpp
    │   │   ├── ParallelHelpers.h
    │   │   ├── MPIHostStencil.cpp
    │   │   └── MPIStencilUtil.cpp
    ├── common
    │   ├── Matrix2DStatics.cpp
    │   ├── ProgressBar.cpp
    │   ├── CTimer.cpp
    │   ├── Makefile.am
    │   ├── Option.cpp
    │   ├── InvalidArgValue.h
    │   ├── PMSMemMgr.h
    │   ├── InvalidArgValue.cpp
    │   ├── CTimer.h
    │   ├── NodeIDList.h
    │   ├── HostStencilFactory.cpp
    │   ├── BadCommandLine.h
    │   ├── Option.h
    │   ├── HostStencilFactory.h
    │   ├── StencilUtil.cpp
    │   ├── HostStencil.h
    │   ├── SerializableObject.h
    │   ├── ValidateMatrix2D.cpp
    │   ├── Graph.h
    │   ├── SerialStencilUtil.cpp
    │   ├── InitializeMatrix2D.h
    │   ├── SerialStencilUtil.h
    │   ├── StencilFactory.cpp
    │   ├── StencilUtil.h
    │   ├── Matrix2DFileSupport.cpp
    │   ├── StencilFactory.h
    │   ├── Stencil.h
    │   ├── ValidateMatrix2D.h
    │   ├── Matrix2D.cpp
    │   ├── HostStencil.cpp
    │   ├── OptionParser.h
    │   ├── Timer.h
    │   ├── Utility.h
    │   └── ProgressBar.h
    ├── Makefile.am
    └── stability
    │   ├── Stability.h
    │   ├── Makefile.am
    │   └── epmpi
    │       └── Makefile.am
├── config
    ├── config.mk.in
    ├── conf-cation.sh
    ├── conf-lens.sh
    ├── conf-llano.sh
    ├── conf-ion.sh
    ├── conf-linux.sh
    ├── dirtargets.mk.in
    ├── conf-valhalla.sh
    ├── conf-keeneland.sh
    ├── conf-newark.sh
    ├── conf-atlanta.sh
    ├── conf-crossarm.sh
    ├── common.mk.in
    ├── conf-titan.sh
    ├── conf-osx.sh
    └── find_cuda_libs.sh
├── doc
    ├── shoc-manual.pdf
    └── Makefile.am
├── tools
    ├── shocdriver.in
    ├── Makefile.am
    ├── numatest.pl
    └── prettyPrint.pl
├── .gitignore
├── data
    ├── platforms.csv
    ├── devices.csv
    └── REPORTING_RESULTS
├── README.txt
├── LICENSE.txt
└── LICENSE-CUDPP.txt


/Makefile.am:
--------------------------------------------------------------------------------
1 | SUBDIRS	= src tools doc
2 | 


--------------------------------------------------------------------------------
/src/opencl/level2/Makefile.am:
--------------------------------------------------------------------------------
1 | SUBDIRS=s3d
2 | 


--------------------------------------------------------------------------------
/src/cuda/Makefile.am:
--------------------------------------------------------------------------------
1 | SUBDIRS=level0 level1 level2
2 | 


--------------------------------------------------------------------------------
/src/cuda/level2/Makefile.am:
--------------------------------------------------------------------------------
1 | SUBDIRS=s3d qtclustering
2 | 


--------------------------------------------------------------------------------
/src/mpi/Makefile.am:
--------------------------------------------------------------------------------
1 | SUBDIRS=common contention contention-mt
2 | 


--------------------------------------------------------------------------------
/src/opencl/Makefile.am:
--------------------------------------------------------------------------------
1 | SUBDIRS=common level0 level1 level2
2 | 


--------------------------------------------------------------------------------
/config/config.mk.in:
--------------------------------------------------------------------------------
1 | include $(top_builddir)/config/common.mk
2 | 


--------------------------------------------------------------------------------
/doc/shoc-manual.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vetter/shoc/HEAD/doc/shoc-manual.pdf


--------------------------------------------------------------------------------
/tools/shocdriver.in:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | # Run the SHOC driver script, pointing it at the binary directory.
4 | # "$@" (not $*) hands every argument to the driver unchanged, so an
5 | # argument that contains whitespace is not split into several words.
6 | perl SHOC_LIBEXEC_DIR/driver.pl -bindir SHOC_BIN_DIR "$@"
7 | 
8 | 


--------------------------------------------------------------------------------
/src/opencl/level1/Makefile.am:
--------------------------------------------------------------------------------
1 | SUBDIRS=bfs fft gemm stencil2d reduction scan sort triad md spmv md5hash
2 | 


--------------------------------------------------------------------------------
/src/cuda/level1/Makefile.am:
--------------------------------------------------------------------------------
1 | SUBDIRS=bfs fft gemm stencil2d md reduction scan sort spmv triad md5hash neuralnet
2 | 


--------------------------------------------------------------------------------
/src/cuda/level1/neuralnet/nn_data.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vetter/shoc/HEAD/src/cuda/level1/neuralnet/nn_data.zip


--------------------------------------------------------------------------------
/src/common/Matrix2DStatics.cpp:
--------------------------------------------------------------------------------
1 | #include "Matrix2D.h"
2 | 
3 | template<> PMSMemMgr<float>* Matrix2D<float>::pmsmm = 0;
4 | template<> PMSMemMgr<double>* Matrix2D<double>::pmsmm = 0;
5 | 
6 | 


--------------------------------------------------------------------------------
/src/mpi/contention/Makefile.am:
--------------------------------------------------------------------------------
 1 | 
 2 | if BUILD_CUDA
 3 |     MAYBE_CUDA = cuda
 4 | endif
 5 | 
 6 | if BUILD_OPENCL
 7 |     MAYBE_OPENCL = opencl
 8 | endif
 9 | 
10 | SUBDIRS=$(MAYBE_OPENCL) $(MAYBE_CUDA)
11 | 
12 | 


--------------------------------------------------------------------------------
/src/mpi/contention-mt/Makefile.am:
--------------------------------------------------------------------------------
 1 | 
 2 | if BUILD_CUDA
 3 |     MAYBE_CUDA = cuda
 4 | endif
 5 | 
 6 | if BUILD_OPENCL
 7 |     MAYBE_OPENCL = opencl
 8 | endif
 9 | 
10 | SUBDIRS=$(MAYBE_OPENCL) $(MAYBE_CUDA)
11 | 
12 | 


--------------------------------------------------------------------------------
/config/conf-cation.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | sh ./configure \
 4 |   CPPFLAGS="-I/opt/cuda-4.0/cuda/include" \
 5 |   LDFLAGS="-L/opt/cuda-4.0/cuda/lib64"    
 6 | 
 7 | 
 8 | # other useful options
 9 | #    --disable-stability
10 | 
11 | 


--------------------------------------------------------------------------------
/src/common/ProgressBar.cpp:
--------------------------------------------------------------------------------
1 | 
2 | #include "ProgressBar.h"
3 | 
4 | // initialize static members of the ProgressBar class.
5 | const char ProgressBar::barDone[81] = "================================================================================";
6 | 
7 | 


--------------------------------------------------------------------------------
/src/cuda/level1/spmv/Spmv.h:
--------------------------------------------------------------------------------
 1 | #ifndef SPMV_H_
 2 | #define SPMV_H_
 3 | 
 4 | // Block size
 5 | static const int BLOCK_SIZE = 128;
 6 | static const int WARP_SIZE = 32;
 7 | 
 8 | enum kernelType{CSR_SCALAR, CSR_VECTOR, ELLPACKR};
 9 | 
10 | #endif // SPMV_H_
11 | 


--------------------------------------------------------------------------------
/config/conf-lens.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | 
 4 | CUDA_ROOT=/sw/analysis-x64/cuda/3.2/sl5.0_binary
 5 | 
 6 | # configure with CUDA_ROOT's headers and tools; the stability build is disabled
 7 | sh ./configure \
 8 |     CPPFLAGS="-I${CUDA_ROOT}/include" \
 9 |     PATH="${CUDA_ROOT}/bin:${PATH}" \
10 |     --disable-stability
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/config/conf-llano.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | OCL_ROOT=/opt/AMDAPP
 4 | 
 5 | # do the actual configuration
 6 | sh ./configure \
 7 |     CPPFLAGS="-I$OCL_ROOT/include" \
 8 |     LDFLAGS="-L$OCL_ROOT/lib/x86_64" \
 9 |     --without-cuda \
10 |     --disable-stability
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/src/cuda/level2/qtclustering/tuningParameters.h:
--------------------------------------------------------------------------------
 1 | #ifndef _TUNINGPARAMETERS_H_
 2 | #define _TUNINGPARAMETERS_H_
 3 | 
 4 | #define THREADSPERBLOCK     64
 5 | 
 6 | #define SM_COUNT 16
 7 | #define OVR_SBSCR_FACTOR 16
 8 | 
 9 | #define GPU_MIN_SATURATION_FACTOR 32
10 | 
11 | #endif
12 | 


--------------------------------------------------------------------------------
/src/cuda/level1/scan/Scan.h:
--------------------------------------------------------------------------------
 1 | #ifndef SCAN_H_
 2 | #define SCAN_H_
 3 | 
 4 | template <class T>
 5 | bool scanCPU(T *data, T* reference, T* dev_result, const size_t size);
 6 | 
 7 | template <class T, class vecT>
 8 | void RunTest(string testName, ResultDatabase &resultDB, OptionParser &op);
 9 | 
10 | #endif // SCAN_H_
11 | 


--------------------------------------------------------------------------------
/src/opencl/common/Makefile.am:
--------------------------------------------------------------------------------
 1 | include ${top_builddir}/config/common.mk
 2 | include ${top_builddir}/config/targets.mk
 3 | 
 4 | noinst_LIBRARIES	= libSHOCCommonOpenCL.a
 5 | libSHOCCommonOpenCL_a_SOURCES = OpenCLDeviceInfo.cpp \
 6 | 	OpenCLPlatform.cpp \
 7 | 	OpenCLNodePlatformContainer.cpp \
 8 | 	Event.cpp
 9 | 
10 | 


--------------------------------------------------------------------------------
/src/mpi/contention-mt/README:
--------------------------------------------------------------------------------
1 |             Over-subscribed, multi-threaded contention benchmark
2 | 
3 | The benchmark counts the number of GPU devices and spawns threads to execute
4 | the GPU benchmark, and the main processes run the MPI latency test. Sequential
5 | runs are performed first to report the base-case numbers.
6 | 


--------------------------------------------------------------------------------
/src/common/CTimer.cpp:
--------------------------------------------------------------------------------
 1 | #include "CTimer.h"
 2 | #include "Timer.h"
 3 | 
 4 | int
 5 | Timer_Start()   // forwards to Timer::Start() and returns its result
 6 | {
 7 |     return Timer::Start();
 8 | }
 9 | 
10 | double
11 | Timer_Stop(int h, const char *d)   // passes h and d through to Timer::Stop()
12 | {
13 |     return Timer::Stop(h,d);
14 | }
15 | 
16 | void
17 | Timer_Insert(const char *d, double v)   // passes d and v through to Timer::Insert()
18 | {
19 |     Timer::Insert(d,v);
20 | }
21 | 


--------------------------------------------------------------------------------
/config/conf-ion.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # note: "ion" is an Ubuntu 12.04 system with gcc 4.6.x
 3 | 
 4 | PATH="/usr/local/cuda42/cuda/bin:$PATH"
 5 | 
 6 | ./configure \
 7 | CPPFLAGS="-I/usr/local/cuda42/cuda/include/" \
 8 | CUDA_CPPFLAGS="-DUSE_CLOCK_GETTIME -gencode=arch=compute_11,code=sm_11 -gencode=arch=compute_20,code=sm_20 -gencode=arch=compute_30,code=sm_30"
 9 | 
10 | 


--------------------------------------------------------------------------------
/src/Makefile.am:
--------------------------------------------------------------------------------
 1 | 
 2 | if BUILD_CUDA
 3 |     MAYBE_CUDA = cuda
 4 | endif
 5 | 
 6 | if BUILD_OPENCL
 7 |     MAYBE_OPENCL = opencl
 8 | endif
 9 | 
10 | if BUILD_STABILITY
11 |     MAYBE_STABILITY = stability
12 | endif
13 | 
14 | if BUILD_MPI
15 |     MAYBE_MPI = mpi
16 | endif
17 | 
18 | SUBDIRS=common $(MAYBE_OPENCL) $(MAYBE_CUDA) $(MAYBE_MPI) $(MAYBE_STABILITY)
19 | 
20 | 


--------------------------------------------------------------------------------
/src/common/Makefile.am:
--------------------------------------------------------------------------------
 1 | include ${top_builddir}/config/common.mk
 2 | include ${top_builddir}/config/targets.mk
 3 | 
 4 | noinst_LIBRARIES	= libSHOCCommon.a
 5 | libSHOCCommon_a_SOURCES = CTimer.cpp \
 6 | 	ResultDatabase.cpp \
 7 | 	OptionParser.cpp \
 8 | 	Option.cpp \
 9 | 	Timer.cpp \
10 | 	ProgressBar.cpp \
11 | 	InvalidArgValue.cpp \
12 | 	Matrix2DStatics.cpp
13 | 
14 | 


--------------------------------------------------------------------------------
/doc/Makefile.am:
--------------------------------------------------------------------------------
 1 | 
 2 | doc_DATA = shoc-manual.pdf
 3 | 
 4 | if BUILD_DOC
 5 | shoc-manual.pdf:  $(srcdir)/shoc-manual.tex
 6 | 	cp -f $(srcdir)/shoc-manual.bib ./shoc.bib
 7 | 	latexmk -pdf $(srcdir)/shoc-manual.tex
 8 | # clean-local extends Automake's own clean rule instead of replacing it.
 9 | clean-local:
10 | 	${RM} *.aux *.bbl *.dvi *.ps *.log *.blg *.toc shoc-manual.out shoc-manual.fls shoc-manual.fdb_latexmk shoc-manual.pdf shoc.bib
11 | endif
12 | 
13 | 


--------------------------------------------------------------------------------
/src/mpi/common/Makefile.am:
--------------------------------------------------------------------------------
 1 | include ${top_builddir}/config/common.mk
 2 | include ${top_builddir}/config/targets.mk
 3 | 
 4 | # Which compiler to use to build and link?
 5 | CXX = ${MPICXX}
 6 | CXXLD = ${MPICXX}
 7 | 
 8 | noinst_LIBRARIES    = libSHOCCommonMPI.a
 9 | libSHOCCommonMPI_a_SOURCES = RandomPairs.cpp \
10 | 	MPI2DGridProgram.cpp \
11 | 	MPIHostStencil.cpp \
12 | 	MPIHostStencilFactory.cpp \
13 | 	MPIStencilUtil.cpp
14 | 
15 | 


--------------------------------------------------------------------------------
/src/cuda/level2/qtclustering/libdata.h:
--------------------------------------------------------------------------------
 1 | #ifndef _LIBDATA_H_
 2 | #define _LIBDATA_H_
 3 | #include <math.h>
 4 | #include "support.h"
 5 | #include "qtclib.h"
 6 | 
 7 | float *generate_synthetic_data(float **rslt_mtrx, int **indr_mtrx, int *max_degree, float threshold, int N, int type);
 8 | int read_BLAST_data(float **rslt_mtrx, int **indr_mtrx, int *max_degree, float threshold, const char *fname, int maxN, int matrix_type_mask);
 9 | 
10 | #endif
11 | 


--------------------------------------------------------------------------------
/src/common/Option.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include "Option.h"
 3 | using namespace std;
 4 | 
 5 | // Print the option's names, values, type and help text to stdout.
 6 | void Option::print() {
 7 |    std::cout << "Long Name: " << longName << std::endl
 8 |              << "Short Name: " << shortLetter << std::endl
 9 |              << "Default Value: " << defaultValue << std::endl
10 |              << "Actual Value: " << value << std::endl
11 |              << "Type: " << type << std::endl
12 |              << "helpText: " << helpText << std::endl;
13 | }
14 | 
15 | 


--------------------------------------------------------------------------------
/tools/Makefile.am:
--------------------------------------------------------------------------------
 1 | toolsdir		= $(prefix)/tools
 2 | libexec_SCRIPTS	= driver.pl numatest.pl
 3 | bin_SCRIPTS		= shocdriver
 4 | doc_DATA		= compilerVersion.txt buildFlags.txt
 5 | # Generate shocdriver from its template by filling in the install directories.
 6 | shocdriver: $(srcdir)/shocdriver.in
 7 | 	sed -e "s,SHOC_LIBEXEC_DIR,@libexecdir@," -e "s,SHOC_BIN_DIR,@bindir@," $(srcdir)/shocdriver.in > $@
 8 | 
 9 | compilerVersion.txt:
10 | 	${CXX} --version > $@
11 | 
12 | buildFlags.txt:
13 | 	cp ${top_builddir}/config/common.mk $@
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/src/common/InvalidArgValue.h:
--------------------------------------------------------------------------------
 1 | #ifndef INVALIDARGVALUE_H
 2 | #define INVALIDARGVALUE_H
 3 | 
 4 | #include <stdexcept>
 5 | #include <string>
 6 | 
 7 | // Exception for command line argument value errors
 8 | class InvalidArgValue : public std::runtime_error
 9 | {
10 | private:
11 |     static std::string GenerateErrorMessage( const std::string& _msg );
12 | 
13 | public:
14 |     InvalidArgValue( const std::string& _msg );
15 | };
16 | 
17 | #endif // INVALIDARGVALUE_H
18 | 


--------------------------------------------------------------------------------
/src/mpi/common/NodeInfo.cpp:
--------------------------------------------------------------------------------
 1 | #include "NodeInfo.h"
 2 | #include <string>
 3 | #include <iostream>
 4 | #include <sstream>
 5 | #include <fstream>
 6 | #include <string.h>
 7 | #include <stdlib.h>
 8 | 
 9 | using namespace std;
10 | 
11 | // Initializes MPI, constructs a NodeInfo and calls its print(), then finalizes MPI.
12 | int main(int argc,char *argv[])
13 | {
14 |     MPI_Init(&argc,&argv);
15 |     NodeInfo NI;
16 |     NI.print();
17 |     MPI_Finalize();
18 |     return 0;
19 | }
20 | 


--------------------------------------------------------------------------------
/src/mpi/common/GetMPIType.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef GET_MPI_TYPE_H
 3 | #define GET_MPI_TYPE_H
 4 | 
 5 | #include <mpi.h>
 6 | 
 7 | inline MPI_Datatype GetMPIType(const float&)  { return MPI_FLOAT; }
 8 | inline MPI_Datatype GetMPIType(const double&) { return MPI_DOUBLE; }
 9 | inline MPI_Datatype GetMPIType(const int&)    { return MPI_INT; }
10 | inline MPI_Datatype GetMPIType(const long&)   { return MPI_LONG; }
11 | inline MPI_Datatype GetMPIType(const char&)   { return MPI_CHAR; }
12 | 
13 | #endif
14 | 


--------------------------------------------------------------------------------
/config/conf-linux.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | 
 4 | # do the actual configuration
 5 | #
 6 | # the configure script looks for CUDA using the PATH, but since OpenCL
 7 | # is library based, you have to explicitly specify CPPFLAGS to find
 8 | # the OpenCL headers.  You may also need to specify LDFLAGS, depending on
 9 | # whether the OpenCL libraries are installed in a location searched by
10 | # the linker such as /usr/lib.
11 | #
12 | sh ./configure \
13 | CPPFLAGS="-I/usr/local/cuda/include"
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/config/dirtargets.mk.in:
--------------------------------------------------------------------------------
 1 | # These targets name actions, not files.  Declaring them phony keeps a
 2 | # same-named directory (src/ has opencl and cuda) from masking the rule.
 3 | .PHONY: all opencl cuda clean distclean
 4 | 
 5 | # Build the requested target in every subdirectory; stop at the first failure
 6 | # instead of letting a later, successful sub-make hide it.
 7 | all opencl cuda:
 8 | 	@for dir in $(SUBDIRS); do ${MAKE} -C $$dir $@ || exit 1; done
 9 | 
10 | # Clean subdirectories in the reverse of SUBDIRS order.
11 | clean:
12 | 	@if test -n "$(SUBDIRS)"; then \
13 | 	  rev=""; for dir in $(SUBDIRS); do rev="$$dir $$rev"; done; \
14 | 	  for dir in $$rev; do ${MAKE} -C $$dir $@; done \
15 | 	fi
16 | 
17 | # As clean, then remove the Makefile in this directory.
18 | distclean:
19 | 	@if test -n "$(SUBDIRS)"; then \
20 | 	  rev=""; for dir in $(SUBDIRS); do rev="$$dir $$rev"; done; \
21 | 	  for dir in $$rev; do ${MAKE} -C $$dir $@; done \
22 | 	fi
23 | 	${RM} Makefile
24 | 
25 | 


--------------------------------------------------------------------------------
/src/cuda/level2/qtclustering/qtc_common.h:
--------------------------------------------------------------------------------
 1 | #ifndef _QTC_COMMON_H_
 2 | #define _QTC_COMMON_H_
 3 | 
 4 | #define GLOBAL_MEMORY 0x0
 5 | #define TEXTUR_MEMORY 0x1
 6 | #define COMPACT_STORAGE_MATRIX 0x00
 7 | #define FULL_STORAGE_MATRIX    0x10
 8 | 
 9 | #ifdef MIN
10 | # undef MIN
11 | #endif
12 | #define MIN(_X, _Y) ( ((_X) < (_Y)) ? (_X) : (_Y) )
13 | 
14 | #ifdef MAX
15 | # undef MAX
16 | #endif
17 | #define MAX(_X, _Y) ( ((_X) > (_Y)) ? (_X) : (_Y) )
18 | 
19 | #define INVALID_POINT_MARKER -42
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/config/conf-valhalla.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # note: "valhalla" is an Ubuntu 12.04 system with gcc 4.6.x
 3 | 
 4 | # nvcc must be on PATH before configuring; bail out with setup hints if not.
 5 | if ! which nvcc; then
 6 |    echo "Error: no nvcc found.  Please set your path:"
 7 |    echo "export PATH=\"/usr/local/cuda-6.0/bin:\$PATH\""
 8 |    echo "export LD_LIBRARY_PATH=\"/usr/local/cuda-6.0/lib64:\$LD_LIBRARY_PATH\""
 9 |    exit 1
10 | fi
11 | 
12 | ./configure \
13 | CPPFLAGS="-I/usr/local/cuda-6.0/include/" \
14 | CUDA_CPPFLAGS="-DUSE_CLOCK_GETTIME -gencode=arch=compute_50,code=sm_50"
15 | 
16 | 


--------------------------------------------------------------------------------
/src/cuda/level2/qtclustering/qtclib.h:
--------------------------------------------------------------------------------
 1 | #ifndef QTLIB_H
 2 | #define QTLIB_H
 3 | 
 4 | #include "OptionParser.h"
 5 | 
 6 | extern int qtcDevice;
 7 | 
 8 | void init(OptionParser& op);
 9 | void reduce_card(void *card, int pointCount);
10 | void allocDeviceBuffer(void** bufferp, unsigned long bytes);
11 | void freeDeviceBuffer(void* buffer);
12 | void copyToDevice(void* to_device, void* from_host, unsigned long bytes);
13 | void copyFromDevice(void* to_host, void* from_device, unsigned long bytes);
14 | 
15 | #endif // QTLIB_H
16 | 


--------------------------------------------------------------------------------
/src/opencl/level1/sort/Sort.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SORT_H
 2 | #define _SORT_H
 3 | 
 4 | static const int SORT_BLOCK_SIZE = 128;
 5 | static const int SORT_BITS = 32;
 6 | 
 7 | void radixSortStep(uint nbits, uint startbit, cl_mem counters,
 8 |         cl_mem countersSum, cl_mem blockOffsets, cl_mem* scanBlockSums,
 9 |         uint numElements, cl_kernel sortBlocks, cl_kernel findOffsets,
10 |         cl_kernel reorder, cl_kernel scan, cl_kernel uniformAdd,
11 |         cl_command_queue queue, cl_device_id dev);
12 | 
13 | #endif // _SORT_H
14 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.o
 2 | *.a
 3 | *.d
 4 | *_cl.cpp
 5 | *.buildflags
 6 | *Makefile
 7 | config.log
 8 | config.status
 9 | config/common.mk
10 | config/config.h
11 | config/targets.mk
12 | tools/Logs/*
13 | tools/results.csv
14 | autom4te.cache
15 | *.suo
16 | *.sdf
17 | *.opensdf
18 | ipch
19 | bin
20 | libexec
21 | share
22 | 
23 | # Ignore some generated files for in-place build
24 | .deps
25 | config/config.mk
26 | config/dirtargets.mk
27 | config/stamp-h1
28 | tools/buildFlags.txt
29 | tools/compilerVersion.txt
30 | tools/shocdriver
31 | 


--------------------------------------------------------------------------------
/src/cuda/level1/stencil2d/CUDAStencilFactory.h:
--------------------------------------------------------------------------------
 1 | #ifndef CUDASTENCILFACTORY_H
 2 | #define CUDASTENCILFACTORY_H
 3 | 
 4 | #include "CommonCUDAStencilFactory.h"
 5 | 
 6 | template<class T>
 7 | class CUDAStencilFactory : public CommonCUDAStencilFactory<T>
 8 | {
 9 | public:
10 |     CUDAStencilFactory( void )
11 |       : CommonCUDAStencilFactory<T>( "CUDAStencil" )
12 |     {
13 |         // nothing else to do
14 |     }
15 | 
16 |     virtual Stencil<T>* BuildStencil( const OptionParser& opts );
17 | };
18 | 
19 | #endif // CUDASTENCILFACTORY_H
20 | 
21 | 


--------------------------------------------------------------------------------
/src/mpi/common/RandomPairs.h:
--------------------------------------------------------------------------------
 1 | #ifndef RANDOM_PAIRS_H
 2 | #define RANDOM_PAIRS_H
 3 | 
 4 | // ****************************************************************************
 5 | // File: RandomPairs.h
 6 | //
 7 | // Purpose:
 8 | //   Collective method that picks a random, unique pair each time it is called
 9 | //
10 | // Programmer:  Vinod Tipparaju
11 | // Creation:    August 12, 2009
12 | //
13 | // ****************************************************************************
14 | 
15 | #include <mpi.h>    // MPI_Comm, so this header can be included on its own
16 | 
17 | int RandomPairs(int myrank, int numranks, MPI_Comm new_comm);
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/src/cuda/level2/qtclustering/comm.h:
--------------------------------------------------------------------------------
 1 | #ifndef _COMM_H_
 2 | #define _COMM_H_
 3 | 
 4 | #if defined(PARALLEL)
 5 | #  include "mpi.h"
 6 | #endif
 7 | 
 8 | #define COMM_TYPE_INT   0
 9 | #define COMM_TYPE_FLOAT 1
10 | 
11 | void comm_update_communicator(int cwrank, int active_node_count);
12 | void comm_find_winner(int *max_card, int *winner_node, int *winner_index, int cwrank, int max_index);
13 | void comm_broadcast( void *ptr, int cnt, int type, int source);
14 | void comm_barrier(void);
15 | int comm_get_size(void);
16 | int comm_get_rank(void);
17 | 
18 | #endif
19 | 


--------------------------------------------------------------------------------
/src/cuda/level1/md/MD.h:
--------------------------------------------------------------------------------
 1 | #ifndef MD_H__
 2 | #define MD_H__
 3 | 
 4 | #include <cuda.h>
 5 | #include <cuda_runtime_api.h>
 6 | 
 7 | // Problem Constants
 8 | static const float  cutsq        = 16.0f; // Square of cutoff distance
 9 | static const int    maxNeighbors = 128;  // Max number of nearest neighbors
10 | static const double domainEdge   = 20.0; // Edge length of the cubic domain
11 | static const float  lj1          = 1.5;  // LJ constants
12 | static const float  lj2          = 2.0;
13 | static const float  EPSILON      = 0.1f; // Relative Error between CPU/GPU
14 | 
15 | #endif // MD_H__
16 | 


--------------------------------------------------------------------------------
/data/platforms.csv:
--------------------------------------------------------------------------------
 1 | platform,name
 2 | cuda32,CUDA 3.2
 3 | cuda40,CUDA 4.0
 4 | cuda41,CUDA 4.1
 5 | cuda50,CUDA 5.0
 6 | cuda42,CUDA 4.2
 7 | cuda60,CUDA 6.0
 8 | cuda65,CUDA 6.5
 9 | nvocl32,NV OpenCL 3.2
10 | nvocl40,NV OpenCL 4.0
11 | nvocl41,NV OpenCL 4.1
12 | nvocl42,NV OpenCL 4.2
13 | nvocl50,NV OpenCL 5.0
14 | nvocl60,NV OpenCL 6.0
15 | nvocl65,NV OpenCL 6.5
16 | amdocl24,AMD OpenCL 2.4
17 | amdocl26,AMD OpenCL 2.6
18 | amdocl29,AMD OpenCL 2.9
19 | intel15,Intel OpenCL SDK 1.5
20 | intel44,Intel OpenCL SDK 4.4
21 | intelbgnt1,Intel Beignet 1.0.0
22 | best,Best Available Result
23 | 


--------------------------------------------------------------------------------
/src/common/PMSMemMgr.h:
--------------------------------------------------------------------------------
 1 | #ifndef PMSMEMMGR_H
 2 | #define PMSMEMMGR_H
 3 | 
 4 | // Abstract interface for programming-model-specific host buffer
 5 | // management.  A concrete manager decides how host memory for items of
 6 | // type T is obtained and released; a buffer must be released through the
 7 | // same kind of manager that allocated it.
 8 | template<typename T>
 9 | class PMSMemMgr
10 | {
11 | public:
12 |     // Virtual so that a concrete manager can safely be destroyed through
13 |     // a pointer to this base class.
14 |     virtual ~PMSMemMgr() {}
15 | 
16 |     // Allocate a host buffer with room for nItems objects of type T.
17 |     virtual T* AllocHostBuffer( size_t nItems ) = 0;
18 | 
19 |     // Release a buffer previously returned by AllocHostBuffer.
20 |     virtual void ReleaseHostBuffer( T* buf ) = 0;
21 | };
22 | 
23 | 
24 | // Manager that uses the ordinary C++ free store (new[] / delete[]).
25 | template<typename T>
26 | class DefaultPMSMemMgr : public PMSMemMgr<T>
27 | {
28 | public:
29 |     // Allocate nItems objects of type T with array new.
30 |     virtual T* AllocHostBuffer( size_t nItems )
31 |     {
32 |         return new T[nItems];
33 |     }
34 | 
35 |     // Release a buffer obtained from AllocHostBuffer; array delete
36 |     // matches the array new used there.
37 |     virtual void ReleaseHostBuffer( T* buf )
38 |     {
39 |         delete[] buf;
40 |     }
41 | };
42 | 
43 | #endif // PMSMEMMGR_H
44 | 


--------------------------------------------------------------------------------
/src/stability/Stability.h:
--------------------------------------------------------------------------------
 1 | #ifndef STABILITY_H
 2 | #define STABILITY_H
 3 | 
 4 | // Declarations shared by the Stability benchmark sources.
 5 | //
 6 | // NOTE: OptionParser is not declared in this header; the including file
 7 | // must make it visible before including Stability.h.
 8 | 
 9 | void init(OptionParser& op);
10 | 
11 | // NOTE(review): forward/inverse presumably apply a forward and an inverse
12 | // transform to n_tasks work items in 'work' -- confirm against the
13 | // implementation.
14 | void forward(void* work, const int n_tasks);
15 | void inverse(void* work, const int n_tasks);
16 | int check(void* work, void* check, const int half_n_tasks, const int half_n_elts);
17 | 
18 | // Device memory helpers; all sizes are expressed in bytes.
19 | unsigned long findAvailBytes(void);
20 | void allocDeviceBuffer(void** bufferp, const unsigned long bytes);
21 | void freeDeviceBuffer(void* buffer);
22 | void copyToDevice(void* to_device, const void* from_host, const unsigned long bytes);
23 | void copyFromDevice(void* to_host, const void* from_device, const unsigned long bytes);
24 | 
25 | #endif // STABILITY_H
26 | 


--------------------------------------------------------------------------------
/src/common/InvalidArgValue.cpp:
--------------------------------------------------------------------------------
 1 | #include <sstream>
 2 | #include "InvalidArgValue.h"
 3 | 
 4 | // Compose the text carried by an InvalidArgValue exception: a fixed
 5 | // prefix, then the caller's detail or a stock phrase if none was given.
 6 | std::string
 7 | InvalidArgValue::GenerateErrorMessage( const std::string& _msg )
 8 | {
 9 |     const bool hasDetail = !_msg.empty();
10 | 
11 |     std::ostringstream text;
12 |     text << "invalid argument value: ";
13 |     if( hasDetail )
14 |         text << _msg;
15 |     else
16 |         text << "no further details available";
17 |     return text.str();
18 | }
19 | 
20 | 
21 | InvalidArgValue::InvalidArgValue( const std::string& _msg )
22 |   : std::runtime_error( GenerateErrorMessage(_msg) )
23 | {
24 |     // the base class now holds the formatted message; nothing else to set up
25 | }
26 | 
27 | 


--------------------------------------------------------------------------------
/src/common/CTimer.h:
--------------------------------------------------------------------------------
 1 | #ifndef CTIMER_H
 2 | #define CTIMER_H
 3 | 
 4 | // ****************************************************************************
 5 | //  File:  CTimer.h
 6 | //
 7 | //  Purpose:
 8 | //    C versions to call the critical routines of the Timer class.
 9 | //
10 | //  Programmer:  Jeremy Meredith
11 | //  Creation:    October 22, 2007
12 | //
13 | // ****************************************************************************
14 | #ifdef __cplusplus
15 | extern "C" {
16 | #endif
17 | int    Timer_Start(void);                  /* (void): a real prototype for C callers; presumably returns the handle passed to Timer_Stop -- confirm */
18 | double Timer_Stop(int, const char *);      /* presumably takes that handle plus a description string -- confirm */
19 | void   Timer_Insert(const char *, double); /* presumably records a value under the given description -- confirm */
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 | 
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/config/conf-keeneland.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # A "good" set of optimization flags is compiler dependent.
 4 | # These might be reasonable flags to start from.
 5 | #
 6 | # GNU
 7 | OPTFLAGS="-g -O2"
 8 | 
 9 | # Intel
10 | #OPTFLAGS="-g -xHOST -O3 -ip -no-prec-div"
11 | #export CXX=icpc
12 | #export CC=icc
13 | 
14 | # PGI
15 | #OPTFLAGS="-g -fastsse"
16 | #export CXX=pgcpp
17 | #export CC=pgcc
18 | 
19 | 
20 | sh ./configure \
21 |     CPPFLAGS="-I/sw/kfs/cuda/4.2/linux_binary/include" \
22 |     CUDA_CPPFLAGS="-gencode=arch=compute_20,code=sm_20 -gencode=arch=compute_20,code=compute_20" \
23 |     CXXFLAGS="$OPTFLAGS" \
24 |     CFLAGS="$OPTFLAGS" \
25 |     LDFLAGS="$OPTFLAGS"
26 | 
27 | 


--------------------------------------------------------------------------------
/src/opencl/common/PMSMemMgmt.h:
--------------------------------------------------------------------------------
 1 | #ifndef PMSMEMMGMT_H
 2 | #define PMSMEMMGMT_H
 3 | 
 4 | #include <stdlib.h>
 5 | 
 6 | // Programming Model-Specific Memory Management
 7 | // Some programming models for heterogeneous systems provide
 8 | // memory management functions for allocating memory on the host
 9 | // and on the device.  These functions provide an abstract interface
10 | // to that programming-model-specific interface.
11 | 
12 | template<class T>
13 | T*
14 | pmsAllocHostBuffer( size_t nItems ) // allocate an array of nItems objects of type T on the host
15 | {
16 |     return new T[nItems]; // array new: the result must be released with delete[] (see pmsFreeHostBuffer)
17 | }
18 | 
19 | 
20 | template<class T>
21 | void
22 | pmsFreeHostBuffer( T* buf ) // release a host array obtained from pmsAllocHostBuffer
23 | {
24 |     delete[] buf; // array delete matches the array new in pmsAllocHostBuffer; a null buf is a no-op
25 | }
26 | 
27 | #endif // PMSMEMMGMT_H
28 | 


--------------------------------------------------------------------------------
/config/conf-newark.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | 
 4 | # A "good" set of optimization flags is compiler dependent.
 5 | # These might be reasonable flags to start from.
 6 | #
 7 | # GNU
 8 | PATH="/opt/cuda/6.0/cuda/bin:$PATH"
 9 | OPTFLAGS="-g -O2"
10 | 
11 | # Intel
12 | #OPTFLAGS="-g -xHOST -O3 -ip -no-prec-div"
13 | #export CXX=icpc
14 | #export CC=icc
15 | 
16 | # PGI
17 | #OPTFLAGS="-g -fastsse"
18 | #export CXX=pgcpp
19 | #export CC=pgcc
20 | 
21 | CPPFLAGS="-I/opt/cuda/6.0/cuda/include/" \
22 | CUDA_CPPFLAGS="-gencode=arch=compute_20,code=sm_20 -gencode=arch=compute_30,code=sm_30" \
23 | sh ./configure \
24 |     CXXFLAGS="$OPTFLAGS" \
25 |     CFLAGS="$OPTFLAGS" \
26 |     LDFLAGS="$OPTFLAGS"
27 | 
28 | 


--------------------------------------------------------------------------------
/config/conf-atlanta.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | OCL_ROOT=/opt/AMDAPP
 4 | 
 5 | # A "good" set of optimization flags is compiler dependent.
 6 | # These might be reasonable flags to start from.
 7 | #
 8 | # GNU
 9 | OPTFLAGS="-g -O2"
10 | 
11 | # Intel
12 | #OPTFLAGS="-g -xHOST -O3 -ip -no-prec-div"
13 | #export CXX=icpc
14 | #export CC=icc
15 | 
16 | # PGI
17 | #OPTFLAGS="-g -fastsse"
18 | #export CXX=pgcpp
19 | #export CC=pgcc
20 | 
21 | 
22 | # do the actual configuration
23 | sh ./configure \
24 |     CPPFLAGS="-I$OCL_ROOT/include" \
25 |     CXXFLAGS="$OPTFLAGS" \
26 |     CFLAGS="$OPTFLAGS" \
27 |     LDFLAGS="$OPTFLAGS -L$OCL_ROOT/lib/x86_64" \
28 |     --without-cuda \
29 |     --disable-stability
30 | 
31 | 


--------------------------------------------------------------------------------
/src/opencl/level1/stencil2d/OpenCLStencilFactory.h:
--------------------------------------------------------------------------------
 1 | #ifndef OPENCLSTENCILFACTORY_H
 2 | #define OPENCLSTENCILFACTORY_H
 3 | 
 4 | #include "CommonOpenCLStencilFactory.h"
 5 | 
 6 | template<class T>
 7 | class OpenCLStencilFactory : public CommonOpenCLStencilFactory<T> // stencil factory layered on the common OpenCL factory base
 8 | {
 9 | public:
10 |     OpenCLStencilFactory( cl_device_id _dev, // device, context and queue are forwarded unchanged to the base class
11 |                             cl_context _ctx,
12 |                             cl_command_queue _queue )
13 |       : CommonOpenCLStencilFactory<T>( "OpenCLStencil", _dev, _ctx, _queue ) // first argument is the fixed string "OpenCLStencil"; presumably the stencil name -- confirm
14 |     {
15 |         // nothing else to do
16 |     }
17 | 
18 |     virtual Stencil<T>* BuildStencil( const OptionParser& options ); // declared only; the definition is not in this header
19 | };
20 | 
21 | #endif // OPENCLSTENCILFACTORY_H
22 | 


--------------------------------------------------------------------------------
/src/cuda/level1/md/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # What is the destination for programs built from this directory?
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # What programs should be installed to that destination?
19 | serialcuda_PROGRAMS = MD
20 | 
21 | # How to build those programs?
22 | MD_SOURCES = main.cpp
23 | MD_LDADD = MD.o $(CUDA_LIBS) $(LIBS)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/cuda/level2/s3d/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # What is the destination for programs built from this directory?
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # What programs should be installed to that destination?
19 | serialcuda_PROGRAMS = S3D
20 | 
21 | # How to build those programs?
22 | S3D_SOURCES = main.cpp
23 | S3D_LDADD = S3D.o $(CUDA_LIBS) $(LIBS)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/cuda/level1/spmv/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # What is the destination for programs built from this directory?
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # What programs should be installed to that destination?
19 | serialcuda_PROGRAMS = Spmv
20 | 
21 | # How to build those programs?
22 | Spmv_SOURCES = main.cpp
23 | Spmv_LDADD = Spmv.o $(CUDA_LIBS) $(LIBS)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/cuda/level1/scan/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi tpmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # What is the destination for programs built from this directory?
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # What programs should be installed to that destination?
19 | serialcuda_PROGRAMS = Scan
20 | 
21 | # How to build those programs?
22 | Scan_SOURCES = main.cpp
23 | Scan_LDADD = Scan.o $(CUDA_LIBS) $(LIBS)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/cuda/level1/triad/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # What is the destination for programs built from this directory?
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # What programs should be installed to that destination?
19 | serialcuda_PROGRAMS = Triad
20 | 
21 | # How to build those programs?
22 | Triad_SOURCES = main.cpp
23 | Triad_LDADD = Triad.o $(CUDA_LIBS) $(LIBS)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/cuda/common/CUDAPMSMemMgr.h:
--------------------------------------------------------------------------------
 1 | #ifndef CUDAPMSMEMMGR_H
 2 | #define CUDAPMSMEMMGR_H
 3 | 
 4 | #include <stdlib.h>
 5 | #include "cudacommon.h"
 6 | #include <cuda.h>
 7 | #include <cuda_runtime_api.h>
 8 | #include "PMSMemMgr.h"
 9 | 
10 | template<typename T>
11 | class CUDAPMSMemMgr : public PMSMemMgr<T> // host-buffer manager backed by cudaMallocHost / cudaFreeHost
12 | {
13 | public:
14 |     virtual T* AllocHostBuffer( size_t nItems ) // returns a host buffer sized for nItems objects of type T
15 |     {
16 |         T* ret = NULL;
17 |         size_t nBytes = nItems * sizeof(T); // NOTE(review): unchecked multiplication; assumes it does not overflow size_t
18 |         CUDA_SAFE_CALL(cudaMallocHost((void**)&ret, nBytes)); // page-locked host allocation; CUDA_SAFE_CALL's failure behaviour is defined outside this header (presumably cudacommon.h)
19 |         return ret;
20 |     }
21 | 
22 |     virtual void ReleaseHostBuffer( T* buf ) // buf is handed to cudaFreeHost, so it must come from AllocHostBuffer
23 |     {
24 |         CUDA_SAFE_CALL(cudaFreeHost(buf));
25 |     }
26 | };
27 | 
28 | #endif // CUDAPMSMEMMGR_H
29 | 


--------------------------------------------------------------------------------
/src/cuda/level1/md5hash/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # What is the destination for programs built from this directory?
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # What programs should be installed to that destination?
19 | serialcuda_PROGRAMS = MD5Hash
20 | 
21 | # How to build those programs?
22 | MD5Hash_SOURCES = main.cpp
23 | MD5Hash_LDADD = MD5Hash.o $(CUDA_LIBS) $(LIBS)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/cuda/level1/sort/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # What is the destination for programs built from this directory?
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # What programs should be installed to that destination?
19 | serialcuda_PROGRAMS = Sort
20 | 
21 | # How to build those programs?
22 | Sort_SOURCES = main.cpp
23 | Sort_LDADD = Sort.o sort_kernel.o $(CUDA_LIBS) $(LIBS)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/mpi/contention/README:
--------------------------------------------------------------------------------
 1 |        The non-multi-threaded version of the contention benchmark
 2 | 
 3 | This splits the process space into processes that do MPI communication and
 4 | processes that do GPU communication. The number of processes that do GPU
 5 | communication equals the number of devices on the node.
 6 | 
 7 | Node Random Pairs: Forms pairs of nodes first, then pairs processes within the
 8 | nodes. For example, if node 0 and 1 have 4 processes each, and 0 and 1 are
 9 | paired, then process 0 on node 0 is paired with process 0 on node 1 which is
10 | MPI rank 4 (when nodes are not allocated in a round robin or any other weird
11 | way). Similarly, process 1 on node 0 is paired with process 1 on node 1 which
12 | is MPI rank 5. 
13 | 


--------------------------------------------------------------------------------
/src/opencl/level1/md/MD.h:
--------------------------------------------------------------------------------
 1 | #ifndef MD_H__
 2 | #define MD_H__
 3 | 
 4 | struct float4 { // four single-precision components; NOTE(review): presumably mirrors the OpenCL float4 layout -- confirm
 5 |     float x;
 6 |     float y;
 7 |     float z;
 8 |     float w;
 9 | };
10 | 
11 | struct double4 { // four double-precision components; NOTE(review): presumably mirrors the OpenCL double4 layout -- confirm
12 |     double x;
13 |     double y;
14 |     double z;
15 |     double w;
16 | };
17 | 
18 | // Problem constants ('static const': each including translation unit gets its own copy)
19 | static const float  cutsq        = 16.0f; // Square of cutoff distance (cutoff distance itself is 4.0)
20 | static const int    maxNeighbors = 128;  // Max number of nearest neighbors
21 | static const double domainEdge   = 20.0; // Edge length of the cubic domain
22 | static const float  lj1          = 1.5;  // LJ constant (double literal converted to float)
23 | static const float  lj2          = 2.0;  // LJ constant (double literal converted to float)
24 | static const float  EPSILON      = 0.1f; // Relative error between CPU/GPU; presumably the accepted tolerance -- confirm
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/src/common/NodeIDList.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _NODE_ID_LIST_H
 3 | #define _NODE_ID_LIST_H
 4 | 
 5 | #include <list>
 6 | #include <string>
 7 | 
 8 | // ****************************************************************************
 9 | // Type: NodeIDList
10 | //
11 | // Purpose:
12 | //   Defines a list of strings for holding host name information
13 | //   associated with a particular system configuration.
14 | //
15 | // Programmer: Gabriel Marin
16 | // Creation: August 25, 2009
17 | //
18 | // Modifications:
19 | //
20 | // ****************************************************************************
21 | 
22 | namespace SHOC {
23 |     const int MAGIC_KEY_NODE_ID_LIST = 0x1071badc; // fixed tag value; NOTE(review): presumably identifies a serialized NodeIDList -- confirm
24 |     typedef std::list <std::string> NodeIDList;    // list of host-name strings
25 | } // namespace SHOC (no trailing ';': a namespace definition is not terminated by one)
26 | 
27 | #endif
28 | 


--------------------------------------------------------------------------------
/src/cuda/level1/bfs/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # What is the destination for programs built from this directory?
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # What programs should be installed to that destination?
19 | serialcuda_PROGRAMS = BFS
20 | 
21 | # How to build those programs?
22 | BFS_SOURCES = Graph.cpp \
23 | 				main.cpp
24 | BFS_LDADD = BFS.o bfs_kernel.o $(CUDA_LIBS) $(LIBS)
25 | 
26 | 


--------------------------------------------------------------------------------
/src/cuda/level1/fft/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # What is the destination for programs built from this directory?
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # What programs should be installed to that destination?
19 | serialcuda_PROGRAMS = FFT
20 | 
21 | # How to build those programs?
22 | FFT_SOURCES = FFT.cpp \
23 |                 main.cpp
24 | FFT_LDADD = fftlib.o $(CUDA_LIBS) $(LIBS)
25 | 
26 | 


--------------------------------------------------------------------------------
/src/cuda/level1/reduction/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi tpmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # What is the destination for programs built from this directory?
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # What programs should be installed to that destination?
19 | serialcuda_PROGRAMS = Reduction
20 | 
21 | # How to build those programs?
22 | Reduction_SOURCES = main.cpp
23 | Reduction_LDADD = Reduction.o $(CUDA_LIBS) $(LIBS)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/cuda/level1/gemm/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # What is the destination for programs built from this directory?
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # What programs should be installed to that destination?
19 | serialcuda_PROGRAMS = GEMM
20 | 
21 | # How to build those programs?
22 | GEMM_SOURCES = GEMM.cpp \
23 |                 main.cpp
24 | GEMM_LDADD = $(CUDA_LIBS) -lcublas $(LIBS)
25 | 
26 | 


--------------------------------------------------------------------------------
/src/cuda/level1/md/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Which compiler to use to build and link
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # What is the destination for programs built from this directory?
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # What programs should be installed to that destination?
18 | epcuda_PROGRAMS = MD
19 | 
20 | # How to build those programs?
21 | MD_SOURCES = main.cpp
22 | MD_LDADD = MD.o $(CUDA_LIBS) $(LIBS)
23 | 
24 | 


--------------------------------------------------------------------------------
/src/cuda/level2/s3d/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Which compiler to use to build and link
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # What is the destination for programs built from this directory?
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # What programs should be installed to that destination?
18 | epcuda_PROGRAMS = S3D
19 | 
20 | # How to build those programs?
21 | S3D_SOURCES = main.cpp
22 | S3D_LDADD = S3D.o $(CUDA_LIBS) $(LIBS)
23 | 
24 | 


--------------------------------------------------------------------------------
/src/cuda/level1/scan/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Which compiler to use to build and link
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # What is the destination for programs built from this directory?
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # What programs should be installed to that destination?
18 | epcuda_PROGRAMS = Scan
19 | 
20 | # How to build those programs?
21 | Scan_SOURCES = main.cpp
22 | Scan_LDADD = Scan.o $(CUDA_LIBS) $(LIBS)
23 | 
24 | 


--------------------------------------------------------------------------------
/src/cuda/level1/spmv/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Which compiler to use to build and link
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # What is the destination for programs built from this directory?
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # What programs should be installed to that destination?
18 | epcuda_PROGRAMS = Spmv
19 | 
20 | # How to build those programs?
21 | Spmv_SOURCES = main.cpp
22 | Spmv_LDADD = Spmv.o $(CUDA_LIBS) $(LIBS)
23 | 
24 | 


--------------------------------------------------------------------------------
/src/cuda/level1/triad/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Which compiler to use to build and link
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # What is the destination for programs built from this directory?
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # What programs should be installed to that destination?
18 | epcuda_PROGRAMS = Triad
19 | 
20 | # How to build those programs?
21 | Triad_SOURCES = main.cpp
22 | Triad_LDADD = Triad.o $(CUDA_LIBS) $(LIBS)
23 | 
24 | 


--------------------------------------------------------------------------------
/src/cuda/level2/qtclustering/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = tpmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # What is the destination for programs built from this directory?
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # What programs should be installed to that destination?
19 | serialcuda_PROGRAMS = QTC
20 | 
21 | # How to build those programs?
22 | QTC_SOURCES = comm.cpp \
23 | 				libdata.cpp \
24 |                 main.cpp
25 | QTC_LDADD = QTC.o $(CUDA_LIBS) $(LIBS)
26 | 
27 | 


--------------------------------------------------------------------------------
/src/opencl/level1/md5hash/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi 
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(OCL_LDFLAGS)
13 | CPPFLAGS += $(OCL_CPPFLAGS)
14 | L1LIBS = $(OCL_LIBS) $(LIBS)
15 | 
16 | # what is the destination for programs built from this directory?
17 | serialopencldir = $(bindir)/Serial/OpenCL
18 | 
19 | # what programs should be installed in the destination?
20 | serialopencl_PROGRAMS = MD5Hash
21 | 
22 | # how to build those programs
23 | MD5Hash_SOURCES = MD5Hash.cpp md5_cl.cpp main.cpp
24 | MD5Hash_LDADD   = $(L1LIBS)
25 | 
26 | 


--------------------------------------------------------------------------------
/src/opencl/level1/triad/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(OCL_LDFLAGS)
13 | CPPFLAGS += $(OCL_CPPFLAGS)
14 | L1LIBS = $(OCL_LIBS) $(LIBS)
15 | 
16 | # what is the destination for programs built from this directory?
17 | serialopencldir = $(bindir)/Serial/OpenCL
18 | 
19 | # what programs should be installed in the destination?
20 | serialopencl_PROGRAMS = Triad
21 | 
22 | # how to build those programs
23 | Triad_SOURCES = Triad.cpp \
24 |                 main.cpp
25 | Triad_LDADD   = $(L1LIBS)
26 | 
27 | 


--------------------------------------------------------------------------------
/src/opencl/level1/md/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(OCL_LDFLAGS)
13 | CPPFLAGS += $(OCL_CPPFLAGS)
14 | L1LIBS = $(OCL_LIBS) $(LIBS)
15 | 
16 | # what is the destination for programs built from this directory?
17 | serialopencldir = $(bindir)/Serial/OpenCL
18 | 
19 | # what programs should be installed in the destination?
20 | serialopencl_PROGRAMS = MD
21 | 
22 | # how to build those programs
23 | MD_SOURCES = MD.cpp \
24 | 				md_cl.cpp \
25 |                 main.cpp
26 | MD_LDADD   = $(L1LIBS)
27 | 
28 | 


--------------------------------------------------------------------------------
/src/cuda/level1/md5hash/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Which compiler to use to build and link
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # What is the destination for programs built from this directory?
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # What programs should be installed to that destination?
18 | epcuda_PROGRAMS = MD5Hash
19 | 
20 | # How to build those programs?
21 | MD5Hash_SOURCES = main.cpp
22 | MD5Hash_LDADD = MD5Hash.o $(CUDA_LIBS) $(LIBS)
23 | 
24 | 


--------------------------------------------------------------------------------
/src/cuda/level1/sort/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/cuda/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = $(MPICXX)
 9 | CXXLD = $(MPICXX)
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # Install directory for the programs built here
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # Programs installed into that directory
18 | epcuda_PROGRAMS = Sort
19 | 
20 | # Link recipe: Sort.o and sort_kernel.o are added as object files, not listed as sources
21 | Sort_SOURCES = main.cpp
22 | Sort_LDADD = Sort.o sort_kernel.o $(CUDA_LIBS) $(LIBS)
23 | 
24 | 


--------------------------------------------------------------------------------
/src/stability/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | # The epmpi subdirectory is entered only when the BUILD_MPI conditional is true
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # Source search path: this directory, then src/cuda/common
10 | VPATH = $(srcdir):$(top_srcdir)/src/cuda/common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # Install directory for the programs built here
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # Programs installed into that directory
19 | serialcuda_PROGRAMS = Stability
20 | 
21 | # Link recipe: takes fftlib.o from the CUDA FFT benchmark's build directory and links cufft
22 | Stability_SOURCES = Stability.cpp \
23 |     main.cpp
24 | Stability_LDADD = $(top_builddir)/src/cuda/level1/fft/fftlib.o $(CUDA_LIBS) -lcufft $(LIBS)
25 | 
26 | 


--------------------------------------------------------------------------------
/src/cuda/level1/reduction/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/cuda/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = $(MPICXX)
 9 | CXXLD = $(MPICXX)
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # Install directory for the programs built here
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # Programs installed into that directory
18 | epcuda_PROGRAMS = Reduction
19 | 
20 | # Link recipe: Reduction.o is added as an object file, not listed as a source
21 | Reduction_SOURCES = main.cpp
22 | Reduction_LDADD = Reduction.o $(CUDA_LIBS) $(LIBS)
23 | 
24 | 


--------------------------------------------------------------------------------
/src/opencl/level1/gemm/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | # The epmpi subdirectory is entered only when the BUILD_MPI conditional is true
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # Source search path: this directory, src/opencl/common, src/common
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
12 | AM_LDFLAGS = $(OCL_LDFLAGS)
13 | AM_CPPFLAGS = $(OCL_CPPFLAGS)
14 | L1LIBS = $(OCL_LIBS) $(LIBS)
15 | 
16 | # Install directory for the programs built here
17 | serialopencldir = $(bindir)/Serial/OpenCL
18 | 
19 | # Programs installed into that directory
20 | serialopencl_PROGRAMS = GEMM
21 | 
22 | # Sources compiled into the program and the libraries it links against
23 | GEMM_SOURCES = GEMM.cpp \
24 |     gemmN_cl.cpp \
25 |     main.cpp
26 | GEMM_LDADD = $(L1LIBS)
27 | 
28 | 


--------------------------------------------------------------------------------
/src/opencl/level1/sort/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | # The epmpi subdirectory is entered only when the BUILD_MPI conditional is true
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # Source search path: this directory, src/opencl/common, src/common
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
12 | AM_LDFLAGS = $(OCL_LDFLAGS)
13 | AM_CPPFLAGS = $(OCL_CPPFLAGS)
14 | L1LIBS = $(OCL_LIBS) $(LIBS)
15 | 
16 | # Install directory for the programs built here
17 | serialopencldir = $(bindir)/Serial/OpenCL
18 | 
19 | # Programs installed into that directory
20 | serialopencl_PROGRAMS = Sort
21 | 
22 | # Sources compiled into the program and the libraries it links against
23 | Sort_SOURCES = Sort.cpp \
24 |     sort_cl.cpp \
25 |     main.cpp
26 | Sort_LDADD = $(L1LIBS)
27 | 
28 | 


--------------------------------------------------------------------------------
/src/common/HostStencilFactory.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include "HostStencilFactory.h"
 3 | #include "HostStencil.h"
 4 | 
 5 | template<class T>
 6 | Stencil<T>*
 7 | HostStencilFactory<T>::BuildStencil( const OptionParser& options )
 8 | {
 9 |     // Stencil weights; left uninitialized here, so ExtractOptions is expected to assign all three.
10 |     T centerWeight;
11 |     T cardinalWeight;
12 |     T diagonalWeight;
13 |     StencilFactory<T>::ExtractOptions( options, centerWeight, cardinalWeight, diagonalWeight );
14 | 
15 |     return new HostStencil<T>( centerWeight, cardinalWeight, diagonalWeight );  // caller receives a new'd object
16 | }
17 | 
18 | 
19 | template<class T>
20 | void
21 | HostStencilFactory<T>::CheckOptions( const OptionParser& options ) const
22 | {
23 |     // Delegate to the base factory's option check.
24 |     StencilFactory<T>::CheckOptions( options );
25 | 
26 |     // This factory adds no options of its own, so there is nothing further to check.
27 | }
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/src/cuda/level1/bfs/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/cuda/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = $(MPICXX)
 9 | CXXLD = $(MPICXX)
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # Install directory for the programs built here
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # Programs installed into that directory
18 | epcuda_PROGRAMS = BFS
19 | 
20 | # Link recipe: BFS.o and bfs_kernel.o are added as object files, not listed as sources
21 | BFS_SOURCES = Graph.cpp \
22 |     main.cpp
23 | BFS_LDADD = BFS.o bfs_kernel.o $(CUDA_LIBS) $(LIBS)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/cuda/level1/fft/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/cuda/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = $(MPICXX)
 9 | CXXLD = $(MPICXX)
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # Install directory for the programs built here
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # Programs installed into that directory
18 | epcuda_PROGRAMS = FFT
19 | 
20 | # Link recipe: fftlib.o is added as an object file, not listed as a source
21 | FFT_SOURCES = FFT.cpp \
22 |     main.cpp
23 | FFT_LDADD = fftlib.o $(CUDA_LIBS) $(LIBS)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/opencl/level1/scan/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi tpmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | # The epmpi and tpmpi subdirectories are entered only when the BUILD_MPI conditional is true
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # Source search path: this directory, src/opencl/common, src/common
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
12 | AM_LDFLAGS = $(OCL_LDFLAGS)
13 | AM_CPPFLAGS = $(OCL_CPPFLAGS)
14 | L1LIBS = $(OCL_LIBS) $(LIBS)
15 | 
16 | # Install directory for the programs built here
17 | serialopencldir = $(bindir)/Serial/OpenCL
18 | 
19 | # Programs installed into that directory
20 | serialopencl_PROGRAMS = Scan
21 | 
22 | # Sources compiled into the program and the libraries it links against
23 | Scan_SOURCES = Scan.cpp \
24 |     scan_cl.cpp \
25 |     main.cpp
26 | Scan_LDADD = $(L1LIBS)
27 | 
28 | 


--------------------------------------------------------------------------------
/src/cuda/level1/gemm/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/cuda/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = $(MPICXX)
 9 | CXXLD = $(MPICXX)
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # Install directory for the programs built here
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # Programs installed into that directory
18 | epcuda_PROGRAMS = GEMM
19 | 
20 | # Sources compiled into the program; it additionally links against cublas
21 | GEMM_SOURCES = GEMM.cpp \
22 |     main.cpp
23 | GEMM_LDADD = $(CUDA_LIBS) -lcublas $(LIBS)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/cuda/level1/neuralnet/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/cuda/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = $(MPICXX)
 9 | CXXLD = $(MPICXX)
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # Install directory for the programs built here
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # Programs installed into that directory
18 | epcuda_PROGRAMS = NeuralNet
19 | 
20 | # Link recipe: NeuralNet.o is added as an object file; cublas is linked last
21 | NeuralNet_SOURCES = main.cpp
22 | NeuralNet_LDADD = NeuralNet.o $(CUDA_LIBS) $(LIBS) -lcublas
23 | 
24 | 


--------------------------------------------------------------------------------
/src/opencl/level1/triad/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/opencl/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
11 | AM_LDFLAGS = $(OCL_LDFLAGS)
12 | AM_CPPFLAGS = $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common
13 | L1LIBS = $(OCL_LIBS) $(LIBS)
14 | 
15 | # Install directory for the programs built here
16 | epopencldir = $(bindir)/EP/OpenCL
17 | 
18 | # Programs installed into that directory
19 | epopencl_PROGRAMS = Triad
20 | 
21 | # Sources compiled into the program and the libraries it links against
22 | Triad_SOURCES = Triad.cpp \
23 |     main.cpp
24 | Triad_LDADD = $(L1LIBS)
25 | 
26 | 


--------------------------------------------------------------------------------
/src/cuda/level1/scan/tpmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: this directory, the parent benchmark directory, src/cuda/common, src/common
 5 | VPATH = $(srcdir):$(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | # $(srcdir) is listed so sources local to this directory are found in out-of-tree builds
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # Install directory for the programs built here
15 | tpcudadir = $(bindir)/TP/CUDA
16 | 
17 | # Programs installed into that directory
18 | tpcuda_PROGRAMS = Scan
19 | 
20 | # Link recipe: tpScanLaunchKernel.o is added as an object file, not listed as a source
21 | Scan_SOURCES = main.cpp \
22 |     tpScan.cpp
23 | Scan_LDADD = tpScanLaunchKernel.o $(CUDA_LIBS) $(LIBS)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/opencl/level1/fft/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | # The epmpi subdirectory is entered only when the BUILD_MPI conditional is true
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # Source search path: this directory, src/opencl/common, src/common
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
12 | AM_LDFLAGS = $(OCL_LDFLAGS)
13 | AM_CPPFLAGS = $(OCL_CPPFLAGS)
14 | L1LIBS = $(OCL_LIBS) $(LIBS)
15 | 
16 | # Install directory for the programs built here
17 | serialopencldir = $(bindir)/Serial/OpenCL
18 | 
19 | # Programs installed into that directory
20 | serialopencl_PROGRAMS = FFT
21 | 
22 | # Sources compiled into the program and the libraries it links against
23 | FFT_SOURCES = FFT.cpp \
24 |     fftlib.cpp \
25 |     fft_cl.cpp \
26 |     main.cpp
27 | FFT_LDADD = $(L1LIBS)
28 | 
29 | 


--------------------------------------------------------------------------------
/src/opencl/level1/md/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/opencl/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
11 | AM_LDFLAGS = $(OCL_LDFLAGS)
12 | AM_CPPFLAGS = $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common
13 | L1LIBS = $(OCL_LIBS) $(LIBS)
14 | 
15 | # Install directory for the programs built here
16 | epopencldir = $(bindir)/EP/OpenCL
17 | 
18 | # Programs installed into that directory
19 | epopencl_PROGRAMS = MD
20 | 
21 | # Sources compiled into the program and the libraries it links against
22 | MD_SOURCES = MD.cpp \
23 |     md_cl.cpp \
24 |     main.cpp
25 | MD_LDADD = $(L1LIBS)
26 | 
27 | 


--------------------------------------------------------------------------------
/src/opencl/level1/gemm/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/opencl/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
11 | AM_LDFLAGS = $(OCL_LDFLAGS)
12 | AM_CPPFLAGS = $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common
13 | L1LIBS = $(OCL_LIBS) $(LIBS)
14 | 
15 | # Install directory for the programs built here
16 | epopencldir = $(bindir)/EP/OpenCL
17 | 
18 | # Programs installed into that directory
19 | epopencl_PROGRAMS = GEMM
20 | 
21 | # Sources compiled into the program and the libraries it links against
22 | GEMM_SOURCES = GEMM.cpp \
23 |     gemmN_cl.cpp \
24 |     main.cpp
25 | GEMM_LDADD = $(L1LIBS)
26 | 
27 | 


--------------------------------------------------------------------------------
/src/opencl/level1/scan/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/opencl/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
11 | AM_LDFLAGS = $(OCL_LDFLAGS)
12 | AM_CPPFLAGS = $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common
13 | L1LIBS = $(OCL_LIBS) $(LIBS)
14 | 
15 | # Install directory for the programs built here
16 | epopencldir = $(bindir)/EP/OpenCL
17 | 
18 | # Programs installed into that directory
19 | epopencl_PROGRAMS = Scan
20 | 
21 | # Sources compiled into the program and the libraries it links against
22 | Scan_SOURCES = Scan.cpp \
23 |     scan_cl.cpp \
24 |     main.cpp
25 | Scan_LDADD = $(L1LIBS)
26 | 
27 | 


--------------------------------------------------------------------------------
/src/opencl/level1/sort/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/opencl/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
11 | AM_LDFLAGS = $(OCL_LDFLAGS)
12 | AM_CPPFLAGS = $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common
13 | L1LIBS = $(OCL_LIBS) $(LIBS)
14 | 
15 | # Install directory for the programs built here
16 | epopencldir = $(bindir)/EP/OpenCL
17 | 
18 | # Programs installed into that directory
19 | epopencl_PROGRAMS = Sort
20 | 
21 | # Sources compiled into the program and the libraries it links against
22 | Sort_SOURCES = Sort.cpp \
23 |     sort_cl.cpp \
24 |     main.cpp
25 | Sort_LDADD = $(L1LIBS)
26 | 
27 | 


--------------------------------------------------------------------------------
/src/opencl/level1/spmv/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | # The epmpi subdirectory is entered only when the BUILD_MPI conditional is true
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # Source search path: this directory, src/opencl/common, src/common
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
12 | AM_LDFLAGS = $(OCL_LDFLAGS)
13 | AM_CPPFLAGS = $(OCL_CPPFLAGS) -I$(srcdir)/../../common
14 | L1LIBS = $(OCL_LIBS) $(LIBS)
15 | 
16 | # Install directory for the programs built here
17 | serialopencldir = $(bindir)/Serial/OpenCL
18 | 
19 | # Programs installed into that directory
20 | serialopencl_PROGRAMS = Spmv
21 | 
22 | # Sources compiled into the program and the libraries it links against
23 | Spmv_SOURCES = Spmv.cpp \
24 |     spmv_cl.cpp \
25 |     main.cpp
26 | Spmv_LDADD = $(L1LIBS)
27 | 
28 | 


--------------------------------------------------------------------------------
/src/opencl/level1/spmv/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/opencl/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
11 | AM_LDFLAGS = $(OCL_LDFLAGS)
12 | AM_CPPFLAGS = $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common
13 | L1LIBS = $(OCL_LIBS) $(LIBS)
14 | 
15 | # Install directory for the programs built here
16 | epopencldir = $(bindir)/EP/OpenCL
17 | 
18 | # Programs installed into that directory
19 | epopencl_PROGRAMS = Spmv
20 | 
21 | # Sources compiled into the program and the libraries it links against
22 | Spmv_SOURCES = Spmv.cpp \
23 |     spmv_cl.cpp \
24 |     main.cpp
25 | Spmv_LDADD = $(L1LIBS)
26 | 
27 | 


--------------------------------------------------------------------------------
/src/opencl/level1/scan/tpmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: this directory, the parent benchmark directory, src/opencl/common, src/common
 5 | VPATH = $(srcdir):$(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | # $(srcdir) is listed so sources local to this directory are found in out-of-tree builds
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
11 | AM_LDFLAGS = $(OCL_LDFLAGS)
12 | AM_CPPFLAGS = $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common
13 | L1LIBS = $(OCL_LIBS) $(LIBS)
14 | 
15 | # Install directory for the programs built here
16 | tpopencldir = $(bindir)/TP/OpenCL
17 | 
18 | # Programs installed into that directory
19 | tpopencl_PROGRAMS = Scan
20 | 
21 | # Sources compiled into the program and the libraries it links against
22 | Scan_SOURCES = scan_cl.cpp \
23 |     tpScan.cpp \
24 |     main.cpp
25 | Scan_LDADD = $(L1LIBS)
26 | 
27 | 


--------------------------------------------------------------------------------
/src/cuda/level1/reduction/tpmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: this directory, the parent benchmark directory, src/cuda/common, src/common
 5 | VPATH = $(srcdir):$(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | # $(srcdir) is listed so sources local to this directory are found in out-of-tree builds
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # Install directory for the programs built here
15 | tpcudadir = $(bindir)/TP/CUDA
16 | 
17 | # Programs installed into that directory
18 | tpcuda_PROGRAMS = Reduction
19 | 
20 | # Link recipe: tpRedLaunchKernel.o is added as an object file, not listed as a source
21 | Reduction_SOURCES = main.cpp \
22 |     tpReduction.cpp
23 | Reduction_LDADD = tpRedLaunchKernel.o $(CUDA_LIBS) $(LIBS)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/cuda/level2/qtclustering/tpmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: this directory, the parent benchmark directory, src/cuda/common, src/common
 5 | VPATH = $(srcdir):$(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | # $(srcdir) is listed so sources local to this directory are found in out-of-tree builds
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # Install directory for the programs built here
15 | tpcudadir = $(bindir)/TP/CUDA
16 | 
17 | # Programs installed into that directory
18 | tpcuda_PROGRAMS = QTC
19 | 
20 | # Link recipe: QTC.o is added as an object file, not listed as a source
21 | QTC_SOURCES = comm.cpp \
22 |     libdata.cpp \
23 |     main.cpp
24 | QTC_LDADD = QTC.o $(CUDA_LIBS) $(LIBS)
25 | 
26 | 


--------------------------------------------------------------------------------
/src/opencl/level1/reduction/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi tpmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | # The epmpi and tpmpi subdirectories are entered only when the BUILD_MPI conditional is true
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # Source search path: this directory, src/opencl/common, src/common
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
12 | AM_LDFLAGS = $(OCL_LDFLAGS)
13 | AM_CPPFLAGS = $(OCL_CPPFLAGS)
14 | L1LIBS = $(OCL_LIBS) $(LIBS)
15 | 
16 | # Install directory for the programs built here
17 | serialopencldir = $(bindir)/Serial/OpenCL
18 | 
19 | # Programs installed into that directory
20 | serialopencl_PROGRAMS = Reduction
21 | 
22 | # Sources compiled into the program and the libraries it links against
23 | Reduction_SOURCES = Reduction.cpp \
24 |     reduction_cl.cpp \
25 |     main.cpp
26 | Reduction_LDADD = $(L1LIBS)
27 | 
28 | 


--------------------------------------------------------------------------------
/src/cuda/level1/sort/Sort.h:
--------------------------------------------------------------------------------
 1 | #ifndef SORT_H_
 2 | #define SORT_H_
 3 | // Shared declarations for the sort benchmark: a uint alias, three constants, three prototypes, and __SYNC.
 4 | typedef unsigned int uint;
 5 | // Judging by the names: block sizes for the sort and scan steps and the key width in bits -- TODO confirm.
 6 | static const int SORT_BLOCK_SIZE = 128;
 7 | static const int SCAN_BLOCK_SIZE = 256;
 8 | static const int SORT_BITS = 32;
 9 | // NOTE(review): uint4 and size_t are used below but not declared or included by this header.
10 | void
11 | radixSortStep(uint nbits, uint startbit, uint4* keys, uint4* values,
12 |         uint4* tempKeys, uint4* tempValues, uint* counters,
13 |         uint* countersSum, uint* blockOffsets, uint** scanBlockSums,
14 |         uint numElements);
15 | // Presumably scans inArray into outArray, recursing by level through blockSums -- confirm in the definition.
16 | void
17 | scanArrayRecursive(uint* outArray, uint* inArray, int numElements, int level,
18 |         uint** blockSums);
19 | // Presumably returns true when the size keys/vals are correctly sorted -- confirm in the definition.
20 | bool
21 | verifySort(uint *keys, uint* vals, const size_t size);
22 | // __SYNC expands to a __syncthreads() call under __DEVICE_EMULATION__, otherwise to an empty statement.
23 | #ifdef __DEVICE_EMULATION__
24 | #define __SYNC __syncthreads();
25 | #else
26 | #define __SYNC ;
27 | #endif
28 | 
29 | #endif // SORT_H_
30 | 


--------------------------------------------------------------------------------
/src/opencl/level1/bfs/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | # The epmpi subdirectory is entered only when the BUILD_MPI conditional is true
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # Source search path: this directory, src/opencl/common, src/common
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
12 | AM_LDFLAGS = $(OCL_LDFLAGS)
13 | AM_CPPFLAGS = $(OCL_CPPFLAGS)
14 | L1LIBS = $(OCL_LIBS) $(LIBS)
15 | 
16 | # Install directory for the programs built here
17 | serialopencldir = $(bindir)/Serial/OpenCL
18 | 
19 | # Programs installed into that directory
20 | serialopencl_PROGRAMS = BFS
21 | 
22 | # Sources compiled into the program and the libraries it links against
23 | BFS_SOURCES = BFS.cpp \
24 |     bfs_uiuc_spill_cl.cpp \
25 |     bfs_iiit_cl.cpp \
26 |     Graph.cpp \
27 |     main.cpp
28 | BFS_LDADD = $(L1LIBS)
29 | 
30 | 


--------------------------------------------------------------------------------
/src/opencl/level1/fft/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/opencl/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
11 | AM_LDFLAGS = $(OCL_LDFLAGS)
12 | AM_CPPFLAGS = $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common
13 | L1LIBS = $(OCL_LIBS) $(LIBS)
14 | 
15 | # Install directory for the programs built here
16 | epopencldir = $(bindir)/EP/OpenCL
17 | 
18 | # Programs installed into that directory
19 | epopencl_PROGRAMS = FFT
20 | 
21 | # Sources compiled into the program and the libraries it links against
22 | FFT_SOURCES = FFT.cpp \
23 |     fftlib.cpp \
24 |     fft_cl.cpp \
25 |     main.cpp
26 | FFT_LDADD = $(L1LIBS)
27 | 
28 | 


--------------------------------------------------------------------------------
/src/opencl/level1/md5hash/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/opencl/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
11 | AM_LDFLAGS = $(OCL_LDFLAGS)
12 | AM_CPPFLAGS = $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common
13 | L1LIBS = $(OCL_LIBS) $(LIBS)
14 | 
15 | # Install directory for the programs built here
16 | epopencldir = $(bindir)/EP/OpenCL
17 | 
18 | # Programs installed into that directory
19 | epopencl_PROGRAMS = MD5Hash
20 | 
21 | # Sources compiled into the program and the libraries it links against
22 | MD5Hash_SOURCES = MD5Hash.cpp \
23 |     md5_cl.cpp \
24 |     main.cpp
25 | MD5Hash_LDADD = $(L1LIBS)
26 | 
27 | 


--------------------------------------------------------------------------------
/src/common/BadCommandLine.h:
--------------------------------------------------------------------------------
 1 | #ifndef BAD_COMMAND_LINE_H
 2 | #define BAD_COMMAND_LINE_H
 3 | 
 4 | #include <stdexcept>
 5 | 
 6 | // ****************************************************************************
 7 | // Class:  BadCommandLine
 8 | //
 9 | // Purpose:
10 | //   Exception for command line parse errors
11 | //
12 | // Programmer:  Phil Roth
13 | // Creation:    October 28, 2009
14 | //
15 | // ****************************************************************************
16 | struct BadCommandLine : std::exception
17 | {
18 |     // The current OptionParser implementation reports the specific problems
19 |     // itself, so this exception only needs to carry a fixed message.
20 |     virtual const char* what() const throw()
21 |     { return "invalid command line"; }
22 | };
23 | 
24 | #endif // BAD_COMMAND_LINE_H
25 | 


--------------------------------------------------------------------------------
/src/cuda/level1/fft/fftlib.h:
--------------------------------------------------------------------------------
 1 | #ifndef FFTLIB_H
 2 | #define FFTLIB_H
 3 | // Declarations for the FFT helper routines; all buffers are passed as untyped void* pointers.
 4 | #include "OptionParser.h"
 5 | // Defined elsewhere; the name suggests the device used by the FFT code -- TODO confirm.
 6 | extern int fftDevice;
 7 | // NOTE(review): the implementations are not shown here; roles below are inferred from names only.
 8 | void init(OptionParser& op, const bool do_dp, const int n_ffts);
 9 | void forward(void* work, const int n_ffts);
10 | void inverse(void* work, const int n_ffts);
11 | int check(void* work, void* check, const int half_n_ffts, 
12 |     const int half_n_cmplx);
13 | void allocHostBuffer(void** bufferp, const unsigned long bytes);
14 | void allocDeviceBuffer(void** bufferp, const unsigned long bytes);
15 | void freeHostBuffer(void* buffer);
16 | void freeDeviceBuffer(void* buffer);
17 | void copyToDevice(void* to_device, const void* from_host, 
18 |     const unsigned long bytes);
19 | void copyFromDevice(void* to_host, const void* from_device, 
20 |     const unsigned long bytes);
21 | 
22 | #endif // FFTLIB_H
23 | 


--------------------------------------------------------------------------------
/src/opencl/level1/reduction/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent benchmark directory, src/opencl/common, src/common
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | # Per-directory flags go in AM_* variables; CPPFLAGS itself is reserved for the user
11 | AM_LDFLAGS = $(OCL_LDFLAGS)
12 | AM_CPPFLAGS = $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common
13 | L1LIBS = $(OCL_LIBS) $(LIBS)
14 | 
15 | # Install directory for the programs built here
16 | epopencldir = $(bindir)/EP/OpenCL
17 | 
18 | # Programs installed into that directory
19 | epopencl_PROGRAMS = Reduction
20 | 
21 | # Sources compiled into the program and the libraries it links against
22 | Reduction_SOURCES = Reduction.cpp \
23 |     reduction_cl.cpp \
24 |     main.cpp
25 | Reduction_LDADD = $(L1LIBS)
26 | 
27 | 


--------------------------------------------------------------------------------
/src/stability/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # Source search path: the parent directory, then src/cuda/common
 5 | VPATH = $(srcdir)/..:$(top_srcdir)/src/cuda/common
 6 | 
 7 | # Compile and link with the compiler named by MPICXX
 8 | CXX = $(MPICXX)
 9 | CXXLD = $(MPICXX)
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # Install directory for the programs built here
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # Programs installed into that directory
18 | epcuda_PROGRAMS = Stability_mpi
19 | 
20 | # Link recipe: takes fftlib.o from the CUDA FFT benchmark's build directory and links cufft
21 | Stability_mpi_SOURCES = Stability.cpp \
22 |     main.cpp
23 | Stability_mpi_LDADD = $(top_builddir)/src/cuda/level1/fft/fftlib.o $(CUDA_LIBS) -lcufft $(LIBS)
24 | 
25 | 


--------------------------------------------------------------------------------
/src/opencl/level1/reduction/tpmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir):$(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # which compiler to use
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(OCL_LDFLAGS)
12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common
13 | L1LIBS = $(OCL_LIBS) $(LIBS)
14 | 
15 | # what is the destination for programs built from this directory?
16 | tpopencldir = $(bindir)/TP/OpenCL
17 | 
18 | # what programs should be installed in the destination?
19 | tpopencl_PROGRAMS = Reduction
20 | 
21 | # how to build those programs
22 | Reduction_SOURCES = reduction_cl.cpp \
23 | 					tpReduction.cpp \
24 | 					main.cpp
25 | Reduction_LDADD   = $(L1LIBS)
26 | 
27 | 


--------------------------------------------------------------------------------
/src/opencl/level1/bfs/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # which compiler to use
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(OCL_LDFLAGS)
12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common
13 | L1LIBS = $(OCL_LIBS) $(LIBS)
14 | 
15 | # what is the destination for programs built from this directory?
16 | epopencldir = $(bindir)/EP/OpenCL
17 | 
18 | # what programs should be installed in the destination?
19 | epopencl_PROGRAMS = BFS
20 | 
21 | # how to build those programs
22 | BFS_SOURCES	= BFS.cpp \
23 | 				bfs_uiuc_spill_cl.cpp \
24 | 				bfs_iiit_cl.cpp \
25 | 				Graph.cpp \
26 | 				main.cpp
27 | BFS_LDADD	= $(L1LIBS)
28 | 
29 | 


--------------------------------------------------------------------------------
/src/common/Option.h:
--------------------------------------------------------------------------------
 1 | #ifndef OPTION_H
 2 | #define OPTION_H
 3 | 
 4 | #include <string>
 5 | 
 6 | using namespace std;
 7 | 
 8 | // Kinds of value an Option can carry: a single float, int, string or bool,
 9 | // or a vector of floats, ints or strings (meanings inferred from the names).
10 | enum OptionType {
11 |     OPT_FLOAT,
12 |     OPT_INT,
13 |     OPT_STRING,
14 |     OPT_BOOL,
15 |     OPT_VECFLOAT,
16 |     OPT_VECINT,
17 |     OPT_VECSTRING
18 | };
19 | 
20 | // ****************************************************************************
21 | // Class:  Option
22 | //
23 | // Purpose:
24 | //   Encapsulation of a single option, to be used by an option parser.
25 | //   All members are public; the default and current values are held as text.
26 | //
27 | // Programmer:  Kyle Spafford
28 | // Creation:    August 4, 2009
29 | //
30 | // ****************************************************************************
31 | class Option {
32 | 
33 |   public:
34 | 
35 |    // Member types are spelled std::string so that this class does not
36 |    // depend on a using-directive being in effect where it is compiled.
37 |    std::string longName;      // long (word) name of the option
38 |    char        shortLetter;   // one-character name of the option
39 |    std::string defaultValue;  // default value, kept as text
40 |    std::string value;         // current value, kept as text
41 |    OptionType  type;          // kind of value this option carries
42 |    std::string helpText;      // help text associated with the option
43 | 
44 |    // Print this option; defined outside this header.
45 |    void print();
46 | };
34 | 
35 | #endif
36 | 


--------------------------------------------------------------------------------
/src/cuda/level1/stencil2d/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = tpmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # What is the destination for programs built from this directory?
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # What programs should be installed to that destination?
19 | serialcuda_PROGRAMS = Stencil2D
20 | 
21 | # How to build those programs?
22 | Stencil2D_SOURCES = CUDAStencil.cpp \
23 | 				CommonCUDAStencilFactory.cpp \
24 | 				Stencil2Dmain.cpp \
25 | 				CUDAStencilFactory.cpp \
26 |                 main.cpp
27 | Stencil2D_LDADD = CUDAStencilKernel.o $(CUDA_LIBS) $(LIBS)
28 | 
29 | 


--------------------------------------------------------------------------------
/src/mpi/contention/cuda/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir):$(srcdir)/..:$(top_srcdir)/src/cuda/level0
 6 | 
 7 | # Which compiler to use?
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS) -L$(top_builddir)/src/mpi/common
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # What is the destination for programs built from this directory?
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # What programs should be installed to that destination?
18 | epcuda_PROGRAMS = BusCont
19 | 
20 | # How to build those programs?
21 | BusCont_SOURCES = CUDADriver.cpp \
22 | 					BusCont.cpp \
23 | 					bcmain.cpp
24 | BusCont_LDADD = $(top_builddir)/src/cuda/level0/BusSpeedDownload.o -lSHOCCommonMPI $(CUDA_LIBS) $(LIBS)
25 | 
26 | 


--------------------------------------------------------------------------------
/src/cuda/level1/stencil2d/CUDAStencil.cpp:
--------------------------------------------------------------------------------
 1 | #include "CUDAStencil.h"
 2 | 
 3 | // Construct a CUDA stencil.  The three weights are handed to the Stencil<T>
 4 | // base class; the row count, column count and device value are stored in
 5 | // the lRows, lCols and device members respectively.
 6 | template<typename T>
 7 | CUDAStencil<T>::CUDAStencil( T _wCenter, T _wCardinal, T _wDiagonal,
 8 |                              size_t _lRows, size_t _lCols,
 9 |                              int _device )
10 |   : Stencil<T>( _wCenter, _wCardinal, _wDiagonal ),
11 |     lRows( _lRows ),
12 |     lCols( _lCols ),
13 |     device( _device )
14 | {
15 |     // everything is set up by the initializer list above
16 | }
17 | 
18 | // Pre-iteration hook.  currBuf and altBuf point into device global memory;
19 | // mtx and iter are accepted but not used here.  The single-process version
20 | // has no work to do before an iteration, so the body is intentionally empty.
21 | template<typename T>
22 | void
23 | CUDAStencil<T>::DoPreIterationWork( T* currBuf,
24 |                                     T* altBuf,
25 |                                     Matrix2D<T>& mtx,
26 |                                     unsigned int iter )
27 | {
28 | }
27 | 
28 | 


--------------------------------------------------------------------------------
/src/cuda/common/PMSMemMgmt.h:
--------------------------------------------------------------------------------
 1 | #ifndef PMSMEMMGMT_H
 2 | #define PMSMEMMGMT_H
 3 | 
 4 | #include <stdlib.h>
 5 | #include "cudacommon.h"
 6 | #include <cuda.h>
 7 | #include <cuda_runtime_api.h>
 8 | 
 9 | // Programming Model-Specific Memory Management
10 | // Some programming models for heterogeneous systems provide
11 | // memory management functions for allocating memory on the host
12 | // and on the device.  These functions provide an abstract interface
13 | // to that programming-model-specific interface.
14 | 
15 | // Allocate a host buffer large enough for nItems objects of type T using
16 | // cudaMallocHost.  The call is wrapped in CUDA_SAFE_CALL; the pointer that
17 | // cudaMallocHost stored is returned (it starts out as NULL).
18 | //
19 | // nItems: number of T elements requested; the byte count passed to CUDA is
20 | //         nItems * sizeof(T).
21 | template<class T>
22 | T*
23 | pmsAllocHostBuffer( size_t nItems )
24 | {
25 |     T* ret = NULL;
26 |     size_t nBytes = nItems * sizeof(T);
27 | 
28 |     // The C runtime API (cuda_runtime_api.h, included by this header) declares
29 |     // cudaMallocHost with a void** parameter.  Convert explicitly so the call
30 |     // does not rely on a templated C++ overload happening to be visible.
31 |     CUDA_SAFE_CALL(cudaMallocHost(reinterpret_cast<void**>(&ret), nBytes));
32 |     return ret;
33 | }
24 | 
25 | 
26 | // Release a host buffer by handing it to cudaFreeHost (wrapped in
27 | // CUDA_SAFE_CALL).  Presumably buf came from pmsAllocHostBuffer - confirm
28 | // against callers.
29 | template<typename T>
30 | void pmsFreeHostBuffer( T* buf )
31 | {
32 |     CUDA_SAFE_CALL( cudaFreeHost( buf ) );
33 | }
32 | 
33 | #endif // PMSMEMMGMT_H
34 | 


--------------------------------------------------------------------------------
/src/mpi/contention-mt/cuda/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir):$(srcdir)/..:$(top_srcdir)/src/cuda/level0
 6 | 
 7 | # Which compiler to use?
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS) -L$(top_builddir)/src/mpi/common
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # What is the destination for programs built from this directory?
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # What programs should be installed to that destination?
18 | epcuda_PROGRAMS = MTBusCont
19 | 
20 | # How to build those programs?
21 | MTBusCont_SOURCES = CUDADriver.cpp \
22 | 					MTBusCont.cpp \
23 | 					mtbcmain.cpp
24 | MTBusCont_LDADD = $(top_builddir)/src/cuda/level0/BusSpeedDownload.o -lSHOCCommonMPI $(CUDA_LIBS) $(LIBS)
25 | 
26 | 


--------------------------------------------------------------------------------
/src/mpi/contention-mt/opencl/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir):$(srcdir)/..:$(top_srcdir)/src/opencl/level0
 6 | 
 7 | # Which compiler to use?
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(OCL_LDFLAGS) -L$(top_builddir)/src/mpi/common
12 | AM_CPPFLAGS = $(OCL_CPPFLAGS) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # What is the destination for programs built from this directory?
15 | epopencldir = $(bindir)/EP/OpenCL
16 | 
17 | # What programs should be installed to that destination?
18 | epopencl_PROGRAMS = MTBusCont
19 | 
20 | # How to build those programs?
21 | MTBusCont_SOURCES = OCLDriver.cpp \
22 | 					MTBusCont.cpp \
23 | 					mtbcmain.cpp
24 | MTBusCont_LDADD = $(top_builddir)/src/opencl/level0/BusSpeedDownload.o -lSHOCCommonMPI $(OCL_LIBS) $(LIBS)
25 | 
26 | 


--------------------------------------------------------------------------------
/src/common/HostStencilFactory.h:
--------------------------------------------------------------------------------
 1 | #ifndef HOSTSTENCILFACTORY_H
 2 | #define HOSTSTENCILFACTORY_H
 3 | 
 4 | #include "StencilFactory.h"
 5 | 
 6 | // ****************************************************************************
 7 | // Class:  HostStencilFactory
 8 | //
 9 | // Purpose:
10 | //   StencilFactory subclass that generates stencils for hosts.  It passes
11 | //   the name "HostStencil" to its StencilFactory<T> base.
12 | //
13 | // Programmer:  Phil Roth
14 | // Creation:    October 28, 2009
15 | //
16 | // ****************************************************************************
17 | template<typename T>
18 | class HostStencilFactory : public StencilFactory<T>
19 | {
20 | public:
21 |     // Only forwards the factory name to the base class.
22 |     HostStencilFactory()
23 |       : StencilFactory<T>( "HostStencil" )
24 |     {
25 |     }
26 | 
27 |     // Build a stencil from the parsed options (defined outside this header).
28 |     virtual Stencil<T>* BuildStencil( const OptionParser& options );
29 | 
30 |     // Check the parsed options (defined outside this header).
31 |     virtual void CheckOptions( const OptionParser& options ) const;
32 | };
29 | 
30 | #endif // HOSTSTENCILFACTORY_H
31 | 


--------------------------------------------------------------------------------
/src/common/StencilUtil.cpp:
--------------------------------------------------------------------------------
 1 | #include "StencilUtil.h"
 2 | 
 3 | 
 4 | // Write at most nValErrsToPrint validation errors to the stream s, one per
 5 | // line, in the order they appear in validationErrors.
 6 | //
 7 | // s:                 output stream that receives the report
 8 | // validationErrors:  errors to report; for each one its i, j, val, exp and
 9 | //                    relErr members are printed
10 | // nValErrsToPrint:   upper bound on the number of lines written (0 prints
11 | //                    nothing)
12 | template<class T>
13 | void
14 | StencilValidater<T>::PrintValidationErrors( std::ostream& s,
15 |                     const std::vector<ValidationErrorInfo<T> >& validationErrors,
16 |                     unsigned int nValErrsToPrint ) const
17 | {
18 |     unsigned int nErrorsPrinted = 0;
19 | 
20 |     // Stop as soon as the limit is reached: a strict '<' prints exactly
21 |     // nValErrsToPrint entries, and leaving the loop avoids walking the rest
22 |     // of the vector for nothing.
23 |     for( typename std::vector<ValidationErrorInfo<T> >::const_iterator iter = validationErrors.begin();
24 |             (iter != validationErrors.end()) && (nErrorsPrinted < nValErrsToPrint);
25 |             ++iter, ++nErrorsPrinted )
26 |     {
27 |         s << "out[" << iter->i
28 |             << "][" << iter->j
29 |             << "]=" << iter->val
30 |             << ", expected " << iter->exp
31 |             << ", relErr " << iter->relErr
32 |             << '\n';
33 |     }
34 | }
27 | 
28 | 


--------------------------------------------------------------------------------
/src/opencl/level1/stencil2d/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = tpmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(OCL_LDFLAGS)
13 | CPPFLAGS += $(OCL_CPPFLAGS)
14 | L1LIBS = $(OCL_LIBS) $(LIBS)
15 | 
16 | # what is the destination for programs built from this directory?
17 | serialopencldir = $(bindir)/Serial/OpenCL
18 | 
19 | # what programs should be installed in the destination?
20 | serialopencl_PROGRAMS = Stencil2D
21 | 
22 | # how to build those programs
23 | Stencil2D_SOURCES = Stencil2Dmain.cpp \
24 | 				OpenCLStencil.cpp \
25 | 				OpenCLStencilFactory.cpp \
26 | 				CommonOpenCLStencilFactory.cpp \
27 | 				stencil2d_cl.cpp \
28 |                 main.cpp
29 | Stencil2D_LDADD   = $(L1LIBS)
30 | 
31 | 


--------------------------------------------------------------------------------
/src/cuda/level1/stencil2d/tpmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Which compiler to use to build and link
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # What is the destination for programs built from this directory?
15 | tpcudadir = $(bindir)/TP/CUDA
16 | 
17 | # What programs should be installed to that destination?
18 | tpcuda_PROGRAMS = Stencil2D
19 | 
20 | # How to build those programs?
21 | Stencil2D_SOURCES = CUDAStencil.cpp \
22 | 				CommonCUDAStencilFactory.cpp \
23 | 				Stencil2Dmain.cpp \
24 | 				CUDAStencilFactory.cpp \
25 |                 main.cpp
26 | Stencil2D_LDADD = CUDAStencilKernel.o $(CUDA_LIBS) $(LIBS)
27 | 
28 | 


--------------------------------------------------------------------------------
/src/common/HostStencil.h:
--------------------------------------------------------------------------------
 1 | #ifndef HOSTSTENCIL_H
 2 | #define HOSTSTENCIL_H
 3 | 
 4 | #include "Stencil.h"
 5 | 
 6 | // ****************************************************************************
 7 | // Class:  HostStencil
 8 | //
 9 | // Purpose:
10 | //   Stencil implementation for hosts.  The constructor only forwards the
11 | //   three weights to the Stencil<T> base class.
12 | //
13 | // Programmer:  Phil Roth
14 | // Creation:    October 28, 2009
15 | //
16 | // ****************************************************************************
17 | template<typename T>
18 | class HostStencil : public Stencil<T>
19 | {
20 | protected:
21 |     // Hook run before an iteration (defined outside this header).
22 |     virtual void DoPreIterationWork( Matrix2D<T>& mtx, unsigned int iter );
23 | 
24 | public:
25 |     HostStencil( T wCenter, T wCardinal, T wDiagonal )
26 |       : Stencil<T>( wCenter, wCardinal, wDiagonal )
27 |     {
28 |     }
29 | 
30 |     // Apply the stencil to the matrix for nIters iterations
31 |     // (defined outside this header).
32 |     virtual void operator()( Matrix2D<T>&, unsigned int nIters );
33 | };
33 | 
34 | #endif /* HOSTSTENCIL_H */
35 | 


--------------------------------------------------------------------------------
/src/opencl/level1/stencil2d/README.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | A relatively naive 9-point stencil operation over a 2D array.  The result
 3 | computed via OpenCL is compared against the result computed on the host CPU.
 4 | 
 5 | In the OpenCL implementation, a thread block copies data from the array in
 6 | device global memory to shared memory with a 1-element-wide halo, and each
 7 | thread in the thread block computes one data point of the array.
 8 | 
 9 | Double buffering in device global memory is used to avoid problems with mixing
10 | 'new' and 'old' array data, since no synchronization across thread
11 | blocks is available in the device code.
12 | 
13 | The number of iterations, the weights used in the stencil operation, and the
14 | dimensions of the 2D array are all user configurable.
15 | 
16 | Better performance is likely possible.  Some potential optimizations include:
17 | having a thread compute multiple data points; reducing the branching logic in
18 | the code that loads shared memory.
19 | 
20 | 


--------------------------------------------------------------------------------
/src/cuda/level1/stencil2d/MPICUDAStencilFactory.h:
--------------------------------------------------------------------------------
 1 | #ifndef MPICUDASTENCILFACTORY_H
 2 | #define MPICUDASTENCILFACTORY_H
 3 | 
 4 | #include "CommonCUDAStencilFactory.h"
 5 | 
 6 | // ****************************************************************************
 7 | // Class:  MPICUDAStencilFactory
 8 | //
 9 | // Purpose:
10 | //   MPI implementation of the CUDA stencil factory.  It passes the name
11 | //   "MPICUDAStencil" to its CommonCUDAStencilFactory<T> base.
12 | //
13 | // Programmer:  Phil Roth
14 | // Creation:    November 5, 2009
15 | //
16 | // ****************************************************************************
17 | template<typename T>
18 | class MPICUDAStencilFactory : public CommonCUDAStencilFactory<T>
19 | {
20 | public:
21 |     // Only forwards the factory name to the base class.
22 |     MPICUDAStencilFactory()
23 |       : CommonCUDAStencilFactory<T>( "MPICUDAStencil" )
24 |     {
25 |     }
26 | 
27 |     // Build a stencil from the parsed options (defined outside this header).
28 |     virtual Stencil<T>* BuildStencil( const OptionParser& options );
29 | 
30 |     // Check the parsed options (defined outside this header).
31 |     virtual void CheckOptions( const OptionParser& options ) const;
32 | };
29 | 
30 | #endif // MPICUDASTENCILFACTORY_H
31 | 


--------------------------------------------------------------------------------
/src/mpi/common/MPIHostStencilFactory.h:
--------------------------------------------------------------------------------
 1 | #ifndef MPIHOSTSTENCILFACTORY_H
 2 | #define MPIHOSTSTENCILFACTORY_H
 3 | 
 4 | #include "StencilFactory.h"
 5 | 
 6 | // ****************************************************************************
 7 | // Class:  MPIHostStencilFactory
 8 | //
 9 | // Purpose:
10 | //   StencilFactory subclass that generates stencils for MPI hosts.  It
11 | //   passes the name "MPIHostStencil" to its StencilFactory<T> base.
12 | //
13 | // Programmer:  Phil Roth
14 | // Creation:    November 5, 2009
15 | //
16 | // ****************************************************************************
17 | template<typename T>
18 | class MPIHostStencilFactory : public StencilFactory<T>
19 | {
20 | public:
21 |     // Only forwards the factory name to the base class.
22 |     MPIHostStencilFactory()
23 |       : StencilFactory<T>( "MPIHostStencil" )
24 |     {
25 |     }
26 | 
27 |     // Build a stencil from the parsed options (defined outside this header).
28 |     virtual Stencil<T>* BuildStencil( const OptionParser& options );
29 | 
30 |     // Add options to the given parser (defined outside this header).
31 |     virtual void AddOptions( OptionParser& odesc ) const;
32 | 
33 |     // Check the parsed options (defined outside this header).
34 |     virtual void CheckOptions( const OptionParser& options ) const;
35 | };
30 | 
31 | #endif // MPIHOSTSTENCILFACTORY_H
32 | 


--------------------------------------------------------------------------------
/src/common/SerializableObject.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _SERIALIZABLE_OBJECT_H
 3 | #define _SERIALIZABLE_OBJECT_H
 4 | 
 5 | #include <iostream>
 6 | #include <sstream>
 7 | 
 8 | // ****************************************************************************
 9 | // Class: SerializableObject
10 | //
11 | // Purpose:
12 | //   Abstract class with two pure virtual methods for serializing and
13 | //   unserializing an object to string.
14 | //
15 | // Notes:
16 | //   All Devices, Platforms, Node Containers and Multi-Node Containers
17 | //   that are sent over the network must implement this interface.
18 | //
19 | // Programmer: Gabriel Marin
20 | // Creation: August 21, 2009
21 | //
22 | // Modifications:
23 | //   Added a virtual destructor so that deleting a derived object through a
24 | //   SerializableObject pointer runs the derived destructor.
25 | //
26 | // ****************************************************************************
27 | class SerializableObject
28 | {
29 | public:
30 |     SerializableObject() {}
31 | 
32 |     // Virtual because this class is used as a polymorphic base; without it,
33 |     // delete through a base pointer is undefined behavior.
34 |     virtual ~SerializableObject() {}
35 | 
36 |     // Write this object's state to the given output string stream.
37 |     virtual void writeObject (std::ostringstream &oss) const = 0;
38 | 
39 |     // Restore this object's state from the given input string stream.
40 |     virtual void readObject (std::istringstream &iss) = 0;
41 | };
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/src/cuda/level1/reduction/TPReduction.h:
--------------------------------------------------------------------------------
 1 | #ifndef TPREDUCTION_H_
 2 | #define TPREDUCTION_H_
 3 | 
 4 | #include "mpi.h"
 5 | 
 6 | // Declaration only; the definition is not in this header.
 7 | // Presumably num_blocks / num_threads / smem_size are the kernel launch
 8 | // configuration and d_idata / d_odata are device buffers holding 'size'
 9 | // input elements and the results - confirm against the definition.
10 | template<typename T>
11 | void RunTestLaunchKernel( int num_blocks, int num_threads, int smem_size,
12 |                           T* d_idata, T* d_odata,
13 |                           int size );
13 | 
14 | // Typed wrapper around MPI_Allreduce.  Each specialization sums a single
15 | // element (*local_result) over MPI_COMM_WORLD with MPI_SUM and stores the
16 | // total in *global_result.  Only float and double are specialized; the
17 | // primary template is declared but not defined here.
18 | template <typename T>
19 | inline void globalReduction(T* local_result, T* global_result);
20 | 
21 | // float: reduces with MPI_FLOAT
22 | template <>
23 | inline void globalReduction<float>(float* local_result, float* global_result)
24 | {
25 |    MPI_Allreduce(local_result, global_result,
26 |            1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);
27 | }
28 | 
29 | // double: reduces with MPI_DOUBLE
30 | template <>
31 | inline void globalReduction<double>(double* local_result, double* global_result)
32 | {
33 |    MPI_Allreduce(local_result, global_result,
34 |            1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
35 | }
31 | 
32 | #endif // TPREDUCTION_H_
33 | 


--------------------------------------------------------------------------------
/src/opencl/level1/scan/TPScan.h:
--------------------------------------------------------------------------------
 1 | #ifndef __TPSCAN_H
 2 | #define __TPSCAN_H
 3 | 
 4 | // When using MPICH and MPICH-derived MPI implementations, there is a
 5 | // naming conflict between stdio.h and MPI's C++ binding.
 6 | // Since we do not use the C++ MPI binding, we can avoid the ordering
 7 | // issue by ignoring the C++ MPI binding headers.
 8 | // This #define should be quietly ignored when using other MPI implementations.
 9 | #define MPICH_SKIP_MPICXX
10 | #include "mpi.h"
11 | 
12 | // Typed wrapper around MPI_Exscan.  Each specialization performs an
13 | // exclusive prefix sum (MPI_SUM) of a single element over MPI_COMM_WORLD,
14 | // reading *local_result and writing *global_result.  Only float and double
15 | // are specialized; the primary template is declared but not defined here.
16 | // NOTE(review): the MPI standard leaves the receive buffer undefined on
17 | // rank 0 after MPI_Exscan, so callers should not read *global_result there.
18 | template <typename T>
19 | inline void globalExscan(T* local_result, T* global_result);
20 | 
21 | // float: scans with MPI_FLOAT
22 | template <>
23 | inline void globalExscan<float>(float* local_result, float* global_result)
24 | {
25 |    MPI_Exscan(local_result, global_result,
26 |            1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);
27 | }
28 | 
29 | // double: scans with MPI_DOUBLE
30 | template <>
31 | inline void globalExscan<double>(double* local_result, double* global_result)
32 | {
33 |    MPI_Exscan(local_result, global_result,
34 |            1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
35 | }
27 | 
28 | #endif // __TPSCAN_H
29 | 


--------------------------------------------------------------------------------
/src/opencl/level1/stencil2d/tpmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # which compiler to use
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(OCL_LDFLAGS)
12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common
13 | L1LIBS = $(OCL_LIBS) $(LIBS)
14 | 
15 | # what is the destination for programs built from this directory?
16 | tpopencldir = $(bindir)/TP/OpenCL
17 | 
18 | # what programs should be installed in the destination?
19 | tpopencl_PROGRAMS = Stencil2D
20 | 
21 | # how to build those programs
22 | Stencil2D_SOURCES = OpenCLStencil.cpp \
23 | 				OpenCLStencilFactory.cpp \
24 | 				CommonOpenCLStencilFactory.cpp \
25 | 				stencil2d_cl.cpp \
26 | 				Stencil2Dmain.cpp \
27 | 				MPIOpenCLStencil.cpp \
28 | 				MPIOpenCLStencilFactory.cpp \
29 | 				main.cpp
30 | Stencil2D_LDADD   = $(L1LIBS)
31 | 
32 | 


--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | The Scalable HeterOgeneous Computing (SHOC) benchmark suite is a
 3 | collection of benchmark programs testing the performance and
 4 | stability of systems using computing devices with non-traditional architectures
 5 | for general purpose computing. Its initial focus is on systems containing
 6 | Graphics Processing Units (GPUs) and multi-core processors, and on the
 7 | OpenCL programming standard. It can be used on clusters as well as individual
 8 | hosts.
 9 | 
10 | Documentation on configuring, building, and running the SHOC benchmark
11 | programs is contained in the SHOC user manual, in the doc subdirectory
12 | of the SHOC source code tree.  The file INSTALL.txt contains a sketch of
13 | those instructions for rapid installation.
14 | 
15 | Installation should be familiar to anyone who is experienced with configure
16 | and make; see the config directory for some examples.  Also, if your
17 | platform requires regenerating the configure script, see build-aux/bootstrap.sh
18 | and the manual for more details.
19 | 
20 | Last update: 2014-04-13 15:39:22 kspaff
21 | 
22 | 


--------------------------------------------------------------------------------
/src/cuda/level1/sort/sort_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef SORT_KERNEL_H_
 2 | #define SORT_KERNEL_H_
 3 | 
 4 | #include <cuda.h>
 5 | 
 6 | #define WARP_SIZE 32
 7 | #define SORT_BLOCK_SIZE 128
 8 | #define SCAN_BLOCK_SIZE 256
 9 | 
10 | typedef unsigned int uint;
11 | 
12 | // Prototypes only; the kernel bodies are not in this header, so what each
13 | // kernel does is inferred from its name and should be confirmed there.
14 | 
15 | // Sort kernels
16 | __global__ void radixSortBlocks(uint nbits,
17 |                                 uint startbit,
18 |                                 uint4* keysOut,
19 |                                 uint4* valuesOut,
20 |                                 uint4* keysIn,
21 |                                 uint4* valuesIn);
22 | 
23 | __global__ void findRadixOffsets(uint2* keys,
24 |                                  uint* counters,
25 |                                  uint* blockOffsets,
26 |                                  uint startbit,
27 |                                  uint numElements,
28 |                                  uint totalBlocks);
29 | 
30 | __global__ void reorderData(uint startbit,
31 |                             uint *outKeys,
32 |                             uint *outValues,
33 |                             uint2 *keys,
34 |                             uint2 *values,
35 |                             uint *blockOffsets,
36 |                             uint *offsets,
37 |                             uint *sizes,
38 |                             uint totalBlocks);
39 | 
40 | // Scan Kernels
41 | __global__ void vectorAddUniform4(uint *d_vector,
42 |                                   const uint *d_uniforms,
43 |                                   const int n);
44 | 
45 | __global__ void scan(uint *g_odata,
46 |                      uint *g_idata,
47 |                      uint *g_blockSums,
48 |                      const int n,
49 |                      const bool fullBlock,
50 |                      const bool storeSum);
28 | 
29 | #endif // SORT_KERNEL_H_
30 | 


--------------------------------------------------------------------------------
/src/common/ValidateMatrix2D.cpp:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <math.h>
 3 | #include "ValidateMatrix2D.h"
 4 | 
 5 | 
 6 | // Compare two matrices element by element and collect every position whose
 7 | // relative error exceeds relErrThreshold.
 8 | //
 9 | // s: matrix holding the expected values
10 | // t: matrix holding the actual values; must have the same shape as s
11 | //    (checked with assert)
12 | //
13 | // Returns one ValidationErrorInfo per offending element, built from
14 | // (row, column, actual value, expected value, relative error).
15 | // Elements whose expected value is exactly zero are given a relative error
16 | // of zero and are therefore never reported.
17 | template<class T>
18 | std::vector<ValidationErrorInfo<T> >
19 | Validate<T>::operator()( const Matrix2D<T>& s, const Matrix2D<T>& t )
20 | {
21 |     std::vector<ValidationErrorInfo<T> > ret;
22 | 
23 |     // ensure matrices are same shape
24 |     assert( (s.GetNumRows() == t.GetNumRows()) && (s.GetNumColumns() == t.GetNumColumns()) );
25 | 
26 |     for( unsigned int i = 0; i < s.GetNumRows(); i++ )
27 |     {
28 |         for( unsigned int j = 0; j < s.GetNumColumns(); j++ )
29 |         {
30 |             T expVal = s.GetConstData()[i][j];
31 |             T actualVal = t.GetConstData()[i][j];
32 | 
33 |             // fabs rather than fabsf: fabsf would round a double T to
34 |             // single precision before the comparison.
35 |             T delta = fabs( actualVal - expVal );
36 | 
37 |             // Divide by the magnitude of the expected value; dividing by a
38 |             // negative expVal would give a negative relative error that can
39 |             // never exceed the threshold.
40 |             T relError = (expVal != T(0)) ? T(delta / fabs( expVal )) : T(0);
41 | 
42 |             if( relError > relErrThreshold )
43 |             {
44 |                 ret.push_back( ValidationErrorInfo<T>( i, j, actualVal, expVal, relError ) );
45 |             }
46 |         }
47 |     }
48 | 
49 |     return ret;
50 | }
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/src/cuda/level0/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | 
 5 | SUBDIRS	= $(MAYBE_MPI)
 6 | 
 7 | include $(top_builddir)/config/config.mk
 8 | include $(top_builddir)/config/targets.mk
 9 | 
10 | # How to find source files
11 | VPATH = $(srcdir):$(srcdir)/../common
12 | 
13 | AM_LDFLAGS = $(CUDA_LDFLAGS)
14 | AM_CPPFLAGS = $(CUDA_INC)
15 | 
16 | # What is the destination for programs built from this directory?
17 | serialcudadir = $(bindir)/Serial/CUDA
18 | 
19 | # What programs should be installed in the bin/Serial/CUDA destination?
20 | serialcuda_PROGRAMS = BusSpeedDownload \
21 | 						BusSpeedReadback \
22 | 						DeviceMemory \
23 | 						MaxFlops
24 | 
25 | BusSpeedDownload_SOURCES	= main.cpp
26 | BusSpeedDownload_LDADD		= BusSpeedDownload.o $(CUDA_LIBS) $(LIBS)
27 | 
28 | BusSpeedReadback_SOURCES	= main.cpp
29 | BusSpeedReadback_LDADD		= BusSpeedReadback.o $(CUDA_LIBS) $(LIBS)
30 | 
31 | DeviceMemory_SOURCES	= main.cpp
32 | DeviceMemory_LDADD		= DeviceMemory.o $(CUDA_LIBS) $(LIBS)
33 | 
34 | MaxFlops_SOURCES	= main.cpp
35 | MaxFlops_LDADD		= MaxFlops.o $(CUDA_LIBS) $(LIBS)
36 | 
37 | 


--------------------------------------------------------------------------------
/src/cuda/level1/reduction/tpRedLaunchKernel.cu:
--------------------------------------------------------------------------------
 1 | #include "reduction_kernel.h"
 2 | 
 3 | // Host-side wrapper that launches the reduce kernel with the given grid
 4 | // size, block size and dynamic shared memory size, passing the device
 5 | // input/output pointers and the element count straight through.
 6 | // Note that the second template argument of reduce is fixed at 256 here,
 7 | // independent of num_threads.
 8 | template<typename T>
 9 | void
10 | RunTestLaunchKernel( int num_blocks, int num_threads, int smem_size,
11 |                      T* d_idata, T* d_odata,
12 |                      int size )
13 | {
14 |     // This extra level of indirection can go away with CUDA 4.0 if the
15 |     // cuConfigureCall and cuLaunchKernel functions are used instead.
16 |     reduce<T, 256><<< num_blocks, num_threads, smem_size >>>( d_idata, d_odata, size );
17 | }
18 | 
19 | 
20 | // Explicit instantiations, so that the template functions are emitted in
21 | // this translation unit.  Unlike the Stencil2D CUDA version, which
22 | // instantiates objects, function templates have to be instantiated here.
23 | // Declaring the needed specializations seems to work for several recent
24 | // versions of g++ that people are likely to be using underneath nvcc.
25 | template void RunTestLaunchKernel<float>( int, int, int, float*, float*, int );
26 | template void RunTestLaunchKernel<double>( int, int, int, double*, double*, int );
25 | 
26 | 


--------------------------------------------------------------------------------
/src/cuda/level0/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../common
 6 | 
 7 | # Which compiler to use to link
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(CUDA_LDFLAGS)
12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # What is the destination for programs built from this directory?
15 | epcudadir = $(bindir)/EP/CUDA
16 | 
17 | # What programs should be installed in the destination?
18 | epcuda_PROGRAMS = BusSpeedDownload \
19 | 						BusSpeedReadback \
20 | 						DeviceMemory \
21 | 						MaxFlops
22 | 
23 | BusSpeedDownload_SOURCES	= main.cpp
24 | BusSpeedDownload_LDADD		= BusSpeedDownload.o $(CUDA_LIBS) $(LIBS)
25 | 
26 | BusSpeedReadback_SOURCES	= main.cpp
27 | BusSpeedReadback_LDADD		= BusSpeedReadback.o $(CUDA_LIBS) $(LIBS)
28 | 
29 | DeviceMemory_SOURCES	= main.cpp
30 | DeviceMemory_LDADD		= DeviceMemory.o $(CUDA_LIBS) $(LIBS)
31 | 
32 | MaxFlops_SOURCES	= main.cpp
33 | MaxFlops_LDADD		= MaxFlops.o $(CUDA_LIBS) $(LIBS)
34 | 
35 | 


--------------------------------------------------------------------------------
/config/conf-crossarm.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | #
 4 | # Configure SHOC for cross-compilation using ARM cross compilers.
 5 | # This is an example of how to cross-compile; it should be adaptable to
 6 | # other cross-compilation targets.
 7 | #
 8 | # Assumes we are using CodeSourcery Lite ARM cross compilers.
 9 | # Assumes cross-compilers, cross-linkers, etc. are in the PATH.
10 | # Assumes CodeSourcery sysroot is in /opt/libc.
11 | #
12 | # Assumes no CUDA support on target system.
13 | #
14 | # Since OpenCL is library based, you have to explicitly specify CPPFLAGS to
15 | # find the OpenCL headers.  You may also need to specify LDFLAGS, depending on
16 | # whether the OpenCL libraries are installed in a location searched by
17 | # the linker such as /usr/lib.
18 | #
19 | # Does not (yet?) support MPI.
20 | #
21 | sh ./configure \
22 | CPPFLAGS="-I$HOME/private/Projects/ARM/ARM-OpenCL-1.1/include" \
23 | LDFLAGS="-L$HOME/private/Projects/ARM/ARM-OpenCL-1.1/lib -Wl,-rpath=/opt/libc/lib:/opt/libc/usr/lib -Wl,--dynamic-linker=/opt/libc/lib/ld-linux.so.3" \
24 | --host=arm-none-linux-gnueabi \
25 | --with-opencl \
26 | --without-cuda \
27 | --without-mpi
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/src/opencl/level1/stencil2d/OpenCLStencilFactory.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <string>
 3 | #include <cassert>
 4 | #include "OpenCLStencilFactory.h"
 5 | #include "OpenCLStencil.h"
 6 | #include "OpenCLDeviceInfo.h"
 7 | 
 8 | 
 9 | 
10 | template<class T>
11 | Stencil<T>*
12 | OpenCLStencilFactory<T>::BuildStencil( const OptionParser& options )
13 | {
14 |     // Outputs of ExtractOptions: the three stencil weights plus lRows/lCols.
15 |     T weightCenter, weightCardinal, weightDiagonal;
16 |     size_t nLRows, nLCols;
17 |     this->ExtractOptions( options,
18 |                           weightCenter, weightCardinal, weightDiagonal,
19 |                           nLRows, nLCols );
20 | 
21 |     // Create the stencil, handing it this factory's dev, ctx and queue
22 |     // members along with the extracted values.
23 |     Stencil<T>* built = new OpenCLStencil<T>( weightCenter,
24 |                                               weightCardinal,
25 |                                               weightDiagonal,
26 |                                               nLRows,
27 |                                               nLCols,
28 |                                               this->dev,
29 |                                               this->ctx,
30 |                                               this->queue );
31 | 
32 |     // The object is allocated with new and returned as a raw pointer.
33 |     // NOTE(review): presumably the caller is responsible for deleting it;
34 |     // confirm against the call sites.
35 |     return built;
36 | }
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/src/opencl/level1/stencil2d/MPIOpenCLStencilFactory.h:
--------------------------------------------------------------------------------
 1 | #ifndef MPIOPENCLSTENCILFACTORY_H
 2 | #define MPIOPENCLSTENCILFACTORY_H
 3 | 
 4 | #include "CommonOpenCLStencilFactory.h"
 5 | 
 6 | // ****************************************************************************
 7 | // Class:  MPIOpenCLStencilFactory
 8 | //
 9 | // Purpose:
10 | //   MPI implementation of the OpenCL stencil factory.
11 | //   Constructor forwards its OpenCL handles to CommonOpenCLStencilFactory.
12 | // Programmer:  Phil Roth
13 | // Creation:    November 5, 2009
14 | //
15 | // ****************************************************************************
16 | template<class T>
17 | class MPIOpenCLStencilFactory : public CommonOpenCLStencilFactory<T>
18 | {
19 | public:
20 |     MPIOpenCLStencilFactory( cl_device_id _dev,
21 |                                 cl_context _ctx,
22 |                                 cl_command_queue _queue )
23 |       : CommonOpenCLStencilFactory<T>( "MPIOpenCLStencil", _dev, _ctx, _queue )
24 |     {
25 |         // nothing else to do: every argument went to the base-class constructor
26 |     }
27 |     // declarations only; the definitions are not in this header
28 |     virtual Stencil<T>* BuildStencil( const OptionParser& options );
29 |     virtual void CheckOptions( const OptionParser& options ) const;
30 | };
31 | 
32 | #endif // MPIOPENCLSTENCILFACTORY_H
33 | 


--------------------------------------------------------------------------------
/src/common/Graph.h:
--------------------------------------------------------------------------------
 1 | // Include guard: without it, including this header twice redefines Graph.
 2 | #ifndef GRAPH_H
 3 | #define GRAPH_H
 4 | #define MAX_LINE_LENGTH 500000
 5 | 
 6 | // Graph data plus METIS load/save entry points; declarations only.
 7 | class Graph
 8 | {
 9 |     unsigned int num_verts;
10 |     unsigned int num_edges;
11 |     unsigned int adj_list_length;
12 |     unsigned int *edge_offsets;
13 |     unsigned int *edge_list;
14 |     unsigned int *edge_costs;
15 |     unsigned int max_degree;
16 |     int graph_type;
17 |     bool if_delete_arrays;  // NOTE(review): presumably "destructor frees arrays"; confirm
18 |     void SetAllCosts(unsigned int c);   // private helper
19 |     public:
20 |     Graph();
21 |     ~Graph();
22 |     void LoadMetisGraph(const char *filename);
23 |     void SaveMetisGraph(const char *filename);
24 |     unsigned int GetNumVertices();
25 |     unsigned int GetNumEdges();
26 |     unsigned int GetMaxDegree();
27 |     unsigned int *GetEdgeOffsets();
28 |     unsigned int *GetEdgeList();
29 |     unsigned int *GetEdgeCosts();
30 |     unsigned int **GetEdgeOffsetsPtr();  // pointer-to-pointer variants of the getters
31 |     unsigned int **GetEdgeListPtr();
32 |     unsigned int **GetEdgeCostsPtr();
33 |     unsigned int *GetVertexLengths(unsigned int *cost,unsigned int source);
34 |     int GetMetisGraphType();
35 |     unsigned int GetAdjacencyListLength();
36 |     void GenerateSimpleKWayGraph(unsigned int verts,unsigned int degree);
37 | };
38 | 
39 | #endif // GRAPH_H


--------------------------------------------------------------------------------
/src/common/SerialStencilUtil.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <sstream>
 3 | #include "SerialStencilUtil.h"
 4 | #include "ValidateMatrix2D.h"
 5 | 
 6 | 
 7 | template<class T>
 8 | void
 9 | SerialStencilValidater<T>::ValidateResult( const Matrix2D<T>& exp,
10 |                 const Matrix2D<T>& data,
11 |                 double valErrThreshold,
12 |                 unsigned int nValErrsToPrint ) const
13 | {
14 |     // Run the Validate functor (built with the threshold) over both matrices.
15 |     Validate<T> checker( valErrThreshold );
16 |     std::vector<ValidationErrorInfo<T> > errs = checker( exp, data );
17 | 
18 |     // Assemble the whole report first, then write it to stdout in one go.
19 |     std::ostringstream report;
20 |     report << errs.size() << " validation errors";
21 |     if( !errs.empty() && (nValErrsToPrint != 0) )
22 |         this->PrintValidationErrors( report, errs, nValErrsToPrint );
23 |     std::cout << report.str() << std::endl;
24 | }
25 | 
26 | 
27 | 
28 | //  Modifications:
29 | //    Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010
30 | //    Split timing reports into detailed and summary.  For
31 | //    serial code, we report all trial values: the detailed dump of the
32 | //    result database is written to standard output.
33 | void
34 | SerialStencilTimingReporter::ReportTimings( ResultDatabase& resultDB ) const
35 | {
36 |     resultDB.DumpDetailed( std::cout );
37 | }
38 | 
39 | 


--------------------------------------------------------------------------------
/src/mpi/common/MPIHostStencil.h:
--------------------------------------------------------------------------------
 1 | #ifndef MPIHOSTSTENCIL_H
 2 | #define MPIHOSTSTENCIL_H
 3 | 
 4 | #include <fstream>
 5 | #include "mpi.h"
 6 | #include "HostStencil.h"
 7 | #include "MPI2DGridProgram.h"
 8 | 
 9 | 
10 | // ****************************************************************************
11 | // Class:  MPIHostStencil
12 | //
13 | // Purpose:
14 | //   Stencils for MPI hosts.
15 | //   Inherits from both HostStencil<T> and MPI2DGridProgram<T>.
16 | // Programmer:  Phil Roth
17 | // Creation:    November 5, 2009
18 | //
19 | // ****************************************************************************
20 | template<class T>
21 | class MPIHostStencil : public HostStencil<T>, public MPI2DGridProgram<T>
22 | {
23 | private:
24 |     std::ofstream ofs;
25 |     bool dumpData;
26 | 
27 | protected:
28 |     virtual void DoPreIterationWork( Matrix2D<T>& mtx, unsigned int iter );
29 | 
30 | public:
31 |     MPIHostStencil( T wCenter,
32 |                         T wCardinal,
33 |                         T wDiagonal,
34 |                         size_t mpiGridRows,
35 |                         size_t mpiGridCols,
36 |                         unsigned int nItersPerHaloExchange,
37 |                         bool dumpData = false );
38 |     // function-call operator taking the matrix and an iteration count
39 |     virtual void operator()( Matrix2D<T>&, unsigned int nIters );
40 | };
41 | 
42 | #endif /* MPIHOSTSTENCIL_H */
43 | 


--------------------------------------------------------------------------------
/src/cuda/level1/stencil2d/CUDAStencil.h:
--------------------------------------------------------------------------------
 1 | #ifndef CUDASTENCIL_H
 2 | #define CUDASTENCIL_H
 3 | 
 4 | #include "Stencil.h"
 5 | 
 6 | // ****************************************************************************
 7 | // Class:  CUDAStencil
 8 | //
 9 | // Purpose:
10 | //   CUDA implementation of 9-point stencil.
11 | //   Declarations only; member definitions are not part of this header.
12 | // Programmer:  Phil Roth
13 | // Creation:    October 28, 2009
14 | //
15 | // ****************************************************************************
16 | template<class T>
17 | class CUDAStencil : public Stencil<T>
18 | {
19 | private:
20 |     size_t lRows;
21 |     size_t lCols;
22 |     int device;
23 | 
24 | protected:
25 |     virtual void DoPreIterationWork( T* currBuf,    // in device global memory
26 |                                         T* altBuf,  // in device global memory
27 |                                         Matrix2D<T>& mtx,
28 |                                         unsigned int iter );
29 | 
30 | public:
31 |     CUDAStencil( T _wCenter,
32 |                     T _wCardinal,
33 |                     T _wDiagonal,
34 |                     size_t _lRows,
35 |                     size_t _lCols,
36 |                     int _device );
37 |     // function-call operator taking the matrix and an iteration count
38 |     virtual void operator()( Matrix2D<T>&, unsigned int nIters );
39 | };
40 | 
41 | #endif /* CUDASTENCIL_H */
42 | 


--------------------------------------------------------------------------------
/src/cuda/level1/stencil2d/CommonCUDAStencilFactory.h:
--------------------------------------------------------------------------------
 1 | #ifndef COMMONCUDASTENCILFACTORY_H
 2 | #define COMMONCUDASTENCILFACTORY_H
 3 | 
 4 | #include <vector>
 5 | #include "StencilFactory.h"
 6 | 
 7 | // ****************************************************************************
 8 | // Class:  CommonCUDAStencilFactory
 9 | //
10 | // Purpose:
11 | //   CUDA implementation of stencil factory.
12 | //   Adds an ExtractOptions taking lRows, lCols and a device list by reference.
13 | // Programmer:  Phil Roth
14 | // Creation:    October 28, 2009
15 | //
16 | // ****************************************************************************
17 | template<class T>
18 | class CommonCUDAStencilFactory : public StencilFactory<T>
19 | {
20 | protected:
21 |     void ExtractOptions( const OptionParser& options,
22 |                             T& wCenter,
23 |                             T& wCardinal,
24 |                             T& wDiagonal,
25 |                             size_t& lRows,
26 |                             size_t& lCols,
27 |                             std::vector<long long>& devices );
28 | 
29 | public:
30 |     CommonCUDAStencilFactory( std::string _sname )
31 |       : StencilFactory<T>( _sname )
32 |     {
33 |         // nothing else to do: the name is passed on to StencilFactory<T>
34 |     }
35 |     // declaration only; the definition is not in this header
36 |     virtual void CheckOptions( const OptionParser& opts ) const;
37 | };
38 | 
39 | #endif // COMMONCUDASTENCILFACTORY_H
40 | 
41 | 


--------------------------------------------------------------------------------
/src/opencl/level2/s3d/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(OCL_LDFLAGS)
13 | CPPFLAGS += $(OCL_CPPFLAGS)
14 | L1LIBS = -lSHOCCommonOpenCL -lSHOCCommon $(OCL_LIBS) $(LIBS)
15 | 
16 | # what is the destination for programs built from this directory?
17 | serialopencldir = $(bindir)/Serial/OpenCL
18 | 
19 | # what programs should be installed in the destination?
20 | serialopencl_PROGRAMS = S3D
21 | 
22 | # how to build those programs
23 | S3D_SOURCES = S3D.cpp \
24 | main.cpp \
25 | gr_base_cl.cpp \
26 | qssa_cl.cpp \
27 | qssab_cl.cpp \
28 | qssa2_cl.cpp \
29 | ratt_cl.cpp \
30 | ratt2_cl.cpp \
31 | ratt3_cl.cpp \
32 | ratt4_cl.cpp \
33 | ratt5_cl.cpp \
34 | ratt6_cl.cpp \
35 | ratt7_cl.cpp \
36 | ratt8_cl.cpp \
37 | ratt9_cl.cpp \
38 | ratt10_cl.cpp \
39 | ratx_cl.cpp \
40 | ratxb_cl.cpp \
41 | ratx2_cl.cpp \
42 | ratx4_cl.cpp \
43 | rdsmh_cl.cpp \
44 | rdwdot_cl.cpp \
45 | rdwdot2_cl.cpp \
46 | rdwdot3_cl.cpp \
47 | rdwdot6_cl.cpp \
48 | rdwdot7_cl.cpp \
49 | rdwdot8_cl.cpp \
50 | rdwdot9_cl.cpp \
51 | rdwdot10_cl.cpp
52 | S3D_LDADD   = $(L1LIBS)
53 | 
54 | 


--------------------------------------------------------------------------------
/src/common/InitializeMatrix2D.h:
--------------------------------------------------------------------------------
 1 | #ifndef INITIALIZE_H
 2 | #define INITIALIZE_H
 3 | 
 4 | #include <functional>
 5 | #include "Matrix2D.h"
 6 | 
 7 | // ****************************************************************************
 8 | // Class:  Initialize
 9 | //
10 | // Purpose:
11 | //   Initialize 2D matrices.
12 | //   Function object; operator() takes the matrix to fill by reference.
13 | // Programmer:  Phil Roth
14 | // Creation:    October 28, 2009
15 | // NOTE(review): std::unary_function is deprecated in C++11, removed in C++17.
16 | // ****************************************************************************
17 | template<class T>
18 | class Initialize : public std::unary_function<Matrix2D<T>&, void>
19 | {
20 | private:
21 |     long seed;
22 |     unsigned int haloWidth; // width of halo
23 |     T haloVal;          // value to use for halo
24 |     int rowPeriod;          // period for row values
25 |     int colPeriod;          // period for column values
26 | 
27 | public:
28 |     Initialize( long int _seed,
29 |                 unsigned int _halo = 1,
30 |                 T _haloVal = 0,
31 |                 int _rowPeriod = -1,
32 |                 int _colPeriod = -1 )
33 |       : seed( _seed ),
34 |         haloWidth( _halo ),
35 |         haloVal( _haloVal ),
36 |         rowPeriod( _rowPeriod ),
37 |         colPeriod( _colPeriod )
38 |     {
39 |         // every member is set in the initializer list; nothing else to do
40 |     }
41 |     // declaration only; the definition is not in this header
42 |     void operator()( Matrix2D<T>& mtx );
43 | };
44 | 
45 | #endif // INITIALIZE_H
46 | 


--------------------------------------------------------------------------------
/src/opencl/level2/s3d/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common
 6 | 
 7 | # Which compiler to use
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(OCL_LDFLAGS)
12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common
13 | L1LIBS = -lSHOCCommonOpenCL -lSHOCCommon $(OCL_LIBS) $(LIBS)
14 | 
15 | # what is the destination for programs built from this directory?
16 | epopencldir = $(bindir)/EP/OpenCL
17 | 
18 | # what programs should be installed in the destination?
19 | epopencl_PROGRAMS = S3D
20 | 
21 | # how to build those programs
22 | S3D_SOURCES = S3D.cpp \
23 | gr_base_cl.cpp \
24 | qssa_cl.cpp \
25 | qssab_cl.cpp \
26 | qssa2_cl.cpp \
27 | ratt_cl.cpp \
28 | ratt2_cl.cpp \
29 | ratt3_cl.cpp \
30 | ratt4_cl.cpp \
31 | ratt5_cl.cpp \
32 | ratt6_cl.cpp \
33 | ratt7_cl.cpp \
34 | ratt8_cl.cpp \
35 | ratt9_cl.cpp \
36 | ratt10_cl.cpp \
37 | ratx_cl.cpp \
38 | ratxb_cl.cpp \
39 | ratx2_cl.cpp \
40 | ratx4_cl.cpp \
41 | rdsmh_cl.cpp \
42 | rdwdot_cl.cpp \
43 | rdwdot2_cl.cpp \
44 | rdwdot3_cl.cpp \
45 | rdwdot6_cl.cpp \
46 | rdwdot7_cl.cpp \
47 | rdwdot8_cl.cpp \
48 | rdwdot9_cl.cpp \
49 | rdwdot10_cl.cpp \
50 | main.cpp
51 | S3D_LDADD   = $(L1LIBS)
52 | 
53 | 


--------------------------------------------------------------------------------
/src/cuda/level1/neuralnet/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | SUBDIRS = $(MAYBE_MPI)
 5 | 
 6 | include $(top_builddir)/config/config.mk
 7 | include $(top_builddir)/config/targets.mk
 8 | 
 9 | # How to find source files
10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common
11 | 
12 | AM_LDFLAGS = $(CUDA_LDFLAGS)
13 | AM_CPPFLAGS = $(CUDA_INC)
14 | 
15 | # What is the destination for programs built from this directory?
16 | serialcudadir = $(bindir)/Serial/CUDA
17 | 
18 | # What programs should be installed to that destination?
19 | serialcuda_PROGRAMS = NeuralNet
20 | 
21 | # How to build those programs?
22 | NeuralNet_SOURCES = main.cpp
23 | NeuralNet_LDADD = NeuralNet.o $(CUDA_LIBS) $(LIBS) -lcublas
24 | 
25 | # Unzip and move data to the bin directory, if supported
26 | if DATA_UNZIP
27 | # Enabling the commented-out lines below would make automake install the
28 | # data with its install-data rules, but that requires listing each file
29 | # separately; instead we use a custom command below.
30 | #data_DATA = nn_data
31 | #nn_data: $(srcdir)/nn_data.zip
32 | #	$(UNZIP) -o $(srcdir)/nn_data.zip
33 | # cp -prf nn_data $(bindir)/.
34 | 
35 | # automake hook run in addition to install-data; honors DESTDIR for staging
36 | install-data-local:
37 | 	$(UNZIP) -o $(srcdir)/nn_data.zip
38 | 	cp -prf nn_data $(DESTDIR)$(bindir)/.
39 | endif
40 | # clean-local hook: defining clean itself would replace automake's clean rule
41 | clean-local:
42 | 	rm -rf nn_data NeuralNet
43 | 


--------------------------------------------------------------------------------
/src/opencl/level0/Makefile.am:
--------------------------------------------------------------------------------
 1 | if BUILD_MPI
 2 |     MAYBE_MPI = epmpi
 3 | endif
 4 | 
 5 | SUBDIRS	= $(MAYBE_MPI)
 6 | 
 7 | include $(top_builddir)/config/config.mk
 8 | include $(top_builddir)/config/targets.mk
 9 | 
10 | # How to find source files
11 | VPATH = $(srcdir):$(srcdir)/../common
12 | 
13 | AM_LDFLAGS = $(OCL_LDFLAGS)
14 | AM_CPPFLAGS = $(OCL_CPPFLAGS)
15 | 
16 | # what is the destination for programs built from this directory?
17 | serialopencldir = $(bindir)/Serial/OpenCL
18 | 
19 | # what programs should be installed in the bin/Serial/OpenCL destination?
20 | serialopencl_PROGRAMS = BusSpeedDownload \
21 | 						BusSpeedReadback \
22 | 						DeviceMemory \
23 | 						KernelCompile \
24 | 						MaxFlops \
25 | 						QueueDelay
26 | 
27 | BusSpeedDownload_SOURCES	= BusSpeedDownload.cpp main.cpp
28 | BusSpeedDownload_LDADD		= $(OCL_LIBS) $(LIBS)
29 | 
30 | BusSpeedReadback_SOURCES	= BusSpeedReadback.cpp main.cpp
31 | BusSpeedReadback_LDADD		= $(OCL_LIBS) $(LIBS)
32 | 
33 | DeviceMemory_SOURCES	= DeviceMemory.cpp main.cpp
34 | DeviceMemory_LDADD		= $(OCL_LIBS) $(LIBS)
35 | 
36 | KernelCompile_SOURCES	= KernelCompile.cpp main.cpp
37 | KernelCompile_LDADD		= $(OCL_LIBS) $(LIBS)
38 | 
39 | MaxFlops_SOURCES	= MaxFlops.cpp main.cpp
40 | MaxFlops_LDADD		= $(OCL_LIBS) $(LIBS)
41 | 
42 | QueueDelay_SOURCES	= QueueDelay.cpp main.cpp
43 | QueueDelay_LDADD	= $(OCL_LIBS) $(LIBS)
44 | 
45 | 
46 | 


--------------------------------------------------------------------------------
/src/mpi/common/MPIStencilUtil.h:
--------------------------------------------------------------------------------
 1 | #ifndef STENCIL_UTILS_MPI_H
 2 | #define STENCIL_UTILS_MPI_H
 3 | 
 4 | #include "StencilUtil.h"
 5 | 
 6 | 
 7 | // ****************************************************************************
 8 | // Class:  MPIStencilValidater
 9 | //
10 | // Purpose:
11 | //   MPI version of stencil validator.
12 | //   Declares ValidateResult; its definition is not in this header.
13 | // Programmer:  Phil Roth
14 | // Creation:    October 29, 2009
15 | //
16 | // ****************************************************************************
17 | template<class T>
18 | class MPIStencilValidater : public StencilValidater<T>
19 | {
20 | public:
21 |     virtual void ValidateResult( const Matrix2D<T>& exp,
22 |                 const Matrix2D<T>& data,
23 |                 double valErrThreshold,
24 |                 unsigned int nValErrsToPrint ) const;
25 | };
26 | 
27 | 
28 | // ****************************************************************************
29 | // Class:  MPIStencilTimingReporter
30 | //
31 | // Purpose:
32 | //   MPI version of stencil timing reporter.
33 | //   Declares ReportTimings; its definition is not in this header.
34 | // Programmer:  Phil Roth
35 | // Creation:    October 29, 2009
36 | //
37 | // ****************************************************************************
38 | class MPIStencilTimingReporter : public StencilTimingReporter
39 | {
40 | public:
41 |     virtual void ReportTimings( ResultDatabase& resultDB ) const;
42 | };
43 | 
44 | 
45 | #endif // STENCIL_UTILS_MPI_H
46 | 


--------------------------------------------------------------------------------
/src/cuda/common/support.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPPORT_H
 2 | #define SUPPORT_H
 3 | 
 4 | #include <cuda.h>
 5 | #include <cuda_runtime.h>
 6 | #include "cudacommon.h"
 7 | #include <iostream>
 8 | using std::cin;
 9 | using std::cout;
10 | 
11 | // ****************************************************************************
12 | // Method:  findAvailBytes
13 | //
14 | // Purpose: returns maximum number of bytes *allocatable* (likely less than
15 | //          device memory size) on the device.  Tries cudaMalloc at the total
16 | //          memory size and backs off 1 MiB after every failed attempt.
17 | // Arguments: None.
18 | // Returns:   size of the first allocation that succeeded, or 0 if none did.
19 | // Programmer:  Collin McCurdy
20 | // Creation:    June 8, 2010
21 | //
22 | // ****************************************************************************
23 | inline unsigned long
24 | findAvailBytes(void)
25 | {
26 |     int device;
27 |     cudaGetDevice(&device);
28 |     CHECK_CUDA_ERROR();
29 |     cudaDeviceProp deviceProp;
30 |     cudaGetDeviceProperties(&deviceProp, device);
31 |     CHECK_CUDA_ERROR();
32 |     const unsigned long step = 1024*1024;   // back-off granularity (1 MiB)
33 |     unsigned long avail_bytes = deviceProp.totalGlobalMem;
34 |     void* work = 0;
35 | 
36 |     while (avail_bytes > 0) {
37 |         cudaMalloc(&work, avail_bytes);
38 |         if (cudaGetLastError() == cudaSuccess) {
39 |             break;
40 |         }
41 |         // Failed: forget any pointer value and shrink the request.  Clamp at
42 |         // zero so the unsigned counter cannot wrap around and loop forever.
43 |         work = 0;
44 |         avail_bytes = (avail_bytes > step) ? (avail_bytes - step) : 0;
45 |     }
46 |     cudaFree(work);     // freeing a null pointer performs no operation
47 |     CHECK_CUDA_ERROR();
48 |     return avail_bytes;
49 | }
50 | 
51 | #endif
52 | 


--------------------------------------------------------------------------------
/src/cuda/level1/stencil2d/CUDAStencilFactory.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <string>
 3 | #include <cassert>
 4 | #include "CUDAStencilFactory.h"
 5 | #include "CUDAStencil.h"
 6 | 
 7 | 
 8 | template<class T>
 9 | Stencil<T>*
10 | CUDAStencilFactory<T>::BuildStencil( const OptionParser& options )
11 | {
12 |     // Gather the stencil weights, lRows/lCols and the device list from the
13 |     // parsed options; ExtractOptions receives all of them as output arguments.
14 |     T wCenter;
15 |     T wCardinal;
16 |     T wDiagonal;
17 |     size_t lRows;
18 |     size_t lCols;
19 |     std::vector<long long int> devs;
20 |     this->ExtractOptions( options, wCenter, wCardinal, wDiagonal,
21 |                           lRows, lCols, devs );
22 | 
23 |     // determine which device to use
24 |     // We would really prefer this to be done in main() but
25 |     // since BuildStencil is a virtual function, we cannot change its
26 |     // signature, and OptionParser provides no way to override an
27 |     // option's value after it is set during parsing.
28 |     // The first listed device is used; indexing an empty list would be
29 |     // undefined behavior, so make that case fail loudly in debug builds.
30 |     assert( !devs.empty() );
31 |     int chosenDevice = static_cast<int>( devs[0] );
32 | 
33 |     // the new object is returned to the caller as a raw pointer
34 |     return new CUDAStencil<T>( wCenter,
35 |                                 wCardinal,
36 |                                 wDiagonal,
37 |                                 lRows,
38 |                                 lCols,
39 |                                 chosenDevice );
40 | }
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/src/common/SerialStencilUtil.h:
--------------------------------------------------------------------------------
 1 | #ifndef STENCIL_UTILS_SERIAL_H
 2 | #define STENCIL_UTILS_SERIAL_H
 3 | 
 4 | #include "StencilUtil.h"
 5 | 
 6 | 
 7 | // ****************************************************************************
 8 | // Class:  SerialStencilValidater
 9 | //
10 | // Purpose:
11 | //   Single-processor version of stencil validator.
12 | //   Declares ValidateResult; its definition is not in this header.
13 | // Programmer:  Phil Roth
14 | // Creation:    October 29, 2009
15 | //
16 | // ****************************************************************************
17 | template<class T>
18 | class SerialStencilValidater : public StencilValidater<T>
19 | {
20 | public:
21 |     virtual void ValidateResult( const Matrix2D<T>& exp,
22 |                 const Matrix2D<T>& data,
23 |                 double valErrThreshold,
24 |                 unsigned int nValErrsToPrint ) const;
25 | };
26 | 
27 | 
28 | // ****************************************************************************
29 | // Class:  SerialStencilTimingReporter
30 | //
31 | // Purpose:
32 | //   Single-processor version of stencil timing reporter.
33 | //   Declares ReportTimings; its definition is not in this header.
34 | // Programmer:  Phil Roth
35 | // Creation:    October 29, 2009
36 | //
37 | // ****************************************************************************
38 | class SerialStencilTimingReporter : public StencilTimingReporter
39 | {
40 | public:
41 |     virtual void ReportTimings( ResultDatabase& resultDB ) const;
42 | };
43 | 
44 | 
45 | #endif // STENCIL_UTILS_SERIAL_H
46 | 


--------------------------------------------------------------------------------
/src/common/StencilFactory.cpp:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include "StencilFactory.h"
 3 | #include "InvalidArgValue.h"
 4 | 
 5 | 
 6 | template<class T>
 7 | void
 8 | StencilFactory<T>::CheckOptions( const OptionParser& options ) const
 9 | {
10 |     // Number of iterations must be positive, else InvalidArgValue is thrown.
11 |     // Test the signed value: casting to unsigned first let negatives through.
12 |     long long nIters = options.getOptionInt( "num-iters" );
13 |     if( nIters <= 0 )
14 |     {
15 |         throw InvalidArgValue( "number of iterations must be positive" );
16 |     }
17 |     // no restrictions on weight values, just that we have them
18 | }
19 | 
20 | // Copy the three stencil weight options into the caller's variables.
21 | template<class T>
22 | void
23 | StencilFactory<T>::ExtractOptions( const OptionParser& options,
24 |                                 T& wCenter, T& wCardinal, T& wDiagonal )
25 | {
26 |     // each getOptionFloat result is converted to T by the assignment
27 |     wCenter   = options.getOptionFloat( "weight-center" );
28 |     wCardinal = options.getOptionFloat( "weight-cardinal" );
29 |     wDiagonal = options.getOptionFloat( "weight-diagonal" );
30 | }
31 | 
32 | 
33 | template<class T>
34 | std::vector<long long>
35 | StencilFactory<T>::GetStandardProblemSize( int sizeClass )
36 | {
37 |     // Size classes are 1-based: class k selects probSizes[k - 1].  The lower
38 |     // bound must be 1 (not 0), otherwise class 0 reads probSizes[-1].
39 |     const int probSizes[4] = { 512, 1024, 2048, 4096 };
40 |     if( sizeClass < 1 || sizeClass > 4 )
41 |     {
42 |         throw InvalidArgValue( "Size class must be between 1-4" );
43 |     }
44 |     std::vector<long long> ret( 2, probSizes[sizeClass - 1] );  // same value twice
45 |     return ret;
46 | }
47 | 


--------------------------------------------------------------------------------
/src/opencl/level0/epmpi/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir)/..:$(srcdir)/../../common
 6 | 
 7 | # which compiler to use to compile and link
 8 | CXX= ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | # which flags to use
12 | AM_LDFLAGS = $(OCL_LDFLAGS)
13 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I$(top_srcdir)/src/opencl/common -I$(top_srcdir)/src/mpi/common
14 | L0LIBS = -lSHOCCommonOpenCL -lSHOCCommon $(OCL_LIBS) $(LIBS)
15 | 
16 | # what is the destination for programs built from this directory?
17 | epopencldir = $(bindir)/EP/OpenCL
18 | 
19 | # what programs should be installed in the bin/EP/OpenCL destination?
20 | epopencl_PROGRAMS = BusSpeedDownload \
21 | 						BusSpeedReadback \
22 | 						DeviceMemory \
23 | 						KernelCompile \
24 | 						MaxFlops \
25 | 						QueueDelay
26 | 
27 | BusSpeedDownload_SOURCES	= BusSpeedDownload.cpp main.cpp
28 | BusSpeedDownload_LDADD		= $(L0LIBS)
29 | 
30 | BusSpeedReadback_SOURCES	= BusSpeedReadback.cpp main.cpp
31 | BusSpeedReadback_LDADD		= $(L0LIBS)
32 | 
33 | DeviceMemory_SOURCES	= DeviceMemory.cpp main.cpp
34 | DeviceMemory_LDADD		= $(L0LIBS)
35 | 
36 | KernelCompile_SOURCES	= KernelCompile.cpp main.cpp
37 | KernelCompile_LDADD		= $(L0LIBS)
38 | 
39 | MaxFlops_SOURCES	= MaxFlops.cpp main.cpp
40 | MaxFlops_LDADD		= $(L0LIBS)
41 | 
42 | QueueDelay_SOURCES	= QueueDelay.cpp main.cpp
43 | QueueDelay_LDADD	= $(L0LIBS)
44 | 
45 | 
46 | 


--------------------------------------------------------------------------------
/src/mpi/common/MPIHostStencilFactory.cpp:
--------------------------------------------------------------------------------
 1 | #include "mpi.h"
 2 | #include <iostream>
 3 | #include "MPIHostStencilFactory.h"
 4 | #include "MPIHostStencil.h"
 5 | 
 6 | template<class T>
 7 | Stencil<T>*
 8 | MPIHostStencilFactory<T>::BuildStencil( const OptionParser& opts )
 9 | {
10 |     // The stencil weights are read through the StencilFactory<T> base
11 |     // class implementation of ExtractOptions.
12 |     T weightCenter, weightCardinal, weightDiagonal;
13 |     StencilFactory<T>::ExtractOptions( opts, weightCenter, weightCardinal,
14 |                                         weightDiagonal );
15 | 
16 |     // Options specific to this factory: "msize" supplies the two values
17 |     // passed on as grid rows/columns, "iters-per-exchange" the third.
18 |     std::vector<long long> gridDims = opts.getOptionVecInt( "msize" );
19 |     long itersPerHaloExchange = opts.getOptionInt( "iters-per-exchange" );
20 |     size_t gridRows = (size_t)gridDims[0];
21 |     size_t gridCols = (size_t)gridDims[1];
22 |     unsigned int exchangePeriod = (unsigned int)itersPerHaloExchange;
23 | 
24 |     // no further argument is passed, so any trailing default applies
25 |     return new MPIHostStencil<T>( weightCenter, weightCardinal, weightDiagonal,
26 |                                 gridRows, gridCols, exchangePeriod );
27 | }
28 | 
29 | // Delegates option registration to MPI2DGridProgram<T>::AddOptions.
30 | template<class T>
31 | void
32 | MPIHostStencilFactory<T>::AddOptions( OptionParser& opts ) const
33 | {
34 |     MPI2DGridProgram<T>::AddOptions( opts );
35 | }
36 | 
37 | // Runs the option checks of StencilFactory<T>, then of MPI2DGridProgram<T>.
38 | template<class T>
39 | void
40 | MPIHostStencilFactory<T>::CheckOptions( const OptionParser& opts ) const
41 | {
42 |     // let the StencilFactory base class check its options first
43 |     StencilFactory<T>::CheckOptions( opts );
44 | 
45 |     // then check the options handled by MPI2DGridProgram
46 |     MPI2DGridProgram<T>::CheckOptions( opts );
47 | }
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/config/common.mk.in:
--------------------------------------------------------------------------------
 1 | # === Basics ===
 2 | #CC       = @CC@
 3 | #CXX      = @CXX@
 4 | #LD       = @CXX@
 5 | #AR       = @AR@
 6 | #RANLIB   = @RANLIB@
 7 | 
 8 | # In CPPFLAGS, note src/common is from the SHOC source tree, so we must
 9 | # use $(top_srcdir).  In contrast, the files in config used in the build are
10 | # generated as part of the configuration, so we want to find them in the
11 | # build tree - hence we use $(top_builddir) for that -I specification.
12 | #CPPFLAGS += -I$(top_srcdir)/src/common -I$(top_builddir)/config @CPPFLAGS@
13 | CPPFLAGS += -I$(top_srcdir)/src/common -I$(top_builddir)/config
14 | #CFLAGS   += @CFLAGS@
15 | #CXXFLAGS += @CXXFLAGS@
16 | NVCXXFLAGS = @NVCXXFLAGS@
17 | #ARFLAGS  = rcv
18 | #LDFLAGS  = @LDFLAGS@ -L$(top_builddir)/src/common
19 | LDFLAGS  += -L$(top_builddir)/src/common
20 | LIBS     = @LIBS@
21 | 
22 | USE_MPI         = @USE_MPI@
23 | MPICXX          = @MPICXX@
24 | MPI_CPPFLAGS	= -DPARALLEL
25 | 
26 | OCL_CPPFLAGS    = -I$(top_srcdir)/src/opencl/common
27 | OCL_LDFLAGS		= -L$(top_builddir)/src/opencl/common
28 | OCL_LIBS        = -lSHOCCommonOpenCL -lSHOCCommon @OPENCL_LIBS@
29 | 
30 | NVCC            = @NVCC@
31 | CUDA_CXX        = @NVCC@
32 | CUDA_INC        = -I@CUDA_INCDIR@ -I$(top_srcdir)/src/cuda/common
33 | CUDA_LDFLAGS	= -L$(top_builddir)/src/cuda/common
34 | CUDA_CPPFLAGS   = @CUDA_CPPFLAGS@ -I$(top_srcdir)/src/cuda/common
35 | # Only probe for CUDA libraries (via find_cuda_libs.sh) when CUDA is enabled.
36 | USE_CUDA        = @USE_CUDA@
37 | ifeq ($(USE_CUDA),yes)
38 | CUDA_LIBS		:= -lSHOCCommon $(shell $(top_srcdir)/config/find_cuda_libs.sh @NVCC@)
39 | else
40 | CUDA_LIBS       =
41 | endif
42 | 
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/src/common/StencilUtil.h:
--------------------------------------------------------------------------------
 1 | #ifndef STENCIL_UTILS_H
 2 | #define STENCIL_UTILS_H
 3 | 
 4 | #include "Matrix2D.h"
 5 | #include "ResultDatabase.h"
 6 | #include "ValidateMatrix2D.h"
 7 | 
 8 | 
 9 | // ****************************************************************************
10 | // Class:  StencilValidater
11 | //
12 | // Purpose:
13 | //   Abstract interface for validating the results of stencil operations
14 | //   and printing the validation errors that were found.
15 | //
16 | // Programmer:  Phil Roth
17 | // Creation:    October 29, 2009
18 | // ****************************************************************************
19 | template<class T>
20 | class StencilValidater
21 | {
22 | protected:
23 |     void PrintValidationErrors( std::ostream& s,
24 |                 const std::vector<ValidationErrorInfo<T> >& validationErrors,
25 |                 unsigned int nValErrsToPrint ) const;
26 | public:
27 |     virtual ~StencilValidater( void ) { }   // safe deletion via the interface
28 |     virtual void ValidateResult( const Matrix2D<T>& exp,
29 |                 const Matrix2D<T>& data,
30 |                 double valErrThreshold,
31 |                 unsigned int nValErrsToPrint ) const = 0;
32 | };
33 | 
34 | // ****************************************************************************
35 | // Class:  StencilTimingReporter
36 | //
37 | // Purpose:
38 | //   Abstract interface for reporting timing results of stencil operations.
39 | //
40 | // Programmer:  Phil Roth
41 | // Creation:    October 29, 2009
42 | //
43 | // ****************************************************************************
44 | class StencilTimingReporter
45 | {
46 | public:
47 |     virtual ~StencilTimingReporter( void ) { }   // safe deletion via the interface
48 |     virtual void ReportTimings( ResultDatabase& resultDB ) const = 0;
49 | };
50 | 
51 | #endif // STENCIL_UTILS_H
52 | 


--------------------------------------------------------------------------------
/src/cuda/level1/stencil2d/MPICUDAStencil.h:
--------------------------------------------------------------------------------
 1 | #ifndef MPICUDASTENCIL_H
 2 | #define MPICUDASTENCIL_H
 3 | 
 4 | #include <fstream>
 5 | #include <vector>
 6 | #include "CUDAStencil.h"
 7 | #include "MPI2DGridProgram.h"
 8 | 
 9 | 
10 | // ****************************************************************************
11 | // Class:  MPICUDAStencil
12 | //
13 | // Purpose:
14 | //   MPI implementation of CUDA stencil
15 | //   (derives from both CUDAStencil<T> and MPI2DGridProgram<T>).
16 | // Programmer:  Phil Roth
17 | // Creation:    November 5, 2009
18 | //
19 | // ****************************************************************************
20 | template<class T>
21 | class MPICUDAStencil : public CUDAStencil<T>, public MPI2DGridProgram<T>
22 | {
23 | private:
24 |     std::ofstream ofs;  // NOTE(review): presumably the stream used when dumpData is set - confirm
25 |     bool dumpData;
26 |     // private virtual hook; both buffer arguments live in device global memory
27 |     virtual void DoPreIterationWork( T* currBuf,    // in device global memory
28 |                                         T* altBuf,  // in device global memory
29 |                                         Matrix2D<T>& mtx,
30 |                                         unsigned int iter );
31 | 
32 | public:
33 |     MPICUDAStencil( T _wCenter,
34 |                     T _wCardinal,
35 |                     T _wDiagonal,
36 |                     size_t _lRows,
37 |                     size_t _lCols,
38 |                     size_t _mpiGridRows,
39 |                     size_t _mpiGridCols,
40 |                     unsigned int _nItersPerHaloExchange,
41 |                     int _deviceIdx = 0,
42 |                     bool dumpData = false );
43 |     // function-call operator: takes the matrix and an iteration count
44 |     virtual void operator()( Matrix2D<T>&, unsigned int nIters );
45 | };
46 | 
47 | #endif // MPICUDASTENCIL_H
48 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | Copyright (c) 2011, UT-Battelle, LLC
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 | * Redistributions of source code must retain the above copyright
 9 |   notice, this list of conditions and the following disclaimer.
10 | * Redistributions in binary form must reproduce the above copyright
11 |   notice, this list of conditions and the following disclaimer in the
12 |   documentation and/or other materials provided with the distribution.
13 | * Neither the name of Oak Ridge National Laboratory, nor UT-Battelle, LLC, nor
14 |   the names of its contributors may be used to endorse or promote products
15 |   derived from this software without specific prior written permission.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | 
28 | 


--------------------------------------------------------------------------------
/src/opencl/level1/stencil2d/CommonOpenCLStencilFactory.h:
--------------------------------------------------------------------------------
 1 | #ifndef COMMONOPENCLSTENCILFACTORY_H
 2 | #define COMMONOPENCLSTENCILFACTORY_H
 3 | 
 4 | #include <vector>
 5 | #include "StencilFactory.h"
 6 | #include "support.h"
 7 | 
 8 | // ****************************************************************************
 9 | // Class:  CommonOpenCLStencilFactory
10 | //
11 | // Purpose:
12 | //   OpenCL implementation of the stencil factory.
13 | //   Stores the OpenCL device, context, and command queue given at construction.
14 | // Programmer:  Phil Roth
15 | // Creation:    October 28, 2009
16 | //
17 | // ****************************************************************************
18 | template<class T>
19 | class CommonOpenCLStencilFactory : public StencilFactory<T>
20 | {
21 | protected:
22 |     cl_device_id dev;           // copied from the constructor argument _dev
23 |     cl_context ctx;             // copied from the constructor argument _ctx
24 |     cl_command_queue queue;     // copied from the constructor argument _queue
25 |     // NOTE(review): presumably fills the reference parameters from options - confirm
26 |     void ExtractOptions( const OptionParser& options,
27 |                             T& wCenter,
28 |                             T& wCardinal,
29 |                             T& wDiagonal,
30 |                             size_t& lRows,
31 |                             size_t& lCols );
32 | 
33 | public:
34 |     CommonOpenCLStencilFactory( std::string _sname,
35 |                                 cl_device_id _dev,
36 |                                 cl_context _ctx,
37 |                                 cl_command_queue _queue )
38 |       : StencilFactory<T>( _sname ),
39 |         dev( _dev ),
40 |         ctx( _ctx ),
41 |         queue( _queue )
42 |     {
43 |         // nothing else to do
44 |     }
45 |     // overrides StencilFactory<T>::CheckOptions, which is pure virtual there
46 |     virtual void CheckOptions( const OptionParser& options ) const;
47 | };
48 | 
49 | #endif // COMMONOPENCLSTENCILFACTORY_H
50 | 


--------------------------------------------------------------------------------
/src/common/Matrix2DFileSupport.cpp:
--------------------------------------------------------------------------------
 1 | #ifndef MATRIX2DFILESUPPORT_H
 2 | #define MATRIX2DFILESUPPORT_H
 3 | // Primary template: not meant to be used; see the float/double versions.
 4 | template<class T>
 5 | std::string
 6 | GetMatrixFileName( std::string baseName )
 7 | {
 8 |     // only the explicit specializations are expected to be instantiated
 9 |     assert( false );
10 |     return std::string();
11 | }
12 | // float: "<baseName>-sp.dat".  inline so that several includers do not clash.
13 | template<>
14 | inline std::string
15 | GetMatrixFileName<float>( std::string baseName )
16 | {
17 |     return baseName + "-sp.dat";
18 | }
19 | // double: "<baseName>-dp.dat".  inline so that several includers do not clash.
20 | template<>
21 | inline std::string
22 | GetMatrixFileName<double>( std::string baseName )
23 | {
24 |     return baseName + "-dp.dat";
25 | }
26 | 
27 | // Saves m to fileName (binary); false if the open fails, else WriteTo's result.
28 | template<class T>
29 | bool
30 | SaveMatrixToFile( const Matrix2D<T>& m, std::string fileName )
31 | {
32 |     bool ok = true;
33 |     // open flags are std::-qualified so no using-directive is required
34 |     std::ofstream ofs( fileName.c_str(), std::ios::out | std::ios::binary );
35 |     if( ofs.is_open() )
36 |     {
37 |         ok = m.WriteTo( ofs );
38 |         ofs.close();
39 |     }
40 |     else
41 |     {
42 |         std::cerr << "Unable to write matrix to file \'" << fileName << "\'" << std::endl;
43 |         ok = false;
44 |     }
45 |     return ok;
46 | }
47 | 
48 | // Loads m from fileName (binary); false if the open fails, else ReadFrom's result.
49 | template<class T>
50 | bool
51 | ReadMatrixFromFile( Matrix2D<T>& m, std::string fileName )
52 | {
53 |     bool ok = true;
54 |     // open flags are std::-qualified so no using-directive is required
55 |     std::ifstream ifs( fileName.c_str(), std::ios::in | std::ios::binary );
56 |     if( ifs.is_open() )
57 |     {
58 |         ok = m.ReadFrom( ifs );
59 |         ifs.close();
60 |     }
61 |     else
62 |     {
63 |         std::cerr << "Unable to read matrix from file \'" << fileName << "\'" << std::endl;
64 |         ok = false;
65 |     }
66 |     return ok;
67 | }
68 | 
69 | #endif // MATRIX2DFILESUPPORT_H
70 | 


--------------------------------------------------------------------------------
/data/devices.csv:
--------------------------------------------------------------------------------
 1 | device,name,Host CPU,Host Memory,PCIe Gen (Host),Motherboard,Host OS,Driver Version
 2 | hd5870,ATI Radeon HD5870,,,,,,
 3 | gtx580,NV GeForce GTX580,,,,,,
 4 | gtx680,NV GeForce GTX680,,,,,,
 5 | gtx690,NV GeForce GTX690,,,,,,
 6 | gtx480,NV GeForce GTX480,,,,,,
 7 | gtx980,NV GeForce GTX980,Intel i5-2550K 3.30 GHz,"8 GB DDR3, 1600 MHz",2,GIGABYTE GA-Z68MA-D2H-B3,Linux 3.13.0-39,
 8 | m2070ecc,NV Tesla M2070-ECC,,,,,,
 9 | c2050noecc,NV Tesla C2050-No ECC,,,,,,
10 | c1060,NV Tesla C1060,,,,,,
11 | ion,NV ION,,,,,,
12 | nehalem,Intel 2.27Ghz Nehalem,,,,,,
13 | gtx570,NV GeForce GTX570,,,,,,
14 | gtx465,NV GeForce GTX465,,,,,,
15 | gtx470,NV GeForce GTX470,,,,,,
16 | hd5750,ATI Radeon HD5750,,,,,,
17 | hd5770,ATI Radeon HD5770,,,,,,
18 | hd5850,ATI Radeon HD5850,,,,,,
19 | hd5970,ATI Radeon HD5970,,,,,,
20 | m2090,NV Tesla M2090,,,,,,
21 | sb00,Intel Core i7-2600,,,,,,
22 | haswellgt2,Intel GT2 iGPU,Intel Core i7-4770,"16 GB DDR3, 1600MHz",3,Aspire T3-605,Linux 3.13.0-34,Intel Beignet 1.0.0
23 | haswell,Intel Core i7-4770,Intel Core i7-4770,"16 GB DDR3, 1600MHz",3,Aspire T3-605,Linux 3.13.0-34,Intel OCL 1.2.0.117
24 | hd7970,ATI Radeon HD7970,,,,,,
25 | hd5670,ATI Radeon HD5670,,,,,,
26 | llano,AMD Llano A8-3850 fGPU,,,,,,
27 | trinity1,AMD Trinity A10-5800K CPU,,,,,,
28 | trinity2,AMD Trinity A10-5800K fGPU,,,,,,
29 | gtxtitan,NV GeForce GTX Titan,,,,,,
30 | jetsontk1,NV Jetson TK1,,,,,,
31 | K40,NVIDIA K40c,Intel E5520 Nehalem 2.27 GHz,12 GB DDR3,2,Supermicro X8DTG-QF,,
32 | gtx750TiSC,NVIDIA EVGA GeForce 750 Ti Superclocked,Intel Xeon W3505 2.53 GHz,12 GB DDR3-1333,2,,Ubuntu 12.04,NV 331.67
33 | w9100,AMD FirePro 9100,,,,,,
34 | 


--------------------------------------------------------------------------------
/src/mpi/contention/opencl/Makefile.am:
--------------------------------------------------------------------------------
 1 | include $(top_builddir)/config/config.mk
 2 | include $(top_builddir)/config/targets.mk
 3 | 
 4 | # How to find source files
 5 | VPATH = $(srcdir):$(srcdir)/..:$(top_srcdir)/src/opencl/level0
 6 | 
 7 | # Which compiler to use?
 8 | CXX = ${MPICXX}
 9 | CXXLD = ${MPICXX}
10 | 
11 | AM_LDFLAGS = $(OCL_LDFLAGS) -L$(top_builddir)/src/mpi/common
12 | AM_CPPFLAGS = $(OCL_CPPFLAGS) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common
13 | 
14 | # What is the destination for programs built from this directory?
15 | epopencldir = $(bindir)/EP/OpenCL
16 | 
17 | # What programs should be installed to that destination?
18 | epopencl_PROGRAMS = BusCont
19 | 
20 | # How to build those programs?
21 | # Note: we would prefer to put BusSpeedDownload and main.cpp in the 
22 | # SOURCES list.  However, our VPATH must contain src/opencl/level0 
23 | # so that our build can find the BusSpeedDownload.cpp file.  Since
24 | # this directory is built after that src/opencl/level0 directory is
25 | # built, that directory already has a BusSpeedDownload.o and a main.o file.
26 | # With our VPATH, gnumake finds those existing .o files and doesn't build
27 | # them here.  This isn't as much a problem for BusSpeedDownload.o, which
28 | # is built the same as for the non-contention tests, but the main.cpp file
29 | # we use is different than the main.cpp file used in the non-contention tests.
30 | #
31 | # Hence, we have to list the object files for those files in our LDADD list.
32 | #
33 | BusCont_SOURCES = OCLDriver.cpp \
34 | 					BusCont.cpp \
35 | 					bcmain.cpp
36 | BusCont_LDADD = $(top_builddir)/src/opencl/level0/BusSpeedDownload.o -lSHOCCommonMPI $(OCL_LIBS) $(LIBS)
37 | 
38 | 


--------------------------------------------------------------------------------
/config/conf-titan.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Titan is a Cray XK7 with NVIDIA K20X (Kepler) GPUs, one per node.
 4 | 
 5 | # In the following, we are building with the Cray compiler drivers (named
 6 | # cc for C, CC for C++).  These drivers know how to find CUDA and OpenCL, 
 7 | # as long as the CUDA module is loaded when we configure, and they also know 
 8 | # how to build MPI programs.
 9 | # However, during configuration the autoconf script tries to run the 
10 | # executables it builds and since we expect to be building on the login node,
11 | # some of the libraries the compiler driver links in are not available
12 | # for running the program.
13 | # Thus, we must trick configure into thinking we are cross compiling.  The
14 | # --host flag is how we indicate we are cross compiling.
15 | 
16 | # A typical build might look like:
17 | # $ module swap PrgEnv-pgi PrgEnv-gnu
18 | # $ module load craype-accel-nvidia35
19 | # $ sh ./config/conf-titan.sh
20 | # $ make
21 | 
22 | # We explicitly pass MPICXX variable because the SHOC configure script
23 | # only tries more common MPI C++ compiler names like mpicxx.
24 | 
25 | # We explicitly pass a value in the CUDA_CPPFLAGS environment variable 
26 | # to limit the number of CUDA architectures the SHOC build will support.
27 | # We do this mainly to reduce the amount of time it takes to build SHOC,
28 | # though it has some beneficial effect on the final sizes of the executables
29 | # compared to the default.
30 | # 
31 | 
32 | 
33 | CC=cc \
34 | CXX=CC \
35 | MPICXX=CC \
36 | sh ./configure \
37 | CUDA_CPPFLAGS="-gencode=arch=compute_35,code=sm_35" \
38 | --host=x86_64-unknown-linux-gnu \
39 | --with-opencl \
40 | --with-cuda \
41 | --with-mpi
42 | 
43 | 


--------------------------------------------------------------------------------
/src/common/StencilFactory.h:
--------------------------------------------------------------------------------
 1 | #ifndef STENCILFACTORY_H
 2 | #define STENCILFACTORY_H
 3 | 
 4 | #include <map>
 5 | #include "OptionParser.h"
 6 | #include "Stencil.h"
 7 | 
 8 | // ****************************************************************************
 9 | // Class:  StencilFactory
10 | //
11 | // Purpose:
12 | //   Abstract class to generate stencils; concrete factories implement
13 | //   BuildStencil and CheckOptions.
14 | // Programmer:  Phil Roth
15 | // Creation:    October 28, 2009
16 | //
17 | // ****************************************************************************
18 | template<class T>
19 | class StencilFactory
20 | {
21 | public:
22 |     typedef std::map<std::string, StencilFactory*> FactoryMap;
23 | 
24 | private:
25 |     // map of class name to a StencilFactory object
26 |     // would be much easier if C++ classes were first class objects
27 |     // so that we could programmatically construct a class name and
28 |     // then create an instance of that class
29 |     static FactoryMap* factoryMap;
30 |     // stencil name supplied to the constructor
31 |     std::string sname;
32 | 
33 | protected:
34 |     void ExtractOptions( const OptionParser& options,
35 |                         T& wCenter,
36 |                         T& wCardinal,
37 |                         T& wDiagonal );
38 | 
39 | public:
40 |     StencilFactory( std::string _sname )
41 |       : sname( _sname )
42 |     {
43 |         // nothing else to do
44 |     }
45 |     virtual ~StencilFactory( void ) { }
46 |     // const so the name can also be read from const members such as CheckOptions
47 |     std::string GetStencilName( void ) const { return sname; }
48 |     // interface implemented by concrete factories
49 |     virtual Stencil<T>* BuildStencil( const OptionParser& options ) = 0;
50 |     virtual void CheckOptions( const OptionParser& options ) const = 0;
51 | 
52 |     static std::vector<long long> GetStandardProblemSize( int sizeClass );
53 | };
54 | 
55 | #endif // STENCILFACTORY_H
56 | 


--------------------------------------------------------------------------------
/src/mpi/common/ParallelHelpers.h:
--------------------------------------------------------------------------------
 1 | #ifndef PARALLEL_HELPERS_H
 2 | #define PARALLEL_HELPERS_H
 3 | 
 4 | #include <mpi.h>
 5 | #include "GetMPIType.h"
 6 | 
 7 | // ****************************************************************************
 8 | // File:  ParallelHelpers.h
 9 | //
10 | // Purpose:
11 | //   Various C++ encapsulations of MPI routines
12 | //
13 | // Programmer:  Jeremy Meredith
14 | // Creation:    August 14, 2009
15 | //
16 | // Modifications:
17 | //   Jeremy Meredith, Tue Jan 12 14:39:40 EST 2010
18 | //   Added ParAllGather.
19 | //
20 | // ****************************************************************************
21 | // Returns the sum of val over all ranks of comm (via MPI_Allreduce/MPI_SUM).
22 | template<class T>
23 | T ParSumAcrossProcessors(const T &val, MPI_Comm comm)
24 | {
25 |     T total;
26 |     const MPI_Datatype mpiType = GetMPIType(val);
27 |     MPI_Allreduce((void*)&val, &total, 1, mpiType, MPI_SUM, comm);
28 |     return total;
29 | }
30 | // Gathers one value per rank onto rank 0 of comm; other ranks get an empty vector.
31 | template<class T>
32 | std::vector<T> ParGather(const T &val, MPI_Comm comm)
33 | {
34 |     int rank, size;
35 |     MPI_Comm_size(comm, &size);
36 |     MPI_Comm_rank(comm, &rank);
37 |     std::vector<T> retval;
38 |     if (rank==0)                // only the root needs receive storage
39 |         retval.resize(size);
40 |     MPI_Datatype t = GetMPIType(val);
41 |     T* recvbuf = retval.empty() ? static_cast<T*>(0) : &(retval[0]);  // never index an empty vector
42 |     MPI_Gather((void*)(&val), 1, t,
43 |                recvbuf, 1, t, 0, comm);
44 |     return retval;
45 | }
46 | // Gathers one value from every rank of comm; every rank gets the full vector.
47 | template<class T>
48 | std::vector<T> ParAllGather(const T &val, MPI_Comm comm)
49 | {
50 |     int size = 0;                       // number of ranks in comm
51 |     MPI_Comm_size(comm, &size);
52 |     std::vector<T> retval;
53 |     retval.resize(size);
54 |     MPI_Datatype t = GetMPIType(val);
55 |     MPI_Allgather((void*)(&val), 1, t,
56 |                   &(retval[0]), 1, t,
57 |                   comm);
58 |     return retval;
59 | }
60 | 
61 | #endif
62 | 


--------------------------------------------------------------------------------
/src/opencl/level1/stencil2d/MPIOpenCLStencil.h:
--------------------------------------------------------------------------------
 1 | #ifndef MPIOPENCLSTENCIL_H
 2 | #define MPIOPENCLSTENCIL_H
 3 | 
 4 | #include <fstream>
 5 | #include <vector>
 6 | #include "OpenCLStencil.h"
 7 | #include "MPI2DGridProgram.h"
 8 | 
 9 | 
10 | // ****************************************************************************
11 | // Class:  MPIOpenCLStencil
12 | //
13 | // Purpose:
14 | //   MPI implementation of OpenCL stencil
15 | //   (derives from both OpenCLStencil<T> and MPI2DGridProgram<T>).
16 | // Programmer:  Phil Roth
17 | // Creation:    November 5, 2009
18 | //
19 | // ****************************************************************************
20 | template<class T>
21 | class MPIOpenCLStencil : public OpenCLStencil<T>, public MPI2DGridProgram<T>
22 | {
23 | private:
24 |     std::ofstream ofs;  // NOTE(review): presumably the stream used when dumpData is set - confirm
25 |     bool dumpData;
26 |     // NOTE(review): presumably east/west halo staging buffers - confirm in the .cpp
27 |     T* eData;
28 |     T* wData;
29 |     // private virtual hook taking two cl_mem buffers, the matrix, an iteration, a queue
30 |     virtual void DoPreIterationWork( cl_mem buf,
31 |                                         cl_mem altbuf,
32 |                                         Matrix2D<T>& mtx,
33 |                                         unsigned int iter,
34 |                                         cl_command_queue queue );
35 | 
36 | public:
37 |     MPIOpenCLStencil( T wCenter,
38 |                     T wCardinal,
39 |                     T wDiagonal,
40 |                     size_t _lRows,
41 |                     size_t _lCols,
42 |                     size_t _mpiGridRows,
43 |                     size_t _mpiGridCols,
44 |                     unsigned int _nItersPerHaloExchange,
45 |                     cl_device_id dev,
46 |                     cl_context ctx,
47 |                     cl_command_queue queue,
48 |                     bool _dumpData = false );
49 |     virtual ~MPIOpenCLStencil( void );
50 |     // function-call operator: takes the matrix and an iteration count
51 |     virtual void operator()( Matrix2D<T>&, unsigned int nIters );
52 | };
53 | 
54 | #endif // MPIOPENCLSTENCIL_H
55 | 


--------------------------------------------------------------------------------
/LICENSE-CUDPP.txt:
--------------------------------------------------------------------------------
 1 | Some portions of the source code are based on the CUDA Data Parallel Primitives
 2 | Library and are subject to the following. 
 3 | 
 4 | Copyright (c) 2007-2011 The Regents of the University of California, Davis
 5 | campus ("The Regents") and NVIDIA Corporation ("NVIDIA"). All rights reserved.
 6 | 
 7 | Redistribution and use in source and binary forms, with or without modification, 
 8 | are permitted provided that the following conditions are met:
 9 | 
10 |     * Redistributions of source code must retain the above copyright notice, 
11 |       this list of conditions and the following disclaimer.
12 |     * Redistributions in binary form must reproduce the above copyright notice, 
13 |       this list of conditions and the following disclaimer in the documentation 
14 |       and/or other materials provided with the distribution.
15 |     * Neither the name of the The Regents, nor NVIDIA, nor the names of its 
16 |       contributors may be used to endorse or promote products derived from this 
17 |       software without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
23 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
24 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
26 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 
27 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
28 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/src/cuda/level1/scan/TPScan.h:
--------------------------------------------------------------------------------
 1 | #ifndef __TPSCAN_H
 2 | #define __TPSCAN_H
 3 | 
 4 | // When using MPICH and MPICH-derived MPI implementations, there is a
 5 | // naming conflict between stdio.h and MPI's C++ binding.
 6 | // Since we do not use the C++ MPI binding, we can avoid the ordering
 7 | // issue by ignoring the C++ MPI binding headers.
 8 | // This #define should be quietly ignored when using other MPI implementations.
 9 | #define MPICH_SKIP_MPICXX
10 | #include "mpi.h"
11 | // Exclusive prefix sum (MPI_SUM) of one value per rank over MPI_COMM_WORLD.
12 | // Templated wrapper for MPI_Exscan; only float and double are specialized here.
13 | template <class T>
14 | inline void globalExscan(T* local_result, T* global_result);
15 | // float -> MPI_FLOAT (per the MPI standard, the result on rank 0 is undefined)
16 | template <>
17 | inline void globalExscan(float* local_result, float* global_result)
18 | {
19 |    MPI_Exscan(local_result, global_result, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);
20 | }
21 | // double -> MPI_DOUBLE (same rank-0 caveat as the float version)
22 | template <>
23 | inline void globalExscan(double* local_result, double* global_result)
24 | {
25 |    MPI_Exscan(local_result, global_result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
26 | }
27 | // Declarations only; NOTE(review): definitions presumably live in the CUDA source - confirm.
28 | template<class T>
29 | void
30 | LaunchReduceKernel( int num_blocks,
31 |                     int num_threads,
32 |                     int smem_size,
33 |                     T* d_idata,
34 |                     T* d_odata,
35 |                     int size );
36 | // NOTE(review): presumably scans the per-block sums from the reduce step - confirm.
37 | template<class T>
38 | void
39 | LaunchTopScanKernel( int num_blocks,
40 |                      int num_threads,
41 |                      int smem_size,
42 |                      T* d_block_sums,
43 |                      int size );
44 | // blockSize is a compile-time template parameter; num_threads is passed at run time.
45 | template<class T, class vecT, int blockSize>
46 | void
47 | LaunchBottomScanKernel( int num_blocks,
48 |                         int num_threads,
49 |                         int smem_size,
50 |                         T* g_idata,
51 |                         T* g_odata,
52 |                         T* d_block_sums,
53 |                         int size );
54 | 
55 | #endif // __TPSCAN_H
56 | 


--------------------------------------------------------------------------------
/src/common/Stencil.h:
--------------------------------------------------------------------------------
 1 | #ifndef STENCIL_H
 2 | #define STENCIL_H
 3 | 
 4 | #include <string>
 5 | #include <functional>
 6 | #include "Matrix2D.h"
 7 | 
 8 | // ****************************************************************************
 9 | // Class:  Stencil
10 | //
11 | // Purpose:
12 | //   9-point stencil.
13 | //
14 | // Programmer:  Phil Roth
15 | // Creation:    October 28, 2009
16 | //
17 | // ****************************************************************************
18 | template<class T>
19 | class Stencil
20 | {
21 | public:
22 |     // functor typedefs, declared directly because std::binary_function
23 |     // (deprecated in C++11) was removed from the library in C++17
24 |     typedef Matrix2D<T>& first_argument_type;
25 |     typedef unsigned int second_argument_type;
26 |     typedef void result_type;
27 | 
28 | protected:
29 |     T wCenter;      // weight applied to the stencil 'center'
30 |     T wCardinal;    // weight applied to the sum of the NSEW values
31 |     T wDiagonal;    // weight applied to the sum of the diagonal values
32 | 
33 |     T GetCenterWeight( void ) const { return wCenter; }
34 |     T GetCardinalWeight( void ) const { return wCardinal; }
35 |     T GetDiagonalWeight( void ) const { return wDiagonal; }
36 | 
37 | public:
38 |     Stencil( T _wCenter, T _wCardinal, T _wDiagonal )
39 |       : wCenter( _wCenter ),
40 |         wCardinal( _wCardinal ),
41 |         wDiagonal( _wDiagonal )
42 |     {
43 |         // nothing else to do
44 |     }
45 | 
46 |     virtual ~Stencil( void ) { }
47 | 
48 |     /*
49 |      * This is a 9-point stencil using three weights:
50 |      *   wCenter is applied to the stencil 'center'
51 |      *   wCardinal is applied to the sum of the stencil NSEW values
52 |      *   wDiagonal is applied to the sum of the stencil diagonal values
53 |      *
54 |      * note two things:
55 |      *   We use the overall boundary values but do not update them.
56 |      *   We apply wCardinal and wDiagonal *only* to the sum of the NSEW and
57 |      *     diagonal values. We don't do any other averaging, etc.
58 |      */
59 |     virtual void operator()( Matrix2D<T>& m, unsigned int nIters ) = 0;
60 | };
61 | 
62 | #endif // STENCIL_H
63 | 


--------------------------------------------------------------------------------
/src/opencl/common/OpenCLNodePlatformContainer.h:
--------------------------------------------------------------------------------
 1 | #ifndef OPENCL_NODE_PLATFORM_CONTAINER_H
 2 | #define OPENCL_NODE_PLATFORM_CONTAINER_H
 3 | 
 4 | #include <iostream>
 5 | #include <string>
 6 | #include <list>
 7 | #include "support.h"
 8 | #include "OpenCLPlatform.h"
 9 | #include "NodePlatformContainer.h"
10 | 
11 | using namespace std;
12 | 
13 | // ****************************************************************************
14 | // Class: OpenCLNodePlatformContainer
15 | //
16 | // Purpose:
17 | //   A container for all OpenCL platforms on a node.
18 | //
19 | // Notes:     Extends the generic node platform container class
20 | //
21 | // Programmer: Gabriel Marin
22 | // Creation: September 22, 2009
23 | //
24 | // Modifications:
25 | //
26 | // ****************************************************************************
27 | namespace SHOC {
28 |     // Container of OpenCLPlatform objects; extends NodePlatformContainer.
29 |     class OpenCLNodePlatformContainer : public NodePlatformContainer<OpenCLPlatform>
30 |     {
31 |     private:
32 |         static const int MAGIC_KEY_OPENCL_NODE_CONTAINER;
33 | 
34 |     public:
35 |         // constructor collects information about all platforms on this node
36 |         OpenCLNodePlatformContainer (bool do_initialize = true);
37 |         OpenCLNodePlatformContainer (const OpenCLNodePlatformContainer &ondc);
38 |         OpenCLNodePlatformContainer& operator= (const OpenCLNodePlatformContainer &ondc);
39 | 
40 |         ~OpenCLNodePlatformContainer () { }
41 |         // stream types are std::-qualified so they do not need a using-directive
42 |         void Print (std::ostream &os) const;
43 | 
44 |         void initialize();
45 |         // string-stream serialization hooks
46 |         virtual void writeObject (std::ostringstream &oss) const;
47 |         virtual void readObject (std::istringstream &iss);
48 | 
49 |         bool operator< (const OpenCLNodePlatformContainer &ndc) const;
50 |         bool operator> (const OpenCLNodePlatformContainer &ndc) const;
51 |         bool operator== (const OpenCLNodePlatformContainer &ndc) const;
52 |     };
53 | } // namespace SHOC
54 | 
55 | 
56 | #endif
57 | 


--------------------------------------------------------------------------------
/src/common/ValidateMatrix2D.h:
--------------------------------------------------------------------------------
 1 | #ifndef VALIDATE_H
 2 | #define VALIDATE_H
 3 | 
 4 | #include <functional>
 5 | #include <vector>
 6 | #include "Matrix2D.h"
 7 | 
 8 | 
 9 | // ****************************************************************************
10 | // Struct:  ValidationErrorInfo
11 | //
12 | // Purpose:
13 | //   Record describing one validation error originating in a 2D grid.
14 | //
15 | // Programmer:  Phil Roth
16 | // Creation:    October 28, 2009
17 | //
18 | // ****************************************************************************
19 | template<class T>
20 | struct ValidationErrorInfo
21 | {
22 |     // position in the 2D grid (presumably row i, column j - confirm)
23 |     int i;
24 |     int j;
25 | 
26 |     // presumably the value found, the value expected, and their relative error
27 |     T val;
28 |     T exp;
29 |     double relErr;
30 | 
31 |     // member-wise constructor: copies each argument into its field
32 |     ValidationErrorInfo( int _i, int _j,
33 |                             T _val, T _exp, double _relErr )
34 |       : i( _i ), j( _j ),
35 |         val( _val ), exp( _exp ),
36 |         relErr( _relErr )
37 |     {
38 |         // all work is done in the initializer list
39 |     }
40 | };
41 | 
42 | // ****************************************************************************
43 | // Class:  Validate
44 | //
45 | // Purpose:
46 | //   Compares 2D matrices.
47 | //
48 | // Programmer:  Phil Roth
49 | // Creation:    October 28, 2009
50 | //
51 | // ****************************************************************************
52 | template<class T>
53 | class Validate
54 | {
55 | public:
56 |     // functor typedefs (std::binary_function was removed in C++17)
57 |     typedef const Matrix2D<T>& first_argument_type;
58 |     typedef const Matrix2D<T>& second_argument_type;
59 |     typedef std::vector<ValidationErrorInfo<T> > result_type;
60 | 
61 |     Validate( double _relErrThreshold )
62 |       : relErrThreshold( _relErrThreshold ) { }
63 |     std::vector<ValidationErrorInfo<T> > operator()( const Matrix2D<T>& s, const Matrix2D<T>& t );
64 | private:
65 |     double relErrThreshold;     // threshold stored at construction
66 | };
67 | 
68 | #endif // VALIDATE_H
69 | 


--------------------------------------------------------------------------------
/src/common/Matrix2D.cpp:
--------------------------------------------------------------------------------
 1 | #ifdef HAVE_STDINT_H
 2 | #include <stdint.h>
 3 | #endif // HAVE_STDINT_H
 4 | #include "Matrix2D.h"
 5 | 
 6 | #ifdef _WIN32
 7 | typedef unsigned int uint32_t;
 8 | #endif
 9 | 
10 | 
11 | template<class T>
12 | bool
13 | Matrix2D<T>::ReadFrom( std::istream& s )
14 | {
15 |     uint32_t nRowsUint;
16 |     uint32_t nColsUint;
17 | 
18 |     s.read( (char*)&nRowsUint, sizeof(nRowsUint) );
19 |     s.read( (char*)&nColsUint, sizeof(nColsUint) );
20 | 
21 |     uint32_t nPaddedColsUint = FindNumPaddedColumns( nColsUint, pad );
22 | 
23 |     T* newDataFlat = new T[nRowsUint * nPaddedColsUint];
24 |     T** newData = new T*[nRowsUint];
25 |     for( size_t i = 0; i < nRowsUint; i++ )
26 |     {
27 |         newData[i] = &(newDataFlat[i * nPaddedColsUint]);
28 |         s.read( (char*)newData[i], nColsUint * sizeof(T) );
29 |     }
30 | 
31 |     if( s.good() )
32 |     {
33 |         // we successfully read the matrix
34 |         // release any old data
35 |         delete[] data;
36 |         delete[] flatData;
37 | 
38 |         // re-initialize with new data
39 |         nRows = nRowsUint;
40 |         nColumns = nColsUint;
41 |         nPaddedColumns = nPaddedColsUint;
42 |         flatData = newDataFlat;
43 |         data = newData;
44 |     }
45 |     else
46 |     {
47 |         delete[] newDataFlat;
48 |         delete[] newData;
49 |     }
50 | 
51 |     return s.good();
52 | }
53 | 
54 | 
55 | // note we do not write padding to output file
56 | template<class T>
57 | bool
58 | Matrix2D<T>::WriteTo( std::ostream& s ) const
59 | {
60 |     uint32_t nRowsUint = nRows;
61 |     uint32_t nColsUint = nColumns;
62 | 
63 |     s.write( (const char*)&nRowsUint, sizeof(nRowsUint) );
64 |     s.write( (const char*)&nColsUint, sizeof(nColsUint) );
65 |     for( uint32_t r = 0; r < nRows; r++ )
66 |     {
67 |         s.write( (const char*)data[r], nColumns * sizeof(T) );
68 |     }
69 | 
70 |     return s.good();
71 | }
72 | 
73 | 


--------------------------------------------------------------------------------
/src/opencl/level1/reduction/reduction.cl:
--------------------------------------------------------------------------------
 1 | #ifdef SINGLE_PRECISION
 2 | #define FPTYPE float
 3 | #elif K_DOUBLE_PRECISION
 4 | #pragma OPENCL EXTENSION cl_khr_fp64: enable
 5 | #define FPTYPE double
 6 | #elif AMD_DOUBLE_PRECISION
 7 | #pragma OPENCL EXTENSION cl_amd_fp64: enable
 8 | #define FPTYPE double
 9 | #endif
10 | 
// Sum-reduces the first n elements of g_idata. Each work-group produces one
// partial sum and stores it in g_odata[get_group_id(0)].
//
//   g_idata  input values (read only)
//   g_odata  one output slot per work-group
//   sdata    local scratch; indexed by local id, so it needs at least
//            get_local_size(0) elements
//   n        number of valid elements in g_idata
//
// The tree reduction halves the stride starting from blockSize / 2, so it
// only folds in every element when the local size is a power of two.
__kernel void
reduce(__global const FPTYPE *g_idata, __global FPTYPE *g_odata,
       __local FPTYPE* sdata, const unsigned int n)
{
    const unsigned int tid = get_local_id(0);
    const unsigned int blockSize = get_local_size(0);
    unsigned int i = (get_group_id(0)*(blockSize*2)) + tid;
    const unsigned int gridSize = blockSize*2*get_num_groups(0);

    sdata[tid] = 0;

    // Reduce multiple elements per thread, strided by grid size
    while (i < n)
    {
        FPTYPE pairSum = g_idata[i];
        // The partner element lives blockSize further on; it is past the end
        // of the input when n is not a multiple of 2*blockSize, so guard it.
        if (i + blockSize < n)
        {
            pairSum += g_idata[i+blockSize];
        }
        sdata[tid] += pairSum;
        i += gridSize;
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    // do reduction in shared mem
    for (unsigned int s = blockSize / 2; s > 0; s >>= 1)
    {
        if (tid < s)
        {
            sdata[tid] += sdata[tid + s];
        }
        // every work-item reaches the barrier, including the idle ones
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    // Write result back to global memory
    if (tid == 0)
    {
        g_odata[get_group_id(0)] = sdata[0];
    }
}
46 | 
47 | 
48 | // Currently, CPUs on Snow Leopard only support a work group size of 1
49 | // So, we have a separate version of the kernel which doesn't use
50 | // local memory. This version is only used when the maximum
51 | // supported local group size is 1.
// Serial sum of the first n elements of g_idata, written to g_odata[0].
// Uses no local memory and no work-item ids: every work-item that runs this
// kernel computes the full sum and stores it to the same slot.
__kernel void
reduceNoLocal(__global FPTYPE *g_idata, __global FPTYPE *g_odata,
       unsigned int n)
{
    FPTYPE sum = 0.0f;
    // The index is unsigned to match n; a signed index would overflow
    // before reaching n once n exceeds INT_MAX.
    for (unsigned int i = 0; i < n; i++)
    {
        sum += g_idata[i];
    }
    g_odata[0] = sum;
}
63 | 


--------------------------------------------------------------------------------
/config/conf-osx.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Configure to build OpenCL and CUDA tests.
 4 | 
 5 | # By default, building on recent OS X systems will build 64-bit versions
 6 | # of all libraries and executables.
 7 | #
 8 | # However, if you are using an earlier version of CUDA than 4.0, or
 9 | # are on a Mac without a x86_64 processor, you can add the -m32 flag
10 | # in the configure script below to build 32-bit executables 
11 | # (assuming you are building with gcc - use whatever flags are necessary 
12 | # for your compiler).  For example:
13 | #sh ./configure \
14 | #    CXXFLAGS="-m32" \
15 | #    CFLAGS="-m32" \
16 | #    NVCXXFLAGS="-m32" \
17 | #    --with-opencl --with-cuda 
18 | 
19 | #
20 | # On OS X 10.9 (Mavericks), the Xcode toolchain defaults to using libc++
21 | # as the C++ standard library.  CUDA 6.0's nvcc does not support libc++,
22 | # so we have to specify to use libstdc++ instead.
23 | #
24 | 
25 | #
26 | # The gencode specification here is for a GPU with compute capability 3.0,
27 | # such as a GeForce GT 750M in some recent MacBook Pro laptops.
28 | # Modify it to suit your GPU's compute capability.
29 | #
30 | 
31 | 
# Run configure with the OS X defaults described above. Any arguments given
# to this script are appended, so callers can add or override options
# (for example --prefix=...) without editing the file.
sh ./configure \
    CUDA_CPPFLAGS="-gencode=arch=compute_30,code=sm_30" \
    CXXFLAGS="-stdlib=libstdc++" \
    --with-opencl --with-cuda \
    "$@"
36 | 
37 | # Example simple config for Mavericks (10.9.2) and CUDA 6.0rc, where
38 | # driving with g++ can be problematic.
39 | #sh ./configure \
40 | #    CXX="nvcc" \
41 | #    CPP="nvcc" \
42 | #    --without-mpi \
43 | #    --without-opencl --with-cuda
44 | 
45 | # Another issue on Mavericks (10.9.2) arises when compiling opencl with
46 | # clang.  An alternative is to use gcc-4.8 (tested with the default config
47 | # in homebrew) and the following:
48 | #sh ./configure \
49 | #  CXXFLAGS="-m64" \
50 | #  CFLAGS="-m64" \
51 | #  NVCXXFLAGS="-m64" \
52 | #  CPP="g++-4.8" \
53 | #  CXX="g++-4.8" \
54 | #  --with-opencl --without-cuda --without-mpi
55 | 
56 | 
57 | 


--------------------------------------------------------------------------------
/src/opencl/common/OpenCLPlatform.h:
--------------------------------------------------------------------------------
 1 | #ifndef OPENCL_PLATFORM_H
 2 | #define OPENCL_PLATFORM_H
 3 | 
 4 | #include <iostream>
 5 | #include <string>
 6 | #include <list>
 7 | #include "support.h"
 8 | #include "OpenCLDeviceInfo.h"
 9 | #include "Platform.h"
10 | 
11 | using namespace std;
12 | 
13 | namespace SHOC {
14 | 
15 | // ****************************************************************************
16 | // Class: OpenCLPlatform
17 | //
18 | // Purpose:
19 | //   Implements an OpenCL platform. A platform contains information about
20 | //   zero or more devices.
21 | //
22 | // Notes:     Extends the generic platform class
23 | //
24 | // Programmer: Gabriel Marin
25 | // Creation: September 22, 2009
26 | //
27 | // Modifications:
28 | //
29 | // ****************************************************************************
30 |     class OpenCLPlatform : public Platform<OpenCLDeviceInfo>
31 |     {
32 |     private:
33 |         string platformName;
34 |         string platformVendor;
35 |         string platformVersion;
36 |         string platformExtensions;
37 |         static const int MAGIC_KEY_OPENCL_PLATFORM;
38 | 
39 |         static std::string LookupInfo( cl_platform_id platformID, cl_platform_info paramName );
40 | 
41 |     public:
42 |         // constructer collects information about all devices on this node
43 |         OpenCLPlatform ();
44 |         OpenCLPlatform (cl_platform_id platformID);
45 |         OpenCLPlatform (const OpenCLPlatform &ocp);
46 |         OpenCLPlatform& operator= (const OpenCLPlatform &ocp);
47 | 
48 |         ~OpenCLPlatform () { }
49 | 
50 |         void Print (ostream &os) const;
51 | 
52 |         virtual void writeObject (ostringstream &oss) const;
53 |         virtual void readObject (istringstream &iss);
54 | 
55 |         bool operator< (const OpenCLPlatform &ocp) const;
56 |         bool operator> (const OpenCLPlatform &ocp) const;
57 |         bool operator== (const OpenCLPlatform &ocp) const;
58 |     };
59 | };
60 | 
61 | 
62 | #endif
63 | 


--------------------------------------------------------------------------------
/src/opencl/level1/md/md.cl:
--------------------------------------------------------------------------------
 1 | #ifdef SINGLE_PRECISION
 2 | #define POSVECTYPE float4
 3 | #define FORCEVECTYPE float4
 4 | #define FPTYPE float
 5 | #elif K_DOUBLE_PRECISION
 6 | #pragma OPENCL EXTENSION cl_khr_fp64: enable
 7 | #define POSVECTYPE double4
 8 | #define FORCEVECTYPE double4
 9 | #define FPTYPE double
10 | #elif AMD_DOUBLE_PRECISION
11 | #pragma OPENCL EXTENSION cl_amd_fp64: enable
12 | #define POSVECTYPE double4
13 | #define FORCEVECTYPE double4
14 | #define FPTYPE double
15 | #endif
16 | 
// Accumulates, for the work-item's own entry position[idx], a force
// contribution from each of its neighCount neighbours and stores the total
// in force[idx].
//
// Neighbour k of entry idx is read from neighList[k*inum + idx], so inum is
// the stride between consecutive neighbour slots. A neighbour contributes
// only when its squared distance is below cutsq; the contribution is the
// separation vector scaled by r2inv*r6inv*(lj1*r6inv - lj2).
// The w component of the result is left at zero.
__kernel void compute_lj_force(__global FORCEVECTYPE *force,
                               __global POSVECTYPE *position,
                               const int neighCount,
                               __global int* neighList,
                               const FPTYPE cutsq,
                               const FPTYPE lj1,
                               const FPTYPE lj2,
                               const int inum)
{
    uint idx = get_global_id(0);

    POSVECTYPE ipos = position[idx];
    FORCEVECTYPE f = {0.0f, 0.0f, 0.0f, 0.0f};

    for (int j = 0; j < neighCount; j++)
    {
        int jidx = neighList[j*inum + idx];

        // Uncoalesced read
        POSVECTYPE jpos = position[jidx];

        // Separation vector and its squared length
        FPTYPE delx = ipos.x - jpos.x;
        FPTYPE dely = ipos.y - jpos.y;
        FPTYPE delz = ipos.z - jpos.z;
        FPTYPE r2 = delx*delx + dely*dely + delz*delz;

        // Only neighbours inside the cutoff contribute
        if (r2 < cutsq)
        {
            FPTYPE r2inv = 1.0f/r2;
            FPTYPE r6inv = r2inv * r2inv * r2inv;
            FPTYPE forceC = r2inv*r6inv*(lj1*r6inv - lj2);

            f.x += delx * forceC;
            f.y += dely * forceC;
            f.z += delz * forceC;
        }
    }

    // store the results
    force[idx] = f;
}
61 | 


--------------------------------------------------------------------------------
/src/mpi/common/MPIHostStencil.cpp:
--------------------------------------------------------------------------------
 1 | #include "mpi.h"
 2 | #include <iomanip>
 3 | #include <sstream>
 4 | #include <fstream>
 5 | #include <cassert>
 6 | #include "MPIHostStencil.h"
 7 | 
 8 | 
 9 | template<class T>
10 | MPIHostStencil<T>::MPIHostStencil( T _wCenter,
11 |                                 T _wCardinal,
12 |                                 T _wDiagonal,
13 |                                 size_t _mpiGridRows,
14 |                                 size_t _mpiGridCols,
15 |                                 unsigned int _nItersPerHaloExchange,
16 |                                 bool _dumpData )
17 |   : HostStencil<T>( _wCenter,
18 |                 _wCardinal,
19 |                 _wDiagonal ),
20 |     MPI2DGridProgram<T>( _mpiGridRows,
21 |                 _mpiGridCols,
22 |                 _nItersPerHaloExchange ),
23 |     dumpData( _dumpData )
24 | {
25 |     if( dumpData )
26 |     {
27 |         std::ostringstream fnamestr;
28 |         fnamestr << "host." << std::setw( 4 ) << std::setfill('0') << this->GetCommWorldRank();
29 |         ofs.open( fnamestr.str().c_str() );
30 |     }
31 | }
32 | 
33 | 
34 | template<class T>
35 | void
36 | MPIHostStencil<T>::operator()( Matrix2D<T>& mtx, unsigned int nIters )
37 | {
38 |     if( this->ParticipatingInProgram() )
39 |     {
40 |         HostStencil<T>::operator()( mtx, nIters );
41 |         if( dumpData )
42 |         {
43 |             this->DumpData( ofs, mtx, "after all iterations" );
44 |         }
45 |     }
46 |     MPI_Barrier( MPI_COMM_WORLD );
47 | }
48 | 
49 | 
50 | template<class T>
51 | void
52 | MPIHostStencil<T>::DoPreIterationWork( Matrix2D<T>& mtx, unsigned int iter )
53 | {
54 |     if( (iter % this->GetNumberIterationsPerHaloExchange() ) == 0 )
55 |     {
56 |         if( dumpData )
57 |         {
58 |             this->DumpData( ofs, mtx, "before halo exchange" );
59 |         }
60 |         this->DoHaloExchange( mtx );
61 |         if( dumpData )
62 |         {
63 |             this->DumpData( ofs, mtx, "after halo exchange" );
64 |         }
65 |     }
66 | }
67 | 
68 | 
69 | 


--------------------------------------------------------------------------------
/config/find_cuda_libs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # We do not use nvcc to link CUDA programs, because we may be linking
 4 | # against MPI libraries also, and prefer to allow the MPI compiler
 5 | # drivers to handle the link.
 6 | #
 7 | # This requires us to determine which libraries are needed to link CUDA
 8 | # programs.   We use nvcc -dryrun to determine which libraries are 
 9 | # needed to link CUDA programs.  Prior to the release of CUDA version 6.0,
10 | # the output of nvcc -dryrun included a line of the form LIBRARIES=...
11 | # that indicated all libraries needed to link as -llib flags.  
12 | # The nvcc distributed with CUDA 6.0 no longer lists the libraries 
13 | # in the LIBRARIES line itself, but only as part of the actual
14 | # command that would have been executed to link the executable.
15 | #
16 | # For CUDA < 6.0, we just use the output of the LIBRARIES line.
17 | # For CUDA 6.0, we determine the libraries to use by:
18 | # 
19 | #   Running nvcc -dryrun and saving the LIBRARIES line from the output.
20 | #   Re-running nvcc -dryrun and parsing the link line to remove 
21 | #     everything before the LIBRARIES contents and possibly a -Wl,--end-group
22 | #     specification.
23 | #   
# Require exactly one argument: the nvcc executable to query.
if [ "$#" -ne 1 ]
then
   echo "Usage: $0 <nvcc>" >&2
   echo "  where <nvcc> is the filename or path to the nvcc executable to use." >&2
   exit 1
fi
NVCC=$1
#echo "Using NVCC=$NVCC"

# Probe whether this nvcc accepts the -cudart flag. The result selects which
# part of the -dryrun output is parsed below. "$NVCC" is quoted throughout so
# that a path containing spaces is run as a single command name.
cudart_flag_supported=0
if "$NVCC" -dryrun -cudart shared bogus.cu > /dev/null 2>&1
then
    cudart_flag_supported=1
fi
#echo "cudart_flag_supported=$cudart_flag_supported"

if [ "$cudart_flag_supported" -eq 1 ]
then
    # Take the last line of the dry run (the link command), then strip
    # everything up to and including the output file, any quoted object
    # files, and the linker group markers.
    cudalibs=`"$NVCC" -dryrun bogus.cu 2>&1 | tail -1 | sed "s#^.*-o \"a.out\"##" | sed 's#"[a-zA-Z0-9./_-]*\.o"##g' | sed 's/-Wl,--start-group//' | sed 's/-Wl,--end-group//'`
else
    # Use the contents of the LIBRARIES=... line directly. This nvcc run is
    # only performed on this path, where its result is actually needed.
    cudalibs=`"$NVCC" -dryrun bogus.cu 2>&1 | grep LIBRARIES | sed 's/^.*LIBRARIES=//'`
    #echo "libspec=$cudalibs"
fi

# Left unquoted on purpose: word splitting collapses the leftover runs of
# whitespace into single spaces, as the original output did.
echo $cudalibs
51 | 
52 | 


--------------------------------------------------------------------------------
/src/common/HostStencil.cpp:
--------------------------------------------------------------------------------
 1 | #include <string.h> // for memcpy
 2 | #include "HostStencil.h"
 3 | 
 4 | 
 5 | template<class T>
 6 | void
 7 | HostStencil<T>::operator()( Matrix2D<T>& mtx, unsigned int nIters )
 8 | {
 9 |     // we need a temp space buffer
10 |     Matrix2D<T> tmpMtx( mtx.GetNumRows(), mtx.GetNumColumns() );
11 | 
12 |     // be able to access the matrices as 2D arrays
13 |     typename Matrix2D<T>::DataPtr mtxData = mtx.GetData();
14 |     typename Matrix2D<T>::DataPtr tmpMtxData = tmpMtx.GetData();
15 | 
16 | 
17 |     for( unsigned int iter = 0; iter < nIters; iter++ )
18 |     {
19 |         DoPreIterationWork( mtx, iter );
20 | 
21 |         /* copy the "real" data to the temp matrix */
22 |         memcpy( tmpMtx.GetFlatData(),
23 |                 mtx.GetFlatData(),
24 |                 mtx.GetDataSize() );
25 | 
26 | 
27 |         /* Apply the stencil operator */
28 |         for( size_t i = 1; i < mtx.GetNumRows()-1; i++ )
29 |         {
30 |             for( size_t j = 1; j < mtx.GetNumColumns()-1; j++ )
31 |             {
32 |                 T oldCenterValue = tmpMtxData[i][j];
33 |                 T oldNSEWValues = (tmpMtxData[i-1][j] +
34 |                                         tmpMtxData[i+1][j] +
35 |                                         tmpMtxData[i][j-1] +
36 |                                         tmpMtxData[i][j+1]);
37 |                 T oldDiagonalValues = (tmpMtxData[i-1][j-1] +
38 |                                             tmpMtxData[i+1][j-1] +
39 |                                             tmpMtxData[i-1][j+1] +
40 |                                             tmpMtxData[i+1][j+1]);
41 | 
42 |                 mtxData[i][j] = this->wCenter * oldCenterValue +
43 |                                 this->wCardinal * oldNSEWValues +
44 |                                 this->wDiagonal * oldDiagonalValues;
45 |             }
46 |         }
47 |     }
48 | }
49 | 
50 | 
51 | template<class T>
52 | void
53 | HostStencil<T>::DoPreIterationWork( Matrix2D<T>& mtx, unsigned int iter )
54 | {
55 |     // we have nothing to do
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/data/REPORTING_RESULTS:
--------------------------------------------------------------------------------
 1 | The data directory is meant to serve as a resource for SHOC runs using the
 2 | shocdriver on different devices and platforms. Results can be contributed
 3 | either by emailing a SHOC development member, issuing a pull request, or
 4 | committing an update (for those with write access). If you would like to 
 5 | report a result for a device that is not listed, please provide the 
 6 | following:
 7 | 
 8 | devices.csv: 
 9 | -----------
10 | For a particular discrete accelerator, it is helpful to have the host system
11 | information to make comparisons between different devices and replicate 
12 | tests. New results should include not just the device name and vendor 
13 | but also the host platform used for testing. 
14 | 
15 | On a Linux system the CPU, memory size and speed, and motherboard can be 
16 | found using dmidecode or lshw. In certain cases dmesg can also be used to
17 | find the specific motherboard model number. For the OS, the Linux kernel
18 | version from uname is preferred; for other OSes, the common name
19 | (e.g. OSX 10.6) can suffice.
20 | 
21 | specs.csv:
22 | -----------
23 | For a particular accelerator, the key specifications for the device are
24 | reported. PCIe Gen (2.0, 3.0, etc.) and introduction date (or first sale date)
25 | are new fields that have been added.
26 | 
27 | For devices with a boost clock (e.g. NVIDIA Maxwell), report the
28 | boost clock instead of the base clock. (This boost clock is the
29 | average clock speed at which the device actually runs, not
30 | the absolute maximum clock speed the device supports, and so should
31 | most closely represent the card's real-life performance.)
32 | 
33 | platforms.csv:
34 | --------------
35 | Here the software platforms for a particular set of experiments are added. 
36 | 
37 | results.csv:
38 | ------------
39 | Results from a SHOC run using shocdriver are added to the results CSV and
40 | are correlated with the device and platform. New fields include the test
41 | size (4 is recommended) and the test date which should be the date when 
42 | the test was last run for the listed results.
43 | 


--------------------------------------------------------------------------------
/src/common/OptionParser.h:
--------------------------------------------------------------------------------
 1 | #ifndef OPTION_PARSER_H
 2 | #define OPTION_PARSER_H
 3 | 
 4 | #include <iostream>
 5 | #include <string>
 6 | #include <vector>
 7 | #include <map>
 8 | 
 9 | #include "Option.h"
10 | 
11 | using namespace std;
12 | 
13 | // ****************************************************************************
14 | // Class:  OptionParser
15 | //
16 | // Purpose:
17 | //   Class used to specify and parse command-line options to programs.
18 | //
19 | // Programmer:  Kyle Spafford
20 | // Creation:    August 4, 2009
21 | //
22 | // ****************************************************************************
23 | class OptionParser
24 | {
25 |   private:
26 |     typedef std::map<std::string, Option> OptionMap;
27 | 
28 |     OptionMap optionMap;
29 |     map<char, string>   shortLetterMap;
30 | 
31 |     bool helpRequested;
32 | 
33 |   public:
34 | 
35 |     OptionParser();
36 |     void addOption(const string &longName,
37 |                    OptionType type,
38 |                    const string &defaultValue,
39 |                    const string &helpText = "No help specified",
40 |                    char shortLetter = '\0');
41 | 
42 |     void print() const;
43 | 
44 |     //Returns false on failure, true on success
45 |     bool parse(int argc, const char *const argv[]);
46 |     bool parse(const vector<string> &args);
47 |     bool parseFile(const string &fileName);
48 | 
49 |     //Accessors for options
50 |     long long   getOptionInt(const string &name) const;
51 |     float       getOptionFloat(const string &name) const;
52 |     bool        getOptionBool(const string &name) const;
53 |     string      getOptionString(const string &name) const;
54 | 
55 |     vector<long long>     getOptionVecInt(const string &name) const;
56 |     vector<float>         getOptionVecFloat(const string &name) const;
57 |     vector<string>        getOptionVecString(const string &name) const;
58 | 
59 |     void printHelp(const string &optionName) const;
60 |     void usage() const;
61 | 
62 |     bool HelpRequested( void ) const    { return helpRequested; }
63 | };
64 | 
65 | #endif
66 | 


--------------------------------------------------------------------------------
/src/opencl/level1/fft/fftlib.h:
--------------------------------------------------------------------------------
 1 | #ifndef FFTLIB_H
 2 | #define FFTLIB_H
 3 | 
 4 | #include "OptionParser.h"
 5 | 
 6 | struct cplxflt {
 7 |     float x;
 8 |     float y;
 9 | };
10 | 
// Pair of doubles; by its name a double-precision complex value
// (presumably x is the real part and y the imaginary part -- confirm).
struct cplxdbl
{
    double x;
    double y;
};
15 | 
// NOTE(review): the declarations below use OpenCL handle types (cl_context,
// cl_kernel, ...) and an Event type. This header itself only includes
// OptionParser.h, so those names presumably have to be declared by whoever
// includes it first -- confirm.

// Set-up entry point. Receives the option parser, a precision flag, and the
// device/context/queue to use; the program and the three kernels are passed
// by reference, so they are presumably created here -- confirm in the
// implementation.
void init(OptionParser& op,
     bool _do_dp,
     cl_device_id fftDev,
     cl_context fftCtx,
     cl_command_queue fftQueue,
     cl_program& fftProg,
     cl_kernel& fftKrnl,
     cl_kernel& ifftKrnl,
     cl_kernel& chkKrnl);

// Tear-down counterpart of init(); takes the same program and kernel objects.
void deinit(cl_command_queue fftQueue,
            cl_program& fftProg,
            cl_kernel& fftKrnl,
            cl_kernel& ifftKrnl,
            cl_kernel& chkKrnl);

// Replaces forward and inverse, call with the
// appropriate kernel
void transform(void* workp,
              const int n_ffts,
              Event& fftEvent,
              cl_kernel& fftKrnl,
              cl_command_queue& fftQueue);

// Runs the check kernel over the work and check buffers and returns an int
// (presumably a failure count or status -- confirm in the implementation).
int check(const void* work,
          const void* check,
          const int half_n_ffts,
          const int half_n_cmplx,
          cl_kernel& chkKrnl,
          cl_command_queue& fftQueue);

// Allocates a device buffer of the given size; the buffer is returned
// through *bufferp.
void allocDeviceBuffer(void** bufferp,
                       const unsigned long bytes,
                       cl_context fftCtx,
                       cl_command_queue fftQueue);

// Releases a buffer obtained from allocDeviceBuffer().
void freeDeviceBuffer(void* buffer,
                      cl_context fftCtx,
                      cl_command_queue fftQueue);

// Allocates a host buffer of the given size; the buffer is returned
// through *bufp.
void allocHostBuffer(void** bufp,
                     const unsigned long bytes,
                     cl_context fftCtx,
                     cl_command_queue fftQueue);

// Releases a buffer obtained from allocHostBuffer().
void freeHostBuffer(void* buf,
                    cl_context fftCtx,
                    cl_command_queue fftQueue);

// Copies `bytes` bytes from a host buffer to a device buffer.
void copyToDevice(void* to_device, void* from_host,
    const unsigned long bytes, cl_command_queue fftQueue);

// Copies `bytes` bytes from a device buffer to a host buffer.
void copyFromDevice(void* to_host, void* from_device,
    const unsigned long bytes, cl_command_queue fftQueue);
70 | 
71 | #endif // FFTLIB_H
72 | 


--------------------------------------------------------------------------------
/src/common/Timer.h:
--------------------------------------------------------------------------------
 1 | #ifndef TIMER_H
 2 | #define TIMER_H
 3 | 
 4 | #include <vector>
 5 | #include <string>
 6 | #include <iostream>
 7 | 
 8 | #include <time.h>
 9 | #include <sys/timeb.h>
10 | #ifndef _WIN32
11 | #include <sys/time.h>
12 | #include "config.h"
13 | #endif
14 | 
15 | 
16 | // decide which timer type we are supposed to use
17 | #if defined(_WIN32)
18 | #    define TIMEINFO _timeb
19 | #elif defined(HAVE_CLOCK_GETTIME) && defined(HAVE_CLOCK_PROCESS_CPUTIME_ID)
20 | #    define TIMEINFO timespec
21 | #elif defined(HAVE_GETTIMEOFDAY)
22 | #    define TIMEINFO timeval
23 | #else
24 | #    error No supported timer available.
25 | #endif
26 | 
27 | 
28 | // ****************************************************************************
29 | //  Class:  Timer
30 | //
31 | //  Purpose:
32 | //    Encapsulated a set of hierarchical timers.  Starting a timer
33 | //    returns a handle to a timer.  Pass this handle, and a description,
34 | //    into the timer Stop routine.  Timers can nest and output will
35 | //    be displayed in a tree format.
36 | //
37 | //    Externally, Timer represents time in units of seconds.
38 | //
39 | //  Programmer:  Jeremy Meredith
40 | //  Creation:    August  6, 2004
41 | //
42 | // ****************************************************************************
// Hierarchical timer facility used through static entry points.
// The constructor and destructor are private and a single static `instance`
// pointer is kept, so clients go through Instance() or the static functions.
// Each public static function has a private real_* counterpart
// (presumably the per-instance implementation it forwards to -- confirm in
// the .cpp).
class Timer
{
  public:
    // Accessor for the shared Timer object.
    static Timer *Instance();

    // Starts a timer and returns the handle to pass to Stop().
    static int    Start();

    // Returns time since start of corresponding timer (determined by handle),
    // in seconds.
    static double Stop(int handle, const std::string &descr);
    // Records a description/value pair directly, without a Start/Stop pair.
    static void   Insert(const std::string &descr, double value);

    // Writes the recorded timings to the given stream.
    static void   Dump(std::ostream&);

  private:

    int    real_Start();
    double real_Stop(int, const std::string &);
    void   real_Insert(const std::string &descr, double value);
    void   real_Dump(std::ostream&);

    Timer();
    ~Timer();

    static Timer *instance;

    // TIMEINFO is the platform time structure selected by the preprocessor
    // block earlier in this header (_timeb, timespec or timeval).
    std::vector<TIMEINFO>    startTimes;
    std::vector<double>      timeLengths;
    std::vector<std::string> descriptions;
    int                      currentActiveTimers;
};
74 | 
75 | #endif
76 | 


--------------------------------------------------------------------------------
/src/cuda/level2/s3d/gr_base.h:
--------------------------------------------------------------------------------
 1 | #ifndef GETRATES_BASE_H
 2 | #define GETRATES_BASE_H
 3 | 
 4 | #include "S3D.h"
 5 | 
// CUDA kernel, templated on the floating-point type `real`.
//
// Per thread it reads one temperature and one pressure at index
// threadIdx.x + blockIdx.x * blockDim.x (scaled by TCONV and PCONV), writes
// C(k) = Y(k) * constant_k for k = 1..22 while summing those products, and
// finally rescales every C(k) by PRES / (SUM * TEMP * 8.314510e7) after
// clamping it from below at FLT_MIN.
//
// C(k), Y(k), DIV, MAX, LAUNCH_BOUNDS and the GR_BASE_* constants are macros
// that are not defined in this header (presumably they come from S3D.h); how
// C(k)/Y(k) map onto the C and Y arrays is determined there.
//
// NOTE(review): the 22 multipliers look like reciprocal molecular weights
// and 8.314510e7 like the universal gas constant in CGS units -- confirm
// against the chemistry mechanism this was generated from.
// NOTE(review): the numeric literals have type double; if Y(k) yields a
// `real` and real is float, the products are evaluated in double -- confirm
// this is intended.
template <class real>
__global__ void
LAUNCH_BOUNDS (GR_BASE_THRD, GR_BASE_BLK)
gr_base(const real* P, const real* T, const real* Y, real* C, real TCONV,
        real PCONV) {

    // this thread's temperature and pressure, converted by the given factors
    const real TEMP = T[threadIdx.x + (blockIdx.x * blockDim.x)]*TCONV;
    const real PRES = P[threadIdx.x + (blockIdx.x * blockDim.x)]*PCONV;
    // lower clamp applied to every C(k) in the final loop
    const real SMALL = FLT_MIN;

    real SUM, ctmp;

    SUM = 0.0f;

    // scale each of the 22 inputs, store it, and accumulate the running sum
    C(1)  = ctmp = Y(1) *4.96046521e-1;
    SUM  += ctmp;
    C(2)  = ctmp = Y(2) *9.92093043e-1;
    SUM  += ctmp;
    C(3)  = ctmp = Y(3) *6.25023433e-2;
    SUM  += ctmp;
    C(4)  = ctmp = Y(4) *3.12511716e-2;
    SUM  += ctmp;
    C(5)  = ctmp = Y(5) *5.87980383e-2;
    SUM  += ctmp;
    C(6)  = ctmp = Y(6) *5.55082499e-2;
    SUM  += ctmp;
    C(7)  = ctmp = Y(7) *3.02968146e-2;
    SUM  += ctmp;
    C(8)  = ctmp = Y(8) *2.93990192e-2;
    SUM  += ctmp;
    C(9)  = ctmp = Y(9) *6.65112065e-2;
    SUM  += ctmp;
    C(10) = ctmp = Y(10)*6.23323639e-2;
    SUM  += ctmp;
    C(11) = ctmp = Y(11)*3.57008335e-2;
    SUM  += ctmp;
    C(12) = ctmp = Y(12)*2.27221341e-2;
    SUM  += ctmp;
    C(13) = ctmp = Y(13)*3.33039255e-2;
    SUM  += ctmp;
    C(14) = ctmp = Y(14)*3.84050525e-2;
    SUM  += ctmp;
    C(15) = ctmp = Y(15)*3.56453112e-2;
    SUM  += ctmp;
    C(16) = ctmp = Y(16)*3.32556033e-2;
    SUM  += ctmp;
    C(17) = ctmp = Y(17)*2.4372606e-2;
    SUM  += ctmp;
    C(18) = ctmp = Y(18)*2.37882046e-2;
    SUM  += ctmp;
    C(19) = ctmp = Y(19)*2.26996304e-2;
    SUM  += ctmp;
    C(20) = ctmp = Y(20)*2.43467162e-2;
    SUM  += ctmp;
    C(21) = ctmp = Y(21)*2.37635408e-2;
    SUM  += ctmp;
    C(22) = ctmp = Y(22)*3.56972032e-2;
    SUM  += ctmp;

    // SUM is reused from here on as the common rescaling factor
    SUM = DIV (PRES, (SUM * (TEMP) * 8.314510e7));

    // clamp each stored value from below, then apply the common factor
#pragma unroll 22
    for (unsigned k=1; k<=22; k++) {
        C(k) = MAX(C(k), SMALL) * SUM;
    }
}
72 | 
73 | #endif
74 | 


--------------------------------------------------------------------------------
/src/cuda/common/cudacommon.h:
--------------------------------------------------------------------------------
 1 | #ifndef CUDACOMMON_H
 2 | #define CUDACOMMON_H
 3 | 
 4 | // workaround for OS X Snow Leopard w/ gcc 4.2.1 and CUDA 2.3a
 5 | // (undefined __sync_fetch_and_add)
 6 | #if defined(__APPLE__)
 7 | # if _GLIBCXX_ATOMIC_BUILTINS == 1
 8 | #undef _GLIBCXX_ATOMIC_BUILTINS
 9 | #endif // _GLIBC_ATOMIC_BUILTINS
10 | #endif // __APPLE__
11 | 
12 | #include <stdio.h>
13 | #include <cuda.h>
14 | #include <cuda_runtime_api.h>
15 | 
// On Windows, if we call exit, our console may disappear,
// taking the error message with it, so prompt before exiting.
// The prompt is written with printf/getchar from <stdio.h>, which this
// header already includes, so the macro does not depend on <iostream> or on
// a using-directive being in effect where it is expanded.
#if defined(_WIN32)
#define safe_exit(val)                          \
{                                               \
    printf("Press return to exit\n");           \
    fflush(stdout);                             \
    getchar();                                  \
    exit(val);                                  \
}
#else
#define safe_exit(val) exit(val)
#endif
28 | 
// Checks the CUDA runtime's last-error state. If cudaGetLastError() reports
// anything other than cudaSuccess, prints the numeric code, the error string
// and the current line number with printf, then calls safe_exit(-1).
// NOTE(review): the macro expands to a bare { ... } block rather than a
// do { } while (0), so "if (c) CHECK_CUDA_ERROR(); else ..." will not
// compile; the message also carries no file name, only __LINE__.
#define CHECK_CUDA_ERROR()                                                    \
{                                                                             \
    cudaError_t err = cudaGetLastError();                                     \
    if (err != cudaSuccess)                                                   \
    {                                                                         \
        printf("error=%d name=%s at "                                         \
               "ln: %d\n  ",err,cudaGetErrorString(err),__LINE__);            \
        safe_exit(-1);                                                        \
    }                                                                         \
}
39 | 
// Alternative macro to catch CUDA errors
// Evaluates `call` once and stores its result. If the result is not
// cudaSuccess, the file, line and CUDA error string are written to stderr
// and safe_exit(EXIT_FAILURE) is called. Wrapped in do { } while (0) so it
// behaves as a single statement.
#define CUDA_SAFE_CALL( call) do {                                            \
   cudaError err = call;                                                      \
   if (cudaSuccess != err) {                                                  \
       fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",          \
           __FILE__, __LINE__, cudaGetErrorString( err) );                    \
       safe_exit(EXIT_FAILURE);                                               \
   }                                                                          \
} while (0)
49 | 
50 | // Alleviate aliasing issues
51 | #define RESTRICT __restrict__
52 | 
53 | #endif // CUDACOMMON_H
54 | 


--------------------------------------------------------------------------------
/src/opencl/level1/stencil2d/OpenCLStencil.h:
--------------------------------------------------------------------------------
 1 | #ifndef OPENCLSTENCIL_H
 2 | #define OPENCLSTENCIL_H
 3 | 
 4 | #include <vector>
 5 | #include "Stencil.h"
 6 | #include "support.h"
 7 | 
 8 | 
 9 | // ****************************************************************************
10 | // Class:  OpenCLStencil
11 | //
12 | // Purpose:
13 | //   OpenCL implementation of 9-point stencil.
14 | //
15 | // Programmer:  Phil Roth
16 | // Creation:    October 28, 2009
17 | //
18 | // ****************************************************************************
19 | template<class T>
20 | class OpenCLStencil : public Stencil<T>
21 | {
22 | private:
23 |     size_t lRows;
24 |     size_t lCols;
25 | 
26 |     cl_context context;
27 |     cl_device_id device;
28 |     cl_command_queue queue;
29 |     cl_kernel kernel;
30 | 
31 | protected:
32 |     cl_kernel copyRectKernel;
33 | 
34 |     virtual void DoPreIterationWork( cl_mem buf,
35 |                                         cl_mem altBuf,
36 |                                         Matrix2D<T>& mtx,
37 |                                         unsigned int iter,
38 |                                         cl_command_queue queue );
39 | 
40 |     void SetCopyRectKernelArgs( cl_mem dest,
41 |                                 int destOffset,
42 |                                 int destPitch,
43 |                                 cl_mem src,
44 |                                 int srcOffset,
45 |                                 int srcPitch,
46 |                                 int width,
47 |                                 int height );
48 |  
49 |     void SetStencilKernelArgs( cl_mem currData,
50 |                                 cl_mem newData,
51 |                                 int alignment,
52 |                                 T wCenter,
53 |                                 T wCardinal,
54 |                                 T wDiagonal,
55 |                                 size_t localDataSize );
56 | 
57 |     static void ClearWaitEvents( std::vector<cl_event>& waitEvents );
58 | 
59 |     cl_context  GetContext( void )      { return context; }
60 | 
61 | public:
62 |     OpenCLStencil( T wCenter,
63 |                     T wCardinal,
64 |                     T wDiagonal,
65 |                     size_t _lRows,
66 |                     size_t _lCols,
67 |                     cl_device_id dev,
68 |                     cl_context ctx,
69 |                     cl_command_queue queue );
70 | 
71 |     virtual void operator()( Matrix2D<T>&, unsigned int nIters );
72 | };
73 | 
74 | #endif // OPENCLSTENCIL_H
75 | 


--------------------------------------------------------------------------------
/tools/numatest.pl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env perl
 2 | 
 3 | # Parse the arguments
 4 | $platform = "OpenCL";
 5 | while ($_ = shift @ARGV)
 6 | {
 7 |     if (/^-cuda$/ or /^--cuda$/)
 8 |     {
 9 | 	$platform = "CUDA";
10 |     }
11 |     elsif (/^-opencl$/ or /^--opencl$/)
12 |     {
13 | 	$platform = "OpenCL";
14 |     }
15 |     else
16 |     {
17 | 	print STDERR "Unknown argument: '$_'\n";
18 | 	print STDERR "\n";
19 | 	print STDERR "Usage: $0 [--cuda | --opencl]\n";
20 | 	print STDERR "       (defaults to OpenCL)\n";
21 | 	print STDERR "\n";
22 | 	exit 1;
23 |     }
24 | }
25 | print "Using platform: $platform\n";
26 | 
27 | # Get the CUDA/OpenCL devices available
28 | @devicequeryoutput = `../bin/Serial/$platform/BusSpeedDownload -i`;
29 | $num_devs = (grep(/Number of devices/, @devicequeryoutput))[0];
30 | $num_devs =~ s/^.*=\s*//;
31 | chomp($num_devs);
32 | print "Number of $platform devices: $num_devs\n";
33 | 
34 | # Get the NUMA nodes available
35 | @numaoutput = `numactl --show`;
36 | $numa_node_str = (grep(/nodebind/, @numaoutput))[0];
37 | $numa_node_str =~ s/^.*:\s*//;
38 | chomp($numa_node_str);
39 | @numa_nodes = split /\s+/, $numa_node_str;
40 | print "Number of NUMA nodes= @numa_nodes\n";
41 | 
42 | # Check download speed and latency for all NUMA node / device pairings
43 | foreach $n (@numa_nodes)
44 | {
45 |     for ($d = 0; $d < $num_devs; $d++)
46 |     {
47 | 	@down_output = `numactl --cpunodebind=$n ../bin/Serial/$platform/BusSpeedDownload -d $d`;
48 | 
49 | 	$bw_str = (grep(/DownloadSpeed\s+65536kB/, @down_output))[0];
50 | 	@bw_cols = split /\s+/, $bw_str;
51 | 	$bw_median = $bw_cols[3];
52 | 
53 | 	$lat_str = (grep(/DownloadTime\s+1kB/, @down_output))[0];
54 | 	@lat_cols = split /\s+/, $lat_str;
55 | 	$lat_median = $lat_cols[3];
56 | 
57 | 	print "NUMA Node=$n Device=$d Median Download Latency=$lat_median ms, Speed=$bw_median GB/sec\n";
58 |     }
59 | }
60 | 
61 | # Check readback speed and latency for all NUMA node / device pairings
62 | foreach $n (@numa_nodes)
63 | {
64 |     for ($d = 0; $d < $num_devs; $d++)
65 |     {
66 | 	@up_output = `numactl --cpunodebind=$n ../bin/Serial/$platform/BusSpeedReadback -d $d`;
67 | 
68 | 	$bw_str = (grep(/ReadbackSpeed\s+65536kB/, @up_output))[0];
69 | 	@bw_cols = split /\s+/, $bw_str;
70 | 	$bw_median = $bw_cols[3];
71 | 
72 | 	$lat_str = (grep(/ReadbackTime\s+1kB/, @up_output))[0];
73 | 	@lat_cols = split /\s+/, $lat_str;
74 | 	$lat_median = $lat_cols[3];
75 | 
76 | 	print "NUMA Node=$n Device=$d Median Upload Latency=$lat_median ms, Speed=$bw_median GB/sec\n";
77 |     }    
78 | }
79 | 


--------------------------------------------------------------------------------
/src/mpi/common/MPIStencilUtil.cpp:
--------------------------------------------------------------------------------
 1 | #include <mpi.h>
 2 | #include <sstream>
 3 | #include "MPIStencilUtil.h"
 4 | #include "ParallelResultDatabase.h"
 5 | 
 6 | template<class T>
 7 | void
 8 | MPIStencilValidater<T>::ValidateResult( const Matrix2D<T>& exp,
 9 |                 const Matrix2D<T>& data,
10 |                 double valErrThreshold,
11 |                 unsigned int nValErrsToPrint ) const
12 | {
13 |     Validate<T> val( valErrThreshold );
14 |     std::vector<ValidationErrorInfo<T> > validationErrors = val( exp, data );
15 |     std::ostringstream valResultStr;
16 | 
17 |     // gather validation results to rank 0, who handles results
18 |     int nValErrors = validationErrors.size();
19 |     int totalValErrors = 0;
20 |     MPI_Reduce( &nValErrors,        // input from each
21 |                     &totalValErrors,    // output (only valid at root)
22 |                     1,          // count
23 |                     MPI_INT,   // datatype
24 |                     MPI_SUM,   // reduction operation
25 |                     0,          // root
26 |                     MPI_COMM_WORLD );   // comm
27 | 
28 |     int cwrank;
29 |     MPI_Comm_rank( MPI_COMM_WORLD, &cwrank );
30 |     if( cwrank == 0 )
31 |     {
32 |         valResultStr << totalValErrors << " validation errors";
33 | 
34 |         if( (totalValErrors > 0) && (nValErrsToPrint > 0) )
35 |         {
36 |             unsigned int valErrPrintsRemaining = nValErrsToPrint;
37 |             this->PrintValidationErrors( valResultStr, validationErrors, valErrPrintsRemaining );
38 |             if( validationErrors.size() <= valErrPrintsRemaining )
39 |             {
40 |                 // TODO do we want to collect validation errors from
41 |                 // other processes?
42 |                 valResultStr << " more validation errors in processes other than rank 0\n";
43 |             }
44 |         }
45 |         std::cout << valResultStr.str() << std::endl;
46 |     }
47 | }
48 | 
49 | 
50 | 
51 | 
52 | //  Modifications:
53 | //    Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010
54 | //    Split timing reports into detailed and summary.  For
55 | //    parallel code, don't report per-process values.
56 | //
57 | void
58 | MPIStencilTimingReporter::ReportTimings( ResultDatabase& resultDB ) const
59 | {
60 |     ParallelResultDatabase pdb;
61 |     pdb.MergeSerialDatabases( resultDB, MPI_COMM_WORLD );
62 | 
63 |     int cwrank;
64 |     MPI_Comm_rank( MPI_COMM_WORLD, &cwrank );
65 |     if( cwrank == 0 )
66 |     {
67 |         pdb.DumpSummary( std::cout );
68 |     }
69 | }
70 | 
71 | 


--------------------------------------------------------------------------------
/src/opencl/level1/stencil2d/CommonOpenCLStencilFactory.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <string>
 3 | #include <cassert>
 4 | #include "CommonOpenCLStencilFactory.h"
 5 | #include "InvalidArgValue.h"
 6 | 
 7 | 
 8 | 
 9 | 
10 | template<class T>
11 | void
12 | CommonOpenCLStencilFactory<T>::CheckOptions( const OptionParser& opts ) const
13 | {
14 |     // let base class check its options first
15 |     StencilFactory<T>::CheckOptions( opts );
16 | 
17 |     // check our options
18 |     std::vector<long long> shDims = opts.getOptionVecInt( "lsize" );
19 |     if( shDims.size() != 2 )
20 |     {
21 |         throw InvalidArgValue( "lsize must have two dimensions" );
22 |     }
23 |     if( (shDims[0] <= 0) || (shDims[1] <= 0) )
24 |     {
25 |         throw InvalidArgValue( "all lsize values must be positive" );
26 |     }
27 | 
28 |     std::vector<long long> arrayDims = opts.getOptionVecInt( "customSize" );
29 |     assert( arrayDims.size() == 2 );
30 |     // If both of these are zero, we're using a non-custom size, skip this test
31 |     if (arrayDims[0] == 0 && arrayDims[0] == 0)
32 |     {
33 |         return;
34 |     }
35 |     size_t gRows = (size_t)arrayDims[0];
36 |     size_t gCols = (size_t)arrayDims[1];
37 |     size_t lRows = (size_t)shDims[0];
38 |     size_t lCols = (size_t)shDims[1];
39 | 
40 |     // verify that local dimensions evenly divide global dimensions
41 |     if( ((gRows % lRows) != 0) || (lRows > gRows) )
42 |     {
43 |         throw InvalidArgValue( "overall rows must be even multiple of lsize rows" );
44 |     }
45 |     if( ((gCols % lCols) != 0) || (lCols > gCols) )
46 |     {
47 |         throw InvalidArgValue( "overall columns must be even multiple of lsize columns" );
48 |     }
49 | 
50 |     // TODO ensure local dims are smaller than OpenCL implementation limits
51 | }
52 | 
53 | 
54 | template<class T>
55 | void
56 | CommonOpenCLStencilFactory<T>::ExtractOptions( const OptionParser& options,
57 |                                             T& wCenter,
58 |                                             T& wCardinal,
59 |                                             T& wDiagonal,
60 |                                             size_t& lRows,
61 |                                             size_t& lCols )
62 | {
63 |     // let base class extract its options
64 |     StencilFactory<T>::ExtractOptions( options, wCenter, wCardinal, wDiagonal );
65 | 
66 |     // extract our options
67 |     std::vector<long long> ldims = options.getOptionVecInt( "lsize" );
68 |     assert( ldims.size() == 2 );
69 |     lRows = (size_t)ldims[0];
70 |     lCols = (size_t)ldims[1];
71 | }
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/src/cuda/level1/scan/tpScanLaunchKernel.cu:
--------------------------------------------------------------------------------
 1 | #include "scan_kernel.h"
 2 | 
 3 | template<class T>
 4 | void
 5 | LaunchReduceKernel( int num_blocks,
 6 |                     int num_threads,
 7 |                     int smem_size,
 8 |                     T* d_idata,
 9 |                     T* d_odata,
10 |                     int size )
11 | {
12 |     // In CUDA 4.0 we will be able to remove this level of indirection
13 |     // if we use the cuConfigureCall and cuLaunchKernel functions.
14 |     reduce<T,256><<<num_blocks,num_threads,smem_size>>>
15 |         (d_idata, d_odata, size);
16 | }
17 | 
// Host-side wrapper that launches the `scan_single_block` kernel on the
// block-sums array with the given grid size, block size and dynamic
// shared-memory size.
// NOTE(review): the kernel is instantiated with a fixed second template
// argument of 256 regardless of num_threads -- confirm callers always
// launch 256 threads per block.
template<class T>
void
LaunchTopScanKernel( int num_blocks, int num_threads, int smem_size,
                     T* d_block_sums, int size )
{
    // In CUDA 4.0 we will be able to remove this level of indirection
    // if we use the cuConfigureCall and cuLaunchKernel functions.
    scan_single_block<T,256><<< num_blocks, num_threads, smem_size >>>(
        d_block_sums,
        size );
}
31 | 
// Host-side wrapper that launches the `bottom_scan` kernel, forwarding the
// element type, vector type and block size as template arguments and the
// grid size, block size and dynamic shared-memory size as the launch
// configuration.
template<class T, class vecT, int blockSize>
void
LaunchBottomScanKernel( int num_blocks, int num_threads, int smem_size,
                        T* g_idata, T* g_odata, T* d_block_sums, int size )
{
    // In CUDA 4.0 we will be able to remove this level of indirection
    // if we use the cuConfigureCall and cuLaunchKernel functions.
    bottom_scan<T, vecT, blockSize><<< num_blocks, num_threads, smem_size >>>(
        g_idata,
        g_odata,
        d_block_sums,
        size );
}
47 | 
48 | // Ensure that the template functions are instantiated.
49 | // Unlike the Stencil2D CUDA version, which needs to instantiate objects,
50 | // we need to instantiate template functions.  Explicitly instantiating the
51 | // needed specializations seems to work for several recent versions of g++
52 | // that people are likely to be using underneath nvcc.
53 | template void LaunchReduceKernel<float>( int, int, int, float*, float*, int );
54 | template void LaunchReduceKernel<double>( int, int, int, double*, double*, int );
55 | 
56 | template void LaunchTopScanKernel<float>( int, int, int, float*, int );
57 | template void LaunchTopScanKernel<double>( int, int, int, double*, int );
58 | 
59 | template void LaunchBottomScanKernel<float,float4,256>( int, int, int, float*, float*, float*, int );
60 | template void LaunchBottomScanKernel<double,double4,256>( int, int, int, double*, double*, double*, int );
61 | 
62 | 


--------------------------------------------------------------------------------
/src/common/Utility.h:
--------------------------------------------------------------------------------
 1 | #ifndef UTILITY_H
 2 | #define UTILITY_H
 3 | 
 4 | #include <sstream>
 5 | #include <math.h>
 6 | 
 7 | // ****************************************************************************
 8 | // File:  Utility.h
 9 | //
10 | // Purpose:
11 | //   Various generic utility routines having to do with string and number
12 | //   manipulation.
13 | //
14 | // Programmer:  Jeremy Meredith
15 | // Creation:    September 18, 2009
16 | // Modified:    Jan 2010, rothpc
17 | //    Jeremy Meredith, Tue Oct  9 17:25:25 EDT 2012
18 | //    Round is c99, not Windows-friendly.  Assuming we are using
19 | //    positive values, replaced it with an equivalent of int(x+.5).
20 | //
21 | // ****************************************************************************
22 | 
// Format a byte-style count using binary unit suffixes.
//
// Values strictly greater than 10 units of G (1024^3), M (1024^2) or
// k (1024) are divided by the largest such unit, rounded to the nearest
// integer (half rounds up, assuming positive values) and printed with that
// unit's suffix.  Anything else is printed unchanged.
//
// If `rounding` is non-null it receives the difference between the value
// represented by the returned text and the original value (0 when no
// scaling took place).
inline std::string HumanReadable(long long value, long long *rounding=0)
{
    // Unit sizes, largest first, with the suffix printed for each.
    static const long long unitSize[]   = { 1024ll*1024*1024, 1024ll*1024, 1024ll };
    static const char      unitSuffix[] = { 'G', 'M', 'k' };

    std::ostringstream text;
    for (int u = 0; u < 3; ++u)
    {
        if (value > 10ll*unitSize[u])
        {
            const long long scaled =
                (long long)(0.5 + value/(double)unitSize[u]);
            if (rounding)
                *rounding = scaled*unitSize[u] - value;
            text << scaled << unitSuffix[u];
            return text.str();
        }
    }

    // too small to be worth abbreviating
    if (rounding)
        *rounding = 0;
    text << value;
    return text.str();
}
56 | 
// Split `buff` at every occurrence of `delim` and return the non-empty
// pieces in order.  Empty pieces (leading, trailing or between consecutive
// delimiters) are dropped, so an empty or delimiter-only input yields an
// empty vector.
//
// The standard types are spelled with their std:: qualification so that
// this header does not depend on a using-directive in the including file.
// Note that this header itself includes only <sstream> and <math.h>; the
// including file must make <vector> and <string> available.
inline std::vector<std::string> SplitValues(const std::string &buff, char delim)
{
    std::vector<std::string> output;

    std::string::size_type start = 0;
    while (start <= buff.length())
    {
        // end of the current piece: next delimiter, or end of input
        std::string::size_type end = buff.find(delim, start);
        if (end == std::string::npos)
            end = buff.length();

        if (end > start)
            output.push_back(buff.substr(start, end - start));

        start = end + 1;
    }

    return output;
}
79 | 
#ifdef _WIN32

// On Windows, srand48 and drand48 don't exist.
// Create convenience routines that use srand/rand
// and let developers continue to use the -48 versions.

// Seed the generator used by drand48() below.
inline void srand48(unsigned int seed)
{
    srand(seed);
}

// Return a pseudo-random value in the half-open interval [0, 1).
// The divisor is RAND_MAX + 1 (computed in double) so that the result can
// never be exactly 1.0, matching the range of the POSIX drand48(); code
// that scales the result to pick an array index relies on that.
inline double drand48()
{
    return double(rand()) / (double(RAND_MAX) + 1.0);
}

#endif // _WIN32
97 | 
98 | #endif
99 | 


--------------------------------------------------------------------------------
/tools/prettyPrint.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | my @elem_lengthsA = 0;
 5 | my @elem_lengthsB = 0;
 6 | my @lines;
 7 | my @lastLines;
 8 | my $before = 1;
 9 | my $after = 0;
10 | 
11 | while(<>){
12 |     chomp();
13 |     my $ln=$_;
14 |     my @elems = split(/\t+/,$ln);
15 |     if( $elems[0] eq "test"  && $elems[1] eq "atts" ){
16 |         $before = 0;
17 |     }
18 |     if( $before == 0 && ($#elems == 0 || $elems[0] eq "Note:") ){
19 |         $after = 1;
20 |     }
21 | 
22 |     if( $before == 1 ){
23 |         print "$ln\n";
24 |     }elsif( $after == 1 ){
25 |         push(@lastLines, $ln);
26 |     }else{
27 |         # push each line in an array so we retrieve it later
28 |         push(@lines, $ln);
29 |         my $i=0;
30 |         # for each element in this line, find its length
31 |         foreach(@elems){
32 |             my $elem=$_;
33 |             # ignore elements that are of zero length (split() splits *around* delimiters, so multiple consecutive delimiters are returned as zero length strings).
34 |             if( length($elem) > 0 ){
35 |                 # keep track of the longest string per column
36 |                 my ($lenA, $lenB);
37 |                 if( $elem =~ /(\d*)\.(\d*)/ ){
38 |                     $lenA = length($1);
39 |                     $lenB = length($2);
40 |                 }else{
41 |                     $lenA = 1;
42 |                     $lenB = length($elem);
43 |                 }
44 |                 if($elem_lengthsA[$i] == 0 || $lenA > $elem_lengthsA[$i]){
45 |                   $elem_lengthsA[$i] = $lenA;
46 |                 }
47 |                 if($elem_lengthsB[$i] == 0 || $lenB > $elem_lengthsB[$i]){
48 |                   $elem_lengthsB[$i] = $lenB;
49 |                 }
50 |                 $i++
51 |             }
52 |         }
53 |     }
54 | }
55 | 
56 | # iterate over the input (that we've stored into the array @lines) and print it
57 | foreach(@lines){
58 |     my $ln = $_;
59 |     my @elems = split(/\t+/,$ln);
60 |     my $i=0;
61 |     foreach(@elems){
62 |         my $elem = $_;
63 |         # skip delimiters
64 |         if( length($elem) > 0 ){
65 |             # find the maximum length of this column and use it as the string length (+1)
66 |             my $tmp_len = 1+$elem_lengthsA[$i]+$elem_lengthsB[$i];
67 |             if( $elem !~ /\d*\.\d*/ ){
68 |                 my $frmt = " %-".$tmp_len."s ";
69 |                 printf($frmt,$elem);
70 |             }else {
71 |                 my $frmt = " %".$tmp_len.".".$elem_lengthsB[$i]."lf ";
72 |                 printf($frmt,$elem);
73 |             }
74 |             $i++;
75 |         }
76 |     }
77 |     print "\n";
78 | }
79 | 
80 | foreach(@lastLines){
81 |     print "$_\n";
82 | }
83 | 


--------------------------------------------------------------------------------
/src/opencl/level2/s3d/rdwdot2.cl:
--------------------------------------------------------------------------------
 1 | #ifdef K_DOUBLE_PRECISION
 2 | #define DOUBLE_PRECISION
 3 | #pragma OPENCL EXTENSION cl_khr_fp64: enable
 4 | #elif AMD_DOUBLE_PRECISION
 5 | #define DOUBLE_PRECISION
 6 | #pragma OPENCL EXTENSION cl_amd_fp64: enable
 7 | #endif
 8 | 
 9 | // Macros to explicitly control the precision of floating-point constants;
10 | // unsuffixed constants are known to cause problems for some compilers
11 | #ifdef DOUBLE_PRECISION
12 | #define CPREC(a) a
13 | #else
14 | #define CPREC(a) a##f
15 | #endif
16 | 
17 | //replace divisions by multiplication with the reciprocal
18 | #define REPLACE_DIV_WITH_RCP 1
19 | 
20 | //Call the appropriate math function based on precision
21 | #ifdef DOUBLE_PRECISION
22 | #define real double
23 | #if REPLACE_DIV_WITH_RCP
24 | #define DIV(x,y) ((x)*(1.0/(y)))
25 | #else
26 | #define DIV(x,y) ((x)/(y))
27 | #endif
28 | #define POW pow
29 | #define EXP exp
30 | #define EXP10 exp10
31 | #define EXP2 exp2
32 | #define MAX fmax
33 | #define MIN fmin
34 | #define LOG log
35 | #define LOG10 log10
36 | #else
37 | #define real float
38 | #if REPLACE_DIV_WITH_RCP
39 | #define DIV(x,y) ((x)*(1.0f/(y)))
40 | #else
41 | #define DIV(x,y) ((x)/(y))
42 | #endif
43 | #define POW pow
44 | #define EXP exp
45 | #define EXP10 exp10
46 | #define EXP2 exp2
47 | #define MAX fmax
48 | #define MIN fmin
49 | #define LOG log
50 | #define LOG10 log10
51 | #endif
52 | 
53 | //Kernel indexing macros
54 | #define thread_num (get_global_id(0))
55 | #define idx2(p,z) (p[(((z)-1)*(N_GP)) + thread_num])
56 | #define idx(x, y) ((x)[(y)-1])
57 | #define C(q)     idx2(C, q)
58 | #define Y(q)     idx2(Y, q)
59 | #define RF(q)    idx2(RF, q)
60 | #define EG(q)    idx2(EG, q)
61 | #define RB(q)    idx2(RB, q)
62 | #define RKLOW(q) idx2(RKLOW, q)
63 | #define ROP(q)   idx(ROP, q)
64 | #define WDOT(q)  idx2(WDOT, q)
65 | #define RKF(q)   idx2(RKF, q)
66 | #define RKR(q)   idx2(RKR, q)
67 | #define A_DIM    (11)
68 | #define A(b, c)  idx2(A, (((b)*A_DIM)+c) )
69 | 
70 | #define ROP2(a)  (RKF(a) - RKR (a))
71 | 
72 | 
73 | __kernel void
74 | rdwdot2_kernel (__global const real* RKF, __global const real* RKR,
75 | 		__global real* WDOT, const real rateconv, __global const real* molwt)
76 | {
77 | 
78 |     WDOT(21) = (ROP2(145) +ROP2(185) +ROP2(187) +ROP2(189)
79 |             -ROP2(190) -ROP2(191) -ROP2(192) -ROP2(193)
80 |             -ROP2(194) -ROP2(195) -ROP2(196) -ROP2(197)
81 |             -ROP2(198) +ROP2(200) +ROP2(202) +ROP2(203)
82 |             +ROP2(205))*rateconv *molwt[20];
83 | 
84 |     WDOT(20) = (+ROP2(121) +ROP2(146) +ROP2(165) +ROP2(167)
85 |             -ROP2(185) -ROP2(186) -ROP2(187) -ROP2(188)
86 |             -ROP2(189) +ROP2(192) +ROP2(195) +ROP2(196)
87 |             +ROP2(197) +ROP2(198) +ROP2(206))*rateconv *molwt[19];
88 | 
89 |     WDOT(22) = 0.0;
90 | }
91 | 


--------------------------------------------------------------------------------
/src/opencl/level1/bfs/bfs_iiit.cl:
--------------------------------------------------------------------------------
 1 | #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
 2 | #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
 3 | #pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics: enable
 4 | #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics: enable
 5 | 
 6 | 
 7 | //Sungpack Hong, Sang Kyun Kim, Tayo Oguntebi, and Kunle Olukotun. 2011.
 8 | //Accelerating CUDA graph algorithms at maximum warp.
 9 | //In Proceedings of the 16th ACM symposium on Principles and practice of
10 | //parallel programming (PPoPP '11). ACM, New York, NY, USA, 267-276.
11 | // ****************************************************************************
12 | // Function: BFS_kernel_warp
13 | //
14 | // Purpose:
15 | //   Perform BFS on the given graph
16 | //
17 | // Arguments:
18 | //   levels: array that stores the level of vertices
19 | //   edgeArray: array that gives offset of a vertex in edgeArrayAux
20 | //   edgeArrayAux: array that gives the edge list of a vertex
21 | //   W_SZ: the warp size to use to process vertices
22 | //   CHUNK_SZ: the number of vertices each warp processes
23 | //   numVertices: number of vertices in the given graph
24 | //   curr: the current BFS level
25 | //   flag: set when more vertices remain to be traversed
26 | //
27 | // Returns:  nothing
28 | //
29 | // Programmer: Aditya Sarwade
30 | // Creation: June 16, 2011
31 | //
32 | // Modifications:
33 | //
34 | // ****************************************************************************
35 | __kernel void BFS_kernel_warp(
36 |         __global unsigned int *levels,
37 |         __global unsigned int *edgeArray,
38 |         __global unsigned int *edgeArrayAux,
39 |         int W_SZ,
40 |         int CHUNK_SZ,
41 |         unsigned int numVertices,
42 |         int curr,
43 |         __global int *flag)
44 | {
45 | 
46 |     int tid = get_global_id(0);
47 |     int W_OFF = tid % W_SZ;
48 |     int W_ID = tid / W_SZ;
49 |     int v1= W_ID * CHUNK_SZ;
50 |     int chk_sz=CHUNK_SZ+1;
51 | 
52 |     if((v1+CHUNK_SZ)>=numVertices)
53 |     {
54 |         chk_sz =  numVertices-v1+1;//(v1+CHUNK_SZ) - numVertices;
55 |         if(chk_sz<0)
56 |             chk_sz=0;
57 |     }
58 | 
59 |     //each warp processes nodes one by one
60 |     for(int v=v1; v< chk_sz-1+v1; v++)
61 |     {
62 |         if(levels[v] == curr)
63 |         {
64 |             unsigned int num_nbr = edgeArray[v+1]-edgeArray[v];
65 |             unsigned int nbr_off = edgeArray[v];
66 |             for(int i=W_OFF; i<num_nbr; i+=W_SZ)
67 |             {
68 |                int v = edgeArrayAux[i + nbr_off];
69 |                if(levels[v]==UINT_MAX)
70 |                {
71 |                     levels[v] = curr + 1;
72 |                     *flag = 1;
73 |                }
74 |             }
75 |         }
76 |     }
77 | }
78 | 


--------------------------------------------------------------------------------
/src/common/ProgressBar.h:
--------------------------------------------------------------------------------
  1 | #ifndef _PROGRESS_BAR_H_
  2 | #define _PROGRESS_BAR_H_
  3 | 
  4 | #include <stdio.h>
  5 | #include <stdlib.h>
  6 | 
  7 | #ifndef _WIN32
  8 | #include <unistd.h>
  9 | #endif
 10 | 
 11 | 
 12 | // ****************************************************************************
 13 | // Class: ProgressBar
 14 | //
 15 | // Purpose:
 16 | //   Simple text progress bar class.
 17 | //
 18 | // Programmer: Gabriel Marin
 19 | // Creation:   October 12, 2009
 20 | //
 21 | // Modifications:
 22 | //
 23 | // ****************************************************************************
 24 | class ProgressBar
 25 | {
 26 | private:
 27 |     int itersDone;
 28 |     int totalIters;
 29 |     static const char barDone[81];
 30 |     double rTotal;
 31 |     double percDone;
 32 | 
 33 | public:
 34 |     //   Constructor
 35 |     //
 36 |     //   Arguments:
 37 |     //       _totalIters  total work amount to be tracked
 38 |     ProgressBar (int _totalIters = 0)
 39 |     {
 40 |         totalIters = _totalIters;
 41 |         itersDone = 0;
 42 |         if (totalIters)
 43 |         {
 44 |             rTotal = 100.0/totalIters;
 45 |         } else
 46 |         {
 47 |             rTotal = 0.0;
 48 |         }
 49 |         percDone = itersDone*rTotal;
 50 |     }
 51 | 
 52 |     //   Method: setTotalIters
 53 |     //
 54 |     //   Purpose: setter for the total work amount
 55 |     //
 56 |     //   Arguments:
 57 |     //       _totalIters  total work amount to be tracked
 58 |     void setTotalIters (int _totalIters)
 59 |     {
 60 |         totalIters = _totalIters;
 61 |         if (totalIters)
 62 |         {
 63 |             rTotal = 100.0/totalIters;
 64 |             percDone = itersDone*rTotal;
 65 |         }
 66 |     }
 67 | 
 68 |     //   Method: setItersDone
 69 |     //
 70 |     //   Purpose: setter for the completed work amount
 71 |     //
 72 |     //   Arguments:
 73 |     //       _itersDone  completed work amount
 74 |     void setItersDone (int _itersDone)
 75 |     {
 76 |         itersDone = _itersDone;
 77 |         percDone = itersDone*rTotal;
 78 |     }
 79 | 
 80 |     //   Method: addItersDone
 81 |     //
 82 |     //   Purpose: update amount of completed work
 83 |     //
 84 |     //   Arguments:
 85 |     //       _inc  amount of newly completed work
 86 |     void addItersDone (int _inc = 1)
 87 |     {
 88 |         itersDone += _inc;
 89 |         percDone = itersDone*rTotal;
 90 |     }
 91 | 
 92 |     //   Method: Show
 93 |     //
 94 |     //   Purpose: display progress bar
 95 |     //
 96 |     //   Arguments:
 97 |     //       fd  output file descriptor
 98 |     void Show (FILE *fd)
 99 |     {
100 |         int lenDone = (int)(percDone/2.0 + 0.5);
101 |         fprintf(fd, "\r|%.*s%*s| %5.1lf%%", lenDone, barDone, 50-lenDone, "", percDone);
102 |         fflush(fd);
103 |     }
104 | };
105 | 
106 | #endif
107 | 


--------------------------------------------------------------------------------
/src/cuda/level2/qtclustering/comm.cpp:
--------------------------------------------------------------------------------
 1 | #include "comm.h"
 2 | #include <iostream>
 3 | 
 4 | using namespace std;
 5 | 
 6 | #if defined(PARALLEL)
 7 | MPI_Comm _qtc_mpi_communicator = MPI_COMM_WORLD;
 8 | #endif
 9 | 
// Rank of the calling process in the current QTC communicator;
// always 0 when built without PARALLEL.
int comm_get_rank(void){
#if defined(PARALLEL)
    int my_rank = 0;
    MPI_Comm_rank( _qtc_mpi_communicator, &my_rank );
    return my_rank;
#else
    return 0;
#endif
}
17 | 
// Number of processes in the current QTC communicator;
// always 1 when built without PARALLEL.
int comm_get_size(void){
#if defined(PARALLEL)
    int process_count = 1;
    MPI_Comm_size( _qtc_mpi_communicator, &process_count );
    return process_count;
#else
    return 1;
#endif // defined(PARALLEL)
}
25 | 
26 | 
// Broadcast `cnt` elements starting at `ptr` from rank `source` to every
// process in the current QTC communicator.  `type` selects the element
// type (COMM_TYPE_INT or COMM_TYPE_FLOAT).  Does nothing when built
// without PARALLEL.
//
// An unsupported `type` cannot be broadcast; instead of silently leaving
// the receivers' buffers untouched, a diagnostic is written to std::cerr.
void comm_broadcast( void *ptr, int cnt, int type, int source){
#if defined(PARALLEL)
    switch(type){
        case COMM_TYPE_INT:
            MPI_Bcast ( ptr, cnt, MPI_INT, source, _qtc_mpi_communicator );
            break;
        case COMM_TYPE_FLOAT:
            MPI_Bcast ( ptr, cnt, MPI_FLOAT, source, _qtc_mpi_communicator );
            break;
        default:
            std::cerr << "comm_broadcast: unsupported data type " << type
                      << "; nothing was broadcast" << std::endl;
            break;
    }
#endif // defined(PARALLEL)
    return;
}
42 | 
43 | 
// Barrier over the current QTC communicator; a no-op when built without
// PARALLEL.
void comm_barrier(){
#if defined(PARALLEL)
    MPI_Barrier( _qtc_mpi_communicator );
#endif
}
50 | 
// Agree, across the current QTC communicator, on the highest cardinality
// and on the smallest index among the processes that hold it.
//
//   max_card      in: this process's cardinality; out: the global maximum
//   winner_node   out: cwrank if this process contributed the winning
//                 index, -1 otherwise (always 0 without PARALLEL)
//   winner_index  in: this process's candidate index; out: the winning index
//   cwrank        the caller's rank, reported back through winner_node
//   max_index     value contributed by processes that do not hold the
//                 maximum cardinality, so that they cannot win the minimum
//
// Without PARALLEL only *winner_node is written.
void comm_find_winner(int *max_card, int *winner_node, int *winner_index, int cwrank, int max_index){
#if defined(PARALLEL)
    // Reduce the cardinalities to see what the highest value is.
    int best_card = 0;
    MPI_Allreduce (max_card, &best_card, 1, MPI_INT, MPI_MAX, _qtc_mpi_communicator);

    // If I'm not one of the winners, set my index to max
    int my_index = (*max_card == best_card) ? *winner_index : max_index;

    MPI_Allreduce (&my_index, winner_index, 1, MPI_INT, MPI_MIN, _qtc_mpi_communicator);

    *max_card = best_card;
    *winner_node = (my_index == *winner_index) ? cwrank : -1;
#else
    *winner_node = 0;
#endif // defined(PARALLEL)
}
76 | 
77 | 
// Shrink the QTC communicator when the number of active nodes drops.
//
// The first call only records active_node_count.  On later calls, if the
// count is lower than the recorded one, the current communicator is split
// into the processes with cwrank < active_node_count and the rest, and the
// global communicator is replaced by the caller's half.  Does nothing when
// built without PARALLEL.
//
// The communicator being replaced is freed unless it is MPI_COMM_WORLD, so
// that repeated shrinking does not accumulate communicators.  Like the
// split itself, this requires every process of the old communicator to
// make the call.
void comm_update_communicator(int cwrank, int active_node_count){
#if defined(PARALLEL)
    static int previous_active_node_count = -1;
    int this_node_participates = 1;

    if( -1 == previous_active_node_count ){
        previous_active_node_count = active_node_count;
        return;
    }

    if(active_node_count < previous_active_node_count ){
        if( cwrank >= active_node_count ){
            this_node_participates = 0;
            std::cout << "[" << cwrank << "] Shrinking the communicator and staying out of it." << std::endl;
        }

        MPI_Comm old_communicator = _qtc_mpi_communicator;
        MPI_Comm_split(old_communicator, this_node_participates, cwrank, &_qtc_mpi_communicator);

        // release the communicator created by an earlier split
        if( MPI_COMM_WORLD != old_communicator ){
            MPI_Comm_free(&old_communicator);
        }
    }
    previous_active_node_count = active_node_count;
#endif
    return;
}
99 | 


--------------------------------------------------------------------------------