├── Makefile.am ├── src ├── opencl │ ├── level2 │ │ ├── Makefile.am │ │ └── s3d │ │ │ ├── Makefile.am │ │ │ ├── epmpi │ │ │ └── Makefile.am │ │ │ └── rdwdot2.cl │ ├── Makefile.am │ ├── level1 │ │ ├── Makefile.am │ │ ├── sort │ │ │ ├── Sort.h │ │ │ ├── Makefile.am │ │ │ └── epmpi │ │ │ │ └── Makefile.am │ │ ├── stencil2d │ │ │ ├── OpenCLStencilFactory.h │ │ │ ├── Makefile.am │ │ │ ├── README.txt │ │ │ ├── tpmpi │ │ │ │ └── Makefile.am │ │ │ ├── OpenCLStencilFactory.cpp │ │ │ ├── MPIOpenCLStencilFactory.h │ │ │ ├── CommonOpenCLStencilFactory.h │ │ │ ├── MPIOpenCLStencil.h │ │ │ ├── OpenCLStencil.h │ │ │ └── CommonOpenCLStencilFactory.cpp │ │ ├── md │ │ │ ├── MD.h │ │ │ ├── Makefile.am │ │ │ ├── epmpi │ │ │ │ └── Makefile.am │ │ │ └── md.cl │ │ ├── md5hash │ │ │ ├── Makefile.am │ │ │ └── epmpi │ │ │ │ └── Makefile.am │ │ ├── triad │ │ │ ├── Makefile.am │ │ │ └── epmpi │ │ │ │ └── Makefile.am │ │ ├── gemm │ │ │ ├── Makefile.am │ │ │ └── epmpi │ │ │ │ └── Makefile.am │ │ ├── scan │ │ │ ├── Makefile.am │ │ │ ├── epmpi │ │ │ │ └── Makefile.am │ │ │ ├── tpmpi │ │ │ │ └── Makefile.am │ │ │ └── TPScan.h │ │ ├── fft │ │ │ ├── Makefile.am │ │ │ ├── epmpi │ │ │ │ └── Makefile.am │ │ │ └── fftlib.h │ │ ├── spmv │ │ │ ├── Makefile.am │ │ │ └── epmpi │ │ │ │ └── Makefile.am │ │ ├── reduction │ │ │ ├── Makefile.am │ │ │ ├── epmpi │ │ │ │ └── Makefile.am │ │ │ ├── tpmpi │ │ │ │ └── Makefile.am │ │ │ └── reduction.cl │ │ └── bfs │ │ │ ├── Makefile.am │ │ │ ├── epmpi │ │ │ └── Makefile.am │ │ │ └── bfs_iiit.cl │ ├── common │ │ ├── Makefile.am │ │ ├── PMSMemMgmt.h │ │ ├── OpenCLNodePlatformContainer.h │ │ └── OpenCLPlatform.h │ └── level0 │ │ ├── Makefile.am │ │ └── epmpi │ │ └── Makefile.am ├── cuda │ ├── Makefile.am │ ├── level2 │ │ ├── Makefile.am │ │ ├── qtclustering │ │ │ ├── tuningParameters.h │ │ │ ├── libdata.h │ │ │ ├── qtc_common.h │ │ │ ├── qtclib.h │ │ │ ├── comm.h │ │ │ ├── Makefile.am │ │ │ ├── tpmpi │ │ │ │ └── Makefile.am │ │ │ └── comm.cpp │ │ └── s3d │ │ │ ├── 
Makefile.am │ │ │ ├── epmpi │ │ │ └── Makefile.am │ │ │ └── gr_base.h │ ├── level1 │ │ ├── Makefile.am │ │ ├── neuralnet │ │ │ ├── nn_data.zip │ │ │ ├── epmpi │ │ │ │ └── Makefile.am │ │ │ └── Makefile.am │ │ ├── spmv │ │ │ ├── Spmv.h │ │ │ ├── Makefile.am │ │ │ └── epmpi │ │ │ │ └── Makefile.am │ │ ├── scan │ │ │ ├── Scan.h │ │ │ ├── Makefile.am │ │ │ ├── epmpi │ │ │ │ └── Makefile.am │ │ │ ├── tpmpi │ │ │ │ └── Makefile.am │ │ │ ├── TPScan.h │ │ │ └── tpScanLaunchKernel.cu │ │ ├── stencil2d │ │ │ ├── CUDAStencilFactory.h │ │ │ ├── Makefile.am │ │ │ ├── CUDAStencil.cpp │ │ │ ├── tpmpi │ │ │ │ └── Makefile.am │ │ │ ├── MPICUDAStencilFactory.h │ │ │ ├── CUDAStencil.h │ │ │ ├── CommonCUDAStencilFactory.h │ │ │ ├── CUDAStencilFactory.cpp │ │ │ └── MPICUDAStencil.h │ │ ├── md │ │ │ ├── MD.h │ │ │ ├── Makefile.am │ │ │ └── epmpi │ │ │ │ └── Makefile.am │ │ ├── triad │ │ │ ├── Makefile.am │ │ │ └── epmpi │ │ │ │ └── Makefile.am │ │ ├── md5hash │ │ │ ├── Makefile.am │ │ │ └── epmpi │ │ │ │ └── Makefile.am │ │ ├── sort │ │ │ ├── Makefile.am │ │ │ ├── epmpi │ │ │ │ └── Makefile.am │ │ │ ├── Sort.h │ │ │ └── sort_kernel.h │ │ ├── bfs │ │ │ ├── Makefile.am │ │ │ └── epmpi │ │ │ │ └── Makefile.am │ │ ├── fft │ │ │ ├── Makefile.am │ │ │ ├── epmpi │ │ │ │ └── Makefile.am │ │ │ └── fftlib.h │ │ ├── reduction │ │ │ ├── Makefile.am │ │ │ ├── epmpi │ │ │ │ └── Makefile.am │ │ │ ├── tpmpi │ │ │ │ └── Makefile.am │ │ │ ├── TPReduction.h │ │ │ └── tpRedLaunchKernel.cu │ │ └── gemm │ │ │ ├── Makefile.am │ │ │ └── epmpi │ │ │ └── Makefile.am │ ├── common │ │ ├── CUDAPMSMemMgr.h │ │ ├── PMSMemMgmt.h │ │ ├── support.h │ │ └── cudacommon.h │ └── level0 │ │ ├── Makefile.am │ │ └── epmpi │ │ └── Makefile.am ├── mpi │ ├── Makefile.am │ ├── contention │ │ ├── Makefile.am │ │ ├── README │ │ ├── cuda │ │ │ └── Makefile.am │ │ └── opencl │ │ │ └── Makefile.am │ ├── contention-mt │ │ ├── Makefile.am │ │ ├── README │ │ ├── cuda │ │ │ └── Makefile.am │ │ └── opencl │ │ │ └── Makefile.am │ └── common 
│ │ ├── Makefile.am │ │ ├── NodeInfo.cpp │ │ ├── GetMPIType.h │ │ ├── RandomPairs.h │ │ ├── MPIHostStencilFactory.h │ │ ├── MPIHostStencil.h │ │ ├── MPIStencilUtil.h │ │ ├── MPIHostStencilFactory.cpp │ │ ├── ParallelHelpers.h │ │ ├── MPIHostStencil.cpp │ │ └── MPIStencilUtil.cpp ├── common │ ├── Matrix2DStatics.cpp │ ├── ProgressBar.cpp │ ├── CTimer.cpp │ ├── Makefile.am │ ├── Option.cpp │ ├── InvalidArgValue.h │ ├── PMSMemMgr.h │ ├── InvalidArgValue.cpp │ ├── CTimer.h │ ├── NodeIDList.h │ ├── HostStencilFactory.cpp │ ├── BadCommandLine.h │ ├── Option.h │ ├── HostStencilFactory.h │ ├── StencilUtil.cpp │ ├── HostStencil.h │ ├── SerializableObject.h │ ├── ValidateMatrix2D.cpp │ ├── Graph.h │ ├── SerialStencilUtil.cpp │ ├── InitializeMatrix2D.h │ ├── SerialStencilUtil.h │ ├── StencilFactory.cpp │ ├── StencilUtil.h │ ├── Matrix2DFileSupport.cpp │ ├── StencilFactory.h │ ├── Stencil.h │ ├── ValidateMatrix2D.h │ ├── Matrix2D.cpp │ ├── HostStencil.cpp │ ├── OptionParser.h │ ├── Timer.h │ ├── Utility.h │ └── ProgressBar.h ├── Makefile.am └── stability │ ├── Stability.h │ ├── Makefile.am │ └── epmpi │ └── Makefile.am ├── config ├── config.mk.in ├── conf-cation.sh ├── conf-lens.sh ├── conf-llano.sh ├── conf-ion.sh ├── conf-linux.sh ├── dirtargets.mk.in ├── conf-valhalla.sh ├── conf-keeneland.sh ├── conf-newark.sh ├── conf-atlanta.sh ├── conf-crossarm.sh ├── common.mk.in ├── conf-titan.sh ├── conf-osx.sh └── find_cuda_libs.sh ├── doc ├── shoc-manual.pdf └── Makefile.am ├── tools ├── shocdriver.in ├── Makefile.am ├── numatest.pl └── prettyPrint.pl ├── .gitignore ├── data ├── platforms.csv ├── devices.csv └── REPORTING_RESULTS ├── README.txt ├── LICENSE.txt └── LICENSE-CUDPP.txt /Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = src tools doc 2 | -------------------------------------------------------------------------------- /src/opencl/level2/Makefile.am: 
-------------------------------------------------------------------------------- 1 | SUBDIRS=s3d 2 | -------------------------------------------------------------------------------- /src/cuda/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS=level0 level1 level2 2 | -------------------------------------------------------------------------------- /src/cuda/level2/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS=s3d qtclustering 2 | -------------------------------------------------------------------------------- /src/mpi/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS=common contention contention-mt 2 | -------------------------------------------------------------------------------- /src/opencl/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS=common level0 level1 level2 2 | -------------------------------------------------------------------------------- /config/config.mk.in: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/common.mk 2 | -------------------------------------------------------------------------------- /doc/shoc-manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vetter/shoc/HEAD/doc/shoc-manual.pdf -------------------------------------------------------------------------------- /tools/shocdriver.in: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | perl SHOC_LIBEXEC_DIR/driver.pl -bindir SHOC_BIN_DIR $* 4 | 5 | -------------------------------------------------------------------------------- /src/opencl/level1/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS=bfs fft gemm stencil2d 
reduction scan sort triad md spmv md5hash 2 | -------------------------------------------------------------------------------- /src/cuda/level1/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS=bfs fft gemm stencil2d md reduction scan sort spmv triad md5hash neuralnet 2 | -------------------------------------------------------------------------------- /src/cuda/level1/neuralnet/nn_data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vetter/shoc/HEAD/src/cuda/level1/neuralnet/nn_data.zip -------------------------------------------------------------------------------- /src/common/Matrix2DStatics.cpp: -------------------------------------------------------------------------------- 1 | #include "Matrix2D.h" 2 | 3 | template<> PMSMemMgr* Matrix2D::pmsmm = 0; 4 | template<> PMSMemMgr* Matrix2D::pmsmm = 0; 5 | 6 | -------------------------------------------------------------------------------- /src/mpi/contention/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | if BUILD_CUDA 3 | MAYBE_CUDA = cuda 4 | endif 5 | 6 | if BUILD_OPENCL 7 | MAYBE_OPENCL = opencl 8 | endif 9 | 10 | SUBDIRS=$(MAYBE_OPENCL) $(MAYBE_CUDA) 11 | 12 | -------------------------------------------------------------------------------- /src/mpi/contention-mt/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | if BUILD_CUDA 3 | MAYBE_CUDA = cuda 4 | endif 5 | 6 | if BUILD_OPENCL 7 | MAYBE_OPENCL = opencl 8 | endif 9 | 10 | SUBDIRS=$(MAYBE_OPENCL) $(MAYBE_CUDA) 11 | 12 | -------------------------------------------------------------------------------- /config/conf-cation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | sh ./configure \ 4 | CPPFLAGS="-I/opt/cuda-4.0/cuda/include" \ 5 | LDFLAGS="-L/opt/cuda-4.0/cuda/lib64" 6 | 7 
| 8 | # other useful options 9 | # --disable-stability 10 | 11 | -------------------------------------------------------------------------------- /src/common/ProgressBar.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "ProgressBar.h" 3 | 4 | // initialize static members of the ProgressBar class. 5 | const char ProgressBar::barDone[81] = "================================================================================"; 6 | 7 | -------------------------------------------------------------------------------- /src/cuda/level1/spmv/Spmv.h: -------------------------------------------------------------------------------- 1 | #ifndef SPMV_H_ 2 | #define SPMV_H_ 3 | 4 | // Block size 5 | static const int BLOCK_SIZE = 128; 6 | static const int WARP_SIZE = 32; 7 | 8 | enum kernelType{CSR_SCALAR, CSR_VECTOR, ELLPACKR}; 9 | 10 | #endif // SPMV_H_ 11 | -------------------------------------------------------------------------------- /config/conf-lens.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | 4 | CUDA_ROOT=/sw/analysis-x64/cuda/3.2/sl5.0_binary 5 | 6 | # do the actual configuration 7 | sh ./configure \ 8 | CPPFLAGS="-I$CUDA_ROOT/include" \ 9 | PATH"=$CUDA_ROOT/bin:$PATH" \ 10 | --disable-stability 11 | 12 | 13 | -------------------------------------------------------------------------------- /config/conf-llano.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | OCL_ROOT=/opt/AMDAPP 4 | 5 | # do the actual configuration 6 | sh ./configure \ 7 | CPPFLAGS="-I$OCL_ROOT/include" \ 8 | LDFLAGS="-L$OCL_ROOT/lib/x86_64" \ 9 | --without-cuda \ 10 | --disable-stability 11 | 12 | 13 | -------------------------------------------------------------------------------- /src/cuda/level2/qtclustering/tuningParameters.h: -------------------------------------------------------------------------------- 1 | #ifndef 
_TUNINGPARAMETERS_H_ 2 | #define _TUNINGPARAMETERS_H_ 3 | 4 | #define THREADSPERBLOCK 64 5 | 6 | #define SM_COUNT 16 7 | #define OVR_SBSCR_FACTOR 16 8 | 9 | #define GPU_MIN_SATURATION_FACTOR 32 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /src/cuda/level1/scan/Scan.h: -------------------------------------------------------------------------------- 1 | #ifndef SCAN_H_ 2 | #define SCAN_H_ 3 | 4 | template 5 | bool scanCPU(T *data, T* reference, T* dev_result, const size_t size); 6 | 7 | template 8 | void RunTest(string testName, ResultDatabase &resultDB, OptionParser &op); 9 | 10 | #endif // SCAN_H_ 11 | -------------------------------------------------------------------------------- /src/opencl/common/Makefile.am: -------------------------------------------------------------------------------- 1 | include ${top_builddir}/config/common.mk 2 | include ${top_builddir}/config/targets.mk 3 | 4 | noinst_LIBRARIES = libSHOCCommonOpenCL.a 5 | libSHOCCommonOpenCL_a_SOURCES = OpenCLDeviceInfo.cpp \ 6 | OpenCLPlatform.cpp \ 7 | OpenCLNodePlatformContainer.cpp \ 8 | Event.cpp 9 | 10 | -------------------------------------------------------------------------------- /src/mpi/contention-mt/README: -------------------------------------------------------------------------------- 1 | Over-subscribed, multi-threaded contention benchmark 2 | 3 | The benchmark counts the number of GPU devices and spawns threads to execute 4 | the GPU benchmark, while the main processes perform the MPI Latency test. Sequential runs 5 | are initially performed to report the base case numbers.
6 | -------------------------------------------------------------------------------- /src/common/CTimer.cpp: -------------------------------------------------------------------------------- 1 | #include "CTimer.h" 2 | #include "Timer.h" 3 | 4 | int 5 | Timer_Start() 6 | { 7 | return Timer::Start(); 8 | } 9 | 10 | double 11 | Timer_Stop(int h, const char *d) 12 | { 13 | return Timer::Stop(h,d); 14 | } 15 | 16 | void 17 | Timer_Insert(const char *d, double v) 18 | { 19 | Timer::Insert(d,v); 20 | } 21 | -------------------------------------------------------------------------------- /config/conf-ion.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # note: "ion" is an Ubuntu 12.04 system with gcc 4.6.x 3 | 4 | PATH="/usr/local/cuda42/cuda/bin:$PATH" 5 | 6 | ./configure \ 7 | CPPFLAGS="-I/usr/local/cuda42/cuda/include/" \ 8 | CUDA_CPPFLAGS="-DUSE_CLOCK_GETTIME -gencode=arch=compute_11,code=sm_11 -gencode=arch=compute_20,code=sm_20 -gencode=arch=compute_30,code=sm_30" 9 | 10 | -------------------------------------------------------------------------------- /src/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | if BUILD_CUDA 3 | MAYBE_CUDA = cuda 4 | endif 5 | 6 | if BUILD_OPENCL 7 | MAYBE_OPENCL = opencl 8 | endif 9 | 10 | if BUILD_STABILITY 11 | MAYBE_STABILITY = stability 12 | endif 13 | 14 | if BUILD_MPI 15 | MAYBE_MPI = mpi 16 | endif 17 | 18 | SUBDIRS=common $(MAYBE_OPENCL) $(MAYBE_CUDA) $(MAYBE_MPI) $(MAYBE_STABILITY) 19 | 20 | -------------------------------------------------------------------------------- /src/common/Makefile.am: -------------------------------------------------------------------------------- 1 | include ${top_builddir}/config/common.mk 2 | include ${top_builddir}/config/targets.mk 3 | 4 | noinst_LIBRARIES = libSHOCCommon.a 5 | libSHOCCommon_a_SOURCES = CTimer.cpp \ 6 | ResultDatabase.cpp \ 7 | OptionParser.cpp \ 8 | Option.cpp \ 9 | 
Timer.cpp \ 10 | ProgressBar.cpp \ 11 | InvalidArgValue.cpp \ 12 | Matrix2DStatics.cpp 13 | 14 | -------------------------------------------------------------------------------- /doc/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | doc_DATA = shoc-manual.pdf 3 | 4 | if BUILD_DOC 5 | shoc-manual.pdf: $(srcdir)/shoc-manual.tex 6 | cp -f $(srcdir)/shoc-manual.bib ./shoc.bib 7 | latexmk -pdf $(srcdir)/shoc-manual.tex 8 | 9 | clean: 10 | ${RM} *.aux *.bbl *.dvi *.ps *.log *.blg *.toc shoc-manual.out shoc-manual.fls shoc-manual.fdb_latexmk shoc-manual.pdf shoc.bib 11 | endif 12 | 13 | -------------------------------------------------------------------------------- /src/mpi/common/Makefile.am: -------------------------------------------------------------------------------- 1 | include ${top_builddir}/config/common.mk 2 | include ${top_builddir}/config/targets.mk 3 | 4 | # Which compiler to use to build and link? 5 | CXX = ${MPICXX} 6 | CXXLD = ${MPICXX} 7 | 8 | noinst_LIBRARIES = libSHOCCommonMPI.a 9 | libSHOCCommonMPI_a_SOURCES = RandomPairs.cpp \ 10 | MPI2DGridProgram.cpp \ 11 | MPIHostStencil.cpp \ 12 | MPIHostStencilFactory.cpp \ 13 | MPIStencilUtil.cpp 14 | 15 | -------------------------------------------------------------------------------- /src/cuda/level2/qtclustering/libdata.h: -------------------------------------------------------------------------------- 1 | #ifndef _LIBDATA_H_ 2 | #define _LIBDATA_H_ 3 | #include 4 | #include "support.h" 5 | #include "qtclib.h" 6 | 7 | float *generate_synthetic_data(float **rslt_mtrx, int **indr_mtrx, int *max_degree, float threshold, int N, int type); 8 | int read_BLAST_data(float **rslt_mtrx, int **indr_mtrx, int *max_degree, float threshold, const char *fname, int maxN, int matrix_type_mask); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /src/common/Option.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include "Option.h" 3 | using namespace std; 4 | 5 | void Option::print() { 6 | 7 | cout << "Long Name: " << longName << endl; 8 | cout << "Short Name: " << shortLetter << endl; 9 | cout << "Default Value: " << defaultValue << endl; 10 | cout << "Actual Value: " << value << endl; 11 | cout << "Type: " << type << endl; 12 | cout << "helpText: " << helpText << endl; 13 | 14 | } 15 | -------------------------------------------------------------------------------- /tools/Makefile.am: -------------------------------------------------------------------------------- 1 | toolsdir = $(prefix)/tools 2 | libexec_SCRIPTS = driver.pl numatest.pl 3 | bin_SCRIPTS = shocdriver 4 | doc_DATA = compilerVersion.txt buildFlags.txt 5 | 6 | shocdriver: 7 | cat $(srcdir)/shocdriver.in | sed "s,SHOC_LIBEXEC_DIR,@libexecdir@," | sed "s,SHOC_BIN_DIR,@bindir@," > $@ 8 | 9 | compilerVersion.txt: 10 | ${CXX} --version > $@ 11 | 12 | buildFlags.txt: 13 | cp ${top_builddir}/config/common.mk $@ 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/common/InvalidArgValue.h: -------------------------------------------------------------------------------- 1 | #ifndef INVALIDARGVALUE_H 2 | #define INVALIDARGVALUE_H 3 | 4 | #include 5 | #include 6 | 7 | // Exception for command line argument value errors 8 | class InvalidArgValue : public std::runtime_error 9 | { 10 | private: 11 | static std::string GenerateErrorMessage( const std::string& _msg ); 12 | 13 | public: 14 | InvalidArgValue( const std::string& _msg ); 15 | }; 16 | 17 | #endif // INVALIDARGVALUE_H 18 | -------------------------------------------------------------------------------- /src/mpi/common/NodeInfo.cpp: -------------------------------------------------------------------------------- 1 | #include "NodeInfo.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 
| using namespace std; 10 | 11 | int main(int argc,char *argv[]) 12 | { 13 | int numtasks, rank, dest, source, rc, count, tag=1, noderank; 14 | char inmsg, outmsg='x'; 15 | MPI_Init(&argc,&argv); 16 | NodeInfo NI; 17 | NI.print(); 18 | MPI_Finalize(); 19 | } 20 | -------------------------------------------------------------------------------- /src/mpi/common/GetMPIType.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef GET_MPI_TYPE_H 3 | #define GET_MPI_TYPE_H 4 | 5 | #include 6 | 7 | inline MPI_Datatype GetMPIType(const float&) { return MPI_FLOAT; } 8 | inline MPI_Datatype GetMPIType(const double&) { return MPI_DOUBLE; } 9 | inline MPI_Datatype GetMPIType(const int&) { return MPI_INT; } 10 | inline MPI_Datatype GetMPIType(const long&) { return MPI_LONG; } 11 | inline MPI_Datatype GetMPIType(const char&) { return MPI_CHAR; } 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /config/conf-linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | 4 | # do the actual configuration 5 | # 6 | # the configure script looks for CUDA using the PATH, but since OpenCL 7 | # is library based, you have to explicitly specify CPPFLAGS to find 8 | # the OpenCL headers. You may also need to specify LDFLAGS, depending on 9 | # whether the OpenCL libraries are installed in a location searched by 10 | # the linker such as /usr/lib. 
11 | # 12 | sh ./configure \ 13 | CPPFLAGS="-I/usr/local/cuda/include" 14 | 15 | 16 | -------------------------------------------------------------------------------- /config/dirtargets.mk.in: -------------------------------------------------------------------------------- 1 | all opencl cuda: 2 | @for dir in $(SUBDIRS); do ${MAKE} -C $$dir $@; done 3 | 4 | clean: 5 | @if test -n "$(SUBDIRS)"; then \ 6 | rev=""; for dir in $(SUBDIRS); do rev="$$dir $$rev"; done; \ 7 | for dir in $$rev; do ${MAKE} -C $$dir $@; done \ 8 | fi 9 | 10 | distclean: 11 | @if test -n "$(SUBDIRS)"; then \ 12 | rev=""; for dir in $(SUBDIRS); do rev="$$dir $$rev"; done; \ 13 | for dir in $$rev; do ${MAKE} -C $$dir $@; done \ 14 | fi 15 | ${RM} Makefile 16 | 17 | -------------------------------------------------------------------------------- /src/cuda/level2/qtclustering/qtc_common.h: -------------------------------------------------------------------------------- 1 | #ifndef _QTC_COMMON_H_ 2 | #define _QTC_COMMON_H_ 3 | 4 | #define GLOBAL_MEMORY 0x0 5 | #define TEXTUR_MEMORY 0x1 6 | #define COMPACT_STORAGE_MATRIX 0x00 7 | #define FULL_STORAGE_MATRIX 0x10 8 | 9 | #ifdef MIN 10 | # undef MIN 11 | #endif 12 | #define MIN(_X, _Y) ( ((_X) < (_Y)) ? (_X) : (_Y) ) 13 | 14 | #ifdef MAX 15 | # undef MAX 16 | #endif 17 | #define MAX(_X, _Y) ( ((_X) > (_Y)) ? (_X) : (_Y) ) 18 | 19 | #define INVALID_POINT_MARKER -42 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /config/conf-valhalla.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # note: "valhalla" is an Ubuntu 12.04 system with gcc 4.6.x 3 | 4 | which nvcc 5 | if (test $? -ne 0); then 6 | echo "Error: no nvcc found. 
Please set your path:" 7 | echo "export PATH=\"/usr/local/cuda-6.0/bin:\$PATH\"" 8 | echo "export LD_LIBRARY_PATH=\"/usr/local/cuda-6.0/lib64:\$PATH\"" 9 | exit 1 10 | fi 11 | 12 | ./configure \ 13 | CPPFLAGS="-I/usr/local/cuda-6.0/include/" \ 14 | CUDA_CPPFLAGS="-DUSE_CLOCK_GETTIME -gencode=arch=compute_50,code=sm_50" 15 | 16 | -------------------------------------------------------------------------------- /src/cuda/level2/qtclustering/qtclib.h: -------------------------------------------------------------------------------- 1 | #ifndef QTLIB_H 2 | #define QTLIB_H 3 | 4 | #include "OptionParser.h" 5 | 6 | extern int qtcDevice; 7 | 8 | void init(OptionParser& op); 9 | void reduce_card(void *card, int pointCount); 10 | void allocDeviceBuffer(void** bufferp, unsigned long bytes); 11 | void freeDeviceBuffer(void* buffer); 12 | void copyToDevice(void* to_device, void* from_host, unsigned long bytes); 13 | void copyFromDevice(void* to_host, void* from_device, unsigned long bytes); 14 | 15 | #endif // QTLIB_H 16 | -------------------------------------------------------------------------------- /src/opencl/level1/sort/Sort.h: -------------------------------------------------------------------------------- 1 | #ifndef _SORT_H 2 | #define _SORT_H 3 | 4 | static const int SORT_BLOCK_SIZE = 128; 5 | static const int SORT_BITS = 32; 6 | 7 | void radixSortStep(uint nbits, uint startbit, cl_mem counters, 8 | cl_mem countersSum, cl_mem blockOffsets, cl_mem* scanBlockSums, 9 | uint numElements, cl_kernel sortBlocks, cl_kernel findOffsets, 10 | cl_kernel reorder, cl_kernel scan, cl_kernel uniformAdd, 11 | cl_command_queue queue, cl_device_id dev); 12 | 13 | #endif // _SORT_H 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.a 3 | *.d 4 | *_cl.cpp 5 | *.buildflags 6 | *Makefile 7 | config.log 8 | config.status 9 | config/common.mk 10 | 
config/config.h 11 | config/targets.mk 12 | tools/Logs/* 13 | tools/results.csv 14 | autom4te.cache 15 | *.suo 16 | *.sdf 17 | *.opensdf 18 | ipch 19 | bin 20 | libexec 21 | share 22 | 23 | # Ignore some generated files for in-place build 24 | .deps 25 | config/config.mk 26 | config/dirtargets.mk 27 | config/stamp-h1 28 | tools/buildFlags.txt 29 | tools/compilerVersion.txt 30 | tools/shocdriver 31 | -------------------------------------------------------------------------------- /src/cuda/level1/stencil2d/CUDAStencilFactory.h: -------------------------------------------------------------------------------- 1 | #ifndef CUDASTENCILFACTORY_H 2 | #define CUDASTENCILFACTORY_H 3 | 4 | #include "CommonCUDAStencilFactory.h" 5 | 6 | template 7 | class CUDAStencilFactory : public CommonCUDAStencilFactory 8 | { 9 | public: 10 | CUDAStencilFactory( void ) 11 | : CommonCUDAStencilFactory( "CUDAStencil" ) 12 | { 13 | // nothing else to do 14 | } 15 | 16 | virtual Stencil* BuildStencil( const OptionParser& opts ); 17 | }; 18 | 19 | #endif // CUDASTENCILFACTORY_H 20 | 21 | -------------------------------------------------------------------------------- /src/mpi/common/RandomPairs.h: -------------------------------------------------------------------------------- 1 | #ifndef RANDOM_PAIRS_H 2 | #define RANDOM_PAIRS_H 3 | 4 | // **************************************************************************** 5 | // File: RandomPairs.h 6 | // 7 | // Purpose: 8 | // Collective method that picks a random, unique pair each time it is called 9 | // 10 | // Programmer: Vinod Tipparaju 11 | // Creation: August 12, 2009 12 | // 13 | // **************************************************************************** 14 | 15 | int RandomPairs(int myrank, int numranks, MPI_Comm new_comm); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/cuda/level2/qtclustering/comm.h: -------------------------------------------------------------------------------- 1
| #ifndef _COMM_H_ 2 | #define _COMM_H_ 3 | 4 | #if defined(PARALLEL) 5 | # include "mpi.h" 6 | #endif 7 | 8 | #define COMM_TYPE_INT 0 9 | #define COMM_TYPE_FLOAT 1 10 | 11 | void comm_update_communicator(int cwrank, int active_node_count); 12 | void comm_find_winner(int *max_card, int *winner_node, int *winner_index, int cwrank, int max_index); 13 | void comm_broadcast( void *ptr, int cnt, int type, int source); 14 | void comm_barrier(void); 15 | int comm_get_size(void); 16 | int comm_get_rank(void); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/cuda/level1/md/MD.h: -------------------------------------------------------------------------------- 1 | #ifndef MD_H__ 2 | #define MD_H__ 3 | 4 | #include 5 | #include 6 | 7 | // Problem Constants 8 | static const float cutsq = 16.0f; // Square of cutoff distance 9 | static const int maxNeighbors = 128; // Max number of nearest neighbors 10 | static const double domainEdge = 20.0; // Edge length of the cubic domain 11 | static const float lj1 = 1.5; // LJ constants 12 | static const float lj2 = 2.0; 13 | static const float EPSILON = 0.1f; // Relative Error between CPU/GPU 14 | 15 | #endif // MD_H__ 16 | -------------------------------------------------------------------------------- /data/platforms.csv: -------------------------------------------------------------------------------- 1 | platform,name 2 | cuda32,CUDA 3.2 3 | cuda40,"CUDA 4.0 " 4 | cuda41,CUDA 4.1 5 | cuda50,CUDA 5.0 6 | cuda42,CUDA 4.2 7 | cuda60,CUDA 6.0 8 | cuda65,CUDA 6.5 9 | nvocl32,NV OpenCL 3.2 10 | nvocl40,NV OpenCL 4.0 11 | nvocl41,NV OpenCL 4.1 12 | nvocl42,NV OpenCL 4.2 13 | nvocl50,NV OpenCL 5.0 14 | nvocl60,NV OpenCL 6.0 15 | nvocl65,NV OpenCL 6.5 16 | amdocl24,AMD OpenCL 2.4 17 | amdocl26,AMD OpenCL 2.6 18 | amdocl29,AMD OpenCL 2.9 19 | intel15,Intel OpenCL SDK 1.5 20 | intel44,Intel OpenCL SDK 4.4 21 | intelbgnt1,Intel Beignet 1.0.0 22 | best,Best Available Result 23 | 
-------------------------------------------------------------------------------- /src/common/PMSMemMgr.h: -------------------------------------------------------------------------------- 1 | #ifndef PMSMEMMGR_H 2 | #define PMSMEMMGR_H 3 | 4 | template 5 | class PMSMemMgr 6 | { 7 | public: 8 | virtual T* AllocHostBuffer( size_t nItems ) = 0; 9 | virtual void ReleaseHostBuffer( T* buf ) = 0; 10 | }; 11 | 12 | 13 | template 14 | class DefaultPMSMemMgr : public PMSMemMgr 15 | { 16 | public: 17 | virtual T* AllocHostBuffer( size_t nItems ) 18 | { 19 | return new T[nItems]; 20 | } 21 | 22 | virtual void ReleaseHostBuffer( T* buf ) 23 | { 24 | delete[] buf; 25 | } 26 | }; 27 | 28 | #endif // PMSMEMMGR_H 29 | -------------------------------------------------------------------------------- /src/stability/Stability.h: -------------------------------------------------------------------------------- 1 | #ifndef STABILITY_H 2 | 3 | void init(OptionParser& op); 4 | void forward(void* work, const int n_tasks); 5 | void inverse(void* work, const int n_tasks); 6 | int check(void* work, void* check, const int half_n_tasks, const int half_n_elts); 7 | unsigned long findAvailBytes(void); 8 | void allocDeviceBuffer(void** bufferp, const unsigned long bytes); 9 | void freeDeviceBuffer(void* buffer); 10 | void copyToDevice(void* to_device, const void* from_host, const unsigned long bytes); 11 | void copyFromDevice(void* to_host, const void* from_device, const unsigned long bytes); 12 | 13 | #endif 14 | 15 | -------------------------------------------------------------------------------- /src/common/InvalidArgValue.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "InvalidArgValue.h" 3 | 4 | std::string 5 | InvalidArgValue::GenerateErrorMessage( const std::string& _msg ) 6 | { 7 | std::ostringstream msgstr; 8 | msgstr << "invalid argument value: "; 9 | if( _msg.length() > 0 ) 10 | { 11 | msgstr << _msg; 12 | } 13 | else 14 | { 
15 | msgstr << "no further details available"; 16 | } 17 | return msgstr.str(); 18 | } 19 | 20 | 21 | InvalidArgValue::InvalidArgValue( const std::string& _msg ) 22 | : std::runtime_error( GenerateErrorMessage(_msg) ) 23 | { 24 | // nothing else to do 25 | } 26 | 27 | -------------------------------------------------------------------------------- /src/common/CTimer.h: -------------------------------------------------------------------------------- 1 | #ifndef CTIMER_H 2 | #define CTIMER_H 3 | 4 | // **************************************************************************** 5 | // File: CTimer.h 6 | // 7 | // Purpose: 8 | // C versions to call the critical routines of the Timer class. 9 | // 10 | // Programmer: Jeremy Meredith 11 | // Creation: October 22, 2007 12 | // 13 | // **************************************************************************** 14 | #ifdef __cplusplus 15 | extern "C" { 16 | #endif 17 | int Timer_Start(); 18 | double Timer_Stop(int, const char *); 19 | void Timer_Insert(const char *, double); 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /config/conf-keeneland.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # A "good" set of optimization flags is compiler dependent. 4 | # These might be reasonable flags to start from. 
5 | # 6 | # GNU 7 | OPTFLAGS="-g -O2" 8 | 9 | # Intel 10 | #OPTFLAGS="-g -xHOST -O3 -ip -no-prec-div" 11 | #export CXX=icpc 12 | #export CC=icc 13 | 14 | # PGI 15 | #OPTFLAGS="-g -fastsse" 16 | #export CXX=pgcpp 17 | #export CC=pgcc 18 | 19 | 20 | sh ./configure \ 21 | CPPFLAGS="-I/sw/kfs/cuda/4.2/linux_binary/include" \ 22 | CUDA_CPPFLAGS="-gencode=arch=compute_20,code=sm_20 -gencode=arch=compute_20,code=compute_20" \ 23 | CXXFLAGS="$OPTFLAGS" \ 24 | CFLAGS="$OPTFLAGS" \ 25 | LDFLAGS="$OPTFLAGS" 26 | 27 | -------------------------------------------------------------------------------- /src/opencl/common/PMSMemMgmt.h: -------------------------------------------------------------------------------- 1 | #ifndef PMSMEMMGMT_H 2 | #define PMSMEMMGMT_H 3 | 4 | #include 5 | 6 | // Programming Model-Specific Memory Management 7 | // Some programming models for heterogeneous systems provide 8 | // memory management functions for allocating memory on the host 9 | // and on the device. These functions provide an abstract interface 10 | // to that programming-model-specific interface. 11 | 12 | template 13 | T* 14 | pmsAllocHostBuffer( size_t nItems ) 15 | { 16 | return new T[nItems]; 17 | } 18 | 19 | 20 | template 21 | void 22 | pmsFreeHostBuffer( T* buf ) 23 | { 24 | delete[] buf; 25 | } 26 | 27 | #endif // PMSMEMMGMT_H 28 | -------------------------------------------------------------------------------- /config/conf-newark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | 4 | # A "good" set of optimization flags is compiler dependent. 5 | # These might be reasonable flags to start from. 
6 | # 7 | # GNU 8 | PATH="/opt/cuda/6.0/cuda/bin:$PATH" 9 | OPTFLAGS="-g -O2" 10 | 11 | # Intel 12 | #OPTFLAGS="-g -xHOST -O3 -ip -no-prec-div" 13 | #export CXX=icpc 14 | #export CC=icc 15 | 16 | # PGI 17 | #OPTFLAGS="-g -fastsse" 18 | #export CXX=pgcpp 19 | #export CC=pgcc 20 | 21 | CPPFLAGS="-I/opt/cuda/6.0/cuda/include/" \ 22 | CUDA_CPPFLAGS="-gencode=arch=compute_20,code=sm_20 -gencode=arch=compute_30,code=sm_30" \ 23 | sh ./configure \ 24 | CXXFLAGS="$OPTFLAGS" \ 25 | CFLAGS="$OPTFLAGS" \ 26 | LDFLAGS="$OPTFLAGS" 27 | 28 | -------------------------------------------------------------------------------- /config/conf-atlanta.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | OCL_ROOT=/opt/AMDAPP 4 | 5 | # A "good" set of optimization flags is compiler dependent. 6 | # These might be reasonable flags to start from. 7 | # 8 | # GNU 9 | OPTFLAGS="-g -O2" 10 | 11 | # Intel 12 | #OPTFLAGS="-g -xHOST -O3 -ip -no-prec-div" 13 | #export CXX=icpc 14 | #export CC=icc 15 | 16 | # PGI 17 | #OPTFLAGS="-g -fastsse" 18 | #export CXX=pgcpp 19 | #export CC=pgcc 20 | 21 | 22 | # do the actual configuration 23 | sh ./configure \ 24 | CPPFLAGS="-I$OCL_ROOT/include" \ 25 | CXXFLAGS="$OPTFLAGS" \ 26 | CFLAGS="$OPTFLAGS" \ 27 | LDFLAGS="$OPTFLAGS -L$OCL_ROOT/lib/x86_64" \ 28 | --without-cuda \ 29 | --disable-stability 30 | 31 | -------------------------------------------------------------------------------- /src/opencl/level1/stencil2d/OpenCLStencilFactory.h: -------------------------------------------------------------------------------- 1 | #ifndef OPENCLSTENCILFACTORY_H 2 | #define OPENCLSTENCILFACTORY_H 3 | 4 | #include "CommonOpenCLStencilFactory.h" 5 | 6 | template 7 | class OpenCLStencilFactory : public CommonOpenCLStencilFactory 8 | { 9 | public: 10 | OpenCLStencilFactory( cl_device_id _dev, 11 | cl_context _ctx, 12 | cl_command_queue _queue ) 13 | : CommonOpenCLStencilFactory( "OpenCLStencil", _dev, _ctx, _queue ) 14 | { 
15 | // nothing else to do 16 | } 17 | 18 | virtual Stencil* BuildStencil( const OptionParser& options ); 19 | }; 20 | 21 | #endif // OPENCLSTENCILFACTORY_H 22 | -------------------------------------------------------------------------------- /src/cuda/level1/md/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 19 | serialcuda_PROGRAMS = MD 20 | 21 | # How to build those programs? 22 | MD_SOURCES = main.cpp 23 | MD_LDADD = MD.o $(CUDA_LIBS) $(LIBS) 24 | 25 | -------------------------------------------------------------------------------- /src/cuda/level2/s3d/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 19 | serialcuda_PROGRAMS = S3D 20 | 21 | # How to build those programs? 
22 | S3D_SOURCES = main.cpp 23 | S3D_LDADD = S3D.o $(CUDA_LIBS) $(LIBS) 24 | 25 | -------------------------------------------------------------------------------- /src/cuda/level1/spmv/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 19 | serialcuda_PROGRAMS = Spmv 20 | 21 | # How to build those programs? 22 | Spmv_SOURCES = main.cpp 23 | Spmv_LDADD = Spmv.o $(CUDA_LIBS) $(LIBS) 24 | 25 | -------------------------------------------------------------------------------- /src/cuda/level1/scan/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi tpmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 19 | serialcuda_PROGRAMS = Scan 20 | 21 | # How to build those programs? 
22 | Scan_SOURCES = main.cpp 23 | Scan_LDADD = Scan.o $(CUDA_LIBS) $(LIBS) 24 | 25 | -------------------------------------------------------------------------------- /src/cuda/level1/triad/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 19 | serialcuda_PROGRAMS = Triad 20 | 21 | # How to build those programs? 22 | Triad_SOURCES = main.cpp 23 | Triad_LDADD = Triad.o $(CUDA_LIBS) $(LIBS) 24 | 25 | -------------------------------------------------------------------------------- /src/cuda/common/CUDAPMSMemMgr.h: -------------------------------------------------------------------------------- 1 | #ifndef CUDAPMSMEMMGR_H 2 | #define CUDAPMSMEMMGR_H 3 | 4 | #include 5 | #include "cudacommon.h" 6 | #include 7 | #include 8 | #include "PMSMemMgr.h" 9 | 10 | template 11 | class CUDAPMSMemMgr : public PMSMemMgr 12 | { 13 | public: 14 | virtual T* AllocHostBuffer( size_t nItems ) 15 | { 16 | T* ret = NULL; 17 | size_t nBytes = nItems * sizeof(T); 18 | CUDA_SAFE_CALL(cudaMallocHost((void**)&ret, nBytes)); 19 | return ret; 20 | } 21 | 22 | virtual void ReleaseHostBuffer( T* buf ) 23 | { 24 | CUDA_SAFE_CALL(cudaFreeHost(buf)); 25 | } 26 | }; 27 | 28 | #endif // CUDAPMSMEMMGR_H 29 | -------------------------------------------------------------------------------- /src/cuda/level1/md5hash/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = 
epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 19 | serialcuda_PROGRAMS = MD5Hash 20 | 21 | # How to build those programs? 22 | MD5Hash_SOURCES = main.cpp 23 | MD5Hash_LDADD = MD5Hash.o $(CUDA_LIBS) $(LIBS) 24 | 25 | -------------------------------------------------------------------------------- /src/cuda/level1/sort/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 19 | serialcuda_PROGRAMS = Sort 20 | 21 | # How to build those programs? 22 | Sort_SOURCES = main.cpp 23 | Sort_LDADD = Sort.o sort_kernel.o $(CUDA_LIBS) $(LIBS) 24 | 25 | -------------------------------------------------------------------------------- /src/mpi/contention/README: -------------------------------------------------------------------------------- 1 | The non-multi-threaded version of the contention benchmark 2 | 3 | This splits the process space into processes that do MPI communication and 4 | processes that do GPU communication. 
The number of processes that do GPU communication 5 | is equal to the number of devices on the node. 6 | 7 | Node Random Pairs: Forms pairs of nodes first, then pairs processes within the 8 | nodes. For example, if node 0 and 1 have 4 processes each, and 0 and 1 are 9 | paired, then process 0 on node 0 is paired with process 0 on node 1 which is 10 | MPI rank 4 (when nodes are not allocated in a round robin or any other weird 11 | way). Similarly, process 1 on node 0 is paired with process 1 on node 1 which 12 | is MPI rank 5. 13 | -------------------------------------------------------------------------------- /src/opencl/level1/md/MD.h: -------------------------------------------------------------------------------- 1 | #ifndef MD_H__ 2 | #define MD_H__ 3 | 4 | struct float4 { 5 | float x; 6 | float y; 7 | float z; 8 | float w; 9 | }; 10 | 11 | struct double4 { 12 | double x; 13 | double y; 14 | double z; 15 | double w; 16 | }; 17 | 18 | // Problem Constants 19 | static const float cutsq = 16.0f; // Square of cutoff distance 20 | static const int maxNeighbors = 128; // Max number of nearest neighbors 21 | static const double domainEdge = 20.0; // Edge length of the cubic domain 22 | static const float lj1 = 1.5; // LJ constants 23 | static const float lj2 = 2.0; 24 | static const float EPSILON = 0.1f; // Relative Error between CPU/GPU 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/common/NodeIDList.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _NODE_ID_LIST_H 3 | #define _NODE_ID_LIST_H 4 | 5 | #include 6 | #include 7 | 8 | // **************************************************************************** 9 | // Type: NodeIDList 10 | // 11 | // Purpose: 12 | // Defines a list of strings for holding host name information 13 | // associated with a particular system configuration.
14 | // 15 | // Programmer: Gabriel Marin 16 | // Creation: August 25, 2009 17 | // 18 | // Modifications: 19 | // 20 | // **************************************************************************** 21 | 22 | namespace SHOC { 23 | const int MAGIC_KEY_NODE_ID_LIST = 0x1071badc; 24 | typedef std::list NodeIDList; 25 | }; 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /src/cuda/level1/bfs/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 19 | serialcuda_PROGRAMS = BFS 20 | 21 | # How to build those programs? 22 | BFS_SOURCES = Graph.cpp \ 23 | main.cpp 24 | BFS_LDADD = BFS.o bfs_kernel.o $(CUDA_LIBS) $(LIBS) 25 | 26 | -------------------------------------------------------------------------------- /src/cuda/level1/fft/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 
19 | serialcuda_PROGRAMS = FFT 20 | 21 | # How to build those programs? 22 | FFT_SOURCES = FFT.cpp \ 23 | main.cpp 24 | FFT_LDADD = fftlib.o $(CUDA_LIBS) $(LIBS) 25 | 26 | -------------------------------------------------------------------------------- /src/cuda/level1/reduction/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi tpmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 19 | serialcuda_PROGRAMS = Reduction 20 | 21 | # How to build those programs? 22 | Reduction_SOURCES = main.cpp 23 | Reduction_LDADD = Reduction.o $(CUDA_LIBS) $(LIBS) 24 | 25 | -------------------------------------------------------------------------------- /src/cuda/level1/gemm/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 19 | serialcuda_PROGRAMS = GEMM 20 | 21 | # How to build those programs? 
22 | GEMM_SOURCES = GEMM.cpp \ 23 | main.cpp 24 | GEMM_LDADD = $(CUDA_LIBS) -lcublas $(LIBS) 25 | 26 | -------------------------------------------------------------------------------- /src/cuda/level1/md/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = MD 19 | 20 | # How to build those programs? 21 | MD_SOURCES = main.cpp 22 | MD_LDADD = MD.o $(CUDA_LIBS) $(LIBS) 23 | 24 | -------------------------------------------------------------------------------- /src/cuda/level2/s3d/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = S3D 19 | 20 | # How to build those programs? 
21 | S3D_SOURCES = main.cpp 22 | S3D_LDADD = S3D.o $(CUDA_LIBS) $(LIBS) 23 | 24 | -------------------------------------------------------------------------------- /src/cuda/level1/scan/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = Scan 19 | 20 | # How to build those programs? 21 | Scan_SOURCES = main.cpp 22 | Scan_LDADD = Scan.o $(CUDA_LIBS) $(LIBS) 23 | 24 | -------------------------------------------------------------------------------- /src/cuda/level1/spmv/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = Spmv 19 | 20 | # How to build those programs? 
21 | Spmv_SOURCES = main.cpp 22 | Spmv_LDADD = Spmv.o $(CUDA_LIBS) $(LIBS) 23 | 24 | -------------------------------------------------------------------------------- /src/cuda/level1/triad/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = Triad 19 | 20 | # How to build those programs? 21 | Triad_SOURCES = main.cpp 22 | Triad_LDADD = Triad.o $(CUDA_LIBS) $(LIBS) 23 | 24 | -------------------------------------------------------------------------------- /src/cuda/level2/qtclustering/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = tpmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 19 | serialcuda_PROGRAMS = QTC 20 | 21 | # How to build those programs? 
22 | QTC_SOURCES = comm.cpp \ 23 | libdata.cpp \ 24 | main.cpp 25 | QTC_LDADD = QTC.o $(CUDA_LIBS) $(LIBS) 26 | 27 | -------------------------------------------------------------------------------- /src/opencl/level1/md5hash/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(OCL_LDFLAGS) 13 | CPPFLAGS += $(OCL_CPPFLAGS) 14 | L1LIBS = $(OCL_LIBS) $(LIBS) 15 | 16 | # what is the destination for programs built from this directory? 17 | serialopencldir = $(bindir)/Serial/OpenCL 18 | 19 | # what programs should be installed in the destination? 20 | serialopencl_PROGRAMS = MD5Hash 21 | 22 | # how to build those programs 23 | MD5Hash_SOURCES = MD5Hash.cpp md5_cl.cpp main.cpp 24 | MD5Hash_LDADD = $(L1LIBS) 25 | 26 | -------------------------------------------------------------------------------- /src/opencl/level1/triad/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(OCL_LDFLAGS) 13 | CPPFLAGS += $(OCL_CPPFLAGS) 14 | L1LIBS = $(OCL_LIBS) $(LIBS) 15 | 16 | # what is the destination for programs built from this directory? 17 | serialopencldir = $(bindir)/Serial/OpenCL 18 | 19 | # what programs should be installed in the destination? 
20 | serialopencl_PROGRAMS = Triad 21 | 22 | # how to build those programs 23 | Triad_SOURCES = Triad.cpp \ 24 | main.cpp 25 | Triad_LDADD = $(L1LIBS) 26 | 27 | -------------------------------------------------------------------------------- /src/opencl/level1/md/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(OCL_LDFLAGS) 13 | CPPFLAGS += $(OCL_CPPFLAGS) 14 | L1LIBS = $(OCL_LIBS) $(LIBS) 15 | 16 | # what is the destination for programs built from this directory? 17 | serialopencldir = $(bindir)/Serial/OpenCL 18 | 19 | # what programs should be installed in the destination? 20 | serialopencl_PROGRAMS = MD 21 | 22 | # how to build those programs 23 | MD_SOURCES = MD.cpp \ 24 | md_cl.cpp \ 25 | main.cpp 26 | MD_LDADD = $(L1LIBS) 27 | 28 | -------------------------------------------------------------------------------- /src/cuda/level1/md5hash/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = MD5Hash 19 | 20 | # How to build those programs? 
21 | MD5Hash_SOURCES = main.cpp 22 | MD5Hash_LDADD = MD5Hash.o $(CUDA_LIBS) $(LIBS) 23 | 24 | -------------------------------------------------------------------------------- /src/cuda/level1/sort/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = Sort 19 | 20 | # How to build those programs? 21 | Sort_SOURCES = main.cpp 22 | Sort_LDADD = Sort.o sort_kernel.o $(CUDA_LIBS) $(LIBS) 23 | 24 | -------------------------------------------------------------------------------- /src/stability/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(top_srcdir)/src/cuda/common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 19 | serialcuda_PROGRAMS = Stability 20 | 21 | # How to build those programs? 
22 | Stability_SOURCES = Stability.cpp \ 23 | main.cpp 24 | Stability_LDADD = $(top_builddir)/src/cuda/level1/fft/fftlib.o $(CUDA_LIBS) -lcufft $(LIBS) 25 | 26 | -------------------------------------------------------------------------------- /src/cuda/level1/reduction/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = Reduction 19 | 20 | # How to build those programs? 21 | Reduction_SOURCES = main.cpp 22 | Reduction_LDADD = Reduction.o $(CUDA_LIBS) $(LIBS) 23 | 24 | -------------------------------------------------------------------------------- /src/opencl/level1/gemm/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(OCL_LDFLAGS) 13 | CPPFLAGS += $(OCL_CPPFLAGS) 14 | L1LIBS = $(OCL_LIBS) $(LIBS) 15 | 16 | # what is the destination for programs built from this directory? 17 | serialopencldir = $(bindir)/Serial/OpenCL 18 | 19 | # what programs should be installed in the destination? 
20 | serialopencl_PROGRAMS = GEMM 21 | 22 | # how to build those programs 23 | GEMM_SOURCES = GEMM.cpp \ 24 | gemmN_cl.cpp \ 25 | main.cpp 26 | GEMM_LDADD = $(L1LIBS) 27 | 28 | -------------------------------------------------------------------------------- /src/opencl/level1/sort/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(OCL_LDFLAGS) 13 | CPPFLAGS += $(OCL_CPPFLAGS) 14 | L1LIBS = $(OCL_LIBS) $(LIBS) 15 | 16 | # what is the destination for programs built from this directory? 17 | serialopencldir = $(bindir)/Serial/OpenCL 18 | 19 | # what programs should be installed in the destination? 20 | serialopencl_PROGRAMS = Sort 21 | 22 | # how to build those programs 23 | Sort_SOURCES = Sort.cpp \ 24 | sort_cl.cpp \ 25 | main.cpp 26 | Sort_LDADD = $(L1LIBS) 27 | 28 | -------------------------------------------------------------------------------- /src/common/HostStencilFactory.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "HostStencilFactory.h" 3 | #include "HostStencil.h" 4 | 5 | template 6 | Stencil* 7 | HostStencilFactory::BuildStencil( const OptionParser& options ) 8 | { 9 | // get options for base class 10 | T wCenter; 11 | T wCardinal; 12 | T wDiagonal; 13 | StencilFactory::ExtractOptions( options, wCenter, wCardinal, wDiagonal ); 14 | 15 | return new HostStencil( wCenter, wCardinal, wDiagonal ); 16 | } 17 | 18 | 19 | template 20 | void 21 | HostStencilFactory::CheckOptions( const OptionParser& options ) const 22 | { 23 | // let base class check its options 24 | StencilFactory::CheckOptions( options ); 25 | 26 | // nothing else to do - we add no options 
27 | } 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/cuda/level1/bfs/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = BFS 19 | 20 | # How to build those programs? 21 | BFS_SOURCES = Graph.cpp \ 22 | main.cpp 23 | BFS_LDADD = BFS.o bfs_kernel.o $(CUDA_LIBS) $(LIBS) 24 | 25 | -------------------------------------------------------------------------------- /src/cuda/level1/fft/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = FFT 19 | 20 | # How to build those programs? 
21 | FFT_SOURCES = FFT.cpp \ 22 | main.cpp 23 | FFT_LDADD = fftlib.o $(CUDA_LIBS) $(LIBS) 24 | 25 | -------------------------------------------------------------------------------- /src/opencl/level1/scan/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi tpmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(OCL_LDFLAGS) 13 | CPPFLAGS += $(OCL_CPPFLAGS) 14 | L1LIBS = $(OCL_LIBS) $(LIBS) 15 | 16 | # what is the destination for programs built from this directory? 17 | serialopencldir = $(bindir)/Serial/OpenCL 18 | 19 | # what programs should be installed in the destination? 20 | serialopencl_PROGRAMS = Scan 21 | 22 | # how to build those programs 23 | Scan_SOURCES = Scan.cpp \ 24 | scan_cl.cpp \ 25 | main.cpp 26 | Scan_LDADD = $(L1LIBS) 27 | 28 | -------------------------------------------------------------------------------- /src/cuda/level1/gemm/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = GEMM 19 | 20 | # How to build those programs? 
21 | GEMM_SOURCES = GEMM.cpp \ 22 | main.cpp 23 | GEMM_LDADD = $(CUDA_LIBS) -lcublas $(LIBS) 24 | 25 | -------------------------------------------------------------------------------- /src/cuda/level1/neuralnet/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = NeuralNet 19 | 20 | # How to build those programs? 21 | NeuralNet_SOURCES = main.cpp 22 | NeuralNet_LDADD = NeuralNet.o $(CUDA_LIBS) $(LIBS) -lcublas 23 | 24 | -------------------------------------------------------------------------------- /src/opencl/level1/triad/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # which compiler to use 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) 12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common 13 | L1LIBS = $(OCL_LIBS) $(LIBS) 14 | 15 | # what is the destination for programs built from this directory? 16 | epopencldir = $(bindir)/EP/OpenCL 17 | 18 | # what programs should be installed in the destination? 
19 | epopencl_PROGRAMS = Triad 20 | 21 | # how to build those programs 22 | Triad_SOURCES = Triad.cpp \ 23 | main.cpp 24 | Triad_LDADD = $(L1LIBS) 25 | 26 | -------------------------------------------------------------------------------- /src/cuda/level1/scan/tpmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | tpcudadir = $(bindir)/TP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | tpcuda_PROGRAMS = Scan 19 | 20 | # How to build those programs? 21 | Scan_SOURCES = main.cpp \ 22 | tpScan.cpp 23 | Scan_LDADD = tpScanLaunchKernel.o $(CUDA_LIBS) $(LIBS) 24 | 25 | -------------------------------------------------------------------------------- /src/opencl/level1/fft/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(OCL_LDFLAGS) 13 | CPPFLAGS += $(OCL_CPPFLAGS) 14 | L1LIBS = $(OCL_LIBS) $(LIBS) 15 | 16 | # what is the destination for programs built from this directory? 17 | serialopencldir = $(bindir)/Serial/OpenCL 18 | 19 | # what programs should be installed in the destination? 
20 | serialopencl_PROGRAMS = FFT 21 | 22 | # how to build those programs 23 | FFT_SOURCES = FFT.cpp \ 24 | fftlib.cpp \ 25 | fft_cl.cpp \ 26 | main.cpp 27 | FFT_LDADD = $(L1LIBS) 28 | 29 | -------------------------------------------------------------------------------- /src/opencl/level1/md/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # which compiler to use 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) 12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common 13 | L1LIBS = $(OCL_LIBS) $(LIBS) 14 | 15 | # what is the destination for programs built from this directory? 16 | epopencldir = $(bindir)/EP/OpenCL 17 | 18 | # what programs should be installed in the destination? 19 | epopencl_PROGRAMS = MD 20 | 21 | # how to build those programs 22 | MD_SOURCES = MD.cpp \ 23 | md_cl.cpp \ 24 | main.cpp 25 | MD_LDADD = $(L1LIBS) 26 | 27 | -------------------------------------------------------------------------------- /src/opencl/level1/gemm/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # which compiler to use 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) 12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common 13 | L1LIBS = $(OCL_LIBS) $(LIBS) 14 | 15 | # what is the destination for programs built from this directory? 16 | epopencldir = $(bindir)/EP/OpenCL 17 | 18 | # what programs should be installed in the destination? 
19 | epopencl_PROGRAMS = GEMM 20 | 21 | # how to build those programs 22 | GEMM_SOURCES = GEMM.cpp \ 23 | gemmN_cl.cpp \ 24 | main.cpp 25 | GEMM_LDADD = $(L1LIBS) 26 | 27 | -------------------------------------------------------------------------------- /src/opencl/level1/scan/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # which compiler to use 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) 12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common 13 | L1LIBS = $(OCL_LIBS) $(LIBS) 14 | 15 | # what is the destination for programs built from this directory? 16 | epopencldir = $(bindir)/EP/OpenCL 17 | 18 | # what programs should be installed in the destination? 19 | epopencl_PROGRAMS = Scan 20 | 21 | # how to build those programs 22 | Scan_SOURCES = Scan.cpp \ 23 | scan_cl.cpp \ 24 | main.cpp 25 | Scan_LDADD = $(L1LIBS) 26 | 27 | -------------------------------------------------------------------------------- /src/opencl/level1/sort/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # which compiler to use 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) 12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common 13 | L1LIBS = $(OCL_LIBS) $(LIBS) 14 | 15 | # what is the destination for programs built from this directory? 16 | epopencldir = $(bindir)/EP/OpenCL 17 | 18 | # what programs should be installed in the destination? 
19 | epopencl_PROGRAMS = Sort 20 | 21 | # how to build those programs 22 | Sort_SOURCES = Sort.cpp \ 23 | sort_cl.cpp \ 24 | main.cpp 25 | Sort_LDADD = $(L1LIBS) 26 | 27 | -------------------------------------------------------------------------------- /src/opencl/level1/spmv/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(OCL_LDFLAGS) 13 | CPPFLAGS += $(OCL_CPPFLAGS) -I$(srcdir)/../../common 14 | L1LIBS = $(OCL_LIBS) $(LIBS) 15 | 16 | # what is the destination for programs built from this directory? 17 | serialopencldir = $(bindir)/Serial/OpenCL 18 | 19 | # what programs should be installed in the destination? 20 | serialopencl_PROGRAMS = Spmv 21 | 22 | # how to build those programs 23 | Spmv_SOURCES = Spmv.cpp \ 24 | spmv_cl.cpp \ 25 | main.cpp 26 | Spmv_LDADD = $(L1LIBS) 27 | 28 | -------------------------------------------------------------------------------- /src/opencl/level1/spmv/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # which compiler to use 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) 12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common 13 | L1LIBS = $(OCL_LIBS) $(LIBS) 14 | 15 | # what is the destination for programs built from this directory? 16 | epopencldir = $(bindir)/EP/OpenCL 17 | 18 | # what programs should be installed in the destination? 
19 | epopencl_PROGRAMS = Spmv 20 | 21 | # how to build those programs 22 | Spmv_SOURCES = Spmv.cpp \ 23 | spmv_cl.cpp \ 24 | main.cpp 25 | Spmv_LDADD = $(L1LIBS) 26 | 27 | -------------------------------------------------------------------------------- /src/opencl/level1/scan/tpmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # which compiler to use 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) 12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common 13 | L1LIBS = $(OCL_LIBS) $(LIBS) 14 | 15 | # what is the destination for programs built from this directory? 16 | tpopencldir = $(bindir)/TP/OpenCL 17 | 18 | # what programs should be installed in the destination? 19 | tpopencl_PROGRAMS = Scan 20 | 21 | # how to build those programs 22 | Scan_SOURCES = scan_cl.cpp \ 23 | tpScan.cpp \ 24 | main.cpp 25 | Scan_LDADD = $(L1LIBS) 26 | 27 | -------------------------------------------------------------------------------- /src/cuda/level1/reduction/tpmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | tpcudadir = $(bindir)/TP/CUDA 16 | 17 | # What programs should be installed to that destination? 
18 | tpcuda_PROGRAMS = Reduction 19 | 20 | # How to build those programs? 21 | Reduction_SOURCES = main.cpp \ 22 | tpReduction.cpp 23 | Reduction_LDADD = tpRedLaunchKernel.o $(CUDA_LIBS) $(LIBS) 24 | 25 | -------------------------------------------------------------------------------- /src/cuda/level2/qtclustering/tpmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | tpcudadir = $(bindir)/TP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | tpcuda_PROGRAMS = QTC 19 | 20 | # How to build those programs? 21 | QTC_SOURCES = comm.cpp \ 22 | libdata.cpp \ 23 | main.cpp 24 | QTC_LDADD = QTC.o $(CUDA_LIBS) $(LIBS) 25 | 26 | -------------------------------------------------------------------------------- /src/opencl/level1/reduction/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi tpmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(OCL_LDFLAGS) 13 | CPPFLAGS += $(OCL_CPPFLAGS) 14 | L1LIBS = $(OCL_LIBS) $(LIBS) 15 | 16 | # what is the destination for programs built from this directory? 17 | serialopencldir = $(bindir)/Serial/OpenCL 18 | 19 | # what programs should be installed in the destination? 
20 | serialopencl_PROGRAMS = Reduction 21 | 22 | # how to build those programs 23 | Reduction_SOURCES = Reduction.cpp \ 24 | reduction_cl.cpp \ 25 | main.cpp 26 | Reduction_LDADD = $(L1LIBS) 27 | 28 | -------------------------------------------------------------------------------- /src/cuda/level1/sort/Sort.h: -------------------------------------------------------------------------------- 1 | #ifndef SORT_H_ 2 | #define SORT_H_ 3 | 4 | typedef unsigned int uint; 5 | 6 | static const int SORT_BLOCK_SIZE = 128; 7 | static const int SCAN_BLOCK_SIZE = 256; 8 | static const int SORT_BITS = 32; 9 | 10 | void 11 | radixSortStep(uint nbits, uint startbit, uint4* keys, uint4* values, 12 | uint4* tempKeys, uint4* tempValues, uint* counters, 13 | uint* countersSum, uint* blockOffsets, uint** scanBlockSums, 14 | uint numElements); 15 | 16 | void 17 | scanArrayRecursive(uint* outArray, uint* inArray, int numElements, int level, 18 | uint** blockSums); 19 | 20 | bool 21 | verifySort(uint *keys, uint* vals, const size_t size); 22 | 23 | #ifdef __DEVICE_EMULATION__ 24 | #define __SYNC __syncthreads(); 25 | #else 26 | #define __SYNC ; 27 | #endif 28 | 29 | #endif // SORT_H_ 30 | -------------------------------------------------------------------------------- /src/opencl/level1/bfs/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(OCL_LDFLAGS) 13 | CPPFLAGS += $(OCL_CPPFLAGS) 14 | L1LIBS = $(OCL_LIBS) $(LIBS) 15 | 16 | # what is the destination for programs built from this directory? 17 | serialopencldir = $(bindir)/Serial/OpenCL 18 | 19 | # what programs should be installed in the destination? 
20 | serialopencl_PROGRAMS = BFS 21 | 22 | # how to build those programs 23 | BFS_SOURCES = BFS.cpp \ 24 | bfs_uiuc_spill_cl.cpp \ 25 | bfs_iiit_cl.cpp \ 26 | Graph.cpp \ 27 | main.cpp 28 | BFS_LDADD = $(L1LIBS) 29 | 30 | -------------------------------------------------------------------------------- /src/opencl/level1/fft/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # which compiler to use 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) 12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common 13 | L1LIBS = $(OCL_LIBS) $(LIBS) 14 | 15 | # what is the destination for programs built from this directory? 16 | epopencldir = $(bindir)/EP/OpenCL 17 | 18 | # what programs should be installed in the destination? 19 | epopencl_PROGRAMS = FFT 20 | 21 | # how to build those programs 22 | FFT_SOURCES = FFT.cpp \ 23 | fftlib.cpp \ 24 | fft_cl.cpp \ 25 | main.cpp 26 | FFT_LDADD = $(L1LIBS) 27 | 28 | -------------------------------------------------------------------------------- /src/opencl/level1/md5hash/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # which compiler to use 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) 12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common 13 | L1LIBS = $(OCL_LIBS) $(LIBS) 14 | 15 | # what is the destination for programs built from this directory? 
16 | epopencldir = $(bindir)/EP/OpenCL 17 | 18 | # what programs should be installed in the destination? 19 | epopencl_PROGRAMS = MD5Hash 20 | 21 | # how to build those programs 22 | MD5Hash_SOURCES = MD5Hash.cpp \ 23 | md5_cl.cpp \ 24 | main.cpp 25 | MD5Hash_LDADD = $(L1LIBS) 26 | 27 | -------------------------------------------------------------------------------- /src/common/BadCommandLine.h: -------------------------------------------------------------------------------- 1 | #ifndef BAD_COMMAND_LINE_H 2 | #define BAD_COMMAND_LINE_H 3 | 4 | #include 5 | 6 | // **************************************************************************** 7 | // Class: BadCommandLine 8 | // 9 | // Purpose: 10 | // Exception for command line parse errors 11 | // 12 | // Programmer: Phil Roth 13 | // Creation: October 28, 2009 14 | // 15 | // **************************************************************************** 16 | struct BadCommandLine : public std::exception 17 | { 18 | // NOTE: current OptionParser implementation prints problems rather 19 | // than leaving it for us to determine how to print, so we have nothing 20 | // else to do. 
21 | virtual char const* what( void ) const throw() { return "invalid command line"; } 22 | }; 23 | 24 | #endif // BAD_COMMAND_LINE_H 25 | -------------------------------------------------------------------------------- /src/cuda/level1/fft/fftlib.h: -------------------------------------------------------------------------------- 1 | #ifndef FFTLIB_H 2 | #define FFTLIB_H 3 | 4 | #include "OptionParser.h" 5 | 6 | extern int fftDevice; 7 | 8 | void init(OptionParser& op, const bool do_dp, const int n_ffts); 9 | void forward(void* work, const int n_ffts); 10 | void inverse(void* work, const int n_ffts); 11 | int check(void* work, void* check, const int half_n_ffts, 12 | const int half_n_cmplx); 13 | void allocHostBuffer(void** bufferp, const unsigned long bytes); 14 | void allocDeviceBuffer(void** bufferp, const unsigned long bytes); 15 | void freeHostBuffer(void* buffer); 16 | void freeDeviceBuffer(void* buffer); 17 | void copyToDevice(void* to_device, const void* from_host, 18 | const unsigned long bytes); 19 | void copyFromDevice(void* to_host, const void* from_device, 20 | const unsigned long bytes); 21 | 22 | #endif // FFTLIB_H 23 | -------------------------------------------------------------------------------- /src/opencl/level1/reduction/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # which compiler to use 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) 12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common 13 | L1LIBS = $(OCL_LIBS) $(LIBS) 14 | 15 | # what is the destination for programs built from this directory? 16 | epopencldir = $(bindir)/EP/OpenCL 17 | 18 | # what programs should be installed in the destination? 
19 | epopencl_PROGRAMS = Reduction 20 | 21 | # how to build those programs 22 | Reduction_SOURCES = Reduction.cpp \ 23 | reduction_cl.cpp \ 24 | main.cpp 25 | Reduction_LDADD = $(L1LIBS) 26 | 27 | -------------------------------------------------------------------------------- /src/stability/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(top_srcdir)/src/cuda/common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = Stability_mpi 19 | 20 | # How to build those programs? 21 | Stability_mpi_SOURCES = Stability.cpp \ 22 | main.cpp 23 | Stability_mpi_LDADD = $(top_builddir)/src/cuda/level1/fft/fftlib.o $(CUDA_LIBS) -lcufft $(LIBS) 24 | 25 | -------------------------------------------------------------------------------- /src/opencl/level1/reduction/tpmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir):$(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # which compiler to use 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) 12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common 13 | L1LIBS = $(OCL_LIBS) $(LIBS) 14 | 15 | # what is the destination for programs built from this directory? 
16 | tpopencldir = $(bindir)/TP/OpenCL 17 | 18 | # what programs should be installed in the destination? 19 | tpopencl_PROGRAMS = Reduction 20 | 21 | # how to build those programs 22 | Reduction_SOURCES = reduction_cl.cpp \ 23 | tpReduction.cpp \ 24 | main.cpp 25 | Reduction_LDADD = $(L1LIBS) 26 | 27 | -------------------------------------------------------------------------------- /src/opencl/level1/bfs/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # which compiler to use 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) 12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common 13 | L1LIBS = $(OCL_LIBS) $(LIBS) 14 | 15 | # what is the destination for programs built from this directory? 16 | epopencldir = $(bindir)/EP/OpenCL 17 | 18 | # what programs should be installed in the destination? 19 | epopencl_PROGRAMS = BFS 20 | 21 | # how to build those programs 22 | BFS_SOURCES = BFS.cpp \ 23 | bfs_uiuc_spill_cl.cpp \ 24 | bfs_iiit_cl.cpp \ 25 | Graph.cpp \ 26 | main.cpp 27 | BFS_LDADD = $(L1LIBS) 28 | 29 | -------------------------------------------------------------------------------- /src/common/Option.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTION_H 2 | #define OPTION_H 3 | 4 | #include 5 | 6 | using namespace std; 7 | 8 | enum OptionType {OPT_FLOAT, OPT_INT, OPT_STRING, OPT_BOOL, 9 | OPT_VECFLOAT, OPT_VECINT, OPT_VECSTRING}; 10 | 11 | // **************************************************************************** 12 | // Class: Option 13 | // 14 | // Purpose: 15 | // Encapsulation of a single option, to be used by an option parser. 
16 | // 17 | // Programmer: Kyle Spafford 18 | // Creation: August 4, 2009 19 | // 20 | // **************************************************************************** 21 | class Option { 22 | 23 | public: 24 | 25 | string longName; 26 | char shortLetter; 27 | string defaultValue; 28 | string value; 29 | OptionType type; 30 | string helpText; 31 | 32 | void print(); 33 | }; 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /src/cuda/level1/stencil2d/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = tpmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 19 | serialcuda_PROGRAMS = Stencil2D 20 | 21 | # How to build those programs? 22 | Stencil2D_SOURCES = CUDAStencil.cpp \ 23 | CommonCUDAStencilFactory.cpp \ 24 | Stencil2Dmain.cpp \ 25 | CUDAStencilFactory.cpp \ 26 | main.cpp 27 | Stencil2D_LDADD = CUDAStencilKernel.o $(CUDA_LIBS) $(LIBS) 28 | 29 | -------------------------------------------------------------------------------- /src/mpi/contention/cuda/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir):$(srcdir)/..:$(top_srcdir)/src/cuda/level0 6 | 7 | # Which compiler to use? 
8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) -L$(top_builddir)/src/mpi/common 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = BusCont 19 | 20 | # How to build those programs? 21 | BusCont_SOURCES = CUDADriver.cpp \ 22 | BusCont.cpp \ 23 | bcmain.cpp 24 | BusCont_LDADD = $(top_builddir)/src/cuda/level0/BusSpeedDownload.o -lSHOCCommonMPI $(CUDA_LIBS) $(LIBS) 25 | 26 | -------------------------------------------------------------------------------- /src/cuda/level1/stencil2d/CUDAStencil.cpp: -------------------------------------------------------------------------------- 1 | #include "CUDAStencil.h" 2 | 3 | template 4 | CUDAStencil::CUDAStencil( T _wCenter, 5 | T _wCardinal, 6 | T _wDiagonal, 7 | size_t _lRows, 8 | size_t _lCols, 9 | int _device ) 10 | : Stencil( _wCenter, _wCardinal, _wDiagonal ), 11 | lRows( _lRows ), 12 | lCols( _lCols ), 13 | device( _device ) 14 | { 15 | // nothing else to do 16 | } 17 | 18 | template 19 | void 20 | CUDAStencil::DoPreIterationWork( T* currBuf, // in device global memory 21 | T* altBuf, // in device global memory 22 | Matrix2D& mtx, 23 | unsigned int iter ) 24 | { 25 | // in single-process version, nothing for us to do 26 | } 27 | 28 | -------------------------------------------------------------------------------- /src/cuda/common/PMSMemMgmt.h: -------------------------------------------------------------------------------- 1 | #ifndef PMSMEMMGMT_H 2 | #define PMSMEMMGMT_H 3 | 4 | #include 5 | #include "cudacommon.h" 6 | #include 7 | #include 8 | 9 | // Programming Model-Specific Memory Management 10 | // Some programming models for heterogeneous systems provide 11 | // memory management functions for allocating memory on the host 12 | // and on the device. 
These functions provide an abstract interface 13 | // to that programming-model-specific interface. 14 | 15 | template 16 | T* 17 | pmsAllocHostBuffer( size_t nItems ) 18 | { 19 | T* ret = NULL; 20 | size_t nBytes = nItems * sizeof(T); 21 | CUDA_SAFE_CALL(cudaMallocHost(&ret, nBytes)); 22 | return ret; 23 | } 24 | 25 | 26 | template 27 | void 28 | pmsFreeHostBuffer( T* buf ) 29 | { 30 | CUDA_SAFE_CALL(cudaFreeHost(buf)); 31 | } 32 | 33 | #endif // PMSMEMMGMT_H 34 | -------------------------------------------------------------------------------- /src/mpi/contention-mt/cuda/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir):$(srcdir)/..:$(top_srcdir)/src/cuda/level0 6 | 7 | # Which compiler to use? 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) -L$(top_builddir)/src/mpi/common 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | epcuda_PROGRAMS = MTBusCont 19 | 20 | # How to build those programs? 21 | MTBusCont_SOURCES = CUDADriver.cpp \ 22 | MTBusCont.cpp \ 23 | mtbcmain.cpp 24 | MTBusCont_LDADD = $(top_builddir)/src/cuda/level0/BusSpeedDownload.o -lSHOCCommonMPI $(CUDA_LIBS) $(LIBS) 25 | 26 | -------------------------------------------------------------------------------- /src/mpi/contention-mt/opencl/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir):$(srcdir)/..:$(top_srcdir)/src/opencl/level0 6 | 7 | # Which compiler to use? 
8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) -L$(top_builddir)/src/mpi/common 12 | AM_CPPFLAGS = $(OCL_CPPFLAGS) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epopencldir = $(bindir)/EP/OpenCL 16 | 17 | # What programs should be installed to that destination? 18 | epopencl_PROGRAMS = MTBusCont 19 | 20 | # How to build those programs? 21 | MTBusCont_SOURCES = OCLDriver.cpp \ 22 | MTBusCont.cpp \ 23 | mtbcmain.cpp 24 | MTBusCont_LDADD = $(top_builddir)/src/opencl/level0/BusSpeedDownload.o -lSHOCCommonMPI $(OCL_LIBS) $(LIBS) 25 | 26 | -------------------------------------------------------------------------------- /src/common/HostStencilFactory.h: -------------------------------------------------------------------------------- 1 | #ifndef HOSTSTENCILFACTORY_H 2 | #define HOSTSTENCILFACTORY_H 3 | 4 | #include "StencilFactory.h" 5 | 6 | // **************************************************************************** 7 | // Class: HostStencilFactory 8 | // 9 | // Purpose: 10 | // Class to generate stencils for hosts. 
11 | // 12 | // Programmer: Phil Roth 13 | // Creation: October 28, 2009 14 | // 15 | // **************************************************************************** 16 | template 17 | class HostStencilFactory: public StencilFactory 18 | { 19 | public: 20 | HostStencilFactory( void ) 21 | : StencilFactory( "HostStencil" ) 22 | { 23 | // nothing else to do 24 | } 25 | 26 | virtual Stencil* BuildStencil( const OptionParser& options ); 27 | virtual void CheckOptions( const OptionParser& options ) const; 28 | }; 29 | 30 | #endif // HOSTSTENCILFACTORY_H 31 | -------------------------------------------------------------------------------- /src/common/StencilUtil.cpp: -------------------------------------------------------------------------------- 1 | #include "StencilUtil.h" 2 | 3 | 4 | template 5 | void 6 | StencilValidater::PrintValidationErrors( std::ostream& s, 7 | const std::vector >& validationErrors, 8 | unsigned int nValErrsToPrint ) const 9 | { 10 | unsigned int nErrorsPrinted = 0; 11 | for( typename std::vector >::const_iterator iter = validationErrors.begin(); 12 | iter != validationErrors.end(); 13 | iter++ ) 14 | { 15 | if( nErrorsPrinted <= nValErrsToPrint ) 16 | { 17 | s << "out[" << iter->i 18 | << "][" << iter->j 19 | << "]=" << iter->val 20 | << ", expected " << iter->exp 21 | << ", relErr " << iter->relErr 22 | << '\n'; 23 | } 24 | nErrorsPrinted++; 25 | } 26 | } 27 | 28 | -------------------------------------------------------------------------------- /src/opencl/level1/stencil2d/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = tpmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(OCL_LDFLAGS) 13 | CPPFLAGS += $(OCL_CPPFLAGS) 14 | L1LIBS = $(OCL_LIBS) $(LIBS) 15 
| 16 | # what is the destination for programs built from this directory? 17 | serialopencldir = $(bindir)/Serial/OpenCL 18 | 19 | # what programs should be installed in the destination? 20 | serialopencl_PROGRAMS = Stencil2D 21 | 22 | # how to build those programs 23 | Stencil2D_SOURCES = Stencil2Dmain.cpp \ 24 | OpenCLStencil.cpp \ 25 | OpenCLStencilFactory.cpp \ 26 | CommonOpenCLStencilFactory.cpp \ 27 | stencil2d_cl.cpp \ 28 | main.cpp 29 | Stencil2D_LDADD = $(L1LIBS) 30 | 31 | -------------------------------------------------------------------------------- /src/cuda/level1/stencil2d/tpmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use to build and link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | tpcudadir = $(bindir)/TP/CUDA 16 | 17 | # What programs should be installed to that destination? 18 | tpcuda_PROGRAMS = Stencil2D 19 | 20 | # How to build those programs? 
21 | Stencil2D_SOURCES = CUDAStencil.cpp \ 22 | CommonCUDAStencilFactory.cpp \ 23 | Stencil2Dmain.cpp \ 24 | CUDAStencilFactory.cpp \ 25 | main.cpp 26 | Stencil2D_LDADD = CUDAStencilKernel.o $(CUDA_LIBS) $(LIBS) 27 | 28 | -------------------------------------------------------------------------------- /src/common/HostStencil.h: -------------------------------------------------------------------------------- 1 | #ifndef HOSTSTENCIL_H 2 | #define HOSTSTENCIL_H 3 | 4 | #include "Stencil.h" 5 | 6 | // **************************************************************************** 7 | // Class: HostStencil 8 | // 9 | // Purpose: 10 | // Stencils for hosts. 11 | // 12 | // Programmer: Phil Roth 13 | // Creation: October 28, 2009 14 | // 15 | // **************************************************************************** 16 | template 17 | class HostStencil : public Stencil 18 | { 19 | protected: 20 | virtual void DoPreIterationWork( Matrix2D& mtx, unsigned int iter ); 21 | 22 | public: 23 | HostStencil( T wCenter, 24 | T wCardinal, 25 | T wDiagonal ) 26 | : Stencil( wCenter, wCardinal, wDiagonal ) 27 | { 28 | // nothing else to do 29 | } 30 | 31 | virtual void operator()( Matrix2D&, unsigned int nIters ); 32 | }; 33 | 34 | #endif /* HOSTSTENCIL_H */ 35 | -------------------------------------------------------------------------------- /src/opencl/level1/stencil2d/README.txt: -------------------------------------------------------------------------------- 1 | 2 | A relatively naive 9-point stencil operation over a 2D array. The result 3 | computed via OpenCL is compared against the result computed on the host CPU. 4 | 5 | In the OpenCL implementation, a thread block copies data from the array in 6 | device global memory to shared memory with a 1-element-wide halo, and each 7 | thread in the thread block computes one data point of the array. 
Double buffering in device global memory is used to avoid problems with mixing
'new' and 'old' array data, since no synchronization across thread blocks is
available in the device code.
11 | // 12 | // Programmer: Phil Roth 13 | // Creation: November 5, 2009 14 | // 15 | // **************************************************************************** 16 | template 17 | class MPICUDAStencilFactory : public CommonCUDAStencilFactory 18 | { 19 | public: 20 | MPICUDAStencilFactory( void ) 21 | : CommonCUDAStencilFactory( "MPICUDAStencil" ) 22 | { 23 | // nothing else to do 24 | } 25 | 26 | virtual Stencil* BuildStencil( const OptionParser& options ); 27 | virtual void CheckOptions( const OptionParser& options ) const; 28 | }; 29 | 30 | #endif // MPICUDASTENCILFACTORY_H 31 | -------------------------------------------------------------------------------- /src/mpi/common/MPIHostStencilFactory.h: -------------------------------------------------------------------------------- 1 | #ifndef MPIHOSTSTENCILFACTORY_H 2 | #define MPIHOSTSTENCILFACTORY_H 3 | 4 | #include "StencilFactory.h" 5 | 6 | // **************************************************************************** 7 | // Class: MPIHostStencilFactory 8 | // 9 | // Purpose: 10 | // Class to generate stencils for MPI Hosts. 
11 | // 12 | // Programmer: Phil Roth 13 | // Creation: November 5, 2009 14 | // 15 | // **************************************************************************** 16 | template 17 | class MPIHostStencilFactory: public StencilFactory 18 | { 19 | public: 20 | MPIHostStencilFactory( void ) 21 | : StencilFactory( "MPIHostStencil" ) 22 | { 23 | // nothing else to do 24 | } 25 | 26 | virtual Stencil* BuildStencil( const OptionParser& options ); 27 | virtual void AddOptions( OptionParser& odesc ) const; 28 | virtual void CheckOptions( const OptionParser& options ) const; 29 | }; 30 | 31 | #endif // MPIHOSTSTENCILFACTORY_H 32 | -------------------------------------------------------------------------------- /src/common/SerializableObject.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _SERIALIZABLE_OBJECT_H 3 | #define _SERIALIZABLE_OBJECT_H 4 | 5 | #include 6 | #include 7 | 8 | // **************************************************************************** 9 | // Class: SerializableObject 10 | // 11 | // Purpose: 12 | // Abstract class with two pure virtual methods for serializing and 13 | // unserializing an object to string. 14 | // 15 | // Notes: 16 | // All Devices, Platforms, Node Containers and Multi-Node Containers 17 | // that are sent over the network must implement this interface. 
// ****************************************************************************
// Class: SerializableObject
//
// Purpose:
//   Abstract interface with two pure virtual methods: writeObject emits the
//   object into an output string stream, readObject rebuilds it from an
//   input string stream.
//
// Notes:
//   The destructor is virtual because implementations are used through
//   this interface; without it, deleting a derived object through a
//   SerializableObject pointer is undefined behaviour and the derived
//   destructor is not guaranteed to run.
// ****************************************************************************
class SerializableObject
{
public:
    SerializableObject() {}
    virtual ~SerializableObject() {}

    // Serialize this object into oss.
    virtual void writeObject (std::ostringstream &oss) const = 0;

    // Restore this object's state from iss.
    virtual void readObject (std::istringstream &iss) = 0;
};
8 | // This #define should be quietly ignored when using other MPI implementations. 9 | #define MPICH_SKIP_MPICXX 10 | #include "mpi.h" 11 | 12 | // Templated wrapper for MPI_Exscan 13 | template 14 | inline void globalExscan(T* local_result, T* global_result); 15 | 16 | template <> 17 | inline void globalExscan(float* local_result, float* global_result) 18 | { 19 | MPI_Exscan(local_result, global_result, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); 20 | } 21 | 22 | template <> 23 | inline void globalExscan(double* local_result, double* global_result) 24 | { 25 | MPI_Exscan(local_result, global_result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 26 | } 27 | 28 | #endif // __TPSCAN_H 29 | -------------------------------------------------------------------------------- /src/opencl/level1/stencil2d/tpmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # which compiler to use 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) 12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common 13 | L1LIBS = $(OCL_LIBS) $(LIBS) 14 | 15 | # what is the destination for programs built from this directory? 16 | tpopencldir = $(bindir)/TP/OpenCL 17 | 18 | # what programs should be installed in the destination? 
Installation should be familiar to anyone who is experienced with configure
and make; see the config directory for some examples.  Also, if your
platform requires regenerating the configure script, see build-aux/bootstrap.sh
and the manual for more details.
19 | 20 | Last update: 2014-04-13 15:39:22 kspaff 21 | 22 | -------------------------------------------------------------------------------- /src/cuda/level1/sort/sort_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef SORT_KERNEL_H_ 2 | #define SORT_KERNEL_H_ 3 | 4 | #include 5 | 6 | #define WARP_SIZE 32 7 | #define SORT_BLOCK_SIZE 128 8 | #define SCAN_BLOCK_SIZE 256 9 | 10 | typedef unsigned int uint; 11 | 12 | __global__ void radixSortBlocks(uint nbits, uint startbit, uint4* keysOut, 13 | uint4* valuesOut, uint4* keysIn, uint4* valuesIn); 14 | 15 | __global__ void findRadixOffsets(uint2* keys, uint* counters, 16 | uint* blockOffsets, uint startbit, uint numElements, uint totalBlocks); 17 | 18 | __global__ void reorderData(uint startbit, uint *outKeys, uint *outValues, 19 | uint2 *keys, uint2 *values, uint *blockOffsets, uint *offsets, 20 | uint *sizes, uint totalBlocks); 21 | 22 | // Scan Kernels 23 | __global__ void vectorAddUniform4(uint *d_vector, const uint *d_uniforms, 24 | const int n); 25 | 26 | __global__ void scan(uint *g_odata, uint *g_idata, uint *g_blockSums, 27 | const int n, const bool fullBlock, const bool storeSum); 28 | 29 | #endif // SORT_KERNEL_H_ 30 | -------------------------------------------------------------------------------- /src/common/ValidateMatrix2D.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "ValidateMatrix2D.h" 4 | 5 | 6 | template 7 | std::vector > 8 | Validate::operator()( const Matrix2D& s, const Matrix2D& t ) 9 | { 10 | std::vector > ret; 11 | 12 | // ensure matrices are same shape 13 | assert( (s.GetNumRows() == t.GetNumRows()) && (s.GetNumColumns() == t.GetNumColumns()) ); 14 | 15 | for( unsigned int i = 0; i < s.GetNumRows(); i++ ) 16 | { 17 | for( unsigned int j = 0; j < s.GetNumColumns(); j++ ) 18 | { 19 | T expVal = s.GetConstData()[i][j]; 20 | T actualVal = t.GetConstData()[i][j]; 21 | T 
delta = fabsf( actualVal - expVal ); 22 | T relError = (expVal != 0.0f) ? delta / expVal : 0.0f; 23 | 24 | if( relError > relErrThreshold ) 25 | { 26 | ret.push_back( ValidationErrorInfo( i, j, actualVal, expVal, relError ) ); 27 | } 28 | } 29 | } 30 | 31 | return ret; 32 | } 33 | 34 | 35 | -------------------------------------------------------------------------------- /src/cuda/level0/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | 5 | SUBDIRS = $(MAYBE_MPI) 6 | 7 | include $(top_builddir)/config/config.mk 8 | include $(top_builddir)/config/targets.mk 9 | 10 | # How to find source files 11 | VPATH = $(srcdir):$(srcdir)/../common 12 | 13 | AM_LDFLAGS = $(CUDA_LDFLAGS) 14 | AM_CPPFLAGS = $(CUDA_INC) 15 | 16 | # What is the destination for programs built from this directory? 17 | serialcudadir = $(bindir)/Serial/CUDA 18 | 19 | # What programs should be installed in the bin/Serial/CUDA destination? 
20 | serialcuda_PROGRAMS = BusSpeedDownload \ 21 | BusSpeedReadback \ 22 | DeviceMemory \ 23 | MaxFlops 24 | 25 | BusSpeedDownload_SOURCES = main.cpp 26 | BusSpeedDownload_LDADD = BusSpeedDownload.o $(CUDA_LIBS) $(LIBS) 27 | 28 | BusSpeedReadback_SOURCES = main.cpp 29 | BusSpeedReadback_LDADD = BusSpeedReadback.o $(CUDA_LIBS) $(LIBS) 30 | 31 | DeviceMemory_SOURCES = main.cpp 32 | DeviceMemory_LDADD = DeviceMemory.o $(CUDA_LIBS) $(LIBS) 33 | 34 | MaxFlops_SOURCES = main.cpp 35 | MaxFlops_LDADD = MaxFlops.o $(CUDA_LIBS) $(LIBS) 36 | 37 | -------------------------------------------------------------------------------- /src/cuda/level1/reduction/tpRedLaunchKernel.cu: -------------------------------------------------------------------------------- 1 | #include "reduction_kernel.h" 2 | 3 | template 4 | void 5 | RunTestLaunchKernel( int num_blocks, 6 | int num_threads, 7 | int smem_size, 8 | T* d_idata, 9 | T* d_odata, 10 | int size ) 11 | { 12 | // In CUDA 4.0 we will be able to remove this level of indirection 13 | // if we use the cuConfigureCall and cuLaunchKernel functions. 14 | reduce<<>>(d_idata, d_odata, size); 15 | } 16 | 17 | 18 | // ensure that the template functions are instantiated 19 | // Unlike the Stencil2D CUDA version that needs to instantiate objects, 20 | // we need to instantiate template functions. Declaration of the needed 21 | // specializations seem to work for several recent versions of g++ that 22 | // people are likely to be using underneath nvcc. 
23 | template void RunTestLaunchKernel( int, int, int, float*, float*, int ); 24 | template void RunTestLaunchKernel( int, int, int, double*, double*, int ); 25 | 26 | -------------------------------------------------------------------------------- /src/cuda/level0/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../common 6 | 7 | # Which compiler to use to link 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(CUDA_LDFLAGS) 12 | AM_CPPFLAGS = $(CUDA_INC) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epcudadir = $(bindir)/EP/CUDA 16 | 17 | # What programs should be installed in the destination? 18 | epcuda_PROGRAMS = BusSpeedDownload \ 19 | BusSpeedReadback \ 20 | DeviceMemory \ 21 | MaxFlops 22 | 23 | BusSpeedDownload_SOURCES = main.cpp 24 | BusSpeedDownload_LDADD = BusSpeedDownload.o $(CUDA_LIBS) $(LIBS) 25 | 26 | BusSpeedReadback_SOURCES = main.cpp 27 | BusSpeedReadback_LDADD = BusSpeedReadback.o $(CUDA_LIBS) $(LIBS) 28 | 29 | DeviceMemory_SOURCES = main.cpp 30 | DeviceMemory_LDADD = DeviceMemory.o $(CUDA_LIBS) $(LIBS) 31 | 32 | MaxFlops_SOURCES = main.cpp 33 | MaxFlops_LDADD = MaxFlops.o $(CUDA_LIBS) $(LIBS) 34 | 35 | -------------------------------------------------------------------------------- /config/conf-crossarm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Configure SHOC for cross-compilation using ARM cross compilers. 5 | # Gives an example of how to cross compile, should be adaptable to 6 | # other cross compilation targets. 7 | # 8 | # Assumes we are using CodeSourcery Lite ARM cross compilers. 9 | # Assumes cross-compilers, cross-linkers, etc. are in the PATH. 
10 | # Assumes CodeSourcery sysroot is in /opt/libc. 11 | # 12 | # Assumes no CUDA support on target system. 13 | # 14 | # Since OpenCL is library based, you have to explicitly specify CPPFLAGS to 15 | # find the OpenCL headers. You may also need to specify LDFLAGS, depending on 16 | # whether the OpenCL libraries are installed in a location searched by 17 | # the linker such as /usr/lib. 18 | # 19 | # Does not (yet?) support MPI. 20 | # 21 | sh ./configure \ 22 | CPPFLAGS="-I$HOME/private/Projects/ARM/ARM-OpenCL-1.1/include" \ 23 | LDFLAGS="-L$HOME/private/Projects/ARM/ARM-OpenCL-1.1/lib -Wl,-rpath=/opt/libc/lib:/opt/libc/usr/lib -Wl,--dynamic-linker=/opt/libc/lib/ld-linux.so.3" \ 24 | --host=arm-none-linux-gnueabi \ 25 | --with-opencl \ 26 | --without-cuda \ 27 | --without-mpi 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/opencl/level1/stencil2d/OpenCLStencilFactory.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "OpenCLStencilFactory.h" 5 | #include "OpenCLStencil.h" 6 | #include "OpenCLDeviceInfo.h" 7 | 8 | 9 | 10 | template 11 | Stencil* 12 | OpenCLStencilFactory::BuildStencil( const OptionParser& options ) 13 | { 14 | // get options for base class 15 | T wCenter; 16 | T wCardinal; 17 | T wDiagonal; 18 | size_t lRows; 19 | size_t lCols; 20 | this->ExtractOptions( options, 21 | wCenter, 22 | wCardinal, 23 | wDiagonal, 24 | lRows, 25 | lCols ); 26 | 27 | // build the stencil object 28 | return new OpenCLStencil( wCenter, 29 | wCardinal, 30 | wDiagonal, 31 | lRows, 32 | lCols, 33 | this->dev, 34 | this->ctx, 35 | this->queue ); 36 | } 37 | 38 | 39 | -------------------------------------------------------------------------------- /src/opencl/level1/stencil2d/MPIOpenCLStencilFactory.h: -------------------------------------------------------------------------------- 1 | #ifndef MPIOPENCLSTENCILFACTORY_H 2 | #define 
#ifndef GRAPH_H
#define GRAPH_H

// Include guard added: without it, including this header twice in one
// translation unit redefines class Graph.

#define MAX_LINE_LENGTH 500000

// ****************************************************************************
// Class: Graph
//
// Purpose:
//   Holds a graph as vertex/edge counts plus three unsigned int arrays
//   (edge offsets, edge list, edge costs).  It can load and save METIS
//   graph files and generate a simple k-way graph.
//
// Notes:
//   NOTE(review): the class holds raw array pointers and declares a
//   destructor but no copy constructor or copy assignment.  If the
//   destructor frees the arrays (if_delete_arrays suggests it may), copying
//   a Graph would lead to a double free -- confirm against Graph.cpp.
// ****************************************************************************
class Graph
{
    unsigned int num_verts;
    unsigned int num_edges;
    unsigned int adj_list_length;
    unsigned int *edge_offsets;
    unsigned int *edge_list;
    unsigned int *edge_costs;
    unsigned int max_degree;
    int graph_type;

    // presumably records whether this object should free the arrays -- confirm
    bool if_delete_arrays;

    void SetAllCosts(unsigned int c);
    public:
    Graph();
    ~Graph();
    void LoadMetisGraph(const char *filename);
    void SaveMetisGraph(const char *filename);
    unsigned int GetNumVertices();
    unsigned int GetNumEdges();
    unsigned int GetMaxDegree();

    // Accessors returning the array pointers themselves.
    unsigned int *GetEdgeOffsets();
    unsigned int *GetEdgeList();
    unsigned int *GetEdgeCosts();

    // Accessors returning pointers to the array pointers.
    unsigned int **GetEdgeOffsetsPtr();
    unsigned int **GetEdgeListPtr();
    unsigned int **GetEdgeCostsPtr();

    unsigned int *GetVertexLengths(unsigned int *cost,unsigned int source);
    int GetMetisGraphType();
    unsigned int GetAdjacencyListLength();
    void GenerateSimpleKWayGraph(unsigned int verts,unsigned int degree);
};

#endif // GRAPH_H
32 | // 33 | void 34 | SerialStencilTimingReporter::ReportTimings( ResultDatabase& resultDB ) const 35 | { 36 | resultDB.DumpDetailed( std::cout ); 37 | } 38 | 39 | -------------------------------------------------------------------------------- /src/mpi/common/MPIHostStencil.h: -------------------------------------------------------------------------------- 1 | #ifndef MPIHOSTSTENCIL_H 2 | #define MPIHOSTSTENCIL_H 3 | 4 | #include 5 | #include "mpi.h" 6 | #include "HostStencil.h" 7 | #include "MPI2DGridProgram.h" 8 | 9 | 10 | // **************************************************************************** 11 | // Class: MPIHostStencil 12 | // 13 | // Purpose: 14 | // Stencils for MPI hosts. 15 | // 16 | // Programmer: Phil Roth 17 | // Creation: November 5, 2009 18 | // 19 | // **************************************************************************** 20 | template 21 | class MPIHostStencil : public HostStencil, public MPI2DGridProgram 22 | { 23 | private: 24 | std::ofstream ofs; 25 | bool dumpData; 26 | 27 | protected: 28 | virtual void DoPreIterationWork( Matrix2D& mtx, unsigned int iter ); 29 | 30 | public: 31 | MPIHostStencil( T wCenter, 32 | T wCardinal, 33 | T wDiagonal, 34 | size_t mpiGridRows, 35 | size_t mpiGridCols, 36 | unsigned int nItersPerHaloExchange, 37 | bool dumpData = false ); 38 | 39 | virtual void operator()( Matrix2D&, unsigned int nIters ); 40 | }; 41 | 42 | #endif /* MPIHOSTSTENCIL_H */ 43 | -------------------------------------------------------------------------------- /src/cuda/level1/stencil2d/CUDAStencil.h: -------------------------------------------------------------------------------- 1 | #ifndef CUDASTENCIL_H 2 | #define CUDASTENCIL_H 3 | 4 | #include "Stencil.h" 5 | 6 | // **************************************************************************** 7 | // Class: CUDAStencil 8 | // 9 | // Purpose: 10 | // CUDA implementation of 9-point stencil. 
11 | // 12 | // Programmer: Phil Roth 13 | // Creation: October 28, 2009 14 | // 15 | // **************************************************************************** 16 | template 17 | class CUDAStencil : public Stencil 18 | { 19 | private: 20 | size_t lRows; 21 | size_t lCols; 22 | int device; 23 | 24 | protected: 25 | virtual void DoPreIterationWork( T* currBuf, // in device global memory 26 | T* altBuf, // in device global memory 27 | Matrix2D& mtx, 28 | unsigned int iter ); 29 | 30 | public: 31 | CUDAStencil( T _wCenter, 32 | T _wCardinal, 33 | T _wDiagonal, 34 | size_t _lRows, 35 | size_t _lCols, 36 | int _device ); 37 | 38 | virtual void operator()( Matrix2D&, unsigned int nIters ); 39 | }; 40 | 41 | #endif /* CUDASTENCIL_H */ 42 | -------------------------------------------------------------------------------- /src/cuda/level1/stencil2d/CommonCUDAStencilFactory.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMONCUDASTENCILFACTORY_H 2 | #define COMMONCUDASTENCILFACTORY_H 3 | 4 | #include 5 | #include "StencilFactory.h" 6 | 7 | // **************************************************************************** 8 | // Class: CommonCUDAStencilFactory 9 | // 10 | // Purpose: 11 | // CUDA implementation of stencil factory. 
12 | // 13 | // Programmer: Phil Roth 14 | // Creation: October 28, 2009 15 | // 16 | // **************************************************************************** 17 | template 18 | class CommonCUDAStencilFactory : public StencilFactory 19 | { 20 | protected: 21 | void ExtractOptions( const OptionParser& options, 22 | T& wCenter, 23 | T& wCardinal, 24 | T& wDiagonal, 25 | size_t& lRows, 26 | size_t& lCols, 27 | std::vector& devices ); 28 | 29 | public: 30 | CommonCUDAStencilFactory( std::string _sname ) 31 | : StencilFactory( _sname ) 32 | { 33 | // nothing else to do 34 | } 35 | 36 | virtual void CheckOptions( const OptionParser& opts ) const; 37 | }; 38 | 39 | #endif // COMMONCUDASTENCILFACTORY_H 40 | 41 | -------------------------------------------------------------------------------- /src/opencl/level2/s3d/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(OCL_LDFLAGS) 13 | CPPFLAGS += $(OCL_CPPFLAGS) 14 | L1LIBS = -lSHOCCommonOpenCL -lSHOCCommon $(OCL_LIBS) $(LIBS) 15 | 16 | # what is the destination for programs built from this directory? 17 | serialopencldir = $(bindir)/Serial/OpenCL 18 | 19 | # what programs should be installed in the destination? 
20 | serialopencl_PROGRAMS = S3D 21 | 22 | # how to build those programs 23 | S3D_SOURCES = S3D.cpp \ 24 | main.cpp \ 25 | gr_base_cl.cpp \ 26 | qssa_cl.cpp \ 27 | qssab_cl.cpp \ 28 | qssa2_cl.cpp \ 29 | ratt_cl.cpp \ 30 | ratt2_cl.cpp \ 31 | ratt3_cl.cpp \ 32 | ratt4_cl.cpp \ 33 | ratt5_cl.cpp \ 34 | ratt6_cl.cpp \ 35 | ratt7_cl.cpp \ 36 | ratt8_cl.cpp \ 37 | ratt9_cl.cpp \ 38 | ratt10_cl.cpp \ 39 | ratx_cl.cpp \ 40 | ratxb_cl.cpp \ 41 | ratx2_cl.cpp \ 42 | ratx4_cl.cpp \ 43 | rdsmh_cl.cpp \ 44 | rdwdot_cl.cpp \ 45 | rdwdot2_cl.cpp \ 46 | rdwdot3_cl.cpp \ 47 | rdwdot6_cl.cpp \ 48 | rdwdot7_cl.cpp \ 49 | rdwdot8_cl.cpp \ 50 | rdwdot9_cl.cpp \ 51 | rdwdot10_cl.cpp 52 | S3D_LDADD = $(L1LIBS) 53 | 54 | -------------------------------------------------------------------------------- /src/common/InitializeMatrix2D.h: -------------------------------------------------------------------------------- 1 | #ifndef INITIALIZE_H 2 | #define INITIALIZE_H 3 | 4 | #include 5 | #include "Matrix2D.h" 6 | 7 | // **************************************************************************** 8 | // Class: Initialize 9 | // 10 | // Purpose: 11 | // Initialize 2D matrices. 
12 | // 13 | // Programmer: Phil Roth 14 | // Creation: October 28, 2009 15 | // 16 | // **************************************************************************** 17 | template 18 | class Initialize : public std::unary_function&, void> 19 | { 20 | private: 21 | long seed; 22 | unsigned int haloWidth; // width of halo 23 | T haloVal; // value to use for halo 24 | int rowPeriod; // period for row values 25 | int colPeriod; // period for column values 26 | 27 | public: 28 | Initialize( long int _seed, 29 | unsigned int _halo = 1, 30 | T _haloVal = 0, 31 | int _rowPeriod = -1, 32 | int _colPeriod = -1 ) 33 | : seed( _seed ), 34 | haloWidth( _halo ), 35 | haloVal( _haloVal ), 36 | rowPeriod( _rowPeriod ), 37 | colPeriod( _colPeriod ) 38 | { 39 | // nothing else to do 40 | } 41 | 42 | void operator()( Matrix2D& mtx ); 43 | }; 44 | 45 | #endif // INITIALIZE_H 46 | -------------------------------------------------------------------------------- /src/opencl/level2/s3d/epmpi/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../../common:$(srcdir)/../../../../common 6 | 7 | # Which compiler to use 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) 12 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I${top_srcdir}/src/mpi/common 13 | L1LIBS = -lSHOCCommonOpenCL -lSHOCCommon $(OCL_LIBS) $(LIBS) 14 | 15 | # what is the destination for programs built from this directory? 16 | epopencldir = $(bindir)/EP/OpenCL 17 | 18 | # what programs should be installed in the destination? 
19 | epopencl_PROGRAMS = S3D 20 | 21 | # how to build those programs 22 | S3D_SOURCES = S3D.cpp \ 23 | gr_base_cl.cpp \ 24 | qssa_cl.cpp \ 25 | qssab_cl.cpp \ 26 | qssa2_cl.cpp \ 27 | ratt_cl.cpp \ 28 | ratt2_cl.cpp \ 29 | ratt3_cl.cpp \ 30 | ratt4_cl.cpp \ 31 | ratt5_cl.cpp \ 32 | ratt6_cl.cpp \ 33 | ratt7_cl.cpp \ 34 | ratt8_cl.cpp \ 35 | ratt9_cl.cpp \ 36 | ratt10_cl.cpp \ 37 | ratx_cl.cpp \ 38 | ratxb_cl.cpp \ 39 | ratx2_cl.cpp \ 40 | ratx4_cl.cpp \ 41 | rdsmh_cl.cpp \ 42 | rdwdot_cl.cpp \ 43 | rdwdot2_cl.cpp \ 44 | rdwdot3_cl.cpp \ 45 | rdwdot6_cl.cpp \ 46 | rdwdot7_cl.cpp \ 47 | rdwdot8_cl.cpp \ 48 | rdwdot9_cl.cpp \ 49 | rdwdot10_cl.cpp \ 50 | main.cpp 51 | S3D_LDADD = $(L1LIBS) 52 | 53 | -------------------------------------------------------------------------------- /src/cuda/level1/neuralnet/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | SUBDIRS = $(MAYBE_MPI) 5 | 6 | include $(top_builddir)/config/config.mk 7 | include $(top_builddir)/config/targets.mk 8 | 9 | # How to find source files 10 | VPATH = $(srcdir):$(srcdir)/../../common:$(srcdir)/../../../common 11 | 12 | AM_LDFLAGS = $(CUDA_LDFLAGS) 13 | AM_CPPFLAGS = $(CUDA_INC) 14 | 15 | # What is the destination for programs built from this directory? 16 | serialcudadir = $(bindir)/Serial/CUDA 17 | 18 | # What programs should be installed to that destination? 19 | serialcuda_PROGRAMS = NeuralNet 20 | 21 | # How to build those programs? 
22 | NeuralNet_SOURCES = main.cpp 23 | NeuralNet_LDADD = NeuralNet.o $(CUDA_LIBS) $(LIBS) -lcublas 24 | 25 | # Unzip and move data to the bin directory, if supported 26 | if DATA_UNZIP 27 | #Adding these lines would cause automake to execute these lines 28 | #with install-data flags but this requires listing each file 29 | #separately; instead we use a custom command below 30 | #data_DATA = nn_data 31 | #nn_data: $(srcdir)/nn_data.zip 32 | # $(UNZIP) -o $(srcdir)/nn_data.zip 33 | # cp -prf nn_data $(bindir)/. 34 | 35 | #This hook runs in addition to the default install-data rules; DESTDIR is honored for staged installs 36 | install-data-local: 37 | $(UNZIP) -o $(srcdir)/nn_data.zip 38 | cp -prf nn_data $(DESTDIR)$(bindir)/. 39 | endif 40 | # clean-local extends (rather than replaces) the automake-generated clean rule 41 | clean-local: 42 | rm -rf nn_data NeuralNet 43 | -------------------------------------------------------------------------------- /src/opencl/level0/Makefile.am: -------------------------------------------------------------------------------- 1 | if BUILD_MPI 2 | MAYBE_MPI = epmpi 3 | endif 4 | 5 | SUBDIRS = $(MAYBE_MPI) 6 | 7 | include $(top_builddir)/config/config.mk 8 | include $(top_builddir)/config/targets.mk 9 | 10 | # How to find source files 11 | VPATH = $(srcdir):$(srcdir)/../common 12 | 13 | AM_LDFLAGS = $(OCL_LDFLAGS) 14 | AM_CPPFLAGS = $(OCL_CPPFLAGS) 15 | 16 | # what is the destination for programs built from this directory? 17 | serialopencldir = $(bindir)/Serial/OpenCL 18 | 19 | # what programs should be installed in the bin/Serial/OpenCL destination? 
20 | serialopencl_PROGRAMS = BusSpeedDownload \ 21 | BusSpeedReadback \ 22 | DeviceMemory \ 23 | KernelCompile \ 24 | MaxFlops \ 25 | QueueDelay 26 | 27 | BusSpeedDownload_SOURCES = BusSpeedDownload.cpp main.cpp 28 | BusSpeedDownload_LDADD = $(OCL_LIBS) $(LIBS) 29 | 30 | BusSpeedReadback_SOURCES = BusSpeedReadback.cpp main.cpp 31 | BusSpeedReadback_LDADD = $(OCL_LIBS) $(LIBS) 32 | 33 | DeviceMemory_SOURCES = DeviceMemory.cpp main.cpp 34 | DeviceMemory_LDADD = $(OCL_LIBS) $(LIBS) 35 | 36 | KernelCompile_SOURCES = KernelCompile.cpp main.cpp 37 | KernelCompile_LDADD = $(OCL_LIBS) $(LIBS) 38 | 39 | MaxFlops_SOURCES = MaxFlops.cpp main.cpp 40 | MaxFlops_LDADD = $(OCL_LIBS) $(LIBS) 41 | 42 | QueueDelay_SOURCES = QueueDelay.cpp main.cpp 43 | QueueDelay_LDADD = $(OCL_LIBS) $(LIBS) 44 | 45 | 46 | -------------------------------------------------------------------------------- /src/mpi/common/MPIStencilUtil.h: -------------------------------------------------------------------------------- 1 | #ifndef STENCIL_UTILS_MPI_H 2 | #define STENCIL_UTILS_MPI_H 3 | 4 | #include "StencilUtil.h" 5 | 6 | 7 | // **************************************************************************** 8 | // Class: MPIStencilValidater 9 | // 10 | // Purpose: 11 | // MPI version of stencil validator. 12 | // 13 | // Programmer: Phil Roth 14 | // Creation: October 29, 2009 15 | // 16 | // **************************************************************************** 17 | template 18 | class MPIStencilValidater : public StencilValidater 19 | { 20 | public: 21 | virtual void ValidateResult( const Matrix2D& exp, 22 | const Matrix2D& data, 23 | double valErrThreshold, 24 | unsigned int nValErrsToPrint ) const; 25 | }; 26 | 27 | 28 | // **************************************************************************** 29 | // Class: MPIStencilTimingReporter 30 | // 31 | // Purpose: 32 | // MPI version of stencil timing reporter. 
33 | // 34 | // Programmer: Phil Roth 35 | // Creation: October 29, 2009 36 | // 37 | // **************************************************************************** 38 | class MPIStencilTimingReporter : public StencilTimingReporter 39 | { 40 | public: 41 | virtual void ReportTimings( ResultDatabase& resultDB ) const; 42 | }; 43 | 44 | 45 | #endif // STENCIL_UTILS_MPI_H 46 | -------------------------------------------------------------------------------- /src/cuda/common/support.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPPORT_H 2 | #define SUPPORT_H 3 | 4 | #include 5 | #include 6 | #include "cudacommon.h" 7 | #include 8 | using std::cin; 9 | using std::cout; 10 | 11 | // **************************************************************************** 12 | // Method: findAvailBytes 13 | // 14 | // Purpose: returns maximum number of bytes *allocatable* (likely less than 15 | // device memory size) on the device. 16 | // 17 | // Arguments: None. 
18 | // 19 | // Programmer: Collin McCurdy 20 | // Creation: June 8, 2010 21 | // 22 | // **************************************************************************** 23 | inline unsigned long 24 | findAvailBytes(void) 25 | { 26 | int device; 27 | cudaGetDevice(&device); 28 | CHECK_CUDA_ERROR(); 29 | cudaDeviceProp deviceProp; 30 | cudaGetDeviceProperties(&deviceProp, device); 31 | CHECK_CUDA_ERROR(); 32 | unsigned long total_bytes = deviceProp.totalGlobalMem; 33 | unsigned long avail_bytes = total_bytes; 34 | void* work = 0; 35 | const unsigned long step = 1024*1024; // shrink the request by 1 MiB after each failed attempt 36 | while (1) { 37 | cudaMalloc(&work, avail_bytes); 38 | if (cudaGetLastError() == cudaSuccess) { 39 | break; 40 | } 41 | if (avail_bytes <= step) return 0; /* nothing could be allocated; stop before the unsigned counter wraps around */ avail_bytes -= step; 42 | } 43 | cudaFree(work); 44 | CHECK_CUDA_ERROR(); 45 | 46 | return avail_bytes; 47 | } 48 | 49 | 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /src/cuda/level1/stencil2d/CUDAStencilFactory.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "CUDAStencilFactory.h" 5 | #include "CUDAStencil.h" 6 | 7 | 8 | template 9 | Stencil* 10 | CUDAStencilFactory::BuildStencil( const OptionParser& options ) 11 | { 12 | // get options for base class 13 | T wCenter; 14 | T wCardinal; 15 | T wDiagonal; 16 | size_t lRows; 17 | size_t lCols; 18 | std::vector devs; 19 | this->ExtractOptions( options, 20 | wCenter, 21 | wCardinal, 22 | wDiagonal, 23 | lRows, 24 | lCols, 25 | devs ); 26 | 27 | // determine which device to use 28 | // We would really prefer this to be done in main() but 29 | // since BuildStencil is a virtual function, we cannot change its 30 | // signature, and OptionParser provides no way to override an 31 | // option's value after it is set during parsing. 
32 | int chosenDevice = (int)devs[0]; 33 | 34 | return new CUDAStencil( wCenter, 35 | wCardinal, 36 | wDiagonal, 37 | lRows, 38 | lCols, 39 | chosenDevice ); 40 | } 41 | 42 | 43 | -------------------------------------------------------------------------------- /src/common/SerialStencilUtil.h: -------------------------------------------------------------------------------- 1 | #ifndef STENCIL_UTILS_SERIAL_H 2 | #define STENCIL_UTILS_SERIAL_H 3 | 4 | #include "StencilUtil.h" 5 | 6 | 7 | // **************************************************************************** 8 | // Class: SerialStencilValidater 9 | // 10 | // Purpose: 11 | // Single-processor version of stencil validator. 12 | // 13 | // Programmer: Phil Roth 14 | // Creation: October 29, 2009 15 | // 16 | // **************************************************************************** 17 | template 18 | class SerialStencilValidater : public StencilValidater 19 | { 20 | public: 21 | virtual void ValidateResult( const Matrix2D& exp, 22 | const Matrix2D& data, 23 | double valErrThreshold, 24 | unsigned int nValErrsToPrint ) const; 25 | }; 26 | 27 | 28 | // **************************************************************************** 29 | // Class: SerialStencilTimingReporter 30 | // 31 | // Purpose: 32 | // Single-processor version of stencil timing reporter. 
33 | // 34 | // Programmer: Phil Roth 35 | // Creation: October 29, 2009 36 | // 37 | // **************************************************************************** 38 | class SerialStencilTimingReporter : public StencilTimingReporter 39 | { 40 | public: 41 | virtual void ReportTimings( ResultDatabase& resultDB ) const; 42 | }; 43 | 44 | 45 | #endif // STENCIL_UTILS_SERIAL_H 46 | -------------------------------------------------------------------------------- /src/common/StencilFactory.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "StencilFactory.h" 3 | #include "InvalidArgValue.h" 4 | 5 | 6 | template 7 | void 8 | StencilFactory::CheckOptions( const OptionParser& options ) const 9 | { 10 | // number of iterations must be positive 11 | unsigned int nIters = (unsigned int)options.getOptionInt( "num-iters" ); 12 | if( nIters == 0 ) 13 | { 14 | throw InvalidArgValue( "number of iterations must be positive" ); 15 | } 16 | 17 | // no restrictions on weight values, just that we have them 18 | } 19 | 20 | template 21 | void 22 | StencilFactory::ExtractOptions( const OptionParser& options, 23 | T& wCenter, 24 | T& wCardinal, 25 | T& wDiagonal ) 26 | { 27 | wCenter = options.getOptionFloat( "weight-center" ); 28 | wCardinal = options.getOptionFloat( "weight-cardinal" ); 29 | wDiagonal = options.getOptionFloat( "weight-diagonal" ); 30 | } 31 | 32 | // Map a 1-based size class (1-4) to a standard problem size; returns that size twice in a two-element vector and throws InvalidArgValue for any other class. 33 | template 34 | std::vector 35 | StencilFactory::GetStandardProblemSize( int sizeClass ) 36 | { 37 | const int probSizes[4] = { 512, 1024, 2048, 4096 }; 38 | if (sizeClass < 1 || sizeClass > 4) 39 | { 40 | throw InvalidArgValue( "Size class must be between 1-4" ); 41 | } 42 | // size classes are 1-based, so shift to a 0-based index into probSizes 43 | std::vector ret( 2, probSizes[sizeClass - 1] ); 44 | return ret; 45 | } 46 | 47 | -------------------------------------------------------------------------------- /src/opencl/level0/epmpi/Makefile.am: -------------------------------------------------------------------------------- 
1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir)/..:$(srcdir)/../../common 6 | 7 | # which compiler to use to link 8 | CXX= ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | # which flags to use 12 | AM_LDFLAGS = $(OCL_LDFLAGS) 13 | CPPFLAGS += $(MPI_CPPFLAGS) $(OCL_CPPFLAGS) -I$(top_srcdir)/src/opencl/common -I$(top_srcdir)/src/mpi/common 14 | L0LIBS = -lSHOCCommonOpenCL -lSHOCCommon $(OCL_LIBS) $(LIBS) 15 | 16 | # what is the destination for programs built from this directory? 17 | epopencldir = $(bindir)/EP/OpenCL 18 | 19 | # what programs should be installed in the bin/EP/OpenCL destination? 20 | epopencl_PROGRAMS = BusSpeedDownload \ 21 | BusSpeedReadback \ 22 | DeviceMemory \ 23 | KernelCompile \ 24 | MaxFlops \ 25 | QueueDelay 26 | 27 | BusSpeedDownload_SOURCES = BusSpeedDownload.cpp main.cpp 28 | BusSpeedDownload_LDADD = $(L0LIBS) 29 | 30 | BusSpeedReadback_SOURCES = BusSpeedReadback.cpp main.cpp 31 | BusSpeedReadback_LDADD = $(L0LIBS) 32 | 33 | DeviceMemory_SOURCES = DeviceMemory.cpp main.cpp 34 | DeviceMemory_LDADD = $(L0LIBS) 35 | 36 | KernelCompile_SOURCES = KernelCompile.cpp main.cpp 37 | KernelCompile_LDADD = $(L0LIBS) 38 | 39 | MaxFlops_SOURCES = MaxFlops.cpp main.cpp 40 | MaxFlops_LDADD = $(L0LIBS) 41 | 42 | QueueDelay_SOURCES = QueueDelay.cpp main.cpp 43 | QueueDelay_LDADD = $(L0LIBS) 44 | 45 | 46 | -------------------------------------------------------------------------------- /src/mpi/common/MPIHostStencilFactory.cpp: -------------------------------------------------------------------------------- 1 | #include "mpi.h" 2 | #include 3 | #include "MPIHostStencilFactory.h" 4 | #include "MPIHostStencil.h" 5 | 6 | template 7 | Stencil* 8 | MPIHostStencilFactory::BuildStencil( const OptionParser& opts ) 9 | { 10 | // get options for base class 11 | T wCenter; 12 | T wCardinal; 13 | T wDiagonal; 14 | StencilFactory::ExtractOptions( opts, wCenter, wCardinal, 
wDiagonal ); 15 | 16 | // get our options 17 | std::vector mpiDims = opts.getOptionVecInt( "msize" ); 18 | long nItersPerExchange = opts.getOptionInt( "iters-per-exchange" ); 19 | 20 | return new MPIHostStencil( wCenter, 21 | wCardinal, 22 | wDiagonal, 23 | (size_t)mpiDims[0], 24 | (size_t)mpiDims[1], 25 | (unsigned int)nItersPerExchange 26 | ); 27 | } 28 | 29 | 30 | template 31 | void 32 | MPIHostStencilFactory::AddOptions( OptionParser& opts ) const 33 | { 34 | MPI2DGridProgram::AddOptions( opts ); 35 | } 36 | 37 | 38 | template 39 | void 40 | MPIHostStencilFactory::CheckOptions( const OptionParser& opts ) const 41 | { 42 | // let base class check its options 43 | StencilFactory::CheckOptions( opts ); 44 | 45 | // check our options 46 | MPI2DGridProgram::CheckOptions( opts ); 47 | } 48 | 49 | 50 | -------------------------------------------------------------------------------- /config/common.mk.in: -------------------------------------------------------------------------------- 1 | # === Basics === 2 | #CC = @CC@ 3 | #CXX = @CXX@ 4 | #LD = @CXX@ 5 | #AR = @AR@ 6 | #RANLIB = @RANLIB@ 7 | 8 | # In CPPFLAGS, note src/common is from the SHOC source tree, so we must 9 | # use $(srcdir). In contrast, the files in config used in the build are 10 | # generated as part of the configuration, so we want to find them in the 11 | # build tree - hence we do not use $(srcdir) for that -I specification. 
12 | #CPPFLAGS += -I$(top_srcdir)/src/common -I$(top_builddir)/config @CPPFLAGS@ 13 | CPPFLAGS += -I$(top_srcdir)/src/common -I$(top_builddir)/config 14 | #CFLAGS += @CFLAGS@ 15 | #CXXFLAGS += @CXXFLAGS@ 16 | NVCXXFLAGS = @NVCXXFLAGS@ 17 | #ARFLAGS = rcv 18 | #LDFLAGS = @LDFLAGS@ -L$(top_builddir)/src/common 19 | LDFLAGS += -L$(top_builddir)/src/common 20 | LIBS = @LIBS@ 21 | 22 | USE_MPI = @USE_MPI@ 23 | MPICXX = @MPICXX@ 24 | MPI_CPPFLAGS = -DPARALLEL 25 | 26 | OCL_CPPFLAGS = -I$(top_srcdir)/src/opencl/common 27 | OCL_LDFLAGS = -L$(top_builddir)/src/opencl/common 28 | OCL_LIBS = -lSHOCCommonOpenCL -lSHOCCommon @OPENCL_LIBS@ 29 | 30 | NVCC = @NVCC@ 31 | CUDA_CXX = @NVCC@ 32 | CUDA_INC = -I@CUDA_INCDIR@ -I$(top_srcdir)/src/cuda/common 33 | CUDA_LDFLAGS = -L$(top_builddir)/src/cuda/common 34 | CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ -I$(top_srcdir)/src/cuda/common 35 | 36 | USE_CUDA = @USE_CUDA@ 37 | ifeq ($(USE_CUDA),yes) 38 | CUDA_LIBS := -lSHOCCommon $(shell $(top_srcdir)/config/find_cuda_libs.sh @NVCC@) 39 | else 40 | CUDA_LIBS = 41 | endif 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /src/common/StencilUtil.h: -------------------------------------------------------------------------------- 1 | #ifndef STENCIL_UTILS_H 2 | #define STENCIL_UTILS_H 3 | 4 | #include "Matrix2D.h" 5 | #include "ResultDatabase.h" 6 | #include "ValidateMatrix2D.h" 7 | 8 | 9 | // **************************************************************************** 10 | // Class: StencilValidater 11 | // 12 | // Purpose: 13 | // Validate results of stencil operations and print errors. 
14 | // 15 | // Programmer: Phil Roth 16 | // Creation: October 29, 2009 17 | // 18 | // **************************************************************************** 19 | template 20 | class StencilValidater 21 | { 22 | protected: 23 | void PrintValidationErrors( std::ostream& s, 24 | const std::vector >& validationErrors, 25 | unsigned int nValErrsToPrint ) const; 26 | public: 27 | virtual void ValidateResult( const Matrix2D& exp, 28 | const Matrix2D& data, 29 | double valErrThreshold, 30 | unsigned int nValErrsToPrint ) const = 0; 31 | }; 32 | 33 | 34 | // **************************************************************************** 35 | // Class: StencilTimingReporter 36 | // 37 | // Purpose: 38 | // Report timing results of stencil operations. 39 | // 40 | // Programmer: Phil Roth 41 | // Creation: October 29, 2009 42 | // 43 | // **************************************************************************** 44 | class StencilTimingReporter 45 | { 46 | public: 47 | virtual void ReportTimings( ResultDatabase& resultDB ) const = 0; 48 | }; 49 | 50 | 51 | #endif // STENCIL_UTILS_H 52 | -------------------------------------------------------------------------------- /src/cuda/level1/stencil2d/MPICUDAStencil.h: -------------------------------------------------------------------------------- 1 | #ifndef MPICUDASTENCIL_H 2 | #define MPICUDASTENCIL_H 3 | 4 | #include 5 | #include 6 | #include "CUDAStencil.h" 7 | #include "MPI2DGridProgram.h" 8 | 9 | 10 | // **************************************************************************** 11 | // Class: MPICUDAStencil 12 | // 13 | // Purpose: 14 | // MPI implementation of CUDA stencil 15 | // 16 | // Programmer: Phil Roth 17 | // Creation: November 5, 2009 18 | // 19 | // **************************************************************************** 20 | template 21 | class MPICUDAStencil : public CUDAStencil, public MPI2DGridProgram 22 | { 23 | private: 24 | std::ofstream ofs; 25 | bool dumpData; 26 | 27 | virtual void 
DoPreIterationWork( T* currBuf, // in device global memory 28 | T* altBuf, // in device global memory 29 | Matrix2D& mtx, 30 | unsigned int iter ); 31 | 32 | public: 33 | MPICUDAStencil( T _wCenter, 34 | T _wCardinal, 35 | T _wDiagonal, 36 | size_t _lRows, 37 | size_t _lCols, 38 | size_t _mpiGridRows, 39 | size_t _mpiGridCols, 40 | unsigned int _nItersPerHaloExchange, 41 | int _deviceIdx = 0, 42 | bool dumpData = false ); 43 | 44 | virtual void operator()( Matrix2D&, unsigned int nIters ); 45 | }; 46 | 47 | #endif // MPICUDASTENCIL_H 48 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2011, UT-Battelle, LLC 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | * Neither the name of Oak Ridge National Laboratory, nor UT-Battelle, LLC, nor 14 | the names of its contributors may be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | -------------------------------------------------------------------------------- /src/opencl/level1/stencil2d/CommonOpenCLStencilFactory.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMONOPENCLSTENCILFACTORY_H 2 | #define COMMONOPENCLSTENCILFACTORY_H 3 | 4 | #include 5 | #include "StencilFactory.h" 6 | #include "support.h" 7 | 8 | // **************************************************************************** 9 | // Class: CommonOpenCLStencilFactory 10 | // 11 | // Purpose: 12 | // OpenCL implementation of the stencil factory. 
13 | // 14 | // Programmer: Phil Roth 15 | // Creation: October 28, 2009 16 | // 17 | // **************************************************************************** 18 | template 19 | class CommonOpenCLStencilFactory : public StencilFactory 20 | { 21 | protected: 22 | cl_device_id dev; 23 | cl_context ctx; 24 | cl_command_queue queue; 25 | 26 | void ExtractOptions( const OptionParser& options, 27 | T& wCenter, 28 | T& wCardinal, 29 | T& wDiagonal, 30 | size_t& lRows, 31 | size_t& lCols ); 32 | 33 | public: 34 | CommonOpenCLStencilFactory( std::string _sname, 35 | cl_device_id _dev, 36 | cl_context _ctx, 37 | cl_command_queue _queue ) 38 | : StencilFactory( _sname ), 39 | dev( _dev ), 40 | ctx( _ctx ), 41 | queue( _queue ) 42 | { 43 | // nothing else to do 44 | } 45 | 46 | virtual void CheckOptions( const OptionParser& options ) const; 47 | }; 48 | 49 | #endif // COMMONOPENCLSTENCILFACTORY_H 50 | -------------------------------------------------------------------------------- /src/common/Matrix2DFileSupport.cpp: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX2DFILESUPPORT_H 2 | #define MATRIX2DFILESUPPORT_H 3 | 4 | template 5 | std::string 6 | GetMatrixFileName( std::string baseName ) 7 | { 8 | // nothing to do - this should never be instantiated 9 | assert( false ); 10 | return ""; 11 | } 12 | 13 | template<> 14 | std::string 15 | GetMatrixFileName( std::string baseName ) 16 | { 17 | return baseName + "-sp.dat"; 18 | } 19 | 20 | template<> 21 | std::string 22 | GetMatrixFileName( std::string baseName ) 23 | { 24 | return baseName + "-dp.dat"; 25 | } 26 | 27 | 28 | template 29 | bool 30 | SaveMatrixToFile( const Matrix2D& m, std::string fileName ) 31 | { 32 | bool ok = true; 33 | 34 | std::ofstream ofs( fileName.c_str(), ios::out | ios::binary ); 35 | if( ofs.is_open() ) 36 | { 37 | ok = m.WriteTo( ofs ); 38 | ofs.close(); 39 | } 40 | else 41 | { 42 | std::cerr << "Unable to write matrix to file \'" << fileName << 
"\'" << std::endl; 43 | ok = false; 44 | } 45 | return ok; 46 | } 47 | 48 | 49 | template 50 | bool 51 | ReadMatrixFromFile( Matrix2D& m, std::string fileName ) 52 | { 53 | bool ok = true; 54 | 55 | std::ifstream ifs( fileName.c_str(), ios::in | ios::binary ); 56 | if( ifs.is_open() ) 57 | { 58 | ok = m.ReadFrom( ifs ); 59 | ifs.close(); 60 | } 61 | else 62 | { 63 | std::cerr << "Unable to read matrix from file \'" << fileName << "\'" << std::endl; 64 | ok = false; 65 | } 66 | return ok; 67 | } 68 | 69 | #endif // MATRIX2DFILESUPPORT_H 70 | -------------------------------------------------------------------------------- /data/devices.csv: -------------------------------------------------------------------------------- 1 | device,name,Host CPU,Host Memory,PCIe Gen (Host),Motherboard,Host OS,Driver Version 2 | hd5870,ATI Radeon HD5870,,,,,, 3 | gtx580,NV GeForce GTX580,,,,,, 4 | gtx680,NV GeForce GTX680,,,,,, 5 | gtx690,NV GeForce GTX690,,,,,, 6 | gtx480,NV GeForce GTX480,,,,,, 7 | gtx980,NV GeForce GTX980,Intel i5-2550K 3.30 GHz,"8 GB DDR3, 1600 MHz",2,GIGABYTE GA-Z68MA-D2H-B3,Linux 3.13.0-39, 8 | m2070ecc,NV Tesla M2070-ECC,,,,,, 9 | c2050noecc,NV Tesla C2050-No ECC,,,,,, 10 | c1060, NV Tesla C1060,,,,,, 11 | ion,NV ION,,,,,, 12 | nehalem,Intel 2.27Ghz Nehalem,,,,,, 13 | gtx570,NV GeForce GTX570,,,,,, 14 | gtx465,NV GeForce GTX465,,,,,, 15 | gtx470,NV GeForce GTX470,,,,,, 16 | hd5750,ATI Radeon HD5750,,,,,, 17 | hd5770,ATI Radeon HD5770,,,,,, 18 | hd5850,ATI Radeon HD5850,,,,,, 19 | hd5970,ATI Radeon HD5970,,,,,, 20 | m2090,NV Tesla M2090,,,,,, 21 | sb00,Intel Core i7-2600,,,,,, 22 | haswellgt2,Intel GT2 iGPU,Intel Core i7-4770,"16 GB DDR3, 1600MHz",3,Aspire T3-605,Linux 3.13.0-34,Intel Beignet 1.0.0 23 | haswell,Intel Core i7-4770,Intel Core i7-4770,"16 GB DDR3, 1600MHz",3,Aspire T3-605,Linux 3.13.0-34,Intel OCL 1.2.0.117 24 | hd7970,ATI Radeon HD7970,,,,,, 25 | hd5670,ATI Radeon HD5670,,,,,, 26 | llano,AMD Llano A8-3850 fGPU,,,,,, 27 | trinity1,AMD Trinity 
A10-5800K CPU,,,,,, 28 | trinity2,AMD Trinity A10-5800K fGPU,,,,,, 29 | gtxtitan, NV GeForce GTX Titan,,,,,, 30 | jetsontk1, NV Jetson TK1,,,,,, 31 | K40,NVIDIA K40c,Intel E5520 Nehalem 2.27 GHz ,12 GB DDR3,2,Supermicro X8DTG-QF,, 32 | gtx750TiSC,NVIDIA EVGA GeForce 750 Ti Superclocked,Intel Xeon W3505 2.53 GHz,12 GB DDR3-1333,2,,Ubuntu 12.04,NV 331.67 33 | w9100,AMD FirePro 9100,,,,,, 34 | -------------------------------------------------------------------------------- /src/mpi/contention/opencl/Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_builddir)/config/config.mk 2 | include $(top_builddir)/config/targets.mk 3 | 4 | # How to find source files 5 | VPATH = $(srcdir):$(srcdir)/..:$(top_srcdir)/src/opencl/level0 6 | 7 | # Which compiler to use? 8 | CXX = ${MPICXX} 9 | CXXLD = ${MPICXX} 10 | 11 | AM_LDFLAGS = $(OCL_LDFLAGS) -L$(top_builddir)/src/mpi/common 12 | AM_CPPFLAGS = $(OCL_CPPFLAGS) $(MPI_CPPFLAGS) -I$(top_srcdir)/src/mpi/common 13 | 14 | # What is the destination for programs built from this directory? 15 | epopencldir = $(bindir)/EP/OpenCL 16 | 17 | # What programs should be installed to that destination? 18 | epopencl_PROGRAMS = BusCont 19 | 20 | # How to build those programs? 21 | # Note: we would prefer to put BusSpeedDownload and main.cpp in the 22 | # SOURCES list. However, our VPATH must contain src/opencl/level0 23 | # so that our build can find the BusSpeedDownload.cpp file. Since 24 | # this directory is built after that src/opencl/level0 directory is 25 | # built, that directory already has a BusSpeedDownload.o and a main.o file. 26 | # With our VPATH, gnumake finds those existing .o files and doesn't build 27 | # them here. This isn't as much a problem for BusSpeedDownload.o, which 28 | # is built the same as for the non-contention tests, but the main.cpp file 29 | # we use is different than the main.cpp file used in the non-contention tests. 
30 | # 31 | # Hence, we have to list the object files for those files in our LDADD list. 32 | # 33 | BusCont_SOURCES = OCLDriver.cpp \ 34 | BusCont.cpp \ 35 | bcmain.cpp 36 | BusCont_LDADD = $(top_builddir)/src/opencl/level0/BusSpeedDownload.o -lSHOCCommonMPI $(OCL_LIBS) $(LIBS) 37 | 38 | -------------------------------------------------------------------------------- /config/conf-titan.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Titan is a Cray XK7 with NVIDIA K20X (Kepler) GPUs, one per node. 4 | 5 | # In the following, we are building with the Cray compiler drivers (named 6 | # cc for C, CC for C++). These drivers know how to find CUDA and OpenCL, 7 | # as long as the CUDA module is loaded when we configure, and they also know 8 | # how to build MPI programs. 9 | # However, during configuration the autoconf script tries to run the 10 | # executables it builds and since we expect to be building on the login node, 11 | # some of the libraries the compiler driver links in are not available 12 | # for running the program. 13 | # Thus, we must trick configure into thinking we are cross compiling. The 14 | # --host flag is how we indicate we are cross compiling. 15 | 16 | # A typical build might look like: 17 | # $ module swap PrgEnv-pgi PrgEnv-gnu 18 | # $ module load craype-accel-nvidia35 19 | # $ sh ./config/conf-titan.sh 20 | # $ make 21 | 22 | # We explicitly pass MPICXX variable because the SHOC configure script 23 | # only tries more common MPI C++ compiler names like mpicxx. 24 | 25 | # We explicitly pass a value in the CUDA_CPPFLAGS environment variable 26 | # to limit the number of CUDA architectures the SHOC build will support. 27 | # We do this mainly to reduce the amount of time it takes to build SHOC, 28 | # though it has some beneficial effect on the final sizes of the executables 29 | # compared to the default. 
30 | # 31 | 32 | 33 | CC=cc \ 34 | CXX=CC \ 35 | MPICXX=CC \ 36 | sh ./configure \ 37 | CUDA_CPPFLAGS="-gencode=arch=compute_35,code=sm_35" \ 38 | --host=x86_64-unknown-linux-gnu \ 39 | --with-opencl \ 40 | --with-cuda \ 41 | --with-mpi 42 | 43 | -------------------------------------------------------------------------------- /src/common/StencilFactory.h: -------------------------------------------------------------------------------- 1 | #ifndef STENCILFACTORY_H 2 | #define STENCILFACTORY_H 3 | 4 | #include 5 | #include "OptionParser.h" 6 | #include "Stencil.h" 7 | 8 | // **************************************************************************** 9 | // Class: StencilFactory 10 | // 11 | // Purpose: 12 | // Class to generate stencils. 13 | // 14 | // Programmer: Phil Roth 15 | // Creation: October 28, 2009 16 | // 17 | // **************************************************************************** 18 | template 19 | class StencilFactory 20 | { 21 | public: 22 | typedef std::map FactoryMap; 23 | 24 | private: 25 | // map of class name to a StencilFactory object 26 | // would be much easier if C++ classes were first class objects 27 | // so that we could programmatically construct a class name and 28 | // then create an instance of that class 29 | static FactoryMap* factoryMap; 30 | 31 | std::string sname; 32 | 33 | protected: 34 | void ExtractOptions( const OptionParser& options, 35 | T& wCenter, 36 | T& wCardinal, 37 | T& wDiagonal ); 38 | 39 | public: 40 | StencilFactory( std::string _sname ) 41 | : sname( _sname ) 42 | { 43 | // nothing else to do 44 | } 45 | virtual ~StencilFactory( void ) { } 46 | 47 | std::string GetStencilName( void ) { return sname; } 48 | 49 | virtual Stencil* BuildStencil( const OptionParser& options ) = 0; 50 | virtual void CheckOptions( const OptionParser& options ) const = 0; 51 | 52 | static std::vector GetStandardProblemSize( int sizeClass ); 53 | }; 54 | 55 | #endif // STENCILFACTORY_H 56 | 
-------------------------------------------------------------------------------- /src/mpi/common/ParallelHelpers.h: -------------------------------------------------------------------------------- 1 | #ifndef PARALLEL_HELPERS_H 2 | #define PARALLEL_HELPERS_H 3 | 4 | #include 5 | #include "GetMPIType.h" 6 | 7 | // **************************************************************************** 8 | // File: ParallelHelpers.h 9 | // 10 | // Purpose: 11 | // Various C++ encapsulations of MPI routines 12 | // 13 | // Programmer: Jeremy Meredith 14 | // Creation: August 14, 2009 15 | // 16 | // Modifications: 17 | // Jeremy Meredith, Tue Jan 12 14:39:40 EST 2010 18 | // Added ParAllGather. 19 | // 20 | // **************************************************************************** 21 | 22 | template 23 | T ParSumAcrossProcessors(const T &val, MPI_Comm comm) 24 | { 25 | T newval; 26 | MPI_Allreduce((void*)&val, &newval, 1, 27 | GetMPIType(val), MPI_SUM, comm); 28 | return newval; 29 | } 30 | 31 | template 32 | vector ParGather(const T &val, MPI_Comm comm) 33 | { 34 | int rank, size; 35 | MPI_Comm_size(comm, &size); 36 | MPI_Comm_rank(comm, &rank); 37 | vector retval; 38 | if (rank==0) 39 | retval.resize(size); 40 | MPI_Datatype t = GetMPIType(val); 41 | MPI_Gather((void*)(&val), 1, t, 42 | &(retval[0]), 1, t, 43 | 0, comm); 44 | return retval; 45 | } 46 | 47 | template 48 | vector ParAllGather(const T &val, MPI_Comm comm) 49 | { 50 | int rank, size; 51 | MPI_Comm_size(comm, &size); 52 | vector retval; 53 | retval.resize(size); 54 | MPI_Datatype t = GetMPIType(val); 55 | MPI_Allgather((void*)(&val), 1, t, 56 | &(retval[0]), 1, t, 57 | comm); 58 | return retval; 59 | } 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /src/opencl/level1/stencil2d/MPIOpenCLStencil.h: -------------------------------------------------------------------------------- 1 | #ifndef MPIOPENCLSTENCIL_H 2 | #define MPIOPENCLSTENCIL_H 3 | 4 | #include 5 
| #include 6 | #include "OpenCLStencil.h" 7 | #include "MPI2DGridProgram.h" 8 | 9 | 10 | // **************************************************************************** 11 | // Class: MPIOpenCLStencil 12 | // 13 | // Purpose: 14 | // MPI implementation of OpenCL stencil 15 | // 16 | // Programmer: Phil Roth 17 | // Creation: November 5, 2009 18 | // 19 | // **************************************************************************** 20 | template 21 | class MPIOpenCLStencil : public OpenCLStencil, public MPI2DGridProgram 22 | { 23 | private: 24 | std::ofstream ofs; 25 | bool dumpData; 26 | 27 | T* eData; 28 | T* wData; 29 | 30 | virtual void DoPreIterationWork( cl_mem buf, 31 | cl_mem altbuf, 32 | Matrix2D& mtx, 33 | unsigned int iter, 34 | cl_command_queue queue ); 35 | 36 | public: 37 | MPIOpenCLStencil( T wCenter, 38 | T wCardinal, 39 | T wDiagonal, 40 | size_t _lRows, 41 | size_t _lCols, 42 | size_t _mpiGridRows, 43 | size_t _mpiGridCols, 44 | unsigned int _nItersPerHaloExchange, 45 | cl_device_id dev, 46 | cl_context ctx, 47 | cl_command_queue queue, 48 | bool _dumpData = false ); 49 | virtual ~MPIOpenCLStencil( void ); 50 | 51 | virtual void operator()( Matrix2D&, unsigned int nIters ); 52 | }; 53 | 54 | #endif // MPIOPENCLSTENCIL_H 55 | -------------------------------------------------------------------------------- /LICENSE-CUDPP.txt: -------------------------------------------------------------------------------- 1 | Some portions of the source code are based on the CUDA Data Parallel Primitives 2 | Library and are subject to the following. 3 | 4 | Copyright (c) 2007-2011 The Regents of the University of California, Davis 5 | campus ("The Regents") and NVIDIA Corporation ("NVIDIA"). All rights reserved. 
6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | * Neither the name of the The Regents, nor NVIDIA, nor the names of its 16 | contributors may be used to endorse or promote products derived from this 17 | software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 23 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 26 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 27 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 28 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /src/cuda/level1/scan/TPScan.h: -------------------------------------------------------------------------------- 1 | #ifndef __TPSCAN_H 2 | #define __TPSCAN_H 3 | 4 | // When using MPICH and MPICH-derived MPI implementations, there is a 5 | // naming conflict between stdio.h and MPI's C++ binding. 
6 | // Since we do not use the C++ MPI binding, we can avoid the ordering 7 | // issue by ignoring the C++ MPI binding headers. 8 | // This #define should be quietly ignored when using other MPI implementations. 9 | #define MPICH_SKIP_MPICXX 10 | #include "mpi.h" 11 | 12 | // Templated wrapper for MPI_Exscan 13 | template 14 | inline void globalExscan(T* local_result, T* global_result); 15 | 16 | template <> 17 | inline void globalExscan(float* local_result, float* global_result) 18 | { 19 | MPI_Exscan(local_result, global_result, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); 20 | } 21 | 22 | template <> 23 | inline void globalExscan(double* local_result, double* global_result) 24 | { 25 | MPI_Exscan(local_result, global_result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 26 | } 27 | 28 | template 29 | void 30 | LaunchReduceKernel( int num_blocks, 31 | int num_threads, 32 | int smem_size, 33 | T* d_idata, 34 | T* d_odata, 35 | int size ); 36 | 37 | template 38 | void 39 | LaunchTopScanKernel( int num_blocks, 40 | int num_threads, 41 | int smem_size, 42 | T* d_block_sums, 43 | int size ); 44 | 45 | template 46 | void 47 | LaunchBottomScanKernel( int num_blocks, 48 | int num_threads, 49 | int smem_size, 50 | T* g_idata, 51 | T* g_odata, 52 | T* d_block_sums, 53 | int size ); 54 | 55 | #endif // __TPSCAN_H 56 | -------------------------------------------------------------------------------- /src/common/Stencil.h: -------------------------------------------------------------------------------- 1 | #ifndef STENCIL_H 2 | #define STENCIL_H 3 | 4 | #include 5 | #include 6 | #include "Matrix2D.h" 7 | 8 | // **************************************************************************** 9 | // Class: Stencil 10 | // 11 | // Purpose: 12 | // 9-point stencil. 
13 | // 14 | // Programmer: Phil Roth 15 | // Creation: October 28, 2009 16 | // 17 | // **************************************************************************** 18 | template 19 | class Stencil : public std::binary_function&, unsigned int, void> 20 | { 21 | protected: 22 | T wCenter; 23 | T wCardinal; 24 | T wDiagonal; 25 | 26 | protected: 27 | T GetCenterWeight( void ) const { return wCenter; } 28 | T GetCardinalWeight( void ) const { return wCardinal; } 29 | T GetDiagonalWeight( void ) const { return wDiagonal; } 30 | 31 | public: 32 | Stencil( T _wCenter, 33 | T _wCardinal, 34 | T _wDiagonal ) 35 | : wCenter( _wCenter ), 36 | wCardinal( _wCardinal ), 37 | wDiagonal( _wDiagonal ) 38 | { 39 | // nothing else to do 40 | } 41 | 42 | virtual ~Stencil( void ) 43 | { 44 | // nothing to do 45 | } 46 | 47 | 48 | /* 49 | * This is a 9-point stencil using three weights: 50 | * wCenter is applied to the stencil 'center' 51 | * wCardinal is applied to the sum of the stencil NSEW values 52 | * wDiagonal is applied to the sum of the stencil diagonal values 53 | * 54 | * note two things: 55 | * We use the overall boundary values but do not update them. 56 | * We apply wCardinal and wDiagonal *only* to the sum of the NSEW and 57 | * diagonal values. We don't do any other averaging, etc. 
58 | */ 59 | virtual void operator()( Matrix2D& m, unsigned int nIters ) = 0; 60 | }; 61 | 62 | #endif // STENCIL_H 63 | -------------------------------------------------------------------------------- /src/opencl/common/OpenCLNodePlatformContainer.h: -------------------------------------------------------------------------------- 1 | #ifndef OPENCL_NODE_PLATFORM_CONTAINER_H 2 | #define OPENCL_NODE_PLATFORM_CONTAINER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "support.h" 8 | #include "OpenCLPlatform.h" 9 | #include "NodePlatformContainer.h" 10 | 11 | using namespace std; 12 | 13 | // **************************************************************************** 14 | // Class: OpenCLNodePlatformContainer 15 | // 16 | // Purpose: 17 | // A container for all OpenCL platforms on a node. 18 | // 19 | // Notes: Extends the generic node platform container class 20 | // 21 | // Programmer: Gabriel Marin 22 | // Creation: September 22, 2009 23 | // 24 | // Modifications: 25 | // 26 | // **************************************************************************** 27 | namespace SHOC { 28 | 29 | class OpenCLNodePlatformContainer : public NodePlatformContainer 30 | { 31 | private: 32 | static const int MAGIC_KEY_OPENCL_NODE_CONTAINER; 33 | 34 | public: 35 | // constructor collects information about all platforms on this node 36 | OpenCLNodePlatformContainer (bool do_initialize = true); 37 | OpenCLNodePlatformContainer (const OpenCLNodePlatformContainer &ondc); 38 | OpenCLNodePlatformContainer& operator= (const OpenCLNodePlatformContainer &ondc); 39 | 40 | ~OpenCLNodePlatformContainer () { } 41 | 42 | void Print (ostream &os) const; 43 | 44 | void initialize(); 45 | 46 | virtual void writeObject (ostringstream &oss) const; 47 | virtual void readObject (istringstream &iss); 48 | 49 | bool operator< (const OpenCLNodePlatformContainer &ndc) const; 50 | bool operator> (const OpenCLNodePlatformContainer &ndc) const; 51 | bool operator== (const 
// ****************************************************************************
// Struct: ValidationErrorInfo
//
// Purpose:
//   Stores information about validation errors originating in a 2D grid.
//
// Notes:
//   Plain value record; the constructor only copies its arguments into the
//   members of the same name.
//   NOTE(review): i/j look like the grid coordinates of the offending
//   element, and val/exp like the observed and expected values -- confirm
//   against the code that fills these records.
//
// Programmer: Phil Roth
// Creation:   October 28, 2009
//
// ****************************************************************************
template <class T>
struct ValidationErrorInfo
{
    int i;
    int j;
    T val;
    T exp;
    double relErr;

    ValidationErrorInfo( int _i, int _j,
                         T _val,
                         T _exp,
                         double _relErr )
      : i( _i ),
        j( _j ),
        val( _val ),
        exp( _exp ),
        relErr( _relErr )
    {
        // nothing else to do
    }
};
47 | // 48 | // Programmer: Phil Roth 49 | // Creation: October 28, 2009 50 | // 51 | // **************************************************************************** 52 | template 53 | class Validate : public std::binary_function&, const Matrix2D&, std::vector > > 54 | { 55 | private: 56 | double relErrThreshold; 57 | 58 | public: 59 | Validate( double _relErrThreshold ) 60 | : relErrThreshold( _relErrThreshold ) 61 | { 62 | // nothing else to do 63 | } 64 | 65 | std::vector > operator()( const Matrix2D& s, const Matrix2D& t ); 66 | }; 67 | 68 | #endif // VALIDATE_H 69 | -------------------------------------------------------------------------------- /src/common/Matrix2D.cpp: -------------------------------------------------------------------------------- 1 | #ifdef HAVE_STDINT_H 2 | #include 3 | #endif // HAVE_STDINT_H 4 | #include "Matrix2D.h" 5 | 6 | #ifdef _WIN32 7 | typedef unsigned int uint32_t; 8 | #endif 9 | 10 | 11 | template 12 | bool 13 | Matrix2D::ReadFrom( std::istream& s ) 14 | { 15 | uint32_t nRowsUint; 16 | uint32_t nColsUint; 17 | 18 | s.read( (char*)&nRowsUint, sizeof(nRowsUint) ); 19 | s.read( (char*)&nColsUint, sizeof(nColsUint) ); 20 | 21 | uint32_t nPaddedColsUint = FindNumPaddedColumns( nColsUint, pad ); 22 | 23 | T* newDataFlat = new T[nRowsUint * nPaddedColsUint]; 24 | T** newData = new T*[nRowsUint]; 25 | for( size_t i = 0; i < nRowsUint; i++ ) 26 | { 27 | newData[i] = &(newDataFlat[i * nPaddedColsUint]); 28 | s.read( (char*)newData[i], nColsUint * sizeof(T) ); 29 | } 30 | 31 | if( s.good() ) 32 | { 33 | // we successfully read the matrix 34 | // release any old data 35 | delete[] data; 36 | delete[] flatData; 37 | 38 | // re-initialize with new data 39 | nRows = nRowsUint; 40 | nColumns = nColsUint; 41 | nPaddedColumns = nPaddedColsUint; 42 | flatData = newDataFlat; 43 | data = newData; 44 | } 45 | else 46 | { 47 | delete[] newDataFlat; 48 | delete[] newData; 49 | } 50 | 51 | return s.good(); 52 | } 53 | 54 | 55 | // note we do not write 
// The host selects the element type by defining exactly one of the macros
// tested below when it builds this program.
#ifdef SINGLE_PRECISION
#define FPTYPE float
#elif K_DOUBLE_PRECISION
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#define FPTYPE double
#elif AMD_DOUBLE_PRECISION
#pragma OPENCL EXTENSION cl_amd_fp64: enable
#define FPTYPE double
#else
#error "Define SINGLE_PRECISION, K_DOUBLE_PRECISION or AMD_DOUBLE_PRECISION"
#endif

// Sum reduction: each work-group writes one partial sum of g_idata to
// g_odata[group id].
//
//   g_idata  input values (n elements)
//   g_odata  output, one partial sum per work-group
//   sdata    local scratch, one element per work-item of the group
//   n        number of input elements
//
// The tree reduction below halves the active range each step, so it only
// visits every element of sdata when the work-group size is a power of two.
__kernel void
reduce(__global const FPTYPE *g_idata, __global FPTYPE *g_odata,
       __local FPTYPE* sdata, const unsigned int n)
{
    const unsigned int tid = get_local_id(0);
    const unsigned int blockSize = get_local_size(0);
    const unsigned int gridSize = blockSize*2*get_num_groups(0);
    unsigned int i = (get_group_id(0)*(blockSize*2)) + tid;

    sdata[tid] = 0;

    // Reduce multiple elements per thread, strided by grid size
    while (i < n)
    {
        // Each step consumes a pair of elements blockSize apart.  The
        // second element may lie past the end of the input when n is not a
        // multiple of 2*blockSize, so it must be bounds-checked.
        FPTYPE pairSum = g_idata[i];
        if (i + blockSize < n)
        {
            pairSum += g_idata[i + blockSize];
        }
        sdata[tid] += pairSum;
        i += gridSize;
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    // do reduction in shared mem
    for (unsigned int s = blockSize / 2; s > 0; s >>= 1)
    {
        if (tid < s)
        {
            sdata[tid] += sdata[tid + s];
        }
        // every work-item reaches the barrier, including the idle ones
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    // Write result back to global memory
    if (tid == 0)
    {
        g_odata[get_group_id(0)] = sdata[0];
    }
}


// Currently, CPUs on Snow Leopard only support a work group size of 1.
// So, we have a separate version of the kernel which doesn't use
// local memory. This version is only used when the maximum
// supported local group size is 1.
//
// Sums all n elements of g_idata serially and stores the total in
// g_odata[0].
__kernel void
reduceNoLocal(__global FPTYPE *g_idata, __global FPTYPE *g_odata,
       unsigned int n)
{
    FPTYPE sum = 0.0f;
    // unsigned index: matches the type of n, so the comparison is not
    // mixed-sign and the counter cannot overflow before reaching n
    for (unsigned int i = 0; i < n; i++)
    {
        sum += g_idata[i];
    }
    g_odata[0] = sum;
}
39 | #sh ./configure \ 40 | # CXX="nvcc" \ 41 | # CPP="nvcc" \ 42 | # --without-mpi \ 43 | # --without-opencl --with-cuda 44 | 45 | # Another issue on Mavericks (10.9.2) arises when compiling opencl with 46 | # clang. An alternative is to use gcc-4.8 (tested with the default config 47 | # in homebrew) and the following: 48 | #sh ./configure \ 49 | # CXXFLAGS="-m64" \ 50 | # CFLAGS="-m64" \ 51 | # NVCXXFLAGS="-m64" \ 52 | # CPP="g++-4.8" \ 53 | # CXX="g++-4.8" \ 54 | # --with-opencl --without-cuda --without-mpi 55 | 56 | 57 | -------------------------------------------------------------------------------- /src/opencl/common/OpenCLPlatform.h: -------------------------------------------------------------------------------- 1 | #ifndef OPENCL_PLATFORM_H 2 | #define OPENCL_PLATFORM_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "support.h" 8 | #include "OpenCLDeviceInfo.h" 9 | #include "Platform.h" 10 | 11 | using namespace std; 12 | 13 | namespace SHOC { 14 | 15 | // **************************************************************************** 16 | // Class: OpenCLPlatform 17 | // 18 | // Purpose: 19 | // Implements an OpenCL platform. A platform contains information about 20 | // zero or more devices. 
21 | // 22 | // Notes: Extends the generic platform class 23 | // 24 | // Programmer: Gabriel Marin 25 | // Creation: September 22, 2009 26 | // 27 | // Modifications: 28 | // 29 | // **************************************************************************** 30 | class OpenCLPlatform : public Platform 31 | { 32 | private: 33 | string platformName; 34 | string platformVendor; 35 | string platformVersion; 36 | string platformExtensions; 37 | static const int MAGIC_KEY_OPENCL_PLATFORM; 38 | 39 | static std::string LookupInfo( cl_platform_id platformID, cl_platform_info paramName ); 40 | 41 | public: 42 | // constructor collects information about all devices on this node 43 | OpenCLPlatform (); 44 | OpenCLPlatform (cl_platform_id platformID); 45 | OpenCLPlatform (const OpenCLPlatform &ocp); 46 | OpenCLPlatform& operator= (const OpenCLPlatform &ocp); 47 | 48 | ~OpenCLPlatform () { } 49 | 50 | void Print (ostream &os) const; 51 | 52 | virtual void writeObject (ostringstream &oss) const; 53 | virtual void readObject (istringstream &iss); 54 | 55 | bool operator< (const OpenCLPlatform &ocp) const; 56 | bool operator> (const OpenCLPlatform &ocp) const; 57 | bool operator== (const OpenCLPlatform &ocp) const; 58 | }; 59 | }; 60 | 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /src/opencl/level1/md/md.cl: -------------------------------------------------------------------------------- 1 | #ifdef SINGLE_PRECISION 2 | #define POSVECTYPE float4 3 | #define FORCEVECTYPE float4 4 | #define FPTYPE float 5 | #elif K_DOUBLE_PRECISION 6 | #pragma OPENCL EXTENSION cl_khr_fp64: enable 7 | #define POSVECTYPE double4 8 | #define FORCEVECTYPE double4 9 | #define FPTYPE double 10 | #elif AMD_DOUBLE_PRECISION 11 | #pragma OPENCL EXTENSION cl_amd_fp64: enable 12 | #define POSVECTYPE double4 13 | #define FORCEVECTYPE double4 14 | #define FPTYPE double 15 | #endif 16 | 17 | __kernel void compute_lj_force(__global FORCEVECTYPE *force, 18 | 
__global POSVECTYPE *position, 19 | const int neighCount, 20 | __global int* neighList, 21 | const FPTYPE cutsq, 22 | const FPTYPE lj1, 23 | const FPTYPE lj2, 24 | const int inum) 25 | { 26 | uint idx = get_global_id(0); 27 | 28 | POSVECTYPE ipos = position[idx]; 29 | FORCEVECTYPE f = {0.0f, 0.0f, 0.0f, 0.0f}; 30 | 31 | int j = 0; 32 | while (j < neighCount) 33 | { 34 | int jidx = neighList[j*inum + idx]; 35 | 36 | // Uncoalesced read 37 | POSVECTYPE jpos = position[jidx]; 38 | 39 | // Calculate distance 40 | FPTYPE delx = ipos.x - jpos.x; 41 | FPTYPE dely = ipos.y - jpos.y; 42 | FPTYPE delz = ipos.z - jpos.z; 43 | FPTYPE r2inv = delx*delx + dely*dely + delz*delz; 44 | 45 | // If distance is less than cutoff, calculate force 46 | if (r2inv < cutsq) 47 | { 48 | r2inv = 1.0f/r2inv; 49 | FPTYPE r6inv = r2inv * r2inv * r2inv; 50 | FPTYPE forceC = r2inv*r6inv*(lj1*r6inv - lj2); 51 | 52 | f.x += delx * forceC; 53 | f.y += dely * forceC; 54 | f.z += delz * forceC; 55 | } 56 | j++; 57 | } 58 | // store the results 59 | force[idx] = f; 60 | } 61 | -------------------------------------------------------------------------------- /src/mpi/common/MPIHostStencil.cpp: -------------------------------------------------------------------------------- 1 | #include "mpi.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "MPIHostStencil.h" 7 | 8 | 9 | template 10 | MPIHostStencil::MPIHostStencil( T _wCenter, 11 | T _wCardinal, 12 | T _wDiagonal, 13 | size_t _mpiGridRows, 14 | size_t _mpiGridCols, 15 | unsigned int _nItersPerHaloExchange, 16 | bool _dumpData ) 17 | : HostStencil( _wCenter, 18 | _wCardinal, 19 | _wDiagonal ), 20 | MPI2DGridProgram( _mpiGridRows, 21 | _mpiGridCols, 22 | _nItersPerHaloExchange ), 23 | dumpData( _dumpData ) 24 | { 25 | if( dumpData ) 26 | { 27 | std::ostringstream fnamestr; 28 | fnamestr << "host." 
<< std::setw( 4 ) << std::setfill('0') << this->GetCommWorldRank(); 29 | ofs.open( fnamestr.str().c_str() ); 30 | } 31 | } 32 | 33 | 34 | template 35 | void 36 | MPIHostStencil::operator()( Matrix2D& mtx, unsigned int nIters ) 37 | { 38 | if( this->ParticipatingInProgram() ) 39 | { 40 | HostStencil::operator()( mtx, nIters ); 41 | if( dumpData ) 42 | { 43 | this->DumpData( ofs, mtx, "after all iterations" ); 44 | } 45 | } 46 | MPI_Barrier( MPI_COMM_WORLD ); 47 | } 48 | 49 | 50 | template 51 | void 52 | MPIHostStencil::DoPreIterationWork( Matrix2D& mtx, unsigned int iter ) 53 | { 54 | if( (iter % this->GetNumberIterationsPerHaloExchange() ) == 0 ) 55 | { 56 | if( dumpData ) 57 | { 58 | this->DumpData( ofs, mtx, "before halo exchange" ); 59 | } 60 | this->DoHaloExchange( mtx ); 61 | if( dumpData ) 62 | { 63 | this->DumpData( ofs, mtx, "after halo exchange" ); 64 | } 65 | } 66 | } 67 | 68 | 69 | -------------------------------------------------------------------------------- /config/find_cuda_libs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # We do not use nvcc to link CUDA programs, because we may be linking 4 | # against MPI libraries also, and prefer to allow the MPI compiler 5 | # drivers to handle the link. 6 | # 7 | # This requires us to determine which libraries are needed to link CUDA 8 | # programs. We use nvcc -dryrun to determine which libraries are 9 | # needed to link CUDA programs. Prior to the release of CUDA version 6.0, 10 | # the output of nvcc -dryrun included a line of the form LIBRARIES=... 11 | # that indicated all libraries needed to link as -llib flags. 12 | # The nvcc distributed with CUDA 6.0 no longer lists the libraries 13 | # in the LIBRARIES line itself, but only as part of the actual 14 | # command that would have been executed to link the executable. 15 | # 16 | # For CUDA < 6.0, we just use the output of the LIBRARIES line. 
17 | # For CUDA 6.0, we determine the libraries to use by: 18 | # 19 | # Running nvcc -dryrun and saving the LIBRARIES line from the output. 20 | # Re-running nvcc -dryrun and parsing the link line to remove 21 | # everything before the LIBRARIES contents and possibly a -Wl,--end-group 22 | # specification. 23 | # 24 | if [ "$#" -ne 1 ] 25 | then 26 | echo "Usage: $0 " >&2 27 | echo " where is the filename or path to the nvcc executable to use." >&2 28 | exit 1 29 | fi 30 | NVCC=$1 31 | #echo "Using NVCC=$NVCC" 32 | 33 | cudart_flag_supported=0 34 | $NVCC -dryrun -cudart shared bogus.cu > /dev/null 2>&1 35 | if [ $? -eq 0 ] 36 | then 37 | cudart_flag_supported=1 38 | fi 39 | #echo "cudart_flag_supported=$cudart_flag_supported" 40 | 41 | libspec=`$NVCC -dryrun bogus.cu 2>&1 | grep LIBRARIES | sed 's/^.*LIBRARIES=//'` 42 | #echo "libspec=$libspec" 43 | if [ $cudart_flag_supported -eq 1 ] 44 | then 45 | cudalibs=`$NVCC -dryrun bogus.cu 2>&1 | tail -1 | sed "s#^.*-o \"a.out\"##" | sed 's#"[a-zA-Z0-9./_-]*\.o"##g' | sed 's/-Wl,--start-group//' | sed 's/-Wl,--end-group//'` 46 | else 47 | cudalibs=$libspec 48 | fi 49 | 50 | echo $cudalibs 51 | 52 | -------------------------------------------------------------------------------- /src/common/HostStencil.cpp: -------------------------------------------------------------------------------- 1 | #include // for memcpy 2 | #include "HostStencil.h" 3 | 4 | 5 | template 6 | void 7 | HostStencil::operator()( Matrix2D& mtx, unsigned int nIters ) 8 | { 9 | // we need a temp space buffer 10 | Matrix2D tmpMtx( mtx.GetNumRows(), mtx.GetNumColumns() ); 11 | 12 | // be able to access the matrices as 2D arrays 13 | typename Matrix2D::DataPtr mtxData = mtx.GetData(); 14 | typename Matrix2D::DataPtr tmpMtxData = tmpMtx.GetData(); 15 | 16 | 17 | for( unsigned int iter = 0; iter < nIters; iter++ ) 18 | { 19 | DoPreIterationWork( mtx, iter ); 20 | 21 | /* copy the "real" data to the temp matrix */ 22 | memcpy( tmpMtx.GetFlatData(), 23 | 
mtx.GetFlatData(), 24 | mtx.GetDataSize() ); 25 | 26 | 27 | /* Apply the stencil operator */ 28 | for( size_t i = 1; i < mtx.GetNumRows()-1; i++ ) 29 | { 30 | for( size_t j = 1; j < mtx.GetNumColumns()-1; j++ ) 31 | { 32 | T oldCenterValue = tmpMtxData[i][j]; 33 | T oldNSEWValues = (tmpMtxData[i-1][j] + 34 | tmpMtxData[i+1][j] + 35 | tmpMtxData[i][j-1] + 36 | tmpMtxData[i][j+1]); 37 | T oldDiagonalValues = (tmpMtxData[i-1][j-1] + 38 | tmpMtxData[i+1][j-1] + 39 | tmpMtxData[i-1][j+1] + 40 | tmpMtxData[i+1][j+1]); 41 | 42 | mtxData[i][j] = this->wCenter * oldCenterValue + 43 | this->wCardinal * oldNSEWValues + 44 | this->wDiagonal * oldDiagonalValues; 45 | } 46 | } 47 | } 48 | } 49 | 50 | 51 | template 52 | void 53 | HostStencil::DoPreIterationWork( Matrix2D& mtx, unsigned int iter ) 54 | { 55 | // we have nothing to do 56 | } 57 | 58 | -------------------------------------------------------------------------------- /data/REPORTING_RESULTS: -------------------------------------------------------------------------------- 1 | The data directory is meant to serve as a resource for SHOC runs using the 2 | shocdriver on different devices and platforms. Results can be contributed 3 | either by emailing a SHOC development member, issuing a pull request, or 4 | committing an update (for those with write access). If you would like to 5 | report a result for a device that is not listed, please provide the 6 | following: 7 | 8 | devices.csv: 9 | ----------- 10 | For a particular discrete accelerator, it is helpful to have the host system 11 | information to make comparisons between different devices and replicate 12 | tests. New results should include not just the device name and vendor 13 | but also the host platform used for testing. 14 | 15 | On a Linux system the CPU, memory size and speed, and motherboard can be 16 | found using dmidecode or lshw. In certain cases dmesg can also be used to 17 | find the specific motherboard model number. 
For the OS, the Linux kernel version 18 | from uname is preferred; the common name (e.g. OSX 10.6) can suffice for other 19 | OSes. 20 | 21 | specs.csv: 22 | ----------- 23 | For a particular accelerator, the key specifications for the device are 24 | reported. PCIe Gen (2.0, 3.0, etc.) and introduction date (or first sale date) 25 | are new fields that have been added. 26 | 27 | For devices with a boost clock (e.g. NVIDIA Maxwell), report the 28 | boost clock instead of the base clock. (This boost clock is the 29 | average clock speed at which the device actually runs, not 30 | the absolute maximum clock speed the device supports, and so should 31 | most closely represent the card's real-life performance.) 32 | 33 | platforms.csv: 34 | -------------- 35 | Here the software platforms for a particular set of experiments are added. 36 | 37 | results.csv: 38 | ------------ 39 | Results from a SHOC run using shocdriver are added to the results CSV and 40 | are correlated with the device and platform. New fields include the test 41 | size (4 is recommended) and the test date, which should be the date when 42 | the test was last run for the listed results. 43 | -------------------------------------------------------------------------------- /src/common/OptionParser.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTION_PARSER_H 2 | #define OPTION_PARSER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "Option.h" 10 | 11 | using namespace std; 12 | 13 | // **************************************************************************** 14 | // Class: OptionParser 15 | // 16 | // Purpose: 17 | // Class used to specify and parse command-line options to programs. 
18 | // 19 | // Programmer: Kyle Spafford 20 | // Creation: August 4, 2009 21 | // 22 | // **************************************************************************** 23 | class OptionParser 24 | { 25 | private: 26 | typedef std::map OptionMap; 27 | 28 | OptionMap optionMap; 29 | map shortLetterMap; 30 | 31 | bool helpRequested; 32 | 33 | public: 34 | 35 | OptionParser(); 36 | void addOption(const string &longName, 37 | OptionType type, 38 | const string &defaultValue, 39 | const string &helpText = "No help specified", 40 | char shortLetter = '\0'); 41 | 42 | void print() const; 43 | 44 | //Returns false on failure, true on success 45 | bool parse(int argc, const char *const argv[]); 46 | bool parse(const vector &args); 47 | bool parseFile(const string &fileName); 48 | 49 | //Accessors for options 50 | long long getOptionInt(const string &name) const; 51 | float getOptionFloat(const string &name) const; 52 | bool getOptionBool(const string &name) const; 53 | string getOptionString(const string &name) const; 54 | 55 | vector getOptionVecInt(const string &name) const; 56 | vector getOptionVecFloat(const string &name) const; 57 | vector getOptionVecString(const string &name) const; 58 | 59 | void printHelp(const string &optionName) const; 60 | void usage() const; 61 | 62 | bool HelpRequested( void ) const { return helpRequested; } 63 | }; 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /src/opencl/level1/fft/fftlib.h: -------------------------------------------------------------------------------- 1 | #ifndef FFTLIB_H 2 | #define FFTLIB_H 3 | 4 | #include "OptionParser.h" 5 | 6 | struct cplxflt { 7 | float x; 8 | float y; 9 | }; 10 | 11 | struct cplxdbl { 12 | double x; 13 | double y; 14 | }; 15 | 16 | void init(OptionParser& op, 17 | bool _do_dp, 18 | cl_device_id fftDev, 19 | cl_context fftCtx, 20 | cl_command_queue fftQueue, 21 | cl_program& fftProg, 22 | cl_kernel& fftKrnl, 23 | cl_kernel& ifftKrnl, 24 | 
cl_kernel& chkKrnl); 25 | 26 | void deinit(cl_command_queue fftQueue, 27 | cl_program& fftProg, 28 | cl_kernel& fftKrnl, 29 | cl_kernel& ifftKrnl, 30 | cl_kernel& chkKrnl); 31 | 32 | // Replaces forward and inverse, call with the 33 | // appropriate kernel 34 | void transform(void* workp, 35 | const int n_ffts, 36 | Event& fftEvent, 37 | cl_kernel& fftKrnl, 38 | cl_command_queue& fftQueue); 39 | 40 | int check(const void* work, 41 | const void* check, 42 | const int half_n_ffts, 43 | const int half_n_cmplx, 44 | cl_kernel& chkKrnl, 45 | cl_command_queue& fftQueue); 46 | 47 | void allocDeviceBuffer(void** bufferp, 48 | const unsigned long bytes, 49 | cl_context fftCtx, 50 | cl_command_queue fftQueue); 51 | 52 | void freeDeviceBuffer(void* buffer, 53 | cl_context fftCtx, 54 | cl_command_queue fftQueue); 55 | 56 | void allocHostBuffer(void** bufp, 57 | const unsigned long bytes, 58 | cl_context fftCtx, 59 | cl_command_queue fftQueue); 60 | 61 | void freeHostBuffer(void* buf, 62 | cl_context fftCtx, 63 | cl_command_queue fftQueue); 64 | 65 | void copyToDevice(void* to_device, void* from_host, 66 | const unsigned long bytes, cl_command_queue fftQueue); 67 | 68 | void copyFromDevice(void* to_host, void* from_device, 69 | const unsigned long bytes, cl_command_queue fftQueue); 70 | 71 | #endif // FFTLIB_H 72 | -------------------------------------------------------------------------------- /src/common/Timer.h: -------------------------------------------------------------------------------- 1 | #ifndef TIMER_H 2 | #define TIMER_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #ifndef _WIN32 11 | #include 12 | #include "config.h" 13 | #endif 14 | 15 | 16 | // decide which timer type we are supposed to use 17 | #if defined(_WIN32) 18 | # define TIMEINFO _timeb 19 | #elif defined(HAVE_CLOCK_GETTIME) && defined(HAVE_CLOCK_PROCESS_CPUTIME_ID) 20 | # define TIMEINFO timespec 21 | #elif defined(HAVE_GETTIMEOFDAY) 22 | # define TIMEINFO timeval 23 | 
#else 24 | # error No supported timer available. 25 | #endif 26 | 27 | 28 | // **************************************************************************** 29 | // Class: Timer 30 | // 31 | // Purpose: 32 | // Encapsulated a set of hierarchical timers. Starting a timer 33 | // returns a handle to a timer. Pass this handle, and a description, 34 | // into the timer Stop routine. Timers can nest and output will 35 | // be displayed in a tree format. 36 | // 37 | // Externally, Timer represents time in units of seconds. 38 | // 39 | // Programmer: Jeremy Meredith 40 | // Creation: August 6, 2004 41 | // 42 | // **************************************************************************** 43 | class Timer 44 | { 45 | public: 46 | static Timer *Instance(); 47 | 48 | static int Start(); 49 | 50 | // Returns time since start of corresponding timer (determined by handle), 51 | // in seconds. 52 | static double Stop(int handle, const std::string &descr); 53 | static void Insert(const std::string &descr, double value); 54 | 55 | static void Dump(std::ostream&); 56 | 57 | private: 58 | 59 | int real_Start(); 60 | double real_Stop(int, const std::string &); 61 | void real_Insert(const std::string &descr, double value); 62 | void real_Dump(std::ostream&); 63 | 64 | Timer(); 65 | ~Timer(); 66 | 67 | static Timer *instance; 68 | 69 | std::vector startTimes; 70 | std::vector timeLengths; 71 | std::vector descriptions; 72 | int currentActiveTimers; 73 | }; 74 | 75 | #endif 76 | -------------------------------------------------------------------------------- /src/cuda/level2/s3d/gr_base.h: -------------------------------------------------------------------------------- 1 | #ifndef GETRATES_BASE_H 2 | #define GETRATES_BASE_H 3 | 4 | #include "S3D.h" 5 | 6 | template 7 | __global__ void 8 | LAUNCH_BOUNDS (GR_BASE_THRD, GR_BASE_BLK) 9 | gr_base(const real* P, const real* T, const real* Y, real* C, real TCONV, 10 | real PCONV) { 11 | 12 | const real TEMP = T[threadIdx.x + (blockIdx.x * 
blockDim.x)]*TCONV; 13 | const real PRES = P[threadIdx.x + (blockIdx.x * blockDim.x)]*PCONV; 14 | const real SMALL = FLT_MIN; 15 | 16 | real SUM, ctmp; 17 | 18 | SUM = 0.0f; 19 | 20 | C(1) = ctmp = Y(1) *4.96046521e-1; 21 | SUM += ctmp; 22 | C(2) = ctmp = Y(2) *9.92093043e-1; 23 | SUM += ctmp; 24 | C(3) = ctmp = Y(3) *6.25023433e-2; 25 | SUM += ctmp; 26 | C(4) = ctmp = Y(4) *3.12511716e-2; 27 | SUM += ctmp; 28 | C(5) = ctmp = Y(5) *5.87980383e-2; 29 | SUM += ctmp; 30 | C(6) = ctmp = Y(6) *5.55082499e-2; 31 | SUM += ctmp; 32 | C(7) = ctmp = Y(7) *3.02968146e-2; 33 | SUM += ctmp; 34 | C(8) = ctmp = Y(8) *2.93990192e-2; 35 | SUM += ctmp; 36 | C(9) = ctmp = Y(9) *6.65112065e-2; 37 | SUM += ctmp; 38 | C(10) = ctmp = Y(10)*6.23323639e-2; 39 | SUM += ctmp; 40 | C(11) = ctmp = Y(11)*3.57008335e-2; 41 | SUM += ctmp; 42 | C(12) = ctmp = Y(12)*2.27221341e-2; 43 | SUM += ctmp; 44 | C(13) = ctmp = Y(13)*3.33039255e-2; 45 | SUM += ctmp; 46 | C(14) = ctmp = Y(14)*3.84050525e-2; 47 | SUM += ctmp; 48 | C(15) = ctmp = Y(15)*3.56453112e-2; 49 | SUM += ctmp; 50 | C(16) = ctmp = Y(16)*3.32556033e-2; 51 | SUM += ctmp; 52 | C(17) = ctmp = Y(17)*2.4372606e-2; 53 | SUM += ctmp; 54 | C(18) = ctmp = Y(18)*2.37882046e-2; 55 | SUM += ctmp; 56 | C(19) = ctmp = Y(19)*2.26996304e-2; 57 | SUM += ctmp; 58 | C(20) = ctmp = Y(20)*2.43467162e-2; 59 | SUM += ctmp; 60 | C(21) = ctmp = Y(21)*2.37635408e-2; 61 | SUM += ctmp; 62 | C(22) = ctmp = Y(22)*3.56972032e-2; 63 | SUM += ctmp; 64 | 65 | SUM = DIV (PRES, (SUM * (TEMP) * 8.314510e7)); 66 | 67 | #pragma unroll 22 68 | for (unsigned k=1; k<=22; k++) { 69 | C(k) = MAX(C(k), SMALL) * SUM; 70 | } 71 | } 72 | 73 | #endif 74 | -------------------------------------------------------------------------------- /src/cuda/common/cudacommon.h: -------------------------------------------------------------------------------- 1 | #ifndef CUDACOMMON_H 2 | #define CUDACOMMON_H 3 | 4 | // workaround for OS X Snow Leopard w/ gcc 4.2.1 and CUDA 2.3a 5 | // (undefined 
__sync_fetch_and_add) 6 | #if defined(__APPLE__) 7 | # if _GLIBCXX_ATOMIC_BUILTINS == 1 8 | #undef _GLIBCXX_ATOMIC_BUILTINS 9 | #endif // _GLIBC_ATOMIC_BUILTINS 10 | #endif // __APPLE__ 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | // On Windows, if we call exit, our console may disappear, 17 | // taking the error message with it, so prompt before exiting. 18 | #if defined(_WIN32) 19 | #define safe_exit(val) \ 20 | { \ 21 | cout << "Press return to exit\n"; \ 22 | cin.get(); \ 23 | exit(val); \ 24 | } 25 | #else 26 | #define safe_exit(val) exit(val) 27 | #endif 28 | 29 | #define CHECK_CUDA_ERROR() \ 30 | { \ 31 | cudaError_t err = cudaGetLastError(); \ 32 | if (err != cudaSuccess) \ 33 | { \ 34 | printf("error=%d name=%s at " \ 35 | "ln: %d\n ",err,cudaGetErrorString(err),__LINE__); \ 36 | safe_exit(-1); \ 37 | } \ 38 | } 39 | 40 | // Alternative macro to catch CUDA errors 41 | #define CUDA_SAFE_CALL( call) do { \ 42 | cudaError err = call; \ 43 | if (cudaSuccess != err) { \ 44 | fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \ 45 | __FILE__, __LINE__, cudaGetErrorString( err) ); \ 46 | safe_exit(EXIT_FAILURE); \ 47 | } \ 48 | } while (0) 49 | 50 | // Alleviate aliasing issues 51 | #define RESTRICT __restrict__ 52 | 53 | #endif // CUDACOMMON_H 54 | -------------------------------------------------------------------------------- /src/opencl/level1/stencil2d/OpenCLStencil.h: -------------------------------------------------------------------------------- 1 | #ifndef OPENCLSTENCIL_H 2 | #define OPENCLSTENCIL_H 3 | 4 | #include 5 | #include "Stencil.h" 6 | #include "support.h" 7 | 8 | 9 | // **************************************************************************** 10 | // Class: OpenCLStencil 11 | // 12 | // Purpose: 13 | // OpenCL implementation of 9-point stencil. 
14 | // 15 | // Programmer: Phil Roth 16 | // Creation: October 28, 2009 17 | // 18 | // **************************************************************************** 19 | template 20 | class OpenCLStencil : public Stencil 21 | { 22 | private: 23 | size_t lRows; 24 | size_t lCols; 25 | 26 | cl_context context; 27 | cl_device_id device; 28 | cl_command_queue queue; 29 | cl_kernel kernel; 30 | 31 | protected: 32 | cl_kernel copyRectKernel; 33 | 34 | virtual void DoPreIterationWork( cl_mem buf, 35 | cl_mem altBuf, 36 | Matrix2D& mtx, 37 | unsigned int iter, 38 | cl_command_queue queue ); 39 | 40 | void SetCopyRectKernelArgs( cl_mem dest, 41 | int destOffset, 42 | int destPitch, 43 | cl_mem src, 44 | int srcOffset, 45 | int srcPitch, 46 | int width, 47 | int height ); 48 | 49 | void SetStencilKernelArgs( cl_mem currData, 50 | cl_mem newData, 51 | int alignment, 52 | T wCenter, 53 | T wCardinal, 54 | T wDiagonal, 55 | size_t localDataSize ); 56 | 57 | static void ClearWaitEvents( std::vector& waitEvents ); 58 | 59 | cl_context GetContext( void ) { return context; } 60 | 61 | public: 62 | OpenCLStencil( T wCenter, 63 | T wCardinal, 64 | T wDiagonal, 65 | size_t _lRows, 66 | size_t _lCols, 67 | cl_device_id dev, 68 | cl_context ctx, 69 | cl_command_queue queue ); 70 | 71 | virtual void operator()( Matrix2D&, unsigned int nIters ); 72 | }; 73 | 74 | #endif // OPENCLSTENCIL_H 75 | -------------------------------------------------------------------------------- /tools/numatest.pl: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env perl 2 | 3 | # Parse the arguments 4 | $platform = "OpenCL"; 5 | while ($_ = shift @ARGV) 6 | { 7 | if (/^-cuda$/ or /^--cuda$/) 8 | { 9 | $platform = "CUDA"; 10 | } 11 | elsif (/^-opencl$/ or /^--opencl$/) 12 | { 13 | $platform = "OpenCL"; 14 | } 15 | else 16 | { 17 | print STDERR "Unknown argument: '$_'\n"; 18 | print STDERR "\n"; 19 | print STDERR "Usage: $0 [--cuda | --opencl]\n"; 20 | print STDERR " (defaults to OpenCL)\n"; 21 | print STDERR "\n"; 22 | exit 1; 23 | } 24 | } 25 | print "Using platform: $platform\n"; 26 | 27 | # Get the CUDA/OpenCL devices available 28 | @devicequeryoutput = `../bin/Serial/$platform/BusSpeedDownload -i`; 29 | $num_devs = (grep(/Number of devices/, @devicequeryoutput))[0]; 30 | $num_devs =~ s/^.*=\s*//; 31 | chomp($num_devs); 32 | print "Number of $platform devices: $num_devs\n"; 33 | 34 | # Get the NUMA nodes available 35 | @numaoutput = `numactl --show`; 36 | $numa_node_str = (grep(/nodebind/, @numaoutput))[0]; 37 | $numa_node_str =~ s/^.*:\s*//; 38 | chomp($numa_node_str); 39 | @numa_nodes = split /\s+/, $numa_node_str; 40 | print "Number of NUMA nodes= @numa_nodes\n"; 41 | 42 | # Check download speed and latency for all NUMA node / device pairings 43 | foreach $n (@numa_nodes) 44 | { 45 | for ($d = 0; $d < $num_devs; $d++) 46 | { 47 | @down_output = `numactl --cpunodebind=$n ../bin/Serial/$platform/BusSpeedDownload -d $d`; 48 | 49 | $bw_str = (grep(/DownloadSpeed\s+65536kB/, @down_output))[0]; 50 | @bw_cols = split /\s+/, $bw_str; 51 | $bw_median = $bw_cols[3]; 52 | 53 | $lat_str = (grep(/DownloadTime\s+1kB/, @down_output))[0]; 54 | @lat_cols = split /\s+/, $lat_str; 55 | $lat_median = $lat_cols[3]; 56 | 57 | print "NUMA Node=$n Device=$d Median Download Latency=$lat_median ms, Speed=$bw_median GB/sec\n"; 58 | } 59 | } 60 | 61 | # Check readback speed and latency for all NUMA node / device pairings 62 | foreach $n (@numa_nodes) 63 | { 64 | for ($d = 0; $d < $num_devs; $d++) 65 | { 66 | @up_output = `numactl 
--cpunodebind=$n ../bin/Serial/$platform/BusSpeedReadback -d $d`; 67 | 68 | $bw_str = (grep(/ReadbackSpeed\s+65536kB/, @up_output))[0]; 69 | @bw_cols = split /\s+/, $bw_str; 70 | $bw_median = $bw_cols[3]; 71 | 72 | $lat_str = (grep(/ReadbackTime\s+1kB/, @up_output))[0]; 73 | @lat_cols = split /\s+/, $lat_str; 74 | $lat_median = $lat_cols[3]; 75 | 76 | print "NUMA Node=$n Device=$d Median Upload Latency=$lat_median ms, Speed=$bw_median GB/sec\n"; 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/mpi/common/MPIStencilUtil.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "MPIStencilUtil.h" 4 | #include "ParallelResultDatabase.h" 5 | 6 | template 7 | void 8 | MPIStencilValidater::ValidateResult( const Matrix2D& exp, 9 | const Matrix2D& data, 10 | double valErrThreshold, 11 | unsigned int nValErrsToPrint ) const 12 | { 13 | Validate val( valErrThreshold ); 14 | std::vector > validationErrors = val( exp, data ); 15 | std::ostringstream valResultStr; 16 | 17 | // gather validation results to rank 0, who handles results 18 | int nValErrors = validationErrors.size(); 19 | int totalValErrors = 0; 20 | MPI_Reduce( &nValErrors, // input from each 21 | &totalValErrors, // output (only valid at root) 22 | 1, // count 23 | MPI_INT, // datatype 24 | MPI_SUM, // reduction operation 25 | 0, // root 26 | MPI_COMM_WORLD ); // comm 27 | 28 | int cwrank; 29 | MPI_Comm_rank( MPI_COMM_WORLD, &cwrank ); 30 | if( cwrank == 0 ) 31 | { 32 | valResultStr << totalValErrors << " validation errors"; 33 | 34 | if( (totalValErrors > 0) && (nValErrsToPrint > 0) ) 35 | { 36 | unsigned int valErrPrintsRemaining = nValErrsToPrint; 37 | this->PrintValidationErrors( valResultStr, validationErrors, valErrPrintsRemaining ); 38 | if( validationErrors.size() <= valErrPrintsRemaining ) 39 | { 40 | // TODO do we want to collect validation errors from 41 | // other processes? 
42 | valResultStr << " more validation errors in processes other than rank 0\n"; 43 | } 44 | } 45 | std::cout << valResultStr.str() << std::endl; 46 | } 47 | } 48 | 49 | 50 | 51 | 52 | // Modifications: 53 | // Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010 54 | // Split timing reports into detailed and summary. For 55 | // parallel code, don't report per-process values. 56 | // 57 | void 58 | MPIStencilTimingReporter::ReportTimings( ResultDatabase& resultDB ) const 59 | { 60 | ParallelResultDatabase pdb; 61 | pdb.MergeSerialDatabases( resultDB, MPI_COMM_WORLD ); 62 | 63 | int cwrank; 64 | MPI_Comm_rank( MPI_COMM_WORLD, &cwrank ); 65 | if( cwrank == 0 ) 66 | { 67 | pdb.DumpSummary( std::cout ); 68 | } 69 | } 70 | 71 | -------------------------------------------------------------------------------- /src/opencl/level1/stencil2d/CommonOpenCLStencilFactory.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "CommonOpenCLStencilFactory.h" 5 | #include "InvalidArgValue.h" 6 | 7 | 8 | 9 | 10 | template 11 | void 12 | CommonOpenCLStencilFactory::CheckOptions( const OptionParser& opts ) const 13 | { 14 | // let base class check its options first 15 | StencilFactory::CheckOptions( opts ); 16 | 17 | // check our options 18 | std::vector shDims = opts.getOptionVecInt( "lsize" ); 19 | if( shDims.size() != 2 ) 20 | { 21 | throw InvalidArgValue( "lsize must have two dimensions" ); 22 | } 23 | if( (shDims[0] <= 0) || (shDims[1] <= 0) ) 24 | { 25 | throw InvalidArgValue( "all lsize values must be positive" ); 26 | } 27 | // customSize holds the overall (rows, columns); both must be checked below 28 | std::vector arrayDims = opts.getOptionVecInt( "customSize" ); 29 | assert( arrayDims.size() == 2 ); 30 | // If both of these are zero, we're using a non-custom size, skip this test 31 | if (arrayDims[0] == 0 && arrayDims[1] == 0) 32 | { 33 | return; 34 | } 35 | size_t gRows = (size_t)arrayDims[0]; 36 | size_t gCols = (size_t)arrayDims[1]; 37 | size_t lRows = (size_t)shDims[0]; 
38 | size_t lCols = (size_t)shDims[1]; 39 | 40 | // verify that local dimensions evenly divide global dimensions 41 | if( ((gRows % lRows) != 0) || (lRows > gRows) ) 42 | { 43 | throw InvalidArgValue( "overall rows must be even multiple of lsize rows" ); 44 | } 45 | if( ((gCols % lCols) != 0) || (lCols > gCols) ) 46 | { 47 | throw InvalidArgValue( "overall columns must be even multiple of lsize columns" ); 48 | } 49 | 50 | // TODO ensure local dims are smaller than OpenCL implementation limits 51 | } 52 | 53 | 54 | template 55 | void 56 | CommonOpenCLStencilFactory::ExtractOptions( const OptionParser& options, 57 | T& wCenter, 58 | T& wCardinal, 59 | T& wDiagonal, 60 | size_t& lRows, 61 | size_t& lCols ) 62 | { 63 | // let base class extract its options 64 | StencilFactory::ExtractOptions( options, wCenter, wCardinal, wDiagonal ); 65 | 66 | // extract our options 67 | std::vector ldims = options.getOptionVecInt( "lsize" ); 68 | assert( ldims.size() == 2 ); 69 | lRows = (size_t)ldims[0]; 70 | lCols = (size_t)ldims[1]; 71 | } 72 | 73 | 74 | -------------------------------------------------------------------------------- /src/cuda/level1/scan/tpScanLaunchKernel.cu: -------------------------------------------------------------------------------- 1 | #include "scan_kernel.h" 2 | 3 | template 4 | void 5 | LaunchReduceKernel( int num_blocks, 6 | int num_threads, 7 | int smem_size, 8 | T* d_idata, 9 | T* d_odata, 10 | int size ) 11 | { 12 | // In CUDA 4.0 we will be able to remove this level of indirection 13 | // if we use the cuConfigureCall and cuLaunchKernel functions. 14 | reduce<<>> 15 | (d_idata, d_odata, size); 16 | } 17 | 18 | template 19 | void 20 | LaunchTopScanKernel( int num_blocks, 21 | int num_threads, 22 | int smem_size, 23 | T* d_block_sums, 24 | int size ) 25 | { 26 | // In CUDA 4.0 we will be able to remove this level of indirection 27 | // if we use the cuConfigureCall and cuLaunchKernel functions. 
28 | scan_single_block<<>> 29 | (d_block_sums, size); 30 | } 31 | 32 | template 33 | void 34 | LaunchBottomScanKernel( int num_blocks, 35 | int num_threads, 36 | int smem_size, 37 | T* g_idata, 38 | T* g_odata, 39 | T* d_block_sums, 40 | int size ) 41 | { 42 | // In CUDA 4.0 we will be able to remove this level of indirection 43 | // if we use the cuConfigureCall and cuLaunchKernel functions. 44 | bottom_scan<<>>(g_idata, g_odata, 45 | d_block_sums, size); 46 | } 47 | 48 | // Ensure that the template functions are instantiated 49 | // Unlike the Stencil2D CUDA version that needs to instantiate objects, 50 | // we need to instantiate template functions. Declaration of the needed 51 | // specializations seem to work for several recent versions of g++ that 52 | // people are likely to be using underneath nvcc. 53 | template void LaunchReduceKernel( int, int, int, float*, float*, int ); 54 | template void LaunchReduceKernel( int, int, int, double*, double*, int ); 55 | 56 | template void LaunchTopScanKernel( int, int, int, float*, int ); 57 | template void LaunchTopScanKernel( int, int, int, double*, int ); 58 | 59 | template void LaunchBottomScanKernel( int, int, int, float*, float*, float*, int ); 60 | template void LaunchBottomScanKernel( int, int, int, double*, double*, double*, int ); 61 | 62 | -------------------------------------------------------------------------------- /src/common/Utility.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILITY_H 2 | #define UTILITY_H 3 | 4 | #include 5 | #include 6 | 7 | // **************************************************************************** 8 | // File: Utility.h 9 | // 10 | // Purpose: 11 | // Various generic utility routines having to do with string and number 12 | // manipulation. 
13 | // 14 | // Programmer: Jeremy Meredith 15 | // Creation: September 18, 2009 16 | // Modified: Jan 2010, rothpc 17 | // Jeremy Meredith, Tue Oct 9 17:25:25 EDT 2012 18 | // Round is c99, not Windows-friendly. Assuming we are using 19 | // positive values, replaced it with an equivalent of int(x+.5). 20 | // 21 | // **************************************************************************** 22 | 23 | inline std::string HumanReadable(long long value, long long *rounding=0) 24 | { 25 | std::ostringstream vstr; 26 | long long pVal; 27 | if (value>10ll*1024*1024*1024) 28 | { 29 | pVal = (long long)(0.5 + value/(1024.0*1024*1024)); 30 | if (rounding) 31 | *rounding = pVal*1024*1024*1024 - value; 32 | vstr << pVal << 'G'; 33 | } 34 | else if (value>10ll*1024*1024) 35 | { 36 | pVal = (long long)(0.5 + value/(1024.0*1024)); 37 | if (rounding) 38 | *rounding = pVal*1024*1024 - value; 39 | vstr << pVal << 'M'; 40 | } 41 | else if (value>10ll*1024) 42 | { 43 | pVal = (long long)(0.5 + value/(1024.0)); 44 | if (rounding) 45 | *rounding = pVal*1024 - value; 46 | vstr << pVal << 'k'; 47 | } 48 | else 49 | { 50 | if (rounding) 51 | *rounding = 0; 52 | vstr << value; 53 | } 54 | return vstr.str(); 55 | } 56 | 57 | inline vector SplitValues(const std::string &buff, char delim) 58 | { 59 | vector output; 60 | std::string tmp=""; 61 | for (size_t i=0; i){ 12 | chomp(); 13 | my $ln=$_; 14 | my @elems = split(/\t+/,$ln); 15 | if( $elems[0] eq "test" && $elems[1] eq "atts" ){ 16 | $before = 0; 17 | } 18 | if( $before == 0 && ($#elems == 0 || $elems[0] eq "Note:") ){ 19 | $after = 1; 20 | } 21 | 22 | if( $before == 1 ){ 23 | print "$ln\n"; 24 | }elsif( $after == 1 ){ 25 | push(@lastLines, $ln); 26 | }else{ 27 | # push each line in an array so we retrieve it later 28 | push(@lines, $ln); 29 | my $i=0; 30 | # for each element in this line, find its length 31 | foreach(@elems){ 32 | my $elem=$_; 33 | # ignore elements that are of zero length (split() splits *around* delimiters, so 
multiple consecutive delimiters are returned as zero length strings). 34 | if( length($elem) > 0 ){ 35 | # keep track of the longest string per column 36 | my ($lenA, $lenB); 37 | if( $elem =~ /(\d*)\.(\d*)/ ){ 38 | $lenA = length($1); 39 | $lenB = length($2); 40 | }else{ 41 | $lenA = 1; 42 | $lenB = length($elem); 43 | } 44 | if($elem_lengthsA[$i] == 0 || $lenA > $elem_lengthsA[$i]){ 45 | $elem_lengthsA[$i] = $lenA; 46 | } 47 | if($elem_lengthsB[$i] == 0 || $lenB > $elem_lengthsB[$i]){ 48 | $elem_lengthsB[$i] = $lenB; 49 | } 50 | $i++ 51 | } 52 | } 53 | } 54 | } 55 | 56 | # iterate over the input (that we've stored into the array @lines) and print it 57 | foreach(@lines){ 58 | my $ln = $_; 59 | my @elems = split(/\t+/,$ln); 60 | my $i=0; 61 | foreach(@elems){ 62 | my $elem = $_; 63 | # skip delimiters 64 | if( length($elem) > 0 ){ 65 | # find the maximum length of this column and use it as the string length (+1) 66 | my $tmp_len = 1+$elem_lengthsA[$i]+$elem_lengthsB[$i]; 67 | if( $elem !~ /\d*\.\d*/ ){ 68 | my $frmt = " %-".$tmp_len."s "; 69 | printf($frmt,$elem); 70 | }else { 71 | my $frmt = " %".$tmp_len.".".$elem_lengthsB[$i]."lf "; 72 | printf($frmt,$elem); 73 | } 74 | $i++; 75 | } 76 | } 77 | print "\n"; 78 | } 79 | 80 | foreach(@lastLines){ 81 | print "$_\n"; 82 | } 83 | -------------------------------------------------------------------------------- /src/opencl/level2/s3d/rdwdot2.cl: -------------------------------------------------------------------------------- 1 | #ifdef K_DOUBLE_PRECISION 2 | #define DOUBLE_PRECISION 3 | #pragma OPENCL EXTENSION cl_khr_fp64: enable 4 | #elif AMD_DOUBLE_PRECISION 5 | #define DOUBLE_PRECISION 6 | #pragma OPENCL EXTENSION cl_amd_fp64: enable 7 | #endif 8 | 9 | // Macros to explicitly control precision of the constants, otherwise 10 | // known to cause problems for some Compilers 11 | #ifdef DOUBLE_PRECISION 12 | #define CPREC(a) a 13 | #else 14 | #define CPREC(a) a##f 15 | #endif 16 | 17 | //replace divisions by 
multiplication with the reciprocal 18 | #define REPLACE_DIV_WITH_RCP 1 19 | 20 | //Call the appropriate math function based on precision 21 | #ifdef DOUBLE_PRECISION 22 | #define real double 23 | #if REPLACE_DIV_WITH_RCP 24 | #define DIV(x,y) ((x)*(1.0/(y))) 25 | #else 26 | #define DIV(x,y) ((x)/(y)) 27 | #endif 28 | #define POW pow 29 | #define EXP exp 30 | #define EXP10 exp10 31 | #define EXP2 exp2 32 | #define MAX fmax 33 | #define MIN fmin 34 | #define LOG log 35 | #define LOG10 log10 36 | #else 37 | #define real float 38 | #if REPLACE_DIV_WITH_RCP 39 | #define DIV(x,y) ((x)*(1.0f/(y))) 40 | #else 41 | #define DIV(x,y) ((x)/(y)) 42 | #endif 43 | #define POW pow 44 | #define EXP exp 45 | #define EXP10 exp10 46 | #define EXP2 exp2 47 | #define MAX fmax 48 | #define MIN fmin 49 | #define LOG log 50 | #define LOG10 log10 51 | #endif 52 | 53 | //Kernel indexing macros 54 | #define thread_num (get_global_id(0)) 55 | #define idx2(p,z) (p[(((z)-1)*(N_GP)) + thread_num]) 56 | #define idx(x, y) ((x)[(y)-1]) 57 | #define C(q) idx2(C, q) 58 | #define Y(q) idx2(Y, q) 59 | #define RF(q) idx2(RF, q) 60 | #define EG(q) idx2(EG, q) 61 | #define RB(q) idx2(RB, q) 62 | #define RKLOW(q) idx2(RKLOW, q) 63 | #define ROP(q) idx(ROP, q) 64 | #define WDOT(q) idx2(WDOT, q) 65 | #define RKF(q) idx2(RKF, q) 66 | #define RKR(q) idx2(RKR, q) 67 | #define A_DIM (11) 68 | #define A(b, c) idx2(A, (((b)*A_DIM)+c) ) 69 | 70 | #define ROP2(a) (RKF(a) - RKR (a)) 71 | 72 | 73 | __kernel void 74 | rdwdot2_kernel (__global const real* RKF, __global const real* RKR, 75 | __global real* WDOT, const real rateconv, __global const real* molwt) 76 | { 77 | 78 | WDOT(21) = (ROP2(145) +ROP2(185) +ROP2(187) +ROP2(189) 79 | -ROP2(190) -ROP2(191) -ROP2(192) -ROP2(193) 80 | -ROP2(194) -ROP2(195) -ROP2(196) -ROP2(197) 81 | -ROP2(198) +ROP2(200) +ROP2(202) +ROP2(203) 82 | +ROP2(205))*rateconv *molwt[20]; 83 | 84 | WDOT(20) = (+ROP2(121) +ROP2(146) +ROP2(165) +ROP2(167) 85 | -ROP2(185) -ROP2(186) -ROP2(187) 
-ROP2(188) 86 | -ROP2(189) +ROP2(192) +ROP2(195) +ROP2(196) 87 | +ROP2(197) +ROP2(198) +ROP2(206))*rateconv *molwt[19]; 88 | // CPREC keeps the literal at the precision selected for real 89 | WDOT(22) = CPREC(0.0); 90 | } 91 | -------------------------------------------------------------------------------- /src/opencl/level1/bfs/bfs_iiit.cl: -------------------------------------------------------------------------------- 1 | #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable 2 | #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable 3 | #pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics: enable 4 | #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics: enable 5 | 6 | 7 | //Sungpack Hong, Sang Kyun Kim, Tayo Oguntebi, and Kunle Olukotun. 2011. 8 | //Accelerating CUDA graph algorithms at maximum warp. 9 | //In Proceedings of the 16th ACM symposium on Principles and practice of 10 | //parallel programming (PPoPP '11). ACM, New York, NY, USA, 267-276. 11 | // **************************************************************************** 12 | // Function: BFS_kernel_warp 13 | // 14 | // Purpose: 15 | // Perform BFS on the given graph 16 | // 17 | // Arguments: 18 | // levels: array that stores the level of vertices 19 | // edgeArray: array that gives offset of a vertex in edgeArrayAux 20 | // edgeArrayAux: array that gives the edge list of a vertex 21 | // W_SZ: the warp size to use to process vertices 22 | // CHUNK_SZ: the number of vertices each warp processes 23 | // numVertices: number of vertices in the given graph 24 | // curr: the current BFS level 25 | // flag: set when more vertices remain to be traversed 26 | // 27 | // Returns: nothing 28 | // 29 | // Programmer: Aditya Sarwade 30 | // Creation: June 16, 2011 31 | // 32 | // Modifications: 33 | // 34 | // **************************************************************************** 35 | __kernel void BFS_kernel_warp( 36 | __global unsigned int *levels, 37 | __global unsigned int *edgeArray, 38 | __global unsigned int *edgeArrayAux, 39 
| int W_SZ, 40 | int CHUNK_SZ, 41 | unsigned int numVertices, 42 | int curr, 43 | __global int *flag) 44 | { 45 | 46 | int tid = get_global_id(0); 47 | int W_OFF = tid % W_SZ; 48 | int W_ID = tid / W_SZ; 49 | int v1= W_ID * CHUNK_SZ; 50 | int chk_sz=CHUNK_SZ+1; 51 | 52 | if((v1+CHUNK_SZ)>=numVertices) 53 | { 54 | chk_sz = numVertices-v1+1;//(v1+CHUNK_SZ) - numVertices; 55 | if(chk_sz<0) 56 | chk_sz=0; 57 | } 58 | 59 | //each warp processes nodes one by one 60 | for(int v=v1; v< chk_sz-1+v1; v++) 61 | { 62 | if(levels[v] == curr) 63 | { 64 | unsigned int num_nbr = edgeArray[v+1]-edgeArray[v]; 65 | unsigned int nbr_off = edgeArray[v]; 66 | for(int i=W_OFF; i 5 | #include 6 | 7 | #ifndef _WIN32 8 | #include 9 | #endif 10 | 11 | 12 | // **************************************************************************** 13 | // Class: ProgressBar 14 | // 15 | // Purpose: 16 | // Simple text progress bar class. 17 | // 18 | // Programmer: Gabriel Marin 19 | // Creation: October 12, 2009 20 | // 21 | // Modifications: 22 | // 23 | // **************************************************************************** 24 | class ProgressBar 25 | { 26 | private: 27 | int itersDone; 28 | int totalIters; 29 | static const char barDone[81]; 30 | double rTotal; 31 | double percDone; 32 | 33 | public: 34 | // Constructor 35 | // 36 | // Arguments: 37 | // _totalIters total work amount to be tracked 38 | ProgressBar (int _totalIters = 0) 39 | { 40 | totalIters = _totalIters; 41 | itersDone = 0; 42 | if (totalIters) 43 | { 44 | rTotal = 100.0/totalIters; 45 | } else 46 | { 47 | rTotal = 0.0; 48 | } 49 | percDone = itersDone*rTotal; 50 | } 51 | 52 | // Method: setTotalIters 53 | // 54 | // Purpose: setter for the total work amount 55 | // 56 | // Arguments: 57 | // _totalIters total work amount to be tracked 58 | void setTotalIters (int _totalIters) 59 | { 60 | totalIters = _totalIters; 61 | // A zero total resets the scale to 0, as the constructor does, 62 | // instead of keeping a stale rTotal from the previous total. 63 | rTotal = totalIters ? 100.0/totalIters : 0.0; 64 | percDone = itersDone*rTotal; 65 | } 66 | 67 
| 68 | // Method: setItersDone 69 | // 70 | // Purpose: setter for the completed work amount 71 | // 72 | // Arguments: 73 | // _itersDone completed work amount 74 | void setItersDone (int _itersDone) 75 | { 76 | itersDone = _itersDone; 77 | percDone = itersDone*rTotal; 78 | } 79 | 80 | // Method: addItersDone 81 | // 82 | // Purpose: update amount of completed work 83 | // 84 | // Arguments: 85 | // _inc amount of newly completed work 86 | void addItersDone (int _inc = 1) 87 | { 88 | itersDone += _inc; 89 | percDone = itersDone*rTotal; 90 | } 91 | 92 | // Method: Show 93 | // 94 | // Purpose: display progress bar 95 | // 96 | // Arguments: 97 | // fd output file descriptor 98 | void Show (FILE *fd) 99 | { 100 | int lenDone = (int)(percDone/2.0 + 0.5); 101 | lenDone = lenDone < 0 ? 0 : (lenDone > 50 ? 50 : lenDone); // keep the '*' precision and width arguments non-negative 102 | fprintf(fd, "\r|%.*s%*s| %5.1lf%%", lenDone, barDone, 50-lenDone, "", percDone); 103 | fflush(fd); 104 | } 105 | }; 106 | #endif 107 | -------------------------------------------------------------------------------- /src/cuda/level2/qtclustering/comm.cpp: -------------------------------------------------------------------------------- 1 | #include "comm.h" 2 | #include 3 | 4 | using namespace std; 5 | 6 | #if defined(PARALLEL) 7 | MPI_Comm _qtc_mpi_communicator = MPI_COMM_WORLD; 8 | #endif 9 | 10 | int comm_get_rank(void){ 11 | int rank=0; 12 | #if defined(PARALLEL) 13 | MPI_Comm_rank( _qtc_mpi_communicator, &rank ); 14 | #endif 15 | return rank; 16 | } 17 | 18 | int comm_get_size(void){ 19 | int node_count=1; 20 | #if defined(PARALLEL) 21 | MPI_Comm_size( _qtc_mpi_communicator, &node_count ); 22 | #endif // defined(PARALLEL) 23 | return node_count; 24 | } 25 | 26 | 27 | void comm_broadcast( void *ptr, int cnt, int type, int source){ 28 | #if defined(PARALLEL) 29 | switch(type){ 30 | case COMM_TYPE_INT: 31 | MPI_Bcast ( ptr, cnt, MPI_INT, source, _qtc_mpi_communicator ); 32 | break; 33 | case COMM_TYPE_FLOAT: 34 | MPI_Bcast ( ptr, cnt, MPI_FLOAT, source, _qtc_mpi_communicator ); 35 | break; 36 | default: 
37 | break; 38 | } 39 | #endif // defined(PARALLEL) 40 | return; 41 | } 42 | 43 | 44 | void comm_barrier(){ 45 | #if defined(PARALLEL) 46 | MPI_Barrier (_qtc_mpi_communicator); 47 | #endif 48 | return; 49 | } 50 | 51 | void comm_find_winner(int *max_card, int *winner_node, int *winner_index, int cwrank, int max_index){ 52 | #if defined(PARALLEL) 53 | int glb_max_card = 0, index = *winner_index; 54 | // Reduce the cardinalities to see what the highest value is. 55 | MPI_Allreduce (max_card, &glb_max_card, 1, MPI_INT, MPI_MAX, _qtc_mpi_communicator); 56 | 57 | // If I'm not one of the winners, set my index to max 58 | if(*max_card != glb_max_card) 59 | index = max_index; 60 | 61 | MPI_Allreduce (&index, winner_index, 1, MPI_INT, MPI_MIN, _qtc_mpi_communicator); 62 | 63 | *max_card = glb_max_card; 64 | 65 | if( index == *winner_index ){ 66 | *winner_node = cwrank; 67 | }else{ 68 | *winner_node = -1; 69 | } 70 | 71 | #else 72 | *winner_node = 0; 73 | #endif // defined(PARALLEL) 74 | return; 75 | } 76 | 77 | 78 | void comm_update_communicator(int cwrank, int active_node_count){ 79 | #if defined(PARALLEL) 80 | static int previous_active_node_count = -1; 81 | int this_node_participates = 1; 82 | 83 | if( -1 == previous_active_node_count ){ 84 | previous_active_node_count = active_node_count; 85 | return; 86 | } 87 | 88 | if(active_node_count < previous_active_node_count ){ 89 | if( cwrank >= active_node_count ){ 90 | this_node_participates = 0; 91 | std::cout << "[" << cwrank << "] Shrinking the communicator and staying out of it." << std::endl; 92 | } 93 | MPI_Comm_split(_qtc_mpi_communicator, this_node_participates, cwrank, &_qtc_mpi_communicator); 94 | } 95 | previous_active_node_count = active_node_count; 96 | #endif 97 | return; 98 | } 99 | --------------------------------------------------------------------------------