├── opencl ├── dwt2d │ ├── dwt_cl │ │ ├── common.h │ │ └── dwt_cl.h │ ├── run.sh │ ├── dwt.h │ ├── components.cpp │ ├── components.h │ ├── README │ └── Makefile ├── lud │ ├── ocl │ │ ├── .gitignore │ │ └── makefile │ ├── tools │ │ ├── Makefile │ │ └── README │ ├── run │ ├── README │ └── base │ │ └── lud_base.c ├── hybridsort │ ├── run │ ├── mergesort.h │ ├── README │ ├── Makefile │ └── bucketsort.h ├── heartwall │ ├── run │ ├── util │ │ ├── avi │ │ │ ├── avilib.c │ │ │ └── avilib.h │ │ └── timer │ │ │ └── timer.h │ └── README ├── leukocyte │ ├── meschach_lib │ │ ├── config.cache │ │ ├── confdefs.h │ │ ├── MACHINES │ │ │ ├── TurboC │ │ │ │ ├── filelist │ │ │ │ ├── meschtc.zip │ │ │ │ └── README │ │ │ ├── Cray │ │ │ │ ├── mesch-cray.tar.Z │ │ │ │ ├── patch.3 │ │ │ │ └── patch.2 │ │ │ └── MicroSoft │ │ │ │ └── stewart.zip │ │ ├── mesch12b.tar.gz │ │ ├── rk4.dat │ │ ├── config.log │ │ └── ls.dat │ ├── OpenCL │ │ ├── run │ │ ├── avilib.c │ │ ├── avilib.h │ │ ├── misc_math.h │ │ ├── OpenCL_helper_library.h │ │ ├── track_ellipse_opencl.h │ │ ├── find_ellipse_opencl.h │ │ └── track_ellipse.h │ ├── README │ └── Makefile ├── myocyte │ ├── run │ ├── README │ ├── main.c │ ├── main.h │ └── util │ │ └── num │ │ └── num.h ├── srad │ ├── output │ │ └── .gitignore │ ├── run │ ├── README │ └── kernel │ │ └── Makefile ├── bfs │ ├── run │ ├── readme │ └── README_fpga.md ├── hotspot3D │ ├── README │ ├── run │ └── CL_helper.h ├── nn │ ├── run │ ├── filelist.txt │ ├── nn_kernel_v1.cl │ ├── benchmark_fpga.sh │ ├── nn_kernel_v3.cl │ ├── nn_kernel_v5.cl │ ├── nn_kernel_v0.cl │ ├── Makefile │ ├── README_fpga.md │ ├── ipoint.h │ ├── nn_kernel_v2.cl │ └── nn_kernel_v4.cl ├── nw │ ├── run │ ├── work_group_size.h │ ├── README │ ├── nw_kernel_v1.cl │ └── nw_kernel_v3.cl ├── kmeans │ ├── run │ ├── README │ └── Makefile ├── lavaMD │ ├── run │ ├── README │ ├── makefile │ └── README_fpga.md ├── backprop │ ├── run │ ├── imagenet.c │ ├── Makefile │ ├── README_fpga.md │ └── facetrain.c ├── pathfinder │ 
├── run │ ├── pathfinder_common.h │ ├── pathfinder_kernel_v1.cl │ ├── Makefile │ └── pathfinder_kernel_v3.cl ├── gaussian │ ├── run │ ├── Makefile │ └── gettimeofday.h ├── b+tree │ ├── run │ ├── penmp │ ├── kernel │ │ ├── kernel_gpu_opencl_altera_v0.cl │ │ ├── kernel_gpu_opencl_altera_v2.cl │ │ ├── kernel_gpu_opencl_altera_v1.cl │ │ ├── kernel_gpu_opencl_altera_v3.cl │ │ └── kernel_gpu_opencl_altera_v4.cl │ ├── README │ ├── problem_size.h │ ├── README_fpga │ └── main.h ├── cfd │ ├── CLHelper2.cpp │ ├── README │ ├── run │ └── common.h ├── particlefilter │ ├── run │ └── README.txt ├── hotspot │ ├── README │ ├── run │ └── OpenCL_helper_library.h └── streamcluster │ ├── readme │ ├── run │ └── Makefile ├── cuda ├── hybridsort │ ├── run │ ├── mergesort.cuh │ ├── bucketsort.cuh │ ├── README │ └── Makefile ├── nw │ ├── run │ ├── needle.h │ ├── README │ └── Makefile ├── backprop │ ├── run │ └── imagenet.c ├── lavaMD │ ├── run │ └── kernel │ │ └── kernel_gpu_cuda_wrapper.h ├── leukocyte │ ├── meschach_lib │ │ ├── confdefs.h │ │ ├── config.cache │ │ ├── MACHINES │ │ │ ├── TurboC │ │ │ │ ├── filelist │ │ │ │ ├── meschtc.zip │ │ │ │ └── README │ │ │ ├── Cray │ │ │ │ ├── mesch-cray.tar.Z │ │ │ │ ├── patch.3 │ │ │ │ └── patch.2 │ │ │ ├── MicroSoft │ │ │ │ └── stewart.zip │ │ │ └── OS2 │ │ │ │ └── README │ │ ├── mesch12b.tar.gz │ │ ├── rk4.dat │ │ ├── config.log │ │ └── ls.dat │ ├── run │ ├── CUDA │ │ ├── run │ │ ├── avilib.c │ │ ├── avilib.h │ │ ├── misc_math.h │ │ ├── track_ellipse_kernel.h │ │ ├── find_ellipse_kernel.h │ │ └── track_ellipse.h │ ├── README │ ├── Makefile │ └── result.txt ├── myocyte │ ├── run │ ├── main.cu │ ├── timer.c │ ├── README │ ├── define.c │ └── Makefile ├── nn │ ├── run │ ├── filelist_4 │ ├── nn_pbs.sh │ ├── gen_dataset.sh │ ├── Makefile │ └── README ├── srad │ ├── srad_v1 │ │ ├── run │ │ ├── timer.c │ │ ├── include.h │ │ ├── define.c │ │ ├── makefile │ │ ├── extract_kernel.cu │ │ ├── compress_kernel.cu │ │ └── prepare_kernel.cu │ ├── srad_v2 │ │ ├── 
run │ │ ├── srad.h │ │ ├── Makefile │ │ ├── Makefile_nvidia │ │ └── README │ ├── README │ └── Makefile ├── bfs │ ├── run │ ├── README │ └── Makefile ├── pathfinder │ ├── run │ ├── README │ └── Makefile ├── heartwall │ ├── run │ ├── AVI │ │ ├── avilib.c │ │ ├── avilib.h │ │ └── makefile │ ├── README │ ├── Makefile │ └── setdevice.cu ├── kmeans │ ├── run │ ├── README │ └── Makefile ├── dwt2d │ ├── autorun.sh │ ├── run.sh │ └── README ├── mummergpu │ ├── src │ │ ├── michaelinput.txt │ │ ├── morton.c │ │ └── info.xml │ ├── Makefile │ ├── run │ └── experiments │ │ └── e2e │ │ ├── s_suis.mummergpu-1.0.million_reads.fna.C.stats │ │ ├── s_suis.mummergpu-1.0.million_reads.fna.stats │ │ ├── h_sapiens.mummergpu-1.0.million_reads.fna.C.stats │ │ ├── lmonocytogenes.mummergpu-1.0.million_reads.fna.C.stats │ │ ├── cbriggsae.mummergpu-1.0.half_million_reads.fna.C.stats │ │ ├── h_sapiens.mummergpu-1.0.half_million_reads.fna.C.stats │ │ ├── h_sapiens.mummergpu-1.0.million_reads.fna.stats │ │ ├── lmonocytogenes.mummergpu-1.0.million_reads.fna.stats │ │ ├── cbriggsae.mummergpu-1.0.half_million_reads.fna.stats │ │ ├── h_sapiens.mummergpu-1.0.half_million_reads.fna.stats │ │ ├── s_suis.m.million_reads.fna.C.stats │ │ ├── s_suis.m.million_reads.fna.stats │ │ ├── backup │ │ ├── s_suis.m.million_reads.fna.stats │ │ ├── h_sapiens.m.million_reads.fna.stats │ │ ├── lmonocytogenes.m.million_reads.fna.stats │ │ └── cbriggsae.m.half_million_reads.fna.stats │ │ ├── h_sapiens.m.million_reads.fna.C.stats │ │ ├── lmonocytogenes.m.million_reads.fna.C.stats │ │ ├── s_suis.Tmn.million_reads.fna.stats │ │ ├── s_suis.Tn.million_reads.fna.stats │ │ ├── h_sapiens.m.half_million_reads.fna.C.stats │ │ ├── h_sapiens.m.million_reads.fna.stats │ │ ├── lmonocytogenes.m.million_reads.fna.stats │ │ ├── cbriggsae.m.half_million_reads.fna.C.stats │ │ ├── h_sapiens.Tmn.million_reads.fna.stats │ │ ├── h_sapiens.Tn.half_million_reads.fna.stats │ │ ├── h_sapiens.Tn.million_reads.fna.stats │ │ ├── 
lmonocytogenes.Tmn.million_reads.fna.stats │ │ ├── lmonocytogenes.Tn.million_reads.fna.stats │ │ ├── cbriggsae.Tn.half_million_reads.fna.stats │ │ ├── cbriggsae.m.half_million_reads.fna.stats │ │ └── cbriggsae.Tmn.half_million_reads.fna.stats ├── huffman │ ├── run │ ├── stdafx.h │ ├── cpuencode.h │ ├── parameters.h │ ├── comparison_helpers.h │ └── cuda_helpers.h ├── streamcluster │ ├── run │ └── Makefile ├── gaussian │ ├── run │ └── Makefile ├── lud │ ├── run │ ├── tools │ │ ├── Makefile │ │ └── README │ ├── Makefile │ ├── cuda │ │ ├── README │ │ └── Makefile │ ├── README │ └── base │ │ └── lud_base.c ├── b+tree │ ├── run │ ├── penmp │ ├── README │ └── main.h ├── particlefilter │ ├── run │ └── Makefile ├── hotspot3D │ ├── run │ └── Makefile ├── hotspot │ ├── README │ ├── run │ └── Makefile └── cfd │ ├── run │ ├── Makefile │ └── README ├── openmp ├── nn │ ├── run │ ├── hurricane_gen │ ├── filelist_4 │ ├── README │ ├── gen_dataset.sh │ └── Makefile ├── backprop │ ├── run │ ├── README │ ├── imagenet.c │ ├── Makefile │ └── facetrain.c ├── leukocyte │ ├── meschach_lib │ │ ├── config.cache │ │ ├── confdefs.h │ │ ├── MACHINES │ │ │ ├── TurboC │ │ │ │ ├── filelist │ │ │ │ ├── meschtc.zip │ │ │ │ └── README │ │ │ ├── Cray │ │ │ │ ├── mesch-cray.tar.Z │ │ │ │ ├── patch.3 │ │ │ │ └── patch.2 │ │ │ └── MicroSoft │ │ │ │ └── stewart.zip │ │ ├── mesch12b.tar.gz │ │ ├── rk4.dat │ │ ├── config.log │ │ └── ls.dat │ ├── run │ ├── OpenMP │ │ ├── README │ │ ├── avilib.c │ │ ├── avilib.h │ │ ├── misc_math.h │ │ ├── track_ellipse.h │ │ └── Makefile │ └── Makefile ├── lavaMD │ ├── run │ └── kernel │ │ └── kernel_cpu.h ├── myocyte │ ├── run │ ├── main.c │ ├── timer.c │ ├── README │ ├── Makefile │ └── define.c ├── bfs │ ├── run │ ├── run_offload │ └── Makefile ├── pathfinder │ ├── run │ ├── README.txt │ └── Makefile ├── lud │ ├── run_offload │ ├── tools │ │ ├── Makefile │ │ └── README │ ├── run │ ├── Makefile │ ├── README │ └── base │ │ └── lud_base.c ├── nw │ ├── run │ ├── run_offload │ 
└── Makefile ├── srad │ ├── srad_v2 │ │ ├── run │ │ ├── Makefile │ │ └── README │ ├── srad_v1 │ │ ├── run │ │ ├── timer.c │ │ ├── include.h │ │ ├── define.c │ │ └── makefile │ └── Makefile ├── heartwall │ ├── run │ ├── README │ ├── AVI │ │ ├── avilib.c │ │ ├── avilib.h │ │ └── makefile │ └── makefile ├── mummergpu │ ├── src │ │ ├── michaelinput.txt │ │ ├── morton.c │ │ └── info.xml │ ├── NOTES │ ├── Makefile │ ├── run │ └── experiments │ │ └── e2e │ │ ├── s_suis.mummergpu-1.0.million_reads.fna.C.stats │ │ ├── h_sapiens.mummergpu-1.0.million_reads.fna.C.stats │ │ ├── s_suis.mummergpu-1.0.million_reads.fna.stats │ │ ├── h_sapiens.mummergpu-1.0.half_million_reads.fna.C.stats │ │ ├── h_sapiens.mummergpu-1.0.million_reads.fna.stats │ │ ├── lmonocytogenes.mummergpu-1.0.million_reads.fna.C.stats │ │ ├── cbriggsae.mummergpu-1.0.half_million_reads.fna.C.stats │ │ ├── h_sapiens.mummergpu-1.0.half_million_reads.fna.stats │ │ ├── lmonocytogenes.mummergpu-1.0.million_reads.fna.stats │ │ ├── cbriggsae.mummergpu-1.0.half_million_reads.fna.stats │ │ ├── s_suis.m.million_reads.fna.C.stats │ │ ├── s_suis.m.million_reads.fna.stats │ │ ├── s_suis.Tmn.million_reads.fna.stats │ │ ├── s_suis.Tn.million_reads.fna.stats │ │ ├── backup │ │ ├── s_suis.m.million_reads.fna.stats │ │ ├── h_sapiens.m.million_reads.fna.stats │ │ ├── lmonocytogenes.m.million_reads.fna.stats │ │ └── cbriggsae.m.half_million_reads.fna.stats │ │ ├── h_sapiens.m.half_million_reads.fna.C.stats │ │ ├── h_sapiens.m.million_reads.fna.C.stats │ │ ├── h_sapiens.m.million_reads.fna.stats │ │ ├── lmonocytogenes.m.million_reads.fna.C.stats │ │ ├── cbriggsae.m.half_million_reads.fna.C.stats │ │ ├── h_sapiens.Tmn.million_reads.fna.stats │ │ ├── h_sapiens.Tn.half_million_reads.fna.stats │ │ ├── h_sapiens.Tn.million_reads.fna.stats │ │ ├── lmonocytogenes.Tn.million_reads.fna.stats │ │ ├── lmonocytogenes.m.million_reads.fna.stats │ │ ├── cbriggsae.m.half_million_reads.fna.stats │ │ ├── lmonocytogenes.Tmn.million_reads.fna.stats │ │ 
├── cbriggsae.Tmn.half_million_reads.fna.stats │ │ └── cbriggsae.Tn.half_million_reads.fna.stats ├── particlefilter │ ├── run │ └── Makefile ├── streamcluster │ ├── run │ └── Makefile ├── cfd │ ├── run_offload │ ├── run │ └── README ├── b+tree │ ├── penmp │ ├── run │ └── main.h ├── kmeans │ ├── run │ ├── Makefile │ ├── kmeans_openmp │ │ ├── README │ │ └── Makefile │ └── kmeans_serial │ │ ├── README │ │ └── Makefile ├── hotspot │ ├── run_offload │ ├── run │ ├── Makefile │ └── README └── hotspot3D │ ├── run │ └── Makefile ├── others └── rng │ └── rng │ ├── html │ ├── doxygen.png │ ├── tab_b.gif │ ├── tab_l.gif │ ├── tab_r.gif │ └── search │ │ ├── close.png │ │ ├── search.png │ │ └── nomatches.html │ └── latex │ ├── files.tex │ └── Makefile ├── common ├── make_email.sh └── set_bsp.bat └── .gitignore /opencl/dwt2d/dwt_cl/common.h: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cuda/hybridsort/run: -------------------------------------------------------------------------------- 1 | ./hybridsort r -------------------------------------------------------------------------------- /cuda/nw/run: -------------------------------------------------------------------------------- 1 | ./needle 23040 10 2 | -------------------------------------------------------------------------------- /opencl/lud/ocl/.gitignore: -------------------------------------------------------------------------------- 1 | lud 2 | -------------------------------------------------------------------------------- /cuda/backprop/run: -------------------------------------------------------------------------------- 1 | ./backprop 65536 2 | -------------------------------------------------------------------------------- /cuda/lavaMD/run: -------------------------------------------------------------------------------- 1 | ./lavaMD -boxes1d 10 
-------------------------------------------------------------------------------- /opencl/hybridsort/run: -------------------------------------------------------------------------------- 1 | ./hybridsort r -------------------------------------------------------------------------------- /openmp/nn/run: -------------------------------------------------------------------------------- 1 | ./nn filelist_4 5 30 90 -------------------------------------------------------------------------------- /cuda/leukocyte/meschach_lib/confdefs.h: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /cuda/leukocyte/meschach_lib/config.cache: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cuda/myocyte/run: -------------------------------------------------------------------------------- 1 | ./myocyte.out 100 1 0 2 | -------------------------------------------------------------------------------- /opencl/heartwall/run: -------------------------------------------------------------------------------- 1 | ./heartwall 20 2 | -------------------------------------------------------------------------------- /opencl/leukocyte/meschach_lib/config.cache: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openmp/backprop/run: -------------------------------------------------------------------------------- 1 | ./backprop 65536 2 | -------------------------------------------------------------------------------- /openmp/leukocyte/meschach_lib/config.cache: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cuda/nn/run: 
-------------------------------------------------------------------------------- 1 | ./nn filelist_4 -r 5 -lat 30 -lng 90 -------------------------------------------------------------------------------- /cuda/srad/srad_v1/run: -------------------------------------------------------------------------------- 1 | ./srad 100 0.5 8000 8000 -------------------------------------------------------------------------------- /opencl/leukocyte/meschach_lib/confdefs.h: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /opencl/myocyte/run: -------------------------------------------------------------------------------- 1 | ./myocyte.out -time 100 2 | -------------------------------------------------------------------------------- /opencl/srad/output/.gitignore: -------------------------------------------------------------------------------- 1 | image_out.pgm 2 | -------------------------------------------------------------------------------- /openmp/lavaMD/run: -------------------------------------------------------------------------------- 1 | ./lavaMD -cores 4 -boxes1d 10 -------------------------------------------------------------------------------- /openmp/leukocyte/meschach_lib/confdefs.h: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /openmp/myocyte/run: -------------------------------------------------------------------------------- 1 | ./myocyte.out 100 1 0 4 2 | -------------------------------------------------------------------------------- /cuda/bfs/run: -------------------------------------------------------------------------------- 1 | ./bfs ../../data/bfs/graph1MW_6.txt 2 | -------------------------------------------------------------------------------- /cuda/pathfinder/run: 
-------------------------------------------------------------------------------- 1 | ./pathfinder 1000000 1000 15 2 | -------------------------------------------------------------------------------- /opencl/bfs/run: -------------------------------------------------------------------------------- 1 | ./bfs ../../data/bfs/graph1MW_6.txt -------------------------------------------------------------------------------- /opencl/hotspot3D/README: -------------------------------------------------------------------------------- 1 | USAGE: 2 | make clean 3 | make -------------------------------------------------------------------------------- /openmp/bfs/run: -------------------------------------------------------------------------------- 1 | ./bfs 4 ../../data/bfs/graph1MW_6.txt -------------------------------------------------------------------------------- /openmp/pathfinder/run: -------------------------------------------------------------------------------- 1 | ./pathfinder 1000000 1000 2 | -------------------------------------------------------------------------------- /cuda/srad/srad_v2/run: -------------------------------------------------------------------------------- 1 | ./srad 2048 2048 0 127 0 127 0.5 2 -------------------------------------------------------------------------------- /opencl/nn/run: -------------------------------------------------------------------------------- 1 | ./nn filelist.txt -r 5 -lat 30 -lng 90 2 | -------------------------------------------------------------------------------- /opencl/nw/run: -------------------------------------------------------------------------------- 1 | version=$1 2 | ./nw 23040 10 $version 3 | -------------------------------------------------------------------------------- /openmp/lud/run_offload: -------------------------------------------------------------------------------- 1 | ./omp/lud_omp_offload -s 8000 2 | -------------------------------------------------------------------------------- /openmp/nw/run: 
-------------------------------------------------------------------------------- 1 | threads=$1 2 | ./needle 23040 10 $threads -------------------------------------------------------------------------------- /openmp/nw/run_offload: -------------------------------------------------------------------------------- 1 | ./needle_offload 16384 10 16 2 | -------------------------------------------------------------------------------- /cuda/heartwall/run: -------------------------------------------------------------------------------- 1 | ./heartwall ../../data/heartwall/test.avi 20 -------------------------------------------------------------------------------- /cuda/kmeans/run: -------------------------------------------------------------------------------- 1 | ./kmeans -o -i ../../data/kmeans/kdd_cup 2 | -------------------------------------------------------------------------------- /opencl/kmeans/run: -------------------------------------------------------------------------------- 1 | ./kmeans -o -i ../../data/kmeans/kdd_cup 2 | -------------------------------------------------------------------------------- /opencl/lavaMD/run: -------------------------------------------------------------------------------- 1 | version=$1 2 | ./lavaMD -boxes1d 10 $version -------------------------------------------------------------------------------- /openmp/srad/srad_v2/run: -------------------------------------------------------------------------------- 1 | ./srad 2048 2048 0 127 0 127 2 0.5 2 -------------------------------------------------------------------------------- /opencl/backprop/run: -------------------------------------------------------------------------------- 1 | version=$1 2 | ./backprop 12800000 $version 3 | -------------------------------------------------------------------------------- /opencl/srad/run: -------------------------------------------------------------------------------- 1 | version=$1 2 | ./srad 100 0.5 8000 8000 $version 3 | 
-------------------------------------------------------------------------------- /cuda/dwt2d/autorun.sh: -------------------------------------------------------------------------------- 1 | ./dwt RGB_color_solid_cube_1.bmp -d 1024x1024 -f -5 -------------------------------------------------------------------------------- /cuda/mummergpu/src/michaelinput.txt: -------------------------------------------------------------------------------- 1 | ~mwb7w/cs6501/project/mummergpu/data/ -------------------------------------------------------------------------------- /openmp/bfs/run_offload: -------------------------------------------------------------------------------- 1 | ./bfs_offload 4 ../../data/bfs/graph1MW_6.txt 2 | -------------------------------------------------------------------------------- /openmp/heartwall/run: -------------------------------------------------------------------------------- 1 | ./heartwall ../../data/heartwall/test.avi 20 4 2 | -------------------------------------------------------------------------------- /cuda/huffman/run: -------------------------------------------------------------------------------- 1 | ./pavle ../../data/huffman/test1024_H2.206587175259.in 2 | -------------------------------------------------------------------------------- /cuda/leukocyte/run: -------------------------------------------------------------------------------- 1 | ./CUDA/leukocyte ../../data/leukocyte/testfile.avi 5 2 | -------------------------------------------------------------------------------- /cuda/streamcluster/run: -------------------------------------------------------------------------------- 1 | ./sc_gpu 10 20 256 65536 65536 1000 none output.txt 1 -------------------------------------------------------------------------------- /opencl/pathfinder/run: -------------------------------------------------------------------------------- 1 | version=$1 2 | ./pathfinder 1000000 1000 32 $version 3 | 
-------------------------------------------------------------------------------- /openmp/leukocyte/run: -------------------------------------------------------------------------------- 1 | ./OpenMP/leukocyte 5 4 ../../data/leukocyte/testfile.avi -------------------------------------------------------------------------------- /openmp/mummergpu/src/michaelinput.txt: -------------------------------------------------------------------------------- 1 | ~mwb7w/cs6501/project/mummergpu/data/ -------------------------------------------------------------------------------- /openmp/particlefilter/run: -------------------------------------------------------------------------------- 1 | ./particle_filter -x 128 -y 128 -z 10 -np 10000 2 | -------------------------------------------------------------------------------- /openmp/srad/srad_v1/run: -------------------------------------------------------------------------------- 1 | threads=$1 2 | ./srad 100 0.5 8000 8000 $threads 3 | -------------------------------------------------------------------------------- /openmp/streamcluster/run: -------------------------------------------------------------------------------- 1 | ./sc_omp 10 20 256 65536 65536 1000 none output.txt 4 -------------------------------------------------------------------------------- /cuda/gaussian/run: -------------------------------------------------------------------------------- 1 | ./gaussian -f ../../data/gaussian/matrix4.txt 2 | ./gaussian -s 16 -------------------------------------------------------------------------------- /cuda/leukocyte/CUDA/run: -------------------------------------------------------------------------------- 1 | ./leukocyte ../../../data/leukocyte/testfile.avi 10 2 | -------------------------------------------------------------------------------- /opencl/leukocyte/OpenCL/run: -------------------------------------------------------------------------------- 1 | ./leukocyte ../../../data/leukocyte/testfile.avi 10 2 | 
-------------------------------------------------------------------------------- /openmp/cfd/run_offload: -------------------------------------------------------------------------------- 1 | ./euler3d_cpu_offload ../../data/cfd/fvcorr.domn.193K 2 | -------------------------------------------------------------------------------- /opencl/gaussian/run: -------------------------------------------------------------------------------- 1 | #./gaussian -f ../../data/gaussian/matrix4.txt 2 | ./gaussian -s 256 -------------------------------------------------------------------------------- /cuda/lud/run: -------------------------------------------------------------------------------- 1 | #./cuda/lud_cuda -i ../../data/lud/2048.dat 2 | ./cuda/lud_cuda -s 11520 3 | -------------------------------------------------------------------------------- /cuda/lud/tools/Makefile: -------------------------------------------------------------------------------- 1 | gen_input: gen_input.c 2 | gcc -o gen_input -O3 gen_input.c -fopenmp 3 | -------------------------------------------------------------------------------- /cuda/b+tree/run: -------------------------------------------------------------------------------- 1 | ./b+tree.out file ../../data/b+tree/mil.txt command ../../data/b+tree/command.txt 2 | -------------------------------------------------------------------------------- /cuda/leukocyte/README: -------------------------------------------------------------------------------- 1 | ######OUTPUT FOR VALIDATION######## 2 | USAGE: 3 | make clean 4 | make OUTPUT=Y -------------------------------------------------------------------------------- /opencl/b+tree/run: -------------------------------------------------------------------------------- 1 | ./b+tree.out file ../../data/b+tree/mil.txt command ../../data/b+tree/command.txt 2 | -------------------------------------------------------------------------------- /opencl/lud/tools/Makefile: 
-------------------------------------------------------------------------------- 1 | gen_input: gen_input.c 2 | gcc -o gen_input -O3 gen_input.c -fopenmp 3 | -------------------------------------------------------------------------------- /openmp/backprop/README: -------------------------------------------------------------------------------- 1 | To change the number of OMP threads, 2 | please modify NUM_THREAD in backprop.h -------------------------------------------------------------------------------- /openmp/heartwall/README: -------------------------------------------------------------------------------- 1 | ######OUTPUT FOR VALIDATION######## 2 | USAGE: 3 | make clean 4 | make OUTPUT=Y -------------------------------------------------------------------------------- /openmp/lud/tools/Makefile: -------------------------------------------------------------------------------- 1 | gen_input: gen_input.c 2 | gcc -o gen_input -O3 gen_input.c -fopenmp 3 | -------------------------------------------------------------------------------- /cuda/b+tree/penmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/cuda/b+tree/penmp -------------------------------------------------------------------------------- /opencl/lud/run: -------------------------------------------------------------------------------- 1 | version=$1 2 | #./lud -i ../../data/lud/2048.dat $version 3 | ./lud -s 11520 $version 4 | -------------------------------------------------------------------------------- /cuda/dwt2d/run.sh: -------------------------------------------------------------------------------- 1 | ./dwt2d 192.bmp -d 192x192 -f -5 -l 3 2 | ls 3 | ./dwt2d rgb.bmp -d 1024x1024 -f -5 -l 3 4 | -------------------------------------------------------------------------------- /cuda/myocyte/main.cu: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/cuda/myocyte/main.cu -------------------------------------------------------------------------------- /opencl/b+tree/penmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/opencl/b+tree/penmp -------------------------------------------------------------------------------- /opencl/dwt2d/run.sh: -------------------------------------------------------------------------------- 1 | ./dwt2d 192.bmp -d 192x192 -f -5 -l 3 2 | ls 3 | ./dwt2d rgb.bmp -d 1024x1024 -f -5 -l 3 4 | -------------------------------------------------------------------------------- /opencl/myocyte/README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/opencl/myocyte/README -------------------------------------------------------------------------------- /opencl/myocyte/main.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/opencl/myocyte/main.c -------------------------------------------------------------------------------- /openmp/b+tree/penmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/openmp/b+tree/penmp -------------------------------------------------------------------------------- /openmp/b+tree/run: -------------------------------------------------------------------------------- 1 | ./b+tree.out core 2 file ../../data/b+tree/mil.txt command ../../data/b+tree/command.txt 2 | -------------------------------------------------------------------------------- /openmp/leukocyte/OpenMP/README: -------------------------------------------------------------------------------- 1 | ######OUTPUT FOR 
VALIDATION######## 2 | USAGE: 3 | make clean 4 | make OUTPUT=Y -------------------------------------------------------------------------------- /openmp/mummergpu/NOTES: -------------------------------------------------------------------------------- 1 | The OMP version was developed by adding pragmas to the original mummergpu implementation. -------------------------------------------------------------------------------- /openmp/myocyte/main.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/openmp/myocyte/main.c -------------------------------------------------------------------------------- /cuda/leukocyte/meschach_lib/MACHINES/TurboC/filelist: -------------------------------------------------------------------------------- 1 | README 2 | filelist 3 | machine.h 4 | mail 5 | meschach.mak 6 | -------------------------------------------------------------------------------- /cuda/lud/Makefile: -------------------------------------------------------------------------------- 1 | all: lud_cuda 2 | 3 | lud_cuda: 4 | cd cuda; make 5 | 6 | clean: 7 | cd cuda; make clean 8 | -------------------------------------------------------------------------------- /opencl/leukocyte/meschach_lib/MACHINES/TurboC/filelist: -------------------------------------------------------------------------------- 1 | README 2 | filelist 3 | machine.h 4 | mail 5 | meschach.mak 6 | -------------------------------------------------------------------------------- /openmp/leukocyte/meschach_lib/MACHINES/TurboC/filelist: -------------------------------------------------------------------------------- 1 | README 2 | filelist 3 | machine.h 4 | mail 5 | meschach.mak 6 | -------------------------------------------------------------------------------- /openmp/nn/hurricane_gen: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/openmp/nn/hurricane_gen -------------------------------------------------------------------------------- /cuda/mummergpu/Makefile: -------------------------------------------------------------------------------- 1 | all: mummer 2 | 3 | mummer: 4 | cd src; make 5 | 6 | clean: 7 | cd src; make clean 8 | 9 | -------------------------------------------------------------------------------- /openmp/lud/run: -------------------------------------------------------------------------------- 1 | threads=$1 2 | #./omp/lud_omp -n $threads -i ../../data/lud/2048.dat 3 | ./omp/lud_omp -s 11520 -n $threads -------------------------------------------------------------------------------- /openmp/mummergpu/Makefile: -------------------------------------------------------------------------------- 1 | all: mummer 2 | 3 | mummer: 4 | cd src; make 5 | 6 | clean: 7 | cd src; make clean 8 | 9 | -------------------------------------------------------------------------------- /cuda/heartwall/AVI/avilib.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/cuda/heartwall/AVI/avilib.c -------------------------------------------------------------------------------- /cuda/heartwall/AVI/avilib.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/cuda/heartwall/AVI/avilib.h -------------------------------------------------------------------------------- /cuda/leukocyte/CUDA/avilib.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/cuda/leukocyte/CUDA/avilib.c -------------------------------------------------------------------------------- /cuda/leukocyte/CUDA/avilib.h: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/cuda/leukocyte/CUDA/avilib.h -------------------------------------------------------------------------------- /openmp/cfd/run: -------------------------------------------------------------------------------- 1 | ./euler3d_cpu ../../data/cfd/fvcorr.domn.097K 1000 2 | ./pre_euler3d_cpu ../../data/cfd/fvcorr.domn.097K 1000 3 | -------------------------------------------------------------------------------- /opencl/b+tree/kernel/kernel_gpu_opencl_altera_v0.cl: -------------------------------------------------------------------------------- 1 | #include "kernel_gpu_opencl_v0.cl" 2 | #include "kernel_gpu_opencl_2_v0.cl" 3 | -------------------------------------------------------------------------------- /opencl/b+tree/kernel/kernel_gpu_opencl_altera_v2.cl: -------------------------------------------------------------------------------- 1 | #include "kernel_gpu_opencl_v2.cl" 2 | #include "kernel_gpu_opencl_2_v2.cl" 3 | -------------------------------------------------------------------------------- /openmp/heartwall/AVI/avilib.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/openmp/heartwall/AVI/avilib.c -------------------------------------------------------------------------------- /openmp/heartwall/AVI/avilib.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/openmp/heartwall/AVI/avilib.h -------------------------------------------------------------------------------- /openmp/kmeans/run: -------------------------------------------------------------------------------- 1 | ./kmeans_serial/kmeans -i ../../data/kmeans/kdd_cup 2 | ./kmeans_openmp/kmeans -n 4 -i ../../data/kmeans/kdd_cup 
-------------------------------------------------------------------------------- /others/rng/rng/html/doxygen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/others/rng/rng/html/doxygen.png -------------------------------------------------------------------------------- /others/rng/rng/html/tab_b.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/others/rng/rng/html/tab_b.gif -------------------------------------------------------------------------------- /others/rng/rng/html/tab_l.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/others/rng/rng/html/tab_l.gif -------------------------------------------------------------------------------- /others/rng/rng/html/tab_r.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/others/rng/rng/html/tab_r.gif -------------------------------------------------------------------------------- /cuda/nn/filelist_4: -------------------------------------------------------------------------------- 1 | ../../data/nn/cane4_0.db 2 | ../../data/nn/cane4_1.db 3 | ../../data/nn/cane4_2.db 4 | ../../data/nn/cane4_3.db 5 | -------------------------------------------------------------------------------- /cuda/particlefilter/run: -------------------------------------------------------------------------------- 1 | ./particlefilter_naive -x 128 -y 128 -z 10 -np 1000 2 | ./particlefilter_float -x 128 -y 128 -z 10 -np 1000 3 | -------------------------------------------------------------------------------- /opencl/b+tree/kernel/kernel_gpu_opencl_altera_v1.cl: 
-------------------------------------------------------------------------------- 1 | #include "kernel_gpu_opencl_v1.cl" 2 | #include "kernel_gpu_opencl_2_v1.cl" 3 | 4 | -------------------------------------------------------------------------------- /opencl/b+tree/kernel/kernel_gpu_opencl_altera_v3.cl: -------------------------------------------------------------------------------- 1 | #include "kernel_gpu_opencl_v3.cl" 2 | #include "kernel_gpu_opencl_2_v3.cl" 3 | 4 | -------------------------------------------------------------------------------- /opencl/b+tree/kernel/kernel_gpu_opencl_altera_v4.cl: -------------------------------------------------------------------------------- 1 | #include "kernel_gpu_opencl_v4.cl" 2 | #include "kernel_gpu_opencl_2_v4.cl" 3 | 4 | -------------------------------------------------------------------------------- /opencl/leukocyte/OpenCL/avilib.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/opencl/leukocyte/OpenCL/avilib.c -------------------------------------------------------------------------------- /opencl/leukocyte/OpenCL/avilib.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/opencl/leukocyte/OpenCL/avilib.h -------------------------------------------------------------------------------- /openmp/hotspot/run_offload: -------------------------------------------------------------------------------- 1 | ./hotspot_offload 1024 1024 10000 16 ../../data/hotspot/temp_1024 ../../data/hotspot/power_1024 output.out 2 | -------------------------------------------------------------------------------- /openmp/leukocyte/OpenMP/avilib.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/openmp/leukocyte/OpenMP/avilib.c 
-------------------------------------------------------------------------------- /openmp/leukocyte/OpenMP/avilib.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/openmp/leukocyte/OpenMP/avilib.h -------------------------------------------------------------------------------- /openmp/nn/filelist_4: -------------------------------------------------------------------------------- 1 | ../../data/nn/cane4_0.db 2 | ../../data/nn/cane4_1.db 3 | ../../data/nn/cane4_2.db 4 | ../../data/nn/cane4_3.db 5 | -------------------------------------------------------------------------------- /opencl/heartwall/util/avi/avilib.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/opencl/heartwall/util/avi/avilib.c -------------------------------------------------------------------------------- /opencl/heartwall/util/avi/avilib.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/opencl/heartwall/util/avi/avilib.h -------------------------------------------------------------------------------- /opencl/nn/filelist.txt: -------------------------------------------------------------------------------- 1 | ../../data/nn/cane4_0.db 2 | ../../data/nn/cane4_1.db 3 | ../../data/nn/cane4_2.db 4 | ../../data/nn/cane4_3.db 5 | 6 | -------------------------------------------------------------------------------- /opencl/pathfinder/pathfinder_common.h: -------------------------------------------------------------------------------- 1 | #ifndef BSIZE 2 | #define BSIZE 256 3 | #endif 4 | 5 | #ifndef SSIZE 6 | #define SSIZE 16 7 | #endif -------------------------------------------------------------------------------- /others/rng/rng/html/search/close.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/others/rng/rng/html/search/close.png -------------------------------------------------------------------------------- /others/rng/rng/html/search/search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/others/rng/rng/html/search/search.png -------------------------------------------------------------------------------- /cuda/hotspot3D/run: -------------------------------------------------------------------------------- 1 | #./hotspot3D 512 512 8 1000 ../../data/hotspot3D/power_512x8 ../../data/hotspot3D/temp_512x8 output.txt 2 | ./hotspot3D 960 960 100 100 3 | -------------------------------------------------------------------------------- /cuda/leukocyte/meschach_lib/mesch12b.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/cuda/leukocyte/meschach_lib/mesch12b.tar.gz -------------------------------------------------------------------------------- /opencl/leukocyte/README: -------------------------------------------------------------------------------- 1 | To run the program: 2 | cd OpenCL 3 | ./run 4 | 5 | ######OUTPUT FOR VALIDATION######## 6 | USAGE: 7 | make clean 8 | make OUTPUT=Y 9 | -------------------------------------------------------------------------------- /opencl/leukocyte/meschach_lib/mesch12b.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/opencl/leukocyte/meschach_lib/mesch12b.tar.gz -------------------------------------------------------------------------------- /openmp/hotspot3D/run: 
-------------------------------------------------------------------------------- 1 | #./hotspot3D 512 512 8 1000 ../../data/hotspot3D/power_512x8 ../../data/hotspot3D/temp_512x8 output.txt 2 | ./hotspot3D 960 960 100 100 3 | -------------------------------------------------------------------------------- /openmp/leukocyte/meschach_lib/mesch12b.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/openmp/leukocyte/meschach_lib/mesch12b.tar.gz -------------------------------------------------------------------------------- /cuda/leukocyte/Makefile: -------------------------------------------------------------------------------- 1 | all: CUDA/leukocyte 2 | 3 | CUDA/leukocyte: 4 | cd CUDA; make 5 | 6 | clean: 7 | cd CUDA; make clean 8 | rm -f meschach_lib/meschach.a 9 | -------------------------------------------------------------------------------- /openmp/pathfinder/README.txt: -------------------------------------------------------------------------------- 1 | To compile: 2 | make 3 | 4 | To execute: 5 | 6 | pathfiner width number_of_steps 7 | typical command: ./pathfinder 100000 100 > out 8 | -------------------------------------------------------------------------------- /cuda/pathfinder/README: -------------------------------------------------------------------------------- 1 | To compile the program: 2 | 3 | nvcc -cuda dynproc.cu 4 | nvcc -o dynproc dynproc.cu.cpp 5 | 6 | Usage: dynproc row_len col_len pyramid_height 7 | -------------------------------------------------------------------------------- /opencl/leukocyte/Makefile: -------------------------------------------------------------------------------- 1 | all: OpenCL/leukocyte 2 | 3 | OpenCL/leukocyte: 4 | cd OpenCL; make 5 | 6 | clean: 7 | cd OpenCL; make clean 8 | rm -f meschach_lib/meschach.a 9 | -------------------------------------------------------------------------------- 
/openmp/leukocyte/Makefile: -------------------------------------------------------------------------------- 1 | all: OpenMP/leukocyte 2 | 3 | OpenMP/leukocyte: 4 | cd OpenMP; make 5 | 6 | clean: 7 | cd OpenMP; make clean 8 | rm -f meschach_lib/meschach.a 9 | -------------------------------------------------------------------------------- /cuda/b+tree/README: -------------------------------------------------------------------------------- 1 | ******Adjustable work group size***** 2 | The kernel 1 & 2: RD_WG_SIZE_0_0 RD_WG_SIZE_0 3 | 4 | USAGE: 5 | make clean 6 | make KERNEL_DIM="-DRD_WG_SIZE_0=256" 7 | -------------------------------------------------------------------------------- /cuda/leukocyte/meschach_lib/MACHINES/TurboC/meschtc.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/cuda/leukocyte/meschach_lib/MACHINES/TurboC/meschtc.zip -------------------------------------------------------------------------------- /cuda/mummergpu/run: -------------------------------------------------------------------------------- 1 | usage: mummergpu [options] reference.fna query.fna 2 | 3 | bin/mummergpu ../../data/mummergpu/NC_003997.fna ../../data/mummergpu/NC_003997_q100bp.fna > NC_00399.out -------------------------------------------------------------------------------- /cuda/leukocyte/meschach_lib/MACHINES/Cray/mesch-cray.tar.Z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/cuda/leukocyte/meschach_lib/MACHINES/Cray/mesch-cray.tar.Z -------------------------------------------------------------------------------- /cuda/leukocyte/meschach_lib/MACHINES/MicroSoft/stewart.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/cuda/leukocyte/meschach_lib/MACHINES/MicroSoft/stewart.zip -------------------------------------------------------------------------------- /opencl/leukocyte/meschach_lib/MACHINES/TurboC/meschtc.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/opencl/leukocyte/meschach_lib/MACHINES/TurboC/meschtc.zip -------------------------------------------------------------------------------- /openmp/leukocyte/meschach_lib/MACHINES/TurboC/meschtc.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/openmp/leukocyte/meschach_lib/MACHINES/TurboC/meschtc.zip -------------------------------------------------------------------------------- /openmp/srad/srad_v2/Makefile: -------------------------------------------------------------------------------- 1 | # C compiler 2 | CC = g++ 3 | CC_FLAGS = -g -fopenmp -O2 4 | 5 | bfs: 6 | $(CC) $(CC_FLAGS) srad.cpp -o srad 7 | 8 | clean: 9 | rm -f srad 10 | -------------------------------------------------------------------------------- /cuda/bfs/README: -------------------------------------------------------------------------------- 1 | The original BFS CUDA code was obtained from Pawan Harish and P. J. Narayanan at IIIT, 2 | who have given us permission to include it as part of Rodinia under Rodinia's license. 
-------------------------------------------------------------------------------- /opencl/hotspot3D/run: -------------------------------------------------------------------------------- 1 | version=$1 2 | #./hotspot3D 512 512 8 1000 ../../data/hotspot3D/power_512x8 ../../data/hotspot3D/temp_512x8 output.txt $version 3 | ./hotspot3D 960 960 100 100 $version 4 | -------------------------------------------------------------------------------- /opencl/leukocyte/meschach_lib/MACHINES/Cray/mesch-cray.tar.Z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/opencl/leukocyte/meschach_lib/MACHINES/Cray/mesch-cray.tar.Z -------------------------------------------------------------------------------- /opencl/leukocyte/meschach_lib/MACHINES/MicroSoft/stewart.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/opencl/leukocyte/meschach_lib/MACHINES/MicroSoft/stewart.zip -------------------------------------------------------------------------------- /openmp/leukocyte/meschach_lib/MACHINES/Cray/mesch-cray.tar.Z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/openmp/leukocyte/meschach_lib/MACHINES/Cray/mesch-cray.tar.Z -------------------------------------------------------------------------------- /openmp/leukocyte/meschach_lib/MACHINES/MicroSoft/stewart.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpga-opencl-benchmarks/rodinia_fpga/HEAD/openmp/leukocyte/meschach_lib/MACHINES/MicroSoft/stewart.zip -------------------------------------------------------------------------------- /common/make_email.sh: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env bash 2 | 3 | EMAIL=$1 4 | shift 5 | OUT=$1 6 | shift 7 | 8 | { 9 | date 10 | make $* 11 | date 12 | } 2>&1 | tee $OUT | mailx -s "make $*" $EMAIL 13 | 14 | -------------------------------------------------------------------------------- /cuda/heartwall/README: -------------------------------------------------------------------------------- 1 | ******Adjustable work group size***** 2 | USAGE: 3 | make clean 4 | make KERNEL_DIM="-DRD_WG_SIZE_0=256" 5 | 6 | ######OUTPUT FOR VALIDATION######## 7 | USAGE: 8 | make clean 9 | make OUTPUT=Y -------------------------------------------------------------------------------- /openmp/lud/Makefile: -------------------------------------------------------------------------------- 1 | all: lud_omp 2 | 3 | lud_omp: 4 | cd omp; make 5 | 6 | lud_omp_offload: 7 | cd omp; make -f Makefile.offload 8 | 9 | clean: 10 | cd omp; make clean ; make -f Makefile.offload clean 11 | -------------------------------------------------------------------------------- /opencl/cfd/CLHelper2.cpp: -------------------------------------------------------------------------------- 1 | #include "CLHelper2.h" 2 | 3 | cl_context context; 4 | cl_command_queue cmd_queue; 5 | cl_device_type device_type; 6 | cl_device_id * device_list; 7 | cl_int num_devices; 8 | 9 | -------------------------------------------------------------------------------- /opencl/heartwall/README: -------------------------------------------------------------------------------- 1 | ******Adjustable work group size***** 2 | USAGE: 3 | make clean 4 | make KERNEL_DIM="-DRD_WG_SIZE_0=256" 5 | 6 | ######OUTPUT FOR VALIDATION######## 7 | USAGE: 8 | make clean 9 | make OUTPUT=Y -------------------------------------------------------------------------------- /cuda/nn/nn_pbs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #PBS -l nodes=2:ppn=1 3 | #PBS -o nn_out 4 | #PBS -j oe 5 | #PBS -M mag3dn@virginia.edu 6 | 7 | export 
OMP_NUM_THREADS=8 8 | cd Desktop/upbench/nn_openMP 9 | ./nn filelist_4 3 30 90 10 | -------------------------------------------------------------------------------- /cuda/leukocyte/meschach_lib/rk4.dat: -------------------------------------------------------------------------------- 1 | # No. of a problem 2 | 1 3 | # Initial time 4 | 0 5 | # Final time 6 | 1 7 | # Solution is x(t) = (cos(t),-sin(t)) 8 | # x(0) = 9 | Vector: dim: 2 10 | 1 0 11 | # Step size 12 | 0.1 13 | -------------------------------------------------------------------------------- /opencl/particlefilter/run: -------------------------------------------------------------------------------- 1 | #./OCL_particlefilter_naive -x 128 -y 128 -z 10 -np 10000 2 | ./OCL_particlefilter_double -x 128 -y 128 -z 10 -np 400000 3 | ./OCL_particlefilter_single -x 128 -y 128 -z 10 -np 400000 4 | cat ./output.txt 5 | -------------------------------------------------------------------------------- /opencl/srad/README: -------------------------------------------------------------------------------- 1 | ******Adjustable work group size***** 2 | The kernel has square shape 3 | RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe the total number of threads 4 | 5 | USAGE: 6 | make clean 7 | make KERNEL_DIM="-DRD_WG_SIZE_0=256" -------------------------------------------------------------------------------- /opencl/leukocyte/meschach_lib/rk4.dat: -------------------------------------------------------------------------------- 1 | # No. of a problem 2 | 1 3 | # Initial time 4 | 0 5 | # Final time 6 | 1 7 | # Solution is x(t) = (cos(t),-sin(t)) 8 | # x(0) = 9 | Vector: dim: 2 10 | 1 0 11 | # Step size 12 | 0.1 13 | -------------------------------------------------------------------------------- /openmp/leukocyte/meschach_lib/rk4.dat: -------------------------------------------------------------------------------- 1 | # No. 
of a problem 2 | 1 3 | # Initial time 4 | 0 5 | # Final time 6 | 1 7 | # Solution is x(t) = (cos(t),-sin(t)) 8 | # x(0) = 9 | Vector: dim: 2 10 | 1 0 11 | # Step size 12 | 0.1 13 | -------------------------------------------------------------------------------- /cuda/srad/README: -------------------------------------------------------------------------------- 1 | SRAD_v1 processes a real image while SRAD_v2 randomizes the inputs. 2 | SRAD_v1 puts more computation on the GPU, e.g. initializations and reductions. 3 | Some of SRAD_v2 kernels takes more advantage of GPU's scratchpad memory. -------------------------------------------------------------------------------- /opencl/b+tree/README: -------------------------------------------------------------------------------- 1 | ******Adjustable work group size***** 2 | The kernel 1: RD_WG_SIZE_0_0 RD_WG_SIZE_0 3 | The kernel 2: RD_WG_SIZE_1_0 RD_WG_SIZE_1 4 | 5 | USAGE: 6 | make clean 7 | make KERNEL_DIM="-DRD_WG_SIZE_0=256 -DRD_WG_SIZE_1=256" 8 | -------------------------------------------------------------------------------- /opencl/lavaMD/README: -------------------------------------------------------------------------------- 1 | ******Adjustable work group size***** 2 | RD_WG_SIZE_0 or RD_WG_SIZE_0_0 3 | 4 | USAGE: 5 | make clean 6 | make KERNEL_DIM="-DRD_WG_SIZE_0=128" 7 | 8 | ######OUTPUT FOR VALIDATION######## 9 | USAGE: 10 | make clean 11 | make OUTPUT=Y -------------------------------------------------------------------------------- /openmp/kmeans/Makefile: -------------------------------------------------------------------------------- 1 | all: OPENMP/kmeans SERIAL/kmeans 2 | 3 | OPENMP/kmeans: 4 | cd kmeans_openmp; make 5 | 6 | SERIAL/kmeans: 7 | cd kmeans_serial; make 8 | 9 | clean: 10 | cd kmeans_serial; make clean 11 | cd kmeans_openmp; make clean 12 | 13 | -------------------------------------------------------------------------------- /openmp/nn/README: 
-------------------------------------------------------------------------------- 1 | To build and run nearest neighbor: 2 | make nn 3 | ./nn filelist_4 3 30 90 4 | 5 | To generate new data sets: 6 | Edit gen_dataset.sh and select the size of the desired data set 7 | make hurricane_gen 8 | ./hurricane_gen 9 | -------------------------------------------------------------------------------- /cuda/hotspot/README: -------------------------------------------------------------------------------- 1 | ******Adjustable work group size***** 2 | The kernel has square shape 3 | RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimension 4 | The actually dimension = RD_WG_SIZE_0 * RD_WG_SIZE_0 5 | 6 | USAGE: 7 | make clean 8 | make KERNEL_DIM="-DRD_WG_SIZE_0=16" -------------------------------------------------------------------------------- /cuda/huffman/stdafx.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "cutil.h" 12 | -------------------------------------------------------------------------------- /cuda/myocyte/timer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // Returns the current system time in microseconds 5 | long long get_time() { 6 | struct timeval tv; 7 | gettimeofday(&tv, NULL); 8 | return (tv.tv_sec * 1000000) + tv.tv_usec; 9 | } 10 | -------------------------------------------------------------------------------- /openmp/myocyte/timer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // Returns the current system time in microseconds 5 | long long get_time() { 6 | struct timeval tv; 7 | gettimeofday(&tv, NULL); 8 | return (tv.tv_sec * 1000000) + tv.tv_usec; 9 | } 10 | -------------------------------------------------------------------------------- 
/openmp/pathfinder/Makefile: -------------------------------------------------------------------------------- 1 | SRC = pathfinder.cpp 2 | EXE = pathfinder 3 | FLAGS = -fopenmp -O3 -lrt 4 | 5 | release: 6 | $(CXX) $(SRC) $(FLAGS) -o $(EXE) 7 | 8 | debug: 9 | $(CXX) $(SRC) -g -Wall -o $(EXE) 10 | 11 | clean: 12 | rm -f pathfinder 13 | 14 | 15 | -------------------------------------------------------------------------------- /cuda/srad/srad_v1/timer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // Returns the current system time in microseconds 5 | long long get_time() { 6 | struct timeval tv; 7 | gettimeofday(&tv, NULL); 8 | return (tv.tv_sec * 1000000) + tv.tv_usec; 9 | } 10 | -------------------------------------------------------------------------------- /openmp/srad/srad_v1/timer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // Returns the current system time in microseconds 5 | long long get_time() { 6 | struct timeval tv; 7 | gettimeofday(&tv, NULL); 8 | return (tv.tv_sec * 1000000) + tv.tv_usec; 9 | } 10 | -------------------------------------------------------------------------------- /opencl/hotspot/README: -------------------------------------------------------------------------------- 1 | ******Adjustable work group size***** 2 | The kernel has square shape 3 | RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimension 4 | The actually dimension = RD_WG_SIZE_0 * RD_WG_SIZE_0 5 | 6 | USAGE: 7 | make clean 8 | make KERNEL_DIM="-DRD_WG_SIZE_0=16" -------------------------------------------------------------------------------- /cuda/nn/gen_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #./hurricane_gen 15690 1 4 | #./hurricane_gen 21380 2 5 | ./hurricane_gen 42760 4 6 | #./hurricane_gen 85520 8 7 | #./hurricane_gen 171040 16 8 | #./hurricane_gen 342080 
32 9 | #./hurricane_gen 684160 64 10 | #./hurricane_gen 21893120 2048 11 | -------------------------------------------------------------------------------- /openmp/nn/gen_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #./hurricane_gen 15690 1 4 | #./hurricane_gen 21380 2 5 | ./hurricane_gen 42760 4 6 | #./hurricane_gen 85520 8 7 | #./hurricane_gen 171040 16 8 | #./hurricane_gen 342080 32 9 | #./hurricane_gen 684160 64 10 | #./hurricane_gen 21893120 2048 11 | -------------------------------------------------------------------------------- /openmp/mummergpu/run: -------------------------------------------------------------------------------- 1 | usage: mummergpu [options] reference.fna query.fna (-C run CPU version) 2 | 3 | To change the number of OMP threads, please change N_THREADS in mummergpu_gold.cpp 4 | 5 | 6 | bin/mummergpu -C ../../data/mummergpu/NC_003997.fna ../../data/mummergpu/NC_003997_q100bp.fna > NC_00399.out -------------------------------------------------------------------------------- /cuda/nw/needle.h: -------------------------------------------------------------------------------- 1 | #ifdef RD_WG_SIZE_0_0 2 | #define BLOCK_SIZE RD_WG_SIZE_0_0 3 | #elif defined(RD_WG_SIZE_0) 4 | #define BLOCK_SIZE RD_WG_SIZE_0 5 | #elif defined(RD_WG_SIZE) 6 | #define BLOCK_SIZE RD_WG_SIZE 7 | #else 8 | #define BLOCK_SIZE 16 9 | #endif 10 | //#define TRACE 11 | 12 | -------------------------------------------------------------------------------- /openmp/kmeans/kmeans_openmp/README: -------------------------------------------------------------------------------- 1 | Usage: ./kmeans [switches] -i filename 2 | -i filename : file containing data to be clustered 3 | -b :input file is in binary format 4 | -k : number of clusters (default is 8) 5 | -t threshold : threshold value -------------------------------------------------------------------------------- /cuda/srad/Makefile: 
-------------------------------------------------------------------------------- 1 | 2 | all: SRAD_V1 SRAD_V2 3 | 4 | SRAD_V1: 5 | cd srad_v1; make 6 | 7 | SRAD_V2: 8 | cd srad_v2; make 9 | 10 | clean: SRAD_V1_clean SRAD_V2_clean 11 | 12 | SRAD_V1_clean: 13 | cd srad_v1; make clean 14 | 15 | SRAD_V2_clean: 16 | cd srad_v2; make clean 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /opencl/hotspot/run: -------------------------------------------------------------------------------- 1 | #8192x8192 input files should be generated manually using the input generator. 2 | version=$1 3 | #./hotspot 8192 1 2000 ../../data/hotspot/temp_8192 ../../data/hotspot/power_8192 output.txt $version 4 | ./hotspot 8000 6 100 ../../data/hotspot/temp_8192 ../../data/hotspot/power_8192 $version 5 | -------------------------------------------------------------------------------- /opencl/streamcluster/readme: -------------------------------------------------------------------------------- 1 | 2 | Streamcluster implemented in OpenCL. 3 | 4 | compilation options: 5 | (1) release: normal version; 6 | (2) errmsg: output error message to help debugging; 7 | (3) ptx: generate ptx code; 8 | (4) res: output the usage of resources, like shared memory, etc. 
9 | 10 | 11 | -------------------------------------------------------------------------------- /openmp/hotspot3D/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | OUTPUT = output.txt 4 | 5 | 3D: 6 | $(CC) -o hotspot3D $(CFLAGS) hotspot3D.c -lm $(OPENMP_INC) 7 | 8 | verify: 9 | $(CC) -o hotspot3D $(CFLAGS) hotspot3D.c -lm $(OPENMP_INC) -DVERIFY 10 | 11 | clean: 12 | rm -f hotspot3D $(OUTPUT) 13 | -------------------------------------------------------------------------------- /openmp/srad/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: SRAD_V1 SRAD_V2 3 | 4 | SRAD_V1: 5 | cd srad_v1; make 6 | 7 | SRAD_V2: 8 | cd srad_v2; make 9 | 10 | clean: SRAD_V1_clean SRAD_V2_clean 11 | 12 | SRAD_V1_clean: 13 | cd srad_v1; make clean 14 | 15 | SRAD_V2_clean: 16 | cd srad_v2; make clean 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /cuda/huffman/cpuencode.h: -------------------------------------------------------------------------------- 1 | #ifndef _CE_H_ 2 | #define _CE_H_ 3 | 4 | extern "C" 5 | void cpu_vlc_encode(unsigned int* indata, unsigned int num_elements, 6 | unsigned int* outdata, unsigned int *outsize, 7 | unsigned int *codewords, unsigned int* codewordlens); 8 | #endif 9 | 10 | 11 | -------------------------------------------------------------------------------- /cuda/leukocyte/meschach_lib/config.log: -------------------------------------------------------------------------------- 1 | This file contains any messages produced by compilers while 2 | running configure, to aid debugging if configure makes a mistake. 
3 | 4 | configure:575: checking for acc 5 | configure:605: checking for cc 6 | configure:635: checking for gcc 7 | configure:665: checking for cc 8 | -------------------------------------------------------------------------------- /cuda/nn/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | all : nn hurricane_gen 4 | 5 | clean : 6 | $(RM) *.o nn hurricane_gen 7 | 8 | nn : nn_cuda.o 9 | $(CUDA_CC) $(CUDA_FLAGS) $^ -o $@ $(NVML_LIB) 10 | 11 | hurricane_gen : hurricane_gen.o 12 | 13 | #data : 14 | # mkdir data 15 | # ./gen_dataset.sh 16 | -------------------------------------------------------------------------------- /opencl/leukocyte/meschach_lib/config.log: -------------------------------------------------------------------------------- 1 | This file contains any messages produced by compilers while 2 | running configure, to aid debugging if configure makes a mistake. 3 | 4 | configure:575: checking for acc 5 | configure:605: checking for cc 6 | configure:635: checking for gcc 7 | configure:665: checking for cc 8 | -------------------------------------------------------------------------------- /openmp/hotspot/run: -------------------------------------------------------------------------------- 1 | #8192x8192 input files should be generated manually using the input generator. 
2 | threads=$1 3 | #./hotspot 8192 8192 2000 $threads ../../data/hotspot/temp_8192 ../../data/hotspot/power_8192 output.txt 4 | ./hotspot 8000 8000 100 $threads ../../data/hotspot/temp_8192 ../../data/hotspot/power_8192 5 | -------------------------------------------------------------------------------- /openmp/kmeans/kmeans_serial/README: -------------------------------------------------------------------------------- 1 | Usage: ./kmeans [switches] -i filename 2 | -i filename : file containing data to be clustered 3 | -b : input file is in binary format 4 | -k : number of clusters (default is 8) 5 | -t threshold : threshold value -------------------------------------------------------------------------------- /openmp/leukocyte/meschach_lib/config.log: -------------------------------------------------------------------------------- 1 | This file contains any messages produced by compilers while 2 | running configure, to aid debugging if configure makes a mistake. 3 | 4 | configure:575: checking for acc 5 | configure:605: checking for cc 6 | configure:635: checking for gcc 7 | configure:665: checking for cc 8 | -------------------------------------------------------------------------------- /cuda/gaussian/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | CC := $(CUDA_DIR)/bin/nvcc 4 | 5 | INCLUDE := $(CUDA_DIR)/include 6 | 7 | SRC = gaussian.cu 8 | EXE = gaussian 9 | 10 | release: $(SRC) 11 | $(CC) $(KERNEL_DIM) $(SRC) -o $(EXE) -I$(INCLUDE) -L$(CUDA_LIB_DIR) 12 | 13 | clean: 14 | rm gaussian 15 | -------------------------------------------------------------------------------- /cuda/hotspot/run: -------------------------------------------------------------------------------- 1 | #8192x8192 input files should be generated manually using the input generator. 
2 | #pyramid size of 3 for K20 and 2 for 980Ti 3 | #./hotspot 8192 3 1000 ../../data/hotspot/temp_8192 ../../data/hotspot/power_8192 output.txt 4 | ./hotspot 8000 3 100 ../../data/hotspot/temp_8192 ../../data/hotspot/power_8192 5 | -------------------------------------------------------------------------------- /openmp/bfs/Makefile: -------------------------------------------------------------------------------- 1 | # C compiler 2 | CXX = g++ 3 | ICC = icc 4 | CXX_FLAGS = -g -fopenmp -O3 -lrt 5 | 6 | bfs: bfs.cpp 7 | $(CXX) $(CXX_FLAGS) bfs.cpp -o bfs 8 | 9 | bfs_offload: bfs.cpp 10 | $(ICC) $(CXX_FLAGS) -DOMP_OFFLOAD bfs.cpp -o bfs_offload 11 | 12 | clean: 13 | rm -f bfs bfs_offload result.txt 14 | -------------------------------------------------------------------------------- /openmp/nn/Makefile: -------------------------------------------------------------------------------- 1 | LOCAL_CC = gcc -g -O3 -Wall 2 | CC = gcc -O3 -Wall 3 | CFLAGS = -lm -fopenmp -Wall 4 | 5 | 6 | all : nn 7 | 8 | clean : 9 | rm -rf *.o nn 10 | 11 | nn : nn_openmp.c 12 | $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) -lm 13 | 14 | hurricane_gen : hurricane_gen.c 15 | $(LOCAL_CC) -o $@ $< -lm 16 | 17 | -------------------------------------------------------------------------------- /cuda/hybridsort/mergesort.cuh: -------------------------------------------------------------------------------- 1 | #ifndef __MERGESORT 2 | #define __MERGESORT 3 | 4 | #include "bucketsort.cuh" 5 | 6 | float4 *runMergeSort(int listsize, int divisions, 7 | float4 *d_origList, float4 *d_resultList, 8 | int *sizes, int *nullElements, 9 | unsigned int *origOffsets); 10 | 11 | #endif -------------------------------------------------------------------------------- /opencl/bfs/readme: -------------------------------------------------------------------------------- 1 | 2 | BFS implemented in OpenCL. 
3 | 4 | compilation options: 5 | (1) release: normal version; 6 | (2) errmsg: output error message to help debugging; 7 | (3) ptx: generate ptx code; 8 | (4) profile: to gather kernel execution time (timer-dependent); 9 | (5) res: output the usage of resources, like shared memory, etc. 10 | 11 | 12 | -------------------------------------------------------------------------------- /opencl/lud/README: -------------------------------------------------------------------------------- 1 | The dimension of input matrix should be multiple size of block size. 2 | 3 | ******Adjustable work group size***** 4 | The kernel has square shape 5 | RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimension 6 | The actually dimension = RD_WG_SIZE_0 * RD_WG_SIZE_0 7 | 8 | USAGE: 9 | make clean 10 | make KERNEL_DIM="-DRD_WG_SIZE_0=16" -------------------------------------------------------------------------------- /openmp/particlefilter/Makefile: -------------------------------------------------------------------------------- 1 | #makefile 2 | 3 | openmp: ex_particle_OPENMP_seq.c 4 | gcc -O3 -ffast-math -fopenmp ex_particle_OPENMP_seq.c -o particle_filter -lm 5 | 6 | 7 | icc_openmp: ex_particle_OPENMP_seq.c 8 | icc -O3 -fast -openmp ex_particle_OPENMP_seq.c -o particle_filter 9 | 10 | 11 | clean: 12 | rm particle_filter 13 | -------------------------------------------------------------------------------- /cuda/cfd/run: -------------------------------------------------------------------------------- 1 | #There are three datasets: 2 | 3 | #./euler3d ../../data/cfd/fvcorr.domn.097K 4 | #./euler3d ../../data/cfd/fvcorr.domn.193K 5 | #./euler3d ../../data/cfd/missile.domn.0.2M 6 | 7 | #replace euler3d with other CFD programs 8 | 9 | ./euler3d ../../data/cfd/fvcorr.domn.097K 1000 10 | ./pre_euler3d ../../data/cfd/fvcorr.domn.097K 1000 -------------------------------------------------------------------------------- /cuda/heartwall/AVI/makefile: 
-------------------------------------------------------------------------------- 1 | all: avimod.o avilib.o 2 | 3 | # compile supporting function file into object (binary) 4 | avimod.o: avilib.h avimod.c 5 | gcc -c avimod.c 6 | 7 | # compile supporting function file into object (binary) 8 | avilib.o: avilib.h avilib.c 9 | gcc -c avilib.c 10 | 11 | # delete files 12 | clean: 13 | rm *.o *.out 14 | -------------------------------------------------------------------------------- /openmp/heartwall/AVI/makefile: -------------------------------------------------------------------------------- 1 | all: avimod.o avilib.o 2 | 3 | # compile supporting function file into object (binary) 4 | avimod.o: avilib.h avimod.c 5 | gcc -c avimod.c 6 | 7 | # compile supporting function file into object (binary) 8 | avilib.o: avilib.h avilib.c 9 | gcc -c avilib.c 10 | 11 | # delete files 12 | clean: 13 | rm *.o *.out 14 | -------------------------------------------------------------------------------- /opencl/gaussian/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | #C compiler 4 | CC = g++ 5 | 6 | SRC = clutils.cpp gaussianElim.cpp utils.cpp 7 | 8 | CC_FLAGS = -g -O3 9 | 10 | EXE = gaussian 11 | 12 | release: 13 | $(CC) $(KERNEL_DIM) $(CC_FLAGS) -lOpenCL -fopenmp $(SRC) -o $(EXE) -I$(OPENCL_INC) -L$(OPENCL_LIB) 14 | 15 | clean: 16 | rm -f $(EXE) 17 | -------------------------------------------------------------------------------- /opencl/streamcluster/run: -------------------------------------------------------------------------------- 1 | #./streamcluster 10 20 16 2048000 204800 1000 none output.txt 1 2 | 3 | for (( i = 1, j=1; i <= 1; i++, j=j*2 )) 4 | do 5 | #echo `expr $j \* 10240` `expr $j \* 1024` 6 | #./sc_gpu 10 20 16 `expr $j \* 10240` `expr $j \* 1024` 1000 none output.txt 1 7 | ./streamcluster 10 20 256 65536 65536 1000 none output.txt 1 -t gpu -d 0 8 | done 9 | 
-------------------------------------------------------------------------------- /cuda/leukocyte/CUDA/misc_math.h: -------------------------------------------------------------------------------- 1 | #ifndef MISC_MATH_H 2 | #define MISC_MATH_H 3 | 4 | #include "matrix.h" 5 | 6 | #define PI 3.14159 7 | 8 | 9 | extern int double_eq(double f1, double f2); 10 | extern MAT * gradient_x(MAT * input); 11 | extern MAT * gradient_y(MAT * input); 12 | extern double mean(VEC * in); 13 | extern double std_dev(VEC * in); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /opencl/kmeans/README: -------------------------------------------------------------------------------- 1 | NOTE: The current Kmeans implementation doesn't use texture/constant memories, and is different from the CUDA implementation. 2 | 3 | ******Adjustable work group size***** 4 | RD_WG_SIZE_0 or RD_WG_SIZE_0_0 for kernel_swap 5 | RD_WG_SIZE_1 or RD_WG_SIZE_1_0 for kernel_kmeans 6 | 7 | USAGE: 8 | make clean 9 | make KERNEL_DIM="-DRD_WG_SIZE_0=128 -DRD_WG_SIZE_1=512" -------------------------------------------------------------------------------- /opencl/leukocyte/OpenCL/misc_math.h: -------------------------------------------------------------------------------- 1 | #ifndef MISC_MATH_H 2 | #define MISC_MATH_H 3 | 4 | #include "matrix.h" 5 | 6 | #define PI 3.14159 7 | 8 | 9 | extern int double_eq(double f1, double f2); 10 | extern MAT * gradient_x(MAT * input); 11 | extern MAT * gradient_y(MAT * input); 12 | extern double mean(VEC * in); 13 | extern double std_dev(VEC * in); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /openmp/leukocyte/OpenMP/misc_math.h: -------------------------------------------------------------------------------- 1 | #ifndef MISC_MATH_H 2 | #define MISC_MATH_H 3 | 4 | #include "matrix.h" 5 | 6 | #define PI 3.14159 7 | 8 | 9 | extern int double_eq(double f1, double f2); 10 | 
extern MAT * gradient_x(MAT * input); 11 | extern MAT * gradient_y(MAT * input); 12 | extern double mean(VEC * in); 13 | extern double std_dev(VEC * in); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /openmp/srad/srad_v2/README: -------------------------------------------------------------------------------- 1 | Usage: 2 | srad 128 128 0 31 0 31 4 0.5 2 3 | 4 | 128 //number of rows in the domain 5 | 128 //number of cols in the domain 6 | 0 //y1 position of the speckle 7 | 31 //y2 position of the speckle 8 | 0 //x1 position of the speckle 9 | 31 //x2 position of the speckle 10 | 4 //number of threads 11 | 0.5 //Lambda value 12 | 2 //number of iterations 13 | 14 | -------------------------------------------------------------------------------- /cuda/leukocyte/meschach_lib/MACHINES/Cray/patch.3: -------------------------------------------------------------------------------- 1 | *** zmatrix.h Tue Mar 8 15:50:26 1994 2 | --- zmatrix.h.orig Fri Oct 28 01:52:48 1994 3 | *************** 4 | *** 34,39 **** 5 | --- 34,41 ---- 6 | 7 | /* Type definitions for complex vectors and matrices */ 8 | 9 | + #undef complex 10 | + #define complex Complex 11 | 12 | /* complex definition */ 13 | typedef struct { 14 | -------------------------------------------------------------------------------- /opencl/gaussian/gettimeofday.h: -------------------------------------------------------------------------------- 1 | 2 | #ifdef _WIN32 3 | #include 4 | /** 5 | Based on code seen at. 
6 | 7 | http://www.winehq.org/pipermail/wine-devel/2003-June/018082.html 8 | 9 | http://msdn.microsoft.com/en-us/library/ms740560 10 | 11 | */ 12 | int gettimeofday(struct timeval *tv, struct timezone *tz); 13 | #else 14 | #include 15 | #endif 16 | 17 | 18 | -------------------------------------------------------------------------------- /openmp/nw/Makefile: -------------------------------------------------------------------------------- 1 | # C compiler 2 | #CC = g++ 3 | ICC = icc 4 | CC_FLAGS = -g -O3 -fopenmp -lrt 5 | OFFLOAD_CC_FLAGS = -offload-option,mic,compiler,"-no-opt-prefetch" 6 | 7 | needle: 8 | $(CXX) $(CC_FLAGS) needle.cpp -o needle 9 | 10 | needle_offload: 11 | $(ICC) $(CC_FLAGS) $(OFFLOAD_CC_FLAGS) -DOMP_OFFLOAD needle.cpp -o needle_offload 12 | 13 | clean: 14 | rm -f needle needle_offload 15 | -------------------------------------------------------------------------------- /opencl/leukocyte/meschach_lib/MACHINES/Cray/patch.3: -------------------------------------------------------------------------------- 1 | *** zmatrix.h Tue Mar 8 15:50:26 1994 2 | --- zmatrix.h.orig Fri Oct 28 01:52:48 1994 3 | *************** 4 | *** 34,39 **** 5 | --- 34,41 ---- 6 | 7 | /* Type definitions for complex vectors and matrices */ 8 | 9 | + #undef complex 10 | + #define complex Complex 11 | 12 | /* complex definition */ 13 | typedef struct { 14 | -------------------------------------------------------------------------------- /openmp/leukocyte/meschach_lib/MACHINES/Cray/patch.3: -------------------------------------------------------------------------------- 1 | *** zmatrix.h Tue Mar 8 15:50:26 1994 2 | --- zmatrix.h.orig Fri Oct 28 01:52:48 1994 3 | *************** 4 | *** 34,39 **** 5 | --- 34,41 ---- 6 | 7 | /* Type definitions for complex vectors and matrices */ 8 | 9 | + #undef complex 10 | + #define complex Complex 11 | 12 | /* complex definition */ 13 | typedef struct { 14 | -------------------------------------------------------------------------------- 
/cuda/mummergpu/experiments/e2e/s_suis.mummergpu-1.0.million_reads.fna.C.stats: -------------------------------------------------------------------------------- 1 | Total,77488.734375 2 | Kernel,13762.594727 3 | Print matches,59622.070312 4 | Copy queries to GPU,0.006000 5 | Copy output from GPU,0.000000 6 | Copy suffix tree to GPU,0.000000 7 | Read queries from disk,658.775024 8 | Suffix tree constructions,2470.503906 9 | Minimum substring length, 10 10 | Average query length, 35.964771 11 | -------------------------------------------------------------------------------- /cuda/mummergpu/experiments/e2e/s_suis.mummergpu-1.0.million_reads.fna.stats: -------------------------------------------------------------------------------- 1 | Total,65867.875000 2 | Kernel,1180.202026 3 | Print matches,59613.746094 4 | Copy queries to GPU,208.202011 5 | Copy output from GPU,281.799011 6 | Copy suffix tree to GPU,82.073997 7 | Read queries from disk,1076.084961 8 | Suffix tree constructions,2442.489990 9 | Minimum substring length, 10 10 | Average query length, 35.964771 11 | -------------------------------------------------------------------------------- /openmp/mummergpu/experiments/e2e/s_suis.mummergpu-1.0.million_reads.fna.C.stats: -------------------------------------------------------------------------------- 1 | Total,77488.734375 2 | Kernel,13762.594727 3 | Print matches,59622.070312 4 | Copy queries to GPU,0.006000 5 | Copy output from GPU,0.000000 6 | Copy suffix tree to GPU,0.000000 7 | Read queries from disk,658.775024 8 | Suffix tree constructions,2470.503906 9 | Minimum substring length, 10 10 | Average query length, 35.964771 11 | -------------------------------------------------------------------------------- /cuda/lud/cuda/README: -------------------------------------------------------------------------------- 1 | The dimension of input matrix should be multiple size of block size. 
The block 2 | size can be obtained from lud_kernel.cu 3 | 4 | ******Adjustable work group size***** 5 | The kernel has a square shape 6 | RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describes one dimension 7 | The actual dimension = RD_WG_SIZE_0 * RD_WG_SIZE_0 8 | 9 | USAGE: 10 | make clean 11 | make KERNEL_DIM="-DRD_WG_SIZE_0=16" 12 | -------------------------------------------------------------------------------- /cuda/lud/tools/README: -------------------------------------------------------------------------------- 1 | gen_input will generate the input matrix with the given dimension. Run it in the 2 | following way: 3 | 4 | ./gen_input [matrix_dimension] 5 | 6 | e.g.: 7 | ./gen_input 32 8 | will generate the input matrix of 32x32. The output contains three files: 9 | ${matrix_dim}.dat, l-${matrix_dim}.dat, u-${matrix_dim}.dat. In the above 10 | example, the output should be 32.dat, l-32.dat, u-32.dat. 11 | -------------------------------------------------------------------------------- /cuda/mummergpu/experiments/e2e/h_sapiens.mummergpu-1.0.million_reads.fna.C.stats: -------------------------------------------------------------------------------- 1 | Total,225188.843750 2 | Kernel,35962.871094 3 | Print matches,156411.468750 4 | Copy queries to GPU,0.005000 5 | Copy output from GPU,0.000000 6 | Copy suffix tree to GPU,0.000000 7 | Read queries from disk,7549.857910 8 | Suffix tree constructions,24214.322266 9 | Minimum substring length, 15 10 | Average query length, 32.000000 11 | -------------------------------------------------------------------------------- /cuda/mummergpu/experiments/e2e/lmonocytogenes.mummergpu-1.0.million_reads.fna.C.stats: -------------------------------------------------------------------------------- 1 | Total,85775.937500 2 | Kernel,70396.984375 3 | Print matches,7346.295898 4 | Copy queries to GPU,0.015000 5 | Copy output from GPU,0.000000 6 | Copy suffix tree to GPU,0.000000 7 | Read queries from disk,2285.459961 8 | Suffix tree
constructions,3773.361084 9 | Minimum substring length, 20 10 | Average query length, 116.140053 11 | -------------------------------------------------------------------------------- /cuda/srad/srad_v2/srad.h: -------------------------------------------------------------------------------- 1 | #define STR_SIZE 256 2 | 3 | #ifdef RD_WG_SIZE_0_0 4 | #define BLOCK_SIZE RD_WG_SIZE_0_0 5 | #elif defined(RD_WG_SIZE_0) 6 | #define BLOCK_SIZE RD_WG_SIZE_0 7 | #elif defined(RD_WG_SIZE) 8 | #define BLOCK_SIZE RD_WG_SIZE 9 | #else 10 | #define BLOCK_SIZE 16 11 | #endif 12 | 13 | #define GPU 14 | #define TIMER 15 | //#define OUTPUT 16 | 17 | -------------------------------------------------------------------------------- /opencl/backprop/imagenet.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "backprop.h" 5 | 6 | extern int layer_size; 7 | 8 | void load(BPNN *net) 9 | //BPNN *net; 10 | { 11 | float *units; 12 | int nr, i, k; 13 | 14 | nr = layer_size; 15 | units = net->input_units; 16 | 17 | k = 1; 18 | for (i = 0; i < nr; i++) { 19 | units[k] = (float) rand()/RAND_MAX ; 20 | k++; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /opencl/lud/tools/README: -------------------------------------------------------------------------------- 1 | gen_input will genearte the input matrix with given dimension. Running it in 2 | following way: 3 | 4 | ./gen_input [matrix_dimension] 5 | 6 | e.g.: 7 | ./gen_input 32 8 | will generate the input matrix of 32x32. The ouput contains three files: 9 | ${matrix_dim}.dat, l-${matrix_dim}.dat, u-${matrix_dim}.dat. In the above 10 | example, the output should be 32.dat, l-32.dat, u-32.dat . 
11 | -------------------------------------------------------------------------------- /openmp/lud/tools/README: -------------------------------------------------------------------------------- 1 | gen_input will generate the input matrix with the given dimension. Run it in the 2 | following way: 3 | 4 | ./gen_input [matrix_dimension] 5 | 6 | e.g.: 7 | ./gen_input 32 8 | will generate the input matrix of 32x32. The output contains three files: 9 | ${matrix_dim}.dat, l-${matrix_dim}.dat, u-${matrix_dim}.dat. In the above 10 | example, the output should be 32.dat, l-32.dat, u-32.dat. 11 | -------------------------------------------------------------------------------- /openmp/mummergpu/experiments/e2e/h_sapiens.mummergpu-1.0.million_reads.fna.C.stats: -------------------------------------------------------------------------------- 1 | Total,225188.843750 2 | Kernel,35962.871094 3 | Print matches,156411.468750 4 | Copy queries to GPU,0.005000 5 | Copy output from GPU,0.000000 6 | Copy suffix tree to GPU,0.000000 7 | Read queries from disk,7549.857910 8 | Suffix tree constructions,24214.322266 9 | Minimum substring length, 15 10 | Average query length, 32.000000 11 | -------------------------------------------------------------------------------- /openmp/mummergpu/experiments/e2e/s_suis.mummergpu-1.0.million_reads.fna.stats: -------------------------------------------------------------------------------- 1 | Total,65867.875000 2 | Kernel,1180.202026 3 | Print matches,59613.746094 4 | Copy queries to GPU,208.202011 5 | Copy output from GPU,281.799011 6 | Copy suffix tree to GPU,82.073997 7 | Read queries from disk,1076.084961 8 | Suffix tree constructions,2442.489990 9 | Minimum substring length, 10 10 | Average query length, 35.964771 11 | -------------------------------------------------------------------------------- /cuda/mummergpu/experiments/e2e/cbriggsae.mummergpu-1.0.half_million_reads.fna.C.stats:
-------------------------------------------------------------------------------- 1 | Total,298909.031250 2 | Kernel,261586.718750 3 | Print matches,11726.251953 4 | Copy queries to GPU,0.038000 5 | Copy output from GPU,0.000000 6 | Copy suffix tree to GPU,0.000000 7 | Read queries from disk,3757.282959 8 | Suffix tree constructions,17677.109375 9 | Minimum substring length, 100 10 | Average query length, 617.019836 11 | -------------------------------------------------------------------------------- /cuda/mummergpu/experiments/e2e/h_sapiens.mummergpu-1.0.half_million_reads.fna.C.stats: -------------------------------------------------------------------------------- 1 | Total,148932.937500 2 | Kernel,19244.375000 3 | Print matches,99989.898438 4 | Copy queries to GPU,0.006000 5 | Copy output from GPU,0.000000 6 | Copy suffix tree to GPU,0.000000 7 | Read queries from disk,4811.838867 8 | Suffix tree constructions,24209.560547 9 | Minimum substring length, 14 10 | Average query length, 29.000000 11 | -------------------------------------------------------------------------------- /cuda/mummergpu/experiments/e2e/h_sapiens.mummergpu-1.0.million_reads.fna.stats: -------------------------------------------------------------------------------- 1 | Total,214240.234375 2 | Kernel,1482.804932 3 | Print matches,156926.234375 4 | Copy queries to GPU,227.016998 5 | Copy output from GPU,378.101013 6 | Copy suffix tree to GPU,20368.777344 7 | Read queries from disk,1086.651001 8 | Suffix tree constructions,31393.650391 9 | Minimum substring length, 15 10 | Average query length, 32.000000 11 | -------------------------------------------------------------------------------- /cuda/mummergpu/experiments/e2e/lmonocytogenes.mummergpu-1.0.million_reads.fna.stats: -------------------------------------------------------------------------------- 1 | Total,26412.919922 2 | Kernel,6822.855469 3 | Print matches,7341.041016 4 | Copy queries to GPU,660.661987 5 | Copy output from 
GPU,1474.561035 6 | Copy suffix tree to GPU,355.412994 7 | Read queries from disk,3942.376221 8 | Suffix tree constructions,3762.101074 9 | Minimum substring length, 20 10 | Average query length, 116.140053 11 | -------------------------------------------------------------------------------- /opencl/hybridsort/mergesort.h: -------------------------------------------------------------------------------- 1 | #ifndef __MERGESORT 2 | #define __MERGESORT 3 | 4 | #include "bucketsort.h" 5 | 6 | cl_float4 *runMergeSort(int listsize, int divisions, 7 | cl_float4 *d_origList, cl_float4 *d_resultList, 8 | int *sizes, int *nullElements, 9 | unsigned int *origOffsets); 10 | void init_mergesort(int listsize); 11 | void finish_mergesort(); 12 | double getMergeTime(); 13 | #endif 14 | -------------------------------------------------------------------------------- /openmp/mummergpu/experiments/e2e/h_sapiens.mummergpu-1.0.half_million_reads.fna.C.stats: -------------------------------------------------------------------------------- 1 | Total,148932.937500 2 | Kernel,19244.375000 3 | Print matches,99989.898438 4 | Copy queries to GPU,0.006000 5 | Copy output from GPU,0.000000 6 | Copy suffix tree to GPU,0.000000 7 | Read queries from disk,4811.838867 8 | Suffix tree constructions,24209.560547 9 | Minimum substring length, 14 10 | Average query length, 29.000000 11 | -------------------------------------------------------------------------------- /openmp/mummergpu/experiments/e2e/h_sapiens.mummergpu-1.0.million_reads.fna.stats: -------------------------------------------------------------------------------- 1 | Total,214240.234375 2 | Kernel,1482.804932 3 | Print matches,156926.234375 4 | Copy queries to GPU,227.016998 5 | Copy output from GPU,378.101013 6 | Copy suffix tree to GPU,20368.777344 7 | Read queries from disk,1086.651001 8 | Suffix tree constructions,31393.650391 9 | Minimum substring length, 15 10 | Average query length, 32.000000 11 | 
-------------------------------------------------------------------------------- /openmp/mummergpu/experiments/e2e/lmonocytogenes.mummergpu-1.0.million_reads.fna.C.stats: -------------------------------------------------------------------------------- 1 | Total,85775.937500 2 | Kernel,70396.984375 3 | Print matches,7346.295898 4 | Copy queries to GPU,0.015000 5 | Copy output from GPU,0.000000 6 | Copy suffix tree to GPU,0.000000 7 | Read queries from disk,2285.459961 8 | Suffix tree constructions,3773.361084 9 | Minimum substring length, 20 10 | Average query length, 116.140053 11 | -------------------------------------------------------------------------------- /cuda/mummergpu/experiments/e2e/cbriggsae.mummergpu-1.0.half_million_reads.fna.stats: -------------------------------------------------------------------------------- 1 | Total,74996.468750 2 | Kernel,26419.261719 3 | Print matches,11335.227539 4 | Copy queries to GPU,1766.451172 5 | Copy output from GPU,5470.040527 6 | Copy suffix tree to GPU,4449.362793 7 | Read queries from disk,3746.693848 8 | Suffix tree constructions,17640.742188 9 | Minimum substring length, 100 10 | Average query length, 617.019836 11 | -------------------------------------------------------------------------------- /cuda/mummergpu/experiments/e2e/h_sapiens.mummergpu-1.0.half_million_reads.fna.stats: -------------------------------------------------------------------------------- 1 | Total,145859.078125 2 | Kernel,677.914001 3 | Print matches,100534.265625 4 | Copy queries to GPU,327.256989 5 | Copy output from GPU,168.106995 6 | Copy suffix tree to GPU,13473.358398 7 | Read queries from disk,511.567993 8 | Suffix tree constructions,24072.339844 9 | Minimum substring length, 14 10 | Average query length, 29.000000 11 | -------------------------------------------------------------------------------- /opencl/cfd/README: -------------------------------------------------------------------------------- 1 | ******Adjustable work group 
size***** 2 | RD_WG_SIZE for all 3 | RD_WG_SIZE_1 or RD_WG_SIZE_1_0 for initialize_variables 4 | RD_WG_SIZE_2 or RD_WG_SIZE_2_0 for compute_step_factor 5 | RD_WG_SIZE_3 or RD_WG_SIZE_3_0 for compute_flux 6 | RD_WG_SIZE_4 or RD_WG_SIZE_4_0 for time_step 7 | 8 | USAGE: 9 | make clean 10 | make KERNEL_DIM="-DRD_WG_SIZE_1=128 -DRD_WG_SIZE_2=192 -DRD_WG_SIZE_3=128 -DRD_WG_SIZE_4=256" -------------------------------------------------------------------------------- /openmp/mummergpu/experiments/e2e/cbriggsae.mummergpu-1.0.half_million_reads.fna.C.stats: -------------------------------------------------------------------------------- 1 | Total,298909.031250 2 | Kernel,261586.718750 3 | Print matches,11726.251953 4 | Copy queries to GPU,0.038000 5 | Copy output from GPU,0.000000 6 | Copy suffix tree to GPU,0.000000 7 | Read queries from disk,3757.282959 8 | Suffix tree constructions,17677.109375 9 | Minimum substring length, 100 10 | Average query length, 617.019836 11 | -------------------------------------------------------------------------------- /openmp/mummergpu/experiments/e2e/h_sapiens.mummergpu-1.0.half_million_reads.fna.stats: -------------------------------------------------------------------------------- 1 | Total,145859.078125 2 | Kernel,677.914001 3 | Print matches,100534.265625 4 | Copy queries to GPU,327.256989 5 | Copy output from GPU,168.106995 6 | Copy suffix tree to GPU,13473.358398 7 | Read queries from disk,511.567993 8 | Suffix tree constructions,24072.339844 9 | Minimum substring length, 14 10 | Average query length, 29.000000 11 | -------------------------------------------------------------------------------- /openmp/mummergpu/experiments/e2e/lmonocytogenes.mummergpu-1.0.million_reads.fna.stats: -------------------------------------------------------------------------------- 1 | Total,26412.919922 2 | Kernel,6822.855469 3 | Print matches,7341.041016 4 | Copy queries to GPU,660.661987 5 | Copy output from GPU,1474.561035 6 | Copy suffix tree to 
GPU,355.412994 7 | Read queries from disk,3942.376221 8 | Suffix tree constructions,3762.101074 9 | Minimum substring length, 20 10 | Average query length, 116.140053 11 | -------------------------------------------------------------------------------- /openmp/mummergpu/experiments/e2e/cbriggsae.mummergpu-1.0.half_million_reads.fna.stats: -------------------------------------------------------------------------------- 1 | Total,74996.468750 2 | Kernel,26419.261719 3 | Print matches,11335.227539 4 | Copy queries to GPU,1766.451172 5 | Copy output from GPU,5470.040527 6 | Copy suffix tree to GPU,4449.362793 7 | Read queries from disk,3746.693848 8 | Suffix tree constructions,17640.742188 9 | Minimum substring length, 100 10 | Average query length, 617.019836 11 | -------------------------------------------------------------------------------- /cuda/hybridsort/bucketsort.cuh: -------------------------------------------------------------------------------- 1 | #ifndef __BUCKETSORT 2 | #define __BUCKETSORT 3 | 4 | #define LOG_DIVISIONS 10 5 | #define DIVISIONS (1 << LOG_DIVISIONS) 6 | 7 | void init_bucketsort(int listsize); 8 | void finish_bucketsort(); 9 | void bucketSort(float *d_input, float *d_output, int listsize, 10 | int *sizes, int *nullElements, float minimum, float maximum, 11 | unsigned int *origOffsets); 12 | 13 | #endif -------------------------------------------------------------------------------- /cuda/lud/README: -------------------------------------------------------------------------------- 1 | -base 2 | Non-optimized, non-paralleled implementation. It is considered as a baseline implementation to be compared with OpenMP and CUDA implementations. 3 | 4 | -common 5 | Common functions used in all implementations. 6 | 7 | -data 8 | Some sample input matrices. 9 | 10 | -omp 11 | An paralleled implementation with OpenMP. 12 | 13 | -tools 14 | Tools to generate input matrix with random number. 
15 | -------------------------------------------------------------------------------- /opencl/hotspot/OpenCL_helper_library.h: -------------------------------------------------------------------------------- 1 | #ifndef OPENCL_HELPER_LIBRARY_H 2 | #define OPENCL_HELPER_LIBRARY_H 3 | 4 | #include "../common/opencl_util.h" 5 | 6 | //#include 7 | //#include 8 | 9 | 10 | // Function prototypes 11 | //char *load_kernel_source(const char *filename); 12 | //long long get_time(); 13 | void fatal(const char *s); 14 | void fatal_CL(cl_int error, int line_no); 15 | 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /others/rng/rng/latex/files.tex: -------------------------------------------------------------------------------- 1 | \section{File List} 2 | Here is a list of all documented files with brief descriptions:\begin{DoxyCompactList} 3 | \item\contentsline{section}{\hyperlink{rng_8c}{rng.c} (Uniform and Normal RNG Implemented in OpenMP )}{\pageref{rng_8c}}{} 4 | \item\contentsline{section}{\hyperlink{rng_8cu}{rng.cu} (Uniform and Normal RNG Implemented in CUDA as device functions )}{\pageref{rng_8cu}}{} 5 | \end{DoxyCompactList} 6 | -------------------------------------------------------------------------------- /cuda/srad/srad_v1/include.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include -------------------------------------------------------------------------------- /opencl/nw/work_group_size.h: -------------------------------------------------------------------------------- 1 | #ifndef WORK_GROUP_SIZE_H_ 2 | #define WORK_GROUP_SIZE_H_ 3 | 4 | #ifndef BSIZE 5 | #ifdef RD_WG_SIZE_0_0 6 | #define BSIZE RD_WG_SIZE_0_0 7 | #elif defined(RD_WG_SIZE_0) 8 | #define BSIZE RD_WG_SIZE_0 9 | 
#elif defined(RD_WG_SIZE) 10 | #define BSIZE RD_WG_SIZE 11 | #else 12 | #define BSIZE 16 13 | #endif 14 | #endif // BSIZE 15 | 16 | #ifndef PAR 17 | #define PAR 4 18 | #endif 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /openmp/srad/srad_v1/include.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include -------------------------------------------------------------------------------- /cuda/backprop/imagenet.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "backprop.h" 5 | 6 | extern layer_size; 7 | 8 | load(net) 9 | BPNN *net; 10 | { 11 | float *units; 12 | int nr, nc, imgsize, i, j, k; 13 | 14 | nr = layer_size; 15 | 16 | imgsize = nr * nc; 17 | units = net->input_units; 18 | 19 | k = 1; 20 | for (i = 0; i < nr; i++) { 21 | units[k] = (float) rand()/RAND_MAX ; 22 | k++; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /cuda/hybridsort/README: -------------------------------------------------------------------------------- 1 | USAGE: 2 | make clean 3 | make OUTPUT=Y VERIFY=Y TIMER=Y 4 | make HISTO_WG_SIZE_0="-DHISTO_WG_SIZE_0=96" 5 | make BUCKET_WG_SIZE_0="-DBUCKET_WG_SIZE_0=128" 6 | make BUCKET_WG_SIZE_1="-DBUCKET_WG_SIZE_1=32" 7 | make MERGE_WG_SIZE_0="-DMERGE_WG_SIZE_0=256" 8 | make MERGE_WG_SIZE_1="-DMERGE_WG_SIZE_1=208" 9 | 10 | 11 | Random Input of 4194304 floats: 12 | ./hybridsort r 13 | 14 | Specified Input: 15 | ./hybridsort "text file name here" -------------------------------------------------------------------------------- /opencl/cfd/run: -------------------------------------------------------------------------------- 1 | version=$1 2 | 3 
| #There are three datasets: 4 | 5 | #./euler3d ../../data/cfd/fvcorr.domn.097K 0 6 | #./euler3d ../../data/cfd/fvcorr.domn.193K 7 | #./euler3d ../../data/cfd/missile.domn.0.2M 8 | 9 | #replace euler3d with other CFD programs 10 | 11 | # ./euler3d ../../data/cfd/fvcorr.domn.097K -t gpu -d 0 12 | ./euler3d ../../data/cfd/fvcorr.domn.097K 1000 16 $version 13 | #./pre_euler3d ../../data/cfd/fvcorr.domn.097K 1000 16 14 | -------------------------------------------------------------------------------- /opencl/hybridsort/README: -------------------------------------------------------------------------------- 1 | USAGE: 2 | make clean 3 | make OUTPUT=Y VERIFY=Y TIMER=Y 4 | make HISTO_WG_SIZE_0="-DHISTO_WG_SIZE_0=96" 5 | make BUCKET_WG_SIZE_0="-DBUCKET_WG_SIZE_0=128" 6 | make BUCKET_WG_SIZE_1="-DBUCKET_WG_SIZE_1=32" 7 | make MERGE_WG_SIZE_0="-DMERGE_WG_SIZE_0=256" 8 | make MERGE_WG_SIZE_1="-DMERGE_WG_SIZE_1=208" 9 | 10 | 11 | Random Input of 4194304 floats: 12 | ./hybridsort r 13 | 14 | Specified Input: 15 | ./hybridsort "text file name here" -------------------------------------------------------------------------------- /openmp/backprop/imagenet.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "backprop.h" 5 | 6 | extern int layer_size; 7 | 8 | void load(net) 9 | BPNN *net; 10 | { 11 | float *units; 12 | int nr, nc, imgsize, i, j, k; 13 | 14 | nr = layer_size; 15 | 16 | imgsize = nr * nc; 17 | units = net->input_units; 18 | 19 | k = 1; 20 | for (i = 0; i < nr; i++) { 21 | units[k] = (float) rand()/RAND_MAX ; 22 | k++; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /cuda/bfs/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | SRC = bfs.cu 4 | 5 | EXE = bfs 6 | 7 | release: $(SRC) 8 | $(CUDA_CC) $(SRC) -o $(EXE) $(CUDA_FLAGS) 9 | 10 | enum: $(SRC) 11 | 
$(CUDA_CC) -deviceemu $(SRC) -o $(EXE) $(CUDA_FLAGS) 12 | 13 | debug: $(SRC) 14 | $(CUDA_CC) -g $(SRC) -o $(EXE) $(CUDA_FLAGS) 15 | 16 | debugenum: $(SRC) 17 | $(CUDA_CC) -g -deviceemu $(SRC) -o $(EXE) $(CUDA_FLAGS) 18 | 19 | clean: $(SRC) 20 | $(RM) $(EXE) $(EXE).linkinfo result.txt 21 | -------------------------------------------------------------------------------- /opencl/dwt2d/dwt_cl/dwt_cl.h: -------------------------------------------------------------------------------- 1 | #ifndef DWT_CUDA_H 2 | #define DWT_CUDA_H 3 | 4 | 5 | namespace dwt_cuda { 6 | void fdwt53(int * in, int * out, int sizeX, int sizeY, int levels); 7 | // void rdwt53(int * in, int * out, int sizeX, int sizeY, int levels); 8 | // void fdwt97(float * in, float * out, int sizeX, int sizeY, int levels); 9 | // void rdwt97(float * in, float * out, int sizeX, int sizeY, int levels); 10 | } // namespace dwt_cuda 11 | #endif // DWT_CUDA_H 12 | -------------------------------------------------------------------------------- /cuda/leukocyte/meschach_lib/ls.dat: -------------------------------------------------------------------------------- 1 | # No. 
of a problem 2 | 2 3 | # A = 4 | Matrix: 5 by 3 5 | row 0: 3 -1 2 6 | row 1: 2 -1 1.2 7 | row 2: 2.5 1 -1.5 8 | row 3: 3 1 1 9 | row 4: -1 1 -2.2 10 | 11 | # b = 12 | Vector: dim: 5 13 | 5 3 2 4 6 14 | 15 | -------------------------------------------------------------------------------- /opencl/leukocyte/OpenCL/OpenCL_helper_library.h: -------------------------------------------------------------------------------- 1 | #ifndef OPENCL_HELPER_LIBRARY_H 2 | #define OPENCL_HELPER_LIBRARY_H 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | 10 | // Function prototypes 11 | char *load_kernel_source(const char *filename); 12 | // long long get_time(); 13 | void fatal(const char *s); 14 | void fatal_CL(cl_int error, char *file, int line); 15 | void check_error(cl_int error, char *file, int line); 16 | 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /opencl/lud/ocl/makefile: -------------------------------------------------------------------------------- 1 | include ../../../common/make.config 2 | 3 | NAME = lud 4 | 5 | KERNEL_VERSIONS = 0 1 2 3 4 5 6 6 | KERNEL_NAMES = $(foreach var,$(KERNEL_VERSIONS),$(NAME)_kernel_v$(var)) 7 | KERNEL_ALTERA_NAMES = $(addsuffix .aocx, $(KERNEL_NAMES)) 8 | 9 | ifdef ALTERA 10 | AOT_PROGRAMS = $(KERNEL_ALTERA_NAMES) 11 | else 12 | AOT_PROGRAMS = 13 | endif 14 | 15 | AOCX: $(AOT_PROGRAMS) 16 | 17 | clean: 18 | $(RM) ../$(EXE) *.aoco lud_kernel 19 | cd ../; $(RM) $(OBJECTS) 20 | -------------------------------------------------------------------------------- /opencl/leukocyte/meschach_lib/ls.dat: -------------------------------------------------------------------------------- 1 | # No. 
of a problem 2 | 2 3 | # A = 4 | Matrix: 5 by 3 5 | row 0: 3 -1 2 6 | row 1: 2 -1 1.2 7 | row 2: 2.5 1 -1.5 8 | row 3: 3 1 1 9 | row 4: -1 1 -2.2 10 | 11 | # b = 12 | Vector: dim: 5 13 | 5 3 2 4 6 14 | 15 | -------------------------------------------------------------------------------- /openmp/leukocyte/meschach_lib/ls.dat: -------------------------------------------------------------------------------- 1 | # No. of a problem 2 | 2 3 | # A = 4 | Matrix: 5 by 3 5 | row 0: 3 -1 2 6 | row 1: 2 -1 1.2 7 | row 2: 2.5 1 -1.5 8 | row 3: 3 1 1 9 | row 4: -1 1 -2.2 10 | 11 | # b = 12 | Vector: dim: 5 13 | 5 3 2 4 6 14 | 15 | -------------------------------------------------------------------------------- /cuda/myocyte/README: -------------------------------------------------------------------------------- 1 | // The following are the command parameters to the application: 2 | // 1) Simulation time interval which is the number of milliseconds to simulate. Needs to be integer > 0 3 | // 2) Number of instances of simulation to run. Needs to be integer > 0. 4 | // 3) Method of parallelization. Needs to be 0 for parallelization inside each simulation instance, or 1 for parallelization across instances. 
5 | // Example: 6 | // myocyte.out 100 100 1 7 | // 8 | // for more information see main.cu 9 | -------------------------------------------------------------------------------- /cuda/leukocyte/CUDA/track_ellipse_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRACK_ELLIPSE_KERNEL_H_ 2 | #define _TRACK_ELLIPSE_KERNEL_H_ 3 | 4 | #include "matrix.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern void IMGVF_cuda_init(MAT **I, int Nc); 10 | extern void IMGVF_cuda_cleanup(MAT **IMGVF_out, int Nc); 11 | extern void IMGVF_cuda(MAT **I, MAT **IMGVF, double vx, double vy, double e, int max_iterations, double cutoff, int Nc); 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /openmp/hotspot/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | ICC = icc 4 | CXXFLAGS += -DOPEN 5 | 6 | OFFLOAD_CC_FLAGS = -offload-option,mic,compiler,"-no-opt-prefetch" 7 | 8 | hotspot: hotspot_openmp.cpp Makefile 9 | $(CXX) $(CXXFLAGS) hotspot_openmp.cpp -o hotspot $(OPENMP_INC) -lrt 10 | 11 | hotspot_offload: hotspot_openmp.cpp Makefile 12 | $(ICC) $(CC_FLAGS) $(OFFLOAD_CC_FLAGS) -DOMP_OFFLOAD hotspot_openmp.cpp -o hotspot_offload 13 | 14 | clean: 15 | rm -f hotspot hotspot_offload 16 | -------------------------------------------------------------------------------- /openmp/srad/srad_v1/define.c: -------------------------------------------------------------------------------- 1 | //====================================================================================================100 2 | //====================================================================================================100 3 | // DEFINE 4 | //====================================================================================================100 5 | 
//====================================================================================================100 6 | 7 | #define fp float 8 | -------------------------------------------------------------------------------- /openmp/lud/README: -------------------------------------------------------------------------------- 1 | -base 2 | Non-optimized, non-parallelized implementation. It is considered a baseline implementation to be compared with the OpenMP and CUDA implementations. 3 | 4 | -common 5 | Common functions used in all implementations. 6 | 7 | -cuda 8 | A blocked implementation optimized for CUDA 9 | 10 | -data 11 | Some sample input matrices. 12 | 13 | -omp 14 | A parallelized implementation with OpenMP. 15 | 16 | -tools 17 | Tools to generate input matrices with random numbers. 18 | -------------------------------------------------------------------------------- /opencl/leukocyte/OpenCL/track_ellipse_opencl.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRACK_ELLIPSE_KERNEL_H_ 2 | #define _TRACK_ELLIPSE_KERNEL_H_ 3 | 4 | #include "matrix.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern void IMGVF_OpenCL_init(MAT **I, int Nc); 10 | extern void IMGVF_OpenCL_cleanup(MAT **IMGVF_out, int Nc); 11 | extern void IMGVF_OpenCL(MAT **I, MAT **IMGVF, double vx, double vy, double e, int max_iterations, double cutoff, int Nc); 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /opencl/nw/README: -------------------------------------------------------------------------------- 1 | Note: This program generates two sequences randomly. Please specify your own sequences for different uses. 2 | At the current stage, the program only supports two sequences with the same length, which must be divisible by 16. 
3 | Usage: ./nw 2048 10 ./nw.cl 4 | 2048 //the length of the sequence 5 | 10 //penalty value 6 | ./nw.cl //ocl kernel file 7 | 8 | ******Adjustable work group size***** 9 | RD_WG_SIZE_0 or RD_WG_SIZE_0_0 10 | 11 | USAGE: 12 | make clean 13 | make KERNEL_DIM="-DRD_WG_SIZE_0=16" -------------------------------------------------------------------------------- /openmp/srad/srad_v1/makefile: -------------------------------------------------------------------------------- 1 | # Example 2 | # target: dependencies 3 | # command 1 4 | # command 2 5 | # . 6 | # . 7 | # . 8 | # command n 9 | # link objects(binaries) together 10 | a.out: main.o 11 | $(CC) main.o \ 12 | -lm -fopenmp -o srad -lrt 13 | 14 | # compile main function file into object (binary) 15 | main.o: main.c \ 16 | define.c \ 17 | graphics.c 18 | $(CC) main.c \ 19 | -c -O3 -fopenmp 20 | 21 | # delete all object files 22 | clean: 23 | rm *.o srad 24 | -------------------------------------------------------------------------------- /cuda/mummergpu/src/morton.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void morton(int i, int *x, int *y) 4 | { 5 | *x = 0; 6 | *y = 0; 7 | 8 | int b; 9 | for (b = 0; b < 16; b++) 10 | { 11 | *x |= (i & (1 << (b*2))) >> b; 12 | *y |= (i & (1 << (b*2+1))) >> (b+1); 13 | } 14 | } 15 | 16 | int main(int argc, char ** argv) 17 | { 18 | int i; 19 | for (i = 0; i < 100; i++) 20 | { 21 | int x; 22 | int y; 23 | 24 | morton(i,&x,&y); 25 | 26 | printf("%d: %d %d\n", i, x, y); 27 | } 28 | 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /cuda/nw/README: -------------------------------------------------------------------------------- 1 | Note: This program generates two sequences randomly. Please specify your own sequences for different uses. 2 | At the current stage, the program only supports two sequences with the same length, which must be divisible by 16.
3 | Usage: needle 32 32 10 4 | 32 //the length of the 1st sequence 5 | 32 //the length of the 2nd sequence 6 | 10 //penalty value 7 | 8 | ******Adjustable work group size***** 9 | RD_WG_SIZE_0 or RD_WG_SIZE_0_0 10 | 11 | USAGE: 12 | make clean 13 | make KERNEL_DIM="-DRD_WG_SIZE_0=16" -------------------------------------------------------------------------------- /cuda/srad/srad_v1/define.c: -------------------------------------------------------------------------------- 1 | //====================================================================================================100 2 | //====================================================================================================100 3 | // DEFINE 4 | //====================================================================================================100 5 | //====================================================================================================100 6 | 7 | #define fp float 8 | 9 | #define NUMBER_THREADS 512 10 | -------------------------------------------------------------------------------- /openmp/mummergpu/src/morton.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void morton(int i, int *x, int *y) 4 | { 5 | *x = 0; 6 | *y = 0; 7 | 8 | int b; 9 | for (b = 0; b < 16; b++) 10 | { 11 | *x |= (i & (1 << (b*2))) >> b; 12 | *y |= (i & (1 << (b*2+1))) >> (b+1); 13 | } 14 | } 15 | 16 | int main(int argc, char ** argv) 17 | { 18 | int i; 19 | for (i = 0; i < 100; i++) 20 | { 21 | int x; 22 | int y; 23 | 24 | morton(i,&x,&y); 25 | 26 | printf("%d: %d %d\n", i, x, y); 27 | } 28 | 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /others/rng/rng/html/search/nomatches.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |
No Matches
10 |
11 | 12 | 13 | -------------------------------------------------------------------------------- /opencl/dwt2d/dwt.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _DWT_H 3 | #define _DWT_H 4 | 5 | template 6 | int nStage2dDWT(T *in, T *out, T * backup, int pixWidth, int pixHeight, int stages, bool forward); 7 | 8 | template 9 | int writeNStage2DDWT(T *component_cuda, int width, int height, 10 | int stages, const char * filename, const char * suffix); 11 | template 12 | int writeLinear(T *component_cuda, int width, int height, 13 | const char * filename, const char * suffix); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /cuda/mummergpu/experiments/e2e/s_suis.m.million_reads.fna.C.stats: -------------------------------------------------------------------------------- 1 | Q,R,T,m,r,t,n,Total,Match kernel,Print Kernel,Queries to board,Match coords to board,Match coords from board,Tree to board,Ref str to board,Queries from disk,Ref from disk,Output to disk,Tree construction,Tree reorder,Tree flatten,Ref reorder,Build coord table,Coords to buffers,Avg qry length 2 | 0,0,0,1,0,0,0,78982.734375,10702.850586,27660.042969,0.000000,0.000000,0.000000,0.000000,0.000000,893.048950,2.326000,31563.345703,1030.657959,0.000000,1861.874023,0.000000,4.404000,4945.572754,35.964771 3 | -------------------------------------------------------------------------------- /cuda/mummergpu/experiments/e2e/s_suis.m.million_reads.fna.stats: -------------------------------------------------------------------------------- 1 | Q,R,T,m,r,t,n,Total,Match kernel,Print Kernel,Queries to board,Match coords to board,Match coords from board,Tree to board,Ref str to board,Queries from disk,Ref from disk,Output to disk,Tree construction,Tree reorder,Tree flatten,Ref reorder,Build coord table,Coords to buffers,Avg qry length 2 | 
0,0,0,1,0,0,0,52184.414062,914.489990,8227.401367,35.258999,9.508000,285.678009,76.903999,1.758000,1056.972900,2.345000,33356.468750,1034.671021,0.000000,1866.384033,0.000000,4.386000,4978.430176,35.964771 3 | -------------------------------------------------------------------------------- /cuda/srad/srad_v2/Makefile: -------------------------------------------------------------------------------- 1 | include ../../../common/make.config 2 | 3 | 4 | SRC = srad.cu 5 | 6 | EXE = srad 7 | 8 | release: $(SRC) 9 | $(CUDA_CC) $(KERNEL_DIM) $(SRC) -o $(EXE) $(CUDA_FLAGS) 10 | 11 | enum: $(SRC) 12 | $(CUDA_CC) $(KERNEL_DIM) -deviceemu $(SRC) -o $(EXE) $(CUDA_FLAGS) 13 | 14 | debug: $(SRC) 15 | $(CUDA_CC) $(KERNEL_DIM) -g $(SRC) -o $(EXE) $(CUDA_FLAGS) 16 | 17 | debugenum: $(SRC) 18 | $(CUDA_CC) $(KERNEL_DIM) -g -deviceemu $(SRC) -o $(EXE) $(CUDA_FLAGS) 19 | 20 | clean: $(SRC) 21 | $(RM) $(EXE) $(EXE).linkinfo result.txt 22 | -------------------------------------------------------------------------------- /openmp/hotspot/README: -------------------------------------------------------------------------------- 1 | Usage: ./hotspot 2 | - number of rows in the grid (positive integer) 3 | - number of columns in the grid (positive integer) 4 | - number of iterations 5 | - number of threads 6 | - name of the file containing the initial temperature values of each cell 7 | - name of the file containing the dissipated power values of each cell -------------------------------------------------------------------------------- /openmp/mummergpu/experiments/e2e/s_suis.m.million_reads.fna.C.stats: -------------------------------------------------------------------------------- 1 | Q,R,T,m,r,t,n,Total,Match kernel,Print Kernel,Queries to board,Match coords to board,Match coords from board,Tree to board,Ref str to board,Queries from disk,Ref from disk,Output to disk,Tree construction,Tree reorder,Tree flatten,Ref reorder,Build coord table,Coords to buffers,Avg qry length 2 | 
0,0,0,1,0,0,0,78982.734375,10702.850586,27660.042969,0.000000,0.000000,0.000000,0.000000,0.000000,893.048950,2.326000,31563.345703,1030.657959,0.000000,1861.874023,0.000000,4.404000,4945.572754,35.964771 3 | -------------------------------------------------------------------------------- /openmp/mummergpu/experiments/e2e/s_suis.m.million_reads.fna.stats: -------------------------------------------------------------------------------- 1 | Q,R,T,m,r,t,n,Total,Match kernel,Print Kernel,Queries to board,Match coords to board,Match coords from board,Tree to board,Ref str to board,Queries from disk,Ref from disk,Output to disk,Tree construction,Tree reorder,Tree flatten,Ref reorder,Build coord table,Coords to buffers,Avg qry length 2 | 0,0,0,1,0,0,0,52184.414062,914.489990,8227.401367,35.258999,9.508000,285.678009,76.903999,1.758000,1056.972900,2.345000,33356.468750,1034.671021,0.000000,1866.384033,0.000000,4.386000,4978.430176,35.964771 3 | -------------------------------------------------------------------------------- /cuda/lud/base/lud_base.c: -------------------------------------------------------------------------------- 1 | void lud_base(float *a, int size) 2 | { 3 | int i,j,k; 4 | float sum; 5 | 6 | for (i=0; i 0 3 | // 2) Number of instances of simulation to run. Needs to be integer > 0. 4 | // 3) Method of parallelization. Need to be 0 for parallelization inside each simulation instance, or 1 for parallelization across instances. 5 | // 4) Number of threads to use. Needs to be integer > 0. 
6 | // Example: 7 | // a.out 100 100 1 4 8 | // 9 | // for more information see main.c 10 | -------------------------------------------------------------------------------- /cuda/mummergpu/experiments/e2e/h_sapiens.m.half_million_reads.fna.C.stats: -------------------------------------------------------------------------------- 1 | Q,R,T,m,r,t,n,Total,Match kernel,Print Kernel,Queries to board,Match coords to board,Match coords from board,Tree to board,Ref str to board,Queries from disk,Ref from disk,Output to disk,Tree construction,Tree reorder,Tree flatten,Ref reorder,Build coord table,Coords to buffers,Avg qry length 2 | 0,0,0,1,0,0,0,134157.187500,7047.885742,54524.042969,0.000000,0.000000,0.000000,0.000000,0.000000,572.235962,21.434999,43668.285156,9099.093750,0.000000,18100.632812,0.000000,4.178000,534.338013,29.000000 3 | -------------------------------------------------------------------------------- /cuda/mummergpu/experiments/e2e/h_sapiens.m.million_reads.fna.stats: -------------------------------------------------------------------------------- 1 | Q,R,T,m,r,t,n,Total,Match kernel,Print Kernel,Queries to board,Match coords to board,Match coords from board,Tree to board,Ref str to board,Queries from disk,Ref from disk,Output to disk,Tree construction,Tree reorder,Tree flatten,Ref reorder,Build coord table,Coords to buffers,Avg qry length 2 | 0,0,0,1,0,0,0,126368.960938,1171.603027,29528.537109,64.507996,12.795000,380.002014,626.255005,12.997000,1193.558960,17.958000,64631.667969,9007.871094,0.000000,17899.054688,0.000000,8.791000,983.672974,32.000000 3 | -------------------------------------------------------------------------------- /cuda/mummergpu/experiments/e2e/lmonocytogenes.m.million_reads.fna.stats: -------------------------------------------------------------------------------- 1 | Q,R,T,m,r,t,n,Total,Match kernel,Print Kernel,Queries to board,Match coords to board,Match coords from board,Tree to board,Ref str to board,Queries from disk,Ref from 
disk,Output to disk,Tree construction,Tree reorder,Tree flatten,Ref reorder,Build coord table,Coords to buffers,Avg qry length 2 | 0,0,0,1,0,0,0,20937.498047,4967.687500,2030.324097,128.699997,49.263000,1367.572998,112.639999,2.397000,3830.489014,3.577000,827.506042,1587.562988,0.000000,2890.739014,0.000000,3.795000,2695.646729,116.139305 3 | -------------------------------------------------------------------------------- /opencl/lud/base/lud_base.c: -------------------------------------------------------------------------------- 1 | void lud_base(float *a, int size) 2 | { 3 | int i,j,k; 4 | float sum; 5 | 6 | for (i=0; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include "components.h" 10 | #include "common.h" 11 | 12 | 13 | #ifdef __APPLE__ 14 | #include 15 | #else 16 | #include 17 | #endif 18 | 19 | 20 | 21 | 22 | /* the old "components.cu" has been separate into two parts 23 | "components.cpp",contains functions 24 | "components.cl", contains all kernel functions 25 | */ 26 | 27 | /* Separate compoents of 8bit RGB source image */ 28 | 29 | //need add some segments 30 | -------------------------------------------------------------------------------- /opencl/dwt2d/components.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | //#include 8 | #ifndef _COMPONENTS_H 9 | #define _COMPONENTS_H 10 | 11 | /* Separate compoents of source 8bit RGB image */ 12 | 13 | template 14 | void rgbToComponents(T d_r, T d_g, T d_b, unsigned char * src, int width, int height); 15 | 16 | 17 | /* Copy a 8bit source image data into a color compoment of type T */ 18 | //template 19 | //void bwToComponent(T *d_c, unsigned char * src, int width, int height); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /cuda/kmeans/README: 
-------------------------------------------------------------------------------- 1 | Usage: ./kmeans [switches] -i filename 2 | 3 | -i filename :file containing data to be clustered 4 | -m max_nclusters :maximum number of clusters allowed [default=5] 5 | -n min_nclusters :minimum number of clusters allowed [default=5] 6 | -t threshold :threshold value [default=0.001] 7 | -l nloops :iteration for each number of clusters [default=1] 8 | -b :input file is in binary format 9 | -r :calculate RMSE [default=off] 10 | -o :output cluster center coordinates [default=off] -------------------------------------------------------------------------------- /cuda/huffman/comparison_helpers.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMPARISON_HELPERS_H_ 2 | #define _COMPARISON_HELPERS_H_ 3 | 4 | template 5 | __inline int compare_vectors(T* data1, T* data2, unsigned int size) { 6 | printf("Comparing vectors: \n"); 7 | bool match = true; 8 | for(unsigned int i = 0; i < size; i++) 9 | if (data1[i]!= data2[i]) { 10 | match = false; 11 | printf("Diff: data1[%d]=%d, data1[%d]=%d.\n",i,data1[i],i,data2[i]); 12 | } 13 | 14 | if (match) { printf("PASS! vectors are matching!\n"); return 0; } 15 | else {printf("FAIL! 
vectors are NOT matching!\n"); return -1; } 16 | } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /cuda/hotspot/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | SRC = hotspot.cu 4 | 5 | EXE = hotspot 6 | 7 | release: $(SRC) 8 | $(CUDA_CC) $(KERNEL_DIM) $(SRC) -o $(EXE) $(CUDA_FLAGS) $(NVML_INC) $(NVML_LIB) 9 | 10 | enum: $(SRC) 11 | $(CUDA_CC) $(KERNEL_DIM) -deviceemu $(SRC) -o $(EXE) $(CUDA_FLAGS) $(NVML_INC) $(NVML_LIB) 12 | 13 | debug: $(SRC) 14 | $(CUDA_CC) $(KERNEL_DIM) -g $(SRC) -o $(EXE) $(CUDA_FLAGS) $(NVML_INC) $(NVML_LIB) 15 | 16 | debugenum: $(SRC) 17 | $(CUDA_CC) $(KERNEL_DIM) -g -deviceemu $(SRC) -o $(EXE) $(CUDA_FLAGS) $(NVML_INC) $(NVML_LIB) 18 | 19 | clean: $(SRC) 20 | rm -f $(EXE) $(EXE).linkinfo result.txt 21 | -------------------------------------------------------------------------------- /cuda/particlefilter/Makefile: -------------------------------------------------------------------------------- 1 | #makefile 2 | 3 | include ../../common/make.config 4 | 5 | CC := $(CUDA_DIR)/bin/nvcc 6 | 7 | INCLUDE := $(CUDA_DIR)/include 8 | 9 | all: naive float 10 | 11 | naive: ex_particle_CUDA_naive_seq.cu 12 | $(CC) -I$(INCLUDE) -L$(CUDA_LIB_DIR) -lcuda -g -lm -O3 -use_fast_math -arch sm_13 ex_particle_CUDA_naive_seq.cu -o particlefilter_naive 13 | 14 | float: ex_particle_CUDA_float_seq.cu 15 | $(CC) -I$(INCLUDE) -L$(CUDA_LIB_DIR) -lcuda -g -lm -O3 -use_fast_math -arch sm_13 ex_particle_CUDA_float_seq.cu -o particlefilter_float 16 | 17 | clean: 18 | rm particlefilter_naive particlefilter_float 19 | -------------------------------------------------------------------------------- /openmp/kmeans/kmeans_openmp/Makefile: -------------------------------------------------------------------------------- 1 | # C compiler 2 | CC = gcc 3 | CC_FLAGS = -g -fopenmp -O2 4 | 5 | kmeans: cluster.o getopt.o kmeans.o 
kmeans_clustering.o 6 | $(CC) $(CC_FLAGS) cluster.o getopt.o kmeans.o kmeans_clustering.o -o kmeans 7 | 8 | %.o: %.[ch] 9 | $(CC) $(CC_FLAGS) $< -c 10 | 11 | cluster.o: cluster.c 12 | $(CC) $(CC_FLAGS) cluster.c -c 13 | 14 | getopt.o: getopt.c 15 | $(CC) $(CC_FLAGS) getopt.c -c 16 | 17 | kmeans.o: kmeans.c 18 | $(CC) $(CC_FLAGS) kmeans.c -c 19 | 20 | kmeans_clustering.o: kmeans_clustering.c kmeans.h 21 | $(CC) $(CC_FLAGS) kmeans_clustering.c -c 22 | 23 | clean: 24 | rm -f *.o *~ kmeans 25 | -------------------------------------------------------------------------------- /openmp/kmeans/kmeans_serial/Makefile: -------------------------------------------------------------------------------- 1 | # C compiler 2 | CC = gcc 3 | CC_FLAGS = -g -fopenmp -O2 4 | 5 | kmeans: cluster.o getopt.o kmeans.o kmeans_clustering.o 6 | $(CC) $(CC_FLAGS) cluster.o getopt.o kmeans.o kmeans_clustering.o -o kmeans 7 | 8 | %.o: %.[ch] 9 | $(CC) $(CC_FLAGS) $< -c 10 | 11 | cluster.o: cluster.c 12 | $(CC) $(CC_FLAGS) cluster.c -c 13 | 14 | getopt.o: getopt.c 15 | $(CC) $(CC_FLAGS) getopt.c -c 16 | 17 | kmeans.o: kmeans.c 18 | $(CC) $(CC_FLAGS) kmeans.c -c 19 | 20 | kmeans_clustering.o: kmeans_clustering.c kmeans.h 21 | $(CC) $(CC_FLAGS) kmeans_clustering.c -c 22 | 23 | clean: 24 | rm -f *.o *~ kmeans 25 | -------------------------------------------------------------------------------- /opencl/dwt2d/README: -------------------------------------------------------------------------------- 1 | //DESCRIPTION 2 | 3 | This is the OpenCL version of the code. 4 | 5 | The JPEG2000 standard uses the 2D Discrete Wavelet Transform (2D DWT), which consumes a significant part of the total encoding time. 6 | 7 | 8 | // USE 9 | **************OUTPUT******************** 10 | USAGE: 11 | make clean 12 | make OUTPUT=Y 13 | 14 | 15 | **************PARAMETERS***************** 16 | USAGE: 17 | ./dwt2d [options] src_img.rgb 18 | 19 | -d, --dimension dimensions of src img, e.g. 
1920x1080 20 | -l, --level DWT level, default 3 21 | -f, --forward forward transform 22 | -5, --53 5/3 transform 23 | -------------------------------------------------------------------------------- /opencl/nn/nn_kernel_v1.cl: -------------------------------------------------------------------------------- 1 | //#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable 2 | 3 | typedef struct latLong 4 | { 5 | float lat; 6 | float lng; 7 | } LatLong; 8 | 9 | __kernel void NearestNeighbor(__global LatLong* restrict d_locations, 10 | __global float * restrict d_distances, 11 | const int numRecords, 12 | const float lat, 13 | const float lng) 14 | { 15 | int i; 16 | for (i=0; i 0) 13 | { 14 | min = MIN(min, src[n - 1]); 15 | } 16 | if (n < cols - 1) 17 | { 18 | min = MIN(min, src[n + 1]); 19 | } 20 | dst[n] = wall[(t + 1) * cols + n] + min; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /cuda/srad/srad_v2/Makefile_nvidia: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # Build script for project 4 | # 5 | ################################################################################ 6 | 7 | # Add source files here 8 | EXECUTABLE := srad 9 | # CUDA source files (compiled with cudacc) 10 | CUFILES := srad.cu 11 | # CUDA dependency files 12 | CU_DEPS := \ 13 | srad_kernel.cu \ 14 | 15 | # C/C++ source files (compiled with gcc / c++) 16 | CCFILES := \ 17 | 18 | 19 | ################################################################################ 20 | # Rules and targets 21 | 22 | include ../../common/common.mk 23 | -------------------------------------------------------------------------------- /opencl/streamcluster/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | #C compiler 4 | CC = g++ 5 | 6 | 7 | 
FLAGS = -O3 8 | SRC = streamcluster.cpp 9 | EXE = streamcluster 10 | 11 | release:$(SRC) 12 | $(CC) -lOpenCL $(SRC) -o $(EXE) -I$(OPENCL_INC) -L$(OPENCL_LIB) $(FLAGS) 13 | 14 | errmsg:$(SRC) 15 | $(CC) -lOpenCL $(SRC) -o $(EXE) -I$(OPENCL_INC) -L$(OPENCL_LIB) -D ERRMSG $(FLAGS) 16 | 17 | ptx:$(SRC) 18 | $(CC) -lOpenCL $(SRC) -o $(EXE) -I$(OPENCL_INC) -L$(OPENCL_LIB) -D PTX_MSG $(FLAGS) 19 | 20 | res:$(SRC) 21 | $(CC) -lOpenCL $(SRC) -o $(EXE) -I$(OPENCL_INC) -L$(OPENCL_LIB) -D RES_MSG $(FLAGS) 22 | 23 | clean: $(SRC) 24 | rm -f $(EXE) $(EXE).linkinfo result* 25 | -------------------------------------------------------------------------------- /cuda/nw/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | ifdef FOR 4 | CUDA_FLAGS += -DFOR 5 | endif 6 | 7 | SRC = needle.cu 8 | EXE = needle 9 | 10 | release: $(SRC) 11 | $(CUDA_CC) ${KERNEL_DIM} $(SRC) -o $(EXE) $(CUDA_FLAGS) $(NVML_INC) $(NVML_LIB) 12 | 13 | enum: $(SRC) 14 | $(CUDA_CC) ${KERNEL_DIM} -deviceemu $(SRC) -o $(EXE) $(CUDA_FLAGS) $(NVML_INC) $(NVML_LIB) 15 | 16 | debug: $(SRC) 17 | $(CUDA_CC) ${KERNEL_DIM} -g $(SRC) -o $(EXE) $(CUDA_FLAGS) $(NVML_INC) $(NVML_LIB) 18 | 19 | debugenum: $(SRC) 20 | $(CUDA_CC) ${KERNEL_DIM} -g -deviceemu $(SRC) -o $(EXE) $(CUDA_FLAGS) $(NVML_INC) $(NVML_LIB) 21 | 22 | clean: $(SRC) 23 | $(RM) $(EXE) $(EXE).linkinfo result.txt 24 | -------------------------------------------------------------------------------- /opencl/particlefilter/README.txt: -------------------------------------------------------------------------------- 1 | file: README.txt 2 | author: Donnie Newell - den4gr@virginia.edu 3 | last modified: 18Jan2012 4 | 5 | The optimized version of particlefilter has significant changes from 6 | the original particle filter kernel prior to December 2011. 
A brief list 7 | is included below: 8 | 9 | - float data types changed to double because of overflow issues 10 | - increased synchronization to correctly handle shared memory and partial blocks 11 | - additional memcopies to print intermediate results for each frame 12 | 13 | These additions result in different results and runtimes than previous versions. 14 | 15 | Single: single precision 16 | Double: double precision 17 | -------------------------------------------------------------------------------- /cuda/hybridsort/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | CC := $(CUDA_DIR)/bin/nvcc 3 | 4 | CC_FLAGS = -arch=sm_20 5 | 6 | ifdef VERIFY 7 | override VERIFY = -DVERIFY 8 | endif 9 | 10 | ifdef OUTPUT 11 | override OUTPUT = -DOUTPUT 12 | endif 13 | 14 | ifdef TIMER 15 | override TIMER = -DTIMER 16 | endif 17 | 18 | hybridsort: main.cu bucketsort.cu bucketsort.cuh bucketsort_kernel.cu histogram1024_kernel.cu mergesort.cu mergesort.cuh mergesort_kernel.cu 19 | $(CC) $(CC_FLAGS) $(VERIFY) $(OUTPUT) $(TIMER) $(HISTO_WG_SIZE_0) $(BUCKET_WG_SIZE_0) $(BUCKET_WG_SIZE_1) $(MERGE_WG_SIZE_0) $(MERGE_WG_SIZE_1) bucketsort.cu mergesort.cu main.cu -o hybridsort 20 | 21 | 22 | clean: 23 | rm *.o hybridsort 24 | -------------------------------------------------------------------------------- /cuda/srad/srad_v1/makefile: -------------------------------------------------------------------------------- 1 | include ../../../common/make.config 2 | 3 | # Example 4 | # target: dependencies 5 | # command 1 6 | # command 2 7 | # . 8 | # . 9 | # . 
10 | # command n 11 | 12 | # link objects(binaries) together 13 | srad: main.o 14 | $(CUDA_CC) $(CUDA_FLAGS) main.o -lm -o $@ $(NVML_INC) $(NVML_LIB) 15 | 16 | # compile main function file into object (binary) 17 | main.o: main.cu \ 18 | define.c \ 19 | graphics.c \ 20 | extract_kernel.cu \ 21 | prepare_kernel.cu \ 22 | reduce_kernel.cu \ 23 | srad_kernel.cu \ 24 | srad2_kernel.cu \ 25 | compress_kernel.cu 26 | 27 | # delete all object files 28 | clean: 29 | $(RM) *.o srad 30 | -------------------------------------------------------------------------------- /opencl/nn/benchmark_fpga.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | filelist=`ls | grep filelist` 4 | version=(0 1 2 3 4 5) 5 | 6 | echo -n "File list/Version " 7 | for i in "${version[@]}" 8 | do 9 | echo -n "$i " 10 | done 11 | echo 12 | 13 | for i in $filelist 14 | do 15 | echo $i | xargs printf "%-22s" 16 | for j in "${version[@]}" 17 | do 18 | sum=0 19 | for k in {1..3} 20 | do 21 | time=`CL_DEVICE_TYPE=CL_DEVICE_TYPE_ACCELERATOR ./nn $i -r 5 -lat 30 -lng 90 v$j 2>&1 | grep Records -A 1 | grep -v Records | cut -f 3` 22 | sum=`echo $sum+$time | bc -l` 23 | done 24 | average=`echo $sum/3 | bc -l | xargs printf "%.3f"` 25 | echo $average | xargs printf "%-11.3f" 26 | done 27 | echo 28 | done 29 | -------------------------------------------------------------------------------- /opencl/nn/nn_kernel_v3.cl: -------------------------------------------------------------------------------- 1 | //#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable 2 | 3 | typedef struct latLong 4 | { 5 | float lat; 6 | float lng; 7 | } LatLong; 8 | 9 | __kernel void NearestNeighbor(__global LatLong* restrict d_locations, 10 | __global float* restrict d_distances, 11 | const int numRecords, 12 | const float lat, 13 | const float lng) 14 | { 15 | int i; 16 | 17 | #pragma unroll 48 18 | for (i=0; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 
char *err_code(cl_int err_in); 8 | int output_device_info(cl_device_id device_id); 9 | void fatal(char* s); 10 | void readinput(float* v, int r, int c,int l,char*); 11 | void writeoutput(float* v,int r,int c,int l,char*); 12 | char* load_kernel_source(const char* filename); 13 | long long get_time(); 14 | float accuracy(float* arr1, float* arr2, int len); 15 | void computeTempCPU(float* pIn, float *tIn, float *tOut, 16 | int nx, int ny, int nz, float Cap, 17 | float Rx, float Ry, float Rz, 18 | float dt, float amb_temp, int numiter); 19 | -------------------------------------------------------------------------------- /opencl/kmeans/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | # C compiler 3 | CC = g++ 4 | CC_FLAGS = -g -O2 5 | 6 | kmeans: cluster.o getopt.o read_input.o kmeans_clustering.o rmse.o 7 | $(CC) $(KERNEL_DIM) $(CC_FLAGS) -lOpenCL kmeans.cpp cluster.o getopt.o read_input.o kmeans_clustering.o rmse.o -o kmeans -I$(OPENCL_INC) -I$(OPENCL_DIR)/shared/inc/ -L$(OPENCL_LIB) 8 | 9 | %.o: %.[ch] 10 | $(CC) $(CC_FLAGS) $< -c 11 | 12 | cluster.o: cluster.c 13 | $(CC) $(CC_FLAGS) cluster.c -c 14 | 15 | getopt.o: getopt.c 16 | $(CC) $(CC_FLAGS) getopt.c -c 17 | 18 | kmeans.o: kmeans.c 19 | $(CC) $(CC_FLAGS) read_input.c -c 20 | 21 | rmse.o: rmse.c 22 | $(CC) $(CC_FLAGS) rmse.c -c 23 | 24 | clean: 25 | rm -f *.o *~ kmeans *.linkinfo 26 | -------------------------------------------------------------------------------- /openmp/lavaMD/kernel/kernel_cpu.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | //========================================================================================================================================================================================================200 6 | // KERNEL_CPU HEADER 7 | 
//========================================================================================================================================================================================================200 8 | 9 | void kernel_cpu( par_str par, 10 | dim_str dim, 11 | box_str* box, 12 | FOUR_VECTOR* rv, 13 | fp* qv, 14 | FOUR_VECTOR* fv); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | -------------------------------------------------------------------------------- /cuda/srad/srad_v1/extract_kernel.cu: -------------------------------------------------------------------------------- 1 | // statistical kernel 2 | __global__ void extract( long d_Ne, 3 | fp *d_I){ // pointer to input image (DEVICE GLOBAL MEMORY) 4 | 5 | // indexes 6 | int bx = blockIdx.x; // get current horizontal block index 7 | int tx = threadIdx.x; // get current horizontal thread index 8 | int ei = (bx*NUMBER_THREADS)+tx; // unique thread id, more threads than actual elements !!! 9 | 10 | // copy input to output & log uncompress 11 | if(eilat) * (lat - latLong->lat) + (lng - latLong->lng) * (lng - latLong->lng) ); 22 | } 23 | } -------------------------------------------------------------------------------- /openmp/leukocyte/OpenMP/track_ellipse.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACK_ELLIPSE_H 2 | #define TRACK_ELLIPSE_H 3 | 4 | #include "find_ellipse.h" 5 | 6 | 7 | extern void ellipsetrack(avi_t *video, double *xc0, double *yc0, int num_centers, int R, int Np, int Nf); 8 | extern MAT *MGVF(MAT *I, double vx, double vy); 9 | extern void heaviside(MAT *H, MAT *z, double v, double e); 10 | extern void ellipseevolve(MAT *f, double *xc0, double *yc0, double *r0, double* t, int Np, double Er, double Ey); 11 | extern double sum_m(MAT *matrix); 12 | extern double sum_v(VEC *vector); 13 | extern double **alloc_2d_double(int x, int y); 14 | extern double ***alloc_3d_double(int x, int y, int z); 15 | extern void free_2d_double(double **p); 16 
| extern void free_3d_double(double ***p); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /cuda/leukocyte/CUDA/track_ellipse.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACK_ELLIPSE_H 2 | #define TRACK_ELLIPSE_H 3 | 4 | #include "find_ellipse.h" 5 | 6 | 7 | extern void ellipsetrack(avi_t *video, double *xc0, double *yc0, int num_centers, int R, int Np, int Nf); 8 | extern MAT **MGVF(MAT **I, double vx, double vy, int Nc); 9 | extern void heaviside(MAT *H, MAT *z, double v, double e); 10 | extern void ellipseevolve(MAT *f, double *xc0, double *yc0, double *r0, double* t, int Np, double Er, double Ey); 11 | extern double sum_m(MAT *matrix); 12 | extern double sum_v(VEC *vector); 13 | extern double **alloc_2d_double(int x, int y); 14 | extern double ***alloc_3d_double(int x, int y, int z); 15 | extern void free_2d_double(double **p); 16 | extern void free_3d_double(double ***p); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /opencl/leukocyte/OpenCL/track_ellipse.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACK_ELLIPSE_H 2 | #define TRACK_ELLIPSE_H 3 | 4 | #include "find_ellipse.h" 5 | 6 | 7 | extern void ellipsetrack(avi_t *video, double *xc0, double *yc0, int num_centers, int R, int Np, int Nf); 8 | extern MAT **MGVF(MAT **I, double vx, double vy, int Nc); 9 | extern void heaviside(MAT *H, MAT *z, double v, double e); 10 | extern void ellipseevolve(MAT *f, double *xc0, double *yc0, double *r0, double* t, int Np, double Er, double Ey); 11 | extern double sum_m(MAT *matrix); 12 | extern double sum_v(VEC *vector); 13 | extern double **alloc_2d_double(int x, int y); 14 | extern double ***alloc_3d_double(int x, int y, int z); 15 | extern void free_2d_double(double **p); 16 | extern void free_3d_double(double ***p); 17 | 18 | #endif 19 | 
-------------------------------------------------------------------------------- /cuda/dwt2d/README: -------------------------------------------------------------------------------- 1 | 2 | // DESCRIPTION 3 | 4 | This is the CUDA version of the code. 5 | 6 | The JPEG2000 standard uses 2D Discrete Wavelet Transform (2D DWT), which consumes a significant part of the total encoding time 7 | 8 | 9 | 10 | // USE 11 | **************OUTPUT******************** 12 | USAGE: 13 | make clean 14 | make OUTPUT=Y 15 | 16 | 17 | **************PARAMETERS***************** 18 | USAGE: 19 | ./dwt2d [options] src_img.rgb 20 | 21 | -d, --dimension dimensions of src img, e.g. 1920x1080 22 | -c, --components number of color components, default 3 23 | -l, --level DWT level, default 3 24 | -f, --forward forward transform 25 | -5, --53 5/3 transform 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /openmp/backprop/Makefile: -------------------------------------------------------------------------------- 1 | # C compiler 2 | CC = gcc 3 | CC_FLAGS = -g -fopenmp -O3 4 | 5 | ifdef OUTPUT 6 | CC_FLAGS += -DOUTPUT 7 | endif 8 | 9 | 10 | backprop: backprop.o facetrain.o imagenet.o backprop_kernel.o 11 | $(CC) $(CC_FLAGS) backprop.o facetrain.o imagenet.o backprop_kernel.o -o backprop -lm 12 | 13 | %.o: %.[ch] 14 | $(CC) $(CC_FLAGS) $< -c 15 | 16 | facetrain.o: facetrain.c backprop.h 17 | $(CC) $(CC_FLAGS) facetrain.c -c 18 | 19 | backprop.o: backprop.c backprop.h 20 | $(CC) $(CC_FLAGS) backprop.c -c 21 | 22 | backprop_kernel.o: backprop_kernel.c backprop.h 23 | $(CC) $(CC_FLAGS) backprop_kernel.c -c 24 | 25 | imagenet.o: imagenet.c backprop.h 26 | $(CC) $(CC_FLAGS) imagenet.c -c 27 | 28 | 29 | clean: 30 | rm -f *.o *~ backprop backprop_cuda.linkinfo 31 | -------------------------------------------------------------------------------- /opencl/backprop/Makefile: 
-------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | NAME = backprop 4 | ifeq ($(OS),Windows_NT) 5 | EXE = $(NAME).exe 6 | else 7 | EXE = $(NAME) 8 | endif 9 | SRC = backprop_ocl.cpp backprop.c facetrain.c imagenet.c 10 | 11 | ifdef OUTPUT 12 | EXTRA_FLAGS += -DOUTPUT 13 | endif 14 | 15 | ifdef ALTERA 16 | ifdef HOST_ONLY 17 | AOT_PROGRAMS = 18 | else 19 | AOT_PROGRAMS = $(KERNEL_ALTERA_NAMES) 20 | endif 21 | else 22 | AOT_PROGRAMS = 23 | endif 24 | 25 | EXTRA_FLAGS += -Wno-unused-result 26 | 27 | all: $(EXE) $(AOT_PROGRAMS) 28 | 29 | profile: CFLAGS += -DPROFILE 30 | profile: $(EXE) $(AOT_PROGRAMS) 31 | 32 | $(EXE): $(SRC) 33 | $(CXX) $(CFLAGS) $(SRC) -o $(EXE) $(OPENCL_INC) $(OPENCL_LIB) $(EXTRA_FLAGS) 34 | 35 | clean: 36 | $(RM) $(EXE) *.aoco $(KERNEL_NAMES) 37 | -------------------------------------------------------------------------------- /cuda/srad/srad_v2/README: -------------------------------------------------------------------------------- 1 | In srad.h, define either GPU or CPU computation 2 | Currently, the GPU implementation can only support x-, y-dimensions that can be divided by 16. 
3 | 4 | Usage: 5 | srad 128 128 0 31 0 31 0.5 2 6 | 7 | 128 //number of rows in the domain 8 | 128 //number of cols in the domain 9 | 0 //y1 position of the speckle 10 | 31 //y2 position of the speckle 11 | 0 //x1 position of the speckle 12 | 31 //x2 position of the speckle 13 | 0.5 //Lambda value 14 | 2 //number of iterations 15 | 16 | 17 | ******Adjustable work group size***** 18 | The kernel has square shape 19 | RD_WG_SIZE_0 or RD_WG_SIZE_0_0 describe one dimesion 20 | The total thread number for one block is RD_WG_SIZE_0*RD_WG_SIZE_0 21 | 22 | USAGE: 23 | make clean 24 | make KERNEL_DIM="-DRD_WG_SIZE_0=16" -------------------------------------------------------------------------------- /cuda/lavaMD/kernel/kernel_gpu_cuda_wrapper.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | //========================================================================================================================================================================================================200 6 | // KERNEL_GPU_CUDA_WRAPPER HEADER 7 | //========================================================================================================================================================================================================200 8 | 9 | void kernel_gpu_cuda_wrapper( par_str parms_cpu, 10 | dim_str dim_cpu, 11 | box_str* box_cpu, 12 | FOUR_VECTOR* rv_cpu, 13 | fp* qv_cpu, 14 | FOUR_VECTOR* fv_cpu); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | -------------------------------------------------------------------------------- /opencl/dwt2d/Makefile: -------------------------------------------------------------------------------- 1 | #all: 2 | # g++ -o components.o -I/usr/local/cuda-5.5/include -c components.cpp 3 | # g++ -o prog -I/usr/local/cuda-5.5/include main.cpp components.o -lOpenCL 4 | # 5 | include ../../common/make.config 6 | 7 | 8 | ifdef OUTPUT 9 | override 
OUTPUT = -DOUTPUT 10 | endif 11 | 12 | 13 | OUTPUT = -DOUTPUT 14 | 15 | #C_C = g++ 16 | #OCL_LIB = -lOpenCL 17 | #OCL_INC = -I/usr/local/cuda-5.5/include 18 | 19 | default: dwt2d 20 | 21 | # components.cpp does not seem to be used 22 | #components: 23 | # $(C_C) -o components.o $(OCL_INC) -c components.cpp 24 | 25 | dwt2d: main.o 26 | $(CXX) -o $@ $< $(OPENCL_LIB) 27 | 28 | main.o: CXXFLAGS += $(OPENCL_INC) $(OPENCL_CFLAGS) $(OUTPUT) 29 | 30 | 31 | clean: 32 | rm -rf *.o dwt2d 33 | rm *.bmp.dwt.* 34 | 35 | -------------------------------------------------------------------------------- /cuda/cfd/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | all: euler3d euler3d_double pre_euler3d pre_euler3d_double 4 | 5 | euler3d: euler3d.cu 6 | $(CUDA_CC) $(CUDA_FLAGS) $(KERNEL_DIM) -Xptxas -v euler3d.cu -o euler3d $(NVML_INC) $(NVML_LIB) 7 | 8 | euler3d_double: euler3d_double.cu 9 | $(CUDA_CC) $(CUDA_FLAGS) $(KERNEL_DIM) -Xptxas -v euler3d_double.cu -o euler3d_double $(NVML_INC) $(NVML_LIB) 10 | 11 | pre_euler3d: pre_euler3d.cu 12 | $(CUDA_CC) $(CUDA_FLAGS) $(KERNEL_DIM) -Xptxas -v pre_euler3d.cu -o pre_euler3d $(NVML_INC) $(NVML_LIB) 13 | 14 | pre_euler3d_double: pre_euler3d_double.cu 15 | $(CUDA_CC) $(CUDA_FLAGS) $(KERNEL_DIM) -Xptxas -v pre_euler3d_double.cu -o pre_euler3d_double $(NVML_INC) $(NVML_LIB) 16 | 17 | clean: 18 | rm -f euler3d euler3d_double pre_euler3d pre_euler3d_double *.linkinfo 19 | -------------------------------------------------------------------------------- /opencl/cfd/common.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_H_ 2 | #define COMMON_H_ 3 | 4 | #define GAMMA (1.4f) 5 | 6 | #define NDIM (3) 7 | #define NNB (4) 8 | 9 | #define RK (3) // 3rd order RK 10 | #define ff_mach (1.2f) 11 | #define deg_angle_of_attack (0.0f) 12 | 13 | #define VAR_DENSITY (0) 14 | #define VAR_MOMENTUM (1) 15 | #define 
VAR_DENSITY_ENERGY (VAR_MOMENTUM+NDIM) 16 | #define NVAR (VAR_DENSITY_ENERGY+1) 17 | //#pragma OPENCL EXTENSION CL_MAD : enable 18 | 19 | //self-defined user type 20 | typedef struct{ 21 | float x; 22 | float y; 23 | float z; 24 | } FLOAT3; 25 | 26 | #define FLOAT3_ASSIGN(f3, rx, ry, rz) do { \ 27 | (f3).x = rx; \ 28 | (f3).y = ry; \ 29 | (f3).z = rz; \ 30 | } while (0) 31 | 32 | #endif /* COMMON_H_ */ 33 | -------------------------------------------------------------------------------- /cuda/mummergpu/src/info.xml: -------------------------------------------------------------------------------- 1 | 2 | Template 3 | 4 | CUDA 5 | 6 | projects\template 7 | doc/black_1x1.gif 8 | doc/black_1x1.gif 9 | doc/black_1x1.gif 10 | 11 | 12 | 13 | Bin\template.exe 14 | GeForce 8 15 | CUDA, GPGPU 16 | GPGPU 17 | 070301 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /cuda/leukocyte/meschach_lib/MACHINES/Cray/patch.2: -------------------------------------------------------------------------------- 1 | *** iter0.c Mon Jun 20 15:22:36 1994 2 | --- iter0.c.orig Fri Oct 28 01:49:19 1994 3 | *************** 4 | *** 103,111 **** 5 | if (lenx > 0) ip->x = v_get(lenx); 6 | else ip->x = (VEC *)NULL; 7 | 8 | ! ip->Ax = ip->A_par = NULL; 9 | ! ip->ATx = ip->AT_par = NULL; 10 | ! ip->Bx = ip->B_par = NULL; 11 | ip->info = iter_std_info; 12 | ip->stop_crit = iter_std_stop_crit; 13 | ip->init_res = 0.0; 14 | --- 103,111 ---- 15 | if (lenx > 0) ip->x = v_get(lenx); 16 | else ip->x = (VEC *)NULL; 17 | 18 | ! ip->Ax = NULL; ip->A_par = NULL; 19 | ! ip->ATx = NULL; ip->AT_par = NULL; 20 | ! 
ip->Bx = NULL; ip->B_par = NULL; 21 | ip->info = iter_std_info; 22 | ip->stop_crit = iter_std_stop_crit; 23 | ip->init_res = 0.0; 24 | -------------------------------------------------------------------------------- /cuda/srad/srad_v1/prepare_kernel.cu: -------------------------------------------------------------------------------- 1 | // statistical kernel 2 | __global__ void prepare( long d_Ne, 3 | fp *d_I, // pointer to output image (DEVICE GLOBAL MEMORY) 4 | fp *d_sums, // pointer to input image (DEVICE GLOBAL MEMORY) 5 | fp *d_sums2){ 6 | 7 | // indexes 8 | int bx = blockIdx.x; // get current horizontal block index 9 | int tx = threadIdx.x; // get current horizontal thread index 10 | int ei = (bx*NUMBER_THREADS)+tx; // unique thread id, more threads than actual elements !!! 11 | 12 | // copy input to output & log uncompress 13 | if(ei 2 | Template 3 | 4 | CUDA 5 | 6 | projects\template 7 | doc/black_1x1.gif 8 | doc/black_1x1.gif 9 | doc/black_1x1.gif 10 | 11 | 12 | 13 | Bin\template.exe 14 | GeForce 8 15 | CUDA, GPGPU 16 | GPGPU 17 | 070301 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /opencl/leukocyte/meschach_lib/MACHINES/Cray/patch.2: -------------------------------------------------------------------------------- 1 | *** iter0.c Mon Jun 20 15:22:36 1994 2 | --- iter0.c.orig Fri Oct 28 01:49:19 1994 3 | *************** 4 | *** 103,111 **** 5 | if (lenx > 0) ip->x = v_get(lenx); 6 | else ip->x = (VEC *)NULL; 7 | 8 | ! ip->Ax = ip->A_par = NULL; 9 | ! ip->ATx = ip->AT_par = NULL; 10 | ! ip->Bx = ip->B_par = NULL; 11 | ip->info = iter_std_info; 12 | ip->stop_crit = iter_std_stop_crit; 13 | ip->init_res = 0.0; 14 | --- 103,111 ---- 15 | if (lenx > 0) ip->x = v_get(lenx); 16 | else ip->x = (VEC *)NULL; 17 | 18 | ! ip->Ax = NULL; ip->A_par = NULL; 19 | ! ip->ATx = NULL; ip->AT_par = NULL; 20 | ! 
ip->Bx = NULL; ip->B_par = NULL; 21 | ip->info = iter_std_info; 22 | ip->stop_crit = iter_std_stop_crit; 23 | ip->init_res = 0.0; 24 | -------------------------------------------------------------------------------- /opencl/nw/nw_kernel_v1.cl: -------------------------------------------------------------------------------- 1 | /* Return the largest of the three integers a, b and c. */ inline int maximum(int a, int b, int c) 2 | { 3 | int k; /* holds the larger of a and b */ 4 | if(a <= b) 5 | k = b; 6 | else 7 | k = a; 8 | 9 | if(k <= c) 10 | return(c); 11 | else 12 | return(k); 13 | } 14 | 15 | /* Fill the interior of the dim x dim score matrix input_itemsets in row-major order. The kernel does not query any work-item id: its own loop nest visits every cell with 1 <= i, j < dim - 1, and each cell is set to the maximum of three candidates built from cells written earlier in the sweep (row j - 1 and column i - 1). reference is read at the same index; penalty is subtracted from the left and upper neighbors. */ __kernel void nw_kernel1(__global int* restrict reference, 16 | __global int* restrict input_itemsets, 17 | int dim, 18 | int penalty) 19 | { 20 | for (int j = 1; j < dim - 1; ++j) 21 | { 22 | for (int i = 1; i < dim - 1; ++i) 23 | { 24 | int index = j * dim + i; /* flat offset of row j, column i */ 25 | input_itemsets[index]= maximum(input_itemsets[index - 1 - dim] + reference[index], /* upper-left neighbor plus reference value */ 26 | input_itemsets[index - 1] - penalty, /* left neighbor minus penalty */ 27 | input_itemsets[index - dim] - penalty); /* upper neighbor minus penalty */ 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /openmp/leukocyte/meschach_lib/MACHINES/Cray/patch.2: -------------------------------------------------------------------------------- 1 | *** iter0.c Mon Jun 20 15:22:36 1994 2 | --- iter0.c.orig Fri Oct 28 01:49:19 1994 3 | *************** 4 | *** 103,111 **** 5 | if (lenx > 0) ip->x = v_get(lenx); 6 | else ip->x = (VEC *)NULL; 7 | 8 | ! ip->Ax = ip->A_par = NULL; 9 | ! ip->ATx = ip->AT_par = NULL; 10 | ! ip->Bx = ip->B_par = NULL; 11 | ip->info = iter_std_info; 12 | ip->stop_crit = iter_std_stop_crit; 13 | ip->init_res = 0.0; 14 | --- 103,111 ---- 15 | if (lenx > 0) ip->x = v_get(lenx); 16 | else ip->x = (VEC *)NULL; 17 | 18 | ! ip->Ax = NULL; ip->A_par = NULL; 19 | ! ip->ATx = NULL; ip->AT_par = NULL; 20 | ! 
ip->Bx = NULL; ip->B_par = NULL; 21 | ip->info = iter_std_info; 22 | ip->stop_crit = iter_std_stop_crit; 23 | ip->init_res = 0.0; 24 | -------------------------------------------------------------------------------- /opencl/lavaMD/makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | NAME = lavaMD 4 | ifeq ($(OS),Windows_NT) 5 | EXE = $(NAME).exe 6 | else 7 | EXE = $(NAME) 8 | endif 9 | SRC = main.c ./kernel/kernel_gpu_opencl_wrapper.c ./util/timer/timer.c ./util/opencl/opencl.c ./util/num/num.c 10 | 11 | ifdef OUTPUT 12 | EXTRA_FLAGS += -DOUTPUT 13 | endif 14 | 15 | VERSIONS = 0 1 3 16 | KERNEL_NAMES = $(foreach var,$(VERSIONS),$(NAME)_kernel_v$(var)) 17 | KERNEL_ALTERA_NAMES = $(addsuffix .aocx, $(KERNEL_NAMES)) 18 | 19 | ifdef ALTERA 20 | ifdef HOST_ONLY 21 | AOT_PROGRAMS = 22 | else 23 | AOT_PROGRAMS = $(KERNEL_ALTERA_NAMES) 24 | endif 25 | else 26 | AOT_PROGRAMS = 27 | endif 28 | 29 | all: $(SRC) $(AOT_PROGRAMS) 30 | $(CC) $(CFLAGS) $(SRC) -o $(EXE) $(OPENCL_INC) $(OPENCL_LIB) $(EXTRA_FLAGS) 31 | 32 | clean: 33 | $(RM) $(EXE) *.aoco $(KERNEL_NAMES) 34 | -------------------------------------------------------------------------------- /openmp/leukocyte/OpenMP/Makefile: -------------------------------------------------------------------------------- 1 | ifdef OUTPUT 2 | override OUTPUT = -DOUTPUT 3 | endif 4 | 5 | 6 | # C compiler 7 | CC = gcc 8 | CC_FLAGS = -g -O3 -Wall -fopenmp -I$(MATRIX_DIR) 9 | 10 | # Matrix library path 11 | MATRIX_DIR = ../meschach_lib 12 | 13 | 14 | leukocyte: detect_main.o avilib.o find_ellipse.o track_ellipse.o misc_math.o $(MATRIX_DIR)/meschach.a 15 | $(CC) $(CC_FLAGS) -lm avilib.o find_ellipse.o track_ellipse.o misc_math.o detect_main.o -o leukocyte -lm $(MATRIX_DIR)/meschach.a -lpthread 16 | 17 | %.o: %.[ch] 18 | $(CC) $(OUTPUT) $(CC_FLAGS) $< -c 19 | 20 | detect_main.o: find_ellipse.h track_ellipse.h avilib.h 21 | find_ellipse.o: avilib.h 22 | 
track_ellipse.o: find_ellipse.h track_ellipse.h avilib.h 23 | 24 | $(MATRIX_DIR)/meschach.a: 25 | cd $(MATRIX_DIR); ./configure --with-all; make all; make clean 26 | 27 | clean: 28 | rm -f *.o leukocyte 29 | -------------------------------------------------------------------------------- /opencl/bfs/README_fpga.md: -------------------------------------------------------------------------------- 1 | # Kernel variations 2 | 3 | See the GitHub Wiki page for more general information. 4 | 5 | ## v1 6 | 7 | Straightforward single work-item kernel created by wrapping v0 in a 8 | for loop from 0 to no_of_nodes and adding restrict. 9 | 10 | ## v2 11 | 12 | Contributed by Hagiwara-san from Altera. Uses unrolling for BFS_1 kernel 13 | and SIMD for BFS_2 kernel. More unrolling on BFS_1 kernel results in 14 | worse performance. 15 | 16 | ## v3 17 | 18 | Single work-item kernel created by unrolling loops as much as possible 19 | without using any resources more than 100% on the de5net_a7 board. 20 | 21 | ## v5 22 | 23 | Trying to resolve memory dependency caused by g_cost[id]=g_cost[tid]+1 24 | using a temp variable that loads g_cost[tid] from memory earlier in the 25 | pipeline. The AOCL doesn't print anything about memory dependency 26 | anymore. 
27 | -------------------------------------------------------------------------------- /opencl/pathfinder/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | NAME = pathfinder 4 | ifeq ($(OS),Windows_NT) 5 | EXE = $(NAME).exe 6 | else 7 | EXE = $(NAME) 8 | endif 9 | SRC = main.cpp OpenCL.cpp 10 | 11 | VERSIONS = 0 1 2 3 4 5 7 9 12 | KERNEL_NAMES = $(foreach var,$(VERSIONS),$(NAME)_kernel_v$(var)) 13 | KERNEL_ALTERA_NAMES = $(addsuffix .aocx, $(KERNEL_NAMES)) 14 | 15 | ifdef BSIZE 16 | CXXFLAGS += -DBSIZE=$(BSIZE) 17 | endif 18 | 19 | ifdef SSIZE 20 | CXXFLAGS += -DSSIZE=$(SSIZE) 21 | endif 22 | 23 | ifdef ALTERA 24 | ifdef HOST_ONLY 25 | AOT_PROGRAMS = 26 | else 27 | AOT_PROGRAMS = $(KERNEL_ALTERA_NAMES) 28 | endif 29 | else 30 | AOT_PROGRAMS = 31 | endif 32 | 33 | all: $(SRC) $(AOT_PROGRAMS) 34 | $(CXX) $(CXXFLAGS) $(SRC) -o $(EXE) $(OPENCL_INC) $(OPENCL_LIB) $(EXTRA_FLAGS) $(LDFLAGS) 35 | 36 | clean: 37 | $(RM) $(EXE) *.aoco $(KERNEL_NAMES) 38 | -------------------------------------------------------------------------------- /opencl/nn/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | NAME = nn 4 | ifeq ($(OS),Windows_NT) 5 | EXE = $(NAME).exe 6 | else 7 | EXE = $(NAME) 8 | endif 9 | SRC = nearestNeighbor.cpp clutils.cpp utils.cpp 10 | 11 | VERSIONS = 0 1 2 3 4 5 12 | KERNEL_NAMES = $(foreach var,$(VERSIONS),$(NAME)_kernel_v$(var)) 13 | KERNEL_ALTERA_NAMES = $(addsuffix .aocx, $(KERNEL_NAMES)) 14 | 15 | ifdef ALTERA 16 | ifdef HOST_ONLY 17 | AOT_PROGRAMS = 18 | else 19 | AOT_PROGRAMS = $(KERNEL_ALTERA_NAMES) 20 | endif 21 | else 22 | AOT_PROGRAMS = 23 | endif 24 | 25 | ifdef ARM 26 | EXTRA_FLAGS = 27 | else ifeq ($(OS),Windows_NT) 28 | EXTRA_FLAGS = 29 | else 30 | EXTRA_FLAGS = -fopenmp 31 | endif 32 | 33 | release: $(SRC) $(AOT_PROGRAMS) 34 | $(CXX) $(CXXFLAGS) $(SRC) -o $(EXE) $(OPENCL_INC) 
$(OPENCL_LIB) $(EXTRA_FLAGS) 35 | 36 | clean: 37 | $(RM) $(EXE) *.aoco $(KERNEL_NAMES) 38 | -------------------------------------------------------------------------------- /cuda/hotspot3D/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | SRC = hotspot3D.cu 4 | 5 | EXE = hotspot3D 6 | 7 | OUTPUT = output.txt 8 | 9 | FLAGS = -g -G #-arch sm_20 --ptxas-options=-v 10 | release: $(SRC) 11 | $(CUDA_CC) $(KERNEL_DIM) $(FLAGS) $(SRC) -o $(EXE) $(CUDA_FLAGS) $(NVML_INC) $(NVML_LIB) 12 | 13 | verify: 14 | $(CUDA_CC) $(KERNEL_DIM) $(FLAGS) $(SRC) -o $(EXE) $(CUDA_FLAGS) $(NVML_INC) $(NVML_LIB) -DVERIFY 15 | 16 | enum: $(SRC) 17 | $(CUDA_CC) $(KERNEL_DIM) $(FLAGS) -deviceemu $(SRC) -o $(EXE) $(CUDA_FLAGS) $(NVML_INC) $(NVML_LIB) 18 | 19 | debug: $(SRC) 20 | $(CUDA_CC) $(KERNEL_DIM) $(FLAGS) -g $(SRC) -o $(EXE) $(CUDA_FLAGS) $(NVML_INC) $(NVML_LIB) 21 | 22 | debugenum: $(SRC) 23 | $(CUDA_CC) $(KERNEL_DIM) $(FLAGS) -g -deviceemu $(SRC) -o $(EXE) $(CUDA_FLAGS) $(NVML_INC) $(NVML_LIB) 24 | 25 | clean: $(SRC) 26 | rm -f $(EXE) $(EXE).linkinfo $(OUTPUT) 27 | -------------------------------------------------------------------------------- /cuda/cfd/README: -------------------------------------------------------------------------------- 1 | Four versions of CFD: 2 | 3 | euler3d <-- redundant flux computation (GPU) 4 | euler3d_double <-- redundant flux computation double precision (GPU) 5 | pre_euler3d <-- pre-computed fluxes (GPU) 6 | pre_euler3d_double <-- pre-computed fluxes double precision (GPU) 7 | 8 | The original OpenMP and CUDA codes for CFD were obtained from Andrew Corrigan at George Mason University, 9 | who has given us permission to include it as part of Rodinia under Rodinia's license. 
10 | 11 | ******Adjustable work group size***** 12 | RD_WG_SIZE for all 13 | RD_WG_SIZE_1 or RD_WG_SIZE_1_0 for initialize_variables 14 | RD_WG_SIZE_2 or RD_WG_SIZE_2_0 for compute_step_factor 15 | RD_WG_SIZE_3 or RD_WG_SIZE_3_0 for compute_flux 16 | RD_WG_SIZE_4 or RD_WG_SIZE_4_0 for time_step 17 | 18 | USAGE: 19 | make clean 20 | make KERNEL_DIM="-DRD_WG_SIZE_1=128 -DRD_WG_SIZE_2=192 -DRD_WG_SIZE_3=128 -DRD_WG_SIZE_4=256" -------------------------------------------------------------------------------- /cuda/leukocyte/meschach_lib/MACHINES/TurboC/README: -------------------------------------------------------------------------------- 1 | This directory contains a makefile for Borland C++. 2 | It was written by Andrew Gockel (contact information below). 3 | Use at own risk. This is provided as part of the standard Meschach 4 | distribution to give the library the widest possible use. 5 | However, problems with the makefile should be directed to the author, 6 | not the developers of Meschach (David Stewart and Zbigniew Leyk). 7 | 8 | No representations are made concerning the fitness of this software for any 9 | particular purpose. 10 | 11 | # Borland C++ V4 Makefile 12 | # 13 | # Saturday, 14 October, 1995 14 | # 15 | # Andrew Gockel 16 | # 123 Settlement Road 17 | # THE GAP, QLD., 4061 18 | # AUSTRALIA 19 | # 20 | # Email 21 | # INTERNET:andrew@kittyhawk.aero.rmit.edu.au 22 | # CIS:100245.1253@compuserve.com 23 | # MSN:Andrew_Gockel@msn.com 24 | # 25 | # c:\meschach\meschach.mak 26 | -------------------------------------------------------------------------------- /opencl/leukocyte/meschach_lib/MACHINES/TurboC/README: -------------------------------------------------------------------------------- 1 | This directory contains a makefile for Borland C++. 2 | It was written by Andrew Gockel (contact information below). 3 | Use at own risk. This is provided as part of the standard Meschach 4 | distribution to give the library the widest possible use. 
5 | However, problems with the makefile should be directed to the author, 6 | not the developers of Meschach (David Stewart and Zbigniew Leyk). 7 | 8 | No representations are made concerning the fitness of this software for any 9 | particular purpose. 10 | 11 | # Borland C++ V4 Makefile 12 | # 13 | # Saturday, 14 October, 1995 14 | # 15 | # Andrew Gockel 16 | # 123 Settlement Road 17 | # THE GAP, QLD., 4061 18 | # AUSTRALIA 19 | # 20 | # Email 21 | # INTERNET:andrew@kittyhawk.aero.rmit.edu.au 22 | # CIS:100245.1253@compuserve.com 23 | # MSN:Andrew_Gockel@msn.com 24 | # 25 | # c:\meschach\meschach.mak 26 | -------------------------------------------------------------------------------- /openmp/leukocyte/meschach_lib/MACHINES/TurboC/README: -------------------------------------------------------------------------------- 1 | This directory contains a makefile for Borland C++. 2 | It was written by Andrew Gockel (contact information below). 3 | Use at own risk. This is provided as part of the standard Meschach 4 | distribution to give the library the widest possible use. 5 | However, problems with the makefile should be directed to the author, 6 | not the developers of Meschach (David Stewart and Zbigniew Leyk). 7 | 8 | No representations are made concerning the fitness of this software for any 9 | particular purpose. 
10 | 11 | # Borland C++ V4 Makefile 12 | # 13 | # Saturday, 14 October, 1995 14 | # 15 | # Andrew Gockel 16 | # 123 Settlement Road 17 | # THE GAP, QLD., 4061 18 | # AUSTRALIA 19 | # 20 | # Email 21 | # INTERNET:andrew@kittyhawk.aero.rmit.edu.au 22 | # CIS:100245.1253@compuserve.com 23 | # MSN:Andrew_Gockel@msn.com 24 | # 25 | # c:\meschach\meschach.mak 26 | -------------------------------------------------------------------------------- /cuda/kmeans/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | # C compiler 4 | CC = gcc 5 | CC_FLAGS = -g -fopenmp -O2 6 | 7 | # CUDA compiler 8 | NVCC = $(CUDA_DIR)/bin/nvcc 9 | NVCC_FLAGS = -I$(CUDA_DIR)/include 10 | 11 | # 'make dbg=1' enables NVCC debugging 12 | ifeq ($(dbg),1) 13 | NVCC_FLAGS += -g -O0 14 | else 15 | NVCC_FLAGS += -O2 16 | endif 17 | 18 | # 'make emu=1' compiles the CUDA kernels for emulation 19 | ifeq ($(emu),1) 20 | NVCC_FLAGS += -deviceemu 21 | endif 22 | 23 | 24 | kmeans: cluster.o getopt.o kmeans.o kmeans_clustering.o kmeans_cuda.o rmse.o 25 | $(CC) $(CC_FLAGS) cluster.o getopt.o kmeans.o kmeans_clustering.o kmeans_cuda.o rmse.o -o kmeans -L$(CUDA_LIB_DIR) -lcuda -lcudart -lm 26 | 27 | %.o: %.[ch] 28 | $(CC) $(CC_FLAGS) $< -c 29 | 30 | kmeans_cuda.o: kmeans_cuda.cu 31 | $(NVCC) $(NVCC_FLAGS) -c kmeans_cuda.cu 32 | 33 | clean: 34 | rm -f *.o *~ kmeans kmeans_cuda.linkinfo 35 | -------------------------------------------------------------------------------- /cuda/nn/README: -------------------------------------------------------------------------------- 1 | Must have the CUDA Toolkit installed and nvcc working 2 | To build and run nearest neighbor: 3 | make nn 4 | ./nn filelist_4 -r 3 -lat 30 -lng 90 5 | 6 | To generate new data sets: 7 | Edit gen_dataset.sh and select the size of the desired data set 8 | make hurricane_gen 9 | ./hurricane_gen 10 | 11 | Full Usage: 12 | 13 | nearestNeighbor [filename] -r [int] -lat 
[float] -lng [float] [-h] 14 | 15 | example: 16 | $ ./nearestNeighbor filelist.txt -r 5 -lat 30 -lng 90 17 | 18 | filename the filename that lists the data input files 19 | -r [int] the number of records to return (default: 10) 20 | -lat [float] the latitude for nearest neighbors (default: 0) 21 | -lng [float] the longitude for nearest neighbors (default: 0) 22 | 23 | -h, --help Display the help file 24 | 25 | Note: The filename is required as the first parameter. 26 | 27 | -------------------------------------------------------------------------------- /cuda/streamcluster/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/make.config 2 | 3 | NVCC = $(CUDA_DIR)/bin/nvcc 4 | 5 | NVCC_FLAGS = -I$(CUDA_DIR)/include 6 | 7 | TARGET_G = sc_gpu 8 | 9 | 10 | # make dbg=1 tells nvcc to add debugging symbols to the binary 11 | ifeq ($(dbg),1) 12 | NVCC_FLAGS += -g -O0 13 | else 14 | NVCC_FLAGS += -O3 15 | endif 16 | 17 | # make emu=1 compiles the CUDA kernels for emulation 18 | ifeq ($(emu),1) 19 | NVCC_FLAGS += -deviceemu 20 | endif 21 | 22 | # make dp=1 compiles the CUDA kernels with double-precision support 23 | ifeq ($(dp),1) 24 | NVCC_FLAGS += --gpu-name sm_13 25 | endif 26 | 27 | 28 | $(TARGET_G): streamcluster_cuda_cpu.cpp streamcluster_cuda.cu streamcluster_header.cu 29 | $(NVCC) $(NVCC_FLAGS) streamcluster_cuda_cpu.cpp streamcluster_cuda.cu streamcluster_header.cu -o $(TARGET_G) -lcuda 30 | 31 | 32 | clean: 33 | rm -f *.o *~ *.txt $(TARGET_G) *.linkinfo 34 | 35 | -------------------------------------------------------------------------------- /cuda/heartwall/setdevice.cu: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // Set Device 3 | //////////////////////////////////////////////////////////////////////////////// 4 | 5 | void setdevice(void){ 6 | 7 | // variables 8 | int 
num_devices; 9 | int device; 10 | 11 | cudaGetDeviceCount(&num_devices); 12 | if (num_devices > 1) { 13 | 14 | // variables 15 | int max_multiprocessors; 16 | int max_device; 17 | cudaDeviceProp properties; 18 | 19 | // initialize variables 20 | max_multiprocessors = 0; 21 | max_device = 0; 22 | 23 | for (device = 0; device < num_devices; device++) { 24 | cudaGetDeviceProperties(&properties, device); 25 | if (max_multiprocessors < properties.multiProcessorCount) { 26 | max_multiprocessors = properties.multiProcessorCount; 27 | max_device = device; 28 | } 29 | } 30 | cudaSetDevice(max_device); 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /openmp/myocyte/define.c: -------------------------------------------------------------------------------- 1 | //=============================================================================================================================================================================================================== 2 | //=============================================================================================================================================================================================================== 3 | // DEFINE / INCLUDE 4 | //=============================================================================================================================================================================================================== 5 | //=============================================================================================================================================================================================================== 6 | 7 | #define fp float 8 | 9 | #define EQUATIONS 91 10 | #define PARAMETERS 16 11 | -------------------------------------------------------------------------------- /opencl/nn/README_fpga.md: -------------------------------------------------------------------------------- 1 | # Kernel variations 2 | 3 | 
See the github Wiki page for more general information. 4 | 5 | ## v1 6 | 7 | Straightforward single work-item kernel created by wrapping v0 in a 8 | for loop from 0 to numRecords and adding restrict. 9 | 10 | ## v2 11 | 12 | Optimized NDRange kernel by adding restrict, reqd_work_group_size, 13 | num_simd_work_items and num_compute_units to v0. SIMD_LANES 16 is 14 | the maximum number allowed by the compiler. COMPUTE_UNITS=3 was 15 | chosen to avoid fully utilizing the DSPs. 16 | 17 | ## v3 18 | 19 | Single work-item kernel created by adding #pragma unroll 48 to v1. 20 | #pragma unroll 64 fully utilizes the DSP units, so it was avoided 21 | at this point. 22 | 23 | ## v4 24 | 25 | Same as v2 but with COMPUTE_UNITS=4 (resulting in 100% DSP utilization 26 | and lower operating frequency). 27 | 28 | ## v5 29 | 30 | Same as v3 but with #pragma unroll 64 (resulting in 100% DSP utilization 31 | and lower operating frequency). 32 | 33 | -------------------------------------------------------------------------------- /cuda/myocyte/define.c: -------------------------------------------------------------------------------- 1 | //=============================================================================================================================================================================================================== 2 | //=============================================================================================================================================================================================================== 3 | // DEFINE / INCLUDE 4 | //=============================================================================================================================================================================================================== 5 | 
//=============================================================================================================================================================================================================== 6 | 7 | #define fp float 8 | 9 | #define NUMBER_THREADS 32 10 | 11 | #define EQUATIONS 91 12 | #define PARAMETERS 18 13 | -------------------------------------------------------------------------------- /opencl/lavaMD/README_fpga.md: -------------------------------------------------------------------------------- 1 | # Compilation 2 | 3 | Default: 4 | 5 | ``` 6 | make ALTERA=1 BOARD=[board_name] 7 | ``` 8 | 9 | Host only: 10 | ``` 11 | make ALTERA=1 HOST_ONLY=1 12 | ``` 13 | 14 | Custom kernel: 15 | 16 | ``` 17 | aoc [kernel_name] -g -v --report --board [board_name] -I../../ -DUSE_RESTRICT -DUNROLL=[UNROLL] 18 | ``` 19 | 20 | 21 | # Execution 22 | 23 | Default run: 24 | 25 | ``` 26 | ./run v[version_number] 27 | ``` 28 | 29 | Custom run: 30 | 31 | ``` 32 | ./lavaMD -boxes1d v[version_number] 33 | ``` 34 | 35 | 36 | # Kernel variations 37 | 38 | See the github Wiki page for more general information. 39 | 40 | ## v1 41 | 42 | Straightforward single work-item kernel created based on the OpenMP 43 | version. For the first two kernel arguments, only the necessary variable 44 | is passed instead of the whole struct. 45 | 46 | ## v3 47 | 48 | Uses shift register-based optimization for floating-point reduction, 49 | "__attribute__((max_global_work_dim(0)))," and unrolling. 50 | -------------------------------------------------------------------------------- /cuda/myocyte/Makefile: -------------------------------------------------------------------------------- 1 | # Example 2 | # target: dependencies 3 | # command 1 4 | # command 2 5 | # . 6 | # . 7 | # . 
8 | # command n 9 | 10 | # link objects(binaries) together 11 | myocyte.out: main.o 12 | nvcc main.o \ 13 | -I/usr/local/cuda/include \ 14 | -L/usr/local/cuda/lib \ 15 | -lm -lcuda -lcudart \ 16 | -o myocyte.out 17 | 18 | # compile main function file into object (binary) 19 | main.o: define.c \ 20 | main.cu \ 21 | work.cu \ 22 | solver.cu \ 23 | embedded_fehlberg_7_8.cu \ 24 | master.cu \ 25 | kernel.cu \ 26 | kernel_ecc.cu \ 27 | kernel_cam.cu \ 28 | kernel_fin.cu \ 29 | work_2.cu \ 30 | solver_2.cu \ 31 | embedded_fehlberg_7_8_2.cu \ 32 | kernel_2.cu \ 33 | kernel_ecc_2.cu \ 34 | kernel_cam_2.cu \ 35 | kernel_fin_2.cu \ 36 | file.c \ 37 | timer.c 38 | nvcc main.cu \ 39 | -c -O3 -g 40 | 41 | # delete all object files 42 | clean: 43 | rm *.o *.out output.txt 44 | -------------------------------------------------------------------------------- /opencl/nn/ipoint.h: -------------------------------------------------------------------------------- 1 | /*********************************************************** 2 | * --- OpenSURF --- * 3 | * This library is distributed under the GNU GPL. Please * 4 | * contact chris.evans@irisys.co.uk for more information. * 5 | * * 6 | * C. Evans, Research Into Robust Visual Features, * 7 | * MSc University of Bristol, 2008. 
* 8 | * * 9 | ************************************************************/ 10 | 11 | #ifndef IPOINT_H 12 | #define IPOINT_H 13 | 14 | #include 15 | #include 16 | 17 | 18 | 19 | //------------------------------------------------------- 20 | typedef struct{ 21 | int x; 22 | int y; 23 | float descriptor[64]; 24 | } Ipoint; 25 | 26 | //------------------------------------------------------- 27 | 28 | typedef std::vector IpVec; 29 | #endif 30 | -------------------------------------------------------------------------------- /opencl/nw/nw_kernel_v3.cl: -------------------------------------------------------------------------------- 1 | inline int maximum(int a, int b, int c) 2 | { 3 | int k; 4 | if(a <= b) 5 | k = b; 6 | else 7 | k = a; 8 | 9 | if(k <= c) 10 | return(c); 11 | else 12 | return(k); 13 | } 14 | 15 | __attribute__((max_global_work_dim(0))) 16 | __kernel void nw_kernel1(__global int* restrict reference, 17 | __global int* restrict input_itemsets, 18 | int dim, 19 | int penalty) 20 | { 21 | for (int j = 1; j < dim - 1; ++j) 22 | { 23 | int backup = input_itemsets[j * dim]; 24 | 25 | #pragma ivdep array(input_itemsets) 26 | for (int i = 1; i < dim - 1; ++i) 27 | { 28 | int index = j * dim + i; 29 | input_itemsets[index] = backup = maximum(input_itemsets[index - 1 - dim] + reference[index], 30 | backup - penalty, 31 | input_itemsets[index - dim] - penalty); 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /cuda/lud/cuda/Makefile: -------------------------------------------------------------------------------- 1 | include ../../../common/make.config 2 | 3 | DEFS += \ 4 | -DGPU_TIMER \ 5 | $(SPACE) 6 | 7 | CUDA_FLAGS += -I../common \ 8 | -use_fast_math \ 9 | -lm \ 10 | $(SPACE) 11 | 12 | CFLAGS += -I../common \ 13 | -I/usr/include/cuda \ 14 | -O3 \ 15 | -Wall \ 16 | $(SPACE) 17 | 18 | # Add source files here 19 | EXECUTABLE := lud_cuda 20 | # Cuda source files (compiled with cudacc) 21 | CUFILES := 
lud_kernel.cu 22 | # C/C++ source files (compiled with gcc / c++) 23 | CCFILES := lud.c lud_cuda.c ../common/common.c 24 | 25 | OBJS = ../common/common.o lud.o lud_kernel.o 26 | 27 | .PHONY: all clean 28 | all : $(EXECUTABLE) 29 | 30 | .c.o : 31 | $(CUDA_CC) $(CUDA_FLAGS) $(KERNEL_DIM) $(DEFS) -o $@ -c $< $(NVML_INC) $(NVML_LIB) 32 | 33 | %.o: %.cu 34 | $(CUDA_CC) $(CUDA_FLAGS) $(KERNEL_DIM) $(DEFS) -o $@ -c $< $(NVML_INC) $(NVML_LIB) 35 | 36 | 37 | $(EXECUTABLE) : $(OBJS) 38 | $(CUDA_CC) $(CUDA_FLAGS) -o $@ $? $(NVML_INC) $(NVML_LIB) 39 | 40 | clean: 41 | rm -f $(EXECUTABLE) $(OBJS) *.linkinfo 42 | -------------------------------------------------------------------------------- /opencl/b+tree/README_fpga: -------------------------------------------------------------------------------- 1 | # Problem size 2 | 3 | The problem size is defined by the input file specified by the -file command-line parameter. 4 | 5 | # Work-group configuration 6 | 7 | The number of threads in a work group with NDRange kernels is defined in problem_size.h. Macros DEFAULT_ORDER and DEFAULT_ORDER_2 define the sizes of the two kernels, kernel_gpu_opencl_v?.cl and kernel_gpu_opencl_2_v?.cl, respectively, where v? is a version number. They can be changed for example as follows: 8 | 9 | ``` 10 | make ALTERA=1 BOARD=AOC_BOARD_NAME KERNEL_DIM="-DRD_WG_SIZE_0_0=16 -DRD_WG_SIZE_1_0=16" 11 | ``` 12 | 13 | # How to Run the Benchmark 14 | 15 | The benchmark accepts two command-line parameters and a version specifier. The parameters serve as a problem input file and search command file, respectively. 16 | ``` 17 | ./b+tree.out file ../../data/b+tree/mil.txt command ../../data/b+tree/command.txt [v0|v1] 18 | ``` 19 | 20 | # Results 21 | 22 | Problem size (16, 16) failed to be compiled for Cyclone V with an error message of "Cannot fit kernel(s) on device". 
-------------------------------------------------------------------------------- /opencl/myocyte/main.h: -------------------------------------------------------------------------------- 1 | // #ifdef __cplusplus 2 | // extern "C" { 3 | // #endif 4 | 5 | //========================================================================================================================================================================================================200 6 | // PROTOTYPES 7 | //========================================================================================================================================================================================================200 8 | 9 | int 10 | main( int argc, 11 | char *argv []); 12 | 13 | //========================================================================================================================================================================================================200 14 | // END 15 | //========================================================================================================================================================================================================200 16 | 17 | // #ifdef __cplusplus 18 | // } 19 | // #endif 20 | -------------------------------------------------------------------------------- /common/set_bsp.bat: -------------------------------------------------------------------------------- 1 | @echo OFF 2 | echo Setting VisualStudio environment variables 3 | pushd %VCINSTALLDIR%\bin\amd64 4 | call vcvars64.bat 5 | 6 | echo Setting up the environment to run OpenCL BSP for Pikes Peak... 
7 | set AOCL_BOARD_PACKAGE_ROOT=C:\altera\BSP_263944 8 | set PATH=%ALTERAOCLSDKROOT%\bin;%PATH% 9 | set PATH=%ALTERAOCLSDKROOT%\host\windows64\bin;%PATH% 10 | set PATH=%AOCL_BOARD_PACKAGE_ROOT%\Software\SDK\x64\Release;%PATH% 11 | set PATH=%QSYS_ROOTDIR%;%PATH% 12 | set PATH=C:\Program Files (x86)\GnuWin32\bin;%PATH% 13 | 14 | set CL_CONTEXT_COMPILER_MODE_ALTERA=3 15 | 16 | echo ALTERAOCLSDKROOT is set to %ALTERAOCLSDKROOT% 17 | echo AOCL_BOARD_PACKAGE_ROOT is set to %AOCL_BOARD_PACKAGE_ROOT% 18 | echo Add to PATH: %ALTERAOCLSDKROOT%\bin 19 | echo Add to PATH: %ALTERAOCLSDKROOT%\host\windows64\bin 20 | echo Add to PATH: %AOCL_BOARD_PACKAGE_ROOT%\Software\SDK\x64\Release 21 | echo Add to PATH: %QSYS_ROOTDIR% 22 | echo Add to PATH: C:\Program Files (x86)\GnuWin32\bin 23 | 24 | rem cd %AOCL_BOARD_PACKAGE_ROOT% 25 | popd 26 | :end 27 | -------------------------------------------------------------------------------- /cuda/leukocyte/result.txt: -------------------------------------------------------------------------------- 1 | 2 | 0,95.594450,126.896371 3 | 1,252.456485,127.203031 4 | 2,355.352429,128.625420 5 | 3,317.906252,133.450728 6 | 4,407.308127,133.548619 7 | 5,226.505024,139.852719 8 | 6,295.397088,143.370574 9 | 7,103.349816,147.522578 10 | 8,460.908342,149.476973 11 | 9,488.414478,152.894946 12 | 10,325.356500,163.952514 13 | 11,618.551254,163.928544 14 | 12,486.980187,167.956636 15 | 13,561.095198,171.448221 16 | 14,216.365544,174.360219 17 | 15,139.935104,177.069173 18 | 16,282.869907,177.009875 19 | 17,500.013305,183.357364 20 | 18,391.490716,181.618104 21 | 19,104.629716,185.765465 22 | 20,193.010887,185.118731 23 | 21,340.933959,189.059186 24 | 22,619.736409,194.483290 25 | 23,168.733885,196.825632 26 | 24,559.668652,201.679886 27 | 25,376.684191,208.365558 28 | 26,581.215416,208.396907 29 | 27,335.817556,213.548434 30 | 28,220.860465,217.981375 31 | 29,551.852736,221.077013 32 | 30,82.243168,224.156038 33 | 31,483.149965,223.409275 34 | 
32,116.003606,226.044231 35 | 33,237.098415,252.939038 36 | 34,193.478092,264.986393 37 | 35,245.736067,264.030603 -------------------------------------------------------------------------------- /opencl/heartwall/util/timer/timer.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | //===============================================================================================================================================================================================================200 6 | // TIMER HEADER 7 | //===============================================================================================================================================================================================================200 8 | 9 | long long 10 | get_time(); 11 | 12 | //===============================================================================================================================================================================================================200 13 | // END 14 | //===============================================================================================================================================================================================================200 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | -------------------------------------------------------------------------------- /cuda/huffman/cuda_helpers.h: -------------------------------------------------------------------------------- 1 | #ifndef __CUDA_HELPERS__ 2 | #define __CUDA_HELPERS__ 3 | /************************************************************************/ 4 | /* Init CUDA */ 5 | /************************************************************************/ 6 | #if __DEVICE_EMULATION__ 7 | 8 | bool InitCUDA(void){return true;} 9 | 10 | #else 11 | bool InitCUDA(void) 12 | { 13 | int count = 0; 14 | int i = 0; 15 | 16 | cudaGetDeviceCount(&count); 17 | if(count 
== 0) { 18 | fprintf(stderr, "There is no device.\n"); 19 | return false; 20 | } 21 | 22 | for(i = 0; i < count; i++) { 23 | cudaDeviceProp prop; 24 | if(cudaGetDeviceProperties(&prop, i) == cudaSuccess) { 25 | if(prop.major >= 1) { 26 | break; 27 | } 28 | } 29 | } 30 | if(i == count) { 31 | fprintf(stderr, "There is no device supporting CUDA.\n"); 32 | return false; 33 | } 34 | cudaSetDevice(i); 35 | 36 | printf("CUDA initialized.\n"); 37 | return true; 38 | } 39 | #endif 40 | #endif -------------------------------------------------------------------------------- /opencl/myocyte/util/num/num.h: -------------------------------------------------------------------------------- 1 | // #ifdef __cplusplus 2 | // extern "C" { 3 | // #endif 4 | 5 | //===============================================================================================================================================================================================================200 6 | // PROTOTYPES 7 | //===============================================================================================================================================================================================================200 8 | 9 | int 10 | isInteger(char *str); 11 | 12 | //===============================================================================================================================================================================================================200 13 | // END 14 | //===============================================================================================================================================================================================================200 15 | 16 | // #ifdef __cplusplus 17 | // } 18 | // #endif 19 | -------------------------------------------------------------------------------- /opencl/pathfinder/pathfinder_kernel_v3.cl: -------------------------------------------------------------------------------- 1 | #define MIN(a, b) ((a)<=(b) ? 
(a) : (b)) 2 | 3 | #ifndef SSIZE 4 | #define SSIZE 4 5 | #endif 6 | 7 | __attribute__((max_global_work_dim(0))) 8 | __kernel void dynproc_kernel (__global int* restrict wall, 9 | __global int* restrict src, 10 | __global int* restrict dst, 11 | int cols, 12 | int t) 13 | { 14 | #pragma unroll SSIZE 15 | for(int n = 0; n < cols; n++) 16 | { 17 | int min = src[n]; 18 | // the following two accesses could be out-of-bound 19 | // however, adding a condition to prevent them from going out-of-bound prevents the compiler from coalescing the accesses 20 | // this does not cause any trouble at run-time 21 | int right = src[n + 1]; 22 | int left = src[n - 1]; 23 | 24 | if (n > 0) 25 | { 26 | min = MIN(min, left); 27 | } 28 | if (n < cols - 1) 29 | { 30 | min = MIN(min, right); 31 | } 32 | dst[n] = wall[(t + 1) * cols + n] + min; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /cuda/b+tree/main.h: -------------------------------------------------------------------------------- 1 | // # ifdef __cplusplus 2 | // extern "C" { 3 | // # endif 4 | 5 | //===============================================================================================================================================================================================================200 6 | // HEADER 7 | //===============================================================================================================================================================================================================200 8 | 9 | int 10 | main( int argc, 11 | char *argv []); 12 | 13 | //===============================================================================================================================================================================================================200 14 | // END 15 | 
//===============================================================================================================================================================================================================200 16 | 17 | // # ifdef __cplusplus 18 | // } 19 | // # endif 20 | -------------------------------------------------------------------------------- /opencl/b+tree/main.h: -------------------------------------------------------------------------------- 1 | // # ifdef __cplusplus 2 | // extern "C" { 3 | // # endif 4 | 5 | //===============================================================================================================================================================================================================200 6 | // HEADER 7 | //===============================================================================================================================================================================================================200 8 | 9 | int 10 | main( int argc, 11 | char *argv []); 12 | 13 | //===============================================================================================================================================================================================================200 14 | // END 15 | //===============================================================================================================================================================================================================200 16 | 17 | // # ifdef __cplusplus 18 | // } 19 | // # endif 20 | -------------------------------------------------------------------------------- /opencl/nn/nn_kernel_v2.cl: -------------------------------------------------------------------------------- 1 | //#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable 2 | 3 | typedef struct latLong 4 | { 5 | float lat; 6 | float lng; 7 | } LatLong; 8 | 9 | #ifndef SIMD_LANES 10 | #define SIMD_LANES 16 11 | #endif 12 | 13 | #ifndef COMPUTE_UNITS 14 | #define 
COMPUTE_UNITS 3 15 | #endif 16 | 17 | __attribute__((reqd_work_group_size(64,1,1))) 18 | __attribute__((num_simd_work_items(SIMD_LANES))) 19 | __attribute__((num_compute_units(COMPUTE_UNITS))) 20 | __kernel void NearestNeighbor(__global LatLong* restrict d_locations, 21 | __global float* restrict d_distances, 22 | const int numRecords, 23 | const float lat, 24 | const float lng) 25 | { 26 | int globalId = get_global_id(0); 27 | 28 | if (globalId < numRecords) 29 | { 30 | __global LatLong *latLong = d_locations + globalId; 31 | __global float *dist = d_distances + globalId; 32 | *dist = (float)sqrt( (lat - latLong->lat) * (lat - latLong->lat) + (lng - latLong->lng) * (lng - latLong->lng) ); 33 | } 34 | } -------------------------------------------------------------------------------- /opencl/nn/nn_kernel_v4.cl: -------------------------------------------------------------------------------- 1 | //#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable 2 | 3 | typedef struct latLong 4 | { 5 | float lat; 6 | float lng; 7 | } LatLong; 8 | 9 | #ifndef SIMD_LANES 10 | #define SIMD_LANES 16 11 | #endif 12 | 13 | #ifndef COMPUTE_UNITS 14 | #define COMPUTE_UNITS 4 15 | #endif 16 | 17 | __attribute__((reqd_work_group_size(64,1,1))) 18 | __attribute__((num_simd_work_items(SIMD_LANES))) 19 | __attribute__((num_compute_units(COMPUTE_UNITS))) 20 | __kernel void NearestNeighbor(__global LatLong* restrict d_locations, 21 | __global float* restrict d_distances, 22 | const int numRecords, 23 | const float lat, 24 | const float lng) 25 | { 26 | int globalId = get_global_id(0); 27 | 28 | if (globalId < numRecords) 29 | { 30 | __global LatLong *latLong = d_locations + globalId; 31 | __global float *dist = d_distances + globalId; 32 | *dist = (float)sqrt( (lat - latLong->lat) * (lat - latLong->lat) + (lng - latLong->lng) * (lng - latLong->lng) ); 33 | } 34 | } -------------------------------------------------------------------------------- /openmp/b+tree/main.h: 
-------------------------------------------------------------------------------- 1 | // # ifdef __cplusplus 2 | // extern "C" { 3 | // # endif 4 | 5 | //===============================================================================================================================================================================================================200 6 | // HEADER 7 | //===============================================================================================================================================================================================================200 8 | 9 | int 10 | main( int argc, 11 | char *argv []); 12 | 13 | //===============================================================================================================================================================================================================200 14 | // END 15 | //===============================================================================================================================================================================================================200 16 | 17 | // # ifdef __cplusplus 18 | // } 19 | // # endif 20 | -------------------------------------------------------------------------------- /openmp/backprop/facetrain.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "backprop.h" 5 | #include "omp.h" 6 | 7 | extern char *strcpy(); 8 | extern void exit(); 9 | 10 | int layer_size = 0; 11 | 12 | void backprop_face() 13 | { 14 | BPNN *net; 15 | int i; 16 | float out_err, hid_err; 17 | net = bpnn_create(layer_size, 16, 1); // (16, 1 can not be changed) 18 | printf("Input layer size : %d\n", layer_size); 19 | load(net); 20 | //entering the training kernel, only one iteration 21 | printf("Starting training kernel\n"); 22 | bpnn_train_kernel(net, &out_err, &hid_err); 23 | bpnn_free(net); 24 | printf("Training done\n"); 25 | } 26 | 27 | int setup(argc, 
argv) 28 | int argc; 29 | char *argv[]; 30 | { 31 | if(argc!=2){ 32 | fprintf(stderr, "usage: backprop \n"); 33 | exit(0); 34 | } 35 | 36 | layer_size = atoi(argv[1]); 37 | 38 | int seed; 39 | 40 | seed = 7; 41 | bpnn_initialize(seed); 42 | backprop_face(); 43 | 44 | exit(0); 45 | } 46 | -------------------------------------------------------------------------------- /cuda/leukocyte/meschach_lib/MACHINES/OS2/README: -------------------------------------------------------------------------------- 1 | Message received from Wenzel Matiaske: 2 | 3 | From mati1831@perform.ww.TU-Berlin.DE Wed Oct 4 11:34:38 1995 4 | Date: Wed, 04 Oct 95 17:32:37 GMT 5 | From: Wenzel Matiaske 6 | To: david.stewart@anu.edu.au 7 | Subject: meschach documentation 8 | 9 | Dear David, 10 | 11 | [snip] 12 | 13 | By the way: I have installed the library with EMX 0.9a + gcc under 14 | OS/2. I have included the makefile, may it is useful for others. 15 | 16 | [Note: I have also put the "standard" gcc machine.h here as well. 17 | -- David Stewart, 4th Oct, 1995] 18 | 19 | Best regards 20 | 21 | //wenzel 22 | 23 | -- 24 | ___ 25 | wenzel matiaske | / /_/-Berlin 26 | | 27 | | mail: Technical University Berlin 28 | | Dept. of Economics & Management, WW6 29 | | Uhlandstr. 
4-5, D-10623 Berlin 30 | | phone: +49 30 314-22574 31 | | email: W.Matiaske@ww.TU-Berlin.de 32 | 33 | -------------------------------------------------------------------------------- /opencl/backprop/README_fpga.md: -------------------------------------------------------------------------------- 1 | # Compilation 2 | 3 | Default: 4 | 5 | ``` 6 | make ALTERA=1 BOARD=[board_name] 7 | ``` 8 | 9 | Host only: 10 | ``` 11 | make ALTERA=1 HOST_ONLY=1 12 | ``` 13 | 14 | Custom kernel: 15 | 16 | ``` 17 | aoc [kernel_name] -g -v --report --board [board_name] -I../../ -DUSE_RESTRICT -D[parameter_name]=[parameter_value] 18 | ``` 19 | 20 | 21 | # Execution 22 | 23 | Default run: 24 | 25 | ``` 26 | ./run v[version_number] 27 | ``` 28 | 29 | Custom run: 30 | 31 | ``` 32 | ./backprop v[version_number] 33 | ``` 34 | 35 | 36 | # Kernel variations 37 | 38 | See the GitHub Wiki page for more general information. 39 | 40 | ## v1 41 | 42 | Straightforward single work-item kernel created based on the OpenMP 43 | version plus restrict. 44 | 45 | ## v3 46 | 47 | Uses shift register-based optimization for floating-point reduction 48 | in the first three kernels and also adds #pragma ivdep to both loops 49 | in the last kernel to avoid false load/store dependency on w and 50 | oldw global buffers. "__attribute__((max_global_work_dim(0)))" is 51 | also used.
52 | -------------------------------------------------------------------------------- /opencl/backprop/facetrain.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "backprop.h" 5 | #include "omp.h" 6 | 7 | extern char *strcpy(); 8 | extern void exit(); 9 | 10 | int layer_size = 0; 11 | 12 | void backprop_face() 13 | { 14 | BPNN *net; 15 | float out_err, hid_err; 16 | net = bpnn_create(layer_size, 16, 1); // (16, 1 can not be changed) 17 | 18 | printf("Input layer size : %d\n", layer_size); 19 | load(net); 20 | //entering the training kernel, only one iteration 21 | printf("Starting training kernel\n"); 22 | bpnn_train_kernel(net, &out_err, &hid_err); 23 | bpnn_free(net); 24 | printf("Finish the training for one iteration\n"); 25 | } 26 | 27 | int setup(int argc, char **argv) 28 | { 29 | if (argc!=2){ 30 | fprintf(stderr, "usage: backprop \n"); 31 | return -1; 32 | } 33 | layer_size = atoi(argv[1]); 34 | if (layer_size%16!=0){ 35 | fprintf(stderr, "The number of input points must be divided by 16\n"); 36 | return -1; 37 | } 38 | 39 | return 0; 40 | } 41 | --------------------------------------------------------------------------------