├── .github ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── makefile.yml ├── .gitignore ├── .travis.yml ├── ADA ├── Makefile ├── args.adb ├── hello.adb ├── nstream.adb ├── nstream_array.adb └── transpose.adb ├── AMPI ├── AMR │ ├── Makefile │ ├── README.md │ ├── amr.c │ └── implementation_details.md ├── Branch │ ├── Makefile │ ├── README │ ├── branch.c │ └── func_gen ├── DGEMM │ ├── Makefile │ └── dgemm.c ├── Nstream │ ├── Makefile │ └── nstream.c ├── PIC │ ├── Makefile │ └── pic.c ├── Random │ ├── Makefile │ └── random.c ├── Reduce │ ├── Makefile │ └── reduce.c ├── Sparse │ ├── Makefile │ ├── README │ └── sparse.c ├── Stencil │ ├── Makefile │ └── stencil.c ├── Synch_global │ ├── Makefile │ └── global.c ├── Synch_p2p │ ├── Makefile │ └── p2p.c └── Transpose │ ├── Makefile │ └── transpose.c ├── BIBLIOGRAPHY.md ├── C1z ├── Makefile ├── generate-c-stencil.py ├── nstream-alloc-target.c ├── nstream-memcpy-target.c ├── nstream-memkind.c ├── nstream-mmap.c ├── nstream-mpi.c ├── nstream-openacc.c ├── nstream-openmp.c ├── nstream-petsc.c ├── nstream-target.c ├── nstream-taskloop.c ├── nstream-ua-target.c ├── nstream-usm-target.c ├── p2p-2d.c ├── p2p-avx-tasks-openmp.c ├── p2p-avx.c ├── p2p-hyperplane-2d-openmp.c ├── p2p-hyperplane-openmp.c ├── p2p-kernel.h ├── p2p-simd-openmp.c ├── p2p-sse.c ├── p2p-tasks-2d-openmp.c ├── p2p-tasks-openmp.c ├── p2p.c ├── prk_openmp.h ├── prk_petsc.h ├── prk_util.h ├── stencil-2d-openmp.c ├── stencil-cilk.c ├── stencil-openacc.c ├── stencil-openmp.c ├── stencil-target.c ├── stencil-taskloop.c ├── stencil_cilk.h ├── stencil_openacc.h ├── stencil_openmp.h ├── stencil_seq.h ├── stencil_target.h ├── stencil_taskloop.h ├── transpose-2d-openacc.c ├── transpose-2d-openmp.c ├── transpose-a2a-mpi.c ├── transpose-cilk.c ├── transpose-ispc.c ├── transpose-openacc.c ├── transpose-openmp.c ├── transpose-p2p-mpi.c ├── transpose-petsc.c ├── transpose-target.c ├── transpose-taskloop.c ├── transpose-thread.c └── transpose.ispc ├── CHAPEL 
├── COPYRIGHT ├── LICENSE ├── Makefile ├── README.md ├── dgemm-summa.chpl ├── dgemm.chpl ├── nstream.chpl ├── p2p-serial-fast.chpl ├── pic.chpl ├── random_draw.c ├── random_draw.h ├── sparse.chpl ├── stencil-opt.chpl ├── stencil-serial.chpl ├── stencil.chpl ├── transpose-serial.chpl └── transpose.chpl ├── CHARM++ ├── Stencil │ ├── Makefile │ ├── ci_reqs.h │ ├── stencil.C │ └── stencil.ci ├── Synch_p2p │ ├── Makefile │ ├── p2p.C │ └── p2p.ci └── Transpose │ ├── Makefile │ ├── transpose.C │ └── transpose.ci ├── CODEOWNERS ├── CONTRIBUTING.md ├── COPYING ├── Csharp ├── Makefile ├── hello.cs ├── nstream.cs └── transpose.cs ├── Cxx11 ├── Makefile ├── PIC.md ├── add32.cl ├── add64.cl ├── dgemm-cblas.cc ├── dgemm-cublas-cudastf.cu ├── dgemm-cublas.cu ├── dgemm-cudastf.cu ├── dgemm-hipblas.cc ├── dgemm-managed-cublas.cu ├── dgemm-mpi-cblas.cc ├── dgemm-mpi-cublas.cu ├── dgemm-multigpu-cublas.cu ├── dgemm-multigpu-onemkl.cc ├── dgemm-onemkl.cc ├── dgemm-sycl.cc ├── dgemm-vector.cc ├── dgemm.cc ├── generate-cxx-stencil.py ├── generate-opencl-stencil.py ├── generate-sycl-stencil.py ├── nstream-boost-compute.cc ├── nstream-celerity.cc ├── nstream-cublas.cu ├── nstream-cuda.cu ├── nstream-device-thrust.cu ├── nstream-dpcpp.cc ├── nstream-executors.cc ├── nstream-halide.cc ├── nstream-hip.cc ├── nstream-hipblas.cc ├── nstream-hipstl.cc ├── nstream-host-thrust.cc ├── nstream-hpx.cc ├── nstream-kokkos.cc ├── nstream-managed-cuda.cu ├── nstream-managed-hip.cc ├── nstream-mpi.cc ├── nstream-multigpu-dpcpp.cc ├── nstream-nccl.cu ├── nstream-occa.cc ├── nstream-onedpl.cc ├── nstream-onemkl.cc ├── nstream-openacc.cc ├── nstream-opencl.cc ├── nstream-openmp-target.cc ├── nstream-openmp.cc ├── nstream-pstl.cc ├── nstream-raja.cc ├── nstream-ranges.cc ├── nstream-stdpar.cc ├── nstream-stl.cc ├── nstream-sycl-explicit-usm.cc ├── nstream-sycl-explicit.cc ├── nstream-sycl-usm.cc ├── nstream-sycl.cc ├── nstream-taskloop.cc ├── nstream-tbb.cc ├── nstream-upcxx.cc ├── 
nstream-valarray-boost-compute.cc ├── nstream-valarray.cc ├── nstream-vector-raja.cc ├── nstream-vector.cc ├── nstream.cc ├── nstream.okl ├── nstream32.cl ├── nstream64.cl ├── opencl.hpp ├── p2p-cuda.cu ├── p2p-cudastf.cu ├── p2p-doacross-openmp.cc ├── p2p-hyperplane-openacc.cc ├── p2p-hyperplane-openmp.cc ├── p2p-hyperplane-pstl.cc ├── p2p-hyperplane-stl.cc ├── p2p-hyperplane-sycl-graph.cc ├── p2p-hyperplane-sycl.cc ├── p2p-hyperplane-tbb.cc ├── p2p-innerloop-opencl.cc ├── p2p-innerloop-tbb.cc ├── p2p-kernel.h ├── p2p-raja.cc ├── p2p-tasks-openmp.cc ├── p2p-tasks-tbb.cc ├── p2p-tbb.cc ├── p2p-vector-raja.cc ├── p2p-vector.cc ├── p2p.cc ├── p2p.cl ├── pic-sycl.cc ├── pic.cc ├── prk_cuda.h ├── prk_dgemm_codeplay.h ├── prk_executors.h ├── prk_hip.h ├── prk_hpx.h ├── prk_kokkos.h ├── prk_mpi.h ├── prk_nccl.h ├── prk_nvshmem.h ├── prk_opencl.h ├── prk_openmp.h ├── prk_oshmem.h ├── prk_pstl.h ├── prk_raja.h ├── prk_ranges.h ├── prk_simd.h ├── prk_sycl.h ├── prk_tbb.h ├── prk_thrust.h ├── prk_upcxx.h ├── prk_util.h ├── random_draw.c ├── random_draw.h ├── sgemm-cblas.cc ├── sgemm-cublas.cu ├── sgemm-hipblas.cc ├── sparse-vector.cc ├── sparse.cc ├── stencil-2d-sycl.cc ├── stencil-cuda.cu ├── stencil-cudastf.cu ├── stencil-halide.cc ├── stencil-hip.cc ├── stencil-kokkos.cc ├── stencil-mpi.cc ├── stencil-openacc.cc ├── stencil-opencl.cc ├── stencil-openmp-target.cc ├── stencil-openmp.cc ├── stencil-pstl.cc ├── stencil-raja.cc ├── stencil-ranges.cc ├── stencil-stl.cc ├── stencil-sycl-usm.cc ├── stencil-sycl.cc ├── stencil-taskloop.cc ├── stencil-tbb.cc ├── stencil-vector-raja.cc ├── stencil-vector.cc ├── stencil.cc ├── stencil_cuda.hpp ├── stencil_hip.hpp ├── stencil_kokkos.hpp ├── stencil_openacc.hpp ├── stencil_openmp.hpp ├── stencil_pgnu.hpp ├── stencil_pstl.hpp ├── stencil_raja.hpp ├── stencil_rajaview.hpp ├── stencil_ranges.hpp ├── stencil_seq.hpp ├── stencil_stl.hpp ├── stencil_sycl.hpp ├── stencil_target.hpp ├── stencil_taskloop.hpp ├── stencil_tbb.hpp ├── 
stencil_vector.hpp ├── transpose-2d-sycl.cc ├── transpose-a2a-mpi-nccl.cu ├── transpose-a2a-mpi.cc ├── transpose-a2a-nvshmem.cu ├── transpose-async.cc ├── transpose-cblas.cc ├── transpose-cublas.cu ├── transpose-cuda.cu ├── transpose-cudastf.cu ├── transpose-device-thrust.cu ├── transpose-dpcpp.cc ├── transpose-executors.cc ├── transpose-get-mpi.cc ├── transpose-get-nvshmem.cu ├── transpose-get-oshmem.cc ├── transpose-hip.cc ├── transpose-hipblas.cc ├── transpose-host-thrust.cc ├── transpose-kernel.h ├── transpose-kokkos.cc ├── transpose-occa.cc ├── transpose-openacc.cc ├── transpose-opencl.cc ├── transpose-openmp-target.cc ├── transpose-openmp.cc ├── transpose-p2p-mpi-nccl.cu ├── transpose-p2p-mpi.cc ├── transpose-pstl.cc ├── transpose-ptr-nvshmem.cu ├── transpose-raja.cc ├── transpose-ranges.cc ├── transpose-stdpar.cc ├── transpose-stl.cc ├── transpose-sycl-usm.cc ├── transpose-sycl.cc ├── transpose-taskloop.cc ├── transpose-tbb.cc ├── transpose-thread.cc ├── transpose-valarray.cc ├── transpose-vector-raja.cc ├── transpose-vector.cc ├── transpose.cc ├── transpose.okl ├── transpose32.cl ├── transpose64.cl ├── xgemm-cblas.cc ├── xgemm-cublas.cu ├── xgemm-hipblas.cc └── xgemm-onemkl.cc ├── FENIX ├── AMR │ ├── Makefile │ ├── README.md │ ├── amr.c │ ├── implementation_details.md │ └── timestep.c ├── Sparse │ ├── Makefile │ ├── README │ └── timestep.c ├── Stencil │ ├── Makefile │ ├── stencil.c │ └── timestep.c ├── Synch_p2p │ ├── Makefile │ ├── p2p.c │ └── timestep.c └── Transpose │ ├── Makefile │ ├── timestep.c │ └── transpose.c ├── FG_MPI ├── Branch │ ├── Makefile │ ├── README │ ├── branch.c │ └── func_gen ├── DGEMM │ ├── Makefile │ └── dgemm.c ├── Nstream │ ├── Makefile │ └── nstream.c ├── PIC-static │ ├── Makefile │ └── pic.c ├── Random │ ├── Makefile │ └── random.c ├── Reduce │ ├── Makefile │ └── reduce.c ├── Sparse │ ├── Makefile │ ├── README │ └── sparse.c ├── Stencil │ ├── Makefile │ └── stencil.c ├── Synch_global │ ├── Makefile │ └── global.c ├── Synch_p2p │ 
├── Makefile │ └── p2p.c └── Transpose │ ├── Makefile │ └── transpose.c ├── FORTRAN ├── Makefile ├── README.md ├── dgemm-blas.F90 ├── dgemm-ga.F90 ├── dgemm-openmp-target.F90 ├── dgemm-openmp.F90 ├── dgemm-pretty.F90 ├── dgemm-stdpar.F90 ├── dgemm-taskloop-openmp.F90 ├── dgemm.F90 ├── generate-fortran-stencil.py ├── nstream-coarray.F90 ├── nstream-cufortran.F90 ├── nstream-ga.F90 ├── nstream-mpi.F90 ├── nstream-openacc.F90 ├── nstream-openmp-target.F90 ├── nstream-openmp.F90 ├── nstream-pretty.F90 ├── nstream-stdpar.F90 ├── nstream-taskloop-openmp.F90 ├── nstream.F90 ├── p2p-async-openacc.F90 ├── p2p-coarray.F90 ├── p2p-doacross-openmp.F90 ├── p2p-innerloop-openacc.F90 ├── p2p-innerloop-openmp.F90 ├── p2p-innerloop.F90 ├── p2p-openacc.F90 ├── p2p-openmp-target.F90 ├── p2p-tasks-openmp.F90 ├── p2p.F90 ├── pic-openmp.F90 ├── pic.F90 ├── pic_soa-openmp.F90 ├── pic_soa.F90 ├── prk_mod.F90 ├── prk_mpi.F90 ├── stencil-coarray.F90 ├── stencil-openacc.F90 ├── stencil-openmp-target.F90 ├── stencil-openmp.F90 ├── stencil-pretty.F90 ├── stencil-stdpar.F90 ├── stencil-taskloop-openmp.F90 ├── stencil.F90 ├── stencil_openmp.F90 ├── stencil_pretty.F90 ├── stencil_serial.F90 ├── stencil_target.F90 ├── stencil_taskloop.F90 ├── transpose-a2a-mpi.F90 ├── transpose-acc-mpi.F90 ├── transpose-coarray.F90 ├── transpose-cufortran.F90 ├── transpose-ga.F90 ├── transpose-get-mpi.F90 ├── transpose-openacc.F90 ├── transpose-openmp-target-loop.F90 ├── transpose-openmp-target.F90 ├── transpose-openmp.F90 ├── transpose-p2p-mpi.F90 ├── transpose-pointer.F90 ├── transpose-pretty.F90 ├── transpose-stdpar.F90 ├── transpose-taskloop-openmp.F90 ├── transpose-tasks-openmp.F90 └── transpose.F90 ├── GO ├── Makefile ├── README.md ├── dgemm.go ├── nstream.go └── transpose.go ├── GRAPPA ├── Nstream │ ├── Makefile │ └── nstream.cpp ├── Random │ ├── Makefile │ └── random.cpp ├── Stencil │ ├── Makefile │ └── stencil.cpp ├── Synch_global │ ├── Makefile │ └── global.cpp ├── Synch_p2p │ ├── Makefile │ └── p2p.cpp 
└── Transpose │ ├── Makefile │ └── transpose.cpp ├── GettingStarted.md ├── JAVA ├── Makefile ├── README.md ├── nstream.java ├── p2p.java ├── stencil.java └── transpose.java ├── JULIA ├── Project.toml ├── README.md ├── dgemm-blis.jl ├── dgemm-cuda.jl ├── dgemm-mkl.jl ├── dgemm-octavian.jl ├── dgemm-pretty.jl ├── dgemm-tullio-cuda.jl ├── dgemm-tullio.jl ├── dgemm.jl ├── nstream-cuda-kernel.jl ├── nstream-cuda-pretty.jl ├── nstream-mpi.jl ├── nstream-pretty-mpi.jl ├── nstream-pretty.jl ├── nstream.jl ├── p2p.jl ├── stencil.jl ├── transpose-pretty.jl └── transpose.jl ├── LEGION ├── Stencil │ ├── Makefile │ └── stencil.cc └── Transpose │ ├── Makefile │ └── transpose.cc ├── LUA ├── README.md └── nstream.lua ├── MPI1 ├── AMR │ ├── Makefile │ ├── README.md │ ├── amr.c │ ├── implementation_details.md │ └── timestep.c ├── Branch │ ├── Makefile │ ├── README │ ├── branch.c │ └── func_gen ├── DGEMM │ ├── Makefile │ └── dgemm.c ├── Nstream │ ├── Makefile │ └── nstream.c ├── PIC-static │ ├── Makefile │ └── pic.c ├── Random │ ├── Makefile │ └── random.c ├── Reduce │ ├── Makefile │ └── reduce.c ├── Sparse │ ├── Makefile │ ├── README │ └── sparse.c ├── Stencil │ ├── Makefile │ └── stencil.c ├── Synch_global │ ├── Makefile │ └── global.c ├── Synch_p2p │ ├── Makefile │ └── p2p.c └── Transpose │ ├── Makefile │ ├── transpose-a2a.c │ └── transpose.c ├── MPIOPENMP ├── Nstream │ ├── Makefile │ └── nstream.c ├── Stencil │ ├── Makefile │ └── stencil.c ├── Synch_p2p │ ├── Makefile │ ├── README │ ├── p2p.c │ └── runp2p └── Transpose │ ├── Makefile │ └── transpose.c ├── MPIRMA ├── Stencil │ ├── Makefile │ └── stencil.c ├── Synch_p2p │ ├── Makefile │ └── p2p.c └── Transpose │ ├── Makefile │ └── transpose.c ├── MPISHM ├── Stencil │ ├── Makefile │ └── stencil.c ├── Synch_p2p │ ├── Makefile │ └── p2p.c └── Transpose │ ├── Makefile │ └── transpose.c ├── Makefile ├── OCTAVE ├── nstream-pretty.m ├── nstream.m ├── p2p.m ├── stencil-pretty.m ├── stencil.m ├── test_args.m ├── transpose-pretty.m └── 
transpose.m ├── OPENMP ├── Branch │ ├── Makefile │ ├── README │ ├── branch.c │ └── func_gen ├── DGEMM │ ├── Makefile │ └── dgemm.c ├── Nstream │ ├── Makefile │ └── nstream.c ├── PIC │ ├── Makefile │ └── pic.c ├── Random │ ├── Makefile │ └── random.c ├── Reduce │ ├── Makefile │ └── reduce.c ├── Refcount │ ├── Makefile │ ├── refcount.c │ └── sweepRefCount.py ├── Sparse │ ├── Makefile │ ├── README │ └── sparse.c ├── Stencil │ ├── Makefile │ └── stencil.c ├── Synch_global │ ├── Makefile │ └── global.c ├── Synch_p2p │ ├── Makefile │ └── p2p.c └── Transpose │ ├── Makefile │ └── transpose.c ├── PYTHON ├── README.md ├── dgemm-numpy.py ├── dgemm.py ├── dgemm_list.py ├── dgemm_omp.py ├── nstream-cupy.py ├── nstream-mpi.py ├── nstream-numba.py ├── nstream-numpy-mpi.py ├── nstream-numpy-shmem.py ├── nstream-numpy.py ├── nstream.py ├── p2p-numba-mpi.py ├── p2p-numba-shmem.py ├── p2p-numba.py ├── p2p-numpy-mpi.py ├── p2p-numpy-shmem.py ├── p2p-numpy.py ├── p2p.py ├── sparse-numpy.py ├── sparse-scipy.py ├── sparse.py ├── stencil-cupy.py ├── stencil-numba-mpi.py ├── stencil-numba-shmem.py ├── stencil-numba.py ├── stencil-numpy-mpi.py ├── stencil-numpy-shmem.py ├── stencil-numpy.py ├── stencil.py ├── transpose-cupy.py ├── transpose-numba.py ├── transpose-numpy-mpi-p2p.py ├── transpose-numpy-mpi-rma.py ├── transpose-numpy-mpi.py ├── transpose-numpy-put.py ├── transpose-numpy-shmem-a2a.py ├── transpose-numpy-shmem-get.py ├── transpose-numpy.py └── transpose.py ├── README.md ├── README.special ├── RUBY └── nstream.rb ├── RUST ├── Makefile ├── common │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── dgemm-blis │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── dgemm-iter │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── dgemm-rayon │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── dgemm │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── legacy │ ├── Makefile │ ├── p2p.rs │ ├── stencil-old.rs │ ├── stencil.rs │ └── transpose.rs ├── nstream-iter │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── nstream-rayon 
│ ├── Cargo.toml │ └── src │ │ └── main.rs ├── nstream-unsafe │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── nstream │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── p2p │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── pic │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── stencil │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── transpose-iter │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── transpose-rayon │ ├── Cargo.toml │ └── src │ │ └── main.rs └── transpose │ ├── Cargo.toml │ └── src │ └── main.rs ├── SCALA ├── Makefile ├── README.md ├── nstream.scala ├── p2p.scala ├── stencil.scala └── transpose.scala ├── SERIAL ├── AMR │ ├── Makefile │ └── amr.c ├── Branch │ ├── Makefile │ ├── README │ ├── branch.c │ └── func_gen ├── DGEMM │ ├── Makefile │ └── dgemm.c ├── Nstream │ ├── Makefile │ └── nstream.c ├── PIC │ ├── Makefile │ └── pic.c ├── Random │ ├── Makefile │ └── random.c ├── Reduce │ ├── Makefile │ └── reduce.c ├── Sparse │ ├── Makefile │ ├── README │ └── sparse.c ├── Stencil │ ├── Makefile │ └── stencil.c ├── Synch_p2p │ ├── Makefile │ └── p2p.c └── Transpose │ ├── Makefile │ └── transpose.c ├── SHMEM ├── Stencil │ ├── Makefile │ └── stencil.c ├── Synch_p2p │ ├── Makefile │ └── p2p.c └── Transpose │ ├── Makefile │ ├── transpose.c │ └── transpose_a2a.c ├── SUPPORT.md ├── UPC ├── README.md ├── Stencil │ ├── Makefile │ └── stencil.c ├── Synch_p2p │ ├── Makefile │ └── p2p.c └── Transpose │ ├── Makefile │ └── transpose.c ├── ci ├── build-run-prk.sh ├── install-armci-mpi.sh ├── install-autotools.sh ├── install-berkeley-upc.sh ├── install-boost.sh ├── install-chapel.sh ├── install-charm++.sh ├── install-clang.sh ├── install-cmake.sh ├── install-cudastf.sh ├── install-deps.sh ├── install-executors.sh ├── install-fgmpi.sh ├── install-ga.sh ├── install-gasnet.sh ├── install-gcc.sh ├── install-grappa.sh ├── install-hpx.sh ├── install-hpx3.sh ├── install-hpx5.sh ├── install-hydra.sh ├── install-intrepid-upc.sh ├── install-julia.sh ├── install-kokkos.sh ├── install-legion.sh ├── 
install-libfabric.sh ├── install-mpi.sh ├── install-musl.sh ├── install-occa.sh ├── install-octave.sh ├── install-opencoarrays.sh ├── install-ornl-openshmem.sh ├── install-oshmpi.sh ├── install-petsc.sh ├── install-pstl.sh ├── install-python.sh ├── install-raja.sh ├── install-ranges.sh ├── install-rust.sh ├── install-sandia-openshmem.sh ├── install-sycl.sh ├── install-tbb.sh └── install-upcxx.sh ├── common ├── AMPI.defs ├── CHARM++.defs ├── FENIX.defs ├── FENIX_bail_out.c ├── FG_MPI.defs ├── GRAPPA.defs ├── LEGION.defs ├── MPI.defs ├── MPIOPENMP.defs ├── MPI_bail_out.c ├── OPENMP.defs ├── OPENMP_bail_out.c ├── PRKVERSION ├── README.freebsd ├── RUST.defs ├── SERIAL.defs ├── SHMEM.defs ├── SHMEM_bail_out.c ├── Stencil │ ├── loop_gen │ └── loop_gen_amr ├── UPC.defs ├── make.common ├── make.defs.arm ├── make.defs.armgcc ├── make.defs.boost ├── make.defs.cray ├── make.defs.cuda ├── make.defs.freebsd ├── make.defs.gcc ├── make.defs.hip ├── make.defs.ibmbg ├── make.defs.ibmp9nv ├── make.defs.intel ├── make.defs.llvm ├── make.defs.musl ├── make.defs.nvhpc ├── make.defs.old ├── make.defs.oneapi ├── make.defs.pgi ├── make.defs.upcxx-hpx ├── random_draw.c ├── topology.c └── wtime.c ├── doc ├── AMR-PRK.pdf ├── Boost.md ├── HALIDE.md ├── HIP.md ├── HPX.md ├── Hammond-PPP2019.pdf ├── IXPUG_Invited2_Hammond.pdf ├── KOKKOS.md ├── OCCA.md ├── OpenCL.md ├── PRK for ETH.pdf ├── ParallelSTL.md ├── RAJA.md ├── SYCL.md ├── flang-new.md ├── oneAPI.md ├── par-res-kern-report-v1.0.pdf └── par-res-kern-report-v1.3.pdf ├── include ├── lcg.h ├── par-res-kern_fenix.h ├── par-res-kern_fg-mpi.h ├── par-res-kern_general.h ├── par-res-kern_legion.h ├── par-res-kern_mpi.h ├── par-res-kern_mpiomp.h ├── par-res-kern_omp.h ├── par-res-kern_shmem.h ├── par-res-kern_upc.h └── random_draw.h ├── logo ├── PRK logo.jpeg ├── PRK logo.pdf ├── PRK logo.png ├── PRK logo.pptx └── README.md └── scripts ├── small ├── Makefile_FENIX ├── Makefile_FG_MPI ├── runall ├── runampi ├── runcharm++ ├── rundarwin ├── 
runfenix.in ├── runfgmpi.in ├── runfreaks ├── rungrappa ├── runlegion ├── runmpi1 ├── runmpiopenmp ├── runmpirma ├── runmpishm ├── runopenmp ├── runserial ├── runshmem └── runupc └── wide ├── Makefile_FG_MPI ├── runall ├── runcharm++ ├── rundarwin ├── runfgmpi.in ├── runfreaks ├── rungrappa ├── runmpi1 ├── runmpiopenmp ├── runmpirma ├── runmpishm ├── runopenmp ├── runserial ├── runshmem └── runupc /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## What type of issue is this? 2 | 3 | - [ ] Bug in the code or other problem 4 | - [ ] Inadequate/incorrect documentation 5 | - [ ] Feature request 6 | 7 | If this is a bug report, please use the following template. 8 | Otherwise, please delete the rest of the template. 9 | 10 | ## Where does this bug appear? 11 | 12 | Check all that apply: 13 | - [ ] MacOS 14 | - [ ] Linux 15 | - [ ] Cray 16 | - [ ] GCC 17 | - [ ] Clang 18 | - [ ] Intel compiler 19 | - [ ] MPICH and derivatives (MVAPICH2, Intel MPI, Cray MPI, etc.) 20 | - [ ] Open-MPI 21 | 22 | ### Operating system 23 | 24 | What is the output of `uname -a`? 25 | 26 | ### Compiler 27 | 28 | What is the output of `${COMPILER} -v` or `${COMPILER} --version`? 29 | 30 | ### PRK build information 31 | 32 | Please attach or inline `make.defs`. 33 | 34 | ## Output showing problem 35 | 36 | If the output is short, please inline it here. 37 | Otherwise, please pipe it to a plain text file and attach that file. 38 | Note that you may need to use `$command > $log 2>&1` to capture the error messages. 39 | 40 | **Please do not attach screenshots of your terminal.** 41 | 42 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | If this pull request is fixing a bug, please link the associated issue. 2 | The rest of this template does not apply. 
3 | 4 | If this pull request is providing a new implementation of the PRKs, 5 | please use the following template. 6 | 7 | Note that checking all of the boxes is not required. 8 | 9 | ## New PRK implementation checklist 10 | 11 | ### Which kernels are implemented? 12 | 13 | - [ ] synch_p2p (p2p) 14 | - [ ] stencil 15 | - [ ] transpose 16 | - [ ] nstream 17 | - [ ] dgemm 18 | - [ ] reduce 19 | - [ ] sparse 20 | - [ ] branch 21 | - [ ] random 22 | - [ ] refcount 23 | - [ ] synch_global 24 | - [ ] PIC 25 | - [ ] AMR 26 | 27 | ### Documentation and build examples 28 | 29 | If your implementation uses a new programming model that is not 30 | ubiquitous (i.e. included in the system compiler on most systems) 31 | then you need to provide a link to the appropriate documentation 32 | for a new user to install it, etc. 33 | 34 | We strongly recommend that you add the appropriate features 35 | to `make.defs.${toolchain}` if appropriate. 36 | 37 | ### Do you certify that your contribution is made in good faith and does not attempt to introduce any negative behavior into this project? 
38 | 39 | - [ ] Yes 40 | - [ ] No 41 | -------------------------------------------------------------------------------- /.github/workflows/makefile.yml: -------------------------------------------------------------------------------- 1 | name: Makefile CI 2 | 3 | on: 4 | push: 5 | branches: [ default ] 6 | pull_request: 7 | branches: [ default ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | 17 | - name: Test Python 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install numpy 21 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 22 | bash ./ci/build-run-prk.sh /tmp allpython 23 | 24 | - name: Test C++ 25 | run: CXX=g++ bash ./ci/build-run-prk.sh /tmp allcxx 26 | 27 | - name: Test Fortran 28 | run: FC=gfortran bash ./ci/build-run-prk.sh /tmp allfortran 29 | 30 | - name: Test C11 31 | run: CC=gcc bash ./ci/build-run-prk.sh /tmp allc1z 32 | -------------------------------------------------------------------------------- /ADA/Makefile: -------------------------------------------------------------------------------- 1 | AC = gnat 2 | ACFLAGS = make 3 | 4 | all: hello args nstream nstream_array transpose 5 | 6 | %: %.adb 7 | $(AC) $(ACFLAGS) $< -o $@ 8 | 9 | clean: 10 | -rm -f *.i *.o *.ali 11 | -rm -f hello args nstream nstream_array transpose 12 | 13 | -------------------------------------------------------------------------------- /ADA/args.adb: -------------------------------------------------------------------------------- 1 | with 2 | Ada.Text_IO, 3 | Ada.Integer_Text_IO, 4 | Ada.Strings, 5 | Ada.Strings.Bounded, 6 | Ada.Command_line; 7 | 8 | use 9 | Ada.Text_IO, 10 | Ada.Integer_Text_IO, 11 | Ada.Strings, 12 | Ada.Strings.Bounded, 13 | Ada.Command_line; 14 | 15 | procedure Args is 16 | 17 | package BS is new Ada.Strings.Bounded.Generic_Bounded_Length (Max => 15); 18 | use BS; 19 | 20 | S : Bounded_String; 21 | 22 | A : Integer := 1; 23 | I : Integer := 10; 24 | N 
: Integer := 1_000_000; 25 | 26 | begin 27 | 28 | Put_Line("Args, World!"); 29 | 30 | --Put("Argument_Count="); 31 | --Put(Item => Argument_Count, Width => 1); 32 | Put_Line("Argument_Count=" & Argument_Count'Image); 33 | 34 | if Argument_Count > 0 then 35 | Put_Line("Arg1=" & Argument(1)); 36 | end if; 37 | if Argument_Count > 1 then 38 | Put_Line("Arg2=" & Argument(2)); 39 | end if; 40 | 41 | end Args; 42 | 43 | -------------------------------------------------------------------------------- /ADA/hello.adb: -------------------------------------------------------------------------------- 1 | with Text_IO; use Text_IO; 2 | procedure Hello is 3 | begin 4 | Put_Line("Hello, World!"); 5 | end Hello; 6 | 7 | -------------------------------------------------------------------------------- /AMPI/AMR/README.md: -------------------------------------------------------------------------------- 1 | FINE_GRAIN: all work configurations are load balanced as well as possible, 2 | regardless of communication involved, as follows: The background grid is 3 | split completely and evenly among all participating ranks. When a 4 | refinement comes into existence, it is split into a number of pieces equal 5 | to the number of ranks, and each piece is assigned to a distinct rank 6 | without regard for locality. 7 | NO_TALK: this strategy minimizes communication, as follows: The background 8 | grid is split completely and evenly among all participating ranks. When a 9 | refinement comes into existence, it is split into pieces that exactly 10 | coincide with pieces of the background grid assigned to individual ranks. 11 | Each refinement piece is assigned to the same rank that owns the underlying 12 | piece of the background grid. 13 | HIGH_WATER: the background grid and one refinement together are divided 14 | statically as evenly as possible among the ranks. Each rank receives 15 | exactly one grid or one piece of a grid at a time. 
This means the code 16 | will not work for a single rank; this case is captured by the serial code. 17 | The decomposition and assignment of pieces of BG to the ranks is static. 18 | Refinements are partitioned identically and statically as well. 19 | AMNESIA: each configuration of BG and refinements is partitioned as evenly as 20 | possible, such that each rank receives one (chunk of) a grid. Because 21 | the refinements are all of the same size, this means that they are all 22 | partitioned identically. But there will be two different partitionings 23 | of the BG, one in the presence of a refinement, and one without. 24 | -------------------------------------------------------------------------------- /AMPI/Branch/Makefile: -------------------------------------------------------------------------------- 1 | ifndef NUMBER_OF_FUNCTIONS 2 | NUMBER_OF_FUNCTIONS=40 3 | endif 4 | 5 | ifndef MATRIX_RANK 6 | MATRIX_RANK=10 7 | endif 8 | 9 | include ../../common/AMPI.defs 10 | ##### User configurable options ##### 11 | 12 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 13 | #description: change above into something that is a decent optimization on your system 14 | 15 | #uncomment any of the following flags (and change values) to change defaults 16 | 17 | USERFLAGS = 18 | #description: parameter to specify optional flags 19 | 20 | EXTOBJS = 21 | LIBS = 22 | LIBPATHS = 23 | INCLUDEPATHS = 24 | 25 | ### End User configurable options ### 26 | 27 | ifndef RESTRICT_KEYWORD 28 | RESTRICT_KEYWORD=0 29 | endif 30 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 31 | # data accessed through pointers (requires -restrict compiler flag) 32 | 33 | ifndef VERBOSE 34 | VERBOSE=0 35 | endif 36 | #description: default diagnostic style is silent 37 | 38 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 39 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 40 | 41 | OPTIONSSTRING="Make options:\n\ 42 | OPTION MEANING DEFAULT\n\ 43 | RESTRICT_KEYWORD=0/1 disable/enable restrict 
keyword (aliasing) [0] \n\ 44 | VERBOSE=0/1 omit/include verbose run information [0]" 45 | 46 | TUNEFLAGS = $(RESTRICTFLAG) $(VERBOSEFLAG) $(USERFLAGS) 47 | PROGRAM = branch 48 | OBJS = $(PROGRAM).o $(COMOBJS) func.o 49 | 50 | #default: usage 51 | # @echo " \"make veryclean\" to remove custom built files as well" 52 | 53 | include ../../common/make.common 54 | 55 | func.c: 56 | @echo "############################################################" 57 | @echo "##### No file func.c -- invoking func_gen to create it #####" 58 | @echo "############################################################" 59 | ./func_gen ${MATRIX_RANK} ${NUMBER_OF_FUNCTIONS} 60 | 61 | veryclean: 62 | @rm -f func.c ___* 63 | make clean 64 | -------------------------------------------------------------------------------- /AMPI/Branch/README: -------------------------------------------------------------------------------- 1 | Option INS_HEAVY of the Branching code requires a customized version 2 | of file func.c, which contains a variable number of functions of 3 | variable size. This file is built by invoking script "func_gen," 4 | which takes two integer input parameters. The first is the rank of 5 | the square matrices whose elements get initialized individually by 6 | one of the functions in func.c. Hence, the number of instructions 7 | associated with each such function is proportional to rank*rank. 8 | The second parameter is the number of functions created. These 9 | functions are all slightly different, but all have the same size. 10 | 11 | Usage: func_gen MATRIX_RANK NUMBER_OF_FUNCTIONS 12 | The function is invoked with default values 10 and 40 if no 13 | values for these variables are supplied on the make command line. 
14 | 15 | -------------------------------------------------------------------------------- /AMPI/DGEMM/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/AMPI.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on your system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | EXTOBJS = 14 | LIBS = -lm 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef VERBOSE 21 | VERBOSE=0 22 | endif 23 | #description: default diagnostic style is silent 24 | 25 | ifndef BOFFSET 26 | BOFFSET=12 27 | endif 28 | #description: set this flag to some value to override default first array 29 | # dimension padding (12) of tiles used in non-MKL version 30 | 31 | PROGRAM = dgemm 32 | VERBOSEFLAG= -DVERBOSE=$(VERBOSE) 33 | OFFSETFLAG = -DBOFFSET=$(BOFFSET) 34 | 35 | OPTIONSSTRING="Make options:\n\ 36 | OPTION MEANING DEFAULT \n\ 37 | BOFFSET=? 
override default first array dimension padding of tiles [12] \n\ 38 | VERBOSE=0/1 omit/include verbose run information [0]" 39 | 40 | TUNEFLAGS = $(OFFSETFLAG) $(VERBOSEFLAG) $(USERFLAGS) 41 | OBJS = $(PROGRAM).o $(COMOBJS) 42 | 43 | include ../../common/make.common 44 | -------------------------------------------------------------------------------- /AMPI/Nstream/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/AMPI.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on your system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | #set the following variables for custom libraries and/or other objects 14 | EXTOBJS = 15 | LIBS = 16 | LIBPATHS = 17 | INCLUDEPATHS = 18 | 19 | ### End User configurable options ### 20 | 21 | ifndef RESTRICT_KEYWORD 22 | RESTRICT_KEYWORD=0 23 | endif 24 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 25 | # data accessed through pointers (requires -restrict compiler flag) 26 | 27 | ifndef VERBOSE 28 | VERBOSE=0 29 | endif 30 | #description: default diagnostic style is silent 31 | 32 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 33 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 34 | 35 | OPTIONSSTRING="Make options:\n\ 36 | OPTION MEANING DEFAULT \n\ 37 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 38 | VERBOSE=0/1 omit/include verbose run information [0]" 39 | 40 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 41 | PROGRAM = nstream 42 | OBJS = $(PROGRAM).o $(COMOBJS) 43 | 44 | include ../../common/make.common 45 | -------------------------------------------------------------------------------- /AMPI/Reduce/Makefile: 
-------------------------------------------------------------------------------- 1 | include ../../common/AMPI.defs 2 | 3 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 4 | #description: change above into something that is a decent optimization on your system 5 | 6 | #uncomment any of the following flags (and change values) to change defaults 7 | 8 | USERFLAGS = 9 | #description: parameter to specify optional flags 10 | 11 | EXTOBJS = 12 | LIBS = 13 | LIBPATHS = 14 | INCLUDEPATHS = 15 | 16 | ### End User configurable options ### 17 | 18 | ifndef RESTRICT_KEYWORD 19 | RESTRICT_KEYWORD=0 20 | endif 21 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 22 | # data accessed through pointers (requires -restrict compiler flag) 23 | 24 | ifndef VERBOSE 25 | VERBOSE=0 26 | endif 27 | #description: default diagnostic style is silent 28 | 29 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 30 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 31 | 32 | OPTIONSSTRING="Make options:\n\ 33 | OPTION MEANING DEFAULT\n\ 34 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 35 | VERBOSE=0/1 omit/include verbose run information [0]" 36 | 37 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 38 | PROGRAM = reduce 39 | OBJS = $(PROGRAM).o $(COMOBJS) 40 | 41 | include ../../common/make.common 42 | -------------------------------------------------------------------------------- /AMPI/Sparse/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/AMPI.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on your system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable 
options ### 19 | 20 | ifndef SCRAMBLE 21 | SCRAMBLE=1 22 | endif 23 | #description: if flag is true, grid indices are scrambled to produce irregular stride 24 | 25 | ifndef RESTRICT_KEYWORD 26 | RESTRICT_KEYWORD=0 27 | endif 28 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 29 | # data accessed through pointers (requires -restrict compiler flag) 30 | 31 | ifndef TESTDENSE 32 | TESTDENSE=0 33 | endif 34 | #description: if flag is set, sparse matrix will be embedded in dense matrix 35 | 36 | ifndef VERBOSE 37 | VERBOSE=0 38 | endif 39 | #description: default diagnostic style is silent 40 | 41 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 42 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 43 | SCRAMBLEFLAG= -DSCRAMBLE=$(SCRAMBLE) 44 | DENSEFLAG = -DTESTDENSE=$(TESTDENSE) 45 | 46 | OPTIONSSTRING="Make options:\n\ 47 | OPTION MEANING DEFAULT\n\ 48 | SCRAMBLE=0/1 regular/irregular sparsity pattern [1] \n\ 49 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 50 | TESTDENSE=0/1 store sparse matrix in sparse/dense format [0] \n\ 51 | VERBOSE=0/1 omit/include verbose run information [0]" 52 | 53 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(SCRAMBLEFLAG) $(DENSEFLAG) \ 54 | $(RESTRICTFLAG) 55 | PROGRAM = sparse 56 | OBJS = $(PROGRAM).o $(COMOBJS) 57 | 58 | include ../../common/make.common 59 | -------------------------------------------------------------------------------- /AMPI/Sparse/README: -------------------------------------------------------------------------------- 1 | This program constructs a sparse matrix and performs a (parallel) 2 | sparse matrix-vector multiplication. The sparse matrix is built as 3 | follows. The standard star-shaped discretization stencil with a 4 | user-specified radius is applied to a structured 2-dimensional 5 | grid.
Example of a stencil with radius r=2: 6 | 7 | 0 8 | | 9 | 0 10 | | 11 | 0--0--0--0--0 12 | | 13 | 0 14 | | 15 | 0 16 | 17 | Here, the `0' symbol signifies inclusion in the stencil. A square grid 18 | with linear dimension (2^n) has 2^(2n) = 4^n points. The resulting 19 | matrix has (4^n) rows and (4^n) columns, for a total of (16^n) 20 | elements. The user specifies n. The stencil is applied in a periodic 21 | fashion, i.e. it wraps around the edges of the grid. 22 | 23 | If the scramble flag is unset in the Makefile, the discretization 24 | stencil results in a regularly banded sparse matrix, which can be 25 | stored efficiently in vectors, in principle. If the scramble flag is 26 | maintained, the columns of the matrix are permuted, resulting in a 27 | general irregular sparse matrix, but with a known number of nonzeroes 28 | per row (4r+1). We use Compressed Row Storage for accessing the matrix 29 | elements, even in the case of an unset scramble flag. Numerical values 30 | of matrix elements are chosen judiciously to make verification 31 | easy. They do not correspond to any realistic discretization of a 32 | continuum problem. 
33 | -------------------------------------------------------------------------------- /AMPI/Synch_global/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/AMPI.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on your system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | USERFLAGS = 10 | #description: parameter to specify optional flags 11 | 12 | #set the following variables for custom libraries and/or other objects 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef VERBOSE 21 | VERBOSE=0 22 | endif 23 | #description: default diagnostic style is silent 24 | 25 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 26 | 27 | OPTIONSSTRING="Make options:\n\ 28 | OPTION MEANING DEFAULT\n\ 29 | VERBOSE=0/1 omit/include verbose run information [0]" 30 | 31 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) 32 | PROGRAM = global 33 | OBJS = $(PROGRAM).o $(COMOBJS) 34 | 35 | include ../../common/make.common 36 | -------------------------------------------------------------------------------- /AMPI/Synch_p2p/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/AMPI.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on your system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | USERFLAGS = 10 | #description: parameter to specify optional flags 11 | 12 | #set the following variables for custom libraries and/or other objects 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef RESTRICT_KEYWORD 21 | RESTRICT_KEYWORD=0
22 | endif 23 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 24 | # data accessed through pointers (requires -restrict compiler flag) 25 | 26 | ifndef VERBOSE 27 | VERBOSE=0 28 | endif 29 | #description: default diagnostic style is silent 30 | 31 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 32 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 33 | 34 | OPTIONSSTRING="Make options:\n\ 35 | OPTION MEANING DEFAULT\n\ 36 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 37 | VERBOSE=0/1 omit/include verbose run information [0]" 38 | 39 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 40 | PROGRAM = p2p 41 | # objects below are the default, used by "clean," if invoked 42 | OBJS = $(PROGRAM).o $(COMOBJS) 43 | 44 | include ../../common/make.common 45 | -------------------------------------------------------------------------------- /AMPI/Transpose/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/AMPI.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on your system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | #set the following variables for custom libraries and/or other objects 14 | EXTOBJS = 15 | LIBS = 16 | LIBPATHS = 17 | INCLUDEPATHS = 18 | 19 | ### End User configurable options ### 20 | 21 | ifndef RESTRICT_KEYWORD 22 | RESTRICT_KEYWORD=0 23 | endif 24 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 25 | # data accessed through pointers (requires -restrict compiler flag) 26 | 27 | ifndef SYNCHRONOUS 28 | SYNCHRONOUS=0 29 | endif 30 | #description: turn on synchronous (blocking) communications 31 | 32 | ifndef VERBOSE 33 | VERBOSE=0 34 | endif 35 | #description:
default diagnostic style is silent 36 | 37 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 38 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 39 | BLOCKFLAG = -DSYNCHRONOUS=$(SYNCHRONOUS) 40 | 41 | OPTIONSSTRING="Make options:\n\ 42 | OPTION MEANING DEFAULT\n\ 43 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 44 | VERBOSE=0/1 omit/include verbose run information [0] \n\ 45 | SYNCHRONOUS=0/1 Use a/synchronous communications [0]" 46 | 47 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) $(BLOCKFLAG) 48 | PROGRAM = transpose 49 | OBJS = $(PROGRAM).o $(COMOBJS) 50 | 51 | include ../../common/make.common 52 | -------------------------------------------------------------------------------- /BIBLIOGRAPHY.md: -------------------------------------------------------------------------------- 1 | This file contains references to publications about the PRK. 2 | 3 | # Original reference 4 | 5 | R. F. Van der Wijngaart and T. G. Mattson. 6 | HPEC 2014. 7 | "The Parallel Research Kernels." 8 | https://doi.org/10.1109/HPEC.2014.7040972 9 | 10 | # Distributed memory implementations 11 | 12 | R. F. Van der Wijngaart, A. Kayi, J. R. Hammond, G. Jost, T. St. John, S. Sridharan, T. G. Mattson, J. Abercrombie, and J. Nelson. 13 | ISC 2016. 14 | "Comparing runtime systems with exascale ambitions using the Parallel Research Kernels." 15 | https://doi.org/10.1007/978-3-319-41321-1_17 16 | 17 | R. F. Van der Wijngaart, S. Sridharan, A. Kayi, G. Jost, J. Hammond, T. Mattson, and J. Nelson. 18 | PGAS 2015. 19 | "Using the Parallel Research Kernels to study PGAS models." 20 | https://doi.org/10.1109/PGAS.2015.24 21 | 22 | # Irregular kernels 23 | 24 | E. Georganas, R. F. Van der Wijngaart and T. G. Mattson. 25 | IPDPS 2016. 26 | "Design and Implementation of a Parallel Research Kernel for Assessing Dynamic Load-Balancing Capabilities." 27 | https://doi.org/10.1109/IPDPS.2016.65 28 | 29 | Rob F. Van der Wijngaart, Evangelos Georganas, Timothy G. 
Mattson, and Andrew Wissink. 30 | ISC 2017. 31 | "A New Parallel Research Kernel to Expand Research on Dynamic Load-Balancing Capabilities." 32 | https://link.springer.com/chapter/10.1007/978-3-319-58667-0_14 33 | 34 | # Modern C++ implementations 35 | 36 | Jeff R. Hammond and Timothy G. Mattson. 37 | IWOCL 2019. 38 | "Evaluating data parallelism in C++ using the Parallel Research Kernels." 39 | https://doi.org/10.1145/3318170.3318192 40 | -------------------------------------------------------------------------------- /C1z/prk_petsc.h: -------------------------------------------------------------------------------- 1 | #ifndef PRK_PETSC_H_ 2 | #define PRK_PETSC_H_ 3 | 4 | #define PRK_PETSC_USE_MPI 1 5 | 6 | #ifdef PRK_PETSC_USE_MPI 7 | #include 8 | #endif 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #endif // PRK_PETSC_H_ 16 | -------------------------------------------------------------------------------- /CHAPEL/COPYRIGHT: -------------------------------------------------------------------------------- 1 | Copyright 2020-2024 Hewlett Packard Enterprise Development LP 2 | Copyright 2004-2019 Cray Inc. 3 | (See LICENSE file for more details) 4 | -------------------------------------------------------------------------------- /CHAPEL/Makefile: -------------------------------------------------------------------------------- 1 | # Chapel Makefile 2 | 3 | # TODO Find a way to make this fit into the existing Makefile system 4 | 5 | OPTFLAGS = --fast #-sassertNoSlicing 6 | #MODULEFLAGS = --module-dir . 
7 | 8 | BLAS = -lblas 9 | 10 | # Debugging 11 | DEBUGFLAGS = 12 | ifdef DEBUG 13 | DEBUGFLAGS = --savec c 14 | endif 15 | 16 | CHPL = chpl 17 | 18 | SOURCES = dgemm.chpl dgemm-summa.chpl nstream.chpl p2p-serial-fast.chpl pic.chpl sparse.chpl \ 19 | stencil.chpl stencil-serial.chpl stencil-opt.chpl transpose.chpl transpose-serial.chpl 20 | 21 | EXECUTABLE = $(SOURCES:.chpl=) dgemm-summa-blas 22 | 23 | all: $(EXECUTABLE) 24 | 25 | %: %.chpl 26 | $(CHPL) $(OPTFLAGS) $(MODULEFLAGS) $(DEBUGFLAGS) $*.chpl -o $* 27 | 28 | dgemm-summa: dgemm-summa.chpl 29 | $(CHPL) $(OPTFLAGS) -s useBlockDist=true -s blasImpl=off $(MODULEFLAGS) $(DEBUGFLAGS) $^ -o $@ 30 | 31 | dgemm-summa-blas: dgemm-summa.chpl 32 | $(CHPL) $(OPTFLAGS) -s useBlockDist=true -s blasImpl=blas $(MODULEFLAGS) $(DEBUGFLAGS) $^ $(BLAS) -o $@ 33 | 34 | clean: 35 | rm -f $(EXECUTABLE) 36 | -------------------------------------------------------------------------------- /CHARM++/Stencil/ci_reqs.h: -------------------------------------------------------------------------------- 1 | #define WEIGHTSIZE ((2*RADIUS+1)*(2*RADIUS+1)) 2 | #define RADIUSTIMESWIDTH ((RADIUS)*(width)) 3 | -------------------------------------------------------------------------------- /CHARM++/Synch_p2p/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/CHARM++.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on your system 6 | 7 | USERFLAGS = 8 | #description: parameter to specify optional flags 9 | 10 | #set the following variables for custom libraries and/or other objects 11 | EXTOBJS = 12 | LIBS = 13 | LIBPATHS = 14 | INCLUDEPATHS = 15 | 16 | ### End User configurable options ### 17 | 18 | ifndef RESTRICT_KEYWORD 19 | RESTRICT_KEYWORD=0 20 | endif 21 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 22 | # data accessed through
pointers (requires -restrict compiler flag) 23 | 24 | ifndef VERBOSE 25 | VERBOSE=0 26 | endif 27 | #description: default diagnostic style is silent 28 | 29 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 30 | RESTRICTFLAG = -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 31 | 32 | OPTIONSSTRING="Make options:\n\ 33 | OPTION MEANING DEFAULT\n\ 34 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 35 | VERBOSE=0/1 omit/include verbose run information [0]" 36 | 37 | TUNEFLAGS = $(RESTRICTFLAG) $(VERBOSEFLAG) $(USERFLAGS) 38 | PROGRAM = p2p 39 | OBJS = $(PROGRAM).o $(COMOBJS) 40 | 41 | include ../../common/make.common 42 | 43 | -------------------------------------------------------------------------------- /CHARM++/Transpose/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/CHARM++.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on your system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | RESTRICTFLAG = -DRESTRICT_KEYWORD 10 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 11 | # data accessed through pointers 12 | 13 | #DEBUGFLAG = -DVERBOSE 14 | #description: default diagnostic style is silent 15 | 16 | USERFLAGS = 17 | #description: parameter to specify optional flags 18 | 19 | #set the following variables for custom libraries and/or other objects 20 | EXTOBJS = 21 | LIBS = 22 | LIBPATHS = 23 | INCLUDEPATHS = 24 | 25 | ### End User configurable options ### 26 | 27 | TUNEFLAGS = $(RESTRICTFLAG) $(DEBUGFLAG) $(USERFLAGS) 28 | PROGRAM = transpose 29 | OBJS = $(PROGRAM).o $(COMOBJS) 30 | 31 | include ../../common/make.common 32 | 33 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1
| # Lines starting with '#' are comments. 2 | # Each line is a file pattern followed by one or more owners. 3 | 4 | # These owners will be the default owners for everything in the repo. 5 | * @jeffhammond 6 | 7 | # Order is important. The last matching pattern has the most precedence. 8 | # So if a pull request only touches javascript files, only these owners 9 | # will be requested to review. 10 | AMPI/* @rfvander 11 | C1z/* @jeffhammond 12 | #CHAPEL/* @ben-albrecht @npadmana 13 | CHARM++/* @rfvander @philmiller-charmworks 14 | Cxx11/* @jeffhammond 15 | FENIX/* @rfvander @marcgamell 16 | FG_MPI/* @rfvander 17 | FORTRAN/* @jeffhammond 18 | FORTRAN/*coarray.f90 @afanfa @zbeekman @jeffhammond 19 | GRAPPA/* @nelsonje 20 | JULIA/* @kpamnany @jeffhammond 21 | LEGION/* @magnatelee @elliottslaughter @apokayi @rfvander 22 | MPI1/* @rfvander 23 | MPIOPENMP/* @rfvander 24 | MPIRMA/* @rfvander @srinivas212 @jeffhammond 25 | MPISHM/* @rfvander @srinivas212 @jeffhammond 26 | OCTAVE/* @jeffhammond 27 | OPENMP/* @rfvander 28 | PYTHON/* @jeffhammond 29 | RUST/* @jeffhammond 30 | SERIAL/* @rfvander 31 | SHMEM/* @jdinan @rfvander 32 | UPC/* @apokayi 33 | ci/* @jeffhammond 34 | 35 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ParRes/Kernels/320c170cbdf54c69da725eeb834c8855b9a9a39b/COPYING -------------------------------------------------------------------------------- /Csharp/Makefile: -------------------------------------------------------------------------------- 1 | all: hello.exe nstream.exe transpose.exe 2 | 3 | %.exe: %.cs 4 | mcs $< -out:$@ 5 | 6 | clean: 7 | -rm -f *.exe 8 | -------------------------------------------------------------------------------- /Csharp/hello.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | class Hello { 4 | static void 
Main(string[] args) { 5 | Console.WriteLine("Hello World!"); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /Cxx11/PIC.md: -------------------------------------------------------------------------------- 1 | # Sequential 2 | ```sh 3 | ./pic 10 1000 1000000 1 2 GEOMETRIC 0.99 4 | ./pic 10 1000 1000000 0 1 SINUSOIDAL 5 | ./pic 10 1000 1000000 1 0 LINEAR 1.0 3.0 6 | ./pic 10 1000 1000000 1 0 PATCH 0 200 100 200 7 | ``` 8 | 9 | # SYCL 10 | ```sh 11 | ./pic-sycl 10 1000 1000000 1 2 GEOMETRIC 0.99 12 | ./pic-sycl 10 1000 1000000 0 1 SINUSOIDAL 13 | ./pic-sycl 10 1000 1000000 1 0 LINEAR 1.0 3.0 14 | ./pic-sycl 10 1000 1000000 1 0 PATCH 0 200 100 200 15 | ``` 16 | -------------------------------------------------------------------------------- /Cxx11/add32.cl: -------------------------------------------------------------------------------- 1 | __kernel void add32(const int n, __global float * inout) 2 | { 3 | const int i = get_global_id(0); 4 | const int j = get_global_id(1); 5 | 6 | if ( (i(y)?(x):(y)) 12 | #endif 13 | 14 | __kernel void p2p32(const int n, __global float * grid) 15 | { 16 | const int j = get_global_id(0); 17 | for (int i=2; i<=2*n-2; i++) { 18 | // for (int j=MAX(2,i-n+2); j<=MIN(i,n); j++) { 19 | if ( ( j >= MAX(2,i-n+2) ) && ( j <= MIN(i,n) ) ) { 20 | const int x = i-j+2-1; 21 | const int y = j-1; 22 | grid[x*n+y] = grid[(x-1)*n+ y ] 23 | + grid[ x *n+(y-1)] 24 | - grid[(x-1)*n+(y-1)]; 25 | } 26 | barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); 27 | } 28 | } 29 | 30 | __kernel void p2p64(const int n, __global double * grid) 31 | { 32 | const int j = get_global_id(0); 33 | #if 0 34 | if (j==0) { 35 | for (int i=2; i<=2*n-2; i++) { 36 | for (int j=MAX(2,i-n+2); j<=MIN(i,n); j++) { 37 | //printf("i,j=%d,%d\n",i,j); 38 | const int x = i-j+2-1; 39 | const int y = j-1; 40 | grid[x*n+y] = grid[(x-1)*n+ y ] 41 | + grid[ x *n+(y-1)] 42 | - grid[(x-1)*n+(y-1)]; 43 | } 44 | } 45 | } 46 | #else 47 | for (int 
i=2; i<=2*n-2; i++) { 48 | // for (int j=MAX(2,i-n+2); j<=MIN(i,n); j++) 49 | if ( ( j >= MAX(2,i-n+2) ) && ( j <= MIN(i,n) ) ) { 50 | //printf("i,j=%d,%d\n",i,j); 51 | const int x = i-j+2-1; 52 | const int y = j-1; 53 | grid[x*n+y] = grid[(x-1)*n+ y ] 54 | + grid[ x *n+(y-1)] 55 | - grid[(x-1)*n+(y-1)]; 56 | } 57 | barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); 58 | } 59 | #endif 60 | } 61 | -------------------------------------------------------------------------------- /Cxx11/prk_hpx.h: -------------------------------------------------------------------------------- 1 | /// 2 | /// Copyright (c) 2019, Intel Corporation 3 | /// 4 | /// Redistribution and use in source and binary forms, with or without 5 | /// modification, are permitted provided that the following conditions 6 | /// are met: 7 | /// 8 | /// * Redistributions of source code must retain the above copyright 9 | /// notice, this list of conditions and the following disclaimer. 10 | /// * Redistributions in binary form must reproduce the above 11 | /// copyright notice, this list of conditions and the following 12 | /// disclaimer in the documentation and/or other materials provided 13 | /// with the distribution. 14 | /// * Neither the name of Intel Corporation nor the names of its 15 | /// contributors may be used to endorse or promote products 16 | /// derived from this software without specific prior written 17 | /// permission. 18 | /// 19 | /// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | /// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | /// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 | /// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 23 | /// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 | /// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 | /// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 | /// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | /// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 | /// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 | /// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | /// POSSIBILITY OF SUCH DAMAGE. 31 | 32 | #ifndef PRK_HPX_H 33 | #define PRK_HPX_H 34 | 35 | #include 36 | #include 37 | 38 | #include 39 | #include 40 | 41 | #endif /* PRK_HPX_H */ 42 | -------------------------------------------------------------------------------- /Cxx11/prk_kokkos.h: -------------------------------------------------------------------------------- 1 | /// 2 | /// Copyright (c) 2018, Intel Corporation 3 | /// 4 | /// Redistribution and use in source and binary forms, with or without 5 | /// modification, are permitted provided that the following conditions 6 | /// are met: 7 | /// 8 | /// * Redistributions of source code must retain the above copyright 9 | /// notice, this list of conditions and the following disclaimer. 10 | /// * Redistributions in binary form must reproduce the above 11 | /// copyright notice, this list of conditions and the following 12 | /// disclaimer in the documentation and/or other materials provided 13 | /// with the distribution. 14 | /// * Neither the name of Intel Corporation nor the names of its 15 | /// contributors may be used to endorse or promote products 16 | /// derived from this software without specific prior written 17 | /// permission. 
18 | /// 19 | /// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | /// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | /// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 | /// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 | /// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 | /// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 | /// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 | /// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | /// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 | /// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 | /// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | /// POSSIBILITY OF SUCH DAMAGE. 31 | 32 | #ifndef PRK_KOKKOS_H 33 | #define PRK_KOKKOS_H 34 | 35 | #include 36 | 37 | #endif /* PRK_KOKKOS_H */ 38 | -------------------------------------------------------------------------------- /Cxx11/prk_upcxx.h: -------------------------------------------------------------------------------- 1 | /// 2 | /// Copyright (c) 2019, Intel Corporation 3 | /// 4 | /// Redistribution and use in source and binary forms, with or without 5 | /// modification, are permitted provided that the following conditions 6 | /// are met: 7 | /// 8 | /// * Redistributions of source code must retain the above copyright 9 | /// notice, this list of conditions and the following disclaimer. 10 | /// * Redistributions in binary form must reproduce the above 11 | /// copyright notice, this list of conditions and the following 12 | /// disclaimer in the documentation and/or other materials provided 13 | /// with the distribution. 
14 | /// * Neither the name of Intel Corporation nor the names of its 15 | /// contributors may be used to endorse or promote products 16 | /// derived from this software without specific prior written 17 | /// permission. 18 | /// 19 | /// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | /// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | /// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 | /// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 | /// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 | /// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 | /// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 | /// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | /// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 | /// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 | /// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | /// POSSIBILITY OF SUCH DAMAGE. 31 | 32 | #ifndef PRK_UPCXX_H 33 | #define PRK_UPCXX_H 34 | 35 | #include 36 | 37 | #endif /* PRK_UPCXX_H */ 38 | -------------------------------------------------------------------------------- /Cxx11/transpose.okl: -------------------------------------------------------------------------------- 1 | @kernel void transpose(int N, double * A, double * B) 2 | { 3 | for(int j = 0; j < N; ++j; outer) { 4 | for(int i = 0; i < N; ++i; inner) { 5 | if ((i 12 | The function is invoked with default values 10 and 40 if no 13 | values for these variables are supplied on the make command line. 
14 | 15 | -------------------------------------------------------------------------------- /FG_MPI/DGEMM/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/FG_MPI.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on your system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | EXTOBJS = 14 | LIBS = -lm 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef VERBOSE 21 | VERBOSE=0 22 | endif 23 | #description: default diagnostic style is silent 24 | 25 | ifndef BOFFSET 26 | BOFFSET=12 27 | endif 28 | #description: set this flag to some value to override default first array 29 | # dimension padding (12) of tiles used in non-MKL version 30 | 31 | PROGRAM = dgemm 32 | VERBOSEFLAG= -DVERBOSE=$(VERBOSE) 33 | OFFSETFLAG = -DBOFFSET=$(BOFFSET) 34 | 35 | OPTIONSSTRING="Make options:\n\ 36 | OPTION MEANING DEFAULT \n\ 37 | BOFFSET=?
override default first array dimension padding of tiles [12] \n\ 38 | VERBOSE=0/1 omit/include verbose run information [0]" 39 | 40 | TUNEFLAGS = $(OFFSETFLAG) $(VERBOSEFLAG) $(USERFLAGS) 41 | OBJS = $(PROGRAM).o $(COMOBJS) 42 | 43 | include ../../common/make.common 44 | -------------------------------------------------------------------------------- /FG_MPI/Nstream/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/FG_MPI.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on your system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | #set the following variables for custom libraries and/or other objects 14 | EXTOBJS = 15 | LIBS = 16 | LIBPATHS = 17 | INCLUDEPATHS = 18 | 19 | ### End User configurable options ### 20 | 21 | ifndef RESTRICT_KEYWORD 22 | RESTRICT_KEYWORD=0 23 | endif 24 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 25 | # data accessed through pointers (requires -restrict compiler flag) 26 | 27 | ifndef VERBOSE 28 | VERBOSE=0 29 | endif 30 | #description: default diagnostic style is silent 31 | 32 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 33 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 34 | 35 | OPTIONSSTRING="Make options:\n\ 36 | OPTION MEANING DEFAULT \n\ 37 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 38 | VERBOSE=0/1 omit/include verbose run information [0]" 39 | 40 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 41 | PROGRAM = nstream 42 | OBJS = $(PROGRAM).o $(COMOBJS) 43 | 44 | include ../../common/make.common 45 | -------------------------------------------------------------------------------- /FG_MPI/PIC-static/Makefile:
-------------------------------------------------------------------------------- 1 | include ../../common/FG_MPI.defs 2 | COMOBJS += random_draw.o 3 | 4 | ##### User configurable options ##### 5 | 6 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 7 | #description: change above into something that is a decent optimization on your system 8 | 9 | #uncomment any of the following flags (and change values) to change defaults 10 | 11 | USERFLAGS = 12 | #description: parameter to specify optional flags 13 | 14 | #set the following variables for custom libraries and/or other objects 15 | EXTOBJS = 16 | LIBS = -lm 17 | LIBPATHS = 18 | INCLUDEPATHS = 19 | 20 | ### End User configurable options ### 21 | 22 | ifndef RESTRICT_KEYWORD 23 | RESTRICT_KEYWORD=0 24 | endif 25 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 26 | # data accessed through pointers (requires -restrict compiler flag) 27 | 28 | ifndef VERBOSE 29 | VERBOSE=0 30 | endif 31 | #description: default diagnostic style is silent 32 | 33 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 34 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 35 | 36 | OPTIONSSTRING="Make options:\n\ 37 | OPTION MEANING DEFAULT \n\ 38 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 39 | VERBOSE=0/1 omit/include verbose run information [0]" 40 | 41 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 42 | PROGRAM = pic 43 | OBJS = $(PROGRAM).o $(COMOBJS) 44 | 45 | include ../../common/make.common 46 | -------------------------------------------------------------------------------- /FG_MPI/Reduce/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/FG_MPI.defs 2 | 3 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 4 | #description: change above into something that is a decent optimization on your system 5 | 6 | #uncomment any of the following flags (and change values) to change defaults 7 | 8 | USERFLAGS = 9 | #description: parameter to specify
optional flags 10 | 11 | EXTOBJS = 12 | LIBS = 13 | LIBPATHS = 14 | INCLUDEPATHS = 15 | 16 | ### End User configurable options ### 17 | 18 | ifndef RESTRICT_KEYWORD 19 | RESTRICT_KEYWORD=0 20 | endif 21 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 22 | # data accessed through pointers (requires -restrict compiler flag) 23 | 24 | ifndef VERBOSE 25 | VERBOSE=0 26 | endif 27 | #description: default diagnostic style is silent 28 | 29 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 30 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 31 | 32 | OPTIONSSTRING="Make options:\n\ 33 | OPTION MEANING DEFAULT\n\ 34 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 35 | VERBOSE=0/1 omit/include verbose run information [0]" 36 | 37 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 38 | PROGRAM = reduce 39 | OBJS = $(PROGRAM).o $(COMOBJS) 40 | 41 | include ../../common/make.common 42 | -------------------------------------------------------------------------------- /FG_MPI/Sparse/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/FG_MPI.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on your system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef SCRAMBLE 21 | SCRAMBLE=1 22 | endif 23 | #description: if flag is true, grid indices are scrambled to produce irregular stride 24 | 25 | ifndef RESTRICT_KEYWORD 26 | RESTRICT_KEYWORD=0 27 | endif 28 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 29 | # data accessed through pointers (requires -restrict compiler flag) 30
| 31 | ifndef TESTDENSE 32 | TESTDENSE=0 33 | endif 34 | #description: if flag is set, sparse matrix will be embedded in dense matrix 35 | 36 | ifndef VERBOSE 37 | VERBOSE=0 38 | endif 39 | #description: default diagnostic style is silent 40 | 41 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 42 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 43 | SCRAMBLEFLAG= -DSCRAMBLE=$(SCRAMBLE) 44 | DENSEFLAG = -DTESTDENSE=$(TESTDENSE) 45 | 46 | OPTIONSSTRING="Make options:\n\ 47 | OPTION MEANING DEFAULT\n\ 48 | SCRAMBLE=0/1 regular/irregular sparsity pattern [1] \n\ 49 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 50 | TESTDENSE=0/1 store sparse matrix in sparse/dense format [0] \n\ 51 | VERBOSE=0/1 omit/include verbose run information [0]" 52 | 53 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(SCRAMBLEFLAG) $(DENSEFLAG) \ 54 | $(RESTRICTFLAG) 55 | PROGRAM = sparse 56 | OBJS = $(PROGRAM).o $(COMOBJS) 57 | 58 | include ../../common/make.common 59 | -------------------------------------------------------------------------------- /FG_MPI/Sparse/README: -------------------------------------------------------------------------------- 1 | This program constructs a sparse matrix and performs a (parallel) 2 | sparse matrix-vector multiplication. The sparse matrix is built as 3 | follows. The standard star-shaped discretization stencil with a 4 | user-specified radius is applied to a structured 2-dimensional 5 | grid. Example of a stencil with radius r=2: 6 | 7 | 0 8 | | 9 | 0 10 | | 11 | 0--0--0--0--0 12 | | 13 | 0 14 | | 15 | 0 16 | 17 | Here, the `0' symbol signifies inclusion in the stencil. A square grid 18 | with linear dimension (2^n) has 2^(2n) = 4^n points. The resulting 19 | matrix has (4^n) rows and (4^n) columns, for a total of (16^n) 20 | elements. The user specifies n. The stencil is applied in a periodic 21 | fashion, i.e. it wraps around the edges of the grid. 
22 | 23 | If the scramble flag is unset in the Makefile, the discretization 24 | stencil results in a regularly banded sparse matrix, which can be 25 | stored efficiently in vectors, in principle. If the scramble flag is 26 | maintained, the columns of the matrix are permuted, resulting in a 27 | general irregular sparse matrix, but with a known number of nonzeroes 28 | per row (4r+1). We use Compressed Row Storage for accessing the matrix 29 | elements, even in the case of an unset scramble flag. Numerical values 30 | of matrix elements are chosen judiciously to make verification 31 | easy. They do not correspond to any realistic discretization of a 32 | continuum problem. 33 | -------------------------------------------------------------------------------- /FG_MPI/Synch_global/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/FG_MPI.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on you system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | USERFLAGS = 10 | #description: parameter to specify optional flags 11 | 12 | #set the following variables for custom libraries and/or other objects 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef VERBOSE 21 | VERBOSE=0 22 | endif 23 | #description: default diagnostic style is silent 24 | 25 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 26 | 27 | OPTIONSSTRING="Make options:\n\ 28 | OPTION MEANING DEFAULT\n\ 29 | VERBOSE=0/1 omit/include verbose run information [0]" 30 | 31 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) 32 | PROGRAM = global 33 | OBJS = $(PROGRAM).o $(COMOBJS) 34 | 35 | include ../../common/make.common 36 | -------------------------------------------------------------------------------- /FG_MPI/Synch_p2p/Makefile: 
-------------------------------------------------------------------------------- 1 | include ../../common/FG_MPI.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on you system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | USERFLAGS = 10 | #description: parameter to specify optional flags 11 | 12 | #set the following variables for custom libraries and/or other objects 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef RESTRICT_KEYWORD 21 | RESTRICT_KEYWORD=0 22 | endif 23 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 24 | # data accessed through pointers (requires -restrict compiler flag) 25 | 26 | ifndef VERBOSE 27 | VERBOSE=0 28 | endif 29 | #description: default diagnostic style is silent 30 | 31 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 32 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 33 | 34 | OPTIONSSTRING="Make options:\n\ 35 | OPTION MEANING DEFAULT\n\ 36 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 37 | VERBOSE=0/1 omit/include verbose run information [0]" 38 | 39 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 40 | PROGRAM = p2p 41 | # objects below are the default, used by "clean," if invoked 42 | OBJS = $(PROGRAM).o $(COMOBJS) 43 | 44 | include ../../common/make.common 45 | -------------------------------------------------------------------------------- /FG_MPI/Transpose/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/FG_MPI.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on you system 7 | 8 | #uncomment any of the following flags (and change values) to 
change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | #set the following variables for custom libraries and/or other objects 14 | EXTOBJS = 15 | LIBS = 16 | LIBPATHS = 17 | INCLUDEPATHS = 18 | 19 | ### End User configurable options ### 20 | 21 | ifndef RESTRICT_KEYWORD 22 | RESTRICT_KEYWORD=0 23 | endif 24 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 25 | # data accessed through pointers (requires -restrict compiler flag) 26 | 27 | ifndef SYNCHRONOUS 28 | SYNCHRONOUS=0 29 | endif 30 | #description: turn on synchronous (blocking) communications 31 | 32 | ifndef VERBOSE 33 | VERBOSE=0 34 | endif 35 | #description: default diagnostic style is silent 36 | 37 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 38 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 39 | BLOCKFLAG = -DSYNCHRONOUS=$(SYNCHRONOUS) 40 | 41 | OPTIONSSTRING="Make options:\n\ 42 | OPTION MEANING DEFAULT\n\ 43 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 44 | VERBOSE=0/1 omit/include verbose run information [0] \n\ 45 | SYNCHRONOUS=0/1 Use a/synchronous communications [0]" 46 | 47 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) $(BLOCKFLAG) 48 | PROGRAM = transpose 49 | OBJS = $(PROGRAM).o $(COMOBJS) 50 | 51 | include ../../common/make.common 52 | -------------------------------------------------------------------------------- /GO/Makefile: -------------------------------------------------------------------------------- 1 | all: nstream transpose dgemm 2 | 3 | %: %.go 4 | go build $< 5 | 6 | # installed in ~/go 7 | # not quick enough to call every time (~14s for repeat invocation) 8 | deps: 9 | go get -u gonum.org/v1/gonum/... 
10 | 11 | clean: 12 | -rm -f nstream 13 | -rm -f transpose 14 | -rm -f dgemm 15 | 16 | -------------------------------------------------------------------------------- /GO/README.md: -------------------------------------------------------------------------------- 1 | # How to build 2 | 3 | I do not understand this nonsense... 4 | ``` 5 | export GOPATH=${PWD} 6 | go mod init prk 7 | go get gonum.org/v1/gonum/mat 8 | ``` 9 | 10 | # How to run 11 | 12 | ``` 13 | go run nstream.go -i=10 -n=100000000 14 | ``` 15 | -------------------------------------------------------------------------------- /GRAPPA/Nstream/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/GRAPPA.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on your system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | #RESTRICTFLAG = -DRESTRICT_KEYWORD 10 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 11 | # data accessed through pointers 12 | 13 | #DEBUGFLAG = -DVERBOSE 14 | #description: default diagnostic style is silent 15 | 16 | USERFLAGS = 17 | #description: parameter to specify optional flags 18 | 19 | #set the following variables for custom libraries and/or other objects 20 | EXTOBJS = 21 | LIBS = $(GRAPPA_LDLIBS) 22 | LIBPATHS = $(GRAPPA_LDFLAGS) 23 | 24 | ### End User configurable options ### 25 | CXXFLAGS = $(GRAPPA_CXXFLAGS) 26 | 27 | TUNEFLAGS = $(RESTRICTFLAG) $(DEBUGFLAG) $(USERFLAGS) $(CXXFLAGS) 28 | PROGRAM = nstream 29 | OBJS = $(PROGRAM).o $(COMOBJS) 30 | 31 | include ../../common/make.common -------------------------------------------------------------------------------- /GRAPPA/Random/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/GRAPPA.defs 2 | ##### User
configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on your system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | #RESTRICTFLAG = -DRESTRICT_KEYWORD 10 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 11 | # data accessed through pointers 12 | 13 | #DEBUGFLAG = -DVERBOSE 14 | #description: default diagnostic style is silent 15 | 16 | USERFLAGS = 17 | #description: parameter to specify optional flags 18 | 19 | #set the following variables for custom libraries and/or other objects 20 | EXTOBJS = 21 | LIBS = $(GRAPPA_LDLIBS) 22 | LIBPATHS = $(GRAPPA_LDFLAGS) 23 | 24 | ### End User configurable options ### 25 | CXXFLAGS = $(GRAPPA_CXXFLAGS) 26 | 27 | TUNEFLAGS = $(RESTRICTFLAG) $(DEBUGFLAG) $(USERFLAGS) $(CXXFLAGS) 28 | PROGRAM = random 29 | OBJS = $(PROGRAM).o $(COMOBJS) 30 | 31 | include ../../common/make.common -------------------------------------------------------------------------------- /GRAPPA/Synch_global/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/GRAPPA.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on your system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | #RESTRICTFLAG = -DRESTRICT_KEYWORD 10 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 11 | # data accessed through pointers 12 | 13 | #DEBUGFLAG = -DVERBOSE 14 | #description: default diagnostic style is silent 15 | 16 | USERFLAGS = 17 | #description: parameter to specify optional flags 18 | 19 | #set the following variables for custom libraries and/or other objects 20 | EXTOBJS = 21 | LIBS = $(GRAPPA_LDLIBS) 22 | LIBPATHS = $(GRAPPA_LDFLAGS) 23 | 24 | ### End User
configurable options ### 25 | 26 | # this is used by the C++ implicit compile rule 27 | CXXFLAGS= $(GRAPPA_CXXFLAGS) 28 | 29 | TUNEFLAGS = $(RESTRICTFLAG) $(DEBUGFLAG) $(USERFLAGS) $(CXXFLAGS) 30 | PROGRAM = global 31 | OBJS = $(PROGRAM).o $(COMOBJS) 32 | 33 | include ../../common/make.common 34 | -------------------------------------------------------------------------------- /GRAPPA/Synch_p2p/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/GRAPPA.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on you system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | #RESTRICTFLAG = -DRESTRICT_KEYWORD 10 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 11 | # data accessed through pointers 12 | 13 | #DEBUGFLAG = -DVERBOSE 14 | #description: default diagnostic style is silent 15 | 16 | USERFLAGS = 17 | #description: parameter to specify optional flags 18 | 19 | #set the following variables for custom libraries and/or other objects 20 | EXTOBJS = 21 | LIBS = $(GRAPPA_LDLIBS) 22 | LIBPATHS = $(GRAPPA_LDFLAGS) 23 | 24 | ### End User configurable options ### 25 | 26 | # this is used by the C++ implicit compile rule 27 | CXXFLAGS= $(GRAPPA_CXXFLAGS) 28 | 29 | TUNEFLAGS = $(RESTRICTFLAG) $(DEBUGFLAG) $(USERFLAGS) $(CXXFLAGS) 30 | PROGRAM = p2p 31 | OBJS = $(PROGRAM).o $(COMOBJS) 32 | 33 | include ../../common/make.common 34 | -------------------------------------------------------------------------------- /GRAPPA/Transpose/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/GRAPPA.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on you system 6 | 7 | 
#uncomment any of the following flags (and change values) to change defaults 8 | 9 | #RESTRICTFLAG = -DRESTRICT_KEYWORD 10 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 11 | # data accessed through pointers 12 | 13 | #DEBUGFLAG = -DVERBOSE 14 | #description: default diagnostic style is silent 15 | 16 | USERFLAGS = 17 | #description: parameter to specify optional flags 18 | 19 | #set the following variables for custom libraries and/or other objects 20 | EXTOBJS = 21 | LIBS = $(GRAPPA_LDLIBS) 22 | LIBPATHS = $(GRAPPA_LDFLAGS) 23 | 24 | ### End User configurable options ### 25 | 26 | # this is used by the C++ implicit compile rule 27 | CXXFLAGS= $(GRAPPA_CXXFLAGS) 28 | 29 | TUNEFLAGS = $(RESTRICTFLAG) $(DEBUGFLAG) $(USERFLAGS) $(CXXFLAGS) 30 | PROGRAM = transpose 31 | OBJS = $(PROGRAM).o $(COMOBJS) 32 | 33 | include ../../common/make.common 34 | -------------------------------------------------------------------------------- /JAVA/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: nstream.class stencil.class transpose.class p2p.class 3 | 4 | %.class: %.java 5 | javac $< 6 | 7 | clean: 8 | rm -f nstream.class stencil.class transpose.class p2p.class 9 | -------------------------------------------------------------------------------- /JAVA/README.md: -------------------------------------------------------------------------------- 1 | # How to build 2 | 3 | Just type `make` 4 | 5 | # How to run 6 | 7 | You have to set the classpath to the current directory, at least some of the time. 8 | 9 | ``` 10 | java -cp . nstream 10 10000000 11 | java -cp . stencil 10 1000 12 | java -cp . stencil 10 1000 star 4 13 | java -cp . stencil 10 1000 grid 2 14 | java -cp . transpose 10 1000 15 | java -cp . 
transpose 10 1000 32 16 | ``` 17 | -------------------------------------------------------------------------------- /JULIA/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | BLISBLAS = "6f275bd8-fec0-4d39-945b-7e95a765fa1e" 3 | CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" 4 | KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" 5 | LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" 6 | LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" 7 | MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2" 8 | MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" 9 | MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" 10 | Octavian = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4" 11 | Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc" 12 | 13 | [compat] 14 | BLISBLAS = "0.1" 15 | CUDA = "5.5" 16 | KernelAbstractions = "0.9" 17 | LinearAlgebra = "1.11" 18 | LoopVectorization = "0.12" 19 | MKL = "0.7" 20 | MPI = "0.20" 21 | MPIPreferences = "0.1" 22 | Octavian = "0.3" 23 | Tullio = "0.3" 24 | julia = "1.11" 25 | -------------------------------------------------------------------------------- /JULIA/README.md: -------------------------------------------------------------------------------- 1 | (Note: Requires Julia >= 1.11) 2 | 3 | # Instantiate Julia environment 4 | 5 | ``` 6 | julia --project -e 'using Pkg; Pkg.instantiate()' 7 | ``` 8 | 9 | To get the `mpiexecjl` driver run: 10 | ``` 11 | julia --project -e 'using MPI; MPI.install_mpiexecjl()' 12 | ``` 13 | 14 | Afterwards, put '$HOME/.julia/bin' on `PATH`, e.g. 15 | ```sh 16 | export PATH=$HOME/.julia/bin:$PATH 17 | ``` 18 | 19 | (If you don't want to modify `PATH` use `$HOME/.julia/bin/mpiexecjl` directly to run the MPI code.) 
20 | 21 | ## Optional: System MPI 22 | 23 | If you want to use a system MPI run the following: 24 | ``` 25 | julia --project -e 'using MPIPreferences; MPIPreferences.use_system_binary()' 26 | ``` 27 | 28 | # Run stuff 29 | 30 | No MPI: 31 | ```sh 32 | julia nstream.jl 10 1000000 33 | ``` 34 | 35 | With MPI: 36 | ```sh 37 | mpiexecjl -n 4 julia --project nstream-mpi.jl 10 1000000 38 | ``` 39 | -------------------------------------------------------------------------------- /LUA/README.md: -------------------------------------------------------------------------------- 1 | # How to run 2 | 3 | ``` 4 | ./nstream.lua 10 1000000 5 | ``` 6 | -------------------------------------------------------------------------------- /MPI1/AMR/README.md: -------------------------------------------------------------------------------- 1 | FINE_GRAIN: all work configurations are load balanced as well as possible, 2 | regardless of communication involved, as follows: The background grid is 3 | split completely and evenly among all participating ranks. When a 4 | refinement comes into existence, it is split into a number of pieces equal 5 | to the number of ranks, and each piece is assigned to a distinct rank 6 | without regard for locality. 7 | NO_TALK: this strategy minimizes communication, as follows: The background 8 | grid is split completely and evenly among all participating ranks. When a 9 | refinement comes into existence, it is split into pieces that exactly 10 | coincide with pieces of the background grid assigned to individual ranks. 11 | Each refinement piece is assigned to the same rank that owns the underlying 12 | piece of the background grid. 13 | HIGH_WATER: the background grid and one refinement together are divided 14 | statically as evenly as possible among the ranks. Each rank receives 15 | exactly one grid or one piece of a grid at a time. This means the code 16 | will not work for a single rank; this case is captured by the serial code. 
17 | The decomposition and assignment of pieces of the background grid (BG) to the ranks are static. 18 | Refinements are partitioned identically and statically as well. 19 | AMNESIA: each configuration of BG and refinements is partitioned as evenly as 20 | possible, such that each rank receives one (chunk of a) grid. Because 21 | the refinements are all of the same size, this means that they are all 22 | partitioned identically. But there will be two different partitionings 23 | of the BG, one in the presence of a refinement, and one without. 24 | -------------------------------------------------------------------------------- /MPI1/Branch/Makefile: -------------------------------------------------------------------------------- 1 | ifndef NUMBER_OF_FUNCTIONS 2 | NUMBER_OF_FUNCTIONS=40 3 | endif 4 | 5 | ifndef MATRIX_RANK 6 | MATRIX_RANK=10 7 | endif 8 | 9 | include ../../common/MPI.defs 10 | ##### User configurable options ##### 11 | 12 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 13 | #description: change above into something that is a decent optimization on your system 14 | 15 | #uncomment any of the following flags (and change values) to change defaults 16 | 17 | USERFLAGS = 18 | #description: parameter to specify optional flags 19 | 20 | EXTOBJS = 21 | LIBS = 22 | LIBPATHS = 23 | INCLUDEPATHS = 24 | 25 | ### End User configurable options ### 26 | 27 | ifndef RESTRICT_KEYWORD 28 | RESTRICT_KEYWORD=0 29 | endif 30 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 31 | # data accessed through pointers (requires -restrict compiler flag) 32 | 33 | ifndef VERBOSE 34 | VERBOSE=0 35 | endif 36 | #description: default diagnostic style is silent 37 | 38 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 39 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 40 | 41 | OPTIONSSTRING="Make options:\n\ 42 | OPTION MEANING DEFAULT\n\ 43 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 44 | VERBOSE=0/1 omit/include verbose run information [0]" 45 | 46 | TUNEFLAGS
= $(RESTRICTFLAG) $(VERBOSEFLAG) $(USERFLAGS) 47 | PROGRAM = branch 48 | OBJS = $(PROGRAM).o $(COMOBJS) func.o 49 | 50 | #default: usage 51 | # @echo " \"make veryclean\" to remove custom built files as well" 52 | 53 | include ../../common/make.common 54 | 55 | func.c: 56 | @echo "############################################################" 57 | @echo "##### No file func.c -- invoking func_gen to create it #####" 58 | @echo "############################################################" 59 | ./func_gen ${MATRIX_RANK} ${NUMBER_OF_FUNCTIONS} 60 | 61 | veryclean: 62 | @rm -f func.c ___* 63 | make clean 64 | -------------------------------------------------------------------------------- /MPI1/Branch/README: -------------------------------------------------------------------------------- 1 | Option INS_HEAVY of the Branching code requires a customized version 2 | of file func.c, which contains a variable number of functions of 3 | variable size. This file is built by invoking script "func_gen," 4 | which takes two integer input parameters. The first is the rank of 5 | the square matrices whose elements get initialized individually by 6 | one of the functions in func.c. Hence, the number of instructions 7 | associated with each such function is proportional to rank*rank. 8 | The second parameter is the number of functions created. These 9 | functions are all slightly different, but all have the same size. 10 | 11 | Usage: func_gen MATRIX_RANK NUMBER_OF_FUNCTIONS 12 | The script is invoked with default values 10 and 40 if no 13 | values for these variables are supplied on the make command line.
14 | 15 | -------------------------------------------------------------------------------- /MPI1/DGEMM/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/MPI.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on you system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | EXTOBJS = 14 | LIBS = -lm 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef VERBOSE 21 | VERBOSE=0 22 | endif 23 | #description: default diagnostic style is silent 24 | 25 | ifndef BOFFSET 26 | BOFFSET=12 27 | endif 28 | #description: set this flag to some value to override default first array 29 | # dimension padding (12) of tiles used in non-MKL version 30 | 31 | PROGRAM = dgemm 32 | VERBOSEFLAG= -DVERBOSE=$(VERBOSE) 33 | OFFSETFLAG = -DBOFFSET=$(BOFFSET) 34 | 35 | OPTIONSSTRING="Make options:\n\ 36 | OPTION MEANING DEFAULT \n\ 37 | BOFFSET=? 
override default first array dimension padding of tiles [12] \n\ 38 | VERBOSE=0/1 omit/include verbose run information [0]" 39 | 40 | TUNEFLAGS = $(OFFSETFLAG) $(VERBOSEFLAG) $(USERFLAGS) 41 | OBJS = $(PROGRAM).o $(COMOBJS) 42 | 43 | include ../../common/make.common 44 | -------------------------------------------------------------------------------- /MPI1/Nstream/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/MPI.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on you system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | #set the following variables for custom libraries and/or other objects 14 | EXTOBJS = 15 | LIBS = 16 | LIBPATHS = 17 | INCLUDEPATHS = 18 | 19 | ### End User configurable options ### 20 | 21 | ifndef RESTRICT_KEYWORD 22 | RESTRICT_KEYWORD=0 23 | endif 24 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 25 | # data accessed through pointers (requires -restrict compiler flag) 26 | 27 | ifndef VERBOSE 28 | VERBOSE=0 29 | endif 30 | #description: default diagnostic style is silent 31 | 32 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 33 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 34 | 35 | OPTIONSSTRING="Make options:\n\ 36 | OPTION MEANING DEFAULT \n\ 37 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 38 | VERBOSE=0/1 omit/include verbose run information [0]" 39 | 40 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 41 | PROGRAM = nstream 42 | OBJS = $(PROGRAM).o $(COMOBJS) 43 | 44 | include ../../common/make.common 45 | -------------------------------------------------------------------------------- /MPI1/PIC-static/Makefile: 
-------------------------------------------------------------------------------- 1 | include ../../common/MPI.defs 2 | COMOBJS += random_draw.o 3 | 4 | ##### User configurable options ##### 5 | 6 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 7 | #description: change above into something that is a decent optimization on you system 8 | 9 | #uncomment any of the following flags (and change values) to change defaults 10 | 11 | USERFLAGS = 12 | #description: parameter to specify optional flags 13 | 14 | #set the following variables for custom libraries and/or other objects 15 | EXTOBJS = 16 | LIBS = -lm 17 | LIBPATHS = 18 | INCLUDEPATHS = 19 | 20 | ### End User configurable options ### 21 | 22 | ifndef RESTRICT_KEYWORD 23 | RESTRICT_KEYWORD=0 24 | endif 25 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 26 | # data accessed through pointers (requires -restrict compiler flag) 27 | 28 | ifndef VERBOSE 29 | VERBOSE=0 30 | endif 31 | #description: default diagnostic style is silent 32 | 33 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 34 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 35 | 36 | OPTIONSSTRING="Make options:\n\ 37 | OPTION MEANING DEFAULT \n\ 38 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 39 | VERBOSE=0/1 omit/include verbose run information [0]" 40 | 41 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 42 | PROGRAM = pic 43 | OBJS = $(PROGRAM).o $(COMOBJS) 44 | 45 | include ../../common/make.common 46 | -------------------------------------------------------------------------------- /MPI1/Reduce/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/MPI.defs 2 | 3 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 4 | #description: change above into something that is a decent optimization on you system 5 | 6 | #uncomment any of the following flags (and change values) to change defaults 7 | 8 | USERFLAGS = 9 | #description: parameter to specify optional 
flags 10 | 11 | EXTOBJS = 12 | LIBS = 13 | LIBPATHS = 14 | INCLUDEPATHS = 15 | 16 | ### End User configurable options ### 17 | 18 | ifndef RESTRICT_KEYWORD 19 | RESTRICT_KEYWORD=0 20 | endif 21 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 22 | # data accessed through pointers (requires -restrict compiler flag) 23 | 24 | ifndef VERBOSE 25 | VERBOSE=0 26 | endif 27 | #description: default diagnostic style is silent 28 | 29 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 30 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 31 | 32 | OPTIONSSTRING="Make options:\n\ 33 | OPTION MEANING DEFAULT\n\ 34 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 35 | VERBOSE=0/1 omit/include verbose run information [0]" 36 | 37 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 38 | PROGRAM = reduce 39 | OBJS = $(PROGRAM).o $(COMOBJS) 40 | 41 | include ../../common/make.common 42 | -------------------------------------------------------------------------------- /MPI1/Sparse/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/MPI.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on you system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef SCRAMBLE 21 | SCRAMBLE=1 22 | endif 23 | #description: if flag is true, grid indices are scrambled to produce irregular stride 24 | 25 | ifndef RESTRICT_KEYWORD 26 | RESTRICT_KEYWORD=0 27 | endif 28 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 29 | # data accessed through pointers (requires -restrict compiler flag) 30 | 31 | ifndef 
TESTDENSE 32 | TESTDENSE=0 33 | endif 34 | #description: if flag is set, sparse matrix will be embedded in dense matrix 35 | 36 | ifndef VERBOSE 37 | VERBOSE=0 38 | endif 39 | #description: default diagnostic style is silent 40 | 41 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 42 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 43 | SCRAMBLEFLAG= -DSCRAMBLE=$(SCRAMBLE) 44 | DENSEFLAG = -DTESTDENSE=$(TESTDENSE) 45 | 46 | OPTIONSSTRING="Make options:\n\ 47 | OPTION MEANING DEFAULT\n\ 48 | SCRAMBLE=0/1 regular/irregular sparsity pattern [1] \n\ 49 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 50 | TESTDENSE=0/1 store sparse matrix in sparse/dense format [0] \n\ 51 | VERBOSE=0/1 omit/include verbose run information [0]" 52 | 53 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(SCRAMBLEFLAG) $(DENSEFLAG) \ 54 | $(RESTRICTFLAG) 55 | PROGRAM = sparse 56 | OBJS = $(PROGRAM).o $(COMOBJS) 57 | 58 | include ../../common/make.common 59 | -------------------------------------------------------------------------------- /MPI1/Sparse/README: -------------------------------------------------------------------------------- 1 | This program constructs a sparse matrix and performs a (parallel) 2 | sparse matrix-vector multiplication. The sparse matrix is built as 3 | follows. The standard star-shaped discretization stencil with a 4 | user-specified radius is applied to a structured 2-dimensional 5 | grid. Example of a stencil with radius r=2: 6 | 7 | 0 8 | | 9 | 0 10 | | 11 | 0--0--0--0--0 12 | | 13 | 0 14 | | 15 | 0 16 | 17 | Here, the `0' symbol signifies inclusion in the stencil. A square grid 18 | with linear dimension (2^n) has 2^(2n) = 4^n points. The resulting 19 | matrix has (4^n) rows and (4^n) columns, for a total of (16^n) 20 | elements. The user specifies n. The stencil is applied in a periodic 21 | fashion, i.e. it wraps around the edges of the grid.
22 | 23 | If the scramble flag is unset in the Makefile, the discretization 24 | stencil results in a regularly banded sparse matrix, which can be 25 | stored efficiently in vectors, in principle. If the scramble flag is 26 | maintained, the columns of the matrix are permuted, resulting in a 27 | general irregular sparse matrix, but with a known number of nonzeroes 28 | per row (4r+1). We use Compressed Row Storage for accessing the matrix 29 | elements, even in the case of an unset scramble flag. Numerical values 30 | of matrix elements are chosen judiciously to make verification 31 | easy. They do not correspond to any realistic discretization of a 32 | continuum problem. 33 | -------------------------------------------------------------------------------- /MPI1/Synch_global/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/MPI.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on you system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | USERFLAGS = 10 | #description: parameter to specify optional flags 11 | 12 | #set the following variables for custom libraries and/or other objects 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef VERBOSE 21 | VERBOSE=0 22 | endif 23 | #description: default diagnostic style is silent 24 | 25 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 26 | 27 | OPTIONSSTRING="Make options:\n\ 28 | OPTION MEANING DEFAULT\n\ 29 | VERBOSE=0/1 omit/include verbose run information [0]" 30 | 31 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) 32 | PROGRAM = global 33 | OBJS = $(PROGRAM).o $(COMOBJS) 34 | 35 | include ../../common/make.common 36 | -------------------------------------------------------------------------------- /MPI1/Synch_p2p/Makefile: 
-------------------------------------------------------------------------------- 1 | include ../../common/MPI.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on you system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | USERFLAGS = 10 | #description: parameter to specify optional flags 11 | 12 | #set the following variables for custom libraries and/or other objects 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef RESTRICT_KEYWORD 21 | RESTRICT_KEYWORD=0 22 | endif 23 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 24 | # data accessed through pointers (requires -restrict compiler flag) 25 | 26 | ifndef VERBOSE 27 | VERBOSE=0 28 | endif 29 | #description: default diagnostic style is silent 30 | 31 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 32 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 33 | 34 | OPTIONSSTRING="Make options:\n\ 35 | OPTION MEANING DEFAULT\n\ 36 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 37 | VERBOSE=0/1 omit/include verbose run information [0]" 38 | 39 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 40 | PROGRAM = p2p 41 | # objects below are the default, used by "clean," if invoked 42 | OBJS = $(PROGRAM).o $(COMOBJS) 43 | 44 | include ../../common/make.common 45 | -------------------------------------------------------------------------------- /MPI1/Transpose/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/MPI.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on you system 7 | 8 | #uncomment any of the following flags (and change values) to change 
defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | #set the following variables for custom libraries and/or other objects 14 | EXTOBJS = 15 | LIBS = 16 | LIBPATHS = 17 | INCLUDEPATHS = 18 | 19 | ### End User configurable options ### 20 | 21 | ifndef RESTRICT_KEYWORD 22 | RESTRICT_KEYWORD=0 23 | endif 24 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 25 | # data accessed through pointers (requires -restrict compiler flag) 26 | 27 | ifndef SYNCHRONOUS 28 | SYNCHRONOUS=0 29 | endif 30 | #description: turn on synchronous (blocking) communications 31 | 32 | ifndef VERBOSE 33 | VERBOSE=0 34 | endif 35 | #description: default diagnostic style is silent 36 | 37 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 38 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 39 | BLOCKFLAG = -DSYNCHRONOUS=$(SYNCHRONOUS) 40 | 41 | OPTIONSSTRING="Make options:\n\ 42 | OPTION MEANING DEFAULT\n\ 43 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 44 | VERBOSE=0/1 omit/include verbose run information [0] \n\ 45 | SYNCHRONOUS=0/1 Use a/synchronous communications [0]" 46 | 47 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) $(BLOCKFLAG) 48 | PROGRAM = transpose 49 | OBJS = $(PROGRAM).o $(COMOBJS) 50 | 51 | include ../../common/make.common 52 | -------------------------------------------------------------------------------- /MPIOPENMP/Synch_p2p/README: -------------------------------------------------------------------------------- 1 | Use "mpiexec.hydra -np procs -f hostfile -ppn threads ./p2p threads iters xsize ysize" 2 | or try the run script runp2p instead. This script puts all threads of only one rank 3 | on each node, so may be suboptimal. 
4 | -------------------------------------------------------------------------------- /MPIOPENMP/Synch_p2p/runp2p: -------------------------------------------------------------------------------- 1 | if [ $# -ne 6 ]; then 2 | echo "Usage: $0 <#mpi ranks><#threads><#iterations>" 3 | echo "If no hostfile, use \"none\"" 4 | exit 5 | fi 6 | 7 | procs=$1 8 | hostfile=$2 9 | threads=$3 10 | iterations=$4 11 | xsize=$5 12 | ysize=$6 13 | if [ $hostfile == "none" ]; then 14 | mpiexec.hydra -np $procs -ppn 1 ./p2p $threads $iterations $xsize $ysize 15 | else 16 | mpiexec.hydra -np $procs -f $hostfile -ppn 1 ./p2p $threads $iterations $xsize $ysize 17 | fi 18 | -------------------------------------------------------------------------------- /MPIRMA/Synch_p2p/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/MPI.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on you system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | USERFLAGS = 10 | #description: parameter to specify optional flags 11 | 12 | #set the following variables for custom libraries and/or other objects 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef RESTRICT_KEYWORD 21 | RESTRICT_KEYWORD=0 22 | endif 23 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 24 | # data accessed through pointers (requires -restrict compiler flag) 25 | 26 | ifndef VERBOSE 27 | VERBOSE=0 28 | endif 29 | #description: default diagnostic style is silent 30 | 31 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 32 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 33 | 34 | OPTIONSSTRING="Make options:\n\ 35 | OPTION MEANING DEFAULT\n\ 36 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 37 | 
VERBOSE=0/1 omit/include verbose run information [0]" 38 | 39 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 40 | PROGRAM = p2p 41 | # objects below are the default, used by "clean," if invoked 42 | OBJS = $(PROGRAM).o $(COMOBJS) 43 | 44 | include ../../common/make.common 45 | -------------------------------------------------------------------------------- /MPIRMA/Transpose/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/MPI.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on you system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | #set the following variables for custom libraries and/or other objects 14 | EXTOBJS = 15 | LIBS = 16 | LIBPATHS = 17 | INCLUDEPATHS = 18 | 19 | ### End User configurable options ### 20 | 21 | ifndef RESTRICT_KEYWORD 22 | RESTRICT_KEYWORD=0 23 | endif 24 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 25 | # data accessed through pointers (requires -restrict compiler flag) 26 | 27 | ifndef VERBOSE 28 | VERBOSE=0 29 | endif 30 | #description: default diagnostic style is silent 31 | 32 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 33 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 34 | 35 | OPTIONSSTRING="Make options:\n\ 36 | OPTION MEANING DEFAULT\n\ 37 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 38 | VERBOSE=0/1 omit/include verbose run information [0]" 39 | 40 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 41 | PROGRAM = transpose 42 | OBJS = $(PROGRAM).o $(COMOBJS) 43 | 44 | include ../../common/make.common 45 | -------------------------------------------------------------------------------- /MPISHM/Synch_p2p/Makefile: 
-------------------------------------------------------------------------------- 1 | include ../../common/MPI.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on you system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | USERFLAGS = 10 | #description: parameter to specify optional flags 11 | 12 | #set the following variables for custom libraries and/or other objects 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef RESTRICT_KEYWORD 21 | RESTRICT_KEYWORD=0 22 | endif 23 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 24 | # data accessed through pointers (requires -restrict compiler flag) 25 | 26 | ifndef VERBOSE 27 | VERBOSE=0 28 | endif 29 | #description: default diagnostic style is silent 30 | 31 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 32 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 33 | 34 | OPTIONSSTRING="Make options:\n\ 35 | OPTION MEANING DEFAULT\n\ 36 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 37 | VERBOSE=0/1 omit/include verbose run information [0]" 38 | 39 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 40 | PROGRAM = p2p 41 | # objects below are the default, used by "clean," if invoked 42 | OBJS = $(PROGRAM).o $(COMOBJS) 43 | 44 | include ../../common/make.common 45 | -------------------------------------------------------------------------------- /MPISHM/Transpose/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/MPI.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on you system 7 | 8 | #uncomment any of the following flags (and change values) to change 
defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | #set the following variables for custom libraries and/or other objects 14 | EXTOBJS = 15 | LIBS = 16 | LIBPATHS = 17 | INCLUDEPATHS = 18 | 19 | ### End User configurable options ### 20 | 21 | ifndef RESTRICT_KEYWORD 22 | RESTRICT_KEYWORD=0 23 | endif 24 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 25 | # data accessed through pointers (requires -restrict compiler flag) 26 | 27 | ifndef SYNCHRONOUS 28 | SYNCHRONOUS=0 29 | endif 30 | #description: turn on synchronous (blocking) communications 31 | 32 | ifndef VERBOSE 33 | VERBOSE=0 34 | endif 35 | #description: default diagnostic style is silent 36 | 37 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 38 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 39 | BLOCKFLAG = -DSYNCHRONOUS=$(SYNCHRONOUS) 40 | 41 | OPTIONSSTRING="Make options:\n\ 42 | OPTION MEANING DEFAULT\n\ 43 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 44 | VERBOSE=0/1 omit/include verbose run information [0] \n\ 45 | SYNCHRONOUS=0/1 Use a/synchronous communications [0]" 46 | 47 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) $(BLOCKFLAG) 48 | PROGRAM = transpose 49 | OBJS = $(PROGRAM).o $(COMOBJS) 50 | 51 | include ../../common/make.common 52 | -------------------------------------------------------------------------------- /OCTAVE/test_args.m: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env octave -qf 2 | 3 | printf("program name: %s\n", program_name()); 4 | arg_list = argv(); 5 | for i = 1:nargin 6 | printf("argument{%d}: %s\n", i, arg_list{i}); 7 | end 8 | printf("\n"); 9 | -------------------------------------------------------------------------------- /OPENMP/Branch/README: -------------------------------------------------------------------------------- 1 | Option INS_HEAVY of the Branching code requires a customized version 2 | of 
file func.c, which contains a variable number of functions of 3 | variable size. This file is built by invoking script "func_gen," 4 | which takes two integer input parameters. The first is the rank of 5 | the square matrices whose elements get initialized individually by 6 | one of the functions in func.c. Hence, the number of instructions 7 | associated with each such function is proportional to rank*rank. 8 | The second parameter is the number of functions created. These 9 | functions are all slightly different, but all have the same size. 10 | 11 | Usage: func_gen <matrix_rank> <number_of_functions> 12 | The function is invoked with default values 10 and 40 if no 13 | values for these variables are supplied on the make command line. 14 | 15 | -------------------------------------------------------------------------------- /OPENMP/PIC/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/OPENMP.defs 2 | COMOBJS += random_draw.o 3 | 4 | ##### User configurable options ##### 5 | #uncomment any of the following flags (or change values) to change defaults 6 | 7 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 8 | #description: change above into something that is a decent optimization on your system 9 | 10 | #RESTRICTFLAG = -DRESTRICT_KEYWORD 11 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 12 | # data accessed through pointers 13 | 14 | #DEBUGFLAG = -DVERBOSE 15 | #description: default diagnostic style is silent 16 | 17 | #NTHREADFLAG = -DMAXTHREADS=n 18 | #description: default thread limit is 256 19 | 20 | USERFLAGS = 21 | #description: parameter to specify optional flags 22 | 23 | #set the following variables for custom libraries and/or other objects 24 | EXTOBJS = 25 | LIBS = -lm 26 | LIBPATHS = 27 | INCLUDEPATHS = 28 | 29 | ### End User configurable options ### 30 | TUNEFLAGS = $(RESTRICTFLAG) $(DEBUGFLAG) $(USERFLAGS) $(NTHREADFLAG) 31 | PROGRAM = pic 32 | OBJS = $(PROGRAM).o $(COMOBJS) 33 | 34 | include
../../common/make.common 35 | 36 | -------------------------------------------------------------------------------- /OPENMP/Reduce/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/OPENMP.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on you system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | USERFLAGS = 10 | #description: parameter to specify optional flags 11 | 12 | EXTOBJS = 13 | LIBS = 14 | LIBPATHS = 15 | INCLUDEPATHS = 16 | 17 | ### End User configurable options ### 18 | 19 | ifndef RESTRICT_KEYWORD 20 | RESTRICT_KEYWORD=0 21 | endif 22 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 23 | # data accessed through pointers (requires -restrict compiler flag) 24 | 25 | ifndef MAXTHREADS 26 | MAXTHREADS=512 27 | endif 28 | #description: default thread limit is 512 29 | 30 | ifndef VERBOSE 31 | VERBOSE=0 32 | endif 33 | #description: default diagnostic style is silent 34 | 35 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 36 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 37 | NTHREADFLAG = -DMAXTHREADS=$(MAXTHREADS) 38 | 39 | OPTIONSSTRING="Make options:\n\ 40 | OPTION MEANING DEFAULT\n\ 41 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 42 | MAXTHREADS=? 
set maximum number of OpenMP threads [512]\n\ 43 | VERBOSE=0/1 omit/include verbose run information [0]" 44 | 45 | TUNEFLAGS = $(VERBOSEFLAG) $(NTHREADFLAG) $(USERFLAGS)\ 46 | $(RESTRICTFLAG) 47 | PROGRAM = reduce 48 | OBJS = $(PROGRAM).o $(COMOBJS) 49 | 50 | include ../../common/make.common 51 | -------------------------------------------------------------------------------- /OPENMP/Sparse/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/OPENMP.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on you system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef SCRAMBLE 21 | SCRAMBLE=1 22 | endif 23 | #description: if flag is true, grid indices are scrambled to produce irregular stride 24 | 25 | ifndef RESTRICT_KEYWORD 26 | RESTRICT_KEYWORD=0 27 | endif 28 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 29 | # data accessed through pointers (requires -restrict compiler flag) 30 | 31 | ifndef MAXTHREADS 32 | MAXTHREADS=512 33 | endif 34 | #description: default thread limit is 512 35 | 36 | ifndef VERBOSE 37 | VERBOSE=0 38 | endif 39 | #description: default diagnostic style is silent 40 | 41 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 42 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 43 | NTHREADFLAG = -DMAXTHREADS=$(MAXTHREADS) 44 | SCRAMBLEFLAG= -DSCRAMBLE=$(SCRAMBLE) 45 | 46 | OPTIONSSTRING="Make options:\n\ 47 | OPTION MEANING DEFAULT\n\ 48 | SCRAMBLE=0/1 regular/irregular sparsity pattern [1] \n\ 49 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 50 | MAXTHREADS=? 
set maximum number of OpenMP threads [512]\n\ 51 | VERBOSE=0/1 omit/include verbose run information [0]" 52 | 53 | TUNEFLAGS = $(VERBOSEFLAG) $(NTHREADFLAG) $(USERFLAGS) $(SCRAMBLEFLAG) \ 54 | $(RESTRICTFLAG) 55 | PROGRAM = sparse 56 | OBJS = $(PROGRAM).o $(COMOBJS) 57 | 58 | include ../../common/make.common 59 | -------------------------------------------------------------------------------- /OPENMP/Sparse/README: -------------------------------------------------------------------------------- 1 | This program constructs a sparse matrix and performs a (parallel) 2 | sparse matrix-vector multiplication. The sparse matrix is built as 3 | follows. The standard star-shaped discretization stencil with a 4 | user-specified radius is applied to a structured 2-dimensional 5 | grid. Example of a stencil with radius r=2: 6 | 7 | 0 8 | | 9 | 0 10 | | 11 | 0--0--0--0--0 12 | | 13 | 0 14 | | 15 | 0 16 | 17 | Here, the `0' symbol signifies inclusion in the stencil. A square grid 18 | with linear dimension (2^n) has 2^(2n) = 4^n points. The resulting 19 | matrix has (4^n) rows and (4^n) columns, for a total of (16^n) 20 | elements. The user specifies n. The stencil is applied in a periodic 21 | fashion, i.e. it wraps around the edges of the grid. 22 | 23 | If the scramble flag is unset in the Makefile, the discretization 24 | stencil results in a regularly banded sparse matrix, which can be 25 | stored efficiently in vectors, in principle. If the scramble flag is 26 | maintained, the columns of the matrix are permuted, resulting in a 27 | general irregular sparse matrix, but with a known number of nonzeroes 28 | per row (4r+1). We use Compressed Row Storage for accessing the matrix 29 | elements, even in the case of an unset scramble flag. Numerical values 30 | of matrix elements are chosen judiciously to make verification 31 | easy. They do not correspond to any realistic discretization of a 32 | continuum problem. 
33 | -------------------------------------------------------------------------------- /OPENMP/Synch_global/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/OPENMP.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on you system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | USERFLAGS = 10 | #description: parameter to specify optional flags 11 | 12 | #set the following variables for custom libraries and/or other objects 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef MAXTHREADS 21 | MAXTHREADS=512 22 | endif 23 | #description: default thread limit is 512 24 | 25 | ifndef VERBOSE 26 | VERBOSE=0 27 | endif 28 | #description: default diagnostic style is silent 29 | 30 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 31 | NTHREADFLAG = -DMAXTHREADS=$(MAXTHREADS) 32 | 33 | OPTIONSSTRING="Make options:\n\ 34 | OPTION MEANING DEFAULT\n\ 35 | MAXTHREADS=? 
set maximum number of OpenMP threads [512]\n\ 36 | VERBOSE=0/1 omit/include verbose run information [0]" 37 | 38 | TUNEFLAGS = $(VERBOSEFLAG) $(NTHREADFLAG) $(USERFLAGS) 39 | PROGRAM = global 40 | OBJS = $(PROGRAM).o $(COMOBJS) 41 | 42 | include ../../common/make.common 43 | -------------------------------------------------------------------------------- /PYTHON/README.md: -------------------------------------------------------------------------------- 1 | # How to run 2 | 3 | ## mpi4py 4 | 5 | ``` 6 | mpiexec -n 4 python3 -m mpi4py nstream-numpy-mpi.py 10 10000000 7 | mpiexec -n 4 python3 -m mpi4py transpose-numpy-mpi.py 10 1000 8 | ``` 9 | 10 | On Mac with Homebrew, this might work better: 11 | 12 | ``` 13 | mpiexec -n 4 ./nstream-numpy-mpi.py 10 10000000 14 | mpiexec -n 4 ./transpose-numpy-mpi.py 10 1000 15 | ``` 16 | 17 | ## shmem4py 18 | 19 | Checkout shmem4py and build against e.g. SOS like this: 20 | ``` 21 | $ export OSHCC=oshcc 22 | $ python3 -m pip install . 23 | ``` 24 | 25 | Run like this: 26 | ``` 27 | $ oshrun -n 4 python3 nstream-numpy-shmem.py 10 10000000 28 | Parallel Research Kernels version 29 | Python SHMEM/NumPy STREAM triad: A = B + scalar * C 30 | Number of ranks = 4 31 | Number of iterations = 10 32 | Vector length = 10000000 33 | Solution validates 34 | Rate (MB/s): 22345.12038433607 Avg time (s): 0.0143208 35 | ``` 36 | -------------------------------------------------------------------------------- /RUST/Makefile: -------------------------------------------------------------------------------- 1 | # Enable verbose printing 2 | #RCFLAGS += --cfg "VERBOSE" 3 | 4 | # This is now a runtime option 5 | # Stencil radius 6 | #RCFLAGS += --cfg radius="$(RADIUS)" 7 | 8 | # Stencil shape: star is default, uncomment to switch to grid 9 | #RCFLAGS += --cfg grid 10 | 11 | RCFLAGS += --release 12 | 13 | .PHONY: all clean 14 | 15 | all: 16 | cd nstream && cargo build $(RCFLAGS) 17 | cd nstream-unsafe && cargo build $(RCFLAGS) 18 | cd nstream-iter && 
cargo build $(RCFLAGS) 19 | cd nstream-rayon && cargo build $(RCFLAGS) 20 | cd p2p && cargo build $(RCFLAGS) 21 | cd stencil && cargo build $(RCFLAGS) 22 | cd transpose && cargo build $(RCFLAGS) 23 | cd transpose-iter && cargo build $(RCFLAGS) 24 | cd transpose-rayon && cargo build $(RCFLAGS) 25 | cd dgemm && cargo build $(RCFLAGS) 26 | cd dgemm-blis && cargo build $(RCFLAGS) 27 | cd dgemm-iter && cargo build $(RCFLAGS) 28 | cd dgemm-rayon && cargo build $(RCFLAGS) 29 | cd pic && cargo build $(RCFLAGS) 30 | clean: 31 | cd nstream && cargo clean 32 | cd nstream-unsafe && cargo clean 33 | cd nstream-iter && cargo clean 34 | cd nstream-rayon && cargo clean 35 | cd p2p && cargo clean 36 | cd stencil && cargo clean 37 | cd transpose && cargo clean 38 | cd transpose-iter && cargo clean 39 | cd transpose-rayon && cargo clean 40 | cd dgemm && cargo clean 41 | cd dgemm-blis && cargo clean 42 | cd dgemm-iter && cargo clean 43 | cd dgemm-rayon && cargo clean 44 | cd pic && cargo clean 45 | -------------------------------------------------------------------------------- /RUST/common/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "common" 3 | version = "0.1.0" 4 | edition = "2021" 5 | authors = ["Christian Asch "] 6 | 7 | [dependencies] 8 | -------------------------------------------------------------------------------- /RUST/dgemm-blis/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dgemm-blis" 3 | version = "0.5.0" 4 | authors = ["Jeff Hammond ", "Sajid Ali "] 5 | 6 | edition="2021" 7 | 8 | [dependencies] 9 | cblas = "0.4" 10 | blas-src = { version = "0.8", features = ["blis"] } 11 | -------------------------------------------------------------------------------- /RUST/dgemm-iter/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dgemm-iter" 3 | version = "0.1.0" 4 | 
authors = ["Jeff Hammond ", "Sajid Ali "] 5 | 6 | edition = "2021" 7 | -------------------------------------------------------------------------------- /RUST/dgemm-rayon/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dgemm-rayon" 3 | version = "0.1.0" 4 | authors = ["Jeff Hammond ", "Sajid Ali "] 5 | 6 | edition = "2021" 7 | 8 | [dependencies] 9 | rayon = "1.5" 10 | -------------------------------------------------------------------------------- /RUST/dgemm/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dgemm" 3 | version = "0.1.0" 4 | authors = ["Jeff Hammond ", "Sajid Ali "] 5 | 6 | edition="2021" 7 | -------------------------------------------------------------------------------- /RUST/legacy/Makefile: -------------------------------------------------------------------------------- 1 | include ../common/RUST.defs 2 | include ../common/PRKVERSION 3 | 4 | ifndef RADIUS 5 | RADIUS=2 6 | endif 7 | 8 | RUSTC = rustc 9 | RCFLAGS = -g 10 | 11 | # Enable verbose printing 12 | #RCFLAGS += --cfg "VERBOSE" 13 | 14 | # This is now a runtime option 15 | # Stencil radius 16 | #RCFLAGS += --cfg radius="$(RADIUS)" 17 | 18 | # Stencil shape: star is default, uncomment to switch to grid 19 | #RCFLAGS += --cfg grid 20 | 21 | .PHONY: all clean run 22 | 23 | all: p2p stencil transpose 24 | 25 | %: %.rs 26 | $(RUSTC) $(RCFLAGS) $< -o $@ 27 | 28 | clean: 29 | -rm -f *.o 30 | -rm -f *.optrpt 31 | -rm -f *.dwarf 32 | -rm -rf *.dSYM 33 | -rm -f p2p stencil transpose 34 | 35 | -------------------------------------------------------------------------------- /RUST/nstream-iter/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "nstream-iter" 3 | version = "0.1.0" 4 | edition="2021" 5 | authors = ["Jeff Hammond ", "Thomas Hayward-Schneider "] 6 | 7 | [dependencies] 8 | 
-------------------------------------------------------------------------------- /RUST/nstream-rayon/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "nstream-rayon" 3 | version = "0.1.0" 4 | authors = ["Jeff Hammond ", "Thomas Hayward-Schneider ", "Sajid Ali "] 5 | 6 | edition = "2021" 7 | 8 | [dependencies] 9 | rayon = "1.5" 10 | -------------------------------------------------------------------------------- /RUST/nstream-unsafe/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "nstream-unsafe" 3 | version = "0.1.0" 4 | edition="2021" 5 | authors = ["Jeff Hammond ", "Thomas Hayward-Schneider "] 6 | 7 | [dependencies] 8 | -------------------------------------------------------------------------------- /RUST/nstream/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "nstream" 3 | version = "0.1.0" 4 | edition = "2021" 5 | authors = ["Jeff Hammond "] 6 | 7 | [dependencies] 8 | -------------------------------------------------------------------------------- /RUST/p2p/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "p2p" 3 | version = "0.1.0" 4 | edition="2021" 5 | authors = ["Jeff Hammond "] 6 | 7 | [dependencies] 8 | -------------------------------------------------------------------------------- /RUST/pic/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pic" 3 | version = "0.1.0" 4 | edition = "2021" 5 | authors = ["Christian Asch "] 6 | 7 | [dependencies] 8 | clap = { version = "4.5.32", features = ["derive"] } 9 | common = { version = "0.1.0", path = "../common" } 10 | -------------------------------------------------------------------------------- /RUST/stencil/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "stencil" 3 | version = "0.1.0" 4 | edition="2021" 5 | authors = ["Jeff Hammond "] 6 | 7 | [dependencies] 8 | -------------------------------------------------------------------------------- /RUST/transpose-iter/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "transpose-iter" 3 | version = "0.1.0" 4 | authors = ["Jeff Hammond ", "Sajid Ali "] 5 | 6 | edition = "2021" 7 | -------------------------------------------------------------------------------- /RUST/transpose-rayon/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "transpose-rayon" 3 | version = "0.1.0" 4 | authors = ["Jeff Hammond ", "Sajid Ali "] 5 | 6 | edition = "2021" 7 | 8 | [dependencies] 9 | rayon = "1.5" 10 | -------------------------------------------------------------------------------- /RUST/transpose/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "transpose" 3 | version = "0.1.0" 4 | authors = ["Jeff Hammond ", "Sajid Ali "] 5 | 6 | edition = "2021" 7 | -------------------------------------------------------------------------------- /SCALA/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: nstream.class p2p.class stencil.class transpose.class 3 | 4 | %.class: %.scala 5 | scalac -deprecation $< 6 | 7 | clean: 8 | rm -f transpose*.class 9 | rm -f stencil*.class 10 | rm -f nstream*.class 11 | rm -f p2p*.class 12 | 13 | -------------------------------------------------------------------------------- /SCALA/README.md: -------------------------------------------------------------------------------- 1 | # How to build 2 | 3 | Just type `make` 4 | 5 | # How to run 6 | 7 | ``` 8 | JAVA_OPTS="-Xmx4G" scala -nc nstream 10 $((1024*1024*64)) 9 | 
``` 10 | 11 | Note that the environment variable `JAVA_OPTS` sets the maximum memory 12 | used by Java to 4G, which is probably acceptable for most use cases. 13 | The default is quite low and will not allow you to run nstream with 14 | more than ~16MW. 15 | 16 | If you're interested in running in script mode, simply specify the 17 | file name of the source code. 18 | 19 | ``` 20 | JAVA_OPTS="-Xmx4G" scala -nc nstream.scala 10 $((1024*1024*64)) 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /SERIAL/Branch/Makefile: -------------------------------------------------------------------------------- 1 | ifndef NUMBER_OF_FUNCTIONS 2 | NUMBER_OF_FUNCTIONS=40 3 | endif 4 | 5 | ifndef MATRIX_RANK 6 | MATRIX_RANK=10 7 | endif 8 | 9 | include ../../common/SERIAL.defs 10 | ##### User configurable options ##### 11 | 12 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 13 | #description: change above into something that is a decent optimization on your system 14 | 15 | #uncomment any of the following flags (and change values) to change defaults 16 | 17 | USERFLAGS = 18 | #description: parameter to specify optional flags 19 | 20 | EXTOBJS = 21 | LIBS = 22 | LIBPATHS = 23 | INCLUDEPATHS = 24 | 25 | ### End User configurable options ### 26 | 27 | ifndef RESTRICT_KEYWORD 28 | RESTRICT_KEYWORD=0 29 | endif 30 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 31 | # data accessed through pointers (requires -restrict compiler flag) 32 | 33 | ifndef VERBOSE 34 | VERBOSE=0 35 | endif 36 | #description: default diagnostic style is silent 37 | 38 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 39 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 40 | 41 | OPTIONSSTRING="Make options:\n\ 42 | OPTION MEANING DEFAULT\n\ 43 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 44 | VERBOSE=0/1 omit/include verbose run information [0]" 45 | 46 | TUNEFLAGS = $(RESTRICTFLAG) $(VERBOSEFLAG) $(USERFLAGS) 47 |
PROGRAM = branch 48 | OBJS = $(PROGRAM).o $(COMOBJS) func.o 49 | 50 | include ../../common/make.common 51 | 52 | func.c: 53 | @echo "############################################################" 54 | @echo "##### No file func.c -- invoking func_gen to create it #####" 55 | @echo "############################################################" 56 | ./func_gen ${MATRIX_RANK} ${NUMBER_OF_FUNCTIONS} 57 | 58 | veryclean: 59 | @rm -f func.c ___* 60 | $(MAKE) clean 61 | -------------------------------------------------------------------------------- /SERIAL/Branch/README: -------------------------------------------------------------------------------- 1 | Option INS_HEAVY of the Branching code requires a customized version 2 | of file func.c, which contains a variable number of functions of 3 | variable size. This file is built by invoking script "func_gen," 4 | which takes two integer input parameters. The first is the rank of 5 | the square matrices whose elements get initialized individually by 6 | one of the functions in func.c. Hence, the number of instructions 7 | associated with each such function is proportional to rank*rank. 8 | The second parameter is the number of functions created. These 9 | functions are all slightly different, but all have the same size. 10 | 11 | Usage: func_gen 12 | The function is invoked with default values 10 and 40 if no 13 | values for these variables are supplied on the make command line. 
14 | 15 | -------------------------------------------------------------------------------- /SERIAL/PIC/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/SERIAL.defs 2 | COMOBJS += random_draw.o 3 | 4 | ##### User configurable options ##### 5 | 6 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 7 | #description: change above into something that is a decent optimization on your system 8 | 9 | #uncomment any of the following flags (and change values) to change defaults 10 | 11 | USERFLAGS = 12 | #description: parameter to specify optional flags 13 | 14 | #set the following variables for custom libraries and/or other objects 15 | EXTOBJS = 16 | LIBS = -lm 17 | LIBPATHS = 18 | INCLUDEPATHS = 19 | 20 | ### End User configurable options ### 21 | 22 | ifndef RESTRICT_KEYWORD 23 | RESTRICT_KEYWORD=0 24 | endif 25 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 26 | # data accessed through pointers (requires -restrict compiler flag) 27 | 28 | ifndef VERBOSE 29 | VERBOSE=0 30 | endif 31 | #description: default diagnostic style is silent 32 | 33 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 34 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 35 | 36 | OPTIONSSTRING="Make options:\n\ 37 | OPTION MEANING DEFAULT \n\ 38 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 39 | VERBOSE=0/1 omit/include verbose run information [0]" 40 | 41 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 42 | PROGRAM = pic 43 | OBJS = $(PROGRAM).o $(COMOBJS) 44 | 45 | include ../../common/make.common 46 | -------------------------------------------------------------------------------- /SERIAL/Reduce/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/SERIAL.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on your 
system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | USERFLAGS = 10 | #description: parameter to specify optional flags 11 | 12 | EXTOBJS = 13 | LIBS = 14 | LIBPATHS = 15 | INCLUDEPATHS = 16 | 17 | ### End User configurable options ### 18 | 19 | ifndef RESTRICT_KEYWORD 20 | RESTRICT_KEYWORD=0 21 | endif 22 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 23 | # data accessed through pointers (requires -restrict compiler flag) 24 | 25 | ifndef VERBOSE 26 | VERBOSE=0 27 | endif 28 | #description: default diagnostic style is silent 29 | 30 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 31 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 32 | 33 | OPTIONSSTRING="Make options:\n\ 34 | OPTION MEANING DEFAULT\n\ 35 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 36 | VERBOSE=0/1 omit/include verbose run information [0]" 37 | 38 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 39 | PROGRAM = reduce 40 | OBJS = $(PROGRAM).o $(COMOBJS) 41 | 42 | include ../../common/make.common 43 | -------------------------------------------------------------------------------- /SERIAL/Sparse/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/SERIAL.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on your system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef SCRAMBLE 21 | SCRAMBLE=1 22 | endif 23 | #description: if flag is true, grid indices are scrambled to produce irregular stride 24 | 25 | ifndef RESTRICT_KEYWORD 26 | RESTRICT_KEYWORD=0 27 | endif 28 | 
#description: the "restrict" keyword can be used on IA platforms to disambiguate 29 | # data accessed through pointers (requires -restrict compiler flag) 30 | 31 | ifndef VERBOSE 32 | VERBOSE=0 33 | endif 34 | #description: default diagnostic style is silent 35 | 36 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 37 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 38 | SCRAMBLEFLAG= -DSCRAMBLE=$(SCRAMBLE) 39 | 40 | OPTIONSSTRING="Make options:\n\ 41 | OPTION MEANING DEFAULT\n\ 42 | SCRAMBLE=0/1 regular/irregular sparsity pattern [1] \n\ 43 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 44 | VERBOSE=0/1 omit/include verbose run information [0]" 45 | 46 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(SCRAMBLEFLAG) $(RESTRICTFLAG) 47 | PROGRAM = sparse 48 | OBJS = $(PROGRAM).o $(COMOBJS) 49 | 50 | include ../../common/make.common 51 | -------------------------------------------------------------------------------- /SERIAL/Sparse/README: -------------------------------------------------------------------------------- 1 | This program constructs a sparse matrix and performs a (parallel) 2 | sparse matrix-vector multiplication. The sparse matrix is built as 3 | follows. The standard star-shaped discretization stencil with a 4 | user-specified radius is applied to a structured 2-dimensional 5 | grid. Example of a stencil with radius r=2: 6 | 7 | 0 8 | | 9 | 0 10 | | 11 | 0--0--0--0--0 12 | | 13 | 0 14 | | 15 | 0 16 | 17 | Here, the `0' symbol signifies inclusion in the stencil. A square grid 18 | with linear dimension (2^n) has 2^(2n) = 4^n points. The resulting 19 | matrix has (4^n) rows and (4^n) columns, for a total of (16^n) 20 | elements. The user specifies n. The stencil is applied in a periodic 21 | fashion, i.e. it wraps around the edges of the grid. 22 | 23 | If the scramble flag is unset in the Makefile, the discretization 24 | stencil results in a regularly banded sparse matrix, which can be 25 | stored efficiently in vectors, in principle. 
If the scramble flag is 26 | maintained, the columns of the matrix are permuted, resulting in a 27 | general irregular sparse matrix, but with a known number of nonzeroes 28 | per row (4r+1). We use Compressed Row Storage for accessing the matrix 29 | elements, even in the case of an unset scramble flag. Numerical values 30 | of matrix elements are chosen judiciously to make verification 31 | easy. They do not correspond to any realistic discretization of a 32 | continuum problem. 33 | -------------------------------------------------------------------------------- /SERIAL/Synch_p2p/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/SERIAL.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on your system 6 | 7 | #uncomment any of the following flags (and change values) to change defaults 8 | 9 | USERFLAGS = 10 | #description: parameter to specify optional flags 11 | 12 | #set the following variables for custom libraries and/or other objects 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef RESTRICT_KEYWORD 21 | RESTRICT_KEYWORD=0 22 | endif 23 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 24 | # data accessed through pointers (requires -restrict compiler flag) 25 | 26 | ifndef VERBOSE 27 | VERBOSE=0 28 | endif 29 | #description: default diagnostic style is silent 30 | 31 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 32 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 33 | 34 | OPTIONSSTRING="Make options:\n\ 35 | OPTION MEANING DEFAULT\n\ 36 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 37 | VERBOSE=0/1 omit/include verbose run information [0]" 38 | 39 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 40 | PROGRAM = p2p 41 | # objects below are 
the default, used by "clean," if invoked 42 | OBJS = $(PROGRAM).o $(COMOBJS) 43 | 44 | include ../../common/make.common 45 | -------------------------------------------------------------------------------- /SERIAL/Transpose/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/SERIAL.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on your system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | #set the following variables for custom libraries and/or other objects 14 | EXTOBJS = 15 | LIBS = 16 | LIBPATHS = 17 | INCLUDEPATHS = 18 | 19 | ### End User configurable options ### 20 | 21 | ifndef RESTRICT_KEYWORD 22 | RESTRICT_KEYWORD=0 23 | endif 24 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 25 | # data accessed through pointers (requires -restrict compiler flag) 26 | 27 | ifndef VERBOSE 28 | VERBOSE=0 29 | endif 30 | #description: default diagnostic style is silent 31 | 32 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 33 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 34 | 35 | OPTIONSSTRING="Make options:\n\ 36 | OPTION MEANING DEFAULT\n\ 37 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 38 | VERBOSE=0/1 omit/include verbose run information [0]" 39 | 40 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) $(OPTFLAGS) 41 | PROGRAM = transpose 42 | OBJS = $(PROGRAM).o $(COMOBJS) 43 | 44 | include ../../common/make.common 45 | -------------------------------------------------------------------------------- /SHMEM/Synch_p2p/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/SHMEM.defs 2 | ##### User configurable options ##### 3 | 4 | OPTFLAGS = 
$(DEFAULT_OPT_FLAGS) 5 | #description: change above into something that is a decent optimization on your system 6 | 7 | USERFLAGS = 8 | #description: parameter to specify optional flags 9 | 10 | #set the following variables for custom libraries and/or other objects 11 | 12 | EXTOBJS = 13 | LIBS = -lm 14 | LIBPATHS = 15 | INCLUDEPATHS = 16 | 17 | ### End User configurable options ### 18 | 19 | ifndef RESTRICT_KEYWORD 20 | RESTRICT_KEYWORD=0 21 | endif 22 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 23 | # data accessed through pointers (requires -restrict compiler flag) 24 | 25 | ifndef VERBOSE 26 | VERBOSE=0 27 | endif 28 | #description: default diagnostic style is silent 29 | 30 | ifndef SYNCHRONOUS 31 | SYNCHRONOUS=0 32 | endif 33 | #description: default handshake between threads is off 34 | 35 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 36 | RESTRICTFLAG = -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 37 | SYNCHFLAG = -DSYNCHRONOUS=$(SYNCHRONOUS) 38 | 39 | OPTIONSSTRING="Make options:\n\ 40 | OPTION MEANING DEFAULT\n\ 41 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 42 | VERBOSE=0/1 omit/include verbose run information [0] \n\ 43 | SYNCHRONOUS=0/1 disable/enable handshake between threads [0]" 44 | 45 | 46 | TUNEFLAGS = $(DEBUGFLAG) $(USERFLAGS) $(SYNCHFLAG) $(VERBOSEFLAG)\ 47 | $(RESTRICTFLAG) 48 | PROGRAM = p2p 49 | OBJS = $(PROGRAM).o $(COMOBJS) 50 | 51 | include ../../common/make.common 52 | -------------------------------------------------------------------------------- /SHMEM/Transpose/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/SHMEM.defs 2 | 3 | ##### User configurable options ##### 4 | #uncomment any of the following flags (and change values) to change defaults 5 | 6 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 7 | #description: change above into something that is a decent optimization on your system 8 | 9 | USERFLAGS = 10 | #description: 
parameter to specify optional flags 11 | 12 | #set the following variables for custom libraries and/or other objects 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef BARRIER_SYNCH 21 | BARRIER_SYNCH=0 22 | endif 23 | #description: default is not to use a barrier but point to point synchronization 24 | 25 | ifndef VERBOSE 26 | VERBOSE=0 27 | endif 28 | #description: default diagnostic style is silent 29 | 30 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 31 | SYNCHFLAG = -DBARRIER_SYNCH=$(BARRIER_SYNCH) 32 | 33 | OPTIONSSTRING="Make options:\n\ 34 | OPTION MEANING DEFAULT\n\ 35 | BARRIER_SYNCH=0/1 use point-to-point/barrier synchronization [0] \n\ 36 | VERBOSE=0/1 omit/include verbose run information [0]" 37 | 38 | TUNEFLAGS = $(VERBOSEFLAG) $(SYNCHFLAG) 39 | PROGRAM = transpose 40 | OBJS = $(PROGRAM).o $(COMOBJS) 41 | 42 | include ../../common/make.common 43 | 44 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | We strongly prefer to engage users via GitHub. This 2 | automatically preserves useful information for posterity 3 | and provides a natural way to link to specific code. 4 | 5 | Please create GitHub issues to request help of any kind, 6 | including questions about the project. 7 | 8 | If you prefer to communicate with the PRK community via 9 | email, use parallel-research-kernels@googlegroups.com. 10 | This list includes dozens of people who have an interest 11 | in the project, not just the developers. It is a good 12 | forum for asking general questions, the answers to which 13 | may be of interest to many users. 14 | 15 | If GitHub issues does not work for you, you can email the 16 | developers directly. If all else fails, you can find 17 | these in the Google Group history (see above). 
18 | -------------------------------------------------------------------------------- /UPC/README.md: -------------------------------------------------------------------------------- 1 | # Aligned memory 2 | 3 | Berkeley UPC provides page-aligned memory by default. 4 | One can modify this with the runtime environment variable `UPC_SHARED_ALLOC_ALIGN`. 5 | 6 | See [`man upcrun`](http://upc.lbl.gov/docs/user/upcrun.html) for details. 7 | -------------------------------------------------------------------------------- /UPC/Synch_p2p/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/UPC.defs 2 | 3 | ##### User configurable options ##### 4 | #uncomment any of the following flags (and change values) to change defaults 5 | 6 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 7 | #description: change above into something that is a decent optimization on your system 8 | 9 | USERFLAGS = 10 | #description: parameter to specify optional flags 11 | 12 | #set the following variables for custom libraries and/or other objects 13 | EXTOBJS = 14 | LIBS = 15 | LIBPATHS = 16 | INCLUDEPATHS = 17 | 18 | ### End User configurable options ### 19 | 20 | ifndef RESTRICT_KEYWORD 21 | RESTRICT_KEYWORD=0 22 | endif 23 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 24 | # data accessed through pointers (requires -restrict compiler flag) 25 | 26 | ifndef USE_BUPC_EXT 27 | USE_BUPC_EXT=0 28 | endif 29 | #description: default is to not enable BUPC extensions 30 | 31 | ifndef VERBOSE 32 | VERBOSE=0 33 | endif 34 | #description: default diagnostic style is silent 35 | 36 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 37 | RESTRICTFLAG = -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 38 | BUPCFLAG = -DUSE_BUPC_EXT=$(USE_BUPC_EXT) 39 | 40 | OPTIONSSTRING="Make options:\n\ 41 | OPTION MEANING DEFAULT\n\ 42 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 43 | USE_BUPC_EXT=0/1 disable/enable Berkely UPC extensions 
[0] \n\ 44 | VERBOSE=0/1 omit/include verbose run information [0]" 45 | 46 | TUNEFLAGS = $(RESTRICTFLAG) $(VERBOSEFLAG) $(USERFLAGS) $(BUPCFLAG) 47 | PROGRAM = p2p 48 | OBJS = $(PROGRAM).o $(COMOBJS) 49 | 50 | include ../../common/make.common 51 | -------------------------------------------------------------------------------- /UPC/Transpose/Makefile: -------------------------------------------------------------------------------- 1 | include ../../common/UPC.defs 2 | 3 | ##### User configurable options ##### 4 | 5 | OPTFLAGS = $(DEFAULT_OPT_FLAGS) 6 | #description: change above into something that is a decent optimization on your system 7 | 8 | #uncomment any of the following flags (and change values) to change defaults 9 | 10 | USERFLAGS = 11 | #description: parameter to specify optional flags 12 | 13 | #set the following variables for custom libraries and/or other objects 14 | EXTOBJS = 15 | LIBS = 16 | LIBPATHS = 17 | INCLUDEPATHS = 18 | 19 | ### End User configurable options ### 20 | 21 | ifndef RESTRICT_KEYWORD 22 | RESTRICT_KEYWORD=0 23 | endif 24 | #description: the "restrict" keyword can be used on IA platforms to disambiguate 25 | # data accessed through pointers (requires -restrict compiler flag) 26 | 27 | ifndef VERBOSE 28 | VERBOSE=0 29 | endif 30 | #description: default diagnostic style is silent 31 | 32 | VERBOSEFLAG = -DVERBOSE=$(VERBOSE) 33 | RESTRICTFLAG= -DRESTRICT_KEYWORD=$(RESTRICT_KEYWORD) 34 | 35 | OPTIONSSTRING="Make options:\n\ 36 | OPTION MEANING DEFAULT\n\ 37 | RESTRICT_KEYWORD=0/1 disable/enable restrict keyword (aliasing) [0] \n\ 38 | VERBOSE=0/1 omit/include verbose run information [0]" 39 | 40 | TUNEFLAGS = $(VERBOSEFLAG) $(USERFLAGS) $(RESTRICTFLAG) 41 | PROGRAM = transpose 42 | OBJS = $(PROGRAM).o $(COMOBJS) 43 | 44 | include ../../common/make.common 45 | -------------------------------------------------------------------------------- /ci/install-boost.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | os=`uname` 7 | CI_ROOT="$1" 8 | 9 | case "$os" in 10 | Darwin) 11 | brew install boost || brew upgrade boost || true 12 | ;; 13 | 14 | Linux) 15 | sudo apt-get install -y libboost-all-dev 16 | # We do not test Boost.Compute on Linux because of OpenCL issues... 17 | # Boost.Compute is a header-only library 18 | #git clone --depth 1 https://github.com/kylelutz/compute.git ${CI_ROOT}/compute 19 | #git clone --depth 1 https://github.com/boostorg/compute.git ${CI_ROOT}/compute 20 | ;; 21 | esac 22 | -------------------------------------------------------------------------------- /ci/install-chapel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | 8 | os=`uname` 9 | 10 | if [ "$os" = "Darwin" ] || [ "${CHPL_COMM}" = "none" ] ; then 11 | echo "Mac single-locale" 12 | brew install chapel || brew upgrade chapel 13 | brew test chapel 14 | else 15 | # We could test Clang via the C back-end as well, but it seems silly. 16 | # Let GCC exercise C back-end and test the LLVM back-end for Clang. 
17 | if [ "${CC}" = "clang" ] || [ "${CXX}" = "clang++" ] ; then 18 | export CHPL_LLVM=llvm 19 | fi 20 | cd $CI_ROOT 21 | wget -q --no-check-certificate https://github.com/chapel-lang/chapel/releases/download/1.12.0/chapel-1.12.0.tar.gz 22 | tar -xzf chapel-1.12.0.tar.gz 23 | ln -s chapel-1.12.0 chapel 24 | cd chapel 25 | make 26 | mkdir -p "${CI_HOME:-$CI_ROOT}/bin" && ln -s `find $PWD -type f -name chpl` "${CI_HOME:-$CI_ROOT}/bin/chpl" 27 | fi 28 | -------------------------------------------------------------------------------- /ci/install-charm++.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | os=`uname` 7 | CI_ROOT="$1" 8 | # charm++ or AMPI 9 | RUNTIME="$2" 10 | 11 | # not here: pull this out of Travis environment 12 | #CHARM_CONDUIT="$3" 13 | 14 | # unused for now 15 | case "$os" in 16 | Linux) 17 | CHARM_OS=linux 18 | ;; 19 | Darwin) 20 | CHARM_OS=darwin 21 | ;; 22 | esac 23 | 24 | # unused for now 25 | case "$CHARM_CONDUIT" in 26 | multicore) 27 | CHARM_CONDUIT_OPTIONS="multicore-linux64" 28 | ;; 29 | netlrts) 30 | CHARM_CONDUIT_OPTIONS="netlrts-$CHARM_OS-x86_64" 31 | ;; 32 | netlrts-smp) 33 | CHARM_CONDUIT_OPTIONS="netlrts-$CHARM_OS-x86_64 smp" 34 | ;; 35 | esac 36 | 37 | if [ ! -d "$CI_ROOT/charm" ]; then 38 | cd $CI_ROOT 39 | git clone --depth 1 -b v6.8.0 https://charm.cs.illinois.edu/gerrit/charm.git charm 40 | cd charm 41 | case "$os" in 42 | Darwin) 43 | echo "Mac" 44 | #./build $RUNTIME netlrts-darwin-x86_64 --with-production -j4 45 | ./build $RUNTIME netlrts-darwin-x86_64 smp --with-production -j4 46 | ;; 47 | 48 | Linux) 49 | echo "Linux" 50 | # This fails with: The authenticity of host 'localhost (127.0.0.1)' can't be established. 51 | #./build $RUNTIME netlrts-linux-x86_64 --with-production -j4 52 | ./build $RUNTIME netlrts-linux-x86_64 smp --with-production 53 | #./build $RUNTIME multicore-linux64 --with-production 54 | ;; 55 | esac 56 | else 57 | echo "Charm++ or AMPI already installed..." 
58 | case "$RUNTIME" in 59 | AMPI) 60 | find $CI_ROOT/charm -name charmrun 61 | find $CI_ROOT/charm -name ampicc 62 | ;; 63 | charm++) 64 | find $CI_ROOT/charm -name charmrun 65 | find $CI_ROOT/charm -name charmc 66 | ;; 67 | esac 68 | fi 69 | -------------------------------------------------------------------------------- /ci/install-clang.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | 8 | if [ "${CC}" = "clang" ] || [ "${CXX}" = "clang++" ] ; then 9 | os=`uname` 10 | case "$os" in 11 | Darwin) 12 | echo "Mac" 13 | brew install llvm || brew upgrade llvm || true 14 | #brew install libomp || brew upgrade libomp || true 15 | ;; 16 | Linux) 17 | echo "Linux Clang/LLVM builds not supported!" 18 | set +e 19 | for v in "-11" "-10" "-9" "-8" "-7" ; do 20 | sudo apt-get install -y clang$v && sudo apt-get install -y libomp$v-dev 21 | if [ -f /usr/lib/llvm$v/bin/clang ] && [ -f /usr/lib/llvm$v/lib/libomp.so ] ; then 22 | /usr/lib/llvm$v/bin/clang -v 23 | break 24 | fi 25 | done 26 | set -e 27 | ;; 28 | esac 29 | fi 30 | -------------------------------------------------------------------------------- /ci/install-cmake.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | os=`uname` 7 | CI_ROOT="$1" 8 | 9 | case "$os" in 10 | Darwin) 11 | echo "Mac" 12 | brew upgrade cmake || brew install cmake || true 13 | #brew list cmake 14 | ;; 15 | 16 | Linux) 17 | echo "Linux" 18 | if [ ! -d "$CI_ROOT/cmake" ]; then 19 | mkdir -p $CI_ROOT/cmake 20 | # from source 21 | #wget --no-check-certificate -q https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz 22 | #tar -C $CI_ROOT -xzf cmake-3.4.1.tar.gz 23 | #cd ~/cmake-3.4.1 24 | #mkdir build && cd build 25 | #cmake .. 
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$CI_ROOT/cmake 26 | #make -j4 && make install 27 | # from binary 28 | cd $CI_ROOT 29 | wget --no-check-certificate -q https://github.com/Kitware/CMake/releases/download/v3.13.2/cmake-3.13.2-Linux-x86_64.sh 30 | sh ./cmake-3.13.2-Linux-x86_64.sh --prefix=$CI_ROOT/cmake --skip-license --exclude-subdir 31 | else 32 | echo "CMake installed..." 33 | find $CI_ROOT/cmake -name cmake 34 | fi 35 | ;; 36 | esac 37 | -------------------------------------------------------------------------------- /ci/install-cudastf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CI_ROOT="$1" 4 | 5 | cd $CI_ROOT 6 | 7 | TARGETDIR=$CI_ROOT/stf/ 8 | 9 | mkdir -p $TARGETDIR 10 | 11 | git clone https://github.com/NVIDIA/cccl.git $TARGETDIR/cccl/ 12 | -------------------------------------------------------------------------------- /ci/install-executors.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | 8 | git clone --depth 1 https://github.com/facebookexperimental/libunifex.git $CI_ROOT/libunifex 9 | cd $CI_ROOT/libunifex 10 | mkdir build 11 | cd build 12 | cmake .. -DCMAKE_CXX_COMPILER=${CXX} -DCMAKE_C_COMPILER=${CC} -DCMAKE_CXX_FLAGS="-std=c++20" 13 | -------------------------------------------------------------------------------- /ci/install-fgmpi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | 8 | if [ ! 
-d "$CI_ROOT/fgmpi" ]; then 9 | 10 | # TAR build 11 | wget --no-check-certificate -q http://www.cs.ubc.ca/~humaira/code/fgmpi-2.0.tar.gz 12 | tar -C $CI_ROOT -xzf fgmpi-2.0.tar.gz 13 | cd $CI_ROOT/fgmpi-2.0 14 | 15 | # GIT build 16 | #cd $CI_ROOT 17 | #git clone --depth 1 https://github.com/humairakamal/fgmpi.git fgmpi-source 18 | #cd fgmpi-source 19 | ## this may fail on older autotools 20 | #./autogen.sh 21 | 22 | # TAR or GIT 23 | mkdir build && cd build 24 | # Clang defaults to C99, which chokes on "Set_PROC_NULL" 25 | ../configure --disable-fortran --disable-romio CFLAGS="-std=gnu89 -w" --prefix=$CI_ROOT/fgmpi 26 | make -j2 27 | make install 28 | 29 | # Package install 30 | # TODO (restore from older version but unpack in $CI_ROOT without sudo) 31 | 32 | else 33 | echo "FG-MPI installed..." 34 | find $CI_ROOT/fgmpi -name mpiexec 35 | find $CI_ROOT/fgmpi -name mpicc 36 | mpicc -show 37 | fi 38 | -------------------------------------------------------------------------------- /ci/install-ga.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | 8 | if [ ! -d "$CI_ROOT/ga" ]; then 9 | git clone -b develop https://github.com/GlobalArrays/ga.git $CI_ROOT/ga-src 10 | cd $CI_ROOT/ga-src 11 | ./autogen.sh 12 | mkdir build 13 | cd build 14 | #../configure CC=mpicc --prefix=$CI_ROOT/ga 15 | #../configure --with-mpi3 MPICC=mpiicc MPICXX=mpiicpc MPIFC=mpiifort MPIF77=mpiifort --prefix=$CI_ROOT/ga && make -j8 install 16 | ../configure --with-armci=${CI_ROOT}/armci-mpi MPICC=mpiicc MPICXX=mpiicpc MPIFC=mpiifort MPIF77=mpiifort --prefix=$CI_ROOT/ga && make -j8 install 17 | make 18 | make install 19 | else 20 | echo "Global Arrays installed..." 
21 | find $CI_ROOT/ga -name ga.h 22 | fi 23 | -------------------------------------------------------------------------------- /ci/install-gcc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | os=`uname` 7 | CI_ROOT="$1" 8 | 9 | if [ "${CC}" = "gcc" ] || [ "${CXX}" = "g++" ] ; then 10 | case "$os" in 11 | Darwin) 12 | echo "Mac" 13 | # this is 5.3.0 or later 14 | brew upgrade gcc || brew install gcc --force-bottle || true 15 | brew link --overwrite --dry-run gcc 16 | brew link --overwrite gcc || true 17 | ;; 18 | Linux) 19 | echo "Linux" 20 | set +e 21 | for v in "-10" "-9" "-8" "-7" "-6" "-5" ; do 22 | sudo apt-get install -y gcc$v g++$v gfortran$v 23 | done 24 | set -e 25 | ;; 26 | esac 27 | fi 28 | -------------------------------------------------------------------------------- /ci/install-grappa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | os=`uname` 7 | CI_ROOT="$1" 8 | 9 | # TODO: Make compiler and MPI configurable... 10 | 11 | if [ ! -d "$CI_ROOT/grappa" ]; then 12 | case "$os" in 13 | Darwin) 14 | echo "Mac" 15 | #brew install ruby boost 16 | # Homebrew location 17 | export MPI_ROOT=/usr/local 18 | ;; 19 | 20 | Linux) 21 | echo "Linux" 22 | export MPI_ROOT=$CI_ROOT 23 | ;; 24 | esac 25 | 26 | cd $CI_ROOT 27 | git clone --depth 1 https://github.com/uwsampa/grappa.git grappa-source 28 | cd grappa-source 29 | # DEBUG 30 | #find /usr -name gcc\* -type f 31 | #find $CI_ROOT 32 | # END 33 | # Invoking CMake directly 34 | mkdir build && cd build 35 | if [ -f ~/use-intel-compilers ] ; then 36 | cmake .. -DGRAPPA_INSTALL_PREFIX=$CI_ROOT/grappa \ 37 | -DCMAKE_C_COMPILER="mpiicc" \ 38 | -DCMAKE_CXX_COMPILER="mpiicpc" \ 39 | -DMPI_C_COMPILER="mpiicc" \ 40 | -DMPI_CXX_COMPILER="mpiicpc" 41 | else 42 | cmake .. 
-DGRAPPA_INSTALL_PREFIX=$CI_ROOT/grappa \ 43 | -DCMAKE_C_COMPILER="$MPI_ROOT/bin/mpicc" \ 44 | -DCMAKE_CXX_COMPILER="$MPI_ROOT/bin/mpicxx" \ 45 | -DMPI_C_COMPILER="$MPI_ROOT/bin/mpicc" \ 46 | -DMPI_CXX_COMPILER="$MPI_ROOT/bin/mpicxx" 47 | #-DMPI_C_LINK_FLAGS="-L$MPI_ROOT/lib" \ 48 | #-DMPI_C_LIBRARIES="-lmpi" \ 49 | #-DMPI_C_INCLUDE_PATH="$MPI_ROOT/include" \ 50 | #-DMPI_CXX_LINK_FLAGS="-L$MPI_ROOT/lib" \ 51 | #-DMPI_CXX_LIBRARIES="-lmpicxx -lmpi" \ 52 | #-DMPI_CXX_INCLUDE_PATH="$MPI_ROOT/include" \ 53 | fi 54 | make -j2 55 | make install 56 | else 57 | echo "Grappa installed..." 58 | find $CI_ROOT -name grappa.mk 59 | fi 60 | -------------------------------------------------------------------------------- /ci/install-hpx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | if [ -f ~/use-intel-compilers ] ; then 7 | export CC=icc 8 | export CXX=icpc 9 | export FC=ifort 10 | fi 11 | 12 | TRAVIS_ROOT="$1" 13 | 14 | case "$TRAVIS_OS_NAME" in 15 | linux) 16 | ;; 17 | osx) 18 | set +e 19 | brew update 20 | for p in boost jemalloc gperftools ; do 21 | brew install $p || brew upgrade $p 22 | done 23 | set -e 24 | ;; 25 | esac 26 | 27 | if [ ! -d "$TRAVIS_ROOT/hpx" ]; then 28 | cd $TRAVIS_ROOT 29 | git clone --depth 1 https://github.com/STEllAR-GROUP/hpx.git hpx-source 30 | cd hpx-source 31 | mkdir build 32 | cd build 33 | cmake .. -DCMAKE_INSTALL_PREFIX:PATH=$TRAVIS_ROOT/hpx -DCMAKE_MACOSX_RPATH=YES -DHPX_WITH_HWLOC=OFF 34 | make -j2 35 | # make check # target does not exist 36 | make install 37 | else 38 | echo "HPX installed..." 
39 | find $TRAVIS_ROOT/hpx 40 | fi 41 | -------------------------------------------------------------------------------- /ci/install-hpx3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | os=`uname` 8 | 9 | case "$os" in 10 | Linux) 11 | ;; 12 | Darwin) 13 | set +e 14 | if [ "$USE_HPX_TARBALL" ] ; then 15 | export HPX_BOOST="homebrew/versions/boost155" 16 | else 17 | export HPX_BOOST="boost" 18 | fi 19 | for p in $HPX_BOOST jemalloc gperftools ; do 20 | brew install $p || brew upgrade $p 21 | done 22 | set -e 23 | ;; 24 | esac 25 | 26 | if [ ! -d "$CI_ROOT/hpx3" ]; then 27 | cd $CI_ROOT 28 | #if [ "$USE_HPX_TARBALL" ] ; then 29 | # wget -q --no-check-certificate http://stellar.cct.lsu.edu/files/hpx_0.9.11.tar.bz2 30 | # if [ `which md5` ] ; then 31 | # echo "MD5 signature is:" 32 | # md5 hpx_0.9.11.tar.bz2 33 | # echo "MD5 signature should be:" 34 | # echo "86a71189fb6344d27bf53d6aa2b33122" 35 | # fi 36 | # tar -xjf hpx_0.9.11.tar.bz2 37 | # cd hpx_0.9.11 38 | #else 39 | git clone --depth 1 https://github.com/STEllAR-GROUP/hpx.git hpx3-source 40 | cd hpx3-source 41 | #fi 42 | mkdir build 43 | cd build 44 | cmake .. -DCMAKE_INSTALL_PREFIX:PATH=$CI_ROOT/hpx3 -DCMAKE_MACOSX_RPATH=YES -DHPX_WITH_HWLOC=OFF 45 | make -j2 46 | # make check # target does not exist 47 | make install 48 | else 49 | echo "HPX-3 installed..." 50 | find $CI_ROOT/hpx3 51 | fi 52 | -------------------------------------------------------------------------------- /ci/install-hpx5.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | 8 | if [ ! 
-d "$CI_ROOT/hpx5" ] ; then 9 | cd $CI_ROOT 10 | if [ "0" = "1" ] ; then 11 | wget -q --no-check-certificate http://hpx.crest.iu.edu/release/HPX_Release_v2.0.0.tar.gz 12 | if [ `which shasum` ] ; then 13 | echo "SHA-256 signature is:" 14 | shasum -a 256 HPX_Release_v2.0.0.tar.gz 15 | echo "SHA-256 signature should be:" 16 | echo "647c5f0ef3618f734066c91d741021d7bd38cf21" 17 | fi 18 | tar -xzf HPX_Release_v2.0.0.tar.gz 19 | cd HPX_Release_v2.0.0/hpx 20 | else 21 | export GIT_SSL_NO_VERIFY=1 22 | git clone --depth 1 http://gitlab.crest.iu.edu/extreme/hpx.git hpx5-source 23 | cd hpx5-source 24 | fi 25 | ./bootstrap 26 | ./configure --prefix=$CI_ROOT/hpx5 27 | make -j2 28 | make check 29 | make install 30 | else 31 | echo "HPX-5 installed..." 32 | find $CI_ROOT/hpx5 -name hpx-config 33 | fi 34 | -------------------------------------------------------------------------------- /ci/install-hydra.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | HYDRA_ROOT=$CI_ROOT/hydra 8 | 9 | if [ ! -d "$HYDRA_ROOT" ]; then 10 | cd $CI_ROOT 11 | wget --no-check-certificate -q http://www.mpich.org/static/downloads/3.2/hydra-3.2.tar.gz 12 | tar -xzf hydra-3.2.tar.gz 13 | cd hydra-3.2 14 | ./configure CC=cc --prefix=$HYDRA_ROOT 15 | make && make install 16 | else 17 | echo "MPICH Hydra installed..." 18 | find $HYDRA_ROOT -name pmi.h 19 | fi 20 | -------------------------------------------------------------------------------- /ci/install-julia.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | os=`uname` 7 | CI_ROOT="$1" 8 | 9 | case "$os" in 10 | Darwin) 11 | echo "Mac" 12 | brew cask upgrade julia || brew cask install julia 13 | ;; 14 | Linux) 15 | echo "Linux" 16 | JULIA_NAME=julia-1.3.1 17 | if [ ! 
-d "$CI_ROOT/$JULIA_NAME" ]; then 18 | cd $CI_ROOT 19 | wget --no-check-certificate -q https://julialang-s3.julialang.org/bin/linux/x64/1.3/julia-1.3.1-linux-x86_64.tar.gz 20 | tar -C $CI_ROOT -xzvf julia-1.3.1-linux-x86_64.tar.gz 21 | # symbolic link was not working for reasons i cannot explain 22 | ln -s $CI_ROOT/$JULIA_NAME $CI_ROOT/julia 23 | find $CI_ROOT -type f -name julia 24 | fi 25 | ;; 26 | esac 27 | -------------------------------------------------------------------------------- /ci/install-legion.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | 8 | echo "compiler versions:" 9 | $CC --version 10 | $CXX --version 11 | 12 | if [ ! -d "$CI_ROOT/legion" ]; then 13 | cd $CI_ROOT 14 | git clone -b master --depth 1 https://github.com/StanfordLegion/legion.git 15 | else 16 | echo "Legion present..." 17 | find $CI_ROOT/legion 18 | fi 19 | -------------------------------------------------------------------------------- /ci/install-libfabric.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | 8 | if [ ! -d "$CI_ROOT/libfabric" ]; then 9 | cd $CI_ROOT 10 | git clone --depth 1 https://github.com/ofiwg/libfabric.git libfabric-source 11 | #git clone -b 'v1.5.2' --depth 1 https://github.com/ofiwg/libfabric.git libfabric-source 12 | cd libfabric-source 13 | ./autogen.sh 14 | ./configure CC=cc --prefix=$CI_ROOT/libfabric 15 | make 16 | make install 17 | export FI_LOG_LEVEL=error 18 | else 19 | echo "OFI/libfabric installed..." 
20 | find $CI_ROOT -name "fi.h" 21 | fi 22 | -------------------------------------------------------------------------------- /ci/install-musl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | MUSL_CC="$2" 8 | os=`uname` 9 | 10 | if [ "${MUSL_CC}" = "" ] ; then 11 | MUSL_CC=${CC} 12 | fi 13 | 14 | WEBSITE=https://www.musl-libc.org 15 | VERSION=1.1.16 16 | DIRECTORY=releases 17 | 18 | if [ "$os" = "Linux" ] ; then 19 | cd ${CI_ROOT} 20 | wget --no-check-certificate -q ${WEBSITE}/${DIRECTORY}/musl-${VERSION}.tar.gz 21 | tar -xzf musl-${VERSION}.tar.gz 22 | cd musl-${VERSION} 23 | ./configure --prefix=${CI_ROOT}/musl CC=${MUSL_CC} && make -j2 && make install 24 | else 25 | echo "MUSL does not support Mac" 26 | exit 99 27 | fi 28 | 29 | -------------------------------------------------------------------------------- /ci/install-occa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | 8 | case $CXX in 9 | g++) 10 | for major in "-9" "-8" "-7" "-6" "-5" "" ; do 11 | if [ -f "`which ${CXX}${major}`" ]; then 12 | export PRK_CXX="${CXX}${major}" 13 | export PRK_CC="${CC}${major}" 14 | echo "Found C++: $PRK_CXX" 15 | break 16 | fi 17 | done 18 | if [ "x$PRK_CXX" = "x" ] ; then 19 | export PRK_CXX="${CXX}" 20 | export PRK_CC="${CC}" 21 | fi 22 | ;; 23 | clang++) 24 | for version in "-7" "-6" "-5" "-4" "-3.9" "-3.8" "-3.7" "-3.6" "" ; do 25 | if [ -f "`which ${CXX}${version}`" ]; then 26 | export PRK_CXX="${CXX}${version}" 27 | export PRK_CC="${CC}${version}" 28 | echo "Found C++: $PRK_CXX" 29 | break 30 | fi 31 | done 32 | if [ "x$PRK_CXX" = "x" ] ; then 33 | export PRK_CXX="${CXX}" 34 | export PRK_CC="${CC}" 35 | fi 36 | ;; 37 | esac 38 | ${PRK_CXX} -v 39 | 40 | if [ ! 
-d "$CI_ROOT/occa" ]; then 41 | BRANCH="1.0" 42 | git clone --recursive --depth 1 -b ${BRANCH} https://github.com/libocca/occa.git $CI_ROOT/occa 43 | CXX=${PRK_CXX} OCCA_CUDA_ENABLED=0 OCCA_FORTRAN_ENABLED=0 make -C $CI_ROOT/occa 44 | else 45 | echo "OCCA installed..." 46 | find $CI_ROOT/occa -name occa.hpp 47 | fi 48 | -------------------------------------------------------------------------------- /ci/install-octave.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | os=`uname` 7 | CI_ROOT="$1" 8 | 9 | case "$os" in 10 | Darwin) 11 | echo "Mac" 12 | brew tap homebrew/science 13 | brew install octave || brew upgrade octave 14 | ;; 15 | 16 | Linux) 17 | echo "Linux not supported" 18 | ;; 19 | esac 20 | -------------------------------------------------------------------------------- /ci/install-ornl-openshmem.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | os=`uname` 7 | CI_ROOT="$1" 8 | 9 | -------------------------------------------------------------------------------- /ci/install-oshmpi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | 8 | if [ ! -d "$CI_ROOT/oshmpi" ]; then 9 | git clone --depth 1 https://github.com/jeffhammond/oshmpi.git 10 | cd oshmpi 11 | ./autogen.sh 12 | ./configure CC=mpicc --prefix=$CI_ROOT/oshmpi 13 | make 14 | make install 15 | else 16 | echo "OSHMPI installed..." 17 | find $CI_ROOT/oshmpi -name shmem.h 18 | fi 19 | -------------------------------------------------------------------------------- /ci/install-petsc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | 8 | if [ ! 
-f "$CI_ROOT/petsc/include/petsc.h" ]; then 9 | if [ -d "$CI_ROOT/petsc-src" ]; then 10 | cd $CI_ROOT/petsc-src 11 | git pull 12 | else 13 | git clone -b maint https://gitlab.com/petsc/petsc.git $CI_ROOT/petsc-src 14 | fi 15 | cd $CI_ROOT/petsc-src 16 | ./configure --prefix=$CI_ROOT/petsc \ 17 | --with-blaslapack-dir=$MKLROOT \ 18 | --with-mpi-dir=$I_MPI_ROOT \ 19 | --with-cxx=0 --with-fc=0 20 | make PETSC_DIR=$CI_ROOT/petsc-src PETSC_ARCH=arch-linux-c-debug all 21 | make PETSC_DIR=$CI_ROOT/petsc-src PETSC_ARCH=arch-linux-c-debug install 22 | else 23 | echo "PETSc installed..." 24 | cat $CI_ROOT/petsc/lib/petsc/conf/reconfigure*.py 25 | fi 26 | -------------------------------------------------------------------------------- /ci/install-pstl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | os=`uname` 7 | CI_ROOT="$1" 8 | 9 | git clone --depth 1 https://github.com/llvm-mirror/pstl.git $CI_ROOT/llvm-pstl-git 10 | cd $CI_ROOT/llvm-pstl-git 11 | mkdir build 12 | cd build 13 | cmake .. -DCMAKE_INSTALL_PREFIX=$CI_ROOT/pstl 14 | make -j2 install 15 | 16 | #case "$os" in 17 | # Darwin) 18 | # echo "Mac" 19 | # brew upgrade parallelstl || brew install parallelstl 20 | # ;; 21 | # Linux) 22 | # echo "Linux" 23 | # if [ ! 
-d "$CI_ROOT/pstl" ]; then 24 | # git clone --depth 1 https://github.com/intel/parallelstl.git $CI_ROOT/pstl 25 | # fi 26 | # ;; 27 | #esac 28 | -------------------------------------------------------------------------------- /ci/install-python.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | os=`uname` 7 | CI_ROOT="$1" 8 | 9 | case "$os" in 10 | Darwin) 11 | echo "Mac" 12 | brew unlink python@2 || brew uninstall python@2 13 | brew upgrade python || brew install python 14 | brew upgrade numpy || brew install numpy 15 | brew link --overwrite python 16 | ;; 17 | Linux) 18 | echo "Linux" 19 | # -y: CI is non-interactive, so apt-get must not stop at the confirmation prompt 20 | sudo apt-get install -y python3-numpy 21 | #sudo apt-get install -y python3-numba 22 | ;; 23 | esac 24 | -------------------------------------------------------------------------------- /ci/install-raja.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | 8 | case $CXX in 9 | g++) 10 | for major in "-9" "-8" "-7" "-6" "-5" "" ; do 11 | if [ -f "`which ${CXX}${major}`" ]; then 12 | export PRK_CXX="${CXX}${major}" 13 | export PRK_CC="${CC}${major}" 14 | echo "Found C++: $PRK_CXX" 15 | break 16 | fi 17 | done 18 | if [ "x$PRK_CXX" = "x" ] ; then 19 | export PRK_CXX="${CXX}" 20 | export PRK_CC="${CC}" 21 | fi 22 | USE_OPENMP="On" 23 | ;; 24 | clang++) 25 | for version in "-5" "-4" "-3.9" "-3.8" "-3.7" "-3.6" "" ; do 26 | if [ -f "`which ${CXX}${version}`" ]; then 27 | export PRK_CXX="${CXX}${version}" 28 | export PRK_CC="${CC}${version}" 29 | echo "Found C++: $PRK_CXX" 30 | break 31 | fi 32 | done 33 | if [ "x$PRK_CXX" = "x" ] ; then 34 | export PRK_CXX="${CXX}" 35 | export PRK_CC="${CC}" 36 | fi 37 | USE_OPENMP="Off" 38 | ;; 39 | esac 40 | ${PRK_CXX} -v 41 | 42 | if [ ! 
-d "$CI_ROOT/raja" ]; then 43 | BRANCH=develop 44 | git clone --recursive --depth 1 -b ${BRANCH} https://github.com/LLNL/RAJA.git 45 | cd RAJA 46 | mkdir build 47 | cd build 48 | cmake .. -DCMAKE_CXX_COMPILER=${PRK_CXX} -DCMAKE_C_COMPILER=${PRK_CC} \ 49 | -DCMAKE_INSTALL_PREFIX=${CI_ROOT}/raja \ 50 | -DENABLE_TBB=On -DENABLE_OPENMP=${USE_OPENMP} 51 | make -j2 52 | make install -j2 53 | else 54 | echo "RAJA installed..." 55 | find $CI_ROOT/raja -name RAJA.hxx 56 | fi 57 | -------------------------------------------------------------------------------- /ci/install-ranges.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | os=`uname` 8 | 9 | if [ "$os" = "Darwin" ] ; then 10 | git clone --depth 1 https://github.com/ericniebler/range-v3.git $CI_ROOT/range-v3 11 | else 12 | sh ./ci/install-boost.sh $CI_ROOT 13 | fi 14 | -------------------------------------------------------------------------------- /ci/install-rust.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | os=`uname` 7 | CI_ROOT="$1" 8 | 9 | case "$os" in 10 | Darwin) 11 | echo "Mac" 12 | brew tap homebrew/core 13 | brew install rust || brew upgrade rust 14 | ;; 15 | Linux) 16 | echo "Linux not supported" 17 | ;; 18 | esac 19 | -------------------------------------------------------------------------------- /ci/install-sandia-openshmem.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | SHMEM_ROOT=$CI_ROOT/sandia-openshmem 8 | 9 | if [ ! 
-d "$SHMEM_ROOT" ]; then 10 | # HEAD 11 | #git clone --depth 1 https://github.com/Sandia-OpenSHMEM/SOS.git sandia-shmem 12 | #cd sandia-shmem 13 | VERSION=1.4.2 14 | #git clone -b v$VERSION --depth 1 https://github.com/Sandia-OpenSHMEM/SOS.git SOS-$VERSION 15 | wget https://github.com/Sandia-OpenSHMEM/SOS/archive/v$VERSION.tar.gz 16 | tar -xzf v$VERSION.tar.gz 17 | cd SOS-$VERSION 18 | ./autogen.sh 19 | mkdir build 20 | cd build 21 | # Removed # --with-pmi=$CI_ROOT/hydra per Jim 22 | ../configure --with-libfabric=$CI_ROOT/libfabric \ 23 | --disable-fortran \ 24 | --enable-error-checking \ 25 | --enable-pmi-simple \ 26 | --prefix=$SHMEM_ROOT 27 | #--enable-remote-virtual-addressing \ 28 | make 29 | make check | true 30 | make install 31 | else 32 | echo "Sandia OpenSHMEM installed..." 33 | find $SHMEM_ROOT -name shmem.h 34 | fi 35 | -------------------------------------------------------------------------------- /ci/install-sycl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | 8 | git clone --depth 1 https://github.com/triSYCL/triSYCL.git $CI_ROOT/triSYCL 9 | -------------------------------------------------------------------------------- /ci/install-tbb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | CI_ROOT="$1" 7 | os=`uname` 8 | 9 | WEBSITE=https://github.com/01org/tbb/releases/download 10 | VERSION=2018_U1 11 | DIRECTORY=tbb2018_20170919oss 12 | 13 | case "$os" in 14 | Darwin) 15 | echo "Mac" 16 | wget --no-check-certificate -q ${WEBSITE}/${VERSION}/${DIRECTORY}_mac.tgz 17 | tar -xzf ${DIRECTORY}_mac.tgz 18 | ;; 19 | 20 | Linux) 21 | echo "Linux" 22 | wget --no-check-certificate -q ${WEBSITE}/${VERSION}/${DIRECTORY}_lin.tgz 23 | tar -xzf ${DIRECTORY}_lin.tgz 24 | ;; 25 | esac 26 | export TBBROOT=${PWD}/${DIRECTORY} 27 | mv ${TBBROOT} ${CI_ROOT}/tbb 28 | find ${CI_ROOT}/tbb -name 
"libtbb.so" 29 | -------------------------------------------------------------------------------- /ci/install-upcxx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | if [ -f ~/use-intel-compilers ] ; then 7 | export CC=icc 8 | export CXX=icpc 9 | export FC=ifort 10 | fi 11 | 12 | TRAVIS_ROOT="$1" 13 | 14 | UPCXX_RELEASE=upcxx-2019.9.0 15 | UPCXX_PREFIX=$TRAVIS_ROOT/$UPCXX_RELEASE 16 | 17 | if [ ! -d "$UPCXX_PREFIX" ]; then 18 | cd $TRAVIS_ROOT 19 | wget --no-check-certificate -q https://bitbucket.org/berkeleylab/upcxx/downloads/${UPCXX_RELEASE}.tar.gz 20 | tar -xzf $UPCXX_RELEASE.tar.gz 21 | cd $UPCXX_RELEASE 22 | ./install $TRAVIS_ROOT/upcxx 23 | else 24 | echo "UPC++ installed..." 25 | find $TRAVIS_ROOT/upcxx -name upcxx -type f 26 | fi 27 | 28 | -------------------------------------------------------------------------------- /common/AMPI.defs: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | ifeq ($(CHARMTOP),) 3 | AMPICC=true 4 | else 5 | AMPICC=$(CHARMTOP)/bin/ampicc 6 | endif 7 | CCOMPILER=$(AMPICC) 8 | CITRANSLATOR=$(AMPICC) -E 9 | CLINKER=$(CCOMPILER) -language ampi 10 | COMOBJS=MPI_bail_out.o wtime.o 11 | PROG_ENV=-DADAPTIVE_MPI 12 | -------------------------------------------------------------------------------- /common/CHARM++.defs: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | ifeq ($(CHARMTOP),) 3 | CHARMC=true 4 | else 5 | CHARMC=$(CHARMTOP)/bin/charmc 6 | endif 7 | CCOMPILER=$(CHARMC) 8 | CITRANSLATOR=$(CHARMC) -E 9 | CLINKER=$(CCOMPILER) -language charm++ 10 | COMOBJS= 11 | PROG_ENV=-DCHARMXX 12 | -------------------------------------------------------------------------------- /common/FENIX.defs: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | 
CCOMPILER=${ULFMTOP}/bin/mpicc 3 | CLINKER=$(CCOMPILER) 4 | COMOBJS=FENIX_bail_out.o wtime.o 5 | PROG_ENV=-DFENIX 6 | -------------------------------------------------------------------------------- /common/FG_MPI.defs: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | ifeq ($(FGMPICC),) 3 | FGMPICC=true 4 | endif 5 | CCOMPILER=$(FGMPICC) 6 | CLINKER=$(CCOMPILER) 7 | COMOBJS=MPI_bail_out.o wtime.o 8 | PROG_ENV=-DFG_MPI 9 | -------------------------------------------------------------------------------- /common/GRAPPA.defs: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | ifneq ($(GRAPPATOP),) 3 | include $(GRAPPATOP)/share/Grappa/grappa.mk 4 | endif 5 | CCOMPILER=$(GRAPPA_CXX) 6 | CLINKER=$(GRAPPA_LD) 7 | COMOBJS= 8 | PROG_ENV=-DGRAPPA 9 | -------------------------------------------------------------------------------- /common/LEGION.defs: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | -------------------------------------------------------------------------------- /common/MPI.defs: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | CCOMPILER=$(MPICC) 3 | CLINKER=$(CCOMPILER) 4 | COMOBJS=MPI_bail_out.o wtime.o 5 | PROG_ENV=-DMPI 6 | -------------------------------------------------------------------------------- /common/MPIOPENMP.defs: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | CCOMPILER=$(MPICC) 3 | CLINKER=$(CCOMPILER) 4 | COMOBJS=MPI_bail_out.o wtime.o 5 | PROG_ENV=-DMPI $(OPENMPFLAG) 6 | -------------------------------------------------------------------------------- /common/OPENMP.defs: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | 
CCOMPILER =$(CC) 3 | CLINKER = $(CCOMPILER) 4 | COMOBJS = wtime.o OPENMP_bail_out.o 5 | PROG_ENV = $(OPENMPFLAG) 6 | -------------------------------------------------------------------------------- /common/OPENMP_bail_out.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | 3 | Name: bail_out 4 | 5 | Purpose: Exit gracefully when an OpenMP thread has encountered an error 6 | inside a parallel region 7 | 8 | Arguments: error code (zero for no error). 9 | 10 | Returns: nothing, but the program terminates with a nonzero exit status 11 | 12 | Notes: This function must be called by all threads in the team. Multiple 13 | threads may have tried to update the shared error variable at the 14 | same time, so this needs to be done atomically if we want to 15 | guarantee that the value of 1 is put into error. In our case, 16 | however, we merely want to know if the value is different from 17 | zero, so we do not need atomicity. 18 | 19 | History: Written by Rob Van der Wijngaart, July 2006 20 | 21 | **********************************************************************/ 22 | 23 | #include 24 | 25 | void bail_out(int error) { 26 | 27 | #pragma omp barrier 28 | if (error != 0) exit(EXIT_FAILURE); 29 | } 30 | -------------------------------------------------------------------------------- /common/PRKVERSION: -------------------------------------------------------------------------------- 1 | PRKVERSION="2020" 2 | -------------------------------------------------------------------------------- /common/README.freebsd: -------------------------------------------------------------------------------- 1 | This is a rather terse summary of what is required to build the PRKs on FreeBSD. 2 | 3 | # Necessary Packages 4 | 5 | BSD make isn't GNU make, which the PRK assumes. 6 | 7 | sudo pkg install gmake 8 | 9 | I assume GCC works fine as it does on Linux but I tested LLVM. 
10 | OpenMP target is not supported by LLVM 6.0.1 so those compilations will fail. 11 | 12 | sudo pkg install clang flang libpgmath 13 | 14 | ## C++ dependencies 15 | 16 | sudo pkg install opencl-2.2_1 17 | sudo pkg install devel/clinfo devel/ocl-icd lang/beignet lang/pocl 18 | sudo pkg install tbb 19 | sudo pkg install boost-all 20 | 21 | You will need to acquire triSYCL and Intel Parallel STL via GitHub. 22 | One minor issue with triSYCL was addressed by patching triSYCL. 23 | I suspect this issue disappears with LLVM 7.0 but you can look up 24 | the issue with `std::optional` on GitHub if necessary. 25 | 26 | RAJA and Kokkos were not tested. 27 | -------------------------------------------------------------------------------- /common/RUST.defs: -------------------------------------------------------------------------------- 1 | include ../common/make.defs 2 | include ../common/PRKVERSION 3 | -------------------------------------------------------------------------------- /common/SERIAL.defs: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | CCOMPILER =$(CC) 3 | CLINKER = $(CCOMPILER) 4 | COMOBJS = wtime.o 5 | PROG_ENV = -DSERIAL 6 | -------------------------------------------------------------------------------- /common/SHMEM.defs: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | ifeq ($(SHMEMCC),) 3 | SHMEMCC=true 4 | endif 5 | CCOMPILER=$(SHMEMCC) 6 | CLINKER=$(CCOMPILER) 7 | COMOBJS=wtime.o SHMEM_bail_out.o 8 | PROG_ENV=-DSHMEM 9 | -------------------------------------------------------------------------------- /common/Stencil/loop_gen: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | RADIUS=$1 3 | STAR=$2 4 | if [ $STAR -ne 0 ]; then 5 | echo " OUT(i,j) = OUT(i,j) + WEIGHT(0,0)*IN(i,j)" > loop_body_star.incl 6 | jj=1 7 | while [ $jj -le $RADIUS ]; do 8 | echo " 
+WEIGHT(0,-$jj)*IN(i,j-$jj)+WEIGHT(0,$jj)*IN(i,j+$jj)" >> loop_body_star.incl 9 | echo " +WEIGHT(-$jj,0)*IN(i-$jj,j)+WEIGHT($jj,0)*IN(i+$jj,j)" >> loop_body_star.incl 10 | jj=`expr $jj + 1` 11 | done 12 | echo " ;" >> loop_body_star.incl 13 | else 14 | echo " OUT(i,j) = OUT(i,j) +" > loop_body_compact.incl 15 | jj=`expr -1 \* $RADIUS` 16 | while [ $jj -le $RADIUS ]; do 17 | ii=`expr -1 \* $RADIUS` 18 | while [ $ii -le $RADIUS ]; do 19 | si='' 20 | if [ $ii -lt 0 ]; then si=$ii 21 | else 22 | if [ $ii -gt 0 ]; then si=+$ii; fi 23 | fi 24 | sj='' 25 | if [ $jj -lt 0 ]; then sj=$jj 26 | else 27 | if [ $jj -gt 0 ]; then sj=+$jj; fi 28 | fi 29 | echo " +WEIGHT($ii,$jj)*IN(i$si,j$sj)" >> loop_body_compact.incl 30 | ii=`expr $ii + 1` 31 | done 32 | jj=`expr $jj + 1` 33 | done 34 | echo " ;" >> loop_body_compact.incl 35 | fi 36 | -------------------------------------------------------------------------------- /common/Stencil/loop_gen_amr: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | RADIUS=$1 3 | STAR=$2 4 | if [ $STAR -ne 0 ]; then 5 | echo " OUT_R(g,i,j) = OUT_R(g,i,j) + WEIGHT_R(0,0)*IN_R(g,i,j)" > loop_body_star_amr.incl 6 | jj=1 7 | while [ $jj -le $RADIUS ]; do 8 | echo " +WEIGHT_R(0,-$jj)*IN_R(g,i,j-$jj)+WEIGHT_R(0,$jj)*IN_R(g,i,j+$jj)" >> loop_body_star_amr.incl 9 | echo " +WEIGHT_R(-$jj,0)*IN_R(g,i-$jj,j)+WEIGHT_R($jj,0)*IN_R(g,i+$jj,j)" >> loop_body_star_amr.incl 10 | jj=`expr $jj + 1` 11 | done 12 | echo " ;" >> loop_body_star_amr.incl 13 | else 14 | echo " OUT_R(g,i,j) = OUT_R(g,i,j) +" > loop_body_compact_amr.incl 15 | jj=`expr -1 \* $RADIUS` 16 | while [ $jj -le $RADIUS ]; do 17 | ii=`expr -1 \* $RADIUS` 18 | while [ $ii -le $RADIUS ]; do 19 | si='' 20 | if [ $ii -lt 0 ]; then si=$ii 21 | else 22 | if [ $ii -gt 0 ]; then si=+$ii; fi 23 | fi 24 | sj='' 25 | if [ $jj -lt 0 ]; then sj=$jj 26 | else 27 | if [ $jj -gt 0 ]; then sj=+$jj; fi 28 | fi 29 | echo " +WEIGHT_R($ii,$jj)*IN_R(g,i$si,j$sj)" >> 
loop_body_compact_amr.incl 30 | ii=`expr $ii + 1` 31 | done 32 | jj=`expr $jj + 1` 33 | done 34 | echo " ;" >> loop_body_compact_amr.incl 35 | fi 36 | -------------------------------------------------------------------------------- /common/UPC.defs: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | ifeq ($(UPCC),) 3 | UPCC=true 4 | endif 5 | CCOMPILER =$(UPCC) 6 | CLINKER = $(CCOMPILER) 7 | COMOBJS = wtime.o 8 | PROG_ENV = $(UPCFLAG) 9 | -------------------------------------------------------------------------------- /common/make.common: -------------------------------------------------------------------------------- 1 | CFLAGS=$(OPTFLAGS) $(PROG_ENV) 2 | INCLUDEPATHSPLUS=$(INCLUDEPATHS) -I../../include 3 | COMMON=../../common 4 | 5 | usage: 6 | @echo "Usage: type \"make $(PROGRAM)\" to build executable" 7 | @echo " \"make clean\" to remove objects and executables" 8 | @echo -e $(OPTIONSSTRING) 9 | 10 | 11 | ifeq ($(PROG_ENV),-DCHARMXX) 12 | $(PROGRAM).C: $(PROGRAM).decl.h 13 | endif 14 | 15 | $(PROGRAM):$(OBJS) 16 | $(CLINKER) -o $(PROGRAM) $(LIBPATHS) $(CFLAGS) $(OBJS) $(EXTOBJS) $(LIBS) 17 | 18 | 19 | ifeq ($(PROG_ENV),-DCHARMXX) 20 | $(PROGRAM).decl.h: $(PROGRAM).ci 21 | $(CITRANSLATOR) $(PROGRAM).ci 22 | endif 23 | 24 | timestep.o: timestep.c 25 | $(CCOMPILER) $(CFLAGS) $(TUNEFLAGS) $(INCLUDEPATHSPLUS) -c $< 26 | 27 | wtime.o:$(COMMON)/wtime.c 28 | $(CCOMPILER) $(CFLAGS) $(TUNEFLAGS) $(INCLUDEPATHSPLUS) -c $< 29 | 30 | random_draw.o:$(COMMON)/random_draw.c 31 | $(CCOMPILER) $(CFLAGS) $(TUNEFLAGS) $(INCLUDEPATHSPLUS) -c $< 32 | 33 | MPI_bail_out.o:$(COMMON)/MPI_bail_out.c 34 | $(CCOMPILER) $(CFLAGS) $(TUNEFLAGS) $(INCLUDEPATHSPLUS) -c $< 35 | 36 | FENIX_bail_out.o:$(COMMON)/FENIX_bail_out.c 37 | $(CCOMPILER) $(CFLAGS) $(TUNEFLAGS) $(INCLUDEPATHSPLUS) -c $< 38 | 39 | SHMEM_bail_out.o:$(COMMON)/SHMEM_bail_out.c 40 | $(CCOMPILER) $(CFLAGS) $(TUNEFLAGS) $(INCLUDEPATHSPLUS) -c $< 41 | 42 | 
OPENMP_bail_out.o:$(COMMON)/OPENMP_bail_out.c 43 | $(CCOMPILER) $(CFLAGS) $(TUNEFLAGS) $(INCLUDEPATHSPLUS) -c $< 44 | 45 | .c.o: 46 | $(CCOMPILER) $(CFLAGS) $(TUNEFLAGS) $(INCLUDEPATHSPLUS) -c $< 47 | 48 | .cpp.o: 49 | $(CCOMPILER) $(CFLAGS) $(TUNEFLAGS) $(INCLUDEPATHSPLUS) -c $< 50 | 51 | .upc.o: 52 | $(CCOMPILER) $(CFLAGS) $(TUNEFLAGS) $(INCLUDEPATHSPLUS) -c $< 53 | 54 | .C.o: $(CHARMDEP) 55 | $(CCOMPILER) $(CFLAGS) $(TUNEFLAGS) $(INCLUDEPATHSPLUS) -c $< 56 | 57 | clean: 58 | rm -f $(OBJS) $(PROGRAM) *.optrpt *~ charmrun stats.json $(PROGRAM).decl.h $(PROGRAM).def.h 59 | -------------------------------------------------------------------------------- /common/make.defs.ibmbg: -------------------------------------------------------------------------------- 1 | # 2 | # This file shows the IBM Blue Gene/Q toolchain options for PRKs using 3 | # OpenMP, MPI and/or Fortran (sans coarrays) only. 4 | # 5 | # Base compilers and language options 6 | # 7 | # C99 is required in some implementations. 8 | CC=bgxlc_r -qlanglvl=stdc99 9 | # All of the Fortran code is written for the 2008 standard and requires preprocessing. 10 | # You might need to modify the build system for the preprocessor options to work. 11 | FC=bgxlf_r 12 | # C++11 may not be required but does no harm here. 13 | CXX=bgxlcxx_r 14 | # 15 | # Compiler flags 16 | # 17 | DEFAULT_OPT_FLAGS=-O3 18 | # 19 | # OpenMP flags 20 | # 21 | # You can also use -qopenmp. -openmp is deprecated. 22 | OPENMPFLAG=-qsmp=omp 23 | # 24 | # MPI 25 | # 26 | MPICC=mpixlc_r 27 | -------------------------------------------------------------------------------- /common/make.defs.musl: -------------------------------------------------------------------------------- 1 | # 2 | # This file shows the MUSL toolchain options for PRKs, 3 | # which is only used for C11 testing. 
4 | # 5 | # Base compilers and language options 6 | # 7 | CC=/opt/musl/1.1.16/gcc-7/bin/musl-gcc -std=c11 -static 8 | # 9 | # Compiler flags 10 | # 11 | # -mtune=native is appropriate for most cases; it tunes for the build host but keeps binaries portable. 12 | # -march=native is appropriate only if you do not need portable binaries. 13 | DEFAULT_OPT_FLAGS=-g -O3 -mtune=native -ffast-math 14 | # 15 | # OpenMP flags 16 | # 17 | OPENMPFLAG=-fopenmp 18 | OFFLOADFLAG=-foffload="-O3 -v" 19 | OPENACCFLAG=-fopenacc 20 | -------------------------------------------------------------------------------- /common/make.defs.old: -------------------------------------------------------------------------------- 1 | #name of MPI C compiler, e.g. mpiicc, mpicc 2 | MPICC= 3 | 4 | #name of C compiler, e.g. icc, xlc, gcc 5 | CC= 6 | 7 | #name of MPI Fortran compiler, e.g. mpifort, mpif90 8 | MPIF90= 9 | 10 | #name of Fortran compiler, e.g. ifort, xlf_r, gfortran 11 | FC= 12 | 13 | #name of compile line flag enabling OpenMP, e.g. -openmp, -qopenmp, -fopenmp 14 | OPENMPFLAG= 15 | OFFLOADFLAG= 16 | 17 | #default compiler optimization flags 18 | DEFAULT_OPT_FLAGS:= 19 | 20 | ############################ OPTIONAL ######################### 21 | 22 | # Fortran 2008 coarrays flag, *including any library* 23 | # ifort: -coarray=distributed, gfortran: -fcoarray(=single) or -fcoarray=lib -lcaf_mpi, crayftn: -h caf 24 | COARRAYFLAG= 25 | 26 | #name of C++ compiler (to be used in MPI context for Grappa), e.g. mpigxx, mpiicpc 27 | CXX= 28 | 29 | #name of UPC compiler, e.g. gupc, cc, upcc 30 | UPCC= 31 | 32 | #name of compile line flag enabling UPC if necessary, e.g. -h upc 33 | UPCFLAG= 34 | 35 | #name of MPI C compiler (to be used in Fine-Grain MPI context), e.g. mpicc 36 | FGMPICC= 37 | 38 | #name of C compiler (to be used in MPI context of OpenSHMEM), e.g. $(MPICC) 39 | SHMEMCC= 40 | 41 | #location where Charm++ is installed, e.g. $(HOME)/charm/mpi-linux-x86_64-ifort-smp-mpicxx 42 | CHARMTOP= 43 | 44 | #location where Grappa is installed, e.g. 
$(GRAPPA_PREFIX) if you've done "source /bin/settings.sh" 45 | GRAPPATOP= 46 | 47 | #location where Fine-Grain MPI is installed, e.g. $(HOME)/fgmpi-install 48 | FGMPITOP= 49 | 50 | #location where OpenCoarrays is installed, e.g. $(HOME)/opencoarrays 51 | OCAS= 52 | 53 | #location where Legion is installed, e.g. $(HOME)/legion 54 | LEGIONTOP= 55 | 56 | #location where ULFM-enabled MPI is installed 57 | ULFMTOP= 58 | 59 | #flags to use when running applications with ULFM-enabled MPI 60 | ULFMRUNFLAG= 61 | 62 | #location where Fenix is installed 63 | FENIXTOP= 64 | -------------------------------------------------------------------------------- /common/make.defs.upcxx-hpx: -------------------------------------------------------------------------------- 1 | UPCXXDIR=./upcxx 2 | UPCXX=${UPCXXDIR}/bin/upcxx 3 | UPCXXFLAG=-codemode={O3,debug} 4 | UPCXXFLAG+=-std=c++17 5 | UPCXXFLAG+=-mtune=native -ffast-math 6 | 7 | HPXDIR=./hpx 8 | HPXCXX=${HPXDIR}/bin/hpxcxx 9 | HPXFLAG=-Wno-unused-local-typedef ${HWLOCFLAG} 10 | -------------------------------------------------------------------------------- /doc/AMR-PRK.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ParRes/Kernels/320c170cbdf54c69da725eeb834c8855b9a9a39b/doc/AMR-PRK.pdf -------------------------------------------------------------------------------- /doc/Boost.md: -------------------------------------------------------------------------------- 1 | # Homebrew 2 | 3 | Install Boost like this: 4 | ``` 5 | brew install boost boost-bcp 6 | ``` 7 | 8 | Create a "minimal" collection of headers that you can copy elsewhere: 9 | ``` 10 | bcp --boost=/usr/local/Cellar/boost/1.64.0_1/include boost/range/irange.hpp . 
11 | ``` 12 | 13 | Then change this line in `prk_util.h` from 14 | ``` 15 | # include <boost/range/irange.hpp> 16 | ``` 17 | to 18 | ``` 19 | # include "boost/range/irange.hpp" 20 | ``` 21 | and make sure that the `boost/` subdirectory generated by `bcp` lives in `PRK/Cxx11/`. 22 | -------------------------------------------------------------------------------- /doc/HIP.md: -------------------------------------------------------------------------------- 1 | HIP 2 | 3 | # Notes 4 | 5 | Version 3.9.0 6 | ``` 7 | sudo apt-get -o Dpkg::Options::="--force-overwrite" install hipfort 8 | ``` 9 | 10 | # Hardware 11 | 12 | I am only testing on a Hades Canyon NUC with the Vega M GPU. 13 | -------------------------------------------------------------------------------- /doc/HPX.md: -------------------------------------------------------------------------------- 1 | # 2 | 3 | ```sh 4 | cmake .. -DCMAKE_INSTALL_PREFIX=$PRK_DIR/Cxx11/hpx \ 5 | -DCMAKE_CXX_COMPILER=/usr/local/Cellar/llvm/9.0.1/bin/clang++ \ 6 | -DCMAKE_C_COMPILER=/usr/local/Cellar/llvm/9.0.1/bin/clang \ 7 | -DHPX_WITH_TESTS:BOOL=Off \ 8 | -DHPX_WITH_TESTS_BENCHMARKS:BOOL=Off \ 9 | -DHPX_WITH_TESTS_EXAMPLES:BOOL=Off \ 10 | -DHPX_WITH_TESTS_REGRESSIONS:BOOL=Off \ 11 | -DHPX_WITH_TESTS_UNIT:BOOL=Off 12 | make install 13 | ``` 14 | -------------------------------------------------------------------------------- /doc/Hammond-PPP2019.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ParRes/Kernels/320c170cbdf54c69da725eeb834c8855b9a9a39b/doc/Hammond-PPP2019.pdf -------------------------------------------------------------------------------- /doc/IXPUG_Invited2_Hammond.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ParRes/Kernels/320c170cbdf54c69da725eeb834c8855b9a9a39b/doc/IXPUG_Invited2_Hammond.pdf -------------------------------------------------------------------------------- /doc/KOKKOS.md: 
-------------------------------------------------------------------------------- 1 | # Kokkos README 2 | 3 | ## IBM POWER9 + NVIDIA V100 4 | 5 | If you do not enable GPU arch >5, it fails at runtime. 6 | 7 | If you do not enable lambda support, `parallel_reduce` will not compile. 8 | 9 | ``` 10 | cmake .. -DKokkos_ENABLE_CUDA=True \ 11 | -DCMAKE_CXX_COMPILER=$HOME/KOKKOS/git/bin/nvcc_wrapper \ 12 | -DCMAKE_INSTALL_PREFIX=$HOME/KOKKOS/install-cuda \ 13 | -DKokkos_ARCH_POWER9=ON \ 14 | -DKokkos_ARCH_VOLTA70=ON \ 15 | -DKokkos_ENABLE_CUDA_LAMBDA=ON \ 16 | && make -j install 17 | ``` 18 | -------------------------------------------------------------------------------- /doc/OCCA.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /doc/OpenCL.md: -------------------------------------------------------------------------------- 1 | These are just some notes. This information may not be accurate 2 | in the future, so please determine the accuracy of this information 3 | experimentally. 4 | 5 | # Apple OpenCL 6 | 7 | This works very nicely. No known issues. 8 | 9 | # POCL 10 | 11 | POCL is a portable, open-source implementation of OpenCL. 12 | We have only tested it on Mac so far. 13 | 14 | [GitHub](https://github.com/pocl/pocl) 15 | [Documentation](http://portablecl.org/docs/html/index.html) 16 | 17 | The POCL available from Homebrew did not work when we tried it (Sept 2017). 18 | Building from source as follows did. As you can see, we used the Homebrew 19 | LLVM 4.0 package. The LLVM 3.8 package didn't work. 20 | 21 | ``` 22 | cmake .. 
\ 23 | -DCMAKE_C_COMPILER=/usr/local/Cellar/llvm@4/4.0.1/bin/clang \ 24 | -DCMAKE_CXX_COMPILER=/usr/local/Cellar/llvm@4/4.0.1/bin/clang++ \ 25 | -DCMAKE_INSTALL_PREFIX=/opt/pocl/latest && \ 26 | make && make test && make install 27 | ``` 28 | 29 | # Linux 30 | 31 | We have tested against Beignet, Intel OpenCL, and NVIDIA OpenCL on 32 | Ubuntu 16.04 LTS. The CPU implementations work well, although 33 | we have only tested Intel Core i7 and Xeon E5 (both Haswell) 34 | processors. 35 | 36 | We have seen one issue with NVIDIA OpenCL so far: 37 | - https://github.com/ParRes/Kernels/issues/183 38 | 39 | We have not yet tested OpenCL on Intel/Altera FPGAs, AMD GPUs, 40 | or any other hardware. 41 | -------------------------------------------------------------------------------- /doc/PRK for ETH.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ParRes/Kernels/320c170cbdf54c69da725eeb834c8855b9a9a39b/doc/PRK for ETH.pdf -------------------------------------------------------------------------------- /doc/ParallelSTL.md: -------------------------------------------------------------------------------- 1 | Parallel STL support is not mature. Currently, we support two implementations: 2 | 3 | * Intel 18.0+ 4 | - https://software.intel.com/en-us/articles/parallel-stl-parallel-algorithms-in-standard-template-library has details 5 | - https://github.com/intel/parallelstl.git is the open-source implementation that works with GCC and Clang (TBB is required). 6 | 7 | * GCC 7.2+ 8 | - std::execution is not supported. 9 | - one enables parallelism explicitly by switching from std to `__gnu_parallel namespace`. 10 | - one enables parallelism implicitly by using the `_GLIBCXX_PARALLEL` preprocessor symbol. 11 | - [GCC docs](https://gcc.gnu.org/onlinedocs/libstdc++/manual/parallel_mode_using.html) have details. 
12 | 13 | Future implementation targets may include: 14 | 15 | * https://github.com/KhronosGroup/SyclParallelSTL 16 | * http://thrust.github.io/ 17 | 18 | -------------------------------------------------------------------------------- /doc/RAJA.md: -------------------------------------------------------------------------------- 1 | # RAJA README 2 | 3 | ## IBM POWER9 + NVIDIA V100 4 | 5 | ``` 6 | cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/RAJA/install-cuda \ 7 | -DCMAKE_CXX_COMPILER=xlc++_r -DCMAKE_C_COMPILER=xlc_r \ 8 | -DENABLE_OPENMP=On -DENABLE_TARGET_OPENMP=On -DOpenMP_CXX_FLAGS="-qsmp -qoffload" \ 9 | -DENABLE_CUDA=On -DCUDA_ARCH=sm_70 \ 10 | && make -j install 11 | ``` 12 | 13 | Optional extras: `-qsuppress=1500-030` or `-qmaxmem=-1` 14 | 15 | -------------------------------------------------------------------------------- /doc/flang-new.md: -------------------------------------------------------------------------------- 1 | This works, but `-flang-experimental-exec` and `-Wall` are ignored. 
2 | 3 | ``` 4 | /opt/llvm/latest/bin/flang-new -flang-experimental-exec -g -O3 -ffast-math -Wall -DRADIUS=2 -DSTAR -c p2p.F90 5 | ld -L /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib -lSystem p2p.o prk_mod.o -o p2p /opt/llvm/latest/lib/libFortran*a 6 | ``` 7 | -------------------------------------------------------------------------------- /doc/par-res-kern-report-v1.0.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ParRes/Kernels/320c170cbdf54c69da725eeb834c8855b9a9a39b/doc/par-res-kern-report-v1.0.pdf -------------------------------------------------------------------------------- /doc/par-res-kern-report-v1.3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ParRes/Kernels/320c170cbdf54c69da725eeb834c8855b9a9a39b/doc/par-res-kern-report-v1.3.pdf -------------------------------------------------------------------------------- /include/lcg.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015, Intel Corporation 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following 12 | disclaimer in the documentation and/or other materials provided 13 | with the distribution. 14 | * Neither the name of Intel Corporation nor the names of its 15 | contributors may be used to endorse or promote products 16 | derived from this software without specific prior written 17 | permission. 
18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | 33 | #ifndef LCG_H 34 | #define LCG_H 35 | 36 | #include "par-res-kern_general.h" 37 | 38 | extern void LCG_init(void); 39 | extern uint64_t LCG_next(uint64_t); 40 | extern void LCG_get_chunk(uint64_t *, uint64_t *, int, int, uint64_t); 41 | extern void LCG_jump(uint64_t, uint64_t); 42 | 43 | #endif /* LCG_H */ 44 | -------------------------------------------------------------------------------- /include/par-res-kern_mpiomp.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2013, Intel Corporation 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following 12 | disclaimer in the documentation and/or other materials provided 13 | with the distribution. 
14 | * Neither the name of Intel Corporation nor the names of its 15 | contributors may be used to endorse or promote products 16 | derived from this software without specific prior written 17 | permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | 33 | #ifndef PRK_MPIOMP_H 34 | #define PRK_MPIOMP_H 35 | 36 | #include 37 | #include 38 | 39 | #endif /* PRK_MPIOMP_H */ 40 | -------------------------------------------------------------------------------- /include/par-res-kern_upc.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2013, Intel Corporation 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following 12 | disclaimer in the documentation and/or other materials provided 13 | with the distribution. 
14 | * Neither the name of Intel Corporation nor the names of its 15 | contributors may be used to endorse or promote products 16 | derived from this software without specific prior written 17 | permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | POSSIBILITY OF SUCH DAMAGE. 
31 | */ 32 | 33 | #ifndef PRK_UPC_H 34 | #define PRK_UPC_H 35 | 36 | #include "par-res-kern_general.h" 37 | 38 | #include 39 | #include 40 | 41 | #endif /* PRK_UPC_H */ 42 | -------------------------------------------------------------------------------- /logo/PRK logo.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ParRes/Kernels/320c170cbdf54c69da725eeb834c8855b9a9a39b/logo/PRK logo.jpeg -------------------------------------------------------------------------------- /logo/PRK logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ParRes/Kernels/320c170cbdf54c69da725eeb834c8855b9a9a39b/logo/PRK logo.pdf -------------------------------------------------------------------------------- /logo/PRK logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ParRes/Kernels/320c170cbdf54c69da725eeb834c8855b9a9a39b/logo/PRK logo.png -------------------------------------------------------------------------------- /logo/PRK logo.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ParRes/Kernels/320c170cbdf54c69da725eeb834c8855b9a9a39b/logo/PRK logo.pptx -------------------------------------------------------------------------------- /logo/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /scripts/small/Makefile_FENIX: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | runfenix: runfenix.in 3 | @echo PATH=$(ULFMTOP)/bin:$(PATH) > runfenix 4 | @echo export ULFMRUNFLAG=\"$(ULFMRUNFLAG)\" >> runfenix 5 | @cat runfenix.in >> runfenix 6 | @chmod u+x runfenix 7 | 8 | veryclean: 9 | rm -f runfenix 10 | 
-------------------------------------------------------------------------------- /scripts/small/Makefile_FG_MPI: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | runfgmpi: runfgmpi.in 3 | @echo PATH=$(FGMPITOP)/bin:$(PATH) > runfgmpi 4 | @echo LD_LIBRARY_PATH=$(FGMPITOP)/lib:$(LD_LIBRARY_PATH) >> runfgmpi 5 | @cat runfgmpi.in >> runfgmpi 6 | @chmod u+x runfgmpi 7 | 8 | veryclean: 9 | rm -f runfgmpi 10 | -------------------------------------------------------------------------------- /scripts/small/runall: -------------------------------------------------------------------------------- 1 | ./scripts/small/rundarwin 2 | ./scripts/small/runfreaks 3 | -------------------------------------------------------------------------------- /scripts/small/runcharm++: -------------------------------------------------------------------------------- 1 | NUMPROCS=2 2 | OVERDECOMPOSITION=2 3 | NUMITERS=10 4 | SEPLINE="===============================================================" 5 | CHARMRUN=CHARM++/Stencil/charmrun 6 | 7 | $CHARMRUN +p$NUMPROCS CHARM++/Stencil/stencil $NUMITERS 1000 $OVERDECOMPOSITION; echo $SEPLINE 8 | $CHARMRUN +p$NUMPROCS CHARM++/Synch_p2p/p2p $NUMITERS 1000 100 $OVERDECOMPOSITION; echo $SEPLINE 9 | $CHARMRUN +p$NUMPROCS CHARM++/Transpose/transpose $NUMITERS 2000 64 $OVERDECOMPOSITION; echo $SEPLINE 10 | 11 | 12 | -------------------------------------------------------------------------------- /scripts/small/rundarwin: -------------------------------------------------------------------------------- 1 | ./scripts/small/runserial 2 | ./scripts/small/runmpi1 3 | ./scripts/small/runfgmpi 4 | ./scripts/small/runopenmp 5 | ./scripts/small/runmpiopenmp 6 | ./scripts/small/runmpirma 7 | ./scripts/small/runshmem 8 | ./scripts/small/runmpishm 9 | ./scripts/small/runupc 10 | -------------------------------------------------------------------------------- /scripts/small/runfenix.in: 
-------------------------------------------------------------------------------- 1 | NUMPROCS=6 2 | NUMSPARES=2 3 | NUMITERS=10 4 | NUMKILLS=1 5 | FREQ=4 6 | SEPLINE="===============================================================" 7 | MPIRUN=`which mpirun` 8 | 9 | $MPIRUN $ULFMRUNFLAG -np $NUMPROCS FENIX/Stencil/stencil $NUMITERS 1000 $NUMSPARES $NUMKILLS $FREQ 0; echo $SEPLINE 10 | $MPIRUN $ULFMRUNFLAG -np $NUMPROCS FENIX/AMR/amr $NUMITERS 1000 100 1 20 5 1 $NUMSPARES $NUMKILLS $FREQ 0 HIGH_WATER; echo $SEPLINE 11 | $MPIRUN $ULFMRUNFLAG -np $NUMPROCS FENIX/Transpose/transpose $NUMITERS 1000 $NUMSPARES $NUMKILLS $FREQ 0; echo $SEPLINE 12 | $MPIRUN $ULFMRUNFLAG -np $NUMPROCS FENIX/Synch_p2p/p2p $NUMITERS 1000 1000 $NUMSPARES $NUMKILLS $FREQ 0; echo $SEPLINE 13 | -------------------------------------------------------------------------------- /scripts/small/runfgmpi.in: -------------------------------------------------------------------------------- 1 | NUMPROCS=2 2 | NUMTHREADS=2 3 | NUMITERS=10 4 | SEPLINE="===============================================================" 5 | MPIRUN=mpiexec 6 | 7 | for type in vector_go vector_stop no_vector ins_heavy; do 8 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Branch/branch $NUMITERS 1000 $type; echo $SEPLINE 9 | done 10 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/DGEMM/dgemm $NUMITERS 500 32 1; echo $SEPLINE 11 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Nstream/nstream $NUMITERS 2000000 0; echo $SEPLINE 12 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Random/random 16 16; echo $SEPLINE 13 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Reduce/reduce $NUMITERS 2000000; echo $SEPLINE 14 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Sparse/sparse $NUMITERS 10 4; echo $SEPLINE 15 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Stencil/stencil $NUMITERS 1000; echo $SEPLINE 16 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Synch_global/global $NUMITERS 10000; echo $SEPLINE 17 | $MPIRUN -np $NUMPROCS -nfg 
$NUMTHREADS FG_MPI/Synch_p2p/p2p $NUMITERS 1000 100; echo $SEPLINE 18 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Transpose/transpose $NUMITERS 2000 64; echo $SEPLINE 19 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/PIC-static/pic $NUMITERS 1000 1000000 1 2 GEOMETRIC 0.99; echo $SEPLINE 20 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/PIC-static/pic $NUMITERS 1000 1000000 0 1 SINUSOIDAL; echo $SEPLINE 21 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/PIC-static/pic $NUMITERS 1000 1000000 1 0 LINEAR 1.0 3.0; echo $SEPLINE 22 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/PIC-static/pic $NUMITERS 1000 1000000 1 0 PATCH 0 200 100 200; echo $SEPLINE 23 | 24 | 25 | -------------------------------------------------------------------------------- /scripts/small/runfreaks: -------------------------------------------------------------------------------- 1 | ./scripts/small/runcharm++ 2 | ./scripts/small/rungrappa 3 | ./scripts/small/runampi 4 | ./scripts/small/runlegion 5 | 6 | -------------------------------------------------------------------------------- /scripts/small/rungrappa: -------------------------------------------------------------------------------- 1 | NUMPROCS=4 2 | NUMITERS=10 3 | SEPLINE="===============================================================" 4 | MPIRUN=mpiexec.hydra 5 | $MPIRUN -np $NUMPROCS GRAPPA/Stencil/stencil $NUMITERS 1000; echo $SEPLINE 6 | $MPIRUN -np $NUMPROCS GRAPPA/Synch_p2p/p2p $NUMITERS 1000 100; echo $SEPLINE 7 | $MPIRUN -np $NUMPROCS GRAPPA/Transpose/transpose $NUMITERS 2000 64; echo $SEPLINE 8 | 9 | -------------------------------------------------------------------------------- /scripts/small/runlegion: -------------------------------------------------------------------------------- 1 | NUMPROCS=4 2 | NUMITERS=10 3 | SEPLINE="===============================================================" 4 | 5 | LEGION/Stencil/stencil $NUMPROCS $NUMITERS 1000 2 -ll:cpu $NUMPROCS; echo $SEPLINE 6 | LEGION/Transpose/transpose $NUMPROCS 
$NUMITERS 2000 64 -ll:cpu $NUMPROCS; echo $SEPLINE 7 | 8 | 9 | -------------------------------------------------------------------------------- /scripts/small/runmpiopenmp: -------------------------------------------------------------------------------- 1 | NUMPROCS=2 2 | NUMTHREADS=2 3 | NUMITERS=10 4 | SEPLINE="===============================================================" 5 | MPIRUN=mpirun 6 | 7 | $MPIRUN -np $NUMPROCS MPIOPENMP/Nstream/nstream $NUMTHREADS $NUMITERS 2000000 0; echo $SEPLINE 8 | $MPIRUN -np $NUMPROCS MPIOPENMP/Stencil/stencil $NUMTHREADS $NUMITERS 1000; echo $SEPLINE 9 | $MPIRUN -np $NUMPROCS MPIOPENMP/Synch_p2p/p2p $NUMTHREADS $NUMITERS 1000 1000; echo $SEPLINE 10 | $MPIRUN -np $NUMPROCS MPIOPENMP/Transpose/transpose $NUMTHREADS $NUMITERS 2000 64; echo $SEPLINE 11 | -------------------------------------------------------------------------------- /scripts/small/runmpirma: -------------------------------------------------------------------------------- 1 | NUMPROCS=4 2 | NUMITERS=10 3 | SEPLINE="===============================================================" 4 | MPIRUN=mpirun 5 | 6 | $MPIRUN -np $NUMPROCS MPIRMA/Stencil/stencil $NUMITERS 1000; echo $SEPLINE 7 | $MPIRUN -np $NUMPROCS MPIRMA/Synch_p2p/p2p $NUMITERS 1000 100; echo $SEPLINE 8 | $MPIRUN -np $NUMPROCS MPIRMA/Transpose/transpose $NUMITERS 2000 64; echo $SEPLINE 9 | 10 | 11 | -------------------------------------------------------------------------------- /scripts/small/runmpishm: -------------------------------------------------------------------------------- 1 | NUMPROCS=4 2 | NUMSUBPROCS=2 3 | NUMITERS=10 4 | SEPLINE="===============================================================" 5 | MPIRUN=mpirun 6 | 7 | $MPIRUN -np $NUMPROCS MPISHM/Stencil/stencil $NUMSUBPROCS $NUMITERS 1000; echo $SEPLINE 8 | $MPIRUN -np $NUMPROCS MPISHM/Synch_p2p/p2p $NUMITERS 1000 100; echo $SEPLINE 9 | $MPIRUN -np $NUMPROCS MPISHM/Transpose/transpose $NUMSUBPROCS $NUMITERS 1000 64; echo $SEPLINE 10 | 
-------------------------------------------------------------------------------- /scripts/small/runopenmp: -------------------------------------------------------------------------------- 1 | NUMTHREADS=4 2 | NUMITERS=10 3 | SEPLINE="===============================================================" 4 | 5 | for type in vector_go vector_stop no_vector ins_heavy; do 6 | OPENMP/Branch/branch $NUMTHREADS $NUMITERS 1000 $type; echo $SEPLINE 7 | done 8 | OPENMP/DGEMM/dgemm $NUMTHREADS $NUMITERS 500 32; echo $SEPLINE 9 | OPENMP/Nstream/nstream $NUMTHREADS $NUMITERS 2000000 0; echo $SEPLINE 10 | OPENMP/Random/random $NUMTHREADS 16 16 4; echo $SEPLINE 11 | for ALGORITHM in linear binary-barrier binary-p2p long-optimal; do 12 | OPENMP/Reduce/reduce $NUMTHREADS $NUMITERS 2000000 $ALGORITHM; echo $SEPLINE 13 | done 14 | OPENMP/Refcount/refcount $NUMTHREADS 2000000 100; echo $SEPLINE 15 | OPENMP/Sparse/sparse $NUMTHREADS $NUMITERS 10 4; echo $SEPLINE 16 | OPENMP/Stencil/stencil $NUMTHREADS $NUMITERS 1000; echo $SEPLINE 17 | OPENMP/Synch_global/global $NUMTHREADS $NUMITERS 10000; echo $SEPLINE 18 | OPENMP/Synch_p2p/p2p $NUMTHREADS $NUMITERS 1000 100; echo $SEPLINE 19 | OPENMP/Transpose/transpose $NUMTHREADS $NUMITERS 2000 64; echo $SEPLINE 20 | OPENMP/PIC/pic $NUMTHREADS $NUMITERS 1000 1000000 1 2 GEOMETRIC 0.99; echo $SEPLINE 21 | OPENMP/PIC/pic $NUMTHREADS $NUMITERS 1000 1000000 0 1 SINUSOIDAL; echo $SEPLINE 22 | OPENMP/PIC/pic $NUMTHREADS $NUMITERS 1000 1000000 1 0 LINEAR 1.0 3.0; echo $SEPLINE 23 | OPENMP/PIC/pic $NUMTHREADS $NUMITERS 1000 1000000 1 0 PATCH 0 200 100 200; echo $SEPLINE 24 | -------------------------------------------------------------------------------- /scripts/small/runserial: -------------------------------------------------------------------------------- 1 | NUMITERS=10 2 | SEPLINE="===============================================================" 3 | 4 | for type in vector_go vector_stop no_vector ins_heavy; do 5 | SERIAL/Branch/branch $NUMITERS 1000 
$type; echo $SEPLINE 6 | done 7 | SERIAL/DGEMM/dgemm $NUMITERS 500 32; echo $SEPLINE 8 | SERIAL/Nstream/nstream $NUMITERS 2000000 0; echo $SEPLINE 9 | SERIAL/Random/random 16 16 4; echo $SEPLINE 10 | SERIAL/Reduce/reduce $NUMITERS 2000000; echo $SEPLINE 11 | SERIAL/Sparse/sparse $NUMITERS 10 4; echo $SEPLINE 12 | SERIAL/Stencil/stencil $NUMITERS 1000; echo $SEPLINE 13 | SERIAL/Synch_p2p/p2p $NUMITERS 1000 100; echo $SEPLINE 14 | SERIAL/Transpose/transpose $NUMITERS 2000 64; echo $SEPLINE 15 | SERIAL/PIC/pic $NUMITERS 1000 1000000 1 2 GEOMETRIC 0.99; echo $SEPLINE 16 | SERIAL/PIC/pic $NUMITERS 1000 1000000 0 1 SINUSOIDAL; echo $SEPLINE 17 | SERIAL/PIC/pic $NUMITERS 1000 1000000 1 0 LINEAR 1.0 3.0; echo $SEPLINE 18 | SERIAL/PIC/pic $NUMITERS 1000 1000000 1 0 PATCH 0 200 100 200; echo $SEPLINE 19 | SERIAL/AMR/amr $NUMITERS 1000 100 2 2 1 5; echo $SEPLINE 20 | -------------------------------------------------------------------------------- /scripts/small/runshmem: -------------------------------------------------------------------------------- 1 | NUMPROCS=4 2 | NUMITERS=10 3 | SEPLINE="===============================================================" 4 | MPIRUN=mpirun 5 | 6 | $MPIRUN -np $NUMPROCS SHMEM/Stencil/stencil $NUMITERS 1000; echo $SEPLINE 7 | $MPIRUN -np $NUMPROCS SHMEM/Synch_p2p/p2p $NUMITERS 1000 100; echo $SEPLINE 8 | $MPIRUN -np $NUMPROCS SHMEM/Transpose/transpose $NUMITERS 2000 64; echo $SEPLINE 9 | 10 | 11 | -------------------------------------------------------------------------------- /scripts/small/runupc: -------------------------------------------------------------------------------- 1 | NUMTHREADS=4 2 | NUMITERS=10 3 | SEPLINE="===============================================================" 4 | 5 | UPC/Stencil/stencil -n $NUMTHREADS $NUMITERS 1000; echo $SEPLINE 6 | UPC/Synch_p2p/p2p -n $NUMTHREADS $NUMITERS 1000 1000; echo $SEPLINE 7 | UPC/Transpose/transpose -n $NUMTHREADS $NUMITERS 2000 64; echo $SEPLINE 8 | 
-------------------------------------------------------------------------------- /scripts/wide/Makefile_FG_MPI: -------------------------------------------------------------------------------- 1 | include ../../common/make.defs 2 | runfgmpi: runfgmpi.in 3 | @echo PATH=$(FGMPITOP)/bin:$(PATH) > runfgmpi 4 | @echo LD_LIBRARY_PATH=$(FGMPITOP)/lib:$(LD_LIBRARY_PATH) >> runfgmpi 5 | @cat runfgmpi.in >> runfgmpi 6 | @chmod u+x runfgmpi 7 | 8 | veryclean: 9 | rm -f runfgmpi 10 | -------------------------------------------------------------------------------- /scripts/wide/runall: -------------------------------------------------------------------------------- 1 | ./scripts/wide/rundarwin 2 | ./scripts/wide/runfreaks 3 | -------------------------------------------------------------------------------- /scripts/wide/runcharm++: -------------------------------------------------------------------------------- 1 | NUMPROCS=2 2 | OVERDECOMPOSITION=2 3 | NUMITERS=1 4 | SEPLINE="===============================================================" 5 | CHARMRUN=CHARM++/Stencil/charmrun 6 | 7 | $CHARMRUN +p$NUMPROCS CHARM++/Stencil/stencil $NUMITERS 50000 $OVERDECOMPOSITION; echo $SEPLINE 8 | $CHARMRUN +p$NUMPROCS CHARM++/Synch_p2p/p2p $NUMITERS 70000 70000 $OVERDECOMPOSITION; echo $SEPLINE 9 | $CHARMRUN +p$NUMPROCS CHARM++/Transpose/transpose $NUMITERS 50000 64 $OVERDECOMPOSITION; echo $SEPLINE 10 | 11 | 12 | -------------------------------------------------------------------------------- /scripts/wide/rundarwin: -------------------------------------------------------------------------------- 1 | ./scripts/wide/runserial 2 | ./scripts/wide/runmpi1 3 | ./scripts/wide/runfgmpi 4 | ./scripts/wide/runopenmp 5 | ./scripts/wide/runmpiopenmp 6 | ./scripts/wide/runmpirma 7 | ./scripts/wide/runshmem 8 | ./scripts/wide/runmpishm 9 | ./scripts/wide/runupc 10 | -------------------------------------------------------------------------------- /scripts/wide/runfgmpi.in: 
-------------------------------------------------------------------------------- 1 | NUMPROCS=2 2 | NUMTHREADS=2 3 | NUMITERS=1 4 | SEPLINE="===============================================================" 5 | MPIRUN=mpiexec 6 | 7 | #$MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/DGEMM/dgemm $NUMITERS -50000 32 1; echo $SEPLINE 8 | #$MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Nstream/nstream $NUMITERS 2000000000L 0; echo $SEPLINE 9 | #$MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Random/random $NUMITERS 32; echo $SEPLINE 10 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Reduce/reduce $NUMITERS 1000000000L; echo $SEPLINE 11 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Sparse/sparse $NUMITERS 13 7; echo $SEPLINE 12 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Stencil/stencil $NUMITERS 50000; echo $SEPLINE 13 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Synch_global/global $NUMITERS 2000000000L; echo $SEPLINE 14 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Synch_p2p/p2p $NUMITERS 70000 70000; echo $SEPLINE 15 | $MPIRUN -np $NUMPROCS -nfg $NUMTHREADS FG_MPI/Transpose/transpose $NUMITERS 50000 64; echo $SEPLINE 16 | -------------------------------------------------------------------------------- /scripts/wide/runfreaks: -------------------------------------------------------------------------------- 1 | ./scripts/wide/runcharm++ 2 | ./scripts/wide/runampi 3 | ./scripts/wide/rungrappa 4 | -------------------------------------------------------------------------------- /scripts/wide/rungrappa: -------------------------------------------------------------------------------- 1 | NUMITERS=1 2 | NUMPROCS=2 3 | SEPLINE="===============================================================" 4 | MPIRUN=mpiexec.hydra 5 | 6 | $MPIRUN -np $NUMPROCS GRAPPA/Stencil/stencil $NUMITERS 39000; echo $SEPLINE 7 | $MPIRUN -np $NUMPROCS GRAPPA/Synch_p2p/p2p $NUMITERS 65000 65000; echo $SEPLINE 8 | $MPIRUN -np $NUMPROCS GRAPPA/Transpose/transpose $NUMITERS 24000 64; echo $SEPLINE 9 | 
10 | -------------------------------------------------------------------------------- /scripts/wide/runmpi1: -------------------------------------------------------------------------------- 1 | NUMITERS=1 2 | SEPLINE="===============================================================" 3 | MPIRUN=mpirun 4 | 5 | for NUMPROCS in 1 2; do 6 | $MPIRUN -np $NUMPROCS MPI1/DGEMM/dgemm $NUMITERS -50000 32 1; echo $SEPLINE 7 | $MPIRUN -np $NUMPROCS MPI1/Nstream/nstream $NUMITERS 2000000000L 0; echo $SEPLINE 8 | $MPIRUN -np $NUMPROCS MPI1/Random/random $NUMITERS 32; echo $SEPLINE 9 | $MPIRUN -np $NUMPROCS MPI1/Reduce/reduce $NUMITERS 2000000000L; echo $SEPLINE 10 | $MPIRUN -np $NUMPROCS MPI1/Sparse/sparse $NUMITERS 13 7; echo $SEPLINE 11 | $MPIRUN -np $NUMPROCS MPI1/Stencil/stencil $NUMITERS 50000; echo $SEPLINE 12 | $MPIRUN -np $NUMPROCS MPI1/Synch_global/global $NUMITERS 2000000000L; echo $SEPLINE 13 | $MPIRUN -np $NUMPROCS MPI1/Synch_p2p/p2p $NUMITERS 70000 70000; echo $SEPLINE 14 | $MPIRUN -np $NUMPROCS MPI1/Transpose/transpose $NUMITERS 50000 64; echo $SEPLINE 15 | done 16 | 17 | 18 | -------------------------------------------------------------------------------- /scripts/wide/runmpiopenmp: -------------------------------------------------------------------------------- 1 | NUMPROCS=2 2 | NUMTHREADS=2 3 | NUMITERS=1 4 | SEPLINE="===============================================================" 5 | MPIRUN=mpirun 6 | 7 | $MPIRUN -np $NUMPROCS MPIOPENMP/Nstream/nstream $NUMTHREADS $NUMITERS 2000000000L 0; echo $SEPLINE 8 | $MPIRUN -np $NUMPROCS MPIOPENMP/Stencil/stencil $NUMTHREADS $NUMITERS 50000; echo $SEPLINE 9 | $MPIRUN -np $NUMPROCS MPIOPENMP/Synch_p2p/p2p $NUMTHREADS $NUMITERS 70000 70000; echo $SEPLINE 10 | $MPIRUN -np $NUMPROCS MPIOPENMP/Transpose/transpose $NUMTHREADS $NUMITERS 50000 64; echo $SEPLINE 11 | -------------------------------------------------------------------------------- /scripts/wide/runmpirma: 
-------------------------------------------------------------------------------- 1 | NUMPROCS=4 2 | NUMITERS=1 3 | SEPLINE="===============================================================" 4 | MPIRUN=mpirun 5 | 6 | $MPIRUN -np $NUMPROCS MPIRMA/Stencil/stencil $NUMITERS 50000; echo $SEPLINE 7 | $MPIRUN -np $NUMPROCS MPIRMA/Synch_p2p/p2p $NUMITERS 70000 70000; echo $SEPLINE 8 | $MPIRUN -np $NUMPROCS MPIRMA/Transpose/transpose $NUMITERS 46000 64; echo $SEPLINE 9 | 10 | 11 | -------------------------------------------------------------------------------- /scripts/wide/runmpishm: -------------------------------------------------------------------------------- 1 | NUMPROCS=4 2 | NUMSUBPROCS=2 3 | NUMITERS=1 4 | SEPLINE="===============================================================" 5 | MPIRUN=mpirun 6 | # We would like to run larger problems, but are limited by a bug in MPICH's MPI_Win_create_shared (presumably MPI_Win_allocate_shared, the MPI-3 shared-window routine -- TODO confirm). 7 | $MPIRUN -np $NUMPROCS MPISHM/Stencil/stencil $NUMSUBPROCS $NUMITERS 45000; echo $SEPLINE 8 | $MPIRUN -np $NUMPROCS MPISHM/Synch_p2p/p2p $NUMITERS 60000 60000; echo $SEPLINE 9 | $MPIRUN -np $NUMPROCS MPISHM/Transpose/transpose $NUMSUBPROCS $NUMITERS 37000 64; echo $SEPLINE 10 | -------------------------------------------------------------------------------- /scripts/wide/runopenmp: -------------------------------------------------------------------------------- 1 | NUMTHREADS=4 2 | NUMITERS=1 3 | SEPLINE="===============================================================" 4 | 5 | OPENMP/DGEMM/dgemm $NUMTHREADS $NUMITERS -50000 32; echo $SEPLINE 6 | OPENMP/Nstream/nstream $NUMTHREADS $NUMITERS 2000000000L 0; echo $SEPLINE 7 | OPENMP/Random/random $NUMTHREADS 32 4 4; echo $SEPLINE 8 | for ALGORITHM in linear binary-barrier binary-p2p long-optimal; do 9 | OPENMP/Reduce/reduce $NUMTHREADS $NUMITERS 1000000000L $ALGORITHM; echo $SEPLINE 10 | done 11 | OPENMP/Sparse/sparse $NUMTHREADS $NUMITERS 13 7; 
echo $SEPLINE 12 | OPENMP/Stencil/stencil $NUMTHREADS $NUMITERS 46000; echo 
$SEPLINE 13 | OPENMP/Synch_p2p/p2p $NUMTHREADS $NUMITERS 70000 70000; echo $SEPLINE 14 | OPENMP/Transpose/transpose $NUMTHREADS $NUMITERS 50000 64; echo $SEPLINE 15 | -------------------------------------------------------------------------------- /scripts/wide/runserial: -------------------------------------------------------------------------------- 1 | NUMITERS=1 2 | SEPLINE="===============================================================" 3 | 4 | SERIAL/DGEMM/dgemm $NUMITERS -50000 32; echo $SEPLINE 5 | SERIAL/Nstream/nstream $NUMITERS 2000000000L 0; echo $SEPLINE 6 | SERIAL/Random/random $NUMITERS 32 8; echo $SEPLINE 7 | SERIAL/Reduce/reduce $NUMITERS 2000000000L; echo $SEPLINE 8 | SERIAL/Sparse/sparse $NUMITERS 13 7; echo $SEPLINE 9 | SERIAL/Stencil/stencil $NUMITERS 50000; echo $SEPLINE 10 | SERIAL/Synch_p2p/p2p $NUMITERS 70000 70000; echo $SEPLINE 11 | SERIAL/Transpose/transpose $NUMITERS 50000 64; echo $SEPLINE 12 | -------------------------------------------------------------------------------- /scripts/wide/runshmem: -------------------------------------------------------------------------------- 1 | NUMPROCS=4 2 | NUMITERS=1 3 | SEPLINE="===============================================================" 4 | MPIRUN=mpirun 5 | 6 | $MPIRUN -np $NUMPROCS SHMEM/Stencil/stencil $NUMITERS 50000; echo $SEPLINE 7 | $MPIRUN -np $NUMPROCS SHMEM/Synch_p2p/p2p $NUMITERS 70000 70000; echo $SEPLINE 8 | $MPIRUN -np $NUMPROCS SHMEM/Transpose/transpose $NUMITERS 50000 64; echo $SEPLINE 9 | 10 | 11 | -------------------------------------------------------------------------------- /scripts/wide/runupc: -------------------------------------------------------------------------------- 1 | NUMTHREADS=4 2 | NUMITERS=1 3 | SEPLINE="===============================================================" 4 | 5 | UPC/Stencil/stencil -n $NUMTHREADS $NUMITERS 40000; echo $SEPLINE 6 | UPC/Synch_p2p/p2p -n $NUMTHREADS $NUMITERS 60000 60000; echo $SEPLINE 7 | UPC/Transpose/transpose -n 
$NUMTHREADS $NUMITERS 40000 64; echo $SEPLINE 8 | --------------------------------------------------------------------------------