├── .gitignore
├── CMakeLists.txt
├── Doxyfile.cmake
├── LICENSE.txt
├── README.md
├── TODO
├── cmake
    └── modules
    │   ├── FindOpenCL.cmake
    │   └── FindROSE.cmake
├── common
    ├── CMakeLists.txt
    └── config.h.cmake
├── config
    └── sample1.lua
├── docs
    ├── compilation.md
    ├── design.org
    ├── developer-guide.rst
    ├── install.md
    ├── programming.md
    ├── rose.md
    └── unfiled_notes.rst
├── examples
    ├── CMakeLists.txt
    ├── copy
    │   └── copy3d.c
    ├── diffusion-benchmark
    │   ├── CMakeLists.txt
    │   ├── Makefile.cmake
    │   ├── README
    │   ├── autotune.conf
    │   ├── baseline.cc
    │   ├── baseline.h
    │   ├── diffusion3d.cc
    │   ├── diffusion3d.h
    │   ├── diffusion3d.mic.c
    │   ├── diffusion3d_cuda.cu
    │   ├── diffusion3d_cuda.h
    │   ├── diffusion3d_cuda_shared.cu
    │   ├── diffusion3d_cuda_temporal_blocking.cu
    │   ├── diffusion3d_cuda_temporal_blocking.h
    │   ├── diffusion3d_mic.cc
    │   ├── diffusion3d_mic.h
    │   ├── diffusion3d_openmp.cc
    │   ├── diffusion3d_openmp.h
    │   ├── diffusion3d_openmp_temporal_blocking.cc
    │   ├── diffusion3d_openmp_temporal_blocking.h
    │   ├── diffusion3d_physis.c
    │   ├── diffusion3d_physis.h
    │   ├── main.cc
    │   ├── opt.conf
    │   └── stopwatch.h
    ├── diffusion-fortran
    │   ├── README
    │   ├── diffusion3d_fortran.F90
    │   └── diffusion3d_original.c
    ├── himeno
    │   ├── CMakeLists.txt
    │   ├── Makefile.cmake
    │   ├── autotune.conf
    │   ├── himenobmtxpa_original.c
    │   ├── himenobmtxpa_physis.c
    │   ├── opt.conf
    │   └── physis.conf
    ├── test_double_buffering.c
    ├── test_global_variable.c
    ├── test_periodic_boundary.c
    ├── test_set.c
    └── test_staggered_grid.c
├── include
    └── physis
    │   ├── .gitignore
    │   ├── CMakeLists.txt
    │   ├── config.h.cmake
    │   ├── fortran
    │       ├── physis.F90
    │       └── physis.h
    │   ├── internal_common.h
    │   ├── math.h
    │   ├── physis.F90
    │   ├── physis.h
    │   ├── physis_common.h
    │   ├── physis_cuda.h
    │   ├── physis_cuda_hm.h
    │   ├── physis_mpi.h
    │   ├── physis_mpi_cuda.h
    │   ├── physis_mpi_opencl.h
    │   ├── physis_mpi_opencl_device.h
    │   ├── physis_mpi_openmp.h
    │   ├── physis_opencl.h
    │   ├── physis_opencl_kernel.h
    │   ├── physis_ref.h
    │   ├── physis_user.F90
    │   ├── physis_user.h
    │   ├── physis_util.h
    │   ├── reduce.h
    │   ├── runtime.h
    │   ├── stopwatch.h
    │   └── types.h
├── misc
    ├── add_source_header.py
    ├── cpplint.py
    ├── google-c-style.el
    ├── opencl
    │   ├── test-runtime-05-orig.c
    │   ├── test-runtime-05-orig.cuda.cu
    │   ├── test-runtime-05-orig.ref.c
    │   └── test-runtime-05.c
    ├── rose-build.sh
    ├── source_header.txt
    └── valgrind-suppressions.supp
├── runtime
    ├── CMakeLists.txt
    ├── buffer.cc
    ├── buffer.h
    ├── buffer_cuda.cu
    ├── buffer_cuda.h
    ├── buffer_mpi_openmp.cc
    ├── buffer_mpi_openmp.h
    ├── buffer_mpi_openmp_numa.cc
    ├── buffer_opencl.cc
    ├── buffer_opencl.h
    ├── common.cc
    ├── cub-1.3.2
    │   ├── .settings
    │   │   ├── org.eclipse.cdt.codan.core.prefs
    │   │   ├── org.eclipse.cdt.core.prefs
    │   │   ├── org.eclipse.cdt.ui.prefs
    │   │   └── org.eclipse.core.runtime.prefs
    │   ├── LICENSE.TXT
    │   ├── README.md
    │   └── cub
    │   │   ├── block
    │   │       ├── block_discontinuity.cuh
    │   │       ├── block_exchange.cuh
    │   │       ├── block_histogram.cuh
    │   │       ├── block_load.cuh
    │   │       ├── block_radix_rank.cuh
    │   │       ├── block_radix_sort.cuh
    │   │       ├── block_raking_layout.cuh
    │   │       ├── block_reduce.cuh
    │   │       ├── block_scan.cuh
    │   │       ├── block_shift.cuh
    │   │       ├── block_store.cuh
    │   │       └── specializations
    │   │       │   ├── block_histogram_atomic.cuh
    │   │       │   ├── block_histogram_sort.cuh
    │   │       │   ├── block_reduce_raking.cuh
    │   │       │   ├── block_reduce_raking_commutative_only.cuh
    │   │       │   ├── block_reduce_warp_reductions.cuh
    │   │       │   ├── block_scan_raking.cuh
    │   │       │   └── block_scan_warp_scans.cuh
    │   │   ├── block_range
    │   │       ├── block_range_histo.cuh
    │   │       ├── block_range_radix_sort_downsweep.cuh
    │   │       ├── block_range_radix_sort_upsweep.cuh
    │   │       ├── block_range_reduce.cuh
    │   │       ├── block_range_reduce_by_key.cuh
    │   │       ├── block_range_scan.cuh
    │   │       ├── block_range_select.cuh
    │   │       ├── block_scan_prefix_operators.cuh
    │   │       └── specializations
    │   │       │   ├── block_range_histo_gatomic.cuh
    │   │       │   ├── block_range_histo_satomic.cuh
    │   │       │   └── block_range_histo_sort.cuh
    │   │   ├── cub.cuh
    │   │   ├── device
    │   │       ├── device_histogram.cuh
    │   │       ├── device_partition.cuh
    │   │       ├── device_radix_sort.cuh
    │   │       ├── device_reduce.cuh
    │   │       ├── device_scan.cuh
    │   │       ├── device_select.cuh
    │   │       └── dispatch
    │   │       │   ├── device_histogram_dispatch.cuh
    │   │       │   ├── device_radix_sort_dispatch.cuh
    │   │       │   ├── device_reduce_by_key_dispatch.cuh
    │   │       │   ├── device_reduce_dispatch.cuh
    │   │       │   ├── device_scan_dispatch.cuh
    │   │       │   └── device_select_dispatch.cuh
    │   │   ├── grid
    │   │       ├── grid_barrier.cuh
    │   │       ├── grid_even_share.cuh
    │   │       ├── grid_mapping.cuh
    │   │       └── grid_queue.cuh
    │   │   ├── host
    │   │       └── spinlock.cuh
    │   │   ├── iterator
    │   │       ├── arg_index_input_iterator.cuh
    │   │       ├── cache_modified_input_iterator.cuh
    │   │       ├── cache_modified_output_iterator.cuh
    │   │       ├── constant_input_iterator.cuh
    │   │       ├── counting_input_iterator.cuh
    │   │       ├── tex_obj_input_iterator.cuh
    │   │       ├── tex_ref_input_iterator.cuh
    │   │       └── transform_input_iterator.cuh
    │   │   ├── thread
    │   │       ├── thread_load.cuh
    │   │       ├── thread_operators.cuh
    │   │       ├── thread_reduce.cuh
    │   │       ├── thread_scan.cuh
    │   │       └── thread_store.cuh
    │   │   ├── util_allocator.cuh
    │   │   ├── util_arch.cuh
    │   │   ├── util_debug.cuh
    │   │   ├── util_device.cuh
    │   │   ├── util_macro.cuh
    │   │   ├── util_namespace.cuh
    │   │   ├── util_ptx.cuh
    │   │   ├── util_type.cuh
    │   │   └── warp
    │   │       ├── specializations
    │   │           ├── warp_reduce_shfl.cuh
    │   │           ├── warp_reduce_smem.cuh
    │   │           ├── warp_scan_shfl.cuh
    │   │           └── warp_scan_smem.cuh
    │   │       ├── warp_reduce.cuh
    │   │       └── warp_scan.cuh
    ├── cuda_util.h
    ├── grid.cc
    ├── grid.h
    ├── grid_mpi.cc
    ├── grid_mpi.h
    ├── grid_mpi_cuda.cc
    ├── grid_mpi_cuda.h
    ├── grid_mpi_cuda_debug_util.h
    ├── grid_mpi_cuda_exp.cc
    ├── grid_mpi_cuda_exp.h
    ├── grid_mpi_debug_util.h
    ├── grid_mpi_opencl.cc
    ├── grid_mpi_opencl.h
    ├── grid_mpi_openmp.cc
    ├── grid_mpi_openmp.h
    ├── grid_mpi_openmp_misc.cc
    ├── grid_space_mpi.h
    ├── grid_space_mpi_cuda.h
    ├── grid_util.cc
    ├── grid_util.h
    ├── grid_util_mpi_openmp.cc
    ├── grid_util_mpi_openmp.h
    ├── ipc.h
    ├── ipc_mpi.cc
    ├── ipc_mpi.h
    ├── libphysis_rt_cuda.cc
    ├── libphysis_rt_cuda_hm.cc
    ├── libphysis_rt_mpi.cc
    ├── libphysis_rt_mpi_cuda.cc
    ├── libphysis_rt_mpi_opencl.cc
    ├── libphysis_rt_mpi_opencl_extra.cc
    ├── libphysis_rt_mpi_openmp.cc
    ├── libphysis_rt_mpi_openmp_numa.cc
    ├── libphysis_rt_opencl.cc
    ├── libphysis_rt_ref.cc
    ├── mpi_opencl_runtime.h
    ├── mpi_openmp_runtime.h
    ├── mpi_runtime_common.cc
    ├── mpi_runtime_common.h
    ├── mpi_util.h
    ├── mpi_wrapper.cc
    ├── mpi_wrapper.h
    ├── opencl_gridcp.cc
    ├── opencl_gridinit.cc
    ├── opencl_kernelinit.cc
    ├── opencl_misc.cc
    ├── opencl_psinit.cc
    ├── opencl_runtime.h
    ├── proc.cc
    ├── proc.h
    ├── reduce.h
    ├── reduce_cuda.cu
    ├── reduce_cuda.h
    ├── reduce_grid_mpi_cuda_exp.cu
    ├── reduce_grid_mpi_cuda_exp.h
    ├── reduce_mpi_cuda.cu
    ├── rpc.h
    ├── rpc_cuda.cc
    ├── rpc_cuda.h
    ├── rpc_mpi.cc
    ├── rpc_mpi.h
    ├── rpc_mpi_cuda.cc
    ├── rpc_mpi_cuda.h
    ├── rpc_mpi_opencl.cc
    ├── rpc_mpi_opencl.h
    ├── rpc_mpi_openmp.cc
    ├── rpc_mpi_openmp.h
    ├── rpc_opencl.h
    ├── rpc_opencl_common.h
    ├── rpc_opencl_mpi.cc
    ├── rpc_opencl_mpi.h
    ├── runtime.h
    ├── runtime_common.cc
    ├── runtime_common.h
    ├── runtime_common_cuda.h
    ├── runtime_cuda.h
    ├── runtime_cuda_hm.cc
    ├── runtime_cuda_hm.h
    ├── runtime_mpi.cc
    ├── runtime_mpi.h
    ├── runtime_mpi_cuda.cc
    ├── runtime_mpi_cuda.h
    ├── runtime_ref.cc
    ├── runtime_ref.h
    ├── tests
    │   ├── CMakeLists.txt
    │   ├── test_buffer.cc
    │   ├── test_buffer_cuda.cc
    │   ├── test_grid_mpi.cc
    │   ├── test_grid_mpi_cuda_exp.cc
    │   ├── test_grid_mpi_cuda_exp_utype.cc
    │   ├── test_mpi_cuda_runtime.cc
    │   ├── test_mpi_runtime_2d.cc
    │   ├── test_mpi_runtime_3d.cc
    │   └── test_physis_rt_mpi.c
    ├── timing.cc
    └── timing.h
├── tests
    ├── CMakeLists.txt
    ├── gmock
    │   ├── COPYING
    │   ├── README
    │   ├── gmock-gtest-all.cc
    │   ├── gmock
    │   │   └── gmock.h
    │   ├── gmock_main.cc
    │   └── gtest
    │   │   └── gtest.h
    └── system_tests
    │   ├── CMakeLists.txt
    │   ├── run_system_tests.sh.cmake
    │   └── test_cases
    │       ├── CMakeLists.txt
    │       ├── test_01.c
    │       ├── test_02.c
    │       ├── test_03.c
    │       ├── test_08.c
    │       ├── test_09.c
    │       ├── test_10.c
    │       ├── test_15.c
    │       ├── test_15.manual.cuda.cu
    │       ├── test_15.manual.ref.c
    │       ├── test_16.c
    │       ├── test_16.manual.cuda.cu
    │       ├── test_16.manual.ref.c
    │       ├── test_27-pt-periodic.c
    │       ├── test_27-pt-periodic.manual.cuda.cu
    │       ├── test_27-pt-periodic.manual.ref.c
    │       ├── test_27-pt-reduction.c
    │       ├── test_27-pt-reduction.manual.cuda.cu
    │       ├── test_27-pt-reduction.manual.ref.c
    │       ├── test_27-pt.c
    │       ├── test_27-pt.manual.cuda.cu
    │       ├── test_27-pt.manual.ref.c
    │       ├── test_3-pt-1d.c
    │       ├── test_3-pt-1d.manual.cuda.cu
    │       ├── test_3-pt-1d.manual.ref.c
    │       ├── test_3-pt-periodic.c
    │       ├── test_3-pt-periodic.manual.cuda.cu
    │       ├── test_3-pt-periodic.manual.ref.c
    │       ├── test_5-pt-2d.c
    │       ├── test_5-pt-2d.manual.cuda.cu
    │       ├── test_5-pt-2d.manual.ref.c
    │       ├── test_5-pt-periodic.c
    │       ├── test_5-pt-periodic.manual.cuda.cu
    │       ├── test_5-pt-periodic.manual.ref.c
    │       ├── test_7-pt-double-type.c
    │       ├── test_7-pt-double-type.manual.cuda.cu
    │       ├── test_7-pt-double-type.manual.ref.c
    │       ├── test_7-pt-int-type.c
    │       ├── test_7-pt-int-type.manual.ref.c
    │       ├── test_7-pt-multi-iterations.c
    │       ├── test_7-pt-multi-iterations.manual.cuda.cu
    │       ├── test_7-pt-multi-iterations.manual.ref.c
    │       ├── test_7-pt-neumann-cond.c
    │       ├── test_7-pt-neumann-cond.manual.cuda.cu
    │       ├── test_7-pt-neumann-cond.manual.ref.c
    │       ├── test_7-pt-periodic.c
    │       ├── test_7-pt-periodic.manual.cuda.cu
    │       ├── test_7-pt-periodic.manual.ref.c
    │       ├── test_7-pt-type-mix.c
    │       ├── test_7-pt-type-mix.manual.cuda.cu
    │       ├── test_7-pt-type-mix.manual.ref.c
    │       ├── test_7-pt.c
    │       ├── test_7-pt.manual.cuda.cu
    │       ├── test_7-pt.manual.ref.c
    │       ├── test_7-pt.module.c
    │       ├── test_7-pt.module_base.c
    │       ├── test_9-pt-2d.c
    │       ├── test_9-pt-2d.manual.cuda.cu
    │       ├── test_9-pt-2d.manual.ref.c
    │       ├── test_9-pt-periodic-reduction.c
    │       ├── test_9-pt-periodic-reduction.manual.ref.c
    │       ├── test_9-pt-reduction.c
    │       ├── test_9-pt-reduction.manual.ref.c
    │       ├── test_asymmetric-periodic.c
    │       ├── test_asymmetric-periodic.manual.cuda.cu
    │       ├── test_asymmetric-periodic.manual.ref.c
    │       ├── test_asymmetric.c
    │       ├── test_asymmetric.manual.cuda.cu
    │       ├── test_asymmetric.manual.ref.c
    │       ├── test_cplusplus.cc
    │       ├── test_mixed-dim.c
    │       ├── test_mixed-dim.manual.cuda.cu
    │       ├── test_mixed-dim.manual.ref.c
    │       ├── test_mixed-dim2.c
    │       ├── test_mixed-dim2.manual.cuda.cu
    │       ├── test_mixed-dim2.manual.ref.c
    │       ├── test_mixed-dim3.c
    │       ├── test_mixed-dim3.manual.cuda.cu
    │       ├── test_mixed-dim3.manual.ref.c
    │       ├── test_multi-kernels.c
    │       ├── test_param_name.c
    │       ├── test_redblack-periodic.c
    │       ├── test_redblack-periodic.manual.cuda.cu
    │       ├── test_redblack-periodic.manual.ref.c
    │       ├── test_redblack-separated.c
    │       ├── test_redblack.c
    │       ├── test_redblack.manual.cuda.cu
    │       ├── test_redblack.manual.ref.c
    │       ├── test_reduction-2d.c
    │       ├── test_reduction-3d-int.c
    │       ├── test_reduction-3d-long.c
    │       ├── test_reduction-3d-max.c
    │       ├── test_reduction-3d-min.c
    │       ├── test_reduction-3d-prod.c
    │       ├── test_reduction-3d-sum.c
    │       ├── test_reduction-3d-sum.manual.cuda.cu
    │       ├── test_reduction-3d-sum.manual.ref.c
    │       ├── test_stencil-hole.c
    │       ├── test_stencil-hole.manual.cuda.cu
    │       ├── test_stencil-hole.manual.ref.c
    │       ├── test_user-defined-type-7-pt-periodic-complex.c
    │       ├── test_user-defined-type-7-pt-periodic-complex.manual.cuda.cu
    │       ├── test_user-defined-type-7-pt-periodic-complex.manual.ref.c
    │       ├── test_user-defined-type-7-pt-periodic.c
    │       ├── test_user-defined-type-7-pt-periodic.manual.cuda.cu
    │       ├── test_user-defined-type-7-pt-periodic.manual.ref.c
    │       ├── test_user-defined-type-7-pt.c
    │       ├── test_user-defined-type-7-pt.manual.cuda.cu
    │       ├── test_user-defined-type-7-pt.manual.ref.c
    │       ├── test_user-defined-type-array-member-copy.c
    │       ├── test_user-defined-type-copyin-copyout-two-members.c
    │       ├── test_user-defined-type-copyin-copyout.c
    │       ├── test_user-defined-type-kernel-copy.c
    │       ├── test_user-defined-type-multi-dim-member.c
    │       ├── test_user-defined-type-multi-dim-member.manual.cuda.cu
    │       ├── test_user-defined-type-multi-dim-member.manual.ref.c
    │       ├── test_user-defined-type-multi-members.c
    │       ├── test_user-defined-type-multi-members.manual.cuda.cu
    │       ├── test_user-defined-type-multi-members.manual.ref.c
    │       ├── test_user-defined-type-transpose.c
    │       ├── test_user-defined-type1.c
    │       ├── test_user-defined-type1.manual.cuda.cu
    │       ├── test_user-defined-type1.manual.ref.c
    │       ├── test_user-defined-type2.c
    │       ├── test_user-defined-type3.c
    │       ├── test_user-defined-type3.manual.cuda.cu
    │       ├── test_user-defined-type3.manual.ref.c
    │       ├── test_user-defined-type5.c
    │       ├── test_user-defined-type5.manual.cuda.cu
    │       └── test_user-defined-type5.manual.ref.c
├── translator
    ├── CMakeLists.txt
    ├── alias_analysis.cc
    ├── alias_analysis.h
    ├── ast_processing.cc
    ├── ast_processing.h
    ├── ast_traversal.h
    ├── builder_interface.cc
    ├── builder_interface.h
    ├── config.h.cmake
    ├── configuration.cc
    ├── configuration.h
    ├── cuda_builder_interface.h
    ├── cuda_hm_runtime_builder.cc
    ├── cuda_hm_runtime_builder.h
    ├── cuda_hm_translator.cc
    ├── cuda_hm_translator.h
    ├── cuda_runtime_builder.cc
    ├── cuda_runtime_builder.h
    ├── cuda_translator.cc
    ├── cuda_translator.h
    ├── cuda_util.cc
    ├── cuda_util.h
    ├── def_analysis.cc
    ├── def_analysis.h
    ├── domain.cc
    ├── domain.h
    ├── fortran_output_fix.cc
    ├── fortran_output_fix.h
    ├── grid.cc
    ├── grid.h
    ├── kernel.cc
    ├── kernel.h
    ├── map.cc
    ├── map.h
    ├── mpi_builder_interface.h
    ├── mpi_cuda_optimizer.cc
    ├── mpi_cuda_optimizer.h
    ├── mpi_cuda_runtime_builder.cc
    ├── mpi_cuda_runtime_builder.h
    ├── mpi_cuda_translator.cc
    ├── mpi_cuda_translator.h
    ├── mpi_opencl_create_kernel.cc
    ├── mpi_opencl_create_kernel_body.cc
    ├── mpi_opencl_create_kernel_call.cc
    ├── mpi_opencl_create_kernel_misc.cc
    ├── mpi_opencl_create_kernel_multi.cc
    ├── mpi_opencl_edit_kernel.cc
    ├── mpi_opencl_optimizer.cc
    ├── mpi_opencl_optimizer.h
    ├── mpi_opencl_runtime_builder.cc
    ├── mpi_opencl_runtime_builder.h
    ├── mpi_opencl_stencilmap.cc
    ├── mpi_opencl_stencilrun.cc
    ├── mpi_opencl_translator.cc
    ├── mpi_opencl_translator.h
    ├── mpi_openmp_create_kernel.cc
    ├── mpi_openmp_init.cc
    ├── mpi_openmp_translator.cc
    ├── mpi_openmp_translator.h
    ├── mpi_runtime_builder.cc
    ├── mpi_runtime_builder.h
    ├── mpi_translator.cc
    ├── mpi_translator.h
    ├── opencl_translator.cc
    ├── opencl_translator.h
    ├── opencl_translator_arghack.cc
    ├── opencl_translator_consistency.cc
    ├── opencl_translator_create_kernel.cc
    ├── opencl_translator_edit_kernel.cc
    ├── opencl_translator_getemit.cc
    ├── opencl_translator_misc.cc
    ├── opencl_translator_stencilrun.cc
    ├── optimizer
    │   ├── cuda_optimizer.cc
    │   ├── cuda_optimizer.h
    │   ├── kernel_inlining.cc
    │   ├── loop_opt.cc
    │   ├── loop_peeling.cc
    │   ├── mpi_cuda_optimizer.cc
    │   ├── mpi_cuda_optimizer.h
    │   ├── mpi_optimizer.cc
    │   ├── mpi_optimizer.h
    │   ├── offset_cse.cc
    │   ├── offset_spatial_cse.cc
    │   ├── optimization_common.cc
    │   ├── optimization_common.h
    │   ├── optimization_passes.cc
    │   ├── optimization_passes.h
    │   ├── optimizer.cc
    │   ├── optimizer.h
    │   ├── primitive_optimization.cc
    │   ├── reference_optimizer.cc
    │   ├── reference_optimizer.h
    │   ├── register_blocking.cc
    │   └── unconditional_get.cc
    ├── physis_exception.h
    ├── physis_names.h
    ├── physisc-cuda-hm.cmake
    ├── physisc-cuda.cmake
    ├── physisc-mpi-cuda.cmake
    ├── physisc-mpi-opencl.cmake
    ├── physisc-mpi-openmp.cmake
    ├── physisc-mpi.cmake
    ├── physisc-mpi2.cmake
    ├── physisc-opencl.cmake
    ├── physisc-ref.cmake
    ├── physisc.cc
    ├── reduce.cc
    ├── reduce.h
    ├── reference_runtime_builder.cc
    ├── reference_runtime_builder.h
    ├── reference_translator.cc
    ├── reference_translator.h
    ├── rose_ast_attribute.cc
    ├── rose_ast_attribute.h
    ├── rose_fortran.cc
    ├── rose_fortran.h
    ├── rose_traversal.cc
    ├── rose_traversal.h
    ├── rose_util.cc
    ├── rose_util.h
    ├── run.cc
    ├── run.h
    ├── runtime_builder.cc
    ├── runtime_builder.h
    ├── stencil_analysis.cc
    ├── stencil_analysis.h
    ├── stencil_range.cc
    ├── stencil_range.h
    ├── test
    │   ├── CMakeLists.txt
    │   ├── common.cc
    │   ├── common.h
    │   ├── test_ast_processing.cc
    │   ├── test_ast_processing_input_remove_redundant_variable_copy.c
    │   ├── test_ast_processing_input_remove_unused_func.c
    │   ├── test_ast_traversal.cc
    │   ├── test_ast_traversal_input.c
    │   ├── test_grid.cc
    │   └── test_grid_input.c
    ├── tocheck
    ├── translation_context.cc
    ├── translation_context.h
    ├── translation_util.cc
    ├── translation_util.h
    ├── translator.cc
    ├── translator.h
    └── translator_common.h
└── util
    ├── CMakeLists.txt
    ├── configuration.cc
    ├── configuration.h
    ├── log4cpp-test.cpp
    ├── log4cpp.cpp
    ├── log4cpp.h
    ├── lua_loader.cc
    └── lua_loader.h


/.gitignore:
--------------------------------------------------------------------------------
1 | *.mod
2 | *.rmod
3 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2011-2012, Naoya Maruyama
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are
 7 | met: 
 8 | 
 9 | * Redistributions of source code must retain the above copyright
10 |   notice, this list of conditions and the following disclaimer. 
11 | * Redistributions in binary form must reproduce the above copyright
12 |   notice, this list of conditions and the following disclaimer in the
13 |   documentation and/or other materials provided with the
14 |   distribution. 
15 | * Neither the name of RIKEN AICS nor the names of its contributors may
16 |   be used to endorse or promote products derived from this software
17 |   without specific prior written permission. 
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | 


--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
1 | - check how emit is analyzed
2 | - annotate emit with attribute
3 | 


--------------------------------------------------------------------------------
/cmake/modules/FindOpenCL.cmake:
--------------------------------------------------------------------------------
 1 | # OPENCL_INCLUDE_DIR = directory containing CL/cl.h
 2 | # OPENCL_LIBRARY     = path to the OpenCL library
 3 | # OPENCL_FOUND       = true if OpenCL is found
 4 | 
 5 | find_package (CUDA)
 6 | # Only the CL/cl.h found under the CUDA include directories is considered.
 7 | if (CUDA_FOUND)
 8 |   find_path (OPENCL_INCLUDE_DIR
 9 |     NAMES CL/cl.h
10 |     PATHS ${CUDA_INCLUDE_DIRS}
11 |     )
12 |   if (OPENCL_INCLUDE_DIR)
13 |     find_library(OPENCL_LIBRARY
14 |       NAMES OpenCL
15 |       PATHS ENV LD_LIBRARY_PATH  # ENV must be upper-case; lower-case "env" is taken as a literal directory name
16 |       )
17 |   endif ()
18 | endif()
19 | # OPENCL_FOUND is TRUE only when both the header directory and the library were located.
20 | set (OPENCL_FOUND FALSE)
21 | if (OPENCL_INCLUDE_DIR AND OPENCL_LIBRARY)
22 |   message (STATUS "OpenCL found")
23 |   message (STATUS "OPENCL_INCLUDE_DIR=${OPENCL_INCLUDE_DIR}")
24 |   message (STATUS "OPENCL_LIBRARY=${OPENCL_LIBRARY}")
25 |   set (OPENCL_FOUND TRUE)
26 | else ()
27 |   message (STATUS "OpenCL not found")
28 | endif ()
29 | 
30 | MARK_AS_ADVANCED(
31 | OPENCL_INCLUDE_DIR
32 | OPENCL_LIBRARY
33 | OPENCL_FOUND
34 | )
35 | 


--------------------------------------------------------------------------------
/cmake/modules/FindROSE.cmake:
--------------------------------------------------------------------------------
 1 | # ROSE_INCLUDE_DIR
 2 | # ROSE_LIBRARIES
 3 | # ROSE_FOUND = true if ROSE is found
 4 | 
 5 | FIND_PATH(ROSE_INCLUDE_DIR rose.h PATH_SUFFIXES rose NO_SYSTEM_ENVIRONMENT_PATH)
 6 | FIND_LIBRARY(ROSE_LIBRARIES rose  NO_SYSTEM_ENVIRONMENT_PATH)
 7 | FIND_LIBRARY(ROSE_LIBRARIES rose)
 8 | 
 9 | SET(ROSE_FOUND FALSE)
10 | IF(ROSE_INCLUDE_DIR AND ROSE_LIBRARIES)
11 | MESSAGE(STATUS "ROSE_INCLUDE_DIR=${ROSE_INCLUDE_DIR}")
12 | MESSAGE(STATUS "ROSE_LIBRARIES=${ROSE_LIBRARIES}")
13 | SET(ROSE_FOUND TRUE)
14 | string(REGEX MATCH "include/rose$" ROSE_EDG4X ${ROSE_INCLUDE_DIR})
15 | if (ROSE_EDG4X)
16 |   message(STATUS "Detected ROSE EDG4X")
17 |   set(ROSE_EDG4X TRUE)
18 | else ()
19 |   message(STATUS "Detected ROSE EDG3")
20 | endif ()
21 | ENDIF()
22 | 
23 | MARK_AS_ADVANCED(
24 | ROSE_INCLUDE_DIR
25 | ROSE_LIBRARIES
26 | ROSE_FOUND
27 | )
28 | 


--------------------------------------------------------------------------------
/common/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h)


--------------------------------------------------------------------------------
/common/config.h.cmake:
--------------------------------------------------------------------------------
 1 | #ifndef PHYSIS_COMMON_CONFIG_H_
 2 | #define PHYSIS_COMMON_CONFIG_H_
 3 | 
 4 | #cmakedefine PS_WARNING
 5 | #cmakedefine PS_DEBUG
 6 | #cmakedefine PS_VERBOSE
 7 | 
 8 | #cmakedefine AUTO_TUNING
 9 | 
10 | #endif /* PHYSIS_COMMON_CONFIG_H_ */
11 | 


--------------------------------------------------------------------------------
/config/sample1.lua:
--------------------------------------------------------------------------------
1 | CUDA_BLOCK_SIZE = {64, 4, 1}
2 | MPI_OVERLAP = true
3 | MULTISTREAM_BOUNDARY = true
4 | -- TRACE_KERNEL = false
5 | -- CUDA_KERNEL_ERROR_CHECK = false
6 | 


--------------------------------------------------------------------------------
/docs/unfiled_notes.rst:
--------------------------------------------------------------------------------
 1 | * Boost v1.45 (or older) will cause "dereferencing type-punned
 2 |   pointer" warning with runtime/reduce.h when gcc optimization is
 3 |   enabled. It was fixed as discussed here:
 4 |   https://svn.boost.org/trac/boost/ticket/4538. Version 1.47 seems to 
 5 |   have that fix.
 6 | * On Mac OS X, translator is not tested since building ROSE on Mac OS X
 7 |   is not well supported.
 8 | * On Mac OS X, the Boost library installed by Homebrew may be built
 9 |   with g++, not the OS X default clang++. nvcc works only with the
10 |   default compiler, so the Homebrew-built Boost and nvcc do not work
11 |   together.
12 | * On Mac OS X, nvcc (at least v6.5) uses libstdc++, not the default
13 |   libc++. Linking by CMake uses c++ rather than nvcc, so -stdlib flag
14 |   needs to be set.
15 | * MPICH fails to compile with OS X c++ with -stdlib=libstdc++ switch. 
16 | 


--------------------------------------------------------------------------------
/examples/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(himeno)
2 | add_subdirectory(diffusion-benchmark)


--------------------------------------------------------------------------------
/examples/diffusion-benchmark/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Makefile.cmake
 2 |   ${CMAKE_CURRENT_BINARY_DIR}/tmp/Makefile @ONLY)
 3 | # ${VAR} is used rather than @VAR@: @VAR@ in CMake code is only expanded under legacy (pre-CMP0053) behaviour.
 4 | install(FILES
 5 |   autotune.conf
 6 |   baseline.cc
 7 |   baseline.h
 8 |   CMakeLists.txt
 9 |   diffusion3d.cc
10 |   diffusion3d_cuda.cu
11 |   diffusion3d_cuda.h
12 |   diffusion3d_cuda_shared.cu
13 |   diffusion3d_cuda_temporal_blocking.cu
14 |   diffusion3d_cuda_temporal_blocking.h
15 |   diffusion3d.h
16 |   diffusion3d.mic.c
17 |   diffusion3d_mic.cc
18 |   diffusion3d_mic.h
19 |   diffusion3d_openmp.cc
20 |   diffusion3d_openmp.h
21 |   diffusion3d_openmp_temporal_blocking.cc
22 |   diffusion3d_openmp_temporal_blocking.h
23 |   diffusion3d_physis.c
24 |   diffusion3d_physis.h
25 |   main.cc
26 |   opt.conf
27 |   README
28 |   stopwatch.h
29 |   ${CMAKE_CURRENT_BINARY_DIR}/tmp/Makefile
30 |   DESTINATION examples/diffusion-benchmark)
31 | 
32 | 


--------------------------------------------------------------------------------
/examples/diffusion-benchmark/README:
--------------------------------------------------------------------------------
 1 | ====================================
 2 | Diffusion 7-point Stencil Benchmarks
 3 | ====================================
 4 | 
 5 | Benchmarks
 6 | ----------
 7 | 
 8 | - baseline
 9 |   - A baseline sequential implementation
10 | - openmp
11 |   - OpenMP-parallel version of baseline
12 | - mic (not completed)
13 |   - Intel MIC version
14 | - cuda 
15 |   - NVIDIA CUDA version
16 |   - Several variants available (e.g., cuda_opt1)
17 | - opencl (not completed)
18 |   - Portable OpenCL version
19 | 
20 | Compilation
21 | -----------
22 | 
23 | Type `make'. By default, diffusion3d_baseline and diffusion3d_openmp
24 | will be built. Other variants can be built with `make
25 | variant-name'. For example, the CUDA variant can be built with `make cuda'
26 | if the CUDA toolkit is available.
27 | 
28 | 
29 | Usage
30 | -----
31 | 
32 | Execute each benchmark as: 
33 | 
34 |    benchmark_executable [--count N] [--size S]
35 | 
36 | The options can be used to set benchmark configurations. For more
37 | information, see the help message by supplying --help option.
38 | 
39 | 
40 | Notes
41 | -----
42 | - File diffusion3d.mic.c
43 |   - deprecated
44 | - File diffusion3d_mic.cc
45 |   - not tested
46 |   - needs PCI timing. see diffusion3d.mic.c
47 | 
48 | 
49 | 


--------------------------------------------------------------------------------
/examples/diffusion-benchmark/autotune.conf:
--------------------------------------------------------------------------------
 1 | CUDA_BLOCK_SIZE = {{32, 4, 1}, {32, 8, 1}, {64, 4, 1}, {64, 8, 1}, {128, 4, 1}, {128, 8, 1}}
 2 | OPT_KERNEL_INLINING = {true, false}
 3 | OPT_LOOP_PEELING = {true, false}
 4 | OPT_REGISTER_BLOCKING = {true, false}
 5 | OPT_UNCONDITIONAL_GET = {true, false}
 6 | OPT_OFFSET_CSE = {true, false}
 7 | OPT_OFFSET_SPATIAL_CSE = {true, false}
 8 | OPT_OFFSET_COMP = {true, false}
 9 | OPT_LOOP_OPT = {true, false}
10 | 


--------------------------------------------------------------------------------
/examples/diffusion-benchmark/baseline.h:
--------------------------------------------------------------------------------
 1 | #ifndef BENCHMARKS_DIFFUSION3D_BASELINE_H_
 2 | #define BENCHMARKS_DIFFUSION3D_BASELINE_H_
 3 | 
 4 | #include "diffusion3d.h"
 5 | 
 6 | namespace diffusion3d {
 7 | 
 8 | class Baseline: public Diffusion3D {  // Diffusion3D variant that reports itself under the name "baseline"
 9 |  protected:
10 |   REAL *f1_, *f2_;  // both start as NULL; presumably the grid buffers managed by Initialize/FinalizeBenchmark -- TODO confirm in baseline.cc
11 |  public:
12 |   Baseline(int nx, int ny, int nz):  // forwards the three grid extents to the Diffusion3D constructor
13 |       Diffusion3D(nx, ny, nz), f1_(NULL), f2_(NULL) {}
14 |   virtual std::string GetName() const {  // identifier string of this variant
15 |     return std::string("baseline");
16 |   }
17 |   virtual void InitializeBenchmark();  // defined out of line (not visible in this header)
18 |   virtual void FinalizeBenchmark();  // defined out of line
19 |   virtual void RunKernel(int count);  // defined out of line; count is presumably the iteration count -- TODO confirm
20 |   virtual REAL GetAccuracy(int count);  // defined out of line
21 |   virtual void Dump() const;  // defined out of line
22 | };
23 | 
24 | }
25 | 
26 | #endif /* BENCHMARKS_DIFFUSION3D_BASELINE_H_ */
27 | 


--------------------------------------------------------------------------------
/examples/diffusion-benchmark/diffusion3d.cc:
--------------------------------------------------------------------------------
 1 | #include "diffusion3d.h"
 2 | 
 3 | #include <getopt.h>
 4 | 
 5 | #include <string>
 6 | #include <vector>
 7 | #include <map>
 8 | 
 9 | using std::vector;
10 | using std::string;
11 | 
12 | namespace diffusion3d {
13 | 
14 | void Initialize(REAL *buff, const int nx, const int ny, const int nz,  // writes one value per grid point into buff[0 .. nx*ny*nz)
15 |                 const REAL kx, const REAL ky, const REAL kz,  // per-axis factors applied to x, y, z inside cos()
16 |                 const REAL dx, const REAL dy, const REAL dz,  // per-axis spacing; coordinates are (index + 0.5) * spacing
17 |                 const REAL kappa, const REAL time) {  // used only in the exp(-kappa*time*k*k) amplitudes
18 |   REAL ax = exp(-kappa*time*(kx*kx));  // loop-invariant amplitude of the x cosine term
19 |   REAL ay = exp(-kappa*time*(ky*ky));  // same for y
20 |   REAL az = exp(-kappa*time*(kz*kz));  // same for z
21 |   int jz;  
22 |   for (jz = 0; jz < nz; jz++) {  // z is the outermost (slowest) dimension
23 |     int jy;
24 |     for (jy = 0; jy < ny; jy++) {
25 |       int jx;
26 |       for (jx = 0; jx < nx; jx++) {  // x is the innermost (contiguous) dimension
27 |         int j = jz*nx*ny + jy*nx + jx;  // linear index; NOTE(review): int arithmetic, overflows if nx*ny*nz exceeds INT_MAX
28 |         REAL x = dx*((REAL)(jx + 0.5));  // coordinate offset by half a grid step
29 |         REAL y = dy*((REAL)(jy + 0.5));
30 |         REAL z = dz*((REAL)(jz + 0.5));
31 |         REAL f0 = (REAL)0.125  // 0.125 * (1 - ax*cos(kx*x)) * (1 - ay*cos(ky*y)) * (1 - az*cos(kz*z))
32 |           *(1.0 - ax*cos(kx*x))
33 |           *(1.0 - ay*cos(ky*y))
34 |           *(1.0 - az*cos(kz*z));
35 |         buff[j] = f0;
36 |       }
37 |     }
38 |   }
39 | }
40 | 
41 | }
42 | 


--------------------------------------------------------------------------------
/examples/diffusion-benchmark/diffusion3d_cuda_temporal_blocking.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/naoyam/physis/39ee5250a2d5baa545ca03e7c5c9aa9c81f1ab19/examples/diffusion-benchmark/diffusion3d_cuda_temporal_blocking.cu


--------------------------------------------------------------------------------
/examples/diffusion-benchmark/diffusion3d_cuda_temporal_blocking.h:
--------------------------------------------------------------------------------
 1 | #ifndef BENCHMARKS_DIFFUSION3D_DIFFUSION3D_CUDA_TEMPORAL_BLOCKING_H_
 2 | #define BENCHMARKS_DIFFUSION3D_DIFFUSION3D_CUDA_TEMPORAL_BLOCKING_H_
 3 | 
 4 | #include "diffusion3d.h"
 5 | #include "baseline.h"
 6 | #include "diffusion3d_cuda.h"
 7 | 
 8 | #include <cuda_runtime.h>
 9 | 
10 | namespace diffusion3d {
11 | 
12 | class Diffusion3DCUDATemporalBlocking: public Diffusion3DCUDA {
13 |  public:
14 |   Diffusion3DCUDATemporalBlocking(int nx, int ny, int nz):
15 |       Diffusion3DCUDA(nx, ny, nz) {
16 |     block_x_ = 32;
17 |     block_y_ = 16;
18 |   }
19 |   virtual std::string GetName() const {
20 |     return std::string("cuda_temporal_blocking");
21 |   }
22 |   //virtual void InitializeBenchmark();
23 |   virtual void RunKernel(int count);
24 | };
25 | 
26 | }
27 | 
28 | #endif /* BENCHMARKS_DIFFUSION3D_DIFFUSION3D_CUDA_TEMPORAL_BLOCKING_H_ */
29 | 


--------------------------------------------------------------------------------
/examples/diffusion-benchmark/diffusion3d_mic.cc:
--------------------------------------------------------------------------------
 1 | #include "benchmarks/diffusion3d/diffusion3d_mic.h"
 2 | 
 3 | namespace diffusion3d {
 4 | 
 5 | void Diffusion3DMIC::InitializeBenchmark() {
 6 |   f1_ = (REAL*)_mm_malloc(sizeof(REAL) * nx_ * ny_ * nz_, 4096);
 7 |   assert(f1_);    
 8 |   f2_ = (REAL*)_mm_malloc(sizeof(REAL) * nx_ * ny_ * nz_, 4096);
 9 |   assert(f2_);
10 |   Initialize(f1_, nx_, ny_, nz_,
11 |              kx_, ky_, kz_, dx_, dy_, dz_,
12 |              kappa_, 0.0);
13 | }
14 | 
15 | void Diffusion3DMIC::RunKernel(int count) {
16 |   int i;
17 | #pragma offload target(mic) \
18 |   inout(f1_:length(nx_*ny_*nz_) align(2*1024*1024))     \
19 |   inout(f2_:length(nx_*ny_*nz_) align(2*1024*1024))
20 |   {
21 |     for (i = 0; i < count; ++i) {
22 |       int y, z;
23 | #pragma omp parallel for collapse(2) private(y, z)
24 |       for (z = 0; z < nz_; z++) {
25 |         for (y = 0; y < ny_; y++) {
26 |           int x;
27 | #pragma ivdep          
28 |           for (x = 0; x < nx_; x++) {
29 |             int c, w, e, n, s, b, t;
30 |             c =  x + y * nx_ + z * nx_ * ny_;
31 |             w = (x == 0)    ? c : c - 1;
32 |             e = (x == nx_-1) ? c : c + 1;
33 |             n = (y == 0)    ? c : c - nx_;
34 |             s = (y == ny_-1) ? c : c + nx_;
35 |             b = (z == 0)    ? c : c - nx_ * ny_;
36 |             t = (z == nz_-1) ? c : c + nx_ * ny_;
37 |             f2_[c] = cc_ * f1_[c] + cw_ * f1_[w] + ce_ * f1_[e]
38 |                 + cs_ * f1_[s] + cn_ * f1_[n] + cb_ * f1_[b] + ct_ * f1_[t];
39 |           }
40 |         }
41 |       }
42 |       REAL *t = f1_;
43 |       f1_ = f2_;
44 |       f2_ = t;
45 |     }
46 |   }
47 |   return;
48 | }
49 | 
50 | }
51 | 


--------------------------------------------------------------------------------
/examples/diffusion-benchmark/diffusion3d_mic.h:
--------------------------------------------------------------------------------
 1 | #ifndef BENCHMARKS_DIFFUSION3D_DIFFUSION3D_MIC_H_
 2 | #define BENCHMARKS_DIFFUSION3D_DIFFUSION3D_MIC_H_
 3 | 
 4 | #include "diffusion3d.h"
 5 | #include "baseline.h"
 6 | 
 7 | namespace diffusion3d {
 8 | 
 9 | class Diffusion3DMIC: public Baseline {
10 |  public:
11 |   Diffusion3DMIC(int nx, int ny, int nz):
12 |       Baseline(nx, ny, nz) {}
13 |   virtual std::string GetName() const {
14 |     return std::string("mic");
15 |   }
16 |   virtual void InitializeBenchmark();
17 |   virtual void RunKernel(int count);
18 | };
19 | 
20 | }
21 | 
22 | #endif /* BENCHMARKS_DIFFUSION3D_DIFFUSION3D_MIC_H_ */
23 | 


--------------------------------------------------------------------------------
/examples/diffusion-benchmark/diffusion3d_openmp.h:
--------------------------------------------------------------------------------
 1 | #ifndef BENCHMARKS_DIFFUSION3D_DIFFUSION3D_OPENMP_H_
 2 | #define BENCHMARKS_DIFFUSION3D_DIFFUSION3D_OPENMP_H_
 3 | 
 4 | #include "diffusion3d.h"
 5 | #include "baseline.h"
 6 | 
 7 | namespace diffusion3d {
 8 | 
 9 | class Diffusion3DOpenMP: public Baseline {
10 |  public:
11 |   Diffusion3DOpenMP(int nx, int ny, int nz):
12 |       Baseline(nx, ny, nz) {}
13 |   virtual std::string GetName() const {
14 |     return std::string("openmp");
15 |   }
16 |   virtual void InitializeBenchmark();
17 |   virtual void RunKernel(int count);
18 |   virtual void InitializeOMP(
19 |       REAL *buff, const int nx, const int ny, const int nz,
20 |       const REAL kx, const REAL ky, const REAL kz,
21 |       const REAL dx, const REAL dy, const REAL dz,
22 |       const REAL kappa, const REAL time);
23 |   
24 | };
25 | 
26 | }
27 | 
28 | #endif /* BENCHMARKS_DIFFUSION3D_DIFFUSION3D_OPENMP_H_ */
29 | 


--------------------------------------------------------------------------------
/examples/diffusion-benchmark/diffusion3d_openmp_temporal_blocking.h:
--------------------------------------------------------------------------------
 1 | #ifndef BENCHMARKS_DIFFUSION3D_DIFFUSION3D_OPENMP_TEMPORAL_BLOCKING_H_
 2 | #define BENCHMARKS_DIFFUSION3D_DIFFUSION3D_OPENMP_TEMPORAL_BLOCKING_H_
 3 | 
 4 | #include "diffusion3d_openmp.h"
 5 | 
 6 | namespace diffusion3d {
 7 | 
 8 | class Diffusion3DOpenMPTemporalBlocking: public Diffusion3DOpenMP {
 9 |  public:
10 |   Diffusion3DOpenMPTemporalBlocking(int nx, int ny, int nz):
11 |       Diffusion3DOpenMP(nx, ny, nz) {}
12 |   virtual std::string GetName() const {
13 |     return std::string("openmp_temporal_blocking");
14 |   }
15 |   virtual void RunKernel(int count);
16 | };
17 | 
18 | }
19 | 
20 | #endif /* BENCHMARKS_DIFFUSION3D_DIFFUSION3D_OPENMP_TEMPORAL_BLOCKING_H_ */
21 | 


--------------------------------------------------------------------------------
/examples/diffusion-benchmark/diffusion3d_physis.h:
--------------------------------------------------------------------------------
 1 | #ifndef BENCHMARKS_DIFFUSION3D_DIFFUSION3D_PHYSIS_H_
 2 | #define BENCHMARKS_DIFFUSION3D_DIFFUSION3D_PHYSIS_H_
 3 | 
 4 | #include "diffusion3d.h"
 5 | #include "baseline.h"
 6 | 
 7 | extern "C" {
 8 |   extern void initialize_physis(int argc, char **argv,
 9 |                                 int nx, int ny, int nz);
10 |   extern void initialize_benchmark_physis(int nx, int ny, int nz);
11 |   extern void finalize_benchmark_physis();
12 |   extern void run_kernel_physis(int count, REAL *f1_host,
13 |                                 int nx, int ny, int nz,
14 |                                 REAL ce, REAL cw, REAL cn, REAL cs,
15 |                                 REAL ct, REAL cb, REAL cc);
16 |   
17 | }
18 | 
19 | namespace diffusion3d {
20 | 
21 | class Diffusion3DPhysis: public Baseline {
22 |  public:
23 |   Diffusion3DPhysis(int nx, int ny, int nz,
24 |                     int argc, char **argv):
25 |       Baseline(nx, ny, nz) {
26 |     initialize_physis(argc, argv, nx, ny, nz);
27 |   }
28 |   virtual std::string GetName() const {
29 |     return std::string("physis");
30 |   }
31 |   virtual void InitializeBenchmark() {
32 |     Baseline::InitializeBenchmark();
33 |     initialize_benchmark_physis(nx_, ny_, nz_);
34 |   }
35 |   virtual void FinalizeBenchmark() {
36 |     finalize_benchmark_physis();
37 |   }
38 |   virtual void RunKernel(int count) {
39 |     run_kernel_physis(count, f1_, nx_, ny_, nz_,
40 |                       ce_, cw_, cn_, cs_, ct_, cb_, cc_);
41 |   }
42 |   
43 | };
44 | 
45 | }
46 | 
47 | #endif /* BENCHMARKS_DIFFUSION3D_DIFFUSION3D_PHYSIS_H_ */
48 | 
49 | 


--------------------------------------------------------------------------------
/examples/diffusion-benchmark/opt.conf:
--------------------------------------------------------------------------------
1 | OPT_OFFSET_COMP = true
2 | OPT_LOOP_PEELING = true
3 | OPT_REGISTER_BLOCKING = true
4 | OPT_LOOP_OPT = true
5 | OPT_UNCONDITIONAL_GET = true
6 | 


--------------------------------------------------------------------------------
/examples/diffusion-benchmark/stopwatch.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2011, Tokyo Institute of Technology.
 2 | // All rights reserved.
 3 | //
 4 | // This file is distributed under the license described in
 5 | // LICENSE.txt.
 6 | //
 7 | // Author: Naoya Maruyama (naoya@matsulab.is.titech.ac.jp)
 8 | 
 9 | #ifndef BENCHMARKS_COMMON_STOPWATCH_H_
10 | #define BENCHMARKS_COMMON_STOPWATCH_H_
11 | 
#if !(defined(unix) || defined(__unix__) || defined(__unix) || \
      defined(__APPLE__))
#error "Unknown environment"
#endif

#include <sys/time.h>
#include <time.h>

/* One time sample as filled in by gettimeofday(). */
typedef struct {
  struct timeval tv;
} Stopwatch;

/* Stores the current gettimeofday() reading in *w. */
static inline void StopwatchQuery(Stopwatch *w) {
  gettimeofday(&(w->tv), NULL);
}

/* Returns end - begin in seconds (the microsecond part is scaled by 1e-6). */
static inline float StopwatchDiff(const Stopwatch *begin,
                                  const Stopwatch *end) {
  const double whole_seconds = end->tv.tv_sec - begin->tv.tv_sec;
  const double fraction = (end->tv.tv_usec - begin->tv.tv_usec) * 1.0e-06;
  return whole_seconds + fraction;
}

/* Records the starting sample in *w. */
static inline void StopwatchStart(Stopwatch *w) {
  StopwatchQuery(w);
}

/* Returns the seconds elapsed since the sample stored in *w. */
static inline float StopwatchStop(Stopwatch *w) {
  Stopwatch current;
  StopwatchQuery(&current);
  return StopwatchDiff(w, &current);
}
46 | 
47 | #endif /* BENCHMARKS_COMMON_STOPWATCH_H_ */
48 | 


--------------------------------------------------------------------------------
/examples/diffusion-fortran/README:
--------------------------------------------------------------------------------
1 | This is work in progress.
2 | 


--------------------------------------------------------------------------------
/examples/himeno/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Makefile.cmake
 2 |   @CMAKE_CURRENT_BINARY_DIR@/tmp/Makefile @ONLY)
 3 | 
 4 | install(FILES
 5 |   himenobmtxpa_physis.c himenobmtxpa_original.c
 6 |   @CMAKE_CURRENT_BINARY_DIR@/tmp/Makefile
 7 |   physis.conf
 8 |   opt.conf
 9 |   autotune.conf
10 |   DESTINATION examples/himeno)
11 | 
12 | 


--------------------------------------------------------------------------------
/examples/himeno/autotune.conf:
--------------------------------------------------------------------------------
 1 | CUDA_BLOCK_SIZE = {{32, 4, 1}, {32, 8, 1}, {64, 4, 1}, {64, 8, 1}, {128, 4, 1}, {128, 8, 1}}
 2 | OPT_KERNEL_INLINING = {true, false}
 3 | OPT_LOOP_PEELING = {true, false}
 4 | OPT_REGISTER_BLOCKING = {true, false}
 5 | OPT_UNCONDITIONAL_GET = {true, false}
 6 | OPT_OFFSET_CSE = {true, false}
 7 | OPT_OFFSET_SPATIAL_CSE = {true, false}
 8 | OPT_OFFSET_COMP = {true, false}
 9 | OPT_LOOP_OPT = {true, false}
10 | 


--------------------------------------------------------------------------------
/examples/himeno/opt.conf:
--------------------------------------------------------------------------------
1 | CUDA_BLOCK_SIZE = {64, 4, 1}
2 | OPT_OFFSET_COMP = true
3 | OPT_LOOP_OPT = true
4 | 


--------------------------------------------------------------------------------
/examples/himeno/physis.conf:
--------------------------------------------------------------------------------
1 | OPT_OFFSET_COMP = true
2 | OPT_LOOP_OPT = true


--------------------------------------------------------------------------------
/examples/test_double_buffering.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "physis/physis.h"
 3 | 
 4 | #define N 8
 5 | 
 6 | void kernel1(const int x, const int y, const int z,
 7 |             PSGrid3DFloat g1, PSGrid3DFloat g2) {
 8 |   float v = PSGridGet(g1, x, y, z) * 2;
 9 |   PSGridEmit(g2, v);
10 |   return;
11 | }
12 | 
13 | void kernel2(const int x, const int y, const int z,
14 |             PSGrid3DFloat g1, PSGrid3DFloat g2) {
15 |   float v = PSGridGet(g1, x, y, z) * 2;
16 |   PSGridEmit(g2, v);
17 |   return;
18 | }
19 | 
20 | int main(int argc, char *argv[]) {
21 |   PSInit(&argc, &argv, 3, N, N, N);
22 |   PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N);
23 |   PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);  
24 |   PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);
25 |   size_t nelms = N*N*N;
26 | 
27 | #if 0  
28 |   float *indata = (float *)malloc(sizeof(float) * nelms);
29 |   int i;
30 |   for (i = 0; i < nelms; i++) {
31 |     indata[i] = i;
32 |   }
33 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
34 |     
35 |   PSGridCopyin(g, indata);
36 | #endif  
37 | 
38 | #if 1
39 |   PSStencilRun(PSStencilMap(kernel1, d, g1, g2),
40 |                PSStencilMap(kernel1, d, g2, g1));
41 | #else
42 |   PSStencilRun(PSStencilMap(kernel1, d, g1, g2),
43 |                PSStencilMap(kernel2, d, g2, g1));
44 | #endif  
45 |   
46 | #if 0  
47 |   PSGridCopyout(g, outdata);
48 |     
49 |   for (i = 0; i < nelms; i++) {
50 |     if (indata[i] * 2 != outdata[i]) {
51 |       fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n",
52 |               i, indata[i], outdata[i]);
53 |     }
54 |   }
55 | 
56 |   PSGridFree(g);
57 | #endif
58 |   
59 |   PSFinalize();
60 |   return 0;
61 | }
62 | 
63 | 


--------------------------------------------------------------------------------
/examples/test_global_variable.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "physis/physis.h"
 3 | 
 4 | #define N 8
 5 | 
 6 | PSGrid3DFloat g;
 7 | 
 8 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g) {
 9 |   float v = PSGridGet(g, x, y, z) * 2;
10 |   PSGridEmit(g, v);
11 |   return;
12 | }
13 | 
14 | int main(int argc, char *argv[]) {
15 |   PSInit(&argc, &argv, 3, N, N, N);
16 |   g = PSGrid3DFloatNew(N, N, N);
17 |   PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);
18 |   size_t nelms = N*N*N;
19 |   
20 |   float *indata = (float *)malloc(sizeof(float) * nelms);
21 |   int i;
22 |   for (i = 0; i < nelms; i++) {
23 |     indata[i] = i;
24 |   }
25 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
26 |     
27 |   PSGridCopyin(g, indata);
28 | 
29 |   PSStencilRun(PSStencilMap(kernel, d, g));
30 |     
31 |   PSGridCopyout(g, outdata);
32 |     
33 |   for (i = 0; i < nelms; i++) {
34 |     if (indata[i] * 2 != outdata[i]) {
35 |       fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n",
36 |               i, indata[i], outdata[i]);
37 |     }
38 |   }
39 | 
40 |   PSGridFree(g);
41 |   PSFinalize();
42 |   free(indata);
43 |   free(outdata);
44 |   return 0;
45 | }
46 | 
47 | 


--------------------------------------------------------------------------------
/examples/test_set.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include "physis/physis.h"
 4 | 
 5 | #define N 2
 6 | 
 7 | #define IDX3(x, y, z) ((x) + (y) * N + (z) * N * N)
 8 | 
 9 | int main(int argc, char *argv[]) {
10 |   PSInit(&argc, &argv, 3, N, N, N);
11 |   PSGrid3DFloat g = PSGrid3DFloatNew(N, N, N);
12 |   size_t nelms = N*N*N;
13 |   
14 |   int i, j, k;
15 |   float v = 0;
16 |   for (i = 0; i < N; ++i) {
17 |     for (j = 0; j < N; ++j) {
18 |       for (k = 0; k < N; ++k) {
19 |         PSGridSet(g, i, j, k, v);
20 |         ++v;
21 |       }
22 |     }
23 |   }
24 |   
25 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
26 |   PSGridCopyout(g, outdata);
27 | 
28 |   v = 0;
29 |   for (i = 0; i < N; ++i) {
30 |     for (j = 0; j < N; ++j) {
31 |       for (k = 0; k < N; ++k) {
32 |         if (outdata[IDX3(i, j, k)] != v) {
33 |           fprintf(stderr, "Error: mismatch at %d:%d:%d, in: %f, out: %f\n",
34 |                   i, j, k, outdata[IDX3(i,j,k)], v);
35 |         }
36 |         ++v;
37 |       }
38 |     }
39 |   }
40 | 
41 |   PSGridFree(g);
42 |   PSFinalize();
43 |   free(outdata);
44 |   return 0;
45 | }
46 | 
47 | 


--------------------------------------------------------------------------------
/include/physis/.gitignore:
--------------------------------------------------------------------------------
1 | config.h
2 | 


--------------------------------------------------------------------------------
/include/physis/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | 
2 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake
3 |   ${CMAKE_CURRENT_SOURCE_DIR}/config.h)


--------------------------------------------------------------------------------
/include/physis/config.h.cmake:
--------------------------------------------------------------------------------
 1 | #ifndef PHYSIS_CONFIG_H_
 2 | #define PHYSIS_CONFIG_H_
 3 | 
 4 | // These are duplicates of common/config.h, but need to be here since
 5 | // common/config.h is not going to be installed, while this header file
 6 | // is installed by make install.
 7 | #cmakedefine CUDA_ENABLED
 8 | #cmakedefine MPI_ENABLED
 9 | #cmakedefine PS_DEBUG
10 | #cmakedefine PS_VERBOSE
11 | #cmakedefine PS_WARNING
12 | 
13 | #cmakedefine AUTO_DOUBLE_BUFFERING
14 | 
15 | #endif /* PHYSIS_CONFIG_H_ */
16 | 


--------------------------------------------------------------------------------
/include/physis/fortran/physis.h:
--------------------------------------------------------------------------------
 1 | ! Copyright 2011-2013, RIKEN AICS.
 2 | ! All rights reserved.
 3 | !
 4 | ! This file is distributed under the BSD license. See LICENSE.txt for
 5 | ! details.
 6 | #ifndef PHYSIS_PHYSIS_FORTRAN_PHYSIS_H_
 7 | #define PHYSIS_PHYSIS_FORTRAN_PHYSIS_H_
 8 |     
 9 | #if ! defined(PHYSIS_INDEX_INT64)
10 | #define PHYSIS_INDEX_INT32
11 | #endif
12 | 
13 | #endif /* PHYSIS_PHYSIS_FORTRAN_PHYSIS_H_ */
14 | 


--------------------------------------------------------------------------------
/include/physis/math.h:
--------------------------------------------------------------------------------
 1 | #ifndef PHYSIS_MATH_H_
 2 | #define PHYSIS_MATH_H_
 3 | 
 4 | #ifdef PHYSIS_USER
 5 | extern double exp(double x);
 6 | extern float expf(float x);
 7 | extern long double expl(long double x);
 8 | extern double cos(double x);
 9 | extern float cosf(float x);
10 | extern double acos(double x);
11 | extern float acosf(float x);
12 | #else
13 | #if defined(PHYSIS_REF) || defined(PHYSIS_MPI) || defined(PHYSIS_MPI_OPENMP)
14 | #include <math.h>
15 | #endif
16 | #endif
17 | 
18 | 
19 | 
20 | #endif /* PHYSIS_MATH_H_ */
21 | 


--------------------------------------------------------------------------------
/include/physis/physis.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright 2011-2013, RIKEN AICS.
 2 | ! All rights reserved.
 3 | !
 4 | ! This file is distributed under the BSD license. See LICENSE.txt for
 5 | ! details.
 6 | 
 7 | #ifndef PHYSIS_PHYSIS_F90_
 8 | #define PHYSIS_PHYSIS_F90_
 9 | 
10 | #if defined(PHYSIS_USER)
11 | #include "physis/physis_user.F90"
12 | #endif
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/include/physis/physis.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_PHYSIS_H_
 4 | #define PHYSIS_PHYSIS_H_
 5 | 
 6 | #if defined(PHYSIS_USER)
 7 | #include "physis/physis_user.h"
 8 | #endif
 9 | 
10 | #if defined(PHYSIS_REF)
11 | #include "physis/physis_ref.h"
12 | #elif defined(PHYSIS_CUDA)
13 | #include "physis/physis_cuda.h"
14 | #elif defined(PHYSIS_CUDA_HM)
15 | #include "physis/physis_cuda_hm.h"
16 | #elif defined(PHYSIS_MPI)
17 | #include "physis/physis_mpi.h"
18 | #elif defined(PHYSIS_MPI_CUDA)
19 | #include "physis/physis_mpi_cuda.h"
20 | #elif defined(PHYSIS_OPENCL)
21 | #include "physis/physis_opencl.h"
22 | #elif defined(PHYSIS_MPI_OPENCL)
23 | #include "physis/physis_mpi_opencl.h"
24 | #elif defined(PHYSIS_MPI_OPENMP)
25 | #include "physis_mpi_openmp.h"
26 | #endif
27 | 
28 | 
29 | #include "physis/math.h"
30 | 
31 | #endif /* PHYSIS_PHYSIS_H_ */
32 | 


--------------------------------------------------------------------------------
/include/physis/physis_cuda_hm.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2011-2013, RIKEN AICS.
 2 | // All rights reserved.
 3 | //
 4 | // This file is distributed under the BSD license. See LICENSE.txt for
 5 | // details.
 6 | 
 7 | #ifndef PHYSIS_PHYSIS_CUDA_HM_H_
 8 | #define PHYSIS_PHYSIS_CUDA_HM_H_
 9 | 
10 | #include "physis/physis_cuda.h"
11 | 
12 | #endif /* PHYSIS_PHYSIS_CUDA_HM_H_ */
13 | 
14 | 


--------------------------------------------------------------------------------
/include/physis/reduce.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2011, Tokyo Institute of Technology.
 2 | // All rights reserved.
 3 | //
 4 | // This file is distributed under the license described in
 5 | // LICENSE.txt.
 6 | //
 7 | // Author: Naoya Maruyama (naoya@matsulab.is.titech.ac.jp)
 8 | 
 9 | #ifndef PHYSIS_REDUCTION_H_
10 | #define PHYSIS_REDUCTION_H_
11 | 
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 | 
16 |   enum PSReduceOp {
17 |     PS_MAX,
18 |     PS_MIN,
19 |     PS_SUM,
20 |     PS_PROD
21 |   };
22 | 
23 | 
24 | #ifdef __cplusplus
25 | }
26 | #endif
27 | 
28 | 
29 | #endif /* PHYSIS_REDUCTION_H_ */
30 | 


--------------------------------------------------------------------------------
/include/physis/runtime.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_H_
 4 | #define PHYSIS_RUNTIME_H_
 5 | 
#include <stdio.h>
#include <stdlib.h>

#include "physis/stopwatch.h"
 9 | 
/* Wraps x into [0, y) by adding y once before the modulo, so the result is
 * non-negative only for x >= -y. y is evaluated twice. */
#define __PS_PERIODIC(x, y) (((y)+(x))%(y))
11 | 
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif /* __cplusplus */
15 | 
16 | extern FILE *__ps_trace;
17 | 
18 | static inline void __PSTraceStencilPre(const char *msg) {
19 |   if (__ps_trace) {
20 |     fprintf(__ps_trace, "Physis: Stencil started (%s)\n", msg);
21 |   }
22 |   return;
23 | }
24 | 
25 |   static inline void __PSTraceStencilPost(float time) {
26 |   if (__ps_trace) {
27 |     fprintf(__ps_trace, "Physis: Stencil finished (time: %f)\n", time);
28 |   }
29 |   return;
30 | }
31 | 
32 | #ifdef AUTO_TUNING  
33 |   /**  initialize random
34 |    * @param[in] n ... number of randomized value
35 |    * @return    random handle
36 |    */
37 |   extern void *__PSRandomInit(int n);
38 |   /** get randomized value
39 |    * @param[in] handle ... random handle
40 |    * @param[in] count ... index of randomized value
41 |    * @return    randomized value
42 |    */
43 |   static inline int __PSRandom(void *handle, int count) {
44 |     return ((int *)handle)[count];
45 |   }
46 |   /** finalize random
47 |    * @param[in] handle ... random handle
48 |    */
49 |   static inline void __PSRandomFini(void *handle) {
50 |     free(handle);
51 |   }
52 | #endif
53 |   
54 | #ifdef __cplusplus
55 | }
56 | #endif /* __cplusplus */
57 |   
58 | 
59 | #endif /* PHYSIS_RUNTIME_H_ */
60 | 


--------------------------------------------------------------------------------
/include/physis/stopwatch.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2011, Tokyo Institute of Technology.
 2 | // All rights reserved.
 3 | //
 4 | // This file is distributed under the license described in
 5 | // LICENSE.txt.
 6 | //
 7 | // Author: Naoya Maruyama (naoya@matsulab.is.titech.ac.jp)
 8 | 
 9 | #ifndef PHYSIS_STOPWATCH_H_
10 | #define PHYSIS_STOPWATCH_H_
11 | 
#if defined(unix) || defined(__unix__) || defined(__unix) || \
  defined(__APPLE__)
#include <sys/time.h>
#include <time.h>

/* One time sample as filled in by gettimeofday(). */
typedef struct {
  struct timeval tv;
} __PSStopwatch;

#else
#error "Unknown environment"
#endif

/* Stores the current gettimeofday() reading in *w. */
static inline void __PSStopwatchQuery(__PSStopwatch *w) {
  gettimeofday(&(w->tv), NULL);
  return;
}

/* Returns end - begin in milliseconds. */
static inline float __PSStopwatchDiff(const __PSStopwatch *begin,
                                      const __PSStopwatch *end) {
  return (end->tv.tv_sec - begin->tv.tv_sec) * 1000.0f
      + (end->tv.tv_usec - begin->tv.tv_usec) / 1000.0f;
}

/* Records the starting sample in *w. `inline` (not `__inline`) matches the
 * two helpers above. */
static inline void __PSStopwatchStart(__PSStopwatch *w) {
  __PSStopwatchQuery(w);
  return;
}

/* Returns the milliseconds elapsed since the sample stored in *w. */
static inline float __PSStopwatchStop(__PSStopwatch *w) {
  __PSStopwatch now;
  __PSStopwatchQuery(&now);
  return __PSStopwatchDiff(w, &now);
}
47 | 
48 | #endif /* PHYSIS_STOPWATCH_H_ */
49 | 


--------------------------------------------------------------------------------
/include/physis/types.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2011, Tokyo Institute of Technology.
 2 | // All rights reserved.
 3 | //
 4 | // This file is distributed under the license described in
 5 | // LICENSE.txt.
 6 | //
 7 | // Author: Naoya Maruyama (naoya@matsulab.is.titech.ac.jp)
 8 | 
 9 | #ifndef PHYSIS_TYPES_H_
10 | #define PHYSIS_TYPES_H_
11 | 
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 | 
16 |   typedef int PSType;
17 |   enum PSPrimitiveType {
18 |     PS_INT = 0,
19 |     PS_LONG = 1,
20 |     PS_FLOAT = 2,
21 |     PS_DOUBLE = 3,
22 |     PS_USER = 4
23 |   };
24 | 
25 | #ifdef __cplusplus
26 | }
27 | #endif
28 | 
29 | 
30 | #endif /* PHYSIS_TYPES_H_ */
31 | 


--------------------------------------------------------------------------------
/misc/source_header.txt:
--------------------------------------------------------------------------------
1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 


--------------------------------------------------------------------------------
/misc/valgrind-suppressions.supp:
--------------------------------------------------------------------------------
 1 | {
 2 |    <lua_is_number_addr8>
 3 |    Memcheck:Addr8
 4 |    ...
 5 |    fun:lua_isnumber
 6 |    ...
 7 |    fun:_ZN6physis4util13Configuration8LoadFileERKSs
 8 |    fun:main
 9 | }   
10 | 
11 | {
12 |    <lua_free>
13 |    Memcheck:Free
14 |    ...
15 |    fun:_ZN6physis4util13Configuration8LoadFileERKSs
16 |    fun:main
17 | }   
18 | 
19 | {
20 |    <exit_free>
21 |    Memcheck:Free
22 |    ...
23 |    fun:exit
24 |    ...
25 | }   
26 | 
27 | 


--------------------------------------------------------------------------------
/runtime/common.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "physis/runtime_common.h"
 4 | 
 5 | #ifdef __cplusplus
 6 | extern "C" {
 7 | #endif __cplusplus
 8 |   
 9 | 
10 | #ifdef __cplusplus
11 | }
12 | #endif __cplusplus
13 | 


--------------------------------------------------------------------------------
/runtime/cub-1.3.2/.settings/org.eclipse.cdt.ui.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | formatter_profile=_B40C
3 | formatter_settings_version=1
4 | 


--------------------------------------------------------------------------------
/runtime/cub-1.3.2/.settings/org.eclipse.core.runtime.prefs:
--------------------------------------------------------------------------------
1 | content-types/enabled=true
2 | content-types/org.eclipse.cdt.core.cxxHeader/file-extensions=cuh
3 | content-types/org.eclipse.cdt.core.cxxSource/file-extensions=cu
4 | eclipse.preferences.version=1
5 | 


--------------------------------------------------------------------------------
/runtime/cub-1.3.2/LICENSE.TXT:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2010-2011, Duane Merrill.  All rights reserved.
 2 | Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 |    *  Redistributions of source code must retain the above copyright
 7 |       notice, this list of conditions and the following disclaimer.
 8 |    *  Redistributions in binary form must reproduce the above copyright
 9 |       notice, this list of conditions and the following disclaimer in the
10 |       documentation and/or other materials provided with the distribution.
11 |    *  Neither the name of the NVIDIA CORPORATION nor the
12 |       names of its contributors may be used to endorse or promote products
13 |       derived from this software without specific prior written permission.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/runtime/cuda_util.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_CUDA_UTIL_H_
 4 | #define PHYSIS_RUNTIME_CUDA_UTIL_H_
 5 | 
 6 | #include <thrust/transform_reduce.h>
 7 | #include <thrust/functional.h>
 8 | 
 9 | namespace physis {
10 | namespace runtime {
11 | 
12 | 
13 | } // namespace runtime
14 | } // namespace physis
15 | 
16 | #endif /* PHYSIS_RUNTIME_CUDA_UTIL_H_ */
17 | 
18 | 


--------------------------------------------------------------------------------
/runtime/grid_util_mpi_openmp.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_GRID_UTIL_MPI_OPENMP_H_
 4 | #define PHYSIS_RUNTIME_GRID_UTIL_MPI_OPENMP_H_
 5 | 
 6 | #include "physis/physis_common.h"
 7 | #include "runtime/runtime_common.h"
 8 | 
 9 | namespace physis {
10 | namespace runtime {
11 | namespace mpiopenmputil {
12 | 
13 | void CopyinoutSubgrid_MP(
14 |     bool copyout_to_subgrid_mode_p,
15 |     size_t elm_size, int num_dims,
16 |     void **grid_mp,
17 |     const IntArray  &grid_size,
18 |     const IntArray &grid_division,
19 |     const size_t * const *grid_mp_offset,
20 |     const size_t * const *grid_mp_width,
21 |     void **subgrid_mp,
22 |     const IntArray &subgrid_offset,
23 |     const IntArray &subgrid_size,
24 |     const IntArray &subgrid_division,
25 |     const size_t * const *subgrid_mp_offset,
26 |     const size_t * const *subgrid_mp_width
27 |                          );
28 | 
29 | void getMPOffset(
30 |     const unsigned int num_dims,
31 |     const IntArray &offset,
32 |     const IntArray &grid_size,
33 |     const IntArray &grid_division,
34 |     const size_t * const *grid_mp_offset,
35 |     const size_t * const *grid_mp_width,
36 |     unsigned int &cpuid_OUT,
37 |     size_t &gridid_OUT,
38 |     size_t &width_avail_OUT
39 |                  );  
40 | 
41 | } // namespace mpiopenmputil
42 | } // namespace runtime
43 | } // namespace physis
44 | 
45 | 
46 | #endif /* PHYSIS_RUNTIME_GRID_UTIL_MPI_OPENMP_H_ */
47 | 
48 | 


--------------------------------------------------------------------------------
/runtime/ipc.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_IPC_H_
 4 | #define PHYSIS_RUNTIME_IPC_H_
 5 | 
 6 | #include "runtime/runtime_common.h"
 7 | 
 8 | namespace physis {
 9 | namespace runtime {
10 | // Abstract inter-process communication interface: every operation below is
11 | // pure virtual. Singleton class; the constructor/destructor are protected.
12 | class InterProcComm {
13 |  protected:
14 |   InterProcComm() {}
15 |   virtual ~InterProcComm() {}
16 |  public:
17 |   typedef enum {IPC_SUCCESS = 0, IPC_FAILURE = 1} IPC_ERROR_T;  // status code
18 |   virtual void *CreateRequest() const = 0;  // presumably yields the `req` handle used below -- confirm
19 |   virtual IPC_ERROR_T Init(int *argc, char ***argv) = 0;
20 |   virtual IPC_ERROR_T Finalize() = 0;
21 |   virtual int GetRank() const = 0;
22 |   virtual int GetNumProcs() const = 0;
23 |   virtual IPC_ERROR_T Send(void *buf, size_t len,
24 |                            int dest) = 0;
25 |   virtual IPC_ERROR_T Isend(void *buf, size_t len,
26 |                             int dest, void *req) = 0;
27 |   virtual IPC_ERROR_T Recv(void *buf, size_t len, int src) = 0;
28 |   virtual IPC_ERROR_T Irecv(void *buf, size_t len,
29 |                     int src, void *req) = 0;
30 |   virtual IPC_ERROR_T Wait(void *req) = 0;
31 |   //virtual IPC_ERROR_T WaitAll() = 0;
32 |   virtual IPC_ERROR_T Test(void *req, bool *flag) = 0;
33 |   virtual IPC_ERROR_T Bcast(void *buf, size_t len, int root) = 0;
34 |   virtual IPC_ERROR_T Reduce(void *src, void *dst,
35 |                              int count,
36 |                              PSType type,
37 |                              PSReduceOp op, int root) = 0;
38 |   virtual IPC_ERROR_T Barrier() = 0;
39 | 
40 | };
41 | 
42 | } // namespace runtime
43 | } // namespace physis
44 | 
45 | #endif /* PHYSIS_RUNTIME_IPC_H_ */
46 | 


--------------------------------------------------------------------------------
/runtime/ipc_mpi.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_IPC_MPI_H_
 4 | #define PHYSIS_RUNTIME_IPC_MPI_H_
 5 | 
 6 | #include "mpi.h"
 7 | 
 8 | #include "runtime/ipc.h"
 9 | 
10 | 
11 | namespace physis {
12 | namespace runtime {
13 | // MPI-backed InterProcComm. Obtained through GetInstance(); constructors are protected.
14 | class InterProcCommMPI: public InterProcComm {
15 |  protected:
16 |   InterProcCommMPI(): comm_(MPI_COMM_WORLD), initialized_(false) {}  // starts on MPI_COMM_WORLD, not yet initialized
17 |   virtual ~InterProcCommMPI() {}  
18 |  public:
19 |   static InterProcCommMPI* GetInstance();
20 |   virtual void *CreateRequest() const;
21 |   virtual IPC_ERROR_T Init(int *argc, char ***argv);
22 |   virtual IPC_ERROR_T Finalize();
23 |   virtual int GetRank() const;
24 |   virtual int GetNumProcs() const;
25 |   virtual IPC_ERROR_T Send(void *buf, size_t len, int dest);
26 |   virtual IPC_ERROR_T Isend(void *buf, size_t len,
27 |                             int dest, void *req);
28 |   virtual IPC_ERROR_T Recv(void *buf, size_t len, int src);
29 |   virtual IPC_ERROR_T Irecv(void *buf, size_t len,
30 |                     int src, void *req);
31 |   virtual IPC_ERROR_T Wait(void *req);
32 |   //virtual IPC_ERROR_T WaitAll();
33 |   virtual IPC_ERROR_T Test(void *req, bool *flag);
34 |   virtual IPC_ERROR_T Bcast(void *buf, size_t len, int root);
35 |   virtual IPC_ERROR_T Reduce(void *src, void *dst,
36 |                      int count, PSType type,
37 |                      PSReduceOp op, int root);
38 |   virtual IPC_ERROR_T Barrier();
39 | 
40 |  protected:
41 |   MPI_Comm comm_;                       // communicator; set to MPI_COMM_WORLD by the constructor
42 |   static InterProcCommMPI *singleton_;  // presumably the instance returned by GetInstance() -- confirm
43 |   bool initialized_;                    // false after construction
44 | };
45 | 
46 | } // namespace runtime
47 | } // namespace physis
48 | 
49 | #endif /* PHYSIS_RUNTIME_IPC_MPI_H_ */
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/runtime/libphysis_rt_mpi_openmp_numa.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #if ! defined(_GNU_SOURCE)
 4 | #define _GNU_SOURCE
 5 | #endif
 6 | 
 7 | #include "runtime/mpi_openmp_runtime.h"
 8 | 
 9 | #ifdef USE_OPENMP_NUMA
10 | #if 0
11 | #include <numa.h>
12 | #endif
13 | #endif
14 | 
15 | #include <omp.h>
16 | #include <sched.h>
17 | 
18 | #ifdef __cplusplus
19 | extern "C" {
20 | #endif
21 |   // Binds the calling OpenMP thread to the CPU whose index equals its thread number.
22 |   void __PSInitLoop_OpenMP(void){
23 | #ifndef USE_OPENMP_NUMA
24 |     return;  // no-op unless built with USE_OPENMP_NUMA
25 | #else
26 | #if 1
27 |     cpu_set_t mask;
28 |     CPU_ZERO(&mask);
29 |     int thread_idx = omp_get_thread_num();
30 |     CPU_SET(thread_idx, &mask);  // the mask contains exactly one CPU: thread_idx
31 |     //LOG_DEBUG() << "Resetting using CPU ID to " << thread_idx << "\n";
32 |     if (sched_setaffinity(0, sizeof(mask), &mask) == -1){  // pid 0 selects the calling thread
33 |       perror("sched_setaffinity ");
34 |       LOG_DEBUG() << "Calling sched_setaffinity failed for OpenMP thread "
35 |                   << thread_idx << "\n";
36 |     }  // failure is only reported; execution continues unpinned
37 | #endif
38 | #endif /* ifndef USE_OPENMP_NUMA */
39 |   } // __PSInitLoop_OpenMP
40 | 
41 | #ifdef __cplusplus
42 | }
43 | #endif
44 | 
45 | 


--------------------------------------------------------------------------------
/runtime/mpi_opencl_runtime.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_MPI_OPENCL_RUNTIME_H_
 4 | #define PHYSIS_RUNTIME_MPI_OPENCL_RUNTIME_H_
 5 | 
 6 | #include "runtime/runtime_common.h"
 7 | #include "runtime/grid_mpi_opencl.h"
 8 | #include "runtime/rpc_mpi_opencl.h"
 9 | #include "runtime/rpc_opencl_mpi.h"
10 | #include <vector>
11 | 
12 | // FIXME
13 | // FIXME
14 | // Get this back later!!
15 | #define NUM_CLINFO_BOUNDARY_KERNEL 16
16 | 
17 | namespace physis {
18 | namespace runtime {
19 | 
20 | typedef void (*__PSStencilRunClientFunction)(int, void **);
21 | extern __PSStencilRunClientFunction *__PS_stencils;
22 | 
23 | extern ProcInfo *pinfo;
24 | extern MasterMPIOpenCL *master;
25 | extern ClientMPIOpenCL *client;
26 | extern GridSpaceMPIOpenCL *gs;
27 | 
28 | 
29 | } // namespace runtime
30 | } // namespace physis
31 | 
32 | namespace physis {
33 | namespace runtime {
34 | 
35 | extern CLMPIbaseinfo *clinfo_generic;
36 | extern CLMPIbaseinfo *clinfo_inner;
37 | extern CLMPIbaseinfo *clinfo_boundary_copy;
38 | extern std::vector<CLMPIbaseinfo *> clinfo_boundary_kernel;
39 | 
40 | extern CLMPIbaseinfo *clinfo_nowusing;
41 | 
42 | } // namespace runtime
43 | } // namespace physis
44 | 
45 | namespace physis {
46 | namespace runtime {
47 | extern void InitOpenCL(
48 |     int my_rank, int num_local_processes, int *argc, char ***argv
49 |                        );
50 | extern void DestroyOpenCL(void);
51 | } // namespace runtime
52 | } // namespace physis
53 | 
54 | #endif /* PHYSIS_RUNTIME_MPI_OPENCL_RUNTIME_H_ */
55 | 


--------------------------------------------------------------------------------
/runtime/mpi_openmp_runtime.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_MPI_OPENMP_RUNTIME_H_
 4 | #define PHYSIS_RUNTIME_MPI_OPENMP_RUNTIME_H_
 5 | 
 6 | #include "runtime/runtime_common.h"
 7 | #include "runtime/grid_mpi_openmp.h"
 8 | #include "runtime/rpc_mpi_openmp.h"
 9 | 
10 | #ifdef USE_OPENMP_NUMA
11 | #define PROCINFO  ProcInfoOpenMP
12 | #define MASTER    MasterOpenMP
13 | #define CLIENT    ClientOpenMP
14 | #define GRIDSPACEMPI  GridSpaceMPIOpenMP
15 | #define GRIDMPI   GridMPIOpenMP
16 | #else
17 | #define PROCINFO  ProcInfo
18 | #define MASTER    Master
19 | #define CLIENT    Client
20 | #define GRIDSPACEMPI  GridSpaceMPI
21 | #define GRIDMPI   GridMPI
22 | #endif
23 | 
24 | namespace physis {
25 | namespace runtime {
26 | 
27 | typedef void (*__PSStencilRunClientFunction)(int, void **);
28 | extern __PSStencilRunClientFunction *__PS_stencils;
29 | 
30 | extern PROCINFO *pinfo;
31 | extern MASTER *master;
32 | extern CLIENT *client;
33 | extern GRIDSPACEMPI *gs;
34 | 
35 | } // namespace runtime
36 | } // namespace physis
37 | 
38 | 
39 | #endif /* PHYSIS_RUNTIME_MPI_OPENMP_RUNTIME_H_ */
40 | 


--------------------------------------------------------------------------------
/runtime/mpi_runtime_common.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include <stdarg.h>
 4 | #include <map>
 5 | #include <string>
 6 | 
 7 | #include "runtime/mpi_runtime_common.h"
 8 | 
 9 | #include "mpi.h"
10 | 
11 | #include "physis/physis_mpi.h"
12 | #include "physis/physis_util.h"
13 | #include "runtime/grid_mpi_debug_util.h"
14 | #include "runtime/mpi_util.h"
15 | #include "runtime/grid_mpi2.h"
16 | #include "runtime/rpc2.h"
17 | #include "runtime/inter_proc_comm_mpi.h"
18 | 
19 | using std::map;
20 | using std::string;
21 | 
22 | using namespace physis::runtime;
23 | 
24 | using physis::IndexArray;
25 | using physis::IntArray;
26 | using physis::SizeArray;
27 | 
28 | namespace physis {
29 | namespace runtime {
30 | 
31 | __PSStencilRunClientFunction *__PS_stencils;
32 | 
33 | ProcInfo *pinfo;
34 | Master *master;
35 | Client *client;
36 | GridSpaceMPI *gs;
37 | 
38 | } // namespace runtime
39 | } // namespace physis
40 | 
41 | #ifdef __cplusplus
42 | extern "C" {
43 | #endif
44 | 
45 | 
46 | 
47 | 
48 | #ifdef __cplusplus
49 | }
50 | #endif
51 | 
52 | 


--------------------------------------------------------------------------------
/runtime/mpi_runtime_common.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_MPI_RUNTIME_COMMON_H_
 4 | #define PHYSIS_RUNTIME_MPI_RUNTIME_COMMON_H_
 5 | 
 6 | #include "runtime/runtime_common.h"
 7 | #include "runtime/rpc.h"
 8 | #include "runtime/grid_mpi.h"
 9 | 
10 | namespace physis {
11 | namespace runtime {
12 | 
13 | 
14 | 
15 | } // namespace runtime
16 | } // namespace physis
17 | 
18 | 
19 | #endif /* PHYSIS_RUNTIME_MPI_RUNTIME_COMMON_H_ */
20 | 


--------------------------------------------------------------------------------
/runtime/opencl_misc.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | /* physis-OpenCL specific */
 4 | 
 5 | #define BUFSIZE 1024
 6 | 
 7 | /* C or C++ standard headers */
 8 | #include <cstring>
 9 | #include <cstdlib>
10 | #include <cstdio>
11 | 
12 | /* physis-OpenCL specific*/
13 | #include "runtime/rpc_opencl.h"
14 | 
15 | namespace physis {
16 | namespace runtime {
17 | // Guesses the OpenCL kernel source file name from the command line.
18 | //
19 | // The first argument shorter than BUFSIZE - 4 characters (argv[0] in the
20 | // usual case) is taken and ".c" is appended to it; the search stops there.
21 | //
22 | // Parameters:
23 | //   argc, argv: pointers to the program's argument count and vector.
24 | //   filename:   receives the guessed name, or "" when no argument qualifies.
25 | //   kernelname: receives the initial kernel name "__PSStencilRun_kernel".
26 | void CLbaseinfo::guess_kernelfile
27 | (const int *argc, char ***argv, std::string &filename, std::string &kernelname) const {
28 | 
29 |   char **const args = *argv;
30 |   const int num_args = *argc;
31 |   // Arguments of this length or longer are skipped (limit inherited from
32 |   // the BUFSIZE-byte scratch buffer this routine is sized for).
33 |   const size_t max_len = BUFSIZE - 4;
34 | 
35 |   // Stays empty unless a suitable argument is found below.
36 |   std::string guessed;
37 |   for (int idx = 0; idx < num_args; ++idx) {
38 |     const std::string candidate(args[idx]);
39 |     if (candidate.size() >= max_len) {
40 |       continue;
41 |     }
42 |     // Only the first acceptable argument is ever used.
43 |     guessed = candidate + ".c";
44 |     break;
45 |   }
46 |   filename = guessed;
47 | 
48 |   // At first set the below kernel name, will be updated by __PSSetKernel
49 |   kernelname = "__PSStencilRun_kernel";
50 | } // guess_kernelfile
51 | // Returns the include path of the Physis OpenCL header; this version always returns "".
52 | std::string CLbaseinfo::physis_opencl_h_include_path(void) const {
53 |   std::string ret = "";
54 | 
55 |   // FIXME
56 |   // Currently no header files are included in kernel code
57 | 
58 | #if 0
59 | #endif
60 | 
61 |   return ret;
62 | 
63 | } // physis_opencl_h_include_path
64 | 
65 | } // namespace runtime
66 | } // namespace physis
67 | 


--------------------------------------------------------------------------------
/runtime/opencl_runtime.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_OPENCL_RUNTIME_H_
 4 | #define PHYSIS_RUNTIME_OPENCL_RUNTIME_H_
 5 | 
 6 | /* physis-OpenCL specific */
 7 | #include "runtime/rpc_opencl.h"
 8 | 
 9 | namespace physis {
10 | namespace runtime {
11 | extern CLinfo *master;
12 | }
13 | }
14 | #endif // #define PHYSIS_RUNTIME_OPENCL_RUNTIME_H_
15 | 


--------------------------------------------------------------------------------
/runtime/proc.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "runtime/proc.h"
 4 | 
 5 | namespace physis {
 6 | namespace runtime {
 7 | // Writes "Proc {rank: <rank>, #procs: <count>}" to os and returns os.
 8 | std::ostream &Proc::print(std::ostream &os) const {
 9 |   os << "Proc {";
10 |   os << "rank: " << rank_;
11 |   os << ", #procs: " << num_procs_;
12 |   os << "}";
13 |   return os;
14 | }
15 | 
16 | } // namespace runtime
17 | } // namespace physis
18 | 


--------------------------------------------------------------------------------
/runtime/proc.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_PROC_H_
 4 | #define PHYSIS_RUNTIME_PROC_H_
 5 | 
 6 | #include "runtime/runtime_common.h"
 7 | #include "runtime/ipc.h"
 8 | 
 9 | namespace physis {
10 | namespace runtime {
11 | // One process of the runtime: caches its rank and the process count read from an InterProcComm.
12 | class Proc {
13 |  protected:
14 |   int rank_;                                    // ipc->GetRank() at construction
15 |   int num_procs_;                               // ipc->GetNumProcs() at construction
16 |   InterProcComm *ipc_;                          // stored as given; not deleted by this class
17 |   __PSStencilRunClientFunction *stencil_runs_;  // stored as given
18 |  public:
19 |   Proc(InterProcComm *ipc,
20 |        __PSStencilRunClientFunction *stencil_runs):
21 |       rank_(ipc->GetRank()), num_procs_(ipc->GetNumProcs()), ipc_(ipc),
22 |       stencil_runs_(stencil_runs) {}  // ipc is dereferenced here, so it must be non-null
23 |   virtual ~Proc() {}
24 |   std::ostream &print(std::ostream &os) const;
25 |   int rank() const { return rank_; }
26 |   int num_procs() const { return num_procs_; }
27 |   InterProcComm *ipc() { return ipc_; }  
28 |   static int GetRootRank() { return 0; }  // rank 0 is the root
29 |   bool IsRoot() const { return rank_ == GetRootRank(); }
30 | };
31 | 
32 | } // namespace runtime
33 | } // namespace physis
34 | 
35 | inline
36 | std::ostream &operator<<(std::ostream &os, const physis::runtime::Proc &proc) {
37 |   return proc.print(os);
38 | }
39 | 
40 | #endif /* PHYSIS_RUNTIME_PROC_H_ */
41 | 


--------------------------------------------------------------------------------
/runtime/reduce_cuda.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_REDUCE_CUDA_H_
 4 | #define PHYSIS_RUNTIME_REDUCE_CUDA_H_
 5 | #include "runtime/runtime_common.h"
 6 | #include "runtime/runtime_common_cuda.h"
 7 | #include "runtime/cuda_util.h"
 8 | #include "runtime/reduce.h"
 9 | //#include "physis/physis_cuda.h"
10 | 
11 | #include <thrust/reduce.h>
12 | #include <thrust/device_ptr.h>
13 | #include <thrust/extrema.h>
14 | 
15 | namespace physis {
16 | namespace runtime {
17 | //! Reduce a grid with binary operation op: reads len elements of type T
18 | //! starting at dev_grid and stores the single result into *buf.
19 | template <class T>
20 | void ReduceGridCUDA(void *buf, PSReduceOp op,
21 |                     void *dev_grid, size_t len) {
22 |   thrust::device_ptr<T> dev_ptr((T*)dev_grid);
23 |   T *out = (T*)buf;
24 |   if (op == PS_MAX) {
25 |     *out = *thrust::max_element(dev_ptr, dev_ptr + len);  // NOTE(review): dereferences the end iterator if len == 0
26 |   } else if (op == PS_MIN) {
27 |     *out = *thrust::min_element(dev_ptr, dev_ptr + len);  // NOTE(review): same len == 0 concern -- confirm callers
28 |   } else if (op == PS_SUM) {
29 |     *out = thrust::reduce(dev_ptr, dev_ptr + len,
30 |                           physis::runtime::GetReductionDefaultValue<T>(op),
31 |                           thrust::plus<T>());
32 |   } else if (op == PS_PROD) {
33 |     *out = thrust::reduce(dev_ptr, dev_ptr + len,
34 |                           physis::runtime::GetReductionDefaultValue<T>(op),
35 |                           thrust::multiplies<T>());
36 |   } else {
37 |     PSAbort(1);  // any other operator is rejected
38 |   }
39 |   return;
40 | }
41 | 
42 | } // namespace runtime
43 | } // namespace physis
44 | 
45 | #endif /* PHYSIS_RUNTIME_REDUCE_CUDA_H_ */
46 | 


--------------------------------------------------------------------------------
/runtime/reduce_grid_mpi_cuda_exp.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_REDUCE_GRID_MPI_CUDA_EXP_H_
 4 | #define PHYSIS_RUNTIME_REDUCE_GRID_MPI_CUDA_EXP_H_
 5 | 
 6 | #include "runtime/runtime_common.h"
 7 | #include "physis/reduce.h"
 8 | 
 9 | namespace physis {
10 | namespace runtime {
11 | 
12 | extern int ReduceGridMPICUDAExp(void *buf, PSType type, PSReduceOp op,
13 |                                 void *dev_grid, int dim, const IndexArray &size,
14 |                                 const Width2 &width);
15 | 
16 | } //namespace runtime
17 | } //namespace physis
18 | 
19 | #endif // PHYSIS_RUNTIME_REDUCE_GRID_MPI_CUDA_EXP_H_ 
20 | 


--------------------------------------------------------------------------------
/runtime/rpc_cuda.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "runtime/rpc_cuda.h"
 4 | #include "runtime/mpi_wrapper.h"
 5 | #include "runtime/grid_util.h"
 6 | #include "runtime/runtime_common_cuda.h"
 7 | #include "runtime/grid_space_mpi_cuda.h"
 8 | 
 9 | #include <cuda_runtime.h>
10 | 
11 | namespace physis {
12 | namespace runtime {
13 | 
14 | } // namespace runtime
15 | } // namespace physis
16 | 


--------------------------------------------------------------------------------
/runtime/rpc_mpi_cuda.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_RPC_MPI_CUDA_H_
 4 | #define PHYSIS_RUNTIME_RPC_MPI_CUDA_H_
 5 | 
 6 | #include "runtime/runtime_common.h"
 7 | #include "runtime/rpc.h"
 8 | #include "runtime/grid_mpi_cuda.h"
 9 | #include "runtime/buffer_cuda.h"
10 | 
11 | namespace physis {
12 | namespace runtime {
13 | // Master specialization for MPI+CUDA; overrides Finalize and the local grid copy-in/copy-out.
14 | class MasterMPICUDA: public Master {
15 |  public:
16 |   MasterMPICUDA(const ProcInfo &pinfo, GridSpaceMPICUDA *gs,
17 |                 MPI_Comm comm);
18 |   virtual ~MasterMPICUDA();
19 |   virtual void Finalize();
20 |   virtual void GridCopyinLocal(GridMPI *g, const void *buf);
21 |   virtual void GridCopyoutLocal(GridMPI *g, void *buf);  
22 |  protected:
23 |   BufferCUDAHost *pinned_buf_;  // presumably a page-locked host staging buffer -- confirm in the .cc
24 | };
25 | // Client specialization for MPI+CUDA; only Finalize is overridden here.
26 | class ClientMPICUDA: public Client {
27 |  public:
28 |   ClientMPICUDA(const ProcInfo &pinfo, GridSpaceMPICUDA *gs,
29 |                 MPI_Comm comm);
30 |   virtual ~ClientMPICUDA();
31 |   virtual void Finalize();  
32 | };
33 | 
34 | } // namespace runtime
35 | } // namespace physis
36 | 
37 | #endif /* PHYSIS_RUNTIME_RPC_MPI_CUDA_H_ */
38 | 


--------------------------------------------------------------------------------
/runtime/rpc_mpi_opencl.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_RPC_MPI_OPENCL_H_
 4 | #define PHYSIS_RUNTIME_RPC_MPI_OPENCL_H_
 5 | 
 6 | #include "runtime/runtime_common.h"
 7 | #include "runtime/rpc_mpi.h"
 8 | #include "runtime/grid_mpi_opencl.h"
 9 | #include "runtime/rpc_opencl_common.h"
10 | #include "runtime/buffer_opencl.h"
11 | 
12 | #include <vector>
13 | 
14 | namespace physis {
15 | namespace runtime {
16 | // Master specialization for MPI+OpenCL; overrides Finalize and the local grid copy-in/copy-out.
17 | class MasterMPIOpenCL: public Master {
18 |  public:
19 |   MasterMPIOpenCL(
20 |       const ProcInfo &pinfo, GridSpaceMPIOpenCL *gs,
21 |       MPI_Comm comm,
22 |       CLbaseinfo *cl_in);
23 |   virtual ~MasterMPIOpenCL();
24 |   virtual void Finalize();
25 |   virtual void GridCopyinLocal(GridMPI *g, const void *buf);
26 |   virtual void GridCopyoutLocal(GridMPI *g, void *buf);
27 |  protected:
28 |   BufferOpenCLHost *pinned_buf_;  // presumably a host staging buffer -- confirm in the .cc
29 |   CLbaseinfo *cl_generic_;        // presumably the cl_in passed to the constructor -- confirm
30 | };
31 | // Client specialization for MPI+OpenCL; only Finalize is overridden here.
32 | class ClientMPIOpenCL: public Client {
33 |  public:
34 |   ClientMPIOpenCL(
35 |       const ProcInfo &pinfo, GridSpaceMPIOpenCL *gs,
36 |       MPI_Comm comm,
37 |       CLbaseinfo *cl_in);
38 |   virtual ~ClientMPIOpenCL();
39 |   virtual void Finalize();
40 |  protected:
41 |   CLbaseinfo *cl_generic_;  // presumably the cl_in passed to the constructor -- confirm
42 | };
43 | 
44 | } // namespace runtime
45 | } // namespace physis
46 | 
47 | #endif /* PHYSIS_RUNTIME_RPC_MPI_OPENCL_H_ */
48 | 


--------------------------------------------------------------------------------
/runtime/rpc_opencl.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_RPC_OPENCL_H_
 4 | #define PHYSIS_RUNTIME_RPC_OPENCL_H_
 5 | 
 6 | /* physis-OpenCL specific*/
 7 | #include "physis/physis_opencl.h"
 8 | #include "runtime/rpc_opencl_common.h"
 9 | 
10 | /*
11 |   Note:
12 |   CLinfo: OpenCL information class
13 |   Members or functions not using __PSGrid belong to CLbaseinfo.
14 |   Members or functions using __PSGrid should belong to
15 |   CLbase, which should inherit CLbaseinfo.
16 | */
17 | 
18 | namespace physis {
19 | namespace runtime {
20 | // OpenCL information class that adds the __PSGrid-based operations on top of CLbaseinfo.
21 | class CLinfo : public CLbaseinfo {
22 |  protected:
23 | 
24 |  public:
25 |   CLinfo(): CLbaseinfo() {};
26 |   virtual ~CLinfo() {};
27 |   // Grid lifecycle and data transfer; implemented outside this header.
28 |   virtual __PSGrid* GridNew(int elm_size, int num_dims, PSVectorInt dim, int double_buffering);
29 |   virtual void GridFree(__PSGrid *g);
30 |   virtual void GridCopyin(__PSGrid *g, const void *src_buf);
31 |   virtual void GridCopyout(__PSGrid *g, void *dst_buf);
32 |   virtual void GridSet(__PSGrid *g, const void *val_ptr, va_list valst_dim);
33 | 
34 | }; // class CLinfo
35 | 
36 | } // namespace runtime
37 | } // namespace physis
38 | 
39 | #endif /* #define PHYSIS_RUNTIME_RPC_OPENCL_H_ */
40 | 


--------------------------------------------------------------------------------
/runtime/rpc_opencl_mpi.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_RPC_OPENCL_MPI_H
 4 | #define PHYSIS_RUNTIME_RPC_OPENCL_MPI_H
 5 | 
 6 | #include "runtime/rpc_opencl_common.h"
 7 | 
 8 | namespace physis {
 9 | namespace runtime {
10 | // CLbaseinfo variant for the MPI+OpenCL runtime; adds a header include path and a kernel file name.
11 | class CLMPIbaseinfo : public CLbaseinfo {
12 | 
13 |  protected:
14 |   virtual std::string create_kernel_contents(std::string kernelfile) const;
15 |   virtual std::string physis_opencl_h_include_path(void) const { return header_path_; }
16 | 
17 |   std::string header_path_;   // returned by physis_opencl_h_include_path()
18 |   std::string kernel_filen_;  // set/read through set_kernel_filen()/get_kernel_filen()
19 |   int dev_id_;
20 |   int save_context_p;         // set to 1 by mark_save_context()
21 | 
22 |  public:
23 |   CLMPIbaseinfo();
24 |   CLMPIbaseinfo(
25 |       unsigned int id_default, unsigned int create_queue_p,
26 |       unsigned int block_events_p);
27 |   CLMPIbaseinfo(CLMPIbaseinfo &master);  // note: takes a non-const reference
28 |   virtual ~CLMPIbaseinfo();
29 | 
30 |   virtual cl_program get_prog(void) const { return clprog; }
31 |   virtual void set_kernel_filen(std::string filen) { kernel_filen_ = filen; }
32 |   virtual std::string get_kernel_filen(void) const { return kernel_filen_; }
33 |   // A null path is ignored and leaves header_path_ unchanged.
34 |   virtual void set_header_include_path(const char *path) { if (path) header_path_ = path; }
35 |   virtual void mark_save_context() { save_context_p = 1; }
36 |   // Calls clFinish on clqueue when it is non-null; otherwise does nothing.
37 |   virtual void sync_queue() { if (clqueue) clFinish(clqueue); } 
38 | 
39 | 
40 | }; // class CLMPIbaseinfo
41 | } // namespace runtime
42 | } // namespace physis
43 | 
44 | 
45 | #endif /* #define PHYSIS_RUNTIME_RPC_OPENCL_MPI_H */
46 | 


--------------------------------------------------------------------------------
/runtime/runtime.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_RUNTIME_H_
 4 | #define PHYSIS_RUNTIME_RUNTIME_H_
 5 | 
 6 | #include <stdarg.h>
 7 | 
 8 | #include "runtime/runtime_common.h"
 9 | #include "runtime/grid.h"
10 | 
11 | namespace physis {
12 | namespace runtime {
13 | 
14 | // Base runtime parameterized by grid-space type; holds a GridSpaceType pointer that starts out NULL.
15 | template <class GridSpaceType>
16 | class Runtime {
17 |  public:
18 |   Runtime(): gs_(NULL) {}
19 |   virtual ~Runtime() {}
20 |   virtual void Init(int *argc, char ***argv, int grid_num_dims,
21 |                     va_list vl) {
22 |     // Set __ps_trace if physis-trace option is given
23 |     __ps_trace = NULL;
24 |     string opt_name = "physis-trace";
25 |     vector<string> opts;
26 |     if (ParseOption(argc, argv, opt_name, 0, opts)) {  // 0: the option takes no extra arguments
27 |       __ps_trace = stderr;
28 |       LOG_INFO() << "Tracing enabled\n";
29 |     }
30 |   }  // grid_num_dims and vl are not used by this base implementation
31 |   // Returns the stored grid-space pointer (NULL unless a subclass sets gs_).
32 |   virtual GridSpaceType *gs() {
33 |     return gs_;
34 |   }
35 |  protected:
36 |   GridSpaceType *gs_;
37 |   
38 | };
39 | 
40 | } // namespace runtime
41 | } // namespace physis
42 | 
43 | 
44 | 
45 | #endif /* PHYSIS_RUNTIME_RUNTIME_H_ */
46 | 


--------------------------------------------------------------------------------
/runtime/runtime_common.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_RUNTIME_COMMON_H_
 4 | #define PHYSIS_RUNTIME_RUNTIME_COMMON_H_
 5 | 
 6 | #define PHYSIS_RUNTIME
 7 | 
 8 | #include "physis/physis_util.h"
 9 | #include "physis/physis_common.h"
10 | #include "physis/internal_common.h"
11 | #include "common/config.h"
12 | 
13 | namespace physis {
14 | namespace runtime {
15 | // Pair of per-dimension widths; presumably bw = backward and fw = forward -- confirm.
16 | struct Width2 {
17 |   UnsignedArray bw;
18 |   UnsignedArray fw;
19 |   const UnsignedArray &operator()(bool is_fw) const {  // selects fw when is_fw is true, bw otherwise
20 |     return is_fw ? fw : bw;
21 |   }
22 |   unsigned operator()(int dim, bool is_fw) const {  // element `dim` of the selected array
23 |     return operator()(is_fw)[dim];
24 |   }
25 | };
26 | 
27 | typedef void (*__PSStencilRunClientFunction)(int, void **);
28 | 
29 | // Returns the number of process grid dimensions. Returns negative
30 | // value on failure.
31 | int GetProcessDim(int *argc, char ***argv, IntArray &proc_size);
32 | 
33 | bool ParseOption(int *argc, char ***argv, const string &opt_name,
34 |                  int num_additional_args, vector<string> &opts);
35 | 
36 | 
37 | } // namespace runtime
38 | } // namespace physis
39 | 
40 | inline
41 | std::ostream &operator<<(std::ostream &os, const physis::runtime::Width2 &w) {
42 |   return os << "{bw: " << w.bw << ", fw: " << w.fw << "}";
43 | }
44 | 
45 | #endif /* PHYSIS_RUNTIME_RUNTIME_COMMON_H_ */
46 | 


--------------------------------------------------------------------------------
/runtime/runtime_cuda.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_RUNTIME_CUDA_H_
 4 | #define PHYSIS_RUNTIME_RUNTIME_CUDA_H_
 5 | 
 6 | #include "runtime/runtime_common.h"
 7 | #include "runtime/runtime.h"
 8 | 
 9 | namespace physis {
10 | namespace runtime {
11 | // CUDA runtime; currently adds nothing to Runtime<GridSpaceType>.
12 | template <class GridSpaceType>
13 | class RuntimeCUDA: public Runtime<GridSpaceType> {
14 |  public:
15 |   RuntimeCUDA() {}
16 |   virtual ~RuntimeCUDA() {}
17 | };
18 | 
19 | } // namespace runtime
20 | } // namespace physis
21 | 
22 | #endif /* PHYSIS_RUNTIME_RUNTIME_CUDA_H_ */
23 | 
24 | 


--------------------------------------------------------------------------------
/runtime/runtime_cuda_hm.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "runtime/runtime_cuda_hm.h"
 4 | 
 5 | namespace physis {
 6 | namespace runtime {
 7 | // NOTE(review): RuntimeCUDA is a class template in runtime_cuda.h; naming it without arguments looks ill-formed -- confirm.
 8 | RuntimeCUDAHM::RuntimeCUDAHM(): RuntimeCUDA() {
 9 | }
10 | // Nothing to release: the class declares no members of its own.
11 | RuntimeCUDAHM::~RuntimeCUDAHM() {
12 | }
13 | 
14 | } // namespace runtime
15 | } // namespace physis
16 | 


--------------------------------------------------------------------------------
/runtime/runtime_cuda_hm.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_RUNTIME_CUDA_HM_H_
 4 | #define PHYSIS_RUNTIME_RUNTIME_CUDA_HM_H_
 5 | 
 6 | #include "runtime/runtime_common.h"
 7 | #include "runtime/runtime_cuda.h"
 8 | 
 9 | namespace physis {
10 | namespace runtime {
11 | // NOTE(review): RuntimeCUDA is a class template (runtime_cuda.h); the base needs template arguments -- confirm which.
12 | class RuntimeCUDAHM: public RuntimeCUDA {
13 |  public:
14 |   RuntimeCUDAHM();
15 |   virtual ~RuntimeCUDAHM();
16 | };
17 | 
18 | } // namespace runtime
19 | } // namespace physis
20 | 
21 | #endif /* PHYSIS_RUNTIME_RUNTIME_CUDA_HM_H_ */
22 | 
23 | 


--------------------------------------------------------------------------------
/runtime/runtime_ref.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "runtime/runtime_ref.h"
 4 | 
 5 | 
 6 | namespace physis {
 7 | namespace runtime {
 8 | 
 9 | 
10 | } // namespace runtime
11 | } // namespace physis
12 | 


--------------------------------------------------------------------------------
/runtime/runtime_ref.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_RUNTIME_REF_H_
 4 | #define PHYSIS_RUNTIME_RUNTIME_REF_H_
 5 | 
 6 | #include "runtime/runtime_common.h"
 7 | #include "runtime/runtime.h"
 8 | 
 9 | namespace physis {
10 | namespace runtime {
11 | // Reference runtime; currently adds nothing to Runtime<GridSpaceType>.
12 | template <class GridSpaceType>
13 | class RuntimeRef: public Runtime<GridSpaceType> {
14 |  public:
15 |   RuntimeRef() {}
16 |   virtual ~RuntimeRef() {}
17 | };
18 | 
19 | } // namespace runtime
20 | } // namespace physis
21 | 
22 | #endif /* PHYSIS_RUNTIME_RUNTIME_REF_H_ */
23 | 
24 | 


--------------------------------------------------------------------------------
/runtime/tests/test_physis_rt_mpi.c:
--------------------------------------------------------------------------------
 1 | #include "physis_mpi.h"
 2 | 
 3 | #define N (4)
 4 | 
 5 | typedef void (*grid_update_client_t)();
 6 | grid_update_client_t *update_clients;
 7 | /* Allocates a zero-filled buffer of N*N*N ints (NULL if calloc fails); the caller owns it. */
 8 | int *create_grid()
 9 | {
10 |     return (int*)calloc(N * N * N, sizeof(int));
11 | }
12 | 
13 | void init_grid(int *g) 
14 | {
15 |     int i;
16 |     for (i = 0; i < N * N * N; i++) {
17 |         g[i] = i;
18 |     }
19 |     return;
20 | }
21 | 
22 | void print_grid(int *g, FILE *out) 
23 | {
24 |     int i;
25 |     fprintf(out, "grid: ");
26 |     for (i = 0; i < N * N * N; i++) {
27 |         fprintf(out, "%d ", g[i]);
28 |     }
29 |     fprintf(out, "\n");
30 |     return;
31 | }
32 | 
33 | int main(int argc, char *argv[]) 
34 | {
35 |     PhysisInit(&argc, &argv);
36 |     unsigned s[3] = {N, N, N};
37 |     uvec_t halo = {1, 1, 1};
38 |     grid *g = grid_new(3, sizeof(int), s, halo, halo);
39 |     int *gin = create_grid();
40 |     init_grid(gin);
41 |     print_grid(gin, stdout);
42 |     int *gout = create_grid();
43 |     grid_copyin(g, gin);
44 |     grid_copyout(g, gout);
45 |     printf("copyout\n");
46 |     print_grid(gout, stdout);
47 |     grid_free(g);
48 |     PhysisFinalize();
49 |     return 0;
50 | }
51 | 
52 | 


--------------------------------------------------------------------------------
/runtime/timing.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "runtime/timing.h"
 4 | 
 5 | namespace physis {
 6 | namespace runtime {
 7 | 
 8 | DataCopyProfile::DataCopyProfile():
 9 |     gpu_to_cpu(0.0), cpu_in(0.0), cpu_out(0.0), cpu_to_gpu(0.0) {}
10 | 
11 | 
12 | std::ostream &DataCopyProfile::print(std::ostream &os) const {  // Writes the four timings to os wrapped in parentheses and returns os.
13 |   StringJoin sj;  // NOTE(review): StringJoin is declared elsewhere; presumably it inserts a separator between appended pieces — confirm.
14 |   sj << "GPU->CPU: " << gpu_to_cpu;
15 |   sj << "CPU->MPI: " << cpu_out;  // cpu_out is reported under the CPU->MPI label.
16 |   sj << "MPI->CPU: " << cpu_in;  // cpu_in is reported under the MPI->CPU label.
17 |   sj << "CPU->GPU: " << cpu_to_gpu;
18 |   os << "(" << sj.str() << ")";
19 |   return os;
20 | }
21 | 
22 | } // namespace runtime
23 | } // namespace physis
24 | 
25 | 


--------------------------------------------------------------------------------
/runtime/timing.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_RUNTIME_TIMING_H_
 4 | #define PHYSIS_RUNTIME_TIMING_H_
 5 | 
 6 | #include "runtime/runtime_common.h"
 7 | 
 8 | namespace physis {
 9 | namespace runtime {
10 | 
11 | struct DataCopyProfile {  // Four double-valued timings for the stages of a data copy; units are not stated in this header.
12 |   double gpu_to_cpu;
13 |   double cpu_in;    
14 |   double cpu_out;
15 |   double cpu_to_gpu;
16 |   DataCopyProfile();  // Defined out of line.
17 |   std::ostream &print(std::ostream &os) const;  // Writes the profile to os; the global operator<< below calls this.
18 | };
19 | 
20 | struct Stopwatch {  // Thin C++ wrapper around the __PSStopwatch helper functions.
21 |   __PSStopwatch st;  // State object handed to the __PSStopwatch* calls.
22 |   void Start() {  // Forwards to __PSStopwatchStart.
23 |     __PSStopwatchStart(&st);
24 |   }
25 |   float Stop() {  // Returns whatever __PSStopwatchStop reports; presumably elapsed time since Start() — units not visible here, confirm.
26 |     return __PSStopwatchStop(&st);
27 |   }
28 | };
29 | 
30 | } // namespace runtime
31 | } // namespace physis
32 | 
33 | inline std::ostream& operator<<(  // Stream insertion for DataCopyProfile, declared at global scope outside physis::runtime.
34 |     std::ostream &os,
35 |     const physis::runtime::DataCopyProfile &prof) {
36 |   return prof.print(os);  // Formatting is delegated to DataCopyProfile::print.
37 | }
38 | 
39 | #endif /* PHYSIS_RUNTIME_TIMING_H_ */
40 | 


--------------------------------------------------------------------------------
/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(system_tests)  # Descend into the system test suite.
2 | #add_subdirectory(gmock)  # Disabled: the gmock directory is currently not added to the build.
3 | 


--------------------------------------------------------------------------------
/tests/gmock/COPYING:
--------------------------------------------------------------------------------
 1 | Copyright 2008, Google Inc.
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are
 6 | met:
 7 | 
 8 |     * Redistributions of source code must retain the above copyright
 9 | notice, this list of conditions and the following disclaimer.
10 |     * Redistributions in binary form must reproduce the above
11 | copyright notice, this list of conditions and the following disclaimer
12 | in the documentation and/or other materials provided with the
13 | distribution.
14 |     * Neither the name of Google Inc. nor the names of its
15 | contributors may be used to endorse or promote products derived from
16 | this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/tests/gmock/README:
--------------------------------------------------------------------------------
1 | Google Mock version 1.6.0
2 | 
3 | Retains only files under the fused-src directory.
4 | 


--------------------------------------------------------------------------------
/tests/system_tests/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | 
2 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/run_system_tests.sh.cmake  # Generate the test runner script in the top-level build directory.
3 |   ${CMAKE_BINARY_DIR}/run_system_tests.sh @ONLY)  # @ONLY: substitute only @VAR@ references so ${...} in the script is left alone.
4 | 
5 | add_subdirectory(test_cases)  # Targets for the individual test cases live there.
6 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | file(GLOB test_srcs  # Collect every hand-written reference implementation in this directory.
 3 |   "${CMAKE_CURRENT_SOURCE_DIR}/test_*.manual.ref.c")
 4 | foreach (src ${test_srcs})
 5 |   get_filename_component(fname ${src} NAME_WE)  # NAME_WE drops the directory and the longest extension, leaving e.g. "test_15".
 6 |   add_executable(${fname}.manual.ref.exe ${src})  # One executable per reference source.
 7 | endforeach ()
 8 | 
 9 | # uses the same manual code as test_redblack
10 | add_executable(test_redblack-separated.manual.ref.exe
11 |   test_redblack.manual.ref.c)
12 | 
# Hand-written CUDA implementations used as references for the CUDA backend.
file(GLOB cuda_test_srcs
  "${CMAKE_CURRENT_SOURCE_DIR}/test_*.manual.cuda.cu")

if (CUDA_FOUND)
  foreach (src ${cuda_test_srcs})
    get_filename_component(fname ${src} NAME_WE)
    cuda_add_executable(${fname}.manual.cuda.exe ${src})
    # Pass the variable name: if() dereferences it itself. Writing
    # ${CMAKE_SYSTEM_NAME} would expand it first and then evaluate the
    # resulting string a second time as a possible variable name.
    if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
      set_target_properties(
        ${fname}.manual.cuda.exe PROPERTIES
        LINK_FLAGS "-stdlib=libstdc++"
        )
    endif ()
  endforeach ()
  # For integer code, use the normal C code for testing the CUDA version
  add_executable(test_7-pt-int-type.manual.cuda.exe
    test_7-pt-int-type.manual.ref.c)
  add_executable(test_9-pt-reduction.manual.cuda.exe
    test_9-pt-reduction.manual.ref.c)
  add_executable(test_9-pt-periodic-reduction.manual.cuda.exe
    test_9-pt-periodic-reduction.manual.ref.c)
endif ()
35 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_01.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: copyin and copyout
 3 |  * DIM: 3
 4 |  * PRIORITY: 1
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 8
11 | 
12 | int main(int argc, char *argv[]) {  /* Copies a buffer into a 3-D grid and back out, then checks the round trip is lossless. */
13 |   PSInit(&argc, &argv, 3, N, N, N);
14 |   PSGrid3DFloat g3 = PSGrid3DFloatNew(N, N, N);
15 |     
16 |   float *indata = (float *)malloc(sizeof(float) * N * N * N);
17 |   int i;
18 |   for (i = 0; i < N*N*N; i++) {
19 |     indata[i] = i;  /* Each element holds its own linear index. */
20 |   }
21 |   float *outdata = (float *)malloc(sizeof(float) * N * N * N);
22 |     
23 |   PSGridCopyin(g3, indata);
24 |   PSGridCopyout(g3, outdata);
25 |     
26 |   for (i = 0; i < N*N*N; i++) {
27 |     if (indata[i] != outdata[i]) {  /* Exact comparison: no arithmetic was applied, so values must match bit-for-bit. */
28 |       fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n",
29 |               i, indata[i], outdata[i]);
30 |       exit(1);  /* Fail on the first mismatch. */
31 |     }
32 |   }
33 | 
34 |   PSGridFree(g3);
35 |   PSFinalize();
36 |   free(indata);
37 |   free(outdata);
38 |   return 0;
39 | }
40 | 
41 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_02.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Single-grid in-place kernel (scales each point by 2)
 3 |  * DIM: 3
 4 |  * PRIORITY: 1 
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 8
11 | 
12 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g) {  /* Stencil body for point (x, y, z): reads g there and emits twice that value back into g. */
13 |   float v = PSGridGet(g, x, y, z) * 2;
14 |   PSGridEmit(g, v);  /* NOTE(review): PSGridEmit takes no coordinates; presumably it writes at the current point — confirm. */
15 |   return;
16 | }
17 | 
18 | int main(int argc, char *argv[]) {  /* Runs the doubling kernel over the whole grid and checks every output equals 2 * input. */
19 |   PSInit(&argc, &argv, 3, N, N, N);
20 |   PSGrid3DFloat g = PSGrid3DFloatNew(N, N, N);
21 |   PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);  /* Domain bounds 0..N in each of the three dimensions. */
22 |   size_t nelms = N*N*N;
23 |   
24 |   float *indata = (float *)malloc(sizeof(float) * nelms);
25 |   int i;
26 |   for (i = 0; i < nelms; i++) {
27 |     indata[i] = i;  /* Each element holds its own linear index. */
28 |   }
29 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
30 |     
31 |   PSGridCopyin(g, indata);
32 | 
33 |   PSStencilRun(PSStencilMap(kernel, d, g));  /* The same grid g is both read and written by the kernel. */
34 |     
35 |   PSGridCopyout(g, outdata);
36 |     
37 |   for (i = 0; i < nelms; i++) {
38 |     if (indata[i] * 2 != outdata[i]) {
39 |       fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n",
40 |               i, indata[i]*2, outdata[i]);  /* "in" shows the expected (doubled) value. */
41 |       exit(1);
42 |     }
43 |   }
44 | 
45 |   PSGridFree(g);
46 |   PSFinalize();
47 |   free(indata);
48 |   free(outdata);
49 |   return 0;
50 | }
51 | 
52 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_03.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Identity kernel using source and destination grids
 3 |  * DIM: 3
 4 |  * PRIORITY: 1 
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 8
11 | 
12 | void kernel1(const int x, const int y, const int z, PSGrid3DFloat g,  /* Stencil body for point (x, y, z): copies the value of g into g2 unchanged. */
13 |              PSGrid3DFloat g2) {
14 |   float v = PSGridGet(g, x, y, z);
15 |   PSGridEmit(g2, v);  /* NOTE(review): presumably writes at the current point (x, y, z) — confirm. */
16 |   return;
17 | }
18 | 
19 | int main(int argc, char *argv[]) {  /* Copies grid g into grid g2 through kernel1 and checks g2 matches the input exactly. */
20 |   PSInit(&argc, &argv, 3, N, N, N);
21 |   PSGrid3DFloat g = PSGrid3DFloatNew(N, N, N);
22 |   PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);  
23 |   PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);
24 |   size_t nelms = N*N*N;
25 |   
26 |   float *indata = (float *)malloc(sizeof(float) * nelms);
27 |   int i;
28 |   for (i = 0; i < nelms; i++) {
29 |     indata[i] = i;  /* Each element holds its own linear index. */
30 |   }
31 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
32 |     
33 |   PSGridCopyin(g, indata);  /* Only the source grid is initialized; g2 is filled by the kernel. */
34 | 
35 |   PSStencilRun(PSStencilMap(kernel1, d, g, g2));
36 |     
37 |   PSGridCopyout(g2, outdata);  /* Results are read from the destination grid. */
38 |     
39 |   for (i = 0; i < nelms; i++) {
40 |     if (indata[i] != outdata[i]) {
41 |       fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n",
42 |               i, indata[i], outdata[i]);
43 |       exit(1);
44 |     }
45 |   }
46 | 
47 |   PSGridFree(g);
48 |   PSGridFree(g2);
49 |   PSFinalize();
50 |   free(indata);
51 |   free(outdata);
52 |   return 0;
53 | }
54 | 
55 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_09.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Accessing a 2D plane in a 3D grid
 3 |  * DIM: 3
 4 |  * PRIORITY: 10 
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 4
11 | 
12 | void kernel(const int x, const int y, const int z,  /* Stencil body: emits into g1 the value of g2 at (x, y, 0), ignoring the point's own z. */
13 |             PSGrid3DFloat g1, PSGrid3DFloat g2) {
14 |   float v = PSGridGet(g2, x, y, 0);  /* Fixed z index 0: every z-layer of the output reads the same plane. */
15 |   PSGridEmit(g1, v);
16 |   return;
17 | }
18 | 
19 | #define IDX3(x, y, z) ((x) + (y) * N + (z) * N * N)
20 | #define IDX2(x, y) ((x) + (y) * N)
21 | 
22 | int main(int argc, char *argv[]) {  /* Broadcasts an N x N x 1 grid into every z-layer of an N x N x N grid and verifies the result. */
23 |   PSInit(&argc, &argv, 3, N, N, N);
24 |   PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N);
25 |   PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, 1);  /* Source grid is a single plane (depth 1). */
26 |   
27 |   size_t nelms = N*N*N;
28 |   int i, j, k;
29 |   
30 |   float *indata = (float *)malloc(sizeof(float) * nelms);  /* Sized for N*N*N elements although only the first N*N are initialized below. */
31 | 
32 |   for (i = 0; i < N*N; i++) {
33 |     indata[i] = i;
34 |   }
35 |   PSGridCopyin(g2, indata);
36 | 
37 |   PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);
38 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2), 1);  /* NOTE(review): the trailing 1 is presumably an iteration count — confirm against the PSStencilRun documentation. */
39 |   
40 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
41 |   PSGridCopyout(g1, outdata);
42 | 
43 |   for (k = 0; k < N; ++k) {
44 |     for (j = 0; j < N; ++j) {    
45 |       for (i = 0; i < N; ++i) {
46 |         if (indata[IDX2(i, j)] != outdata[IDX3(i, j, k)]) {  /* Every layer k must equal the input plane. */
47 |           printf("Error: mismatch at %d,%d,%d, in: %f, out: %f\n",
48 |                  i, j, k, indata[IDX2(i, j)], outdata[IDX3(i, j, k)]);
49 |           exit(1);
50 |         }
51 |       }
52 |     }
53 |   }
54 |   
55 |   PSGridFree(g1);
56 |   PSGridFree(g2);  
57 |   PSFinalize();
58 | 
59 |   free(indata);
60 |   free(outdata);
61 |   return 0;
62 | }
63 | 
64 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_10.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Reflecting access
 3 |  * DIM: 3
 4 |  * PRIORITY: 10 
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 8
11 | 
12 | void kernel(const int x, const int y, const int z,  /* Stencil body: emits into g2 the value of g1 mirrored along x. */
13 |             PSGrid3DFloat g1, PSGrid3DFloat g2) {
14 |   float v = PSGridGet(g1, N - x -1, y, z);  /* N - x - 1 maps x = 0 to N-1 and x = N-1 to 0. */
15 |   PSGridEmit(g2, v);
16 |   return;
17 | }
18 | 
19 | #define IDX3(x, y, z) ((x) + (y) * N + (z) * N * N)
20 | #define IDX2(x, y) ((x) + (y) * N)
21 | 
22 | int main(int argc, char *argv[]) {
23 |   PSInit(&argc, &argv, 3, N, N, N);
24 |   PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N);
25 |   PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);  
26 |   
27 |   size_t nelms = N*N*N;
28 |   int i, j, k;
29 |   
30 |   float *indata = (float *)malloc(sizeof(float) * nelms);
31 | 
32 |   for (i = 0; i < N*N*N; i++) {
33 |     indata[i] = i;
34 |   }
35 |   PSGridCopyin(g1, indata);
36 | 
37 |   PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);
38 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2), 1);
39 |   
40 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
41 |   PSGridCopyout(g2, outdata);
42 | 
43 |   for (i = 0; i < N; ++i) {
44 |     for (j = 0; j < N; ++j) {
45 |       for (k = 0; k < N; ++k) {
46 |         if (indata[IDX3(N-i-1, j, k)] != outdata[IDX3(i, j, k)]) {
47 |           printf("Error: mismatch at %d,%d,%d, in: %f, out: %f\n",
48 |                  i, j, k, indata[IDX3(i, j, k)], outdata[IDX3(i, j, k)]);
49 |           exit(1);
50 |         }
51 |       }
52 |     }
53 |   }
54 |   
55 |   PSGridFree(g1);
56 |   PSGridFree(g2);  
57 |   PSFinalize();
58 | 
59 |   free(indata);
60 |   free(outdata);
61 |   return 0;
62 | }
63 | 
64 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_15.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 8 | 
 9 | void kernel(float *g1, float *g2) {
10 |   int x, y, z;
11 |   for (z = 0; z < N; ++z) {
12 |     for (y = 0; y < N; ++y) {
13 |       for (x = 0; x < N; ++x) {
14 |         float c = g1[OFFSET(x, y, z)];
15 |         float l = 0.0f;
16 |         if (x > 0) {
17 |           l = g1[OFFSET(x-1, y, z)];
18 |         }
19 |         if (x > 0) {
20 |           l += g1[OFFSET(x-1, y, z)];
21 |         } else {
22 |           l += g1[OFFSET(x, y, z)];
23 |         }
24 |         if (x > 0) {
25 |           l += g1[OFFSET(x-1, y, z)];
26 |         } else {
27 |           l += c;
28 |         }
29 |         if (x > 0 && x < N-1) {
30 |           l += g1[OFFSET(x-1, y, z)] + g1[OFFSET(x+1, y, z)];
31 |         } else {
32 |           l += g1[OFFSET(x, y, z)];
33 |         }
34 |         if (x % 2 == 0) {
35 |           l += g1[OFFSET(x, y, z)];
36 |         }
37 |         g2[OFFSET(x, y, z)] = c + l;
38 |       }
39 |     }
40 |   }
41 |   return;
42 | }
43 | 
44 | void dump(float *input) {
45 |   int i;
46 |   for (i = 0; i < N*N*N; ++i) {
47 |     printf("%f\n", input[i]);
48 |   }
49 | }
50 | 
51 | int main(int argc, char *argv[]) {
52 |   REAL *g1, *g2;  
53 |   size_t nelms = N*N*N;
54 |   g1 = (REAL *)malloc(sizeof(REAL) * nelms);
55 |   g2 = (REAL *)malloc(sizeof(REAL) * nelms);
56 | 
57 |   int i;
58 |   for (i = 0; i < (int)nelms; i++) {
59 |     g1[i] = i;
60 |     g2[i] = i;
61 |   }
62 | 
63 |   kernel(g1, g2);
64 |   dump(g2);
65 |   
66 |   free(g1);
67 |   free(g2);
68 |   return 0;
69 | }
70 | 
71 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_16.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 8 | #define PSGridGet(g, x, y, z) ((g)[OFFSET(x, y, z)])
 9 | 
10 | void kernel(float *g1, float *g2) {
11 |   int x, y, z;
12 |   int halo_width = 1;
13 |   for (z = 0; z < N; ++z) {
14 |     for (y = 0; y < N; ++y) {
15 |       for (x = 0; x < N; ++x) {
16 |         float c, w, e, n, s, b, t;
17 |         c = PSGridGet(g1, x, y, z);
18 |         if (x == 0)    w = c; else w = PSGridGet(g1, x-1, y, z);
19 |         if (x == N-1) e = c ; else e = PSGridGet(g1, x+1, y, z);
20 |         if (y == 0)    n = c ; else n=PSGridGet(g1, x, y-1, z);
21 |         if (y == N-1) s= c ; else s=PSGridGet(g1, x, y+1, z);
22 |         if (z == 0)    b= c ; else b=PSGridGet(g1, x, y, z-1);
23 |         if (z == N-1) t= c ; else t=PSGridGet(g1, x, y, z+1);
24 |         g2[OFFSET(x, y, z)] = c + w + e + s + n + b + t;
25 |       }
26 |     }
27 |   }
28 |   return;
29 | }
30 | 
31 | void dump(float *input) {
32 |   int i;
33 |   for (i = 0; i < N*N*N; ++i) {
34 |     printf("%f\n", input[i]);
35 |   }
36 | }
37 | 
38 | int main(int argc, char *argv[]) {
39 |   REAL *g1, *g2;  
40 |   size_t nelms = N*N*N;
41 |   g1 = (REAL *)malloc(sizeof(REAL) * nelms);
42 |   g2 = (REAL *)malloc(sizeof(REAL) * nelms);
43 | 
44 |   int i;
45 |   for (i = 0; i < (int)nelms; i++) {
46 |     g1[i] = i;
47 |     g2[i] = i;
48 |   }
49 | 
50 |   kernel(g1, g2);
51 |   dump(g2);
52 |   
53 |   free(g1);
54 |   free(g2);
55 |   return 0;
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_3-pt-1d.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: 3-point stencil with 1-D grids
 3 |  * DIM: 1
 4 |  * PRIORITY: 1 
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 1024
11 | 
12 | static void kernel(const int x, PSGrid1DFloat g, PSGrid1DFloat g2) {  /* Stencil body for point x: emits into g2 the sum of g at x-1, x and x+1. */
13 |   float v =
14 |       PSGridGet(g,x-1) +
15 |       PSGridGet(g,x) + 
16 |       PSGridGet(g,x+1);
17 |   PSGridEmit(g2, v);
18 |   return;
19 | }
20 | 
21 | void dump(float *input) {
22 |   int i;
23 |   for (i = 0; i < N; ++i) {
24 |     printf("%f\n", input[i]);
25 |   }
26 | }
27 | 
28 | int main(int argc, char *argv[]) {  /* Runs the 1-D 3-point stencil once from g1 into g2 and prints g2. */
29 |   PSInit(&argc, &argv, 1, N);
30 |   PSGrid1DFloat g1 = PSGrid1DFloatNew(N);
31 |   PSGrid1DFloat g2 = PSGrid1DFloatNew(N);
32 | 
33 |   PSDomain1D d = PSDomain1DNew(1, N-1);  /* NOTE(review): the manual reference loops x = 1 .. N-2, which suggests the upper bound is exclusive — confirm. */
34 |   size_t nelms = N;
35 |   
36 |   float *indata = (float *)malloc(sizeof(float) * nelms);
37 |   int i;
38 |   for (i = 0; i < nelms; i++) {
39 |     indata[i] = i;
40 |   }
41 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
42 |     
43 |   PSGridCopyin(g1, indata);
44 |   PSGridCopyin(g2, indata);  /* g2 starts with the same data, so points outside the domain keep their input values. */
45 | 
46 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2));
47 |     
48 |   PSGridCopyout(g2, outdata);
49 |   dump(outdata);  
50 | 
51 |   PSGridFree(g1);
52 |   PSGridFree(g2);
53 |   PSFinalize();
54 |   free(indata);
55 |   free(outdata);
56 |   return 0;
57 | }
58 | 
59 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_3-pt-1d.manual.cuda.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "cuda.h"
 3 | #include "cuda_runtime.h"
 4 | 
 5 | #define N 1024
 6 | #define REAL float
 7 | 
 8 | #define OFFSET1D(x) (x)
 9 | #define OFFSET3D(x, y, z) ((x) + (y) * N + (z) * N * N)
10 | 
11 |  __global__ void kernel(REAL *g1, REAL *g2) {
12 |    int x = threadIdx.x + blockIdx.x * blockDim.x;
13 | 
14 |    if (x == 0 || x == N-1) return;
15 | 
16 |    float v = g1[OFFSET1D(x-1)] + g1[OFFSET1D(x)] +
17 |        g1[OFFSET1D(x+1)];
18 |    g2[OFFSET1D(x)] = v;
19 |    return;
20 |  }
21 | 
// Print the N host-side values to stdout, one per line.
void dump(float *input) {
  const float *cur = input;
  const float *const end = input + N;
  for (; cur != end; ++cur) {
    printf("%f\n", *cur);
  }
}
28 | 
// Host driver for the 1-D 3-point stencil: uploads an index-filled buffer
// as both input and initial output, launches one thread per element,
// downloads the result into the host buffer and prints it.
int main(int argc, char *argv[]) {
  REAL *g1, *g1d;
  REAL *g2d;
  size_t nelms = N;
  g1 = (REAL *)malloc(sizeof(REAL) * nelms);
  cudaMalloc((void**)&g1d, sizeof(REAL) * nelms);
  cudaMalloc((void**)&g2d, sizeof(REAL) * nelms);

  int i;
  for (i = 0; i < (int)nelms; i++) {
    g1[i] = i;
  }
    
  // The output buffer starts as a copy of the input so the untouched end
  // points keep their input values.
  cudaMemcpy(g1d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice);
  cudaMemcpy(g2d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice);  
  
  dim3 block_dim(4);
  dim3 grid_dim(N/block_dim.x);

  kernel<<<grid_dim, block_dim>>>(g1d, g2d);
  cudaMemcpy(g1, g2d, sizeof(REAL) * nelms, cudaMemcpyDeviceToHost);

  dump(g1);

  // Release device and host buffers explicitly instead of relying on
  // cudaDeviceReset and process exit to reclaim them.
  cudaFree(g1d);
  cudaFree(g2d);
  free(g1);
  cudaDeviceReset();
  return 0;
}
56 | 
57 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_3-pt-1d.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 1024
 5 | #define REAL float
 6 | 
 7 | #define OFFSET(x) (x)
 8 | 
 9 | void kernel(float *g1, float *g2) {
10 |   int x;
11 |   for (x = 1; x < N-1; ++x) {
12 |     float v = g1[x-1] + g1[x] + g1[x+1];
13 |     g2[x] = v;
14 |   }
15 |   return;
16 | }
17 | 
18 | void dump(float *input) {
19 |   int i;
20 |   for (i = 0; i < N; ++i) {
21 |     printf("%f\n", input[i]);
22 |   }
23 | }
24 | 
25 | int main(int argc, char *argv[]) {
26 |   REAL *g1, *g2;  
27 |   size_t nelms = N;
28 |   g1 = (REAL *)malloc(sizeof(REAL) * nelms);
29 |   g2 = (REAL *)malloc(sizeof(REAL) * nelms);
30 | 
31 |   int i;
32 |   for (i = 0; i < (int)nelms; i++) {
33 |     g1[i] = i;
34 |     g2[i] = i;
35 |   }
36 | 
37 |   kernel(g1, g2);
38 |   dump(g2);
39 |   
40 |   free(g1);
41 |   free(g2);
42 |   return 0;
43 | }
44 | 
45 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_3-pt-periodic.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: 3-point periodic-boundary stencil
 3 |  * DIM: 3
 4 |  * PRIORITY: 1 
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 32
11 | 
12 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g,  /* Stencil body: emits into g2 the sum of g at z, z+1 and z-1 using periodic access. */
13 |             PSGrid3DFloat g2) {
14 |   float v =
15 |       PSGridGetPeriodic(g,x,y,z) +
16 |       PSGridGetPeriodic(g,x,y,z+1) + 
17 |       PSGridGetPeriodic(g,x,y,z-1);  /* NOTE(review): the manual references wrap z-1 and z+1 modulo N; presumably PSGridGetPeriodic does the same — confirm. */
18 |   PSGridEmit(g2, v);
19 |   return;
20 | }
21 | 
22 | void dump(float *input) {
23 |   int i;
24 |   for (i = 0; i < N*N*N; ++i) {
25 |     printf("%f\n", input[i]);
26 |   }
27 | }
28 | 
29 | int main(int argc, char *argv[]) {  /* Runs the periodic 3-point stencil once over the full grid and prints the output grid. */
30 |   PSInit(&argc, &argv, 3, N, N, N);
31 |   PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N);
32 |   PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);
33 | 
34 |   PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);  /* Whole grid: with periodic access no boundary points are excluded. */
35 |   size_t nelms = N*N*N;
36 |   
37 |   float *indata = (float *)malloc(sizeof(float) * nelms);
38 |   int i;
39 |   for (i = 0; i < nelms; i++) {
40 |     indata[i] = i;
41 |   }
42 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
43 |     
44 |   PSGridCopyin(g1, indata);
45 |   PSGridCopyin(g2, indata);  
46 | 
47 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2));
48 |     
49 |   PSGridCopyout(g2, outdata);
50 |   dump(outdata);  
51 | 
52 |   PSGridFree(g1);
53 |   PSGridFree(g2);
54 |   PSFinalize();
55 |   free(indata);
56 |   free(outdata);
57 |   return 0;
58 | }
59 | 
60 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_3-pt-periodic.manual.cuda.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "cuda.h"
 3 | #include "cuda_runtime.h"
 4 | 
 5 | #define N 32
 6 | #define REAL float
 7 | 
 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 9 | 
10 |  __global__ void kernel(REAL *g1, REAL *g2) {
11 |    int x = threadIdx.x + blockIdx.x * blockDim.x;
12 |    int y = threadIdx.y + blockIdx.y * blockDim.y;
13 |    int z = threadIdx.z + blockIdx.z * blockDim.z;
14 | 
15 |    int zp = ((z - 1) + N) % N;
16 |    int zn = (z + 1) % N;
17 | 
18 |    float v =
19 |        g1[OFFSET(x, y, z)] +
20 |        g1[OFFSET(x, y, zn)] +
21 |        g1[OFFSET(x, y, zp)];
22 |    g2[OFFSET(x, y, z)] = v;
23 |    return;
24 |  }
25 | 
// Print all N*N*N host-side values to stdout, one per line.
void dump(float *input) {
  const float *cur = input;
  const float *const end = input + N*N*N;
  for (; cur != end; ++cur) {
    printf("%f\n", *cur);
  }
}
32 | 
// Host driver for the periodic 3-point stencil: uploads an index-filled
// N*N*N buffer as both input and initial output, launches one thread per
// point in 4x4x4 blocks, downloads the result and prints it.
int main(int argc, char *argv[]) {
  REAL *g1, *g1d;
  REAL *g2d;
  size_t nelms = N*N*N;
  g1 = (REAL *)malloc(sizeof(REAL) * nelms);
  cudaMalloc((void**)&g1d, sizeof(REAL) * nelms);
  cudaMalloc((void**)&g2d, sizeof(REAL) * nelms);

  int i;
  for (i = 0; i < (int)nelms; i++) {
    g1[i] = i;
  }
    
  cudaMemcpy(g1d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice);
  cudaMemcpy(g2d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice);  
  
  // N is a multiple of the block edge, so the grid covers the domain exactly.
  dim3 block_dim(4, 4, 4);
  dim3 grid_dim(N/block_dim.x, N/block_dim.y, N/block_dim.z);

  kernel<<<grid_dim, block_dim>>>(g1d, g2d);
  cudaMemcpy(g1, g2d, sizeof(REAL) * nelms, cudaMemcpyDeviceToHost);

  dump(g1);

  // Release device and host buffers explicitly instead of relying on
  // cudaDeviceReset and process exit to reclaim them.
  cudaFree(g1d);
  cudaFree(g2d);
  free(g1);
  cudaDeviceReset();
  return 0;
}
60 | 
61 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_3-pt-periodic.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 8 | 
 9 | void kernel(float *g1, float *g2) {
10 |   int x, y, z;
11 |   for (z = 0; z < N; ++z) {
12 |     int zp = ((z - 1) + N) % N;
13 |     int zn = (z + 1) % N;
14 |     for (y = 0; y < N; ++y) {
15 |       int yp = ((y - 1) + N) % N;
16 |       int yn = (y + 1) % N;
17 |       for (x = 0; x < N; ++x) {
18 |         int xp = ((x - 1) + N) % N;
19 |         int xn = (x + 1) % N;
20 |         float v =
21 |             g1[OFFSET(x, y, z)] +
22 |             g1[OFFSET(x, y, zn)] +
23 |             g1[OFFSET(x, y, zp)];
24 |         g2[OFFSET(x, y, z)] = v;
25 |       }
26 |     }
27 |   }
28 |   return;
29 | }
30 | 
31 | void dump(float *input) {
32 |   int i;
33 |   for (i = 0; i < N*N*N; ++i) {
34 |     printf("%f\n", input[i]);
35 |   }
36 | }
37 | 
38 | int main(int argc, char *argv[]) {
39 |   REAL *g1, *g2;  
40 |   size_t nelms = N*N*N;
41 |   g1 = (REAL *)malloc(sizeof(REAL) * nelms);
42 |   g2 = (REAL *)malloc(sizeof(REAL) * nelms);
43 | 
44 |   int i;
45 |   for (i = 0; i < (int)nelms; i++) {
46 |     g1[i] = i;
47 |     g2[i] = i;
48 |   }
49 | 
50 |   kernel(g1, g2);
51 |   dump(g2);
52 |   
53 |   free(g1);
54 |   free(g2);
55 |   return 0;
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_5-pt-2d.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: 5-point stencil with 2-D grids
 3 |  * DIM: 2
 4 |  * PRIORITY: 10
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 32
11 | 
12 | static void kernel(const int x, const int y, PSGrid2DFloat g, PSGrid2DFloat g2) {  /* Stencil body for point (x, y): emits into g2 the sum of the centre and its four axis neighbours in g. */
13 |   float v =
14 |       PSGridGet(g,x, y) + 
15 |       PSGridGet(g,x-1, y) +
16 |       PSGridGet(g,x+1, y) +
17 |       PSGridGet(g,x, y-1) +
18 |       PSGridGet(g,x, y+1);
19 |   PSGridEmit(g2, v);
20 |   return;
21 | }
22 | 
23 | void dump(float *input) {
24 |   int i;
25 |   for (i = 0; i < N*N; ++i) {
26 |     printf("%f\n", input[i]);
27 |   }
28 | }
29 | 
30 | int main(int argc, char *argv[]) {  /* Runs the 2-D 5-point stencil once from g1 into g2 and prints g2. */
31 |   PSInit(&argc, &argv, 2, N, N);
32 |   PSGrid2DFloat g1 = PSGrid2DFloatNew(N, N);
33 |   PSGrid2DFloat g2 = PSGrid2DFloatNew(N, N);
34 | 
35 |   PSDomain2D d = PSDomain2DNew(1, N-1, 1, N-1);  /* NOTE(review): the manual reference loops 1 .. N-2 in both dimensions, which suggests exclusive upper bounds — confirm. */
36 |   size_t nelms = N * N;
37 |   
38 |   float *indata = (float *)malloc(sizeof(float) * nelms);
39 |   int i;
40 |   for (i = 0; i < nelms; i++) {
41 |     indata[i] = i;
42 |   }
43 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
44 |     
45 |   PSGridCopyin(g1, indata);
46 |   PSGridCopyin(g2, indata);  /* g2 starts with the same data, so points outside the domain keep their input values. */
47 | 
48 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2));
49 |     
50 |   PSGridCopyout(g2, outdata);
51 |   dump(outdata);  
52 | 
53 |   PSGridFree(g1);
54 |   PSGridFree(g2);
55 |   PSFinalize();
56 |   free(indata);
57 |   free(outdata);
58 |   return 0;
59 | }
60 | 
61 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_5-pt-2d.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET(x, y) ((x) + (y) * N)
 8 | 
 9 | void kernel(float *g1, float *g2) {
10 |   int x, y;
11 |   for (y = 1; y < N-1; ++y) {  
12 |     for (x = 1; x < N-1; ++x) {
13 |       float v = g1[OFFSET(x, y)] +
14 |           g1[OFFSET(x-1, y)] +
15 |           g1[OFFSET(x+1, y)] +
16 |           g1[OFFSET(x, y-1)] +
17 |           g1[OFFSET(x, y+1)];
18 |       g2[OFFSET(x, y)] = v;
19 |     }
20 |   }
21 |   return;
22 | }
23 | 
24 | void dump(float *input) {
25 |   int i;
26 |   for (i = 0; i < N*N; ++i) {
27 |     printf("%f\n", input[i]);
28 |   }
29 | }
30 | 
31 | int main(int argc, char *argv[]) {
32 |   REAL *g1, *g2;  
33 |   size_t nelms = N*N;
34 |   g1 = (REAL *)malloc(sizeof(REAL) * nelms);
35 |   g2 = (REAL *)malloc(sizeof(REAL) * nelms);
36 | 
37 |   int i;
38 |   for (i = 0; i < (int)nelms; i++) {
39 |     g1[i] = i;
40 |     g2[i] = i;
41 |   }
42 | 
43 |   kernel(g1, g2);
44 |   dump(g2);
45 |   
46 |   free(g1);
47 |   free(g2);
48 |   return 0;
49 | }
50 | 
51 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_5-pt-periodic.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: 5-point periodic-boundary stencil
 3 |  * DIM: 2
 4 |  * PRIORITY: 1 
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 32
11 | 
12 | void kernel(const int x, const int y, PSGrid2DFloat g,  /* stencil body for grid point (x, y) */
13 |             PSGrid2DFloat g2) {
14 |   float v =  /* sum of the point and its +/-1 neighbors in x and y */
15 |       PSGridGetPeriodic(g,x,y) +  /* presumably wraps out-of-range indices -- confirm in the Physis API docs */
16 |       PSGridGetPeriodic(g,x+1,y) +
17 |       PSGridGetPeriodic(g,x-1,y) +
18 |       PSGridGetPeriodic(g,x,y+1) +
19 |       PSGridGetPeriodic(g,x,y-1);
20 |   PSGridEmit(g2, v);  /* presumably stores v at (x, y) of g2 -- TODO confirm */
21 |   return;
22 | }
23 | 
24 | void dump(float *input) {
25 |   int i;
26 |   for (i = 0; i < N*N; ++i) {
27 |     printf("%f\n", input[i]);
28 |   }
29 | }
30 | 
31 | int main(int argc, char *argv[]) {  /* test driver: two kernel maps, then print g1 */
32 |   PSInit(&argc, &argv, 2, N, N);  /* 2 = number of dimensions, followed by the extents */
33 |   PSGrid2DFloat g1 = PSGrid2DFloatNew(N, N);
34 |   PSGrid2DFloat g2 = PSGrid2DFloatNew(N, N);
35 | 
36 |   PSDomain2D d = PSDomain2DNew(0, N, 0, N);  /* bounds 0..N on both axes, i.e. no border excluded */
37 |   size_t nelms = N*N;
38 |   
39 |   float *indata = (float *)malloc(sizeof(float) * nelms);
40 |   int i;
41 |   for (i = 0; i < nelms; i++) {  /* indata[i] = i for every cell */
42 |     indata[i] = i;
43 |   }
44 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
45 |     
46 |   PSGridCopyin(g1, indata);  /* both grids receive the same initial data */
47 |   PSGridCopyin(g2, indata);  
48 | 
49 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2),  /* map 1: kernel with (g1, g2) */
50 |                PSStencilMap(kernel, d, g2, g1));  /* map 2: kernel with (g2, g1); presumably runs after map 1 -- confirm */
51 |     
52 |   PSGridCopyout(g1, outdata);  /* g1 is the grid passed as output of the second map */
53 |   dump(outdata);  
54 | 
55 |   PSGridFree(g1);
56 |   PSGridFree(g2);
57 |   PSFinalize();
58 |   free(indata);
59 |   free(outdata);
60 |   return 0;
61 | }
62 | 
63 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_5-pt-periodic.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET(x, y) ((x) + (y) * N)
 8 | 
 9 | /* 5-point sum with wrap-around neighbors over every cell of the N x N
10 |  * grid; reads g1, writes g2. */
11 | void kernel(float *g1, float *g2) {
12 |   int row, col;
13 |   for (row = 0; row != N; row++) {
14 |     /* Rows y-1 and y+1, wrapped at the grid edges. */
15 |     const int up = (row == 0) ? N - 1 : row - 1;
16 |     const int down = (row == N - 1) ? 0 : row + 1;
17 |     for (col = 0; col != N; col++) {
18 |       const int left = (col == 0) ? N - 1 : col - 1;
19 |       const int right = (col == N - 1) ? 0 : col + 1;
20 |       /* Addition order kept: center, x+1, x-1, y+1, y-1. */
21 |       float v = g1[OFFSET(col, row)] + g1[OFFSET(right, row)] +
22 |           g1[OFFSET(left, row)] + g1[OFFSET(col, down)] +
23 |           g1[OFFSET(col, up)];
24 |       g2[OFFSET(col, row)] = v;
25 |     }
26 |   }
27 | }
28 | 
29 | void dump(float *input) {
30 |   int i;
31 |   for (i = 0; i < N*N; ++i) {
32 |     printf("%f\n", input[i]);
33 |   }
34 | }
35 | 
36 | /* Initialize both grids to 0..N*N-1, apply the periodic stencil twice
37 |  * (g1 -> g2, then g2 -> g1) and print the final contents of g1. */
38 | int main(int argc, char *argv[]) {
39 |   const size_t count = N*N;
40 |   REAL *a = (REAL *)malloc(sizeof(REAL) * count);
41 |   REAL *b = (REAL *)malloc(sizeof(REAL) * count);
42 |   int k = 0;
43 | 
44 |   while (k < (int)count) {
45 |     a[k] = k;
46 |     b[k] = k;
47 |     k++;
48 |   }
49 | 
50 |   kernel(a, b);  /* first sweep */
51 |   kernel(b, a);  /* second sweep; result ends up back in a */
52 |   dump(a);
53 |   free(a);
54 |   free(b);
55 |   return 0;
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_7-pt-double-type.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | 
 6 | #define T double
 7 | 
 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 9 | 
10 | void kernel(T *g1, T *g2) {
11 |   int x, y, z;
12 |   int halo_width = 1;
13 |   for (z = halo_width; z < N-halo_width; ++z) {
14 |     for (y = halo_width; y < N-halo_width; ++y) {
15 |       for (x = halo_width; x < N-halo_width; ++x) {
16 |         T v = g1[OFFSET(x, y, z)] +
17 |             g1[OFFSET(x+1, y, z)] + g1[OFFSET(x-1, y, z)] +
18 |             g1[OFFSET(x, y+1, z)] + g1[OFFSET(x, y-1, z)] +
19 |             g1[OFFSET(x, y, z-1)] + g1[OFFSET(x, y, z+1)];
20 |         g2[OFFSET(x, y, z)] = v;
21 |       }
22 |     }
23 |   }
24 |   return;
25 | }
26 | 
27 | void dump(T *input) {
28 |   int i;
29 |   for (i = 0; i < N*N*N; ++i) {
30 |     printf("%f\n", input[i]);
31 |   }
32 | }
33 | 
34 | int main(int argc, char *argv[]) {
35 |   T *g1, *g2;  
36 |   size_t nelms = N*N*N;
37 |   g1 = (T *)malloc(sizeof(T) * nelms);
38 |   g2 = (T *)malloc(sizeof(T) * nelms);
39 | 
40 |   int i;
41 |   for (i = 0; i < (int)nelms; i++) {
42 |     g1[i] = i;
43 |     g2[i] = i;
44 |   }
45 | 
46 |   kernel(g1, g2);
47 |   dump(g2);
48 |   
49 |   free(g1);
50 |   free(g2);
51 |   return 0;
52 | }
53 | 
54 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_7-pt-int-type.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | 
 6 | #define T int
 7 | 
 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 9 | 
10 | void kernel(T *g1, T *g2) {
11 |   int x, y, z;
12 |   int halo_width = 1;
13 |   for (z = halo_width; z < N-halo_width; ++z) {
14 |     for (y = halo_width; y < N-halo_width; ++y) {
15 |       for (x = halo_width; x < N-halo_width; ++x) {
16 |         T v = g1[OFFSET(x, y, z)] +
17 |             g1[OFFSET(x+1, y, z)] + g1[OFFSET(x-1, y, z)] +
18 |             g1[OFFSET(x, y+1, z)] + g1[OFFSET(x, y-1, z)] +
19 |             g1[OFFSET(x, y, z-1)] + g1[OFFSET(x, y, z+1)];
20 |         g2[OFFSET(x, y, z)] = v;
21 |       }
22 |     }
23 |   }
24 |   return;
25 | }
26 | 
27 | void dump(T *input) {
28 |   int i;
29 |   for (i = 0; i < N*N*N; ++i) {
30 |     printf("%d\n", input[i]);
31 |   }
32 | }
33 | 
34 | int main(int argc, char *argv[]) {
35 |   T *g1, *g2;  
36 |   size_t nelms = N*N*N;
37 |   g1 = (T *)malloc(sizeof(T) * nelms);
38 |   g2 = (T *)malloc(sizeof(T) * nelms);
39 | 
40 |   int i;
41 |   for (i = 0; i < (int)nelms; i++) {
42 |     g1[i] = i;
43 |     g2[i] = i;
44 |   }
45 | 
46 |   kernel(g1, g2);
47 |   dump(g2);
48 |   
49 |   free(g1);
50 |   free(g2);
51 |   return 0;
52 | }
53 | 
54 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_7-pt-multi-iterations.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define ITER 5
 6 | #define REAL float
 7 | 
 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 9 | 
10 | void kernel(float *g1, float *g2) {
11 |   int x, y, z;
12 |   int halo_width = 1;
13 |   for (z = halo_width; z < N-halo_width; ++z) {
14 |     for (y = halo_width; y < N-halo_width; ++y) {
15 |       for (x = halo_width; x < N-halo_width; ++x) {
16 |         float v = g1[OFFSET(x, y, z)] +
17 |             g1[OFFSET(x+1, y, z)] + g1[OFFSET(x-1, y, z)] +
18 |             g1[OFFSET(x, y+1, z)] + g1[OFFSET(x, y-1, z)] +
19 |             g1[OFFSET(x, y, z-1)] + g1[OFFSET(x, y, z+1)];
20 |         g2[OFFSET(x, y, z)] = v;
21 |       }
22 |     }
23 |   }
24 |   return;
25 | }
26 | 
27 | void dump(float *input) {
28 |   int i;
29 |   for (i = 0; i < N*N*N; ++i) {
30 |     printf("%f\n", input[i]);
31 |   }
32 | }
33 | 
34 | int main(int argc, char *argv[]) {
35 |   REAL *g1, *g2;  
36 |   size_t nelms = N*N*N;
37 |   g1 = (REAL *)malloc(sizeof(REAL) * nelms);
38 |   g2 = (REAL *)malloc(sizeof(REAL) * nelms);
39 | 
40 |   int i;
41 |   for (i = 0; i < (int)nelms; i++) {
42 |     g1[i] = i;
43 |     g2[i] = i;
44 |   }
45 | 
46 |   for (i = 0; i < ITER; ++i) {
47 |     kernel(g1, g2);
48 |     kernel(g2, g1);    
49 |   }
50 | 
51 |   dump(g1);
52 |   
53 |   free(g1);
54 |   free(g2);
55 |   return 0;
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_7-pt-neumann-cond.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 8
 5 | #define ITER 1
 6 | #define REAL float
 7 | 
 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 9 | 
10 | /* 7-point sum over an nx*ny*nz box; a neighbor that would fall outside
11 |  * the box is replaced by the center cell. Reads g1, writes g2. */
12 | void kernel(REAL *g1, REAL *g2,
13 |             int nx, int ny, int nz) {
14 |   int x, y, z;
15 |   for (z = 0; z < nz; z++) {
16 |     for (y = 0; y < ny; y++) {
17 |       for (x = 0; x < nx; x++) {
18 |         int c, w, e, n, s, b, t;
19 |         c =  x + y * nx + z * nx * ny;
20 |         w = (x == 0)    ? c : c - 1;
21 |         e = (x == nx-1) ? c : c + 1;
22 |         /* Row stride is nx (not ny) so non-cubic boxes index correctly. */
23 |         n = (y == 0)    ? c : c - nx;
24 |         s = (y == ny-1) ? c : c + nx;
25 |         b = (z == 0)    ? c : c - nx * ny;
26 |         t = (z == nz-1) ? c : c + nx * ny;
27 |         g2[c] = g1[c] + g1[w] + g1[e]
28 |             + g1[s] + g1[n] + g1[b] + g1[t];
29 |       }
30 |     }
31 |   }
32 | }
33 | 
34 | void dump(float *input) {
35 |   int i;
36 |   for (i = 0; i < N*N*N; ++i) {
37 |     printf("%f\n", input[i]);
38 |   }
39 | }
40 | 
41 | int main(int argc, char *argv[]) {
42 |   REAL *g1, *g2;  
43 |   size_t nelms = N*N*N;
44 |   g1 = (REAL *)malloc(sizeof(REAL) * nelms);
45 |   g2 = (REAL *)malloc(sizeof(REAL) * nelms);
46 | 
47 |   int i;
48 |   for (i = 0; i < nelms; i++) {
49 |     g1[i] = i;
50 |   }
51 | 
52 |   int nx = N, ny = N, nz = N;
53 | 
54 |   for (i = 0; i < ITER; ++i) {
55 |     kernel(g1, g2, nx, ny, nz);
56 |     REAL *t = g1;
57 |     g1 = g2;
58 |     g2 = t;
59 |   }
60 |   dump(g1);
61 |   
62 |   free(g1);
63 |   free(g2);
64 |   return 0;
65 | }
66 | 
67 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_7-pt-periodic.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: 7-point periodic-boundary stencil
 3 |  * DIM: 3
 4 |  * PRIORITY: 1 
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 32
11 | 
12 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g,  /* stencil body for grid point (x, y, z) */
13 |             PSGrid3DFloat g2) {
14 |   float v =  /* sum of the point and its +/-1 neighbors in x, y and z */
15 |       PSGridGetPeriodic(g,x,y,z) +  /* presumably wraps out-of-range indices -- confirm in the Physis API docs */
16 |       PSGridGetPeriodic(g,x+1,y,z) +
17 |       PSGridGetPeriodic(g,x-1,y,z) +
18 |       PSGridGetPeriodic(g,x,y+1,z) +
19 |       PSGridGetPeriodic(g,x,y-1,z) + 
20 |       PSGridGetPeriodic(g,x,y,z+1) + 
21 |       PSGridGetPeriodic(g,x,y,z-1);
22 |   PSGridEmit(g2, v);  /* presumably stores v at (x, y, z) of g2 -- TODO confirm */
23 |   return;
24 | }
25 | 
26 | void dump(float *input) {
27 |   int i;
28 |   for (i = 0; i < N*N*N; ++i) {
29 |     printf("%f\n", input[i]);
30 |   }
31 | }
32 | 
33 | int main(int argc, char *argv[]) {
34 |   PSInit(&argc, &argv, 3, N, N, N);
35 |   PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N);
36 |   PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);
37 | 
38 |   PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);
39 |   size_t nelms = N*N*N;
40 |   
41 |   float *indata = (float *)malloc(sizeof(float) * nelms);
42 |   int i;
43 |   for (i = 0; i < nelms; i++) {
44 |     indata[i] = i;
45 |   }
46 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
47 |     
48 |   PSGridCopyin(g1, indata);
49 |   PSGridCopyin(g2, indata);  
50 | 
51 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2));
52 |     
53 |   PSGridCopyout(g2, outdata);
54 |   dump(outdata);  
55 | 
56 |   PSGridFree(g1);
57 |   PSGridFree(g2);
58 |   PSFinalize();
59 |   free(indata);
60 |   free(outdata);
61 |   return 0;
62 | }
63 | 
64 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_7-pt-periodic.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 8 | 
 9 | /* Neighbor indices one step before/after i on a ring of N cells
10 |  * (index 0 wraps to N-1 and index N-1 wraps to 0). */
11 | static int wrap_prev(int i) { return (i + N - 1) % N; }
12 | static int wrap_next(int i) { return (i + 1) % N; }
13 | 
14 | /* 7-point sum with wrap-around neighbors over every cell of the
15 |  * N x N x N grid; reads g1, writes g2. */
16 | void kernel(float *g1, float *g2) {
17 |   int i, j, k;
18 |   for (k = 0; k < N; k++) {
19 |     for (j = 0; j < N; j++) {
20 |       for (i = 0; i < N; i++) {
21 |         /* Addition order kept: center, x+1, x-1, y+1, y-1, z+1, z-1. */
22 |         float v = g1[OFFSET(i, j, k)] +
23 |             g1[OFFSET(wrap_next(i), j, k)] +
24 |             g1[OFFSET(wrap_prev(i), j, k)] +
25 |             g1[OFFSET(i, wrap_next(j), k)] +
26 |             g1[OFFSET(i, wrap_prev(j), k)] +
27 |             g1[OFFSET(i, j, wrap_next(k))] +
28 |             g1[OFFSET(i, j, wrap_prev(k))];
29 |         g2[OFFSET(i, j, k)] = v;
30 |       }
31 |     }
32 |   }
33 | }
34 | 
35 | void dump(float *input) {
36 |   int i;
37 |   for (i = 0; i < N*N*N; ++i) {
38 |     printf("%f\n", input[i]);
39 |   }
40 | }
41 | 
42 | int main(int argc, char *argv[]) {
43 |   REAL *g1, *g2;  
44 |   size_t nelms = N*N*N;
45 |   g1 = (REAL *)malloc(sizeof(REAL) * nelms);
46 |   g2 = (REAL *)malloc(sizeof(REAL) * nelms);
47 | 
48 |   int i;
49 |   for (i = 0; i < (int)nelms; i++) {
50 |     g1[i] = i;
51 |     g2[i] = i;
52 |   }
53 | 
54 |   kernel(g1, g2);
55 |   dump(g2);
56 |   
57 |   free(g1);
58 |   free(g2);
59 |   return 0;
60 | }
61 | 
62 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_7-pt-type-mix.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | 
 6 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 7 | 
 8 | /* 7-point sum of float g1 accumulated in double, multiplied by the int
 9 |  * coefficient c at the same cell and written to g2 (interior cells only). */
10 | void kernel(float *g1, double *g2, int *c) {
11 |   const int lo = 1, hi = N - 1;  /* a one-cell border is skipped */
12 |   int i, j, k;
13 |   for (k = lo; k < hi; k++) {
14 |     for (j = lo; j < hi; j++) {
15 |       for (i = lo; i < hi; i++) {
16 |         const int at = OFFSET(i, j, k);
17 |         /* Addition order kept: center, x+1, x-1, y+1, y-1, z-1, z+1. */
18 |         double v = (double)g1[at] + (double)g1[at + 1] +
19 |             (double)g1[at - 1] + (double)g1[at + N] +
20 |             (double)g1[at - N] + (double)g1[at - N * N] +
21 |             (double)g1[at + N * N];
22 |         g2[at] = v * c[at];
23 |       }
24 |     }
25 |   }
26 | }
27 | 
28 | void dump(double *input) {
29 |   int i;
30 |   for (i = 0; i < N*N*N; ++i) {
31 |     printf("%f\n", input[i]);
32 |   }
33 | }
34 | 
35 | int main(int argc, char *argv[]) {
36 |   float *g1;
37 |   double *g2;
38 |   int *c;
39 |   size_t nelms = N*N*N;
40 |   g1 = (float *)malloc(sizeof(float) * nelms);
41 |   g2 = (double *)malloc(sizeof(double) * nelms);
42 |   c = (int *)malloc(sizeof(int) * nelms);  
43 | 
44 |   int i;
45 |   for (i = 0; i < (int)nelms; i++) {
46 |     g1[i] = i;
47 |     g2[i] = 0;
48 |     c[i] = i % 10;
49 |   }
50 | 
51 |   kernel(g1, g2, c);
52 |   dump(g2);
53 |   
54 |   free(g1);
55 |   free(g2);
56 |   free(c);
57 |   return 0;
58 | }
59 | 
60 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_7-pt.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 8 | 
 9 | /* 7-point sum over interior cells of g1 (one-cell border skipped), stored to g2. */
10 | void kernel(float *g1, float *g2) {
11 |   const int last = N - 2;  /* highest interior index */
12 |   int i, j, k;
13 |   for (k = 1; k <= last; k++)
14 |     for (j = 1; j <= last; j++)
15 |       for (i = 1; i <= last; i++) {
16 |         const int at = OFFSET(i, j, k);
17 |         /* Addition order kept: center, x+1, x-1, y+1, y-1, z-1, z+1. */
18 |         float v = g1[at] +
19 |             g1[at + 1] + g1[at - 1] +
20 |             g1[at + N] + g1[at - N] +
21 |             g1[at - N * N] + g1[at + N * N];
22 |         g2[at] = v;
23 |       }
24 | }
25 | 
26 | void dump(float *input) {
27 |   int i;
28 |   for (i = 0; i < N*N*N; ++i) {
29 |     printf("%f\n", input[i]);
30 |   }
31 | }
32 | 
33 | int main(int argc, char *argv[]) {
34 |   REAL *g1, *g2;  
35 |   size_t nelms = N*N*N;
36 |   g1 = (REAL *)malloc(sizeof(REAL) * nelms);
37 |   g2 = (REAL *)malloc(sizeof(REAL) * nelms);
38 | 
39 |   int i;
40 |   for (i = 0; i < (int)nelms; i++) {
41 |     g1[i] = i;
42 |     g2[i] = i;
43 |   }
44 | 
45 |   kernel(g1, g2);
46 |   dump(g2);
47 |   
48 |   free(g1);
49 |   free(g2);
50 |   return 0;
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_7-pt.module.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Module test
 3 |  * DIM: 3
 4 |  * PRIORITY: 1 
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | static void kernel(const int x, const int y, const int z, PSGrid3DFloat g1,  /* stencil body for grid point (x, y, z) */
11 |                    PSGrid3DFloat g2) {
12 |   float v = PSGridGet(g1, x, y, z) +  /* center plus the six +/-1 neighbors along x, y, z */
13 |       PSGridGet(g1, x+1, y, z) + PSGridGet(g1, x-1, y, z) +
14 |       PSGridGet(g1, x, y+1, z) + PSGridGet(g1, x, y-1, z) +
15 |       PSGridGet(g1, x, y, z-1) + PSGridGet(g1, x, y, z+1);
16 |   PSGridEmit(g2, v);  /* presumably stores v at (x, y, z) of g2 -- TODO confirm */
17 |   return;
18 | }
19 | 
20 | #define halo_width (1)
21 | 
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 | 
26 | int run(PSGrid3DFloat g1, PSGrid3DFloat g2, int n) {  /* submit kernel with (g1, g2); always returns 0 */
27 |   PSDomain3D d = PSDomain3DNew(0+halo_width, n-halo_width,  /* halo_width cells trimmed from each side of every axis */
28 |                                0+halo_width, n-halo_width,
29 |                                0+halo_width, n-halo_width);
30 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2));
31 |   return 0;
32 | }
33 | 
34 | PSGrid3DFloat create_grid(int n) {  /* thin wrapper: new float grid with extent n on all three axes */
35 |   PSGrid3DFloat g = PSGrid3DFloatNew(n, n, n);
36 |   return g;
37 | }
38 | 
39 | void copyin(PSGrid3DFloat g, float *d) {  /* thin wrapper over PSGridCopyin(g, d) */
40 |   PSGridCopyin(g, d);
41 | }
42 | 
43 | void copyout(PSGrid3DFloat g, float *d) {  /* thin wrapper over PSGridCopyout(g, d) */
44 |   PSGridCopyout(g, d);
45 | }
46 | 
47 | int test_module_init(int argc, char *argv[], int n) {  /* callers must pass all three arguments; always returns 0 */
48 |   PSInit(&argc, &argv, 3, n, n, n);  /* 3 = number of dimensions, followed by the extents */
49 |   return 0;
50 | }
51 | 
52 | #ifdef __cplusplus
53 | }
54 | #endif
55 | 
56 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_7-pt.module_base.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: 7-point stencil
 3 |  * DIM: 3
 4 |  * PRIORITY: 1 
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include <stdlib.h>
 9 | //#include "physis/physis.h"
10 | 
11 | #define N 32
12 | 
13 | /* Opaque grid handle: this driver does not include physis/physis.h. */
14 | typedef void * PSGrid3DFloat;
15 | /* Prototypes must match the definitions in test_7-pt.module.c. */
16 | extern int test_module_init(int, char *[], int);
17 | extern PSGrid3DFloat create_grid(int);
18 | extern void copyin(PSGrid3DFloat, float*);
19 | extern void copyout(PSGrid3DFloat, float*);
20 | extern int run(PSGrid3DFloat, PSGrid3DFloat, int);
21 | 
22 | /* Print all N*N*N values in storage order, one "%f" per line. */
23 | void dump(float *input) {
24 |   int i;
25 |   for (i = 0; i < N*N*N; ++i) {
26 |     printf("%f\n", input[i]);
27 |   }
28 | }
29 | 
30 | /* Drive the module: init with size N, load 0..N^3-1 into both grids,
31 |  * run the stencil once and print the contents of g2. */
32 | int main(int argc, char *argv[]) {
33 |   test_module_init(argc, argv, N);  /* third argument is the grid extent */
34 |   size_t nelms = N*N*N;
35 |   float *indata = (float *)malloc(sizeof(float) * nelms);
36 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
37 |   int i;
38 |   for (i = 0; i < (int)nelms; i++) {
39 |     indata[i] = i;
40 |   }
41 | 
42 |   PSGrid3DFloat g1 = create_grid(N);
43 |   PSGrid3DFloat g2 = create_grid(N);
44 |   copyin(g1, indata);
45 |   copyin(g2, indata);
46 |   run(g1, g2, N);
47 |   copyout(g2, outdata);
48 |   dump(outdata);
49 |   free(indata);
50 |   free(outdata);
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_9-pt-2d.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: 9-point stencil with 2-D grids
 3 |  * DIM: 2
 4 |  * PRIORITY: 1 
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 32
11 | 
12 | static void kernel(const int x, const int y, PSGrid2DFloat g, PSGrid2DFloat g2) {  /* stencil body for grid point (x, y) */
13 |   float v =  /* sum over the 3x3 neighborhood centered on (x, y) */
14 |       PSGridGet(g,x, y) + 
15 |       PSGridGet(g,x-1, y) +
16 |       PSGridGet(g,x+1, y) +
17 |       PSGridGet(g,x, y-1) +
18 |       PSGridGet(g,x, y+1) +
19 |       PSGridGet(g,x-1, y-1) +  /* the four diagonal neighbors follow */
20 |       PSGridGet(g,x+1, y-1) +
21 |       PSGridGet(g,x-1, y+1) +
22 |       PSGridGet(g,x+1, y+1);
23 |   PSGridEmit(g2, v);  /* presumably stores v at (x, y) of g2 -- TODO confirm */
24 |   return;
25 | }
26 | 
27 | void dump(float *input) {
28 |   int i;
29 |   for (i = 0; i < N*N; ++i) {
30 |     printf("%f\n", input[i]);
31 |   }
32 | }
33 | 
34 | int main(int argc, char *argv[]) {
35 |   PSInit(&argc, &argv, 2, N, N);
36 |   PSGrid2DFloat g1 = PSGrid2DFloatNew(N, N);
37 |   PSGrid2DFloat g2 = PSGrid2DFloatNew(N, N);
38 | 
39 |   PSDomain2D d = PSDomain2DNew(1, N-1, 1, N-1);
40 |   size_t nelms = N * N;
41 |   
42 |   float *indata = (float *)malloc(sizeof(float) * nelms);
43 |   int i;
44 |   for (i = 0; i < nelms; i++) {
45 |     indata[i] = i;
46 |   }
47 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
48 |     
49 |   PSGridCopyin(g1, indata);
50 |   PSGridCopyin(g2, indata);  
51 | 
52 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2));
53 |     
54 |   PSGridCopyout(g2, outdata);
55 |   dump(outdata);  
56 | 
57 |   PSGridFree(g1);
58 |   PSGridFree(g2);
59 |   PSFinalize();
60 |   free(indata);
61 |   free(outdata);
62 |   return 0;
63 | }
64 | 
65 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_9-pt-2d.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET(x, y) ((x) + (y) * N)
 8 | 
 9 | void kernel(float *g1, float *g2) {
10 |   int x, y;
11 |   for (y = 1; y < N-1; ++y) {  
12 |     for (x = 1; x < N-1; ++x) {
13 |       float v = g1[OFFSET(x, y)] +
14 |           g1[OFFSET(x-1, y)] +
15 |           g1[OFFSET(x+1, y)] +
16 |           g1[OFFSET(x, y-1)] +
17 |           g1[OFFSET(x, y+1)] +
18 |           g1[OFFSET(x-1, y-1)] +
19 |           g1[OFFSET(x+1, y-1)] +
20 |           g1[OFFSET(x-1, y+1)] +
21 |           g1[OFFSET(x+1, y+1)];
22 |       g2[OFFSET(x, y)] = v;
23 |     }
24 |   }
25 |   return;
26 | }
27 | 
28 | void dump(float *input) {
29 |   int i;
30 |   for (i = 0; i < N*N; ++i) {
31 |     printf("%f\n", input[i]);
32 |   }
33 | }
34 | 
35 | int main(int argc, char *argv[]) {
36 |   REAL *g1, *g2;  
37 |   size_t nelms = N*N;
38 |   g1 = (REAL *)malloc(sizeof(REAL) * nelms);
39 |   g2 = (REAL *)malloc(sizeof(REAL) * nelms);
40 | 
41 |   int i;
42 |   for (i = 0; i < (int)nelms; i++) {
43 |     g1[i] = i;
44 |     g2[i] = i;
45 |   }
46 | 
47 |   kernel(g1, g2);
48 |   dump(g2);
49 |   
50 |   free(g1);
51 |   free(g2);
52 |   return 0;
53 | }
54 | 
55 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_9-pt-periodic-reduction.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: 9-point periodic stencil with reduction
 3 |  * DIM: 2
 4 |  * PRIORITY: 1 
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 32
11 | #define TYPE int
12 | #define PSGridType PSGrid2DInt
13 | 
14 | static void kernel(const int x, const int y, PSGridType g, PSGridType g2) {
15 |   float v =
16 |       PSGridGetPeriodic(g,x, y) + 
17 |       PSGridGetPeriodic(g,x-1, y) +
18 |       PSGridGetPeriodic(g,x+1, y) +
19 |       PSGridGetPeriodic(g,x, y-1) +
20 |       PSGridGetPeriodic(g,x, y+1) +
21 |       PSGridGetPeriodic(g,x-1, y-1) +
22 |       PSGridGetPeriodic(g,x+1, y-1) +
23 |       PSGridGetPeriodic(g,x-1, y+1) +
24 |       PSGridGetPeriodic(g,x+1, y+1);
25 |   PSGridEmit(g2, v);
26 |   return;
27 | }
28 | 
29 | int main(int argc, char *argv[]) {
30 |   PSInit(&argc, &argv, 2, N, N);
31 |   PSGridType g1 = PSGrid2DIntNew(N, N);
32 |   PSGridType g2 = PSGrid2DIntNew(N, N);
33 | 
34 |   PSDomain2D d = PSDomain2DNew(0, N, 0, N);
35 |   size_t nelms = N * N;
36 |   
37 |   TYPE *indata = (TYPE *)malloc(sizeof(TYPE) * nelms);
38 |   int i;
39 |   for (i = 0; i < nelms; i++) {
40 |     indata[i] = i;
41 |   }
42 |   TYPE *outdata = (TYPE *)malloc(sizeof(TYPE) * nelms);
43 |     
44 |   PSGridCopyin(g1, indata);
45 |   PSGridCopyin(g2, indata);  
46 | 
47 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2),
48 |                PSStencilMap(kernel, d, g2, g1));
49 |   int v;
50 |   PSReduce(&v, PS_SUM, g1);
51 |   printf("%d\n", v);
52 | 
53 |   PSGridFree(g1);
54 |   PSGridFree(g2);
55 |   PSFinalize();
56 |   free(indata);
57 |   free(outdata);
58 |   return 0;
59 | }
60 | 
61 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_9-pt-periodic-reduction.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define TYPE int
 6 | 
 7 | #define OFFSET(x, y) ((x) + (y) * N)
 8 | 
 9 | /* 9-point sum (3x3 neighborhood, wrap-around at the edges) over every
10 |  * cell of the N x N grid; reads g1, writes g2. */
11 | void kernel(TYPE *g1, TYPE *g2) {
12 |   int row, col;
13 |   for (row = 0; row < N; row++) {
14 |     const int up = (row + N - 1) % N;
15 |     const int down = (row + 1) % N;
16 |     /* Start of the current row and of its two wrapped neighbor rows. */
17 |     const TYPE *mid_r = g1 + row * N;
18 |     const TYPE *up_r = g1 + up * N;
19 |     const TYPE *down_r = g1 + down * N;
20 |     for (col = 0; col < N; col++) {
21 |       const int l = (col + N - 1) % N;
22 |       const int r = (col + 1) % N;
23 |       /* The TYPE sum is routed through a float before being stored. */
24 |       float v = mid_r[col] + mid_r[l] + mid_r[r] +
25 |           up_r[col] + down_r[col] +
26 |           up_r[l] + up_r[r] + down_r[l] + down_r[r];
27 |       g2[OFFSET(col, row)] = v;
28 |     }
29 |   }
30 | }
31 | 
32 | /* Sum of all N*N grid values. */
33 | TYPE reduce(TYPE *g) {
34 |   const TYPE *p = g;
35 |   const TYPE *const end = g + N*N;
36 |   TYPE total = 0;
37 |   while (p != end) total += *p++;
38 |   return total;
39 | }
40 | 
41 | int main(int argc, char *argv[]) {
42 |   TYPE *g1, *g2;  
43 |   size_t nelms = N*N;
44 |   g1 = (TYPE *)malloc(sizeof(TYPE) * nelms);
45 |   g2 = (TYPE *)malloc(sizeof(TYPE) * nelms);
46 | 
47 |   int i;
48 |   for (i = 0; i < (int)nelms; i++) {
49 |     g1[i] = i;
50 |     g2[i] = i;
51 |   }
52 | 
53 |   kernel(g1, g2);
54 |   kernel(g2, g1);
55 |   printf("%d\n", reduce(g1));
56 |   
57 |   free(g1);
58 |   free(g2);
59 |   return 0;
60 | }
61 | 
62 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_9-pt-reduction.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: 9-point stencil with reduction
 3 |  * DIM: 2
 4 |  * PRIORITY: 1 
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 32
11 | #define TYPE int
12 | #define PSGridType PSGrid2DInt
13 | 
14 | static void kernel(const int x, const int y, PSGridType g, PSGridType g2) {
15 |   float v =
16 |       PSGridGet(g,x, y) + 
17 |       PSGridGet(g,x-1, y) +
18 |       PSGridGet(g,x+1, y) +
19 |       PSGridGet(g,x, y-1) +
20 |       PSGridGet(g,x, y+1) +
21 |       PSGridGet(g,x-1, y-1) +
22 |       PSGridGet(g,x+1, y-1) +
23 |       PSGridGet(g,x-1, y+1) +
24 |       PSGridGet(g,x+1, y+1);
25 |   PSGridEmit(g2, v);
26 |   return;
27 | }
28 | 
29 | void dump(float *input) {
30 |   int i;
31 |   for (i = 0; i < N*N; ++i) {
32 |     printf("%f\n", input[i]);
33 |   }
34 | }
35 | 
36 | int main(int argc, char *argv[]) {
37 |   PSInit(&argc, &argv, 2, N, N);
38 |   PSGridType g1 = PSGrid2DIntNew(N, N);
39 |   PSGridType g2 = PSGrid2DIntNew(N, N);
40 | 
41 |   PSDomain2D d = PSDomain2DNew(1, N-1, 1, N-1);
42 |   size_t nelms = N * N;
43 |   
44 |   TYPE *indata = (TYPE *)malloc(sizeof(TYPE) * nelms);
45 |   int i;
46 |   for (i = 0; i < nelms; i++) {
47 |     indata[i] = i;
48 |   }
49 |   TYPE *outdata = (TYPE *)malloc(sizeof(TYPE) * nelms);
50 |     
51 |   PSGridCopyin(g1, indata);
52 |   PSGridCopyin(g2, indata);  
53 | 
54 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2),
55 |                PSStencilMap(kernel, d, g2, g1));
56 |   int v;
57 |   PSReduce(&v, PS_SUM, g1);
58 |   printf("%d\n", v);
59 | 
60 |   PSGridFree(g1);
61 |   PSGridFree(g2);
62 |   PSFinalize();
63 |   free(indata);
64 |   free(outdata);
65 |   return 0;
66 | }
67 | 
68 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_9-pt-reduction.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define TYPE int
 6 | 
 7 | #define OFFSET(x, y) ((x) + (y) * N)
 8 | 
 9 | void kernel(TYPE *g1, TYPE *g2) {
10 |   int x, y;
11 |   for (y = 1; y < N-1; ++y) {  
12 |     for (x = 1; x < N-1; ++x) {
13 |       float v = g1[OFFSET(x, y)] +
14 |           g1[OFFSET(x-1, y)] +
15 |           g1[OFFSET(x+1, y)] +
16 |           g1[OFFSET(x, y-1)] +
17 |           g1[OFFSET(x, y+1)] +
18 |           g1[OFFSET(x-1, y-1)] +
19 |           g1[OFFSET(x+1, y-1)] +
20 |           g1[OFFSET(x-1, y+1)] +
21 |           g1[OFFSET(x+1, y+1)];
22 |       g2[OFFSET(x, y)] = v;
23 |     }
24 |   }
25 |   return;
26 | }
27 | 
28 | TYPE reduce(TYPE *g) {
29 |   TYPE v = 0;
30 |   int i;
31 |   for (i = 0; i < N*N; ++i) {
32 |     v += g[i];
33 |   }
34 |   return v;
35 | }
36 | 
37 | int main(int argc, char *argv[]) {
38 |   TYPE *g1, *g2;  
39 |   size_t nelms = N*N;
40 |   g1 = (TYPE *)malloc(sizeof(TYPE) * nelms);
41 |   g2 = (TYPE *)malloc(sizeof(TYPE) * nelms);
42 | 
43 |   int i;
44 |   for (i = 0; i < (int)nelms; i++) {
45 |     g1[i] = i;
46 |     g2[i] = i;
47 |   }
48 | 
49 |   kernel(g1, g2);
50 |   kernel(g2, g1);
51 |   printf("%d\n", reduce(g1));
52 |   
53 |   free(g1);
54 |   free(g2);
55 |   return 0;
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_asymmetric-periodic.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Asymmetric stencil with periodic boundary condition
 3 |  * DIM: 3
 4 |  * PRIORITY: 1
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 32
11 | 
12 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g,  /* stencil body for grid point (x, y, z) */
13 |             PSGrid3DFloat g2) {
14 |   float v = PSGridGet(g, x, y, z) +  /* asymmetric in x: offsets 0, -1 and +2 */
15 |       PSGridGet(g, x-1, y, z) + PSGridGetPeriodic(g, x+2, y, z);  /* only the +2 read uses the periodic accessor */
16 |   PSGridEmit(g2, v);  /* presumably stores v at (x, y, z) of g2 -- TODO confirm */
17 |   return;
18 | }
19 | 
20 | void dump(float *input) {
21 |   int i;
22 |   for (i = 0; i < N*N*N; ++i) {
23 |     printf("%f\n", input[i]);
24 |   }
25 | }
26 | 
27 | #define halo_width (2)
28 | /* Test driver: loads 0..N^3-1 into g1 and g2, runs kernel once over d, prints g2. */
29 | int main(int argc, char *argv[]) {
30 |   PSInit(&argc, &argv, 3, N, N, N);
31 |   PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N);
32 |   PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);
33 |   /* The domain leaves halo_width cells out at both ends of every axis. */
34 |   PSDomain3D d = PSDomain3DNew(0+halo_width, N-halo_width,
35 |                                0+halo_width, N-halo_width,
36 |                                0+halo_width, N-halo_width);
37 |   size_t nelms = N*N*N;
38 |   /* Host input: element i holds the value i. */
39 |   float *indata = (float *)malloc(sizeof(float) * nelms);
40 |   int i;
41 |   for (i = 0; i < nelms; i++) {
42 |     indata[i] = i;
43 |   }
44 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
45 |   /* Both grids are loaded with the same host data before the stencil runs. */
46 |   PSGridCopyin(g1, indata);
47 |   PSGridCopyin(g2, indata);
48 |   /* kernel receives g1 as g and g2 as its emit target. */
49 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2));
50 |   /* Copy g2 back to the host and print every element. */
51 |   PSGridCopyout(g2, outdata);
52 |   dump(outdata);
53 | 
54 |   PSGridFree(g1);
55 |   PSGridFree(g2);
56 |   PSFinalize();
57 |   free(indata);
58 |   free(outdata);
59 |   return 0;
60 | }
61 | 
62 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_asymmetric-periodic.manual.cuda.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "cuda.h"
 3 | #include "cuda_runtime.h"
 4 | 
 5 | #define N 32
 6 | #define REAL float
 7 | 
 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 9 | 
10 |  /* One thread per grid point. Threads at least 2 cells away from every face
11 |   * write g2 = g1(x) + g1(x-1) + g1((x+2) wrapped modulo N); others do nothing. */
12 |  __global__ void kernel(REAL *g1, REAL *g2) {
13 |    const int gx = blockIdx.x * blockDim.x + threadIdx.x;
14 |    const int gy = blockIdx.y * blockDim.y + threadIdx.y;
15 |    const int gz = blockIdx.z * blockDim.z + threadIdx.z;
16 | 
17 |    if (gx > 1 && gx < N-2 && gy > 1 && gy < N-2 && gz > 1 && gz < N-2) {
18 |      const int wrapped = (gx + 2 + N) % N;
19 |      g2[OFFSET(gx, gy, gz)] = g1[OFFSET(gx, gy, gz)] +
20 |          g1[OFFSET(gx-1, gy, gz)] + g1[OFFSET(wrapped, gy, gz)];
21 |    }
22 | }
23 | 
24 | /* Print all N*N*N values of input to stdout, one "%f" per line. */
25 | void dump(float *input) {
26 |   const float *cur = input;
27 |   const float *const end = input + N*N*N;
28 |   for (; cur != end; ++cur) printf("%f\n", *cur);
29 | }
30 | 
31 | #define halo_width (2)  /* matches the 2-cell guard hard-coded in kernel; not referenced in this file */
32 | 
33 | /* Host driver: uploads 0..N^3-1 to both device grids, launches kernel once, prints the result. */
34 | int main(int argc, char *argv[]) {
35 |   REAL *g1, *g1d;
36 |   REAL *g2d;
37 |   size_t nelms = N*N*N;
38 |   g1 = (REAL *)malloc(sizeof(REAL) * nelms);
39 |   cudaMalloc((void**)&g1d, sizeof(REAL) * nelms);
40 |   cudaMalloc((void**)&g2d, sizeof(REAL) * nelms);
41 |   int i;
42 |   for (i = 0; i < (int)nelms; i++) {
43 |     g1[i] = i;
44 |   }
45 |   /* g2d starts as a copy of the input, so points the kernel skips keep it. */
46 |   cudaMemcpy(g1d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice);
47 |   cudaMemcpy(g2d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice);
48 |   dim3 block_dim(4, 4, 4);
49 |   dim3 grid_dim(N/block_dim.x, N/block_dim.y, N/block_dim.z);
50 |   kernel<<<grid_dim, block_dim>>>(g1d, g2d);
51 |   cudaMemcpy(g1, g2d, sizeof(REAL) * nelms, cudaMemcpyDeviceToHost);
52 |   dump(g1);
53 |   /* Release device and host buffers explicitly rather than relying on exit. */
54 |   cudaFree(g1d);
55 |   cudaFree(g2d);
56 |   free(g1);
57 |   cudaDeviceReset();
58 |   return 0;
59 | }
60 | 
61 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_asymmetric-periodic.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 8 | 
 9 | /* Reference stencil over the interior (2-cell halo on every side):
10 |  * g2 = g1(x) + g1(x-1) + g1((x+2) wrapped modulo N), all at the same y, z. */
11 | void kernel(float *g1, float *g2) {
12 |   const int lo = 2, hi = N - 2;
13 |   int ix, iy, iz;
14 |   for (iz = lo; iz < hi; ++iz) {
15 |     for (iy = lo; iy < hi; ++iy) {
16 |       for (ix = lo; ix < hi; ++ix) {
17 |         const int wrapped = (ix + 2 + N) % N;
18 |         g2[OFFSET(ix, iy, iz)] = g1[OFFSET(ix, iy, iz)] +
19 |             g1[OFFSET(ix-1, iy, iz)] + g1[OFFSET(wrapped, iy, iz)];
20 |       }
21 |     }
22 |   }
23 | }
24 | 
25 | /* Print all N*N*N values of input to stdout, one "%f" per line. */
26 | void dump(float *input) {
27 |   const float *cur = input;
28 |   const float *const end = input + N*N*N;
29 |   for (; cur != end; ++cur) printf("%f\n", *cur);
30 | }
31 | 
32 | /* Seed both grids with 0..N^3-1, run the reference stencil once, print g2. */
33 | int main(int argc, char *argv[]) {
34 |   const size_t count = N*N*N;
35 |   REAL *src = (REAL *)malloc(sizeof(REAL) * count);
36 |   REAL *dst = (REAL *)malloc(sizeof(REAL) * count);
37 |   size_t idx;
38 | 
39 |   /* dst starts as a copy of src, so cells the stencil skips keep the input. */
40 |   for (idx = 0; idx < count; ++idx) {
41 |     src[idx] = dst[idx] = (REAL)idx;
42 |   }
43 | 
44 |   kernel(src, dst);
45 |   dump(dst);
46 | 
47 |   free(src);
48 |   free(dst);
49 |   return 0;
50 | }
51 | 
52 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_asymmetric.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Asymmetric stencil
 3 |  * DIM: 3
 4 |  * PRIORITY: 1
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 32
11 | /* Stencil body: sums g at (x,y,z), (x+1,y+1,z+1) and (x-2,y-2,z-2); emits the sum to g2. */
12 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g,
13 |             PSGrid3DFloat g2) {
14 |   float v = PSGridGet(g, x, y, z) +
15 |       PSGridGet(g, x+1, y+1, z+1) + PSGridGet(g, x-2, y-2, z-2);
16 |   PSGridEmit(g2, v);
17 |   return;
18 | }
19 | 
20 | /* Print all N*N*N values of input to stdout, one "%f" per line. */
21 | void dump(float *input) {
22 |   const float *cur = input;
23 |   const float *const end = input + N*N*N;
24 |   for (; cur != end; ++cur) printf("%f\n", *cur);
25 | }
26 | 
27 | #define halo_width (2)
28 | /* Test driver: loads 0..N^3-1 into g1 and g2, runs kernel once over d, prints g2. */
29 | int main(int argc, char *argv[]) {
30 |   PSInit(&argc, &argv, 3, N, N, N);
31 |   PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N);
32 |   PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);
33 |   /* The domain leaves halo_width cells out at both ends of every axis. */
34 |   PSDomain3D d = PSDomain3DNew(0+halo_width, N-halo_width,
35 |                                0+halo_width, N-halo_width,
36 |                                0+halo_width, N-halo_width);
37 |   size_t nelms = N*N*N;
38 |   /* Host input: element i holds the value i. */
39 |   float *indata = (float *)malloc(sizeof(float) * nelms);
40 |   int i;
41 |   for (i = 0; i < nelms; i++) {
42 |     indata[i] = i;
43 |   }
44 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
45 |   /* Both grids are loaded with the same host data before the stencil runs. */
46 |   PSGridCopyin(g1, indata);
47 |   PSGridCopyin(g2, indata);
48 |   /* kernel receives g1 as g and g2 as its emit target. */
49 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2));
50 |   /* Copy g2 back to the host and print every element. */
51 |   PSGridCopyout(g2, outdata);
52 |   dump(outdata);
53 | 
54 |   PSGridFree(g1);
55 |   PSGridFree(g2);
56 |   PSFinalize();
57 |   free(indata);
58 |   free(outdata);
59 |   return 0;
60 | }
61 | 
62 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_asymmetric.manual.cuda.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "cuda.h"
 3 | #include "cuda_runtime.h"
 4 | 
 5 | #define N 32
 6 | #define REAL float
 7 | 
 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 9 | 
10 |  /* One thread per grid point. Threads at least 2 cells away from every face
11 |   * write g2 = g1(x,y,z) + g1(x+1,y+1,z+1) + g1(x-2,y-2,z-2); others do nothing. */
12 |  __global__ void kernel(REAL *g1, REAL *g2) {
13 |    const int gx = blockIdx.x * blockDim.x + threadIdx.x;
14 |    const int gy = blockIdx.y * blockDim.y + threadIdx.y;
15 |    const int gz = blockIdx.z * blockDim.z + threadIdx.z;
16 | 
17 |    if (gx > 1 && gx < N-2 && gy > 1 && gy < N-2 && gz > 1 && gz < N-2) {
18 |      const REAL centre = g1[OFFSET(gx, gy, gz)];
19 |      g2[OFFSET(gx, gy, gz)] = centre +
20 |          g1[OFFSET(gx+1, gy+1, gz+1)] + g1[OFFSET(gx-2, gy-2, gz-2)];
21 |    }
22 | }
23 | 
24 | /* Print all N*N*N values of input to stdout, one "%f" per line. */
25 | void dump(float *input) {
26 |   const float *cur = input;
27 |   const float *const end = input + N*N*N;
28 |   for (; cur != end; ++cur) printf("%f\n", *cur);
29 | }
30 | 
31 | #define halo_width (2)  /* matches the 2-cell guard hard-coded in kernel; not referenced in this file */
32 | 
33 | /* Host driver: uploads 0..N^3-1 to both device grids, launches kernel once, prints the result. */
34 | int main(int argc, char *argv[]) {
35 |   REAL *g1, *g1d;
36 |   REAL *g2d;
37 |   size_t nelms = N*N*N;
38 |   g1 = (REAL *)malloc(sizeof(REAL) * nelms);
39 |   cudaMalloc((void**)&g1d, sizeof(REAL) * nelms);
40 |   cudaMalloc((void**)&g2d, sizeof(REAL) * nelms);
41 |   int i;
42 |   for (i = 0; i < (int)nelms; i++) {
43 |     g1[i] = i;
44 |   }
45 |   /* g2d starts as a copy of the input, so points the kernel skips keep it. */
46 |   cudaMemcpy(g1d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice);
47 |   cudaMemcpy(g2d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice);
48 |   dim3 block_dim(4, 4, 4);
49 |   dim3 grid_dim(N/block_dim.x, N/block_dim.y, N/block_dim.z);
50 |   kernel<<<grid_dim, block_dim>>>(g1d, g2d);
51 |   cudaMemcpy(g1, g2d, sizeof(REAL) * nelms, cudaMemcpyDeviceToHost);
52 |   dump(g1);
53 |   /* Release device and host buffers explicitly rather than relying on exit. */
54 |   cudaFree(g1d);
55 |   cudaFree(g2d);
56 |   free(g1);
57 |   cudaDeviceReset();
58 |   return 0;
59 | }
60 | 
61 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_asymmetric.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 8 | 
 9 | /* Reference stencil over the interior (2-cell halo on every side):
10 |  * g2 = g1(x,y,z) + g1(x+1,y+1,z+1) + g1(x-2,y-2,z-2). */
11 | void kernel(float *g1, float *g2) {
12 |   const int lo = 2, hi = N - 2;
13 |   int ix, iy, iz;
14 |   for (iz = lo; iz < hi; ++iz) {
15 |     for (iy = lo; iy < hi; ++iy) {
16 |       for (ix = lo; ix < hi; ++ix) {
17 |         g2[OFFSET(ix, iy, iz)] = g1[OFFSET(ix, iy, iz)] +
18 |             g1[OFFSET(ix+1, iy+1, iz+1)] + g1[OFFSET(ix-2, iy-2, iz-2)];
19 |       }
20 |     }
21 |   }
22 | }
23 | 
24 | /* Print all N*N*N values of input to stdout, one "%f" per line. */
25 | void dump(float *input) {
26 |   const float *cur = input;
27 |   const float *const end = input + N*N*N;
28 |   for (; cur != end; ++cur) printf("%f\n", *cur);
29 | }
30 | 
31 | /* Seed both grids with 0..N^3-1, run the reference stencil once, print g2. */
32 | int main(int argc, char *argv[]) {
33 |   const size_t count = N*N*N;
34 |   REAL *src = (REAL *)malloc(sizeof(REAL) * count);
35 |   REAL *dst = (REAL *)malloc(sizeof(REAL) * count);
36 |   size_t idx;
37 | 
38 |   /* dst starts as a copy of src, so cells the stencil skips keep the input. */
39 |   for (idx = 0; idx < count; ++idx) {
40 |     src[idx] = dst[idx] = (REAL)idx;
41 |   }
42 | 
43 |   kernel(src, dst);
44 |   dump(dst);
45 | 
46 |   free(src);
47 |   free(dst);
48 |   return 0;
49 | }
50 | 
51 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_mixed-dim.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Grids with different dimensions
 3 |  * DIM: 3
 4 |  * PRIORITY: 2
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 32
11 | /* Stencil body: emits to g2 the sum of g * k taken at x, x-1 and x+1 (g at the same y, z; k is 1-D). */
12 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g,
13 |             PSGrid3DFloat g2, PSGrid1DFloat k) {
14 |   float v = PSGridGet(g,x,y,z) * PSGridGet(k, x) +
15 |       PSGridGet(g,x-1,y,z) * PSGridGet(k, x-1) +
16 |       PSGridGet(g,x+1,y,z) * PSGridGet(k, x+1);
17 |   PSGridEmit(g2, v);
18 |   return;
19 | }
20 | 
21 | /* Print all N*N*N values of input to stdout, one "%f" per line. */
22 | void dump(float *input) {
23 |   const float *cur = input;
24 |   const float *const end = input + N*N*N;
25 |   for (; cur != end; ++cur) printf("%f\n", *cur);
26 | }
27 | /* Test driver: 3-D grids g1/g2 plus a 1-D coefficient grid k; runs kernel once and prints g2. */
28 | int main(int argc, char *argv[]) {
29 |   PSInit(&argc, &argv, 3, N, N, N);
30 |   PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N);
31 |   PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);
32 |   PSGrid1DFloat k = PSGrid1DFloatNew(N);
33 |   /* x skips one cell per side because kernel reads x-1 and x+1; y and z are full. */
34 |   PSDomain3D d = PSDomain3DNew(1, N-1, 0, N, 0, N);
35 |   size_t nelms = N*N*N;
36 | 
37 |   float *indata = (float *)malloc(sizeof(float) * nelms);
38 |   int i;
39 |   for (i = 0; i < (int)nelms; i++) {
40 |     indata[i] = i;
41 |   }
42 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
43 | 
44 |   PSGridCopyin(g1, indata);
45 |   PSGridCopyin(g2, indata);
46 |   /* Reuse the first N entries of indata as the 1-D coefficients. */
47 |   for (i = 0; i < N; ++i) {
48 |     indata[i] = 1 + (i%2); // 1 or 2
49 |   }
50 |   PSGridCopyin(k, indata);
51 | 
52 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2, k));
53 |   PSGridCopyout(g2, outdata);
54 |   dump(outdata);
55 | 
56 |   /* Release every grid, including the 1-D coefficient grid k. */
57 |   PSGridFree(g1);
58 |   PSGridFree(g2);
59 |   PSGridFree(k);
60 |   PSFinalize();
61 |   free(indata);
62 |   free(outdata);
63 |   return 0;
64 | }
65 | 
66 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_mixed-dim.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET1D(x) (x)
 8 | #define OFFSET3D(x, y, z) ((x) + (y) * N + (z) * N * N)
 9 | 
10 | 
11 | /* Reference stencil for x in [1, N-2] and every y, z:
12 |  * g2 = g1(x)*k[x] + g1(x-1)*k[x-1] + g1(x+1)*k[x+1], g1 taken at the same y, z. */
13 | void kernel(float *g1, float *g2, float *k) {
14 |   int ix, iy, iz;
15 |   for (iz = 0; iz < N; ++iz) {
16 |     for (iy = 0; iy < N; ++iy) {
17 |       for (ix = 1; ix < N-1; ++ix) {
18 |         const float centre = g1[OFFSET3D(ix, iy, iz)] * k[OFFSET1D(ix)];
19 |         const float west = g1[OFFSET3D(ix-1, iy, iz)] * k[OFFSET1D(ix-1)];
20 |         const float east = g1[OFFSET3D(ix+1, iy, iz)] * k[OFFSET1D(ix+1)];
21 |         g2[OFFSET3D(ix, iy, iz)] = centre + west + east;
22 |       }
23 |     }
24 |   }
25 | }
26 | 
27 | /* Print all N*N*N values of input to stdout, one "%f" per line. */
28 | void dump(float *input) {
29 |   const float *cur = input;
30 |   const float *const end = input + N*N*N;
31 |   for (; cur != end; ++cur) printf("%f\n", *cur);
32 | }
33 | 
34 | /* Seed both grids with 0..N^3-1 and the coefficients with 1,2,1,2,...,
35 |  * run the reference stencil once and print g2. */
36 | int main(int argc, char *argv[]) {
37 |   const size_t count = N*N*N;
38 |   REAL *src = (REAL *)malloc(sizeof(REAL) * count);
39 |   REAL *dst = (REAL *)malloc(sizeof(REAL) * count);
40 |   REAL *coef = (REAL *)malloc(sizeof(REAL) * N);
41 |   size_t idx;
42 |   int c;
43 | 
44 |   for (idx = 0; idx < count; ++idx) {
45 |     src[idx] = dst[idx] = (REAL)idx;
46 |   }
47 |   for (c = 0; c < N; ++c) {
48 |     coef[c] = (c & 1) ? 2 : 1;
49 |   }
50 | 
51 |   kernel(src, dst, coef);
52 |   dump(dst);
53 | 
54 |   free(src);
55 |   free(dst);
56 |   free(coef);
57 |   return 0;
58 | }
59 | 
60 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_mixed-dim2.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET1D(x) (x)
 8 | #define OFFSET3D(x, y, z) ((x) + (y) * N + (z) * N * N)
 9 | 
10 | 
11 | /* Reference 7-point stencil over the interior (1-cell halo): the centre value plus
12 |  * each of the six neighbours scaled by i[], j[] or k[] at the neighbour's x, y or z. */
13 | void kernel(float *g1, float *g2,
14 |             float *i, float *j, float *k) {
15 |   int ix, iy, iz;
16 |   for (iz = 1; iz < N-1; ++iz) {
17 |     for (iy = 1; iy < N-1; ++iy) {
18 |       for (ix = 1; ix < N-1; ++ix) {
19 |         float acc = g1[OFFSET3D(ix, iy, iz)];
20 |         acc += g1[OFFSET3D(ix-1, iy, iz)] * i[OFFSET1D(ix-1)];
21 |         acc += g1[OFFSET3D(ix+1, iy, iz)] * i[OFFSET1D(ix+1)];
22 |         acc += g1[OFFSET3D(ix, iy-1, iz)] * j[OFFSET1D(iy-1)];
23 |         acc += g1[OFFSET3D(ix, iy+1, iz)] * j[OFFSET1D(iy+1)];
24 |         acc += g1[OFFSET3D(ix, iy, iz-1)] * k[OFFSET1D(iz-1)];
25 |         acc += g1[OFFSET3D(ix, iy, iz+1)] * k[OFFSET1D(iz+1)];
26 |         g2[OFFSET3D(ix, iy, iz)] = acc;
27 |       }
28 |     }
29 |   }
30 | }
31 | 
32 | /* Print all N*N*N values of input to stdout, one "%f" per line. */
33 | void dump(float *input) {
34 |   const float *cur = input;
35 |   const float *const end = input + N*N*N;
36 |   for (; cur != end; ++cur) printf("%f\n", *cur);
37 | }
38 | 
39 | /* Seed both grids with 0..N^3-1 and one coefficient array with 1,2,1,2,...,
40 |  * pass that array for all three axes, run the stencil once and print g2. */
41 | int main(int argc, char *argv[]) {
42 |   const size_t count = N*N*N;
43 |   REAL *src = (REAL *)malloc(sizeof(REAL) * count);
44 |   REAL *dst = (REAL *)malloc(sizeof(REAL) * count);
45 |   REAL *coef = (REAL *)malloc(sizeof(REAL) * N);
46 |   size_t idx;
47 |   int c;
48 | 
49 |   for (idx = 0; idx < count; ++idx) {
50 |     src[idx] = dst[idx] = (REAL)idx;
51 |   }
52 |   for (c = 0; c < N; ++c) {
53 |     coef[c] = (c & 1) ? 2 : 1;
54 |   }
55 | 
56 |   kernel(src, dst, coef, coef, coef);
57 |   dump(dst);
58 | 
59 |   free(src);
60 |   free(dst);
61 |   free(coef);
62 |   return 0;
63 | }
64 | 
65 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_multi-kernels.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Combining two kernels
 3 |  * DIM: 3
 4 |  * PRIORITY: 2 
 5 |  */ 
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 8
11 | /* Stencil body: emits g1(x, y, z) * 2 to g2. */
12 | void kernel1(const int x, const int y, const int z,
13 |              PSGrid3DFloat g1, PSGrid3DFloat g2) {
14 |   float v = PSGridGet(g1, x, y, z) * 2;
15 |   PSGridEmit(g2, v);
16 |   return;
17 | }
18 | /* Stencil body: emits g2(x, y, z) / 2 to g1, undoing the doubling done by kernel1. */
19 | void kernel2(const int x, const int y, const int z,
20 |              PSGrid3DFloat g2, PSGrid3DFloat g1) {
21 |   float v = PSGridGet(g2, x, y, z) / 2;
22 |   PSGridEmit(g1, v);
23 |   return;
24 | }
25 | /* Test driver: runs kernel1 then kernel2 in one PSStencilRun and checks g1 equals its input. */
26 | int main(int argc, char *argv[]) {
27 |   PSInit(&argc, &argv, 3, N, N, N);
28 |   PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N);
29 |   PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);
30 |   PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);
31 |   size_t nelms = N*N*N;
32 |   /* Host input: element i holds the value i. */
33 |   float *indata = (float *)malloc(sizeof(float) * nelms);
34 |   int i;
35 |   for (i = 0; i < nelms; i++) {
36 |     indata[i] = i;
37 |   }
38 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
39 | 
40 |   PSGridCopyin(g1, indata);
41 |   /* g1 -> g2 (times 2), then g2 -> g1 (divided by 2), both over the full domain. */
42 |   PSStencilRun(PSStencilMap(kernel1, d, g1, g2),
43 |                PSStencilMap(kernel2, d, g2, g1));
44 | 
45 |   PSGridCopyout(g1, outdata);
46 |   /* Any element that differs from the input is reported and ends the run with status 1. */
47 |   for (i = 0; i < nelms; i++) {
48 |     if (indata[i] != outdata[i]) {
49 |       fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n",
50 |               i, indata[i], outdata[i]);
51 |       exit(1);
52 |     }
53 |   }
54 | 
55 |   PSGridFree(g1);
56 |   PSGridFree(g2);
57 |   PSFinalize();
58 |   free(indata);
59 |   free(outdata);
60 |   return 0;
61 | }
62 | 
63 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_param_name.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Parameter name being the same as an existing function
 3 |  * DIM: 3
 4 |  * PRIORITY: 1 
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include <math.h> // gamma is declared in math.h
 9 | #include "physis/physis.h"
10 | 
11 | #define N 8
12 | // Stencil body: emits g(x, y, z) * gamma to g2.
13 | // "gamma" is used as a parameter name on purpose: it is also declared in math.h,
14 | // which caused a CUDA translation error in commit 61d14b3e6362e7154d7da3dbdd7887a2106240f4.
15 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g,
16 |              PSGrid3DFloat g2, float gamma) {
17 |   float v = PSGridGet(g, x, y, z) * gamma;
18 |   PSGridEmit(g2, v);
19 |   return;
20 | }
21 | /* Test driver: runs kernel with gamma = 1.0f and checks that g2 equals the input data. */
22 | int main(int argc, char *argv[]) {
23 |   PSInit(&argc, &argv, 3, N, N, N);
24 |   PSGrid3DFloat g = PSGrid3DFloatNew(N, N, N);
25 |   PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);
26 |   PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);
27 |   size_t nelms = N*N*N;
28 |   /* Host input: element i holds the value i. */
29 |   float *indata = (float *)malloc(sizeof(float) * nelms);
30 |   int i;
31 |   for (i = 0; i < nelms; i++) {
32 |     indata[i] = i;
33 |   }
34 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
35 | 
36 |   PSGridCopyin(g, indata);
37 |   /* 1.0f is passed as the kernel's gamma argument, so g2 should equal g. */
38 |   PSStencilRun(PSStencilMap(kernel, d, g, g2, 1.0f));
39 | 
40 |   PSGridCopyout(g2, outdata);
41 |   /* Any element that differs from the input is reported and ends the run with status 1. */
42 |   for (i = 0; i < nelms; i++) {
43 |     if (indata[i] != outdata[i]) {
44 |       fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n",
45 |               i, indata[i], outdata[i]);
46 |       exit(1);
47 |     }
48 |   }
49 | 
50 |   PSGridFree(g);
51 |   PSGridFree(g2);
52 |   PSFinalize();
53 |   free(indata);
54 |   free(outdata);
55 |   return 0;
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_redblack-periodic.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: 7-point periodic stencil with red-black ordering
 3 |  * DIM: 3
 4 |  * PRIORITY: 1
 5 |  * TARGETS: ref cuda 
 6 |  */
 7 | 
 8 | #include <stdio.h>
 9 | #include "physis/physis.h"
10 | 
11 | #define N 32
12 | /* Stencil body: sums g at (x,y,z) and its six axis neighbours, all read periodically; emits back to g. */
13 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g) {
14 |   float v = PSGridGetPeriodic(g, x, y, z) +
15 |       PSGridGetPeriodic(g, x+1, y, z) + PSGridGetPeriodic(g, x-1, y, z) +
16 |       PSGridGetPeriodic(g, x, y+1, z) + PSGridGetPeriodic(g, x, y-1, z) +
17 |       PSGridGetPeriodic(g, x, y, z-1) + PSGridGetPeriodic(g, x, y, z+1);
18 |   PSGridEmit(g, v);
19 |   return;
20 | }
21 | 
22 | /* Print all N*N*N values of input to stdout, one "%f" per line. */
23 | void dump(float *input) {
24 |   const float *cur = input;
25 |   const float *const end = input + N*N*N;
26 |   for (; cur != end; ++cur) printf("%f\n", *cur);
27 | }
28 | 
29 | #define halo_width (1)
30 | /* Test driver: loads 0..N^3-1 into g, runs kernel via a red-black map over the full grid, prints g. */
31 | int main(int argc, char *argv[]) {
32 |   PSInit(&argc, &argv, 3, N, N, N);
33 |   PSGrid3DFloat g = PSGrid3DFloatNew(N, N, N);
34 |   /* Full domain: the kernel's periodic reads need no halo. */
35 |   PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);
36 |   size_t nelms = N*N*N;
37 |   /* Host input: element i holds the value i. */
38 |   float *indata = (float *)malloc(sizeof(float) * nelms);
39 |   int i;
40 |   for (i = 0; i < nelms; i++) {
41 |     indata[i] = i;
42 |   }
43 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
44 | 
45 |   PSGridCopyin(g, indata);
46 |   /* kernel reads and emits the same grid g (presumably one colour at a time - confirm in the Physis docs). */
47 |   PSStencilRun(PSStencilMapRedBlack(kernel, d, g));
48 |   /* Copy g back to the host and print every element. */
49 |   PSGridCopyout(g, outdata);
50 |   dump(outdata);
51 | 
52 |   PSGridFree(g);
53 |   PSFinalize();
54 |   free(indata);
55 |   free(outdata);
56 |   return 0;
57 | }
58 | 
59 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_redblack-periodic.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 8 | 
 9 | /* In-place 7-point periodic sweep over the points whose (x+y+z) parity
10 |  * equals rb; each such point becomes itself plus its six wrapped neighbours. */
11 | void kernel(float *g, int rb) {
12 |   int ix, iy, iz;
13 |   for (iz = 0; iz < N; ++iz) {
14 |     const int z_prev = (iz + N - 1) % N;
15 |     const int z_next = (iz + 1) % N;
16 |     for (iy = 0; iy < N; ++iy) {
17 |       const int y_prev = (iy + N - 1) % N;
18 |       const int y_next = (iy + 1) % N;
19 |       for (ix = (iy + iz + rb) % 2; ix < N; ix += 2) {
20 |         const int x_prev = (ix + N - 1) % N;
21 |         const int x_next = (ix + 1) % N;
22 |         float acc = g[OFFSET(ix, iy, iz)];
23 |         acc += g[OFFSET(x_next, iy, iz)];
24 |         acc += g[OFFSET(x_prev, iy, iz)];
25 |         acc += g[OFFSET(ix, y_next, iz)];
26 |         acc += g[OFFSET(ix, y_prev, iz)];
27 |         acc += g[OFFSET(ix, iy, z_next)];
28 |         acc += g[OFFSET(ix, iy, z_prev)];
29 |         g[OFFSET(ix, iy, iz)] = acc;
30 |       }
31 |     }
32 |   }
33 | }
34 | 
35 | /* Print all N*N*N values of input to stdout, one "%f" per line. */
36 | void dump(float *input) {
37 |   const float *cur = input;
38 |   const float *const end = input + N*N*N;
39 |   for (; cur != end; ++cur) printf("%f\n", *cur);
40 | }
41 | 
42 | /* Seed the grid with 0..N^3-1, sweep parity 0 then parity 1, print the grid. */
43 | int main(int argc, char *argv[]) {
44 |   const size_t count = N*N*N;
45 |   REAL *grid = (REAL *)malloc(sizeof(REAL) * count);
46 |   size_t idx;
47 |   int colour;
48 | 
49 |   for (idx = 0; idx < count; ++idx) {
50 |     grid[idx] = (REAL)idx;
51 |   }
52 | 
53 |   /* Order matters: the second sweep reads values written by the first. */
54 |   for (colour = 0; colour < 2; ++colour) kernel(grid, colour);
55 |   dump(grid);
56 |   free(grid);
57 |   return 0;
58 | }
59 | 
60 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_redblack-separated.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: 7-point stencil with red-black ordering (separate red and black maps)
 3 |  * DIM: 3
 4 |  * PRIORITY: 1
 5 |  * TARGETS: ref cuda 
 6 |  */
 7 | 
 8 | #include <stdio.h>
 9 | #include "physis/physis.h"
10 | 
11 | #define N 32
12 | /* Stencil body: sums g at (x,y,z) and its six axis neighbours and emits the sum back to g. */
13 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g) {
14 |   float v = PSGridGet(g, x, y, z) +
15 |       PSGridGet(g, x+1, y, z) + PSGridGet(g, x-1, y, z) +
16 |       PSGridGet(g, x, y+1, z) + PSGridGet(g, x, y-1, z) +
17 |       PSGridGet(g, x, y, z-1) + PSGridGet(g, x, y, z+1);
18 |   PSGridEmit(g, v);
19 |   return;
20 | }
21 | 
22 | /* Print all N*N*N values of input to stdout, one "%f" per line. */
23 | void dump(float *input) {
24 |   const float *cur = input;
25 |   const float *const end = input + N*N*N;
26 |   for (; cur != end; ++cur) printf("%f\n", *cur);
27 | }
28 | 
29 | #define halo_width (1)
30 | /* Test driver: loads 0..N^3-1 into g, runs kernel through a red map then a black map, prints g. */
31 | int main(int argc, char *argv[]) {
32 |   PSInit(&argc, &argv, 3, N, N, N);
33 |   PSGrid3DFloat g = PSGrid3DFloatNew(N, N, N);
34 |   /* The domain leaves halo_width cells out at both ends of every axis. */
35 |   PSDomain3D d = PSDomain3DNew(0+halo_width, N-halo_width,
36 |                                0+halo_width, N-halo_width,
37 |                                0+halo_width, N-halo_width);
38 |   size_t nelms = N*N*N;
39 |   /* Host input: element i holds the value i. */
40 |   float *indata = (float *)malloc(sizeof(float) * nelms);
41 |   int i;
42 |   for (i = 0; i < nelms; i++) {
43 |     indata[i] = i;
44 |   }
45 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
46 | 
47 |   PSGridCopyin(g, indata);
48 |   /* Same kernel, domain and grid for both maps; the red map is listed before the black map. */
49 |   PSStencilRun(PSStencilMapRed(kernel, d, g),
50 |                PSStencilMapBlack(kernel, d, g));
51 |   /* Copy g back to the host and print every element. */
52 |   PSGridCopyout(g, outdata);
53 |   dump(outdata);
54 | 
55 |   PSGridFree(g);
56 |   PSFinalize();
57 |   free(indata);
58 |   free(outdata);
59 |   return 0;
60 | }
61 | 
62 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_redblack.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: 7-point stencil with red-black ordering
 3 |  * DIM: 3
 4 |  * PRIORITY: 1
 5 |  * TARGETS: ref cuda
 6 |  */
 7 | 
 8 | #include <stdio.h>
 9 | #include "physis/physis.h"
10 | 
11 | #define N 32
12 | /* Stencil body: sums g at (x,y,z) and its six axis neighbours and emits the sum back to g. */
13 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g) {
14 |   float v = PSGridGet(g, x, y, z) +
15 |       PSGridGet(g, x+1, y, z) + PSGridGet(g, x-1, y, z) +
16 |       PSGridGet(g, x, y+1, z) + PSGridGet(g, x, y-1, z) +
17 |       PSGridGet(g, x, y, z-1) + PSGridGet(g, x, y, z+1);
18 |   PSGridEmit(g, v);
19 |   return;
20 | }
21 | 
22 | /* Print all N*N*N values of input to stdout, one "%f" per line. */
23 | void dump(float *input) {
24 |   const float *cur = input;
25 |   const float *const end = input + N*N*N;
26 |   for (; cur != end; ++cur) printf("%f\n", *cur);
27 | }
28 | 
29 | #define halo_width (1)
30 | /* Test driver: loads 0..N^3-1 into g, runs kernel via a red-black map over the interior, prints g. */
31 | int main(int argc, char *argv[]) {
32 |   PSInit(&argc, &argv, 3, N, N, N);
33 |   PSGrid3DFloat g = PSGrid3DFloatNew(N, N, N);
34 |   /* The domain leaves halo_width cells out at both ends of every axis. */
35 |   PSDomain3D d = PSDomain3DNew(0+halo_width, N-halo_width,
36 |                                0+halo_width, N-halo_width,
37 |                                0+halo_width, N-halo_width);
38 |   size_t nelms = N*N*N;
39 |   /* Host input: element i holds the value i. */
40 |   float *indata = (float *)malloc(sizeof(float) * nelms);
41 |   int i;
42 |   for (i = 0; i < nelms; i++) {
43 |     indata[i] = i;
44 |   }
45 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
46 | 
47 |   PSGridCopyin(g, indata);
48 |   /* kernel reads and emits the same grid g (presumably one colour at a time - confirm in the Physis docs). */
49 |   PSStencilRun(PSStencilMapRedBlack(kernel, d, g));
50 |   /* Copy g back to the host and print every element. */
51 |   PSGridCopyout(g, outdata);
52 |   dump(outdata);
53 | 
54 |   PSGridFree(g);
55 |   PSFinalize();
56 |   free(indata);
57 |   free(outdata);
58 |   return 0;
59 | }
60 | 
61 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_redblack.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 8 | 
 9 | /* In-place 7-point sweep over interior points (1-cell halo) whose (x+y+z)
10 |  * parity equals rb; each becomes itself plus its six direct neighbours. */
11 | void kernel(float *g, int rb) {
12 |   const int lo = 1, hi = N - 1;
13 |   int ix, iy, iz;
14 |   for (iz = lo; iz < hi; ++iz) {
15 |     for (iy = lo; iy < hi; ++iy) {
16 |       /* start at the first interior x with the same parity as iy+iz+rb */
17 |       for (ix = lo + ((lo & 1) ^ (iy + iz + rb) % 2); ix < hi; ix += 2) {
18 |         g[OFFSET(ix, iy, iz)] = g[OFFSET(ix, iy, iz)] +
19 |             g[OFFSET(ix+1, iy, iz)] + g[OFFSET(ix-1, iy, iz)] +
20 |             g[OFFSET(ix, iy+1, iz)] + g[OFFSET(ix, iy-1, iz)] +
21 |             g[OFFSET(ix, iy, iz-1)] + g[OFFSET(ix, iy, iz+1)];
22 |       }
23 |     }
24 |   }
25 | }
26 | 
27 | /* Print all N*N*N values of input to stdout, one "%f" per line. */
28 | void dump(float *input) {
29 |   const float *cur = input;
30 |   const float *const end = input + N*N*N;
31 |   for (; cur != end; ++cur) printf("%f\n", *cur);
32 | }
33 | 
34 | /* Seed the grid with 0..N^3-1, sweep parity 0 then parity 1, print the grid. */
35 | int main(int argc, char *argv[]) {
36 |   const size_t count = N*N*N;
37 |   REAL *grid = (REAL *)malloc(sizeof(REAL) * count);
38 |   size_t idx;
39 |   int colour;
40 | 
41 |   for (idx = 0; idx < count; ++idx) {
42 |     grid[idx] = (REAL)idx;
43 |   }
44 | 
45 |   /* Order matters: the second sweep reads values written by the first. */
46 |   for (colour = 0; colour < 2; ++colour) kernel(grid, colour);
47 |   dump(grid);
48 |   free(grid);
49 |   return 0;
50 | }
51 | 
52 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_reduction-2d.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Grid reduction OP=PS_SUM
 3 |  * DIM: 2
 4 |  * PRIORITY: 1
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include <stdlib.h>
 9 | #include "physis/physis.h"
10 | 
11 | #define N 4
12 | #define REAL double
13 | #define PSGrid2D PSGrid2DDouble
14 | #define PSGrid2DNew PSGrid2DDoubleNew
15 | 
16 | /* Host-side reference: sum of the N*N values in g, accumulated in index order. */
17 | REAL reduce(REAL *g) {
18 |   REAL total = 0.0;
19 |   const REAL *p = g;
20 |   const REAL *const end = g + N*N;
21 |   while (p != end) total += *p++;
22 |   return total;
23 | }
24 | /* Test driver: compares PSReduce(PS_SUM) on a 2-D grid with the host-side reduce(); exits 1 on mismatch. */
25 | int main(int argc, char *argv[]) {
26 |   PSInit(&argc, &argv, 2, N, N);
27 |   PSGrid2D g1 = PSGrid2DNew(N, N);
28 |   size_t nelms = N*N;
29 |   REAL *indata = (REAL *)malloc(sizeof(REAL) * nelms);
30 |   int i;
31 |   for (i = 0; i < nelms; i++) {
32 |     indata[i] = i;  /* element i holds the value i */
33 |   }
34 |   PSGridCopyin(g1, indata);
35 |   REAL v;
36 |   PSReduce(&v, PS_SUM, g1);  /* reduction under test; result is stored in v */
37 |   REAL v_ref = reduce(indata);  /* reference computed from the same host data */
38 |   fprintf(stderr, "Reduction result: %f, reference: %f\n", v, v_ref);
39 |   if (v != v_ref) {  /* exact comparison */
40 |     fprintf(stderr, "Error: Non matching result\n");
41 |     exit(1);
42 |   }
43 |   PSGridFree(g1);
44 |   PSFinalize();
45 |   free(indata);
46 |   return 0;
47 | }
48 | 
49 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_reduction-3d-int.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Int grid reduction OP=PS_SUM
 3 |  * DIM: 3
 4 |  * PRIORITY: 1
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include <stdlib.h>
 9 | #include "physis/physis.h"
10 | 
11 | #define N 8
12 | #define NN (N*N*N)
13 | #define GTYPE int
14 | #define FMT "%d"
15 | #define PSGrid3D PSGrid3DInt
16 | #define PSGrid3DNew PSGrid3DIntNew
17 | 
18 | /* Host-side reference: sum of the NN values in g, accumulated in index order. */
19 | GTYPE reduce(GTYPE *g) {
20 |   GTYPE total = 0;
21 |   const GTYPE *p = g;
22 |   const GTYPE *const end = g + NN;
23 |   while (p != end) total += *p++;
24 |   return total;
25 | }
26 | /* Test driver: compares PSReduce(PS_SUM) on a 3-D int grid with the host-side reduce(); exits 1 on mismatch. */
27 | int main(int argc, char *argv[]) {
28 |   PSInit(&argc, &argv, 3, N, N, N);
29 |   PSGrid3D g1 = PSGrid3DNew(N, N, N);
30 |   GTYPE *indata = (GTYPE *)malloc(sizeof(GTYPE) * NN);
31 |   int i;
32 |   for (i = 0; i < NN; i++) {
33 |     indata[i] = i;  /* element i holds the value i */
34 |   }
35 |   PSGridCopyin(g1, indata);
36 |   GTYPE v;
37 |   PSReduce(&v, PS_SUM, g1);  /* reduction under test; result is stored in v */
38 |   GTYPE v_ref = reduce(indata);  /* reference computed from the same host data */
39 |   fprintf(stderr, "Reduction result: " FMT ", reference: " FMT "\n", v, v_ref);
40 |   if (v != v_ref) {
41 |     fprintf(stderr, "Error: No matching result\n");
42 |     exit(1);
43 |   }
44 |   PSGridFree(g1);
45 |   PSFinalize();
46 |   free(indata);
47 |   return 0;
48 | }
49 | 
50 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_reduction-3d-long.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Long grid reduction OP=PS_SUM
 3 |  * DIM: 3
 4 |  * PRIORITY: 1
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include <stdlib.h>
 9 | #include "physis/physis.h"
10 | 
11 | #define N 8
12 | #define NN (N*N*N)
13 | #define GTYPE long
14 | #define FMT "%ld"  /* must match GTYPE: printing a long needs the l length modifier */
15 | #define PSGrid3D PSGrid3DLong
16 | #define PSGrid3DNew PSGrid3DLongNew
17 | 
18 | /* Host-side reference: sum of the NN values in g, accumulated in index order. */
19 | GTYPE reduce(GTYPE *g) {
20 |   GTYPE total = 0;
21 |   const GTYPE *p = g;
22 |   const GTYPE *const end = g + NN;
23 |   while (p != end) total += *p++;
24 |   return total;
25 | }
26 | /* Test driver: compares PSReduce(PS_SUM) on a 3-D long grid with the host-side reduce(); exits 1 on mismatch. */
27 | int main(int argc, char *argv[]) {
28 |   PSInit(&argc, &argv, 3, N, N, N);
29 |   PSGrid3D g1 = PSGrid3DNew(N, N, N);
30 |   GTYPE *indata = (GTYPE *)malloc(sizeof(GTYPE) * NN);
31 |   int i;
32 |   for (i = 0; i < NN; i++) {
33 |     indata[i] = i;  /* element i holds the value i */
34 |   }
35 |   PSGridCopyin(g1, indata);
36 |   GTYPE v;
37 |   PSReduce(&v, PS_SUM, g1);  /* reduction under test; result is stored in v */
38 |   GTYPE v_ref = reduce(indata);  /* reference computed from the same host data */
39 |   fprintf(stderr, "Reduction result: " FMT ", reference: " FMT "\n", v, v_ref);
40 |   if (v != v_ref) {
41 |     fprintf(stderr, "Error: No matching result\n");
42 |     exit(1);
43 |   }
44 |   PSGridFree(g1);
45 |   PSFinalize();
46 |   free(indata);
47 |   return 0;
48 | }
49 | 
50 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_reduction-3d-max.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Grid reduction OP=PS_MAX
 3 |  * DIM: 3
 4 |  * PRIORITY: 2
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include <stdlib.h>
 9 | #include "physis/physis.h"
10 | 
11 | #define N 4
12 | #define REAL float
13 | #define PSGrid3D PSGrid3DFloat
14 | #define PSGrid3DNew PSGrid3DFloatNew
15 | 
16 | /* Host-side reference: largest of the N*N*N values in g. */
17 | REAL reduce(REAL *g) {
18 |   REAL best = g[0];
19 |   int idx;
20 |   for (idx = 1; idx < N*N*N; ++idx)
21 |     if (!(best > g[idx])) best = g[idx];
22 |   return best;
23 | }
24 | /* Test driver: compares PSReduce(PS_MAX) on a 3-D grid with the host-side reduce(); exits 1 on mismatch. */
25 | int main(int argc, char *argv[]) {
26 |   PSInit(&argc, &argv, 3, N, N, N);
27 |   PSGrid3D g1 = PSGrid3DNew(N, N, N);
28 |   size_t nelms = N*N*N;
29 |   REAL *indata = (REAL *)malloc(sizeof(REAL) * nelms);  /* REAL, to stay in step with the grid type alias */
30 |   int i;
31 |   for (i = 0; i < (int)nelms; i++) {
32 |     indata[i] = i;  /* element i holds the value i */
33 |   }
34 |   PSGridCopyin(g1, indata);
35 |   REAL v;
36 |   PSReduce(&v, PS_MAX, g1);  /* reduction under test; result is stored in v */
37 |   REAL v_ref = reduce(indata);  /* reference computed from the same host data */
38 |   fprintf(stderr, "Reduction result: %f, reference: %f\n", v, v_ref);
39 |   if (v != v_ref) {
40 |     fprintf(stderr, "Error: Non matching result\n");
41 |     exit(1);
42 |   }
43 |   PSGridFree(g1);
44 |   PSFinalize();
45 |   free(indata);
46 |   return 0;
47 | }
48 | 
49 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_reduction-3d-min.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Grid reduction OP=PS_MIN
 3 |  * DIM: 3
 4 |  * PRIORITY: 2
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include <stdlib.h>
 9 | #include "physis/physis.h"
10 | 
11 | #define N 8
12 | #define REAL float
13 | #define PSGrid3D PSGrid3DFloat
14 | #define PSGrid3DNew PSGrid3DFloatNew
15 | 
16 | REAL reduce(REAL *g) {
17 |   REAL v = g[0];
18 |   int i;
19 |   for (i = 1; i < N*N*N; ++i) {
20 |     v = (v > g[i]) ? g[i] : v;
21 |   }
22 |   return v;
23 | }
24 | 
25 | int main(int argc, char *argv[]) {
26 |   PSInit(&argc, &argv, 3, N, N, N);
27 |   PSGrid3D g1 = PSGrid3DNew(N, N, N);
28 |   size_t nelms = N*N*N;
29 |   float *indata = (float *)malloc(sizeof(REAL) * nelms);
30 |   int i;
31 |   for (i = 0; i < nelms; i++) {
32 |     indata[i] = i+10;
33 |   }
34 |   PSGridCopyin(g1, indata);
35 |   float v;
36 |   PSReduce(&v, PS_MIN, g1);
37 |   float v_ref = reduce(indata);
38 |   fprintf(stderr, "Reduction result: %f, reference: %f\n", v, v_ref);
39 |   if (v != v_ref) {
40 |     fprintf(stderr, "Error: Non matching result\n");
41 |     exit(1);
42 |   }
43 |   PSGridFree(g1);
44 |   PSFinalize();
45 |   free(indata);
46 |   return 0;
47 | }
48 | 
49 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_reduction-3d-prod.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Grid reduction OP=PS_PROD
 3 |  * DIM: 3
 4 |  * PRIORITY: 2
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include <stdlib.h>
 9 | #include "physis/physis.h"
10 | 
11 | #define N 4
12 | #define REAL float
13 | #define PSGrid3D PSGrid3DFloat
14 | #define PSGrid3DNew PSGrid3DFloatNew
15 | 
16 | REAL reduce(REAL *g) {
17 |   REAL v = 1.0;
18 |   int i;
19 |   for (i = 0; i < N*N*N; ++i) {
20 |     v *= g[i];
21 |   }
22 |   return v;
23 | }
24 | 
25 | int main(int argc, char *argv[]) {
26 |   PSInit(&argc, &argv, 3, N, N, N);
27 |   PSGrid3D g1 = PSGrid3DNew(N, N, N);
28 |   size_t nelms = N*N*N;
29 |   float *indata = (float *)malloc(sizeof(REAL) * nelms);
30 |   int i;
31 |   for (i = 0; i < nelms; i++) {
32 |     indata[i] = 1.1;
33 |   }
34 |   PSGridCopyin(g1, indata);
35 |   float v;
36 |   PSReduce(&v, PS_PROD, g1);
37 |   float v_ref = reduce(indata);
38 |   fprintf(stderr, "Reduction result: %f, reference: %f\n", v, v_ref);
39 |   fprintf(stderr, "Difference: %f\n", fabs(v - v_ref));
40 |   if (fabs(v - v_ref) / v_ref > 1.0e-5) {
41 |     fprintf(stderr, "Error: Non matching result\n");
42 |     exit(1);
43 |   }
44 |   PSGridFree(g1);
45 |   PSFinalize();
46 |   free(indata);
47 |   return 0;
48 | }
49 | 
50 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_reduction-3d-sum.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Grid reduction OP=PS_SUM
 3 |  * DIM: 3
 4 |  * PRIORITY: 1
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include <stdlib.h>
 9 | #include "physis/physis.h"
10 | 
11 | #define N 16
12 | #define REAL float
13 | #define PSGrid3D PSGrid3DFloat
14 | #define PSGrid3DNew PSGrid3DFloatNew
15 | 
16 | int main(int argc, char *argv[]) {
17 |   PSInit(&argc, &argv, 3, N, N, N);
18 |   PSGrid3D g1 = PSGrid3DNew(N, N, N);
19 |   size_t nelms = N*N*N;
20 |   float *indata = (float *)malloc(sizeof(REAL) * nelms);
21 |   int i;
22 |   for (i = 0; i < nelms; i++) {
23 |     indata[i] = i;
24 |   }
25 |   PSGridCopyin(g1, indata);
26 |   float v;
27 |   PSReduce(&v, PS_SUM, g1);
28 |   printf("%f\n", v);
29 |   PSGridFree(g1);
30 |   PSFinalize();
31 |   free(indata);
32 |   return 0;
33 | }
34 | 
35 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_reduction-3d-sum.manual.cuda.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "cuda.h"
 3 | #include "cuda_runtime.h"
 4 | 
 5 | #include <thrust/reduce.h>
 6 | #include <thrust/device_ptr.h>
 7 | #include <thrust/extrema.h>
 8 | 
 9 | #define N 16
10 | #define REAL float
11 | 
/*
 * Hand-written CUDA version of the sum reduction: copies the values
 * 0, 1, ..., N*N*N-1 to the device, sums them with thrust::reduce and
 * prints the result to stdout.
 */
int main(int argc, char *argv[]) {
  REAL *g1, *g1d;
  size_t nelms = N*N*N;
  g1 = (REAL *)malloc(sizeof(REAL) * nelms);
  cudaMalloc((void**)&g1d, sizeof(REAL) * nelms);

  int i;
  for (i = 0; i < (int)nelms; i++) {
    g1[i] = i;
  }

  cudaMemcpy(g1d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice);

  thrust::device_ptr<REAL> dev_ptr((REAL*)g1d);
  REAL v = thrust::reduce(dev_ptr, dev_ptr + nelms,
                          0.0f, thrust::plus<REAL>());

  printf("%f\n", v);

  /* Release the device and host buffers explicitly before the reset. */
  cudaFree(g1d);
  free(g1);
  cudaDeviceReset();
  return 0;
}
34 | 
35 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_reduction-3d-sum.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 16
 5 | #define REAL float
 6 | 
 7 | REAL reduce(float *input) {
 8 |   int i;
 9 |   REAL v = 0;
10 |   for (i = 0; i < N*N*N; ++i) {
11 |     v += input[i];
12 |   }
13 |   return v;
14 | }
15 | 
16 | int main(int argc, char *argv[]) {
17 |   REAL *g1;
18 |   size_t nelms = N*N*N;
19 |   g1 = (REAL *)malloc(sizeof(REAL) * nelms);
20 | 
21 |   int i;
22 |   for (i = 0; i < (int)nelms; i++) {
23 |     g1[i] = i;
24 |   }
25 | 
26 |   REAL v = reduce(g1);
27 |   printf("%f\n", v);
28 |   
29 |   free(g1);
30 |   return 0;
31 | }
32 | 
33 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_stencil-hole.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Stencil hole
 3 |  * DIM: 3
 4 |  * PRIORITY: 1
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 32
11 | 
/*
 * Stencil kernel evaluated at point (x, y, z): adds the value of g at that
 * point to the values two cells away along x on either side (x+2 and x-2)
 * and emits the sum to g2. The directly adjacent cells x+1 and x-1 are not
 * read.
 */
void kernel(const int x, const int y, const int z, PSGrid3DFloat g,
            PSGrid3DFloat g2) {
  float v = PSGridGet(g, x, y, z) +
      PSGridGet(g, x+2, y, z) + PSGridGet(g, x-2, y, z);
  PSGridEmit(g2, v);
  return;
}
19 | 
20 | void dump(float *input) {
21 |   int i;
22 |   for (i = 0; i < N*N*N; ++i) {
23 |     printf("%f\n", input[i]);
24 |   }
25 | }
26 | 
27 | #define halo_width (2)
28 | 
29 | int main(int argc, char *argv[]) {
30 |   PSInit(&argc, &argv, 3, N, N, N);
31 |   PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N);
32 |   PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);
33 | 
34 |   PSDomain3D d = PSDomain3DNew(0+halo_width, N-halo_width,
35 |                                0+halo_width, N-halo_width,
36 |                                0+halo_width, N-halo_width);
37 |   size_t nelms = N*N*N;
38 |   
39 |   float *indata = (float *)malloc(sizeof(float) * nelms);
40 |   int i;
41 |   for (i = 0; i < nelms; i++) {
42 |     indata[i] = i;
43 |   }
44 |   float *outdata = (float *)malloc(sizeof(float) * nelms);
45 |     
46 |   PSGridCopyin(g1, indata);
47 |   PSGridCopyin(g2, indata);  
48 | 
49 |   PSStencilRun(PSStencilMap(kernel, d, g1, g2));
50 |     
51 |   PSGridCopyout(g2, outdata);
52 |   dump(outdata);  
53 | 
54 |   PSGridFree(g1);
55 |   PSGridFree(g2);
56 |   PSFinalize();
57 |   free(indata);
58 |   free(outdata);
59 |   return 0;
60 | }
61 | 
62 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_stencil-hole.manual.cuda.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "cuda.h"
 3 | #include "cuda_runtime.h"
 4 | 
 5 | #define N 32
 6 | #define REAL float
 7 | 
 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 9 | 
10 |  __global__ void kernel(REAL *g1, REAL *g2) {
11 |    int x = threadIdx.x + blockIdx.x * blockDim.x;
12 |    int y = threadIdx.y + blockIdx.y * blockDim.y;
13 |    int z = threadIdx.z + blockIdx.z * blockDim.z;
14 | 
15 |    if (x <= 1 || x >= N-2 || y <= 1 || y >= N-2 ||
16 |        z <= 1 || z >= N-2) return;
17 |   
18 |    float v = g1[OFFSET(x, y, z)] +
19 |        g1[OFFSET(x+2, y, z)] + g1[OFFSET(x-2, y, z)];
20 |    g2[OFFSET(x, y, z)] = v;
21 |    return;
22 | }
23 | 
/* Prints the N*N*N values of input to stdout, one per line. */
void dump(float *input) {
  int remaining = N*N*N;
  const float *cur = input;
  while (remaining-- > 0) {
    printf("%f\n", *cur++);
  }
}
30 | 
31 | #define halo_width (1)
32 | 
/*
 * Hand-written CUDA version of the stencil-hole test: uploads the values
 * 0, 1, ..., N*N*N-1 into two device buffers, launches kernel with
 * 4x4x4 thread blocks covering the whole grid, copies the second buffer
 * back and dumps it to stdout.
 */
int main(int argc, char *argv[]) {
  REAL *g1, *g1d;
  REAL *g2d;
  size_t nelms = N*N*N;
  g1 = (REAL *)malloc(sizeof(REAL) * nelms);
  cudaMalloc((void**)&g1d, sizeof(REAL) * nelms);
  cudaMalloc((void**)&g2d, sizeof(REAL) * nelms);

  int i;
  for (i = 0; i < (int)nelms; i++) {
    g1[i] = i;
  }

  /* Both device buffers start from the same host data. */
  cudaMemcpy(g1d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice);
  cudaMemcpy(g2d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice);

  dim3 block_dim(4, 4, 4);
  dim3 grid_dim(N/block_dim.x, N/block_dim.y, N/block_dim.z);

  kernel<<<grid_dim, block_dim>>>(g1d, g2d);
  /* The host buffer is reused to receive the output. */
  cudaMemcpy(g1, g2d, sizeof(REAL) * nelms, cudaMemcpyDeviceToHost);

  dump(g1);

  /* Release the device and host buffers explicitly before the reset. */
  cudaFree(g1d);
  cudaFree(g2d);
  free(g1);
  cudaDeviceReset();
  return 0;
}
60 | 
61 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_stencil-hole.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | #define REAL float
 6 | 
 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 8 | 
 9 | void kernel(float *g1, float *g2) {
10 |   int x, y, z;
11 |   int halo_width = 2;
12 |   for (z = halo_width; z < N-halo_width; ++z) {
13 |     for (y = halo_width; y < N-halo_width; ++y) {
14 |       for (x = halo_width; x < N-halo_width; ++x) {
15 |         float v = g1[OFFSET(x, y, z)] +
16 |             g1[OFFSET(x+2, y, z)] + g1[OFFSET(x-2, y, z)];
17 |         g2[OFFSET(x, y, z)] = v;
18 |       }
19 |     }
20 |   }
21 |   return;
22 | }
23 | 
24 | void dump(float *input) {
25 |   int i;
26 |   for (i = 0; i < N*N*N; ++i) {
27 |     printf("%f\n", input[i]);
28 |   }
29 | }
30 | 
31 | int main(int argc, char *argv[]) {
32 |   REAL *g1, *g2;  
33 |   size_t nelms = N*N*N;
34 |   g1 = (REAL *)malloc(sizeof(REAL) * nelms);
35 |   g2 = (REAL *)malloc(sizeof(REAL) * nelms);
36 | 
37 |   int i;
38 |   for (i = 0; i < (int)nelms; i++) {
39 |     g1[i] = i;
40 |     g2[i] = i;
41 |   }
42 | 
43 |   kernel(g1, g2);
44 |   dump(g2);
45 |   
46 |   free(g1);
47 |   free(g2);
48 |   return 0;
49 | }
50 | 
51 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_user-defined-type-7-pt-periodic-complex.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define N 32
 6 | 
/*
 * Grid element with two float components.
 * NOTE(review): r and i are presumably the real and imaginary parts; this
 * file only ever sums each component separately.
 */
typedef struct {
  float r;
  float i;
} Complex;
11 | 
12 | 
13 | #define OFFSET(x, y, z) ((((x)+N)%N) + (((y)+N)%N) * N + (((z)+N)%N) * N * N)
14 | 
15 | void kernel1(Complex *g1, Complex *g2) {
16 |   int x, y, z;
17 |   for (z = 0; z < N; ++z) {
18 |     for (y = 0; y < N; ++y) {
19 |       for (x = 0; x < N; ++x) {
20 |         Complex t = g1[OFFSET(x, y, z)];
21 |         Complex t1 = g1[OFFSET(x+1, y, z)];
22 |         Complex t2 = g1[OFFSET(x-1, y, z)];
23 |         Complex t3 = g1[OFFSET(x, y+1, z)];
24 |         Complex t4 = g1[OFFSET(x, y-1, z)];
25 |         Complex t5 = g1[OFFSET(x, y, z+1)];
26 |         Complex t6 = g1[OFFSET(x, y, z-1)];
27 |         float r = t.r + t1.r + t2.r + t3.r + t4.r + t5.r + t6.r;
28 |         float i = t.i + t1.i + t2.i + t3.i + t4.i + t5.i + t6.i;        
29 |         Complex v = {r, i};
30 |         g2[OFFSET(x, y, z)] = v;
31 |       }
32 |     }
33 |   }
34 |   return;
35 | }
36 | 
37 | void dump(Complex *input) {
38 |   int i;
39 |   for (i = 0; i < N*N*N; ++i) {
40 |     printf("%f %f\n", input[i].r, input[i].i);
41 |   }
42 | }
43 | 
44 | int main(int argc, char *argv[]) {
45 |   Complex *g1, *g2;
46 |   size_t nelms = N*N*N;
47 |   g1 = (Complex *)malloc(sizeof(Complex) * nelms);
48 |   g2 = (Complex *)malloc(sizeof(Complex) * nelms);  
49 | 
50 |   int i;
51 |   for (i = 0; i < nelms; i++) {
52 |     g1[i].r = i;
53 |     g1[i].i = i+1;
54 |   }
55 | 
56 |   kernel1(g1, g2);
57 |   dump(g2);
58 |   free(g1);
59 |   free(g2);  
60 |   return 0;
61 | }
62 | 
63 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_user-defined-type-7-pt-periodic.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define N 32
 6 | 
/*
 * Grid element with two float members. In this file kernel1 reads p as the
 * stencil input and writes the stencil result to q.
 */
typedef struct {
  float p;
  float q;
} Point;
11 | 
12 | 
13 | #define OFFSET(x, y, z) ((((x)+N)%N) + (((y)+N)%N) * N + (((z)+N)%N) * N * N)
14 | 
15 | void kernel1(Point *g) {
16 |   int x, y, z;
17 |   for (z = 0; z < N; ++z) {
18 |     for (y = 0; y < N; ++y) {
19 |       for (x = 0; x < N; ++x) {
20 |         float v = g[OFFSET(x, y, z)].p +
21 |                    g[OFFSET(x+1, y, z)].p +
22 |                    g[OFFSET(x-1, y, z)].p +
23 |                    g[OFFSET(x, y+1, z)].p +
24 |                    g[OFFSET(x, y-1, z)].p +
25 |                    g[OFFSET(x, y, z+1)].p +
26 |                    g[OFFSET(x, y, z-1)].p;
27 |         g[OFFSET(x, y, z)].q = v;
28 |       }
29 |     }
30 |   }
31 |   return;
32 | }
33 | 
34 | void dump(Point *input) {
35 |   int i;
36 |   for (i = 0; i < N*N*N; ++i) {
37 |     printf("%f %f\n", input[i].p, input[i].q);
38 |   }
39 | }
40 | 
41 | int main(int argc, char *argv[]) {
42 |   Point *g;
43 |   size_t nelms = N*N*N;
44 |   g = (Point *)malloc(sizeof(Point) * nelms);
45 | 
46 |   int i;
47 |   for (i = 0; i < nelms; i++) {
48 |     g[i].p = i;
49 |     g[i].q = 0;
50 |   }
51 | 
52 |   kernel1(g);
53 |   dump(g);
54 |   free(g);
55 |   return 0;
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_user-defined-type-7-pt.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define N 32
 6 | 
/*
 * Grid element with two float members. In this file kernel1 reads p as the
 * stencil input and writes the stencil result to q.
 */
typedef struct {
  float p;
  float q;
} Point;
11 | 
12 | 
13 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
14 | 
15 | void kernel1(Point *g) {
16 |   int x, y, z;
17 |   for (z = 1; z < N-1; ++z) {
18 |     for (y = 1; y < N-1; ++y) {
19 |       for (x = 1; x < N-1; ++x) {
20 |         float v = g[OFFSET(x, y, z)].p +
21 |                    g[OFFSET(x+1, y, z)].p +
22 |                    g[OFFSET(x-1, y, z)].p +
23 |                    g[OFFSET(x, y+1, z)].p +
24 |                    g[OFFSET(x, y-1, z)].p +
25 |                    g[OFFSET(x, y, z+1)].p +
26 |                    g[OFFSET(x, y, z-1)].p;
27 |         g[OFFSET(x, y, z)].q = v;
28 |       }
29 |     }
30 |   }
31 |   return;
32 | }
33 | 
34 | void dump(Point *input) {
35 |   int i;
36 |   for (i = 0; i < N*N*N; ++i) {
37 |     printf("%f %f\n", input[i].p, input[i].q);
38 |   }
39 | }
40 | 
41 | int main(int argc, char *argv[]) {
42 |   Point *g;
43 |   size_t nelms = N*N*N;
44 |   g = (Point *)malloc(sizeof(Point) * nelms);
45 | 
46 |   int i;
47 |   for (i = 0; i < nelms; i++) {
48 |     g[i].p = i;
49 |     g[i].q = 0;
50 |   }
51 | 
52 |   kernel1(g);
53 |   dump(g);
54 |   free(g);
55 |   return 0;
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_user-defined-type-array-member-copy.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Copy within an array member
 3 |  * DIM: 3
 4 |  * PRIORITY: 10
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 32
11 | #define ITER 10
12 | 
/*
 * Grid element holding a two-entry float array. In this test the kernel
 * copies p[0] into p[1].
 */
struct Point {
  float p[2];
};
16 | 
17 | DeclareGrid3D(Point, struct Point);
18 | 
/*
 * Stencil kernel evaluated at point (x, y, z): reads array entry p[0] of
 * the element of g at that point and emits the value to entry p[1] through
 * PSGridEmitUtype.
 */
void kernel(const int x, const int y, const int z,
            PSGrid3DPoint g) {
  float v = PSGridGet(g, x, y, z).p[0];
  PSGridEmitUtype(g.p[1], v);
  return;
}
25 | 
26 | void check(struct Point *p) {
27 |   int i;
28 |   for (i = 0; i < N*N*N; ++i) {
29 |     if (p[i].p[0] != p[i].p[1]) {
30 |       fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n",
31 |               i, p[i].p[0], p[i].p[1]);
32 |       exit(1);
33 |     }
34 |   }
35 | }
36 | 
37 | int main(int argc, char *argv[]) {
38 |   PSInit(&argc, &argv, 3, N, N, N);
39 |   PSGrid3DPoint g = PSGrid3DPointNew(N, N, N);
40 | 
41 |   PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);
42 |   size_t nelms = N*N*N;
43 |   
44 |   struct Point *indata = (struct Point *)malloc(
45 |       sizeof(struct Point) * nelms);
46 |   int i;
47 |   for (i = 0; i < nelms; i++) {
48 |     indata[i].p[0] = i;
49 |     indata[i].p[1] = 0;
50 |   }
51 |     
52 |   PSGridCopyin(g, indata);
53 | 
54 |   PSStencilRun(PSStencilMap(kernel, d, g));
55 |     
56 |   PSGridCopyout(g, indata);
57 | 
58 |   check(indata);
59 | 
60 |   PSGridFree(g);
61 |   PSFinalize();
62 |   free(indata);
63 |   return 0;
64 | }
65 | 
66 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_user-defined-type-copyin-copyout-two-members.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Run copyin and copyout on a user-defined type with two members
 3 |  * DIM: 3
 4 |  * PRIORITY: 1
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 32
11 | #define ITER 10
12 | 
/* Grid element with two float members, x and y. */
struct Point {
  float x;
  float y;
};
17 | 
18 | DeclareGrid3D(Point, struct Point);
19 | 
20 | void check(struct Point *in, struct Point *out) {
21 |   int x = 0;
22 |   size_t nelms = N*N*N;  
23 |   for (x = 0; x < nelms; ++x) {
24 |     if (in[x].x != out[x].x) {
25 |       fprintf(stderr, "Error: x mismatch at %d, in: %f, out: %f\n",
26 |               x, in[x].x, out[x].x);
27 |       exit(1);
28 |     }
29 |     if (in[x].y != out[x].y) {
30 |       fprintf(stderr, "Error: y mismatch at %d, in: %f, out: %f\n",
31 |               x, in[x].y, out[x].y);
32 |       exit(1);
33 |     }
34 |   }
35 | }
36 | 
37 | int main(int argc, char *argv[]) {
38 |   PSInit(&argc, &argv, 3, N, N, N);
39 |   PSGrid3DPoint g1 = PSGrid3DPointNew(N, N, N);
40 |   size_t nelms = N*N*N;
41 |   struct Point *indata = (struct Point *)malloc(
42 |       sizeof(struct Point) * nelms);
43 |   int i;
44 |   for (i = 0; i < nelms; i++) {
45 |     indata[i].x = i;
46 |     indata[i].y = i+1;
47 |   }
48 |   struct Point *outdata = (struct Point *)malloc(
49 |       sizeof(struct Point) * nelms);
50 |     
51 |   PSGridCopyin(g1, indata);
52 |   PSGridCopyout(g1, outdata);
53 | 
54 |   check(indata, outdata);
55 | 
56 |   PSGridFree(g1);
57 |   PSFinalize();
58 |   free(indata);
59 |   free(outdata);
60 |   return 0;
61 | }
62 | 
63 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_user-defined-type2.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * TEST: Copy between user-defined type members
 3 |  * DIM: 3
 4 |  * PRIORITY: 1
 5 |  */
 6 | 
 7 | #include <stdio.h>
 8 | #include "physis/physis.h"
 9 | 
10 | #define N 32
11 | #define ITER 10
12 | 
/*
 * Grid element with two float members. In this test the kernel copies p
 * into q.
 */
struct Point {
  float p;
  float q;
};
17 | 
18 | DeclareGrid3D(Point, struct Point);
19 | 
/*
 * Stencil kernel evaluated at point (x, y, z): reads member p of the
 * element of g at that point and emits the value to member q through
 * PSGridEmitUtype.
 */
void kernel(const int x, const int y, const int z,
            PSGrid3DPoint g) {
  float v = PSGridGet(g, x, y, z).p;
  PSGridEmitUtype(g.q, v);
  return;
}
26 | 
27 | void check(struct Point *p) {
28 |   int i;
29 |   for (i = 0; i < N*N*N; ++i) {
30 |     if (p[i].p != p[i].q) {
31 |       fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n",
32 |               i, p[i].p, p[i].q);
33 |       exit(1);
34 |     }
35 |   }
36 | }
37 | 
38 | int main(int argc, char *argv[]) {
39 |   PSInit(&argc, &argv, 3, N, N, N);
40 |   PSGrid3DPoint g = PSGrid3DPointNew(N, N, N);
41 | 
42 |   PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);
43 |   size_t nelms = N*N*N;
44 |   
45 |   struct Point *indata = (struct Point *)malloc(
46 |       sizeof(struct Point) * nelms);
47 |   int i;
48 |   for (i = 0; i < nelms; i++) {
49 |     indata[i].p = i;
50 |     indata[i].q = 0;
51 |   }
52 |     
53 |   PSGridCopyin(g, indata);
54 | 
55 |   PSStencilRun(PSStencilMap(kernel, d, g));
56 |     
57 |   PSGridCopyout(g, indata);
58 | 
59 |   check(indata);
60 | 
61 |   PSGridFree(g);
62 |   PSFinalize();
63 |   free(indata);
64 |   return 0;
65 | }
66 | 
67 | 


--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_user-defined-type5.manual.ref.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define N 32
 5 | 
 6 | #define T float
 7 | 
 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)
 9 | 
10 | void kernel(T *g1, T *g2) {
11 |   int x, y, z;
12 |   int halo_width = 1;
13 |   for (z = halo_width; z < N-halo_width; ++z) {
14 |     for (y = halo_width; y < N-halo_width; ++y) {
15 |       for (x = halo_width; x < N-halo_width; ++x) {
16 |         T v = g1[OFFSET(x, y, z)] +
17 |             g1[OFFSET(x+1, y, z)] + g1[OFFSET(x-1, y, z)] +
18 |             g1[OFFSET(x, y+1, z)] + g1[OFFSET(x, y-1, z)] +
19 |             g1[OFFSET(x, y, z-1)] + g1[OFFSET(x, y, z+1)];
20 |         g2[OFFSET(x, y, z)] = v;
21 |       }
22 |     }
23 |   }
24 |   return;
25 | }
26 | 
27 | void dump(T *input) {
28 |   int i;
29 |   for (i = 0; i < N*N*N; ++i) {
30 |     printf("%f\n", input[i]);
31 |   }
32 | }
33 | 
34 | int main(int argc, char *argv[]) {
35 |   T *g1, *g2;  
36 |   size_t nelms = N*N*N;
37 |   g1 = (T *)malloc(sizeof(T) * nelms);
38 |   g2 = (T *)malloc(sizeof(T) * nelms);
39 | 
40 |   int i;
41 |   for (i = 0; i < (int)nelms; i++) {
42 |     g1[i] = i;
43 |     g2[i] = 0;
44 |   }
45 | 
46 |   kernel(g1, g2);
47 |   dump(g2);
48 |   
49 |   free(g1);
50 |   free(g2);
51 |   return 0;
52 | }
53 | 
54 | 


--------------------------------------------------------------------------------
/translator/ast_processing.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_AST_PROCESSING_H_
 4 | #define PHYSIS_TRANSLATOR_AST_PROCESSING_H_
 5 | 
 6 | #include "translator/translator_common.h"
 7 | #include "physis/internal_common.h"
 8 | 
 9 | namespace physis {
10 | namespace translator {
11 | namespace rose_util {
12 | 
// AST clean-up passes operating on the subtree rooted at `scope`.
// NOTE(review): the implementations are not visible from this header. Going
// by the names, these presumably remove redundant variable copies and
// unused functions; the meaning of the int return value (possibly the
// number of removals) should be confirmed against the definitions.
int RemoveRedundantVariableCopy(SgNode *scope);
int RemoveUnusedFunction(SgNode *scope);
15 | 
16 | }  // namespace rose_util
17 | }  // namespace translator
18 | }  // namespace physis
19 | 
20 | #endif /* PHYSIS_TRANSLATOR_AST_PROCESSING_H_ */
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/translator/ast_traversal.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_AST_TRAVERSAL_H_
 4 | #define PHYSIS_TRANSLATOR_AST_TRAVERSAL_H_
 5 | 
 6 | #include "translator/translator_common.h"
 7 | #include "physis/internal_common.h"
 8 | 
 9 | namespace physis {
10 | namespace translator {
11 | namespace rose_util {
12 | 
13 | template <class ASTNodeType>
14 | ASTNodeType *FindClosestAncestor(SgNode *node) {
15 |   SgNode *p = node->get_parent();
16 |   while (p) {
17 |     if (p->variantT() == (VariantT)ASTNodeType::static_variant) {
18 |       return dynamic_cast<ASTNodeType*>(p);
19 |     }
20 |     p = p->get_parent();
21 |   }
22 |   return NULL;
23 | }
24 | 
25 | }  // namespace rose_util
26 | }  // namespace translator
27 | }  // namespace physis
28 | 
29 | #endif /* PHYSIS_TRANSLATOR_AST_TRAVERSAL_H_ */
30 | 
31 | 
32 | 


--------------------------------------------------------------------------------
/translator/config.h.cmake:
--------------------------------------------------------------------------------
 1 | #ifndef PHYSIS_TRANSLATOR_COMMON_H_
 2 | #define PHYSIS_TRANSLATOR_COMMON_H_
 3 | 
 4 | #include "common/config.h"
 5 | 
 6 | #cmakedefine CUDA_TRANSLATOR_ENABLED
 7 | #cmakedefine CUDA_HM_TRANSLATOR_ENABLED
 8 | #cmakedefine MPI_TRANSLATOR_ENABLED
 9 | #cmakedefine MPI_OPENMP_TRANSLATOR_ENABLED
10 | #cmakedefine MPI_CUDA_TRANSLATOR_ENABLED
11 | #cmakedefine OPENCL_TRANSLATOR_ENABLED
12 | #cmakedefine MPI_OPENCL_TRANSLATOR_ENABLED
13 | 
14 | #endif /* PHYSIS_COMMON_CONFIG_H_ */


--------------------------------------------------------------------------------
/translator/cuda_hm_runtime_builder.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "translator/cuda_hm_runtime_builder.h"
 4 | 
 5 | namespace physis {
 6 | namespace translator {
 7 | 
// Forwards both arguments unchanged to the CUDARuntimeBuilder base class;
// no additional state is initialized here.
CUDAHMRuntimeBuilder::CUDAHMRuntimeBuilder(SgScopeStatement *global_scope,
                                           const Configuration &config):
    CUDARuntimeBuilder(global_scope, config) {
}
12 | 
13 | } // namespace translator
14 | } // namespace physis
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/translator/cuda_hm_runtime_builder.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_CUDA_HM_RUNTIME_BUILDER_H_
 4 | #define PHYSIS_TRANSLATOR_CUDA_HM_RUNTIME_BUILDER_H_
 5 | 
 6 | #include "translator/translator_common.h"
 7 | #include "translator/reference_runtime_builder.h"
 8 | #include "translator/cuda_runtime_builder.h"
 9 | 
10 | namespace physis {
11 | namespace translator {
12 | 
// Runtime builder for the CUDA-HM target. It derives from
// CUDARuntimeBuilder and, in this header, declares only a constructor and a
// virtual destructor; no member functions are overridden here.
class CUDAHMRuntimeBuilder : public CUDARuntimeBuilder {
 public:
  CUDAHMRuntimeBuilder(SgScopeStatement *global_scope,
                       const Configuration &config);
  virtual ~CUDAHMRuntimeBuilder() {}
};
19 | 
20 | } // namespace translator
21 | } // namespace physis
22 | 
23 | 
24 | 
25 | #endif /* PHYSIS_TRANSLATOR_CUDA_HM_RUNTIME_BUILDER_H_ */
26 | 


--------------------------------------------------------------------------------
/translator/cuda_hm_translator.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "translator/cuda_hm_translator.h"
 4 | 
 5 | namespace pu = physis::util;
 6 | namespace sb = SageBuilder;
 7 | namespace si = SageInterface;
 8 | 
 9 | namespace physis {
10 | namespace translator {
11 | 
// Constructs the translator through the CUDATranslator base class and then
// sets target_specific_macro_ to "PHYSIS_CUDA_HM".
CUDAHMTranslator::CUDAHMTranslator(const Configuration &config):
    CUDATranslator(config) {
  target_specific_macro_ = "PHYSIS_CUDA_HM";  
}
16 | 
17 | } // namespace translator
18 | } // namespace physis
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/translator/cuda_hm_translator.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_CUDA_HM_TRANSLATOR_H_
 4 | #define PHYSIS_TRANSLATOR_CUDA_HM_TRANSLATOR_H_
 5 | 
 6 | #include "translator/translator.h"
 7 | #include "translator/translator_common.h"
 8 | #include "translator/cuda_translator.h"
 9 | #include "translator/cuda_runtime_builder.h"
10 | 
11 | namespace physis {
12 | namespace translator {
13 | 
// Translator for the CUDA-HM target. It derives from CUDATranslator and, in
// this header, declares only a constructor and a virtual destructor.
class CUDAHMTranslator : public CUDATranslator {
 public:
  CUDAHMTranslator(const Configuration &config);
  virtual ~CUDAHMTranslator() {}
};
19 | 
20 | } // namespace translator
21 | } // namespace physis
22 | 
23 | #endif /* PHYSIS_TRANSLATOR_CUDA_HM_TRANSLATOR_H_ */
24 | 


--------------------------------------------------------------------------------
/translator/def_analysis.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_DEF_ANALYSIS_H_
 4 | #define PHYSIS_TRANSLATOR_DEF_ANALYSIS_H_
 5 | 
 6 | #include "translator/translator_common.h"
 7 | #include "translator/rose_util.h"
 8 | 
 9 | namespace physis {
10 | namespace translator {
11 | 
// Maps a variable (SgInitializedName) to a list of expressions; entries of
// the list may be NULL.
// NOTE(review): presumably the expressions are the variable's definitions,
// as produced by findDefinitions() -- confirm against the implementation.
typedef map<const SgInitializedName*, SgExpressionPtrList> DefMap;
13 | 
14 | static inline string toString(DefMap &dm) {
15 |   ostringstream ss;
16 |   FOREACH(it, dm.begin(), dm.end()) {
17 |     const SgInitializedName *v = it->first;
18 |     StringJoin sj(",");
19 |     FOREACH(eit, it->second.begin(), it->second.end()) {
20 |       SgExpression *e = *eit;
21 |       if (!e) {
22 |         sj << "NULL";
23 |       } else {
24 |         sj << e->unparseToString();
25 |       }
26 |     }
27 |     ss << v->get_name().getString()
28 |        << " -> {" << sj << "}\n";
29 |   }
30 | 
31 |   return ss.str();
32 | }
33 | 
// Builds a DefMap for the subtree rooted at topLevelNode; ownership of the
// returned map passes to the caller through std::auto_ptr.
// NOTE(review): relevantTypes presumably restricts the analysis to variables
// of those types -- confirm against the implementation, which is not visible
// from this header.
std::auto_ptr<DefMap> findDefinitions(
    SgNode *topLevelNode, const std::vector<SgType*> &relevantTypes);
36 | 
37 | } // namespace translator
38 | } // namespace physis
39 | 
40 | 
41 | #endif /* PHYSIS_TRANSLATOR_DEF_ANALYSIS_H_ */
42 | 


--------------------------------------------------------------------------------
/translator/fortran_output_fix.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | 
 4 | #ifndef PHYSIS_TRANSLATOR_FORTRAN_OUTPUT_FIX_H_
 5 | #define PHYSIS_TRANSLATOR_FORTRAN_OUTPUT_FIX_H_
 6 | 
 7 | #include "translator/translator_common.h"
 8 | 
 9 | namespace physis {
10 | namespace translator {
11 | 
// NOTE(review): presumably post-processes the generated Fortran file at
// `path`; the implementation is not visible from this header, so the exact
// fixes applied should be confirmed there.
void FixFortranOutput(const string &path);
13 | 
14 | } // namespace translator
15 | } // namespace physis
16 | 
17 | #endif /* PHYSIS_TRANSLATOR_FORTRAN_OUTPUT_FIX_H_ */
18 | 
19 | 


--------------------------------------------------------------------------------
/translator/mpi_cuda_optimizer.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_MPI_CUDA_OPTIMIZER_H_
 4 | #define PHYSIS_TRANSLATOR_MPI_CUDA_OPTIMIZER_H_
 5 | 
 6 | #include "translator/translator_common.h"
 7 | #include "translator/mpi_cuda_translator.h"
 8 | 
 9 | #if 0 // TODO: Merge this to optimizer/mpi_cuda_optimizer.h
10 | namespace physis {
11 | namespace translator {
12 | 
// Optimizer that holds a const reference to an MPICUDATranslator.
// This declaration is currently compiled out by the enclosing "#if 0",
// whose TODO says it should be merged into optimizer/mpi_cuda_optimizer.h.
class MPICUDAOptimizer {
 public:
  MPICUDAOptimizer(const MPICUDATranslator &trans);
  virtual ~MPICUDAOptimizer() {}
  // NOTE(review): presumably pre-computes grid addresses inside `func`;
  // confirm against the implementation.
  virtual void GridPreCalcAddr(SgFunctionDeclaration *func);
 protected:
  // Referenced, not owned. NOTE(review): the translator presumably has to
  // outlive this object -- confirm against the constructor.
  const MPICUDATranslator &trans_;
};
21 | 
22 | 
23 | } // namespace translator
24 | } // namespace physis
25 | 
26 | #endif
27 | 
28 | #endif /* PHYSIS_TRANSLATOR_MPI_CUDA_OPTIMIZER_H_ */
29 | 


--------------------------------------------------------------------------------
/translator/mpi_opencl_optimizer.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_MPI_OPENCL_OPTIMIZER_H_
 4 | #define PHYSIS_TRANSLATOR_MPI_OPENCL_OPTIMIZER_H_
 5 | 
 6 | #include "translator/translator_common.h"
 7 | #include "translator/mpi_opencl_translator.h"
 8 | 
 9 | namespace physis {
10 | namespace translator {
11 | 
12 | class MPIOpenCLOptimizer {
13 |  public:
14 |   MPIOpenCLOptimizer(const MPIOpenCLTranslator &trans);
15 |   virtual ~MPIOpenCLOptimizer() {}
16 |   virtual void GridPreCalcAddr(SgFunctionDeclaration *func);
17 |  protected:
18 |   const MPIOpenCLTranslator &trans_;
19 | };
20 | 
21 | } // namespace translator
22 | } // namespace physis
23 | 
24 | #endif /* PHYSIS_TRANSLATOR_MPI_OPENCL_OPTIMIZER_H_ */
25 | 


--------------------------------------------------------------------------------
/translator/mpi_opencl_runtime_builder.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_MPI_OPENCL_RUNTIME_BUILDER_H_
 4 | #define PHYSIS_TRANSLATOR_MPI_OPENCL_RUNTIME_BUILDER_H_
 5 | 
 6 | #include "translator/translator_common.h"
 7 | 
 8 | namespace physis {
 9 | namespace translator {
10 | 
11 | } // namespace translator
12 | } // namespace physis
13 | 
14 | #endif /* PHYSIS_TRANSLATOR_MPI_OPENCL_RUNTIME_BUILDER_H_ */
15 | 
16 | 


--------------------------------------------------------------------------------
/translator/mpi_openmp_translator.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_MPI_OPENMP_TRANSLATOR_H_
 4 | #define PHYSIS_TRANSLATOR_MPI_OPENMP_TRANSLATOR_H_
 5 | 
 6 | #include "translator/mpi_translator.h"
 7 | #include "translator/reference_translator.h"
 8 | 
 9 | #define MPI_OPENMP_DIVISION_X_DEFAULT (1)
10 | #define MPI_OPENMP_DIVISION_Y_DEFAULT (1)
11 | #define MPI_OPENMP_DIVISION_Z_DEFAULT (2)
12 | 
13 | #define MPI_OPENMP_CACHESIZE_X_DEFAULT (100)
14 | #define MPI_OPENMP_CACHESIZE_Y_DEFAULT (100)
15 | #define MPI_OPENMP_CACHESIZE_Z_DEFAULT (100)
16 | 
17 | namespace physis {
18 | namespace translator {
19 | 
20 | class MPIOpenMPTranslator : public MPITranslator {
21 |  private:
22 | 
23 |  public:
24 |   MPIOpenMPTranslator(const Configuration &config);
25 |   virtual ~MPIOpenMPTranslator();
26 | 
27 |   //virtual void Translate();
28 | 
29 |   //virtual void SetUp(SgProject *project, TranslationContext *context);
30 |   //virtual void Finish();
31 | 
32 |  protected:
33 |   virtual void translateInit(SgFunctionCallExp *node);
34 | 
35 |   // Nothing performed for this target for now
36 |   virtual void FixAST() {}
37 | 
38 |  public:
39 |   virtual SgBasicBlock *BuildRunKernelBody(
40 |       StencilMap *s, SgInitializedName *stencil_param);
41 | 
42 |  protected:
43 |   int division_[3];
44 |   int cache_size_[3];
45 | 
46 | 
47 | };
48 | 
49 | } // namespace translator
50 | } // namespace physis
51 | 
52 | #endif /* PHYSIS_TRANSLATOR_MPI_OPENMP_TRANSLATOR_H_ */
53 | 


--------------------------------------------------------------------------------
/translator/mpi_translator.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_MPI_TRANSLATOR_H_
 4 | #define PHYSIS_TRANSLATOR_MPI_TRANSLATOR_H_
 5 | 
 6 | #include "translator/translator.h"
 7 | #include "translator/translator_common.h"
 8 | #include "translator/reference_translator.h"
 9 | #include "translator/mpi_runtime_builder.h"
10 | 
11 | namespace physis {
12 | namespace translator {
13 | 
14 | class MPITranslator: public ReferenceTranslator {
15 |  public:
16 |   MPITranslator(const Configuration &config);
17 |   virtual ~MPITranslator() {}
18 |   virtual void Translate();
19 |  protected:
20 |   bool flag_mpi_overlap_;
21 |   virtual MPIRuntimeBuilder *builder() {
22 |     return dynamic_cast<MPIRuntimeBuilder*>(rt_builder_);
23 |   }
24 |   virtual void TranslateInit(SgFunctionCallExp *node);
25 |   virtual void TranslateRun(SgFunctionCallExp *node,
26 |                             Run *run);
27 |   virtual void appendNewArgExtra(SgExprListExp *args, Grid *g,
28 |                                  SgVariableDeclaration *dim_decl);
29 |   virtual void AppendNewArgStencilMemberInfo(SgExprListExp *args, Grid *g,
30 |                                              SgStatement *prec_stmt);
31 |   
32 | #if 0  
33 |   virtual void CheckSizes();
34 | #endif  
35 | 
36 |   int global_num_dims_;
37 |   //IntArray global_size_;
38 |   SgFunctionSymbol *stencil_run_func_;
39 |   string get_addr_name_;
40 |   string get_addr_no_halo_name_;
41 |   string emit_addr_name_;
42 | 
43 |   virtual void FixAST();
44 | };
45 | 
46 | } // namespace translator
47 | } // namespace physis
48 | 
49 | 
50 | #endif /* PHYSIS_TRANSLATOR_MPI_TRANSLATOR_H_ */
51 | 


--------------------------------------------------------------------------------
/translator/optimizer/cuda_optimizer.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "translator/optimizer/cuda_optimizer.h"
 4 | #include "translator/optimizer/optimization_passes.h"
 5 | 
 6 | namespace physis {
 7 | namespace translator {
 8 | namespace optimizer {
 9 | 
10 | void CUDAOptimizer::DoStage1() {
11 | }
12 | 
13 | void CUDAOptimizer::DoStage2() {
14 |   if (config_->LookupFlag("OPT_KERNEL_INLINING")) {
15 |     pass::kernel_inlining(proj_, tx_, builder_);
16 |   }
17 |   if (config_->LookupFlag("OPT_LOOP_PEELING")) {
18 |     pass::loop_peeling(proj_, tx_, builder_);
19 |   }
20 |   if (config_->LookupFlag("OPT_REGISTER_BLOCKING")) {
21 |     pass::register_blocking(proj_, tx_, builder_);
22 |   }
23 |   if (config_->LookupFlag("OPT_UNCONDITIONAL_GET")) {
24 |     pass::unconditional_get(proj_, tx_, builder_);
25 |   }
26 |   if (config_->LookupFlag("OPT_OFFSET_CSE")) {
27 |     pass::offset_cse(proj_, tx_, builder_);
28 |   }
29 |   if (config_->LookupFlag("OPT_OFFSET_SPATIAL_CSE")) {
30 |     pass::offset_spatial_cse(proj_, tx_, builder_);
31 |   }
32 |   if (config_->LookupFlag("OPT_LOOP_OPT")) {
33 |     pass::loop_opt(proj_, tx_, builder_);
34 |     pass::primitive_optimization(proj_, tx_, builder_);
35 |   }
36 | }
37 | 
38 | } // namespace optimizer
39 | } // namespace translator
40 | } // namespace physis
41 | 
42 | 


--------------------------------------------------------------------------------
/translator/optimizer/cuda_optimizer.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_CUDA_OPTIMIZER_H_
 4 | #define PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_CUDA_OPTIMIZER_H_
 5 | 
 6 | #include "translator/optimizer/optimizer.h"
 7 | 
 8 | namespace physis {
 9 | namespace translator {
10 | namespace optimizer {
11 | 
12 | class CUDAOptimizer: public Optimizer {
13 |  public:
14 |   CUDAOptimizer(SgProject *proj,
15 |                 physis::translator::TranslationContext *tx,
16 |                 physis::translator::BuilderInterface *builder,
17 |                 physis::translator::Configuration *config)
18 |       : Optimizer(proj, tx, builder, config) {}
19 |   virtual ~CUDAOptimizer() {}
20 |  protected:
21 |   virtual void DoStage1();
22 |   virtual void DoStage2();  
23 | };
24 | 
25 | } // namespace optimizer
26 | } // namespace translator
27 | } // namespace physis
28 | 
29 | #endif /* PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_CUDA_OPTIMIZER_H_ */
30 | 


--------------------------------------------------------------------------------
/translator/optimizer/mpi_cuda_optimizer.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "translator/optimizer/mpi_cuda_optimizer.h"
 4 | #include "translator/optimizer/optimization_passes.h"
 5 | 
 6 | namespace physis {
 7 | namespace translator {
 8 | namespace optimizer {
 9 | 
10 | void MPICUDAOptimizer::DoStage1() {
11 | }
12 | 
13 | void MPICUDAOptimizer::DoStage2() {
14 |   // TODO: support this optimization
15 | #if 0  
16 |   if (config_->LookupFlag("OPT_UNCONDITIONAL_GET")) {
17 |     pass::unconditional_get(proj_, tx_, builder_);
18 |   }
19 | #endif  
20 | }
21 | 
22 | } // namespace optimizer
23 | } // namespace translator
24 | } // namespace physis
25 | 
26 | 


--------------------------------------------------------------------------------
/translator/optimizer/mpi_cuda_optimizer.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_MPI_CUDA_OPTIMIZER_H_
 4 | #define PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_MPI_CUDA_OPTIMIZER_H_
 5 | 
 6 | #include "translator/optimizer/optimizer.h"
 7 | 
 8 | namespace physis {
 9 | namespace translator {
10 | namespace optimizer {
11 | 
12 | class MPICUDAOptimizer: public Optimizer {
13 |  public:
14 |   MPICUDAOptimizer(SgProject *proj,
15 |                    physis::translator::TranslationContext *tx,
16 |                    physis::translator::BuilderInterface *builder,
17 |                    physis::translator::Configuration *config)
18 |       : Optimizer(proj, tx, builder, config) {}
19 |   virtual ~MPICUDAOptimizer() {}
20 |  protected:
21 |   virtual void DoStage1();
22 |   virtual void DoStage2();
23 | };
24 | 
25 | } // namespace optimizer
26 | } // namespace translator
27 | } // namespace physis
28 | 
29 | #endif /* PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_MPI_CUDA_OPTIMIZER_H_ */
30 | 


--------------------------------------------------------------------------------
/translator/optimizer/mpi_optimizer.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "translator/optimizer/mpi_optimizer.h"
 4 | #include "translator/optimizer/optimization_passes.h"
 5 | 
 6 | namespace physis {
 7 | namespace translator {
 8 | namespace optimizer {
 9 | 
10 | void MPIOptimizer::DoStage1() {
11 | }
12 | 
13 | void MPIOptimizer::DoStage2() {
14 |   if (config_->LookupFlag("OPT_KERNEL_INLINING")) {
15 |     pass::kernel_inlining(proj_, tx_, builder_);
16 |   }
17 | }
18 | 
19 | } // namespace optimizer
20 | } // namespace translator
21 | } // namespace physis
22 | 
23 | 


--------------------------------------------------------------------------------
/translator/optimizer/mpi_optimizer.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_MPI_OPTIMIZER_H_
 4 | #define PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_MPI_OPTIMIZER_H_
 5 | 
 6 | #include "translator/optimizer/optimizer.h"
 7 | 
 8 | namespace physis {
 9 | namespace translator {
10 | namespace optimizer {
11 | 
12 | class MPIOptimizer: public Optimizer {
13 |  public:
14 |   MPIOptimizer(SgProject *proj,
15 |                physis::translator::TranslationContext *tx,
16 |                physis::translator::BuilderInterface *builder,
17 |                physis::translator::Configuration *config)
18 |       : Optimizer(proj, tx, builder, config) {}
19 |   virtual ~MPIOptimizer() {}
20 |  protected:
21 |   virtual void DoStage1();
22 |   virtual void DoStage2();
23 | };
24 | 
25 | } // namespace optimizer
26 | } // namespace translator
27 | } // namespace physis
28 | 
29 | #endif /* PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_MPI_OPTIMIZER_H_ */
30 | 


--------------------------------------------------------------------------------
/translator/optimizer/optimization_common.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "translator/translator_common.h"
 4 | #include "translator/translation_context.h"
 5 | 
 6 | namespace physis {
 7 | namespace translator {
 8 | namespace optimizer {
 9 | 
10 | //! Find innermost kernel loops
11 | extern vector<SgForStatement*> FindInnermostLoops(SgNode *proj);
12 | 
13 | //! Find expressions that are assigned to variable v
14 | extern void GetVariableSrc(SgInitializedName *v,
15 |                            vector<SgExpression*> &src_exprs);
16 | 
17 | //! Simple dead code elimination
18 | extern bool EliminateDeadCode(SgStatement *stmt);
19 | 
20 | //! Returns a single source expression for a variable if statically determined
21 | SgExpression *GetDeterministicDefinition(SgInitializedName *var);
22 | 
23 | } // namespace optimizer
24 | } // namespace translator
25 | } // namespace physis
26 | 


--------------------------------------------------------------------------------
/translator/optimizer/optimization_passes.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "translator/optimizer/optimization_passes.h"
 4 | #include "translator/rose_util.h"
 5 | 
 6 | namespace si = SageInterface;
 7 | namespace sb = SageBuilder;
 8 | 
 9 | namespace physis {
10 | namespace translator {
11 | namespace optimizer {
12 | namespace pass {
13 | 
14 | void null_optimization(
15 |     SgProject *proj,
16 |     physis::translator::TranslationContext *tx,
17 |     physis::translator::BuilderInterface *builder) {
18 |   pre_process(proj, tx, __FUNCTION__);
19 | }
20 | 
21 | } // namespace pass
22 | } // namespace optimizer
23 | } // namespace translator
24 | } // namespace physis
25 | 
26 | 


--------------------------------------------------------------------------------
/translator/optimizer/optimizer.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "translator/optimizer/optimizer.h"
 4 | #include "translator/optimizer/optimization_passes.h"
 5 | #include "translator/ast_processing.h"
 6 | 
 7 | namespace physis {
 8 | namespace translator {
 9 | namespace optimizer {
10 | 
11 | void Optimizer::DoStage1() {
12 |   pass::null_optimization(proj_, tx_, builder_);
13 | }
14 | 
15 | void Optimizer::DoStage2() {
16 |   pass::null_optimization(proj_, tx_, builder_);
17 | }
18 | void Optimizer::Stage1() {
19 |   Stage1PreProcess();
20 |   LOG_DEBUG() << "Applying Stage 1 optimization passes\n";  
21 |   DoStage1();
22 |   LOG_DEBUG() << "Stage 1 optimization done\n";  
23 |   Stage1PostProcess();
24 | }
25 | 
26 | void Optimizer::Stage2() {
27 |   Stage2PreProcess();
28 |   LOG_DEBUG() << "Applying Stage 2 optimization passes\n";
29 |   DoStage2();
30 |   LOG_DEBUG() << "Stage 2 optimization done\n";
31 |   Stage2PostProcess();  
32 | }
33 | 
34 | void Optimizer::Stage1PreProcess() {
35 | }
36 | 
37 | void Optimizer::Stage1PostProcess() {
38 | }
39 | 
40 | void Optimizer::Stage2PreProcess() {
41 | }
42 | 
43 | void Optimizer::Stage2PostProcess() {
44 |   rose_util::RemoveUnusedFunction(proj_);
45 | }
46 | 
47 | 
48 | } // namespace optimizer
49 | } // namespace translator
50 | } // namespace physis
51 | 
52 | 


--------------------------------------------------------------------------------
/translator/optimizer/primitive_optimization.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "translator/optimizer/optimization_passes.h"
 4 | #include "translator/optimizer/optimization_common.h"
 5 | #include "translator/rose_util.h"
 6 | #include "translator/builder_interface.h"
 7 | #include "translator/translation_util.h"
 8 | 
 9 | namespace si = SageInterface;
10 | namespace sb = SageBuilder;
11 | 
12 | namespace physis {
13 | namespace translator {
14 | namespace optimizer {
15 | namespace pass {
16 | 
17 | void primitive_optimization(
18 |     SgProject *proj,
19 |     physis::translator::TranslationContext *tx,
20 |     physis::translator::BuilderInterface *builder) {
21 |   pre_process(proj, tx, __FUNCTION__);
22 |   
23 |   vector<SgForStatement*> target_loops = FindInnermostLoops(proj);
24 |   FOREACH (it, target_loops.begin(), target_loops.end()) {
25 |     SgForStatement *loop = *it;
26 |     EliminateDeadCode(loop);
27 |   }
28 |   
29 |   post_process(proj, tx, __FUNCTION__);  
30 | }
31 | 
32 | } // namespace pass
33 | } // namespace optimizer
34 | } // namespace translator
35 | } // namespace physis
36 | 
37 | 


--------------------------------------------------------------------------------
/translator/optimizer/reference_optimizer.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "translator/optimizer/reference_optimizer.h"
 4 | #include "translator/optimizer/optimization_passes.h"
 5 | 
 6 | namespace physis {
 7 | namespace translator {
 8 | namespace optimizer {
 9 | 
10 | void ReferenceOptimizer::DoStage1() {
11 | }
12 | 
13 | void ReferenceOptimizer::DoStage2() {
14 |   if (config_->LookupFlag("OPT_KERNEL_INLINING")) {
15 |     pass::kernel_inlining(proj_, tx_, builder_);
16 |   }
17 |   if (config_->LookupFlag("OPT_LOOP_PEELING")) {
18 |     pass::loop_peeling(proj_, tx_, builder_);
19 |   }
20 |   // Unconditional get should be placed before register blocking
21 |   if (config_->LookupFlag("OPT_UNCONDITIONAL_GET")) {
22 |     pass::unconditional_get(proj_, tx_, builder_);
23 |   }
24 |   if (config_->LookupFlag("OPT_REGISTER_BLOCKING")) {
25 |     pass::register_blocking(proj_, tx_, builder_);
26 |   }
27 |   if (config_->LookupFlag("OPT_OFFSET_CSE")) {
28 |     pass::offset_cse(proj_, tx_, builder_);
29 |   }
30 |   if (config_->LookupFlag("OPT_OFFSET_SPATIAL_CSE")) {
31 |     pass::offset_spatial_cse(proj_, tx_, builder_);
32 |   }
33 |   if (config_->LookupFlag("OPT_LOOP_OPT")) {
34 |     pass::loop_opt(proj_, tx_, builder_);
35 |     pass::primitive_optimization(proj_, tx_, builder_);
36 |   }
37 | }
38 | 
39 | } // namespace optimizer
40 | } // namespace translator
41 | } // namespace physis
42 | 
43 | 


--------------------------------------------------------------------------------
/translator/optimizer/reference_optimizer.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_OPTIMIZER_H_
 4 | #define PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_OPTIMIZER_H_
 5 | 
 6 | #include "translator/optimizer/optimizer.h"
 7 | 
 8 | namespace physis {
 9 | namespace translator {
10 | namespace optimizer {
11 | 
12 | class ReferenceOptimizer: public Optimizer {
13 |  public:
14 |   ReferenceOptimizer(SgProject *proj,
15 |                      physis::translator::TranslationContext *tx,
16 |                      physis::translator::BuilderInterface *builder,
17 |                      physis::translator::Configuration *config)
18 |       : Optimizer(proj, tx, builder, config) {}
19 |   virtual ~ReferenceOptimizer() {}
20 |  protected:
21 |   virtual void DoStage1();
22 |   virtual void DoStage2();
23 | };
24 | 
25 | } // namespace optimizer
26 | } // namespace translator
27 | } // namespace physis
28 | 
29 | #endif /* PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_OPTIMIZER_H_ */
30 | 


--------------------------------------------------------------------------------
/translator/physis_exception.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_PHYSIS_EXCEPTION_H_
 4 | #define PHYSIS_TRANSLATOR_PHYSIS_EXCEPTION_H_
 5 | 
#include <exception>
#include <string>
 7 | 
 8 | namespace physis {
 9 | namespace translator {
10 | 
//! Exception type that carries a message string.
/*!
  The message given to the constructor is copied into the object and
  returned unchanged by what().
*/
class PhysisException
    : public std::exception {
  // std::string is spelled out so that this header does not depend on
  // an includer having brought an unqualified `string` into scope.
  std::string msg;
 public:
  //! \param msg Text later returned by what().
  explicit PhysisException(const std::string &msg) throw(): msg(msg) {}
  virtual ~PhysisException() throw() {}
  //! Returns the stored message; the pointer refers to storage owned by
  //! this exception object.
  virtual const char* what() const throw() {
    return msg.c_str();
  }
};
21 | 
22 | } // namespace translator
23 | } // namespace physis
24 | 
25 | #endif /* PHYSIS_TRANSLATOR_PHYSIS_EXCEPTION_H_ */
26 | 


--------------------------------------------------------------------------------
/translator/physisc-cuda-hm.cmake:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs the installed physisc with --cuda-host-memory and the installed
# include directory. "$@" forwards every argument unchanged, so
# arguments containing whitespace are not split (unquoted $* would
# re-split them).

"${CMAKE_INSTALL_PREFIX}/bin/physisc" --cuda-host-memory -I"${CMAKE_INSTALL_PREFIX}/include" "$@"
4 | 


--------------------------------------------------------------------------------
/translator/physisc-cuda.cmake:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs the installed physisc with --cuda and the installed include
# directory. "$@" forwards every argument unchanged, so arguments
# containing whitespace are not split (unquoted $* would re-split them).

"${CMAKE_INSTALL_PREFIX}/bin/physisc" --cuda -I"${CMAKE_INSTALL_PREFIX}/include" "$@"


--------------------------------------------------------------------------------
/translator/physisc-mpi-cuda.cmake:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs the installed physisc with --mpi-cuda and the installed include
# directory. "$@" forwards every argument unchanged, so arguments
# containing whitespace are not split (unquoted $* would re-split them).

"${CMAKE_INSTALL_PREFIX}/bin/physisc" --mpi-cuda -I"${CMAKE_INSTALL_PREFIX}/include" "$@"


--------------------------------------------------------------------------------
/translator/physisc-mpi-opencl.cmake:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs the installed physisc with --mpi-opencl and the installed include
# directory. "$@" forwards every argument unchanged, so arguments
# containing whitespace are not split (unquoted $* would re-split them).

"${CMAKE_INSTALL_PREFIX}/bin/physisc" --mpi-opencl \
    -I"${CMAKE_INSTALL_PREFIX}/include" \
    "$@"
6 | 


--------------------------------------------------------------------------------
/translator/physisc-mpi-openmp.cmake:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs the installed physisc with --mpi-openmp and the installed include
# directory. "$@" forwards every argument unchanged, so arguments
# containing whitespace are not split (unquoted $* would re-split them).

"${CMAKE_INSTALL_PREFIX}/bin/physisc" --mpi-openmp -I"${CMAKE_INSTALL_PREFIX}/include" "$@"
4 | 


--------------------------------------------------------------------------------
/translator/physisc-mpi.cmake:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs the installed physisc with --mpi and the installed include
# directory. "$@" forwards every argument unchanged, so arguments
# containing whitespace are not split (unquoted $* would re-split them).

"${CMAKE_INSTALL_PREFIX}/bin/physisc" --mpi -I"${CMAKE_INSTALL_PREFIX}/include" "$@"


--------------------------------------------------------------------------------
/translator/physisc-mpi2.cmake:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs the installed physisc with --mpi2 and the installed include
# directory. "$@" forwards every argument unchanged, so arguments
# containing whitespace are not split (unquoted $* would re-split them).

"${CMAKE_INSTALL_PREFIX}/bin/physisc" --mpi2 -I"${CMAKE_INSTALL_PREFIX}/include" "$@"


--------------------------------------------------------------------------------
/translator/physisc-opencl.cmake:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs the installed physisc with --opencl, the installed include
# directory, and PHYSIS_OPENCL_HEADER_DIR defined as that directory
# wrapped in literal double quotes. "$@" is quoted so that arguments
# containing whitespace are forwarded unchanged instead of being
# re-split.

"${CMAKE_INSTALL_PREFIX}/bin/physisc" --opencl "-DPHYSIS_OPENCL_HEADER_DIR=\"${CMAKE_INSTALL_PREFIX}/include\"" -I"${CMAKE_INSTALL_PREFIX}/include" "$@"
4 | 


--------------------------------------------------------------------------------
/translator/physisc-ref.cmake:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Runs the installed physisc with --ref and the installed include
# directory. "$@" forwards every argument unchanged, so arguments
# containing whitespace are not split (unquoted $* would re-split them).

"${CMAKE_INSTALL_PREFIX}/bin/physisc" --ref -I"${CMAKE_INSTALL_PREFIX}/include" "$@"


--------------------------------------------------------------------------------
/translator/reduce.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | // Copyright 2011, Tokyo Institute of Technology.
 4 | // All rights reserved.
 5 | //
 6 | // This file is distributed under the license described in
 7 | // LICENSE.txt.
 8 | //
 9 | // Author: Naoya Maruyama (naoya@matsulab.is.titech.ac.jp)
10 | 
11 | #ifndef PHYSIS_TRANSLATOR_REDUCE_H_
12 | #define PHYSIS_TRANSLATOR_REDUCE_H_
13 | 
14 | #include "translator/translator_common.h"
15 | #include "translator/grid.h"
16 | #include "physis/physis_util.h"
17 | 
18 | #define REDUCE_NAME ("PSReduce")
19 | 
20 | namespace physis {
21 | namespace translator {
22 | 
23 | class Reduce: public AstAttribute {
24 |  public:
25 |   enum KIND {GRID, KERNEL};  
26 |   Reduce(SgFunctionCallExp *fc);
27 |   virtual ~Reduce();
28 |   static const std::string name;
29 |   Reduce *copy();
30 |   SgFunctionCallExp *reduce_call() const { return reduce_call_; };
31 |   bool IsGrid() const;
32 |   bool IsKernel() const;
33 |   //! Returns the variable referencing the grid to be reduced.
34 |   SgVarRefExp *GetGrid() const;
35 |   //! Returns true if a call is to the reduce intrinsic.
36 |   /*!
37 |     \param call A function call.
38 |     \return True if the call is to the reduce intrinsic.
39 |    */
40 |   static bool IsReduce(SgFunctionCallExp *call);
41 |  protected:
42 |   SgFunctionCallExp *reduce_call_;
43 |   KIND kind_;
44 | };
45 | 
46 | 
47 | } // namespace translator
48 | } // namespace physis
49 | 
50 | #endif /* PHYSIS_TRANSLATOR_REDUCE_H_ */
51 | 


--------------------------------------------------------------------------------
/translator/rose_ast_attribute.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "translator/rose_ast_attribute.h"
 4 | #include "translator/rose_util.h"
 5 | #include "translator/stencil_range.h"
 6 | 
 7 | namespace physis {
 8 | namespace translator {
 9 | 
10 | const std::string GridCallAttribute::name = "GridCall";
11 | 
12 | GridCallAttribute::GridCallAttribute(SgInitializedName *grid_var,
13 |                                      KIND k):
14 |     grid_var_(grid_var), kind_(k) {
15 | }
16 | 
17 | GridCallAttribute::~GridCallAttribute() {}
18 | 
19 | AstAttribute *GridCallAttribute::copy() {
20 |   return new GridCallAttribute(grid_var_, kind_);
21 | }
22 | 
23 | bool GridCallAttribute::IsGet() {
24 |   return kind_ == GET;
25 | }
26 | 
27 | bool GridCallAttribute::IsGetPeriodic() {
28 |   return kind_ == GET_PERIODIC;
29 | }
30 | 
31 | bool GridCallAttribute::IsEmit() {
32 |   return kind_ == EMIT;
33 | }
34 | 
35 | void CopyAllAttributes(SgNode *dst, SgNode *src) {
36 |   // ROSE does not seem to have API for locating all attached
37 |   // attributes or copy them all. So, as an ad-hoc work around, list
38 |   // all potentially attahced attributes here to get them copied to
39 |   // the destination node.
40 |   if (rose_util::GetASTAttribute<StencilIndexVarAttribute>(src)) {
41 |     rose_util::CopyASTAttribute<StencilIndexVarAttribute>(
42 |         dst, src, false);
43 |     LOG_DEBUG() << "StencilIndexVarAttribute found at: "
44 |                 << src->unparseToString() << "\n";
45 |   }
46 | }
47 | 
48 | } // namespace translator
49 | } // namespace physis
50 | 


--------------------------------------------------------------------------------
/translator/rose_ast_attribute.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_ROSE_AST_ATTRIBUTE_H_
 4 | #define PHYSIS_TRANSLATOR_ROSE_AST_ATTRIBUTE_H_
 5 | 
 6 | #include "translator/translator_common.h"
 7 | 
 8 | namespace physis {
 9 | namespace translator {
10 | 
11 | class GridCallAttribute: public AstAttribute {
12 |  public:
13 |   enum KIND {GET, GET_PERIODIC, EMIT};  
14 |   GridCallAttribute(SgInitializedName *grid_var,
15 |                     KIND k);
16 |   virtual ~GridCallAttribute();
17 |   static const std::string name;
18 |   AstAttribute *copy();
19 |   SgInitializedName *grid_var() { return grid_var_; };
20 |   //! Returns true if the node is get.  
21 |   bool IsGet();
22 |   //! Returns true if the node is get_periodic.
23 |   bool IsGetPeriodic();  
24 |   bool IsEmit();  
25 |  protected:
26 |   SgInitializedName *grid_var_;
27 |   KIND kind_;
28 | };
29 | 
30 | void CopyAllAttributes(SgNode *dst, SgNode *src);
31 | 
32 | } // namespace translator
33 | } // namespace physis
34 | 
35 | #endif /* PHYSIS_TRANSLATOR_ROSE_AST_ATTRIBUTE_H_ */
36 | 


--------------------------------------------------------------------------------
/translator/rose_fortran.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_ROSE_FORTRAN_H_
 4 | #define PHYSIS_TRANSLATOR_ROSE_FORTRAN_H_
 5 | 
 6 | #include "translator/translator_common.h"
 7 | #include "physis/internal_common.h"
 8 | 
 9 | namespace physis {
10 | namespace translator {
11 | namespace rose_fortran {
12 | 
13 | SgDerivedTypeStatement *BuildDerivedTypeStatementAndDefinition(
14 |     std::string name, SgScopeStatement *scope);
15 | 
16 | SgFortranDo *BuildFortranDo(SgExpression *initialization,
17 |                             SgExpression *bound,
18 |                             SgExpression *increment,
19 |                             SgBasicBlock *body);
20 | 
21 | SgAllocateStatement *BuildAllocateStatement();
22 | 
23 | 
24 | }  // namespace rose_fortran
25 | }  // namespace translator
26 | }  // namespace physis
27 | 
28 | #endif /* PHYSIS_TRANSLATOR_ROSE_FORTRAN_H_ */
29 | 


--------------------------------------------------------------------------------
/translator/run.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_RUN_H_
 4 | #define PHYSIS_TRANSLATOR_RUN_H_
 5 | 
 6 | #include <vector>
 7 | 
 8 | #include "translator/translator_common.h"
 9 | #include "physis/physis_util.h"
10 | #include "translator/map.h"
11 | 
12 | namespace physis {
13 | namespace translator {
14 | 
15 | class TranslationContext;
16 | // A stencil run built from a function call expression: holds the call, a count expression, and (expression, StencilMap) pairs.
17 | class Run {
18 |   SgFunctionCallExp *call;  // NOTE: no trailing underscore, unlike count_ and stencils_
19 |   SgExpression *count_;  // NOTE(review): presumably the argument located by findCountArg(); may be absent (see HasCount) -- confirm in run.cc
20 |   typedef std::vector<std::pair<SgExpression*, StencilMap*> >
21 |   StencilMapArgVector;
22 |   StencilMapArgVector stencils_;  // exposed read-only through stencils()
23 |  public:
24 |   Run(SgFunctionCallExp *call, TranslationContext *tx);
25 |   virtual ~Run() {}
26 |   // Returns "__" + PS_STENCIL_RUN_NAME + "_" + the string form of id_.
27 |   string GetName() const {
28 |     return "__" + string(PS_STENCIL_RUN_NAME) + "_" + toString(id_);
29 |   }
30 |   // Accessor for the pair vector; HasCount/BuildCount are defined outside this header.
31 |   const StencilMapArgVector &stencils() const { return stencils_; }
32 |   bool HasCount() const;
33 |   SgExpression *BuildCount() const;
34 |   // Static helpers operating on an arbitrary call expression (defined outside this header).
35 |   static bool isRun(SgFunctionCallExp *call);
36 |   static SgExpression *findCountArg(SgFunctionCallExp *call);
37 | #ifdef UNUSED_CODE  // the four declarations below are compiled only when UNUSED_CODE is defined
38 |   virtual bool IsRead(Grid *g, TranslationContext *tx);
39 |   virtual bool IsReadAny(GridSet *gs, TranslationContext *tx);
40 |   virtual bool IsModified(Grid *g, TranslationContext *tx);
41 |   virtual bool IsModifiedAny(GridSet *gs, TranslationContext *tx);
42 | #endif
43 |   // Returns id_, the number that also appears in GetName().
44 |   int id() const { return id_; }
45 |   
46 |  protected:
47 |   int id_;  // NOTE(review): presumably assigned from the shared counter c -- confirm in run.cc
48 |   static Counter c;  // static: one instance shared by all Run objects
49 | };
50 | 
51 | } // namespace translator
52 | } // namespace physis
53 | 
54 | #endif /* PHYSIS_TRANSLATOR_RUN_H_ */
55 | 


--------------------------------------------------------------------------------
/translator/stencil_analysis.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_STENCIL_ANALYSIS_H_
 4 | #define PHYSIS_TRANSLATOR_STENCIL_ANALYSIS_H_
 5 | 
 6 | #include "translator/translator_common.h"
 7 | #include "translator/map.h"
 8 | 
 9 | namespace physis {
10 | namespace translator {
11 | // Analysis entry points (defined outside this header). idx, sm and tx are non-const references, so callees may modify them.
12 | bool AnalyzeStencilIndex(SgExpression *arg, StencilIndex &idx,
13 |                          SgFunctionDeclaration *kernel);
14 | void AnalyzeStencilRange(StencilMap &sm, TranslationContext &tx);
15 | // Earlier AnalyzeEmit signature taking a function declaration, left commented out:
16 | //void AnalyzeEmit(SgFunctionDeclaration *func);
17 | // Both take a top-level AST node and the translation context. NOTE(review): presumably they record get/emit accesses in tx -- confirm.
18 | void AnalyzeGet(SgNode *top_level_node,
19 |                 TranslationContext &tx);
20 | void AnalyzeEmit(SgNode *top_level_node,
21 |                  TranslationContext &tx);
22 | 
23 | /*!
24 |   Analyzes a dot expression that accesses an array member (NOTE(review): inferred from the name; confirm in the .cc).
25 |   \param get The dot expression to analyze.
26 |   \param indices Non-const reference; presumably filled with the index expressions (TODO confirm).
27 |   \param parent Reference to a pointer; presumably set by the callee (TODO confirm).
28 |   \return True upon success; false otherwise.
29 | */
30 | bool AnalyzeGetArrayMember(SgDotExp *get, SgExpressionVector &indices,
31 |                            SgExpression *&parent);
32 | 
33 | 
34 | } // namespace translator
35 | } // namespace physis
36 | 
37 | 
38 | #endif /* PHYSIS_TRANSLATOR_STENCIL_ANALYSIS_H_ */
39 | 


--------------------------------------------------------------------------------
/translator/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | find_package(ROSE)  # the translator tests need ROSE; without it this directory is skipped
 2 | if (NOT ROSE_FOUND)
 3 |   return()
 4 | endif()
 5 | # Headers and libraries: ROSE plus gmock from tests/gmock.
 6 | include_directories(${ROSE_INCLUDE_DIR})
 7 | include_directories(${CMAKE_SOURCE_DIR}/tests/gmock)
 8 | link_directories(${CMAKE_BINARY_DIR}/tests/gmock)
 9 | # One test executable is built per source file listed here.
10 | set (test_src
11 |   test_ast_processing.cc test_grid.cc
12 |   test_ast_traversal.cc)
13 | # Umbrella target: depends on every test-<exe> target created in the loop below.
14 | add_custom_target(test-translator
15 |   WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
16 | # For each test source: build it, stage its input files, and add a test-<exe> target that runs it.
17 | foreach (i ${test_src})
18 |   get_filename_component(exe ${i} NAME_WE)  # executable name = source file name without extension
19 |   add_executable(${exe} ${i} common.cc)
20 |   target_link_libraries(${exe}
21 |     translator
22 |     gmock
23 |     ${ROSE_LIBRARIES}
24 |     ${JAVA_JVM_LIBRARY}
25 |     ${Boost_LIBRARIES})
26 |   file(GLOB input_files 
27 |     "${CMAKE_CURRENT_SOURCE_DIR}/${exe}_input*.c")
28 |   foreach (input ${input_files})  # copy every <exe>_input*.c file into the build directory
29 |     get_filename_component(fname ${input} NAME)
30 |     add_custom_command(
31 |       OUTPUT ${fname}
32 |       COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/${fname} ${CMAKE_CURRENT_BINARY_DIR}/${fname}
33 |       DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${fname})
34 |     add_custom_target(${fname} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${fname})  # target that triggers the copy
35 |     add_dependencies(${exe} ${fname})  # the copy target is built before the test executable
36 |   endforeach ()
37 |   #add_dependencies(test-translator ${exe})
38 |   add_custom_target(test-${exe}
39 |     COMMAND ${exe}
40 |     DEPENDS ${exe}
41 |     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
42 |   add_dependencies(test-translator test-${exe})  # building test-translator runs this test
43 |   unset(input_files)
44 | endforeach ()
45 | 
46 | 


--------------------------------------------------------------------------------
/translator/test/common.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "translator/test/common.h"
 4 | 
 5 | #include <vector>
 6 | 
 7 | using namespace ::std;
 8 | 
 9 | namespace physis {
10 | namespace translator {
11 | namespace test {
12 | // Parses `infile` with the ROSE frontend, runs ROSE's AST tests on the result, and returns the project.
13 | SgProject *FrontEnd(const char *infile) {
14 |   // Fake command line handed to ROSE: the program name "test" followed by the input file.
15 |   vector<string> args(1, "test");
16 |   args.push_back(infile);
17 |   SgProject *project = frontend(args);
18 |   AstTests::runAllTests(project);  // the AST tests run before the project is returned
19 |   return project;
20 | }
21 | 
22 | 
23 | } // namespace test
24 | } // namespace translator
25 | } // namespace physis
26 | 


--------------------------------------------------------------------------------
/translator/test/common.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_TEST_COMMON_H_
 4 | #define PHYSIS_TRANSLATOR_TEST_COMMON_H_
 5 | 
 6 | #include "rose.h"
 7 | 
 8 | namespace physis {
 9 | namespace translator {
10 | namespace test {
11 | // Runs the ROSE frontend on `infile` and returns the resulting project (defined in common.cc).
12 | SgProject *FrontEnd(const char *infile);
13 | 
14 | } // namespace test
15 | } // namespace translator
16 | } // namespace physis
17 | 
18 | 
19 | #endif /* PHYSIS_TRANSLATOR_TEST_COMMON_H_ */
20 | 


--------------------------------------------------------------------------------
/translator/test/test_ast_processing_input_remove_redundant_variable_copy.c:
--------------------------------------------------------------------------------
 1 | // Case: x is returned directly; the function contains no copy of x.
 2 | int DoesNotRemoveNonRedundantVariableCopy() {
 3 |   int x = 10;
 4 |   return x;
 5 | }
 6 | // Case: y is initialized from x and neither variable is assigned again.
 7 | int RemovesRedundantVariableCopy() {
 8 |   int x = 10;
 9 |   int z __attribute__((unused)), y = x; // make sure only y is
10 |                                         // removed and z is left as is
11 |   return y;
12 | }
13 | // Case: x is assigned a new value after y has been initialized from it.
14 | int DoesNotRemoveVariableCopyWhenSrcReassigned() {
15 |   int x = 10;
16 |   int y = x;
17 |   x = 20;
18 |   return y;
19 | }
20 | // Case: y is assigned again after being initialized from x.
21 | int DoesNotRemoveVariableCopyWhenDstReassigned() {
22 |   int x = 10;
23 |   int y = x;
24 |   y = 10;
25 |   return y;
26 | }
27 | // Helper that returns its argument; called by the two cases below.
28 | int foo(int x) {
29 |   return x;
30 | }
31 | // Case: the copy y is used only as the argument of a call to foo.
32 | int RemoveRedundantVariableCopyWithFuncCall() {
33 |   int x = 10;
34 |   int y = x;
35 |   return foo(y);
36 | }
37 | // Case: y is initialized from -x (a unary expression on x) rather than from x itself.
38 | int RemoveWhenAssignedWithUnaryOp() {
39 |   int x = 10;
40 |   int y = -x;
41 |   return foo(y);
42 | }
43 | 


--------------------------------------------------------------------------------
/translator/test/test_ast_processing_input_remove_unused_func.c:
--------------------------------------------------------------------------------
 1 | // static and not referenced anywhere in this file.
 2 | static void ToRemove() {
 3 |   return;
 4 | }
 5 | // Non-static (external linkage); not referenced in this file.
 6 | void NotToRemove1() {
 7 |   return;
 8 | }
 9 | // static, but called from foo() below.
10 | static void NotToRemove2() {
11 |   return;
12 | }
13 | // Calls NotToRemove2, so that static function has a direct caller.
14 | void foo() {
15 |   NotToRemove2();
16 | }
17 | // static; never called directly, only its address is taken in bar().
18 | static void PointerReferenced() {
19 |   return;
20 | }
21 | // Stores PointerReferenced in a local function pointer without calling it.
22 | void bar() {
23 |   void (*x)() =  PointerReferenced;
24 |   return;
25 | }
26 | 
27 | 


--------------------------------------------------------------------------------
/translator/test/test_ast_traversal_input.c:
--------------------------------------------------------------------------------
1 | // Minimal input: one empty function with a single int parameter.
2 | void foo(int param) {
3 | }
4 | 


--------------------------------------------------------------------------------
/translator/test/test_grid_input.c:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/naoyam/physis/39ee5250a2d5baa545ca03e7c5c9aa9c81f1ab19/translator/test/test_grid_input.c


--------------------------------------------------------------------------------
/translator/tocheck:
--------------------------------------------------------------------------------
1 | kernel.cc: analyzeGridWrites -> tx.getGridEmitCalls
2 | translation_context.cc: getGridCalls, getGridEmits 
3 | grid.cc: isGridCall, typespecificcall
4 | 


--------------------------------------------------------------------------------
/translator/translation_util.h:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #ifndef PHYSIS_TRANSLATOR_TRANSLATOR_UTIL_H_
 4 | #define PHYSIS_TRANSLATOR_TRANSLATOR_UTIL_H_
 5 | 
 6 | #include "physis/physis_common.h"
 7 | #include "translator/translator_common.h"
 8 | #include "translator/grid.h"
 9 | namespace physis {
10 | namespace translator {
11 | // Type builders returning SgType*; `scope` is optional and defaults to NULL.
12 | SgType *BuildInt32Type(SgScopeStatement *scope=NULL);
13 | SgType *BuildInt64Type(SgScopeStatement *scope=NULL);
14 | SgType *BuildIndexType(SgScopeStatement *scope=NULL);
15 | SgType *BuildIndexType2(SgScopeStatement *scope=NULL);
16 | // Builds an expression from a PSIndex value. NOTE(review): presumably a literal of the index type -- confirm.
17 | SgExpression *BuildIndexVal(PSIndex v);
18 | // __PSOffsets helpers: the type, and a variable declaration. NOTE(review): presumably named `name`, placed in `scope`, initialized from v -- confirm.
19 | SgType *BuildPSOffsetsType();
20 | SgVariableDeclaration *BuildPSOffsets(std::string name,
21 |                                       SgScopeStatement *scope,
22 |                                       __PSOffsets &v);
23 | // The same pair of helpers for __PSGridRange.
24 | SgType *BuildPSGridRangeType();
25 | SgVariableDeclaration *BuildPSGridRange(std::string name,
26 |                                         SgScopeStatement *block,
27 |                                         __PSGridRange &v);
28 | // NOTE(review): presumably builds a call to the function `name` with the single argument arg1 -- confirm in the .cc.
29 | SgExpression *BuildFunctionCall(const std::string &name,
30 |                                 SgExpression *arg1);
31 | // NOTE(review): presumably strips wrapping types to reach the underlying type -- confirm in the .cc.
32 | SgType *GetBaseType(SgType *ty);
33 |   
34 | } // namespace translator
35 | } // namespace physis
36 | 
37 | 
38 | 
39 | #endif /* PHYSIS_TRANSLATOR_TRANSLATOR_UTIL_H_ */
40 | 


--------------------------------------------------------------------------------
/util/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | find_package(Lua51 REQUIRED)  # Lua 5.1 is mandatory: the configuration library includes lua_loader.cc
2 | message(STATUS "Lua include dir: ${LUA_INCLUDE_DIR}")
3 | message(STATUS "Lua libraries: ${LUA_LIBRARIES}")  # LUA_LIBRARIES lists libraries to link, not a directory
4 | message(STATUS "Lua version: ${LUA_VERSION_STRING}")
5 | include_directories(${LUA_INCLUDE_DIR})
6 | add_library(configuration configuration.cc lua_loader.cc)  # configuration loader library
7 | target_link_libraries(configuration ${LUA_LIBRARIES})


--------------------------------------------------------------------------------
/util/configuration.cc:
--------------------------------------------------------------------------------
 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
 2 | 
 3 | #include "util/configuration.h"
 4 | 
 5 | #include <iostream>
 6 | 
 7 | #include "util/lua_loader.h"
 8 | #include "physis/physis_util.h"
 9 | 
10 | using std::string;
11 | 
12 | namespace physis {
13 | namespace util {
14 | 
15 | // Default constructor: no initializer list and an empty body, so members are default-initialized.
16 | Configuration::Configuration() {}
17 | // Loads the Lua file at `path` and merges its table into tbl_. Returns 0 on success, -1 if the loader produced no table.
18 | int Configuration::LoadFile(const std::string &path) {
19 |   LuaLoader ll;
20 |   LuaTable *tbl = ll.LoadFile(path);  // NOTE(review): ownership of tbl is not visible here -- confirm it is not leaked
21 |   if (!tbl) return -1;  // nothing was loaded: never dereference a null table
22 |   tbl_.Merge(*tbl);
23 |   LOG_DEBUG() << "Current config: " << *this << "\n";
24 |   return 0;
25 | }
26 | std::ostream &Configuration::print(std::ostream &os) const {
27 |   // Writes "{" + joined text + "}", with one "<key>: <value>" piece per key in key_desc_map_ that tbl_ contains.
28 |   StringJoin joined;
29 |   FOREACH (entry, key_desc_map_.begin(), key_desc_map_.end()) {
30 |     const KeyDesc &desc = entry->second;
31 |     if (!tbl_.HasKey(desc)) continue;  // described key that is not set: nothing to print
32 |     tbl_.Find(desc)->second->print(joined << desc << ": ");
33 |   }
34 |   return os << "{" << joined.str() << "}";
35 | }
36 | 
37 | } // namespace util
38 | } // namespace physis
39 | 


--------------------------------------------------------------------------------
/util/log4cpp-test.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include "log4cpp.h"
 3 | #include <iostream>
 4 | using namespace std;
 5 | 
 6 | // Minimal user-defined type used to pass a non-builtin object to the logging macros.
 7 | struct foo {
 8 |     // Writes the literal "foo" to `os` and returns that same stream.
 9 |     std::ostream& print(std::ostream& os) const {
10 |         return os << "foo";
11 |     }
12 | };
13 | // Stream insertion for foo; delegates to foo::print.
14 | std::ostream &operator<<(std::ostream &os, const foo &value) {
15 |     std::ostream &out = value.print(os);
16 |     return out;
17 | }
18 |     
19 | // Smoke test: prints a greeting, then invokes the LOG_* macros and TRACE_START. argc/argv are unused.
20 | int main(int argc, char *argv[])
21 | {
22 |     cout << "Hello, world!" << endl;
23 |     // LOG_DEBUG with a C string, a double, a std::string and a user-defined streamable object.
24 |     LOG_DEBUG("test");
25 |     LOG_DEBUG(1.2);
26 |     LOG_DEBUG(string("string"));
27 |     foo x;
28 |     LOG_DEBUG(x);
29 |     // The other macros provided by log4cpp.h that this file uses.
30 |     LOG_ERROR(x);
31 |     LOG_WARNING("abc");
32 |     TRACE_START;
33 |     
34 |     return EXIT_SUCCESS;
35 | }
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/util/log4cpp.cpp:
--------------------------------------------------------------------------------
1 | #include "log4cpp.h"
2 | #include <iostream>
3 | 
4 | using namespace log4cpp;
5 | // Definition of log4cpp::cerr_logger, constructed with the address of std::cerr.
6 | const logger log4cpp::cerr_logger(&std::cerr);
7 | 


--------------------------------------------------------------------------------