├── .gitignore ├── CMakeLists.txt ├── Doxyfile.cmake ├── LICENSE.txt ├── README.md ├── TODO ├── cmake └── modules │ ├── FindOpenCL.cmake │ └── FindROSE.cmake ├── common ├── CMakeLists.txt └── config.h.cmake ├── config └── sample1.lua ├── docs ├── compilation.md ├── design.org ├── developer-guide.rst ├── install.md ├── programming.md ├── rose.md └── unfiled_notes.rst ├── examples ├── CMakeLists.txt ├── copy │ └── copy3d.c ├── diffusion-benchmark │ ├── CMakeLists.txt │ ├── Makefile.cmake │ ├── README │ ├── autotune.conf │ ├── baseline.cc │ ├── baseline.h │ ├── diffusion3d.cc │ ├── diffusion3d.h │ ├── diffusion3d.mic.c │ ├── diffusion3d_cuda.cu │ ├── diffusion3d_cuda.h │ ├── diffusion3d_cuda_shared.cu │ ├── diffusion3d_cuda_temporal_blocking.cu │ ├── diffusion3d_cuda_temporal_blocking.h │ ├── diffusion3d_mic.cc │ ├── diffusion3d_mic.h │ ├── diffusion3d_openmp.cc │ ├── diffusion3d_openmp.h │ ├── diffusion3d_openmp_temporal_blocking.cc │ ├── diffusion3d_openmp_temporal_blocking.h │ ├── diffusion3d_physis.c │ ├── diffusion3d_physis.h │ ├── main.cc │ ├── opt.conf │ └── stopwatch.h ├── diffusion-fortran │ ├── README │ ├── diffusion3d_fortran.F90 │ └── diffusion3d_original.c ├── himeno │ ├── CMakeLists.txt │ ├── Makefile.cmake │ ├── autotune.conf │ ├── himenobmtxpa_original.c │ ├── himenobmtxpa_physis.c │ ├── opt.conf │ └── physis.conf ├── test_double_buffering.c ├── test_global_variable.c ├── test_periodic_boundary.c ├── test_set.c └── test_staggered_grid.c ├── include └── physis │ ├── .gitignore │ ├── CMakeLists.txt │ ├── config.h.cmake │ ├── fortran │ ├── physis.F90 │ └── physis.h │ ├── internal_common.h │ ├── math.h │ ├── physis.F90 │ ├── physis.h │ ├── physis_common.h │ ├── physis_cuda.h │ ├── physis_cuda_hm.h │ ├── physis_mpi.h │ ├── physis_mpi_cuda.h │ ├── physis_mpi_opencl.h │ ├── physis_mpi_opencl_device.h │ ├── physis_mpi_openmp.h │ ├── physis_opencl.h │ ├── physis_opencl_kernel.h │ ├── physis_ref.h │ ├── physis_user.F90 │ ├── physis_user.h │ ├── physis_util.h │ 
├── reduce.h │ ├── runtime.h │ ├── stopwatch.h │ └── types.h ├── misc ├── add_source_header.py ├── cpplint.py ├── google-c-style.el ├── opencl │ ├── test-runtime-05-orig.c │ ├── test-runtime-05-orig.cuda.cu │ ├── test-runtime-05-orig.ref.c │ └── test-runtime-05.c ├── rose-build.sh ├── source_header.txt └── valgrind-suppressions.supp ├── runtime ├── CMakeLists.txt ├── buffer.cc ├── buffer.h ├── buffer_cuda.cu ├── buffer_cuda.h ├── buffer_mpi_openmp.cc ├── buffer_mpi_openmp.h ├── buffer_mpi_openmp_numa.cc ├── buffer_opencl.cc ├── buffer_opencl.h ├── common.cc ├── cub-1.3.2 │ ├── .settings │ │ ├── org.eclipse.cdt.codan.core.prefs │ │ ├── org.eclipse.cdt.core.prefs │ │ ├── org.eclipse.cdt.ui.prefs │ │ └── org.eclipse.core.runtime.prefs │ ├── LICENSE.TXT │ ├── README.md │ └── cub │ │ ├── block │ │ ├── block_discontinuity.cuh │ │ ├── block_exchange.cuh │ │ ├── block_histogram.cuh │ │ ├── block_load.cuh │ │ ├── block_radix_rank.cuh │ │ ├── block_radix_sort.cuh │ │ ├── block_raking_layout.cuh │ │ ├── block_reduce.cuh │ │ ├── block_scan.cuh │ │ ├── block_shift.cuh │ │ ├── block_store.cuh │ │ └── specializations │ │ │ ├── block_histogram_atomic.cuh │ │ │ ├── block_histogram_sort.cuh │ │ │ ├── block_reduce_raking.cuh │ │ │ ├── block_reduce_raking_commutative_only.cuh │ │ │ ├── block_reduce_warp_reductions.cuh │ │ │ ├── block_scan_raking.cuh │ │ │ └── block_scan_warp_scans.cuh │ │ ├── block_range │ │ ├── block_range_histo.cuh │ │ ├── block_range_radix_sort_downsweep.cuh │ │ ├── block_range_radix_sort_upsweep.cuh │ │ ├── block_range_reduce.cuh │ │ ├── block_range_reduce_by_key.cuh │ │ ├── block_range_scan.cuh │ │ ├── block_range_select.cuh │ │ ├── block_scan_prefix_operators.cuh │ │ └── specializations │ │ │ ├── block_range_histo_gatomic.cuh │ │ │ ├── block_range_histo_satomic.cuh │ │ │ └── block_range_histo_sort.cuh │ │ ├── cub.cuh │ │ ├── device │ │ ├── device_histogram.cuh │ │ ├── device_partition.cuh │ │ ├── device_radix_sort.cuh │ │ ├── device_reduce.cuh │ │ ├── 
device_scan.cuh │ │ ├── device_select.cuh │ │ └── dispatch │ │ │ ├── device_histogram_dispatch.cuh │ │ │ ├── device_radix_sort_dispatch.cuh │ │ │ ├── device_reduce_by_key_dispatch.cuh │ │ │ ├── device_reduce_dispatch.cuh │ │ │ ├── device_scan_dispatch.cuh │ │ │ └── device_select_dispatch.cuh │ │ ├── grid │ │ ├── grid_barrier.cuh │ │ ├── grid_even_share.cuh │ │ ├── grid_mapping.cuh │ │ └── grid_queue.cuh │ │ ├── host │ │ └── spinlock.cuh │ │ ├── iterator │ │ ├── arg_index_input_iterator.cuh │ │ ├── cache_modified_input_iterator.cuh │ │ ├── cache_modified_output_iterator.cuh │ │ ├── constant_input_iterator.cuh │ │ ├── counting_input_iterator.cuh │ │ ├── tex_obj_input_iterator.cuh │ │ ├── tex_ref_input_iterator.cuh │ │ └── transform_input_iterator.cuh │ │ ├── thread │ │ ├── thread_load.cuh │ │ ├── thread_operators.cuh │ │ ├── thread_reduce.cuh │ │ ├── thread_scan.cuh │ │ └── thread_store.cuh │ │ ├── util_allocator.cuh │ │ ├── util_arch.cuh │ │ ├── util_debug.cuh │ │ ├── util_device.cuh │ │ ├── util_macro.cuh │ │ ├── util_namespace.cuh │ │ ├── util_ptx.cuh │ │ ├── util_type.cuh │ │ └── warp │ │ ├── specializations │ │ ├── warp_reduce_shfl.cuh │ │ ├── warp_reduce_smem.cuh │ │ ├── warp_scan_shfl.cuh │ │ └── warp_scan_smem.cuh │ │ ├── warp_reduce.cuh │ │ └── warp_scan.cuh ├── cuda_util.h ├── grid.cc ├── grid.h ├── grid_mpi.cc ├── grid_mpi.h ├── grid_mpi_cuda.cc ├── grid_mpi_cuda.h ├── grid_mpi_cuda_debug_util.h ├── grid_mpi_cuda_exp.cc ├── grid_mpi_cuda_exp.h ├── grid_mpi_debug_util.h ├── grid_mpi_opencl.cc ├── grid_mpi_opencl.h ├── grid_mpi_openmp.cc ├── grid_mpi_openmp.h ├── grid_mpi_openmp_misc.cc ├── grid_space_mpi.h ├── grid_space_mpi_cuda.h ├── grid_util.cc ├── grid_util.h ├── grid_util_mpi_openmp.cc ├── grid_util_mpi_openmp.h ├── ipc.h ├── ipc_mpi.cc ├── ipc_mpi.h ├── libphysis_rt_cuda.cc ├── libphysis_rt_cuda_hm.cc ├── libphysis_rt_mpi.cc ├── libphysis_rt_mpi_cuda.cc ├── libphysis_rt_mpi_opencl.cc ├── libphysis_rt_mpi_opencl_extra.cc ├── libphysis_rt_mpi_openmp.cc 
├── libphysis_rt_mpi_openmp_numa.cc ├── libphysis_rt_opencl.cc ├── libphysis_rt_ref.cc ├── mpi_opencl_runtime.h ├── mpi_openmp_runtime.h ├── mpi_runtime_common.cc ├── mpi_runtime_common.h ├── mpi_util.h ├── mpi_wrapper.cc ├── mpi_wrapper.h ├── opencl_gridcp.cc ├── opencl_gridinit.cc ├── opencl_kernelinit.cc ├── opencl_misc.cc ├── opencl_psinit.cc ├── opencl_runtime.h ├── proc.cc ├── proc.h ├── reduce.h ├── reduce_cuda.cu ├── reduce_cuda.h ├── reduce_grid_mpi_cuda_exp.cu ├── reduce_grid_mpi_cuda_exp.h ├── reduce_mpi_cuda.cu ├── rpc.h ├── rpc_cuda.cc ├── rpc_cuda.h ├── rpc_mpi.cc ├── rpc_mpi.h ├── rpc_mpi_cuda.cc ├── rpc_mpi_cuda.h ├── rpc_mpi_opencl.cc ├── rpc_mpi_opencl.h ├── rpc_mpi_openmp.cc ├── rpc_mpi_openmp.h ├── rpc_opencl.h ├── rpc_opencl_common.h ├── rpc_opencl_mpi.cc ├── rpc_opencl_mpi.h ├── runtime.h ├── runtime_common.cc ├── runtime_common.h ├── runtime_common_cuda.h ├── runtime_cuda.h ├── runtime_cuda_hm.cc ├── runtime_cuda_hm.h ├── runtime_mpi.cc ├── runtime_mpi.h ├── runtime_mpi_cuda.cc ├── runtime_mpi_cuda.h ├── runtime_ref.cc ├── runtime_ref.h ├── tests │ ├── CMakeLists.txt │ ├── test_buffer.cc │ ├── test_buffer_cuda.cc │ ├── test_grid_mpi.cc │ ├── test_grid_mpi_cuda_exp.cc │ ├── test_grid_mpi_cuda_exp_utype.cc │ ├── test_mpi_cuda_runtime.cc │ ├── test_mpi_runtime_2d.cc │ ├── test_mpi_runtime_3d.cc │ └── test_physis_rt_mpi.c ├── timing.cc └── timing.h ├── tests ├── CMakeLists.txt ├── gmock │ ├── COPYING │ ├── README │ ├── gmock-gtest-all.cc │ ├── gmock │ │ └── gmock.h │ ├── gmock_main.cc │ └── gtest │ │ └── gtest.h └── system_tests │ ├── CMakeLists.txt │ ├── run_system_tests.sh.cmake │ └── test_cases │ ├── CMakeLists.txt │ ├── test_01.c │ ├── test_02.c │ ├── test_03.c │ ├── test_08.c │ ├── test_09.c │ ├── test_10.c │ ├── test_15.c │ ├── test_15.manual.cuda.cu │ ├── test_15.manual.ref.c │ ├── test_16.c │ ├── test_16.manual.cuda.cu │ ├── test_16.manual.ref.c │ ├── test_27-pt-periodic.c │ ├── test_27-pt-periodic.manual.cuda.cu │ ├── 
test_27-pt-periodic.manual.ref.c │ ├── test_27-pt-reduction.c │ ├── test_27-pt-reduction.manual.cuda.cu │ ├── test_27-pt-reduction.manual.ref.c │ ├── test_27-pt.c │ ├── test_27-pt.manual.cuda.cu │ ├── test_27-pt.manual.ref.c │ ├── test_3-pt-1d.c │ ├── test_3-pt-1d.manual.cuda.cu │ ├── test_3-pt-1d.manual.ref.c │ ├── test_3-pt-periodic.c │ ├── test_3-pt-periodic.manual.cuda.cu │ ├── test_3-pt-periodic.manual.ref.c │ ├── test_5-pt-2d.c │ ├── test_5-pt-2d.manual.cuda.cu │ ├── test_5-pt-2d.manual.ref.c │ ├── test_5-pt-periodic.c │ ├── test_5-pt-periodic.manual.cuda.cu │ ├── test_5-pt-periodic.manual.ref.c │ ├── test_7-pt-double-type.c │ ├── test_7-pt-double-type.manual.cuda.cu │ ├── test_7-pt-double-type.manual.ref.c │ ├── test_7-pt-int-type.c │ ├── test_7-pt-int-type.manual.ref.c │ ├── test_7-pt-multi-iterations.c │ ├── test_7-pt-multi-iterations.manual.cuda.cu │ ├── test_7-pt-multi-iterations.manual.ref.c │ ├── test_7-pt-neumann-cond.c │ ├── test_7-pt-neumann-cond.manual.cuda.cu │ ├── test_7-pt-neumann-cond.manual.ref.c │ ├── test_7-pt-periodic.c │ ├── test_7-pt-periodic.manual.cuda.cu │ ├── test_7-pt-periodic.manual.ref.c │ ├── test_7-pt-type-mix.c │ ├── test_7-pt-type-mix.manual.cuda.cu │ ├── test_7-pt-type-mix.manual.ref.c │ ├── test_7-pt.c │ ├── test_7-pt.manual.cuda.cu │ ├── test_7-pt.manual.ref.c │ ├── test_7-pt.module.c │ ├── test_7-pt.module_base.c │ ├── test_9-pt-2d.c │ ├── test_9-pt-2d.manual.cuda.cu │ ├── test_9-pt-2d.manual.ref.c │ ├── test_9-pt-periodic-reduction.c │ ├── test_9-pt-periodic-reduction.manual.ref.c │ ├── test_9-pt-reduction.c │ ├── test_9-pt-reduction.manual.ref.c │ ├── test_asymmetric-periodic.c │ ├── test_asymmetric-periodic.manual.cuda.cu │ ├── test_asymmetric-periodic.manual.ref.c │ ├── test_asymmetric.c │ ├── test_asymmetric.manual.cuda.cu │ ├── test_asymmetric.manual.ref.c │ ├── test_cplusplus.cc │ ├── test_mixed-dim.c │ ├── test_mixed-dim.manual.cuda.cu │ ├── test_mixed-dim.manual.ref.c │ ├── test_mixed-dim2.c │ ├── 
test_mixed-dim2.manual.cuda.cu │ ├── test_mixed-dim2.manual.ref.c │ ├── test_mixed-dim3.c │ ├── test_mixed-dim3.manual.cuda.cu │ ├── test_mixed-dim3.manual.ref.c │ ├── test_multi-kernels.c │ ├── test_param_name.c │ ├── test_redblack-periodic.c │ ├── test_redblack-periodic.manual.cuda.cu │ ├── test_redblack-periodic.manual.ref.c │ ├── test_redblack-separated.c │ ├── test_redblack.c │ ├── test_redblack.manual.cuda.cu │ ├── test_redblack.manual.ref.c │ ├── test_reduction-2d.c │ ├── test_reduction-3d-int.c │ ├── test_reduction-3d-long.c │ ├── test_reduction-3d-max.c │ ├── test_reduction-3d-min.c │ ├── test_reduction-3d-prod.c │ ├── test_reduction-3d-sum.c │ ├── test_reduction-3d-sum.manual.cuda.cu │ ├── test_reduction-3d-sum.manual.ref.c │ ├── test_stencil-hole.c │ ├── test_stencil-hole.manual.cuda.cu │ ├── test_stencil-hole.manual.ref.c │ ├── test_user-defined-type-7-pt-periodic-complex.c │ ├── test_user-defined-type-7-pt-periodic-complex.manual.cuda.cu │ ├── test_user-defined-type-7-pt-periodic-complex.manual.ref.c │ ├── test_user-defined-type-7-pt-periodic.c │ ├── test_user-defined-type-7-pt-periodic.manual.cuda.cu │ ├── test_user-defined-type-7-pt-periodic.manual.ref.c │ ├── test_user-defined-type-7-pt.c │ ├── test_user-defined-type-7-pt.manual.cuda.cu │ ├── test_user-defined-type-7-pt.manual.ref.c │ ├── test_user-defined-type-array-member-copy.c │ ├── test_user-defined-type-copyin-copyout-two-members.c │ ├── test_user-defined-type-copyin-copyout.c │ ├── test_user-defined-type-kernel-copy.c │ ├── test_user-defined-type-multi-dim-member.c │ ├── test_user-defined-type-multi-dim-member.manual.cuda.cu │ ├── test_user-defined-type-multi-dim-member.manual.ref.c │ ├── test_user-defined-type-multi-members.c │ ├── test_user-defined-type-multi-members.manual.cuda.cu │ ├── test_user-defined-type-multi-members.manual.ref.c │ ├── test_user-defined-type-transpose.c │ ├── test_user-defined-type1.c │ ├── test_user-defined-type1.manual.cuda.cu │ ├── 
test_user-defined-type1.manual.ref.c │ ├── test_user-defined-type2.c │ ├── test_user-defined-type3.c │ ├── test_user-defined-type3.manual.cuda.cu │ ├── test_user-defined-type3.manual.ref.c │ ├── test_user-defined-type5.c │ ├── test_user-defined-type5.manual.cuda.cu │ └── test_user-defined-type5.manual.ref.c ├── translator ├── CMakeLists.txt ├── alias_analysis.cc ├── alias_analysis.h ├── ast_processing.cc ├── ast_processing.h ├── ast_traversal.h ├── builder_interface.cc ├── builder_interface.h ├── config.h.cmake ├── configuration.cc ├── configuration.h ├── cuda_builder_interface.h ├── cuda_hm_runtime_builder.cc ├── cuda_hm_runtime_builder.h ├── cuda_hm_translator.cc ├── cuda_hm_translator.h ├── cuda_runtime_builder.cc ├── cuda_runtime_builder.h ├── cuda_translator.cc ├── cuda_translator.h ├── cuda_util.cc ├── cuda_util.h ├── def_analysis.cc ├── def_analysis.h ├── domain.cc ├── domain.h ├── fortran_output_fix.cc ├── fortran_output_fix.h ├── grid.cc ├── grid.h ├── kernel.cc ├── kernel.h ├── map.cc ├── map.h ├── mpi_builder_interface.h ├── mpi_cuda_optimizer.cc ├── mpi_cuda_optimizer.h ├── mpi_cuda_runtime_builder.cc ├── mpi_cuda_runtime_builder.h ├── mpi_cuda_translator.cc ├── mpi_cuda_translator.h ├── mpi_opencl_create_kernel.cc ├── mpi_opencl_create_kernel_body.cc ├── mpi_opencl_create_kernel_call.cc ├── mpi_opencl_create_kernel_misc.cc ├── mpi_opencl_create_kernel_multi.cc ├── mpi_opencl_edit_kernel.cc ├── mpi_opencl_optimizer.cc ├── mpi_opencl_optimizer.h ├── mpi_opencl_runtime_builder.cc ├── mpi_opencl_runtime_builder.h ├── mpi_opencl_stencilmap.cc ├── mpi_opencl_stencilrun.cc ├── mpi_opencl_translator.cc ├── mpi_opencl_translator.h ├── mpi_openmp_create_kernel.cc ├── mpi_openmp_init.cc ├── mpi_openmp_translator.cc ├── mpi_openmp_translator.h ├── mpi_runtime_builder.cc ├── mpi_runtime_builder.h ├── mpi_translator.cc ├── mpi_translator.h ├── opencl_translator.cc ├── opencl_translator.h ├── opencl_translator_arghack.cc ├── opencl_translator_consistency.cc ├── 
opencl_translator_create_kernel.cc ├── opencl_translator_edit_kernel.cc ├── opencl_translator_getemit.cc ├── opencl_translator_misc.cc ├── opencl_translator_stencilrun.cc ├── optimizer │ ├── cuda_optimizer.cc │ ├── cuda_optimizer.h │ ├── kernel_inlining.cc │ ├── loop_opt.cc │ ├── loop_peeling.cc │ ├── mpi_cuda_optimizer.cc │ ├── mpi_cuda_optimizer.h │ ├── mpi_optimizer.cc │ ├── mpi_optimizer.h │ ├── offset_cse.cc │ ├── offset_spatial_cse.cc │ ├── optimization_common.cc │ ├── optimization_common.h │ ├── optimization_passes.cc │ ├── optimization_passes.h │ ├── optimizer.cc │ ├── optimizer.h │ ├── primitive_optimization.cc │ ├── reference_optimizer.cc │ ├── reference_optimizer.h │ ├── register_blocking.cc │ └── unconditional_get.cc ├── physis_exception.h ├── physis_names.h ├── physisc-cuda-hm.cmake ├── physisc-cuda.cmake ├── physisc-mpi-cuda.cmake ├── physisc-mpi-opencl.cmake ├── physisc-mpi-openmp.cmake ├── physisc-mpi.cmake ├── physisc-mpi2.cmake ├── physisc-opencl.cmake ├── physisc-ref.cmake ├── physisc.cc ├── reduce.cc ├── reduce.h ├── reference_runtime_builder.cc ├── reference_runtime_builder.h ├── reference_translator.cc ├── reference_translator.h ├── rose_ast_attribute.cc ├── rose_ast_attribute.h ├── rose_fortran.cc ├── rose_fortran.h ├── rose_traversal.cc ├── rose_traversal.h ├── rose_util.cc ├── rose_util.h ├── run.cc ├── run.h ├── runtime_builder.cc ├── runtime_builder.h ├── stencil_analysis.cc ├── stencil_analysis.h ├── stencil_range.cc ├── stencil_range.h ├── test │ ├── CMakeLists.txt │ ├── common.cc │ ├── common.h │ ├── test_ast_processing.cc │ ├── test_ast_processing_input_remove_redundant_variable_copy.c │ ├── test_ast_processing_input_remove_unused_func.c │ ├── test_ast_traversal.cc │ ├── test_ast_traversal_input.c │ ├── test_grid.cc │ └── test_grid_input.c ├── tocheck ├── translation_context.cc ├── translation_context.h ├── translation_util.cc ├── translation_util.h ├── translator.cc ├── translator.h └── translator_common.h └── util ├── CMakeLists.txt 
├── configuration.cc ├── configuration.h ├── log4cpp-test.cpp ├── log4cpp.cpp ├── log4cpp.h ├── lua_loader.cc └── lua_loader.h /.gitignore: -------------------------------------------------------------------------------- 1 | *.mod 2 | *.rmod 3 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011-2012, Naoya Maruyama 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | * Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the 14 | distribution. 15 | * Neither the name of RIKEN AICS nor the names of its contributors may 16 | be used to endorse or promote products derived from this software 17 | without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
# Locate the OpenCL headers and library that accompany a CUDA installation.
#
# The search is only attempted when find_package(CUDA) succeeds; the header
# is looked up under the CUDA include directories.
#
# Variables set by this module:
#   OPENCL_INCLUDE_DIR - directory that contains CL/cl.h
#   OPENCL_LIBRARY     - full path of the OpenCL library
#   OPENCL_FOUND       - TRUE if both of the above were found, FALSE otherwise

find_package (CUDA)

if (CUDA_FOUND)
  find_path (OPENCL_INCLUDE_DIR
    NAMES CL/cl.h
    PATHS ${CUDA_INCLUDE_DIRS}
    )
  if (OPENCL_INCLUDE_DIR)
    # The ENV keyword is case-sensitive. Spelled in lowercase it is taken as
    # a literal directory name (as is the variable name that follows it), so
    # the directories listed in LD_LIBRARY_PATH would never be searched.
    find_library(OPENCL_LIBRARY
      NAMES OpenCL
      PATHS ENV LD_LIBRARY_PATH
      )
  endif ()
endif()

# Report the outcome; OPENCL_FOUND is always defined after this point.
set (OPENCL_FOUND FALSE)
if (OPENCL_INCLUDE_DIR AND OPENCL_LIBRARY)
  message (STATUS "OpenCL found")
  message (STATUS "OPENCL_INCLUDE_DIR=${OPENCL_INCLUDE_DIR}")
  message (STATUS "OPENCL_LIBRARY=${OPENCL_LIBRARY}")
  set (OPENCL_FOUND TRUE)
else ()
  message (STATUS "OpenCL not found")
endif ()

MARK_AS_ADVANCED(
  OPENCL_INCLUDE_DIR
  OPENCL_LIBRARY
  OPENCL_FOUND
  )
# Locate the ROSE compiler infrastructure.
#
# Variables set by this module:
#   ROSE_INCLUDE_DIR - directory that contains rose.h
#   ROSE_LIBRARIES   - the rose library
#   ROSE_FOUND       - TRUE if both of the above were found, FALSE otherwise
#   ROSE_EDG4X       - TRUE when the include directory ends in "include/rose"

find_path(ROSE_INCLUDE_DIR rose.h PATH_SUFFIXES rose NO_SYSTEM_ENVIRONMENT_PATH)

# First look without the system environment path, then repeat the search
# without that restriction.
# NOTE(review): presumably the second call only matters when the first one
# found nothing -- confirm against the find_library documentation.
find_library(ROSE_LIBRARIES rose NO_SYSTEM_ENVIRONMENT_PATH)
find_library(ROSE_LIBRARIES rose)

set(ROSE_FOUND FALSE)
if(ROSE_INCLUDE_DIR AND ROSE_LIBRARIES)
  set(ROSE_FOUND TRUE)
  message(STATUS "ROSE_INCLUDE_DIR=${ROSE_INCLUDE_DIR}")
  message(STATUS "ROSE_LIBRARIES=${ROSE_LIBRARIES}")
  # The front-end generation is inferred from the layout of the include
  # directory: a path ending in "include/rose" is reported as EDG4X,
  # anything else as EDG3.
  string(REGEX MATCH "include/rose$" ROSE_EDG4X ${ROSE_INCLUDE_DIR})
  if(ROSE_EDG4X)
    message(STATUS "Detected ROSE EDG4X")
    set(ROSE_EDG4X TRUE)
  else()
    message(STATUS "Detected ROSE EDG3")
  endif()
endif()

mark_as_advanced(
  ROSE_INCLUDE_DIR
  ROSE_LIBRARIES
  ROSE_FOUND
  )
-------------------------------------------------------------------------------- /docs/unfiled_notes.rst: -------------------------------------------------------------------------------- 1 | * Boost v1.45 (or older) will cause a "dereferencing type-punned 2 | pointer" warning with runtime/reduce.h when gcc optimization is 3 | enabled. It was fixed as discussed here: 4 | https://svn.boost.org/trac/boost/ticket/4538. Version 1.47 seems to 5 | have that fix. 6 | * On Mac OS X, the translator is not tested since building ROSE on Mac OS X 7 | is not well supported. 8 | * On Mac OS X, the Boost library installed by Homebrew may be built 9 | with g++, not the OS X default clang++. nvcc works only with the 10 | default compiler, so the Homebrew-built Boost and nvcc do not work 11 | together. 12 | * On Mac OS X, nvcc (at least v6.5) uses libstdc++, not the default 13 | libc++. Linking by CMake uses c++ rather than nvcc, so the -stdlib flag 14 | needs to be set. 15 | * MPICH fails to compile with OS X c++ with the -stdlib=libstdc++ switch. 
# Generate the stand-alone Makefile that is installed with the example
# sources. @ONLY limits substitution inside the Makefile.cmake template to
# @VAR@ references.
#
# The output path uses ${...}: under policy CMP0053 NEW the @VAR@ form is
# not expanded in CMake code, which would leave a literal
# "@CMAKE_CURRENT_BINARY_DIR@" in both paths below.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Makefile.cmake
  ${CMAKE_CURRENT_BINARY_DIR}/tmp/Makefile @ONLY)

# Install the benchmark sources together with the generated Makefile.
install(FILES
  autotune.conf
  baseline.cc
  baseline.h
  CMakeLists.txt
  diffusion3d.cc
  diffusion3d_cuda.cu
  diffusion3d_cuda.h
  diffusion3d_cuda_shared.cu
  diffusion3d_cuda_temporal_blocking.cu
  diffusion3d_cuda_temporal_blocking.h
  diffusion3d.h
  diffusion3d.mic.c
  diffusion3d_mic.cc
  diffusion3d_mic.h
  diffusion3d_openmp.cc
  diffusion3d_openmp.h
  diffusion3d_openmp_temporal_blocking.cc
  diffusion3d_openmp_temporal_blocking.h
  diffusion3d_physis.c
  diffusion3d_physis.h
  main.cc
  opt.conf
  README
  stopwatch.h
  ${CMAKE_CURRENT_BINARY_DIR}/tmp/Makefile
  DESTINATION examples/diffusion-benchmark)
Compilation 21 | ----------- 22 | 23 | Type `make'. By default, diffusion3d_baseline and diffusion3d_openmp 24 | will be built. Other variants can be built with `make 25 | variant-name'. For example, the CUDA variant can be built with `make cuda' 26 | if the CUDA toolkit is available. 27 | 28 | 29 | Usage 30 | ----- 31 | 32 | Execute each benchmark as: 33 | 34 | benchmark_executable [--count N] [--size S] 35 | 36 | The options can be used to set benchmark configurations. For more 37 | information, see the help message by supplying the --help option. 38 | 39 | 40 | Notes 41 | ----- 42 | - File diffusion3d.mic.c 43 | - deprecated 44 | - File diffusion3d_mic.cc 45 | - not tested 46 | - needs PCI timing. see diffusion3d.mic.c 47 | 48 | 49 | -------------------------------------------------------------------------------- /examples/diffusion-benchmark/autotune.conf: -------------------------------------------------------------------------------- 1 | CUDA_BLOCK_SIZE = {{32, 4, 1}, {32, 8, 1}, {64, 4, 1}, {64, 8, 1}, {128, 4, 1}, {128, 8, 1}} 2 | OPT_KERNEL_INLINING = {true, false} 3 | OPT_LOOP_PEELING = {true, false} 4 | OPT_REGISTER_BLOCKING = {true, false} 5 | OPT_UNCONDITIONAL_GET = {true, false} 6 | OPT_OFFSET_CSE = {true, false} 7 | OPT_OFFSET_SPATIAL_CSE = {true, false} 8 | OPT_OFFSET_COMP = {true, false} 9 | OPT_LOOP_OPT = {true, false} 10 | -------------------------------------------------------------------------------- /examples/diffusion-benchmark/baseline.h: -------------------------------------------------------------------------------- 1 | #ifndef BENCHMARKS_DIFFUSION3D_BASELINE_H_ 2 | #define BENCHMARKS_DIFFUSION3D_BASELINE_H_ 3 | 4 | #include "diffusion3d.h" 5 | 6 | namespace diffusion3d { 7 | 8 | class Baseline: public Diffusion3D { 9 | protected: 10 | REAL *f1_, *f2_; 11 | public: 12 | Baseline(int nx, int ny, int nz): 13 | Diffusion3D(nx, ny, nz), f1_(NULL), f2_(NULL) {} 14 | virtual std::string GetName() const { 15 | return std::string("baseline"); 16 | } 17 | 
virtual void InitializeBenchmark(); 18 | virtual void FinalizeBenchmark(); 19 | virtual void RunKernel(int count); 20 | virtual REAL GetAccuracy(int count); 21 | virtual void Dump() const; 22 | }; 23 | 24 | } 25 | 26 | #endif /* DIFFUSION3D_DIFFUSION3D_H_ */ 27 | -------------------------------------------------------------------------------- /examples/diffusion-benchmark/diffusion3d.cc: -------------------------------------------------------------------------------- 1 | #include "diffusion3d.h" 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using std::vector; 10 | using std::string; 11 | 12 | namespace diffusion3d { 13 | 14 | void Initialize(REAL *buff, const int nx, const int ny, const int nz, 15 | const REAL kx, const REAL ky, const REAL kz, 16 | const REAL dx, const REAL dy, const REAL dz, 17 | const REAL kappa, const REAL time) { 18 | REAL ax = exp(-kappa*time*(kx*kx)); 19 | REAL ay = exp(-kappa*time*(ky*ky)); 20 | REAL az = exp(-kappa*time*(kz*kz)); 21 | int jz; 22 | for (jz = 0; jz < nz; jz++) { 23 | int jy; 24 | for (jy = 0; jy < ny; jy++) { 25 | int jx; 26 | for (jx = 0; jx < nx; jx++) { 27 | int j = jz*nx*ny + jy*nx + jx; 28 | REAL x = dx*((REAL)(jx + 0.5)); 29 | REAL y = dy*((REAL)(jy + 0.5)); 30 | REAL z = dz*((REAL)(jz + 0.5)); 31 | REAL f0 = (REAL)0.125 32 | *(1.0 - ax*cos(kx*x)) 33 | *(1.0 - ay*cos(ky*y)) 34 | *(1.0 - az*cos(kz*z)); 35 | buff[j] = f0; 36 | } 37 | } 38 | } 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /examples/diffusion-benchmark/diffusion3d_cuda_temporal_blocking.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naoyam/physis/39ee5250a2d5baa545ca03e7c5c9aa9c81f1ab19/examples/diffusion-benchmark/diffusion3d_cuda_temporal_blocking.cu -------------------------------------------------------------------------------- /examples/diffusion-benchmark/diffusion3d_cuda_temporal_blocking.h: 
// Runs `count` sweeps of the 7-point stencil inside an offload region.
//
// Each sweep reads f1_ and writes f2_, then swaps the two pointers, so f1_
// always names the most recently written buffer when a sweep finishes.
// At a domain face the missing neighbour index is replaced by the cell's
// own index, i.e. the cell's own value stands in for the value outside.
//
// NOTE(review): the offload clauses request 2 MiB alignment while
// InitializeBenchmark allocates the buffers with 4096-byte alignment --
// confirm this combination is intended.
// NOTE(review): the flat indices are plain int; presumably nx_*ny_*nz_ is
// assumed to fit in an int -- confirm for large grids.
void Diffusion3DMIC::RunKernel(int count) {
  int i;
#pragma offload target(mic) \
  inout(f1_:length(nx_*ny_*nz_) align(2*1024*1024)) \
  inout(f2_:length(nx_*ny_*nz_) align(2*1024*1024))
  {
    for (i = 0; i < count; ++i) {
      int y, z;
      // The z and y loops are collapsed into one parallel iteration space;
      // the x loop below stays inside each iteration.
#pragma omp parallel for collapse(2) private(y, z)
      for (z = 0; z < nz_; z++) {
        for (y = 0; y < ny_; y++) {
          int x;
#pragma ivdep
          for (x = 0; x < nx_; x++) {
            // Flat indices of the centre cell and its six neighbours
            // (west/east along x, north/south along y, bottom/top along z).
            int c, w, e, n, s, b, t;
            c = x + y * nx_ + z * nx_ * ny_;
            // On a boundary face the neighbour falls back to the centre.
            w = (x == 0) ? c : c - 1;
            e = (x == nx_-1) ? c : c + 1;
            n = (y == 0) ? c : c - nx_;
            s = (y == ny_-1) ? c : c + nx_;
            b = (z == 0) ? c : c - nx_ * ny_;
            t = (z == nz_-1) ? c : c + nx_ * ny_;
            f2_[c] = cc_ * f1_[c] + cw_ * f1_[w] + ce_ * f1_[e]
                + cs_ * f1_[s] + cn_ * f1_[n] + cb_ * f1_[b] + ct_ * f1_[t];
          }
        }
      }
      // Swap the buffers so the next sweep reads what was just written.
      REAL *t = f1_;
      f1_ = f2_;
      f2_ = t;
    }
  }
  return;
}
virtual void RunKernel(int count); 18 | virtual void InitializeOMP( 19 | REAL *buff, const int nx, const int ny, const int nz, 20 | const REAL kx, const REAL ky, const REAL kz, 21 | const REAL dx, const REAL dy, const REAL dz, 22 | const REAL kappa, const REAL time); 23 | 24 | }; 25 | 26 | } 27 | 28 | #endif /* BENCHMARKS_DIFFUSION3D_DIFFUSION3D_OPENMP_H_ */ 29 | -------------------------------------------------------------------------------- /examples/diffusion-benchmark/diffusion3d_openmp_temporal_blocking.h: -------------------------------------------------------------------------------- 1 | #ifndef BENCHMARKS_DIFFUSION3D_DIFFUSION3D_OPENMP_TEMPORAL_BLOCKING_H_ 2 | #define BENCHMARKS_DIFFUSION3D_DIFFUSION3D_OPENMP_TEMPORAL_BLOCKING_H_ 3 | 4 | #include "diffusion3d_openmp.h" 5 | 6 | namespace diffusion3d { 7 | 8 | class Diffusion3DOpenMPTemporalBlocking: public Diffusion3DOpenMP { 9 | public: 10 | Diffusion3DOpenMPTemporalBlocking(int nx, int ny, int nz): 11 | Diffusion3DOpenMP(nx, ny, nz) {} 12 | virtual std::string GetName() const { 13 | return std::string("openmp_temporal_blocking"); 14 | } 15 | virtual void RunKernel(int count); 16 | }; 17 | 18 | } 19 | 20 | #endif /* BENCHMARKS_DIFFUSION3D_DIFFUSION3D_OPENMP_TEMPORAL_BLOCKING_H_ */ 21 | -------------------------------------------------------------------------------- /examples/diffusion-benchmark/diffusion3d_physis.h: -------------------------------------------------------------------------------- 1 | #ifndef BENCHMARKS_DIFFUSION3D_DIFFUSION3D_PHYSIS_H_ 2 | #define BENCHMARKS_DIFFUSION3D_DIFFUSION3D_PHYSIS_H_ 3 | 4 | #include "diffusion3d.h" 5 | #include "baseline.h" 6 | 7 | extern "C" { 8 | extern void initialize_physis(int argc, char **argv, 9 | int nx, int ny, int nz); 10 | extern void initialize_benchmark_physis(int nx, int ny, int nz); 11 | extern void finalize_benchmark_physis(); 12 | extern void run_kernel_physis(int count, REAL *f1_host, 13 | int nx, int ny, int nz, 14 | REAL ce, REAL cw, REAL cn, REAL cs, 15 | REAL 
ct, REAL cb, REAL cc); 16 | 17 | } 18 | 19 | namespace diffusion3d { 20 | 21 | class Diffusion3DPhysis: public Baseline { 22 | public: 23 | Diffusion3DPhysis(int nx, int ny, int nz, 24 | int argc, char **argv): 25 | Baseline(nx, ny, nz) { 26 | initialize_physis(argc, argv, nx, ny, nz); 27 | } 28 | virtual std::string GetName() const { 29 | return std::string("physis"); 30 | } 31 | virtual void InitializeBenchmark() { 32 | Baseline::InitializeBenchmark(); 33 | initialize_benchmark_physis(nx_, ny_, nz_); 34 | } 35 | virtual void FinalizeBenchmark() { 36 | finalize_benchmark_physis(); 37 | } 38 | virtual void RunKernel(int count) { 39 | run_kernel_physis(count, f1_, nx_, ny_, nz_, 40 | ce_, cw_, cn_, cs_, ct_, cb_, cc_); 41 | } 42 | 43 | }; 44 | 45 | } 46 | 47 | #endif /* BENCHMARKS_DIFFUSION3D_DIFFUSION3D_PHYSIS_H_ */ 48 | 49 | -------------------------------------------------------------------------------- /examples/diffusion-benchmark/opt.conf: -------------------------------------------------------------------------------- 1 | OPT_OFFSET_COMP = true 2 | OPT_LOOP_PEELING = true 3 | OPT_REGISTER_BLOCKING = true 4 | OPT_LOOP_OPT = true 5 | OPT_UNCONDITIONAL_GET = true 6 | -------------------------------------------------------------------------------- /examples/diffusion-benchmark/stopwatch.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011, Tokyo Institute of Technology. 2 | // All rights reserved. 3 | // 4 | // This file is distributed under the license described in 5 | // LICENSE.txt. 
6 | // 7 | // Author: Naoya Maruyama (naoya@matsulab.is.titech.ac.jp) 8 | 9 | #ifndef BENCHMARKS_COMMON_STOPWATCH_H_ 10 | #define BENCHMARKS_COMMON_STOPWATCH_H_ 11 | 12 | #if defined(unix) || defined(__unix__) || defined(__unix) || \ 13 | defined(__APPLE__) 14 | #include 15 | #include 16 | 17 | typedef struct { 18 | struct timeval tv; 19 | } Stopwatch; 20 | 21 | #else 22 | #error "Unknown environment" 23 | #endif 24 | 25 | static inline void StopwatchQuery(Stopwatch *w) { 26 | gettimeofday(&(w->tv), NULL); 27 | return; 28 | } 29 | 30 | static inline float StopwatchDiff(const Stopwatch *begin, 31 | const Stopwatch *end) { 32 | return (end->tv.tv_sec - begin->tv.tv_sec) 33 | + (end->tv.tv_usec - begin->tv.tv_usec) * 1.0e-06; 34 | } 35 | 36 | static inline void StopwatchStart(Stopwatch *w) { 37 | StopwatchQuery(w); 38 | return; 39 | } 40 | 41 | static inline float StopwatchStop(Stopwatch *w) { 42 | Stopwatch now; 43 | StopwatchQuery(&now); 44 | return StopwatchDiff(w, &now); 45 | } 46 | 47 | #endif /* BENCHMARKS_COMMON_STOPWATCH_H_ */ 48 | -------------------------------------------------------------------------------- /examples/diffusion-fortran/README: -------------------------------------------------------------------------------- 1 | This is work in progress. 
2 | -------------------------------------------------------------------------------- /examples/himeno/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Makefile.cmake 2 | @CMAKE_CURRENT_BINARY_DIR@/tmp/Makefile @ONLY) 3 | 4 | install(FILES 5 | himenobmtxpa_physis.c himenobmtxpa_original.c 6 | @CMAKE_CURRENT_BINARY_DIR@/tmp/Makefile 7 | physis.conf 8 | opt.conf 9 | autotune.conf 10 | DESTINATION examples/himeno) 11 | 12 | -------------------------------------------------------------------------------- /examples/himeno/autotune.conf: -------------------------------------------------------------------------------- 1 | CUDA_BLOCK_SIZE = {{32, 4, 1}, {32, 8, 1}, {64, 4, 1}, {64, 8, 1}, {128, 4, 1}, {128, 8, 1}} 2 | OPT_KERNEL_INLINING = {true, false} 3 | OPT_LOOP_PEELING = {true, false} 4 | OPT_REGISTER_BLOCKING = {true, false} 5 | OPT_UNCONDITIONAL_GET = {true, false} 6 | OPT_OFFSET_CSE = {true, false} 7 | OPT_OFFSET_SPATIAL_CSE = {true, false} 8 | OPT_OFFSET_COMP = {true, false} 9 | OPT_LOOP_OPT = {true, false} 10 | -------------------------------------------------------------------------------- /examples/himeno/opt.conf: -------------------------------------------------------------------------------- 1 | CUDA_BLOCK_SIZE = {64, 4, 1} 2 | OPT_OFFSET_COMP = true 3 | OPT_LOOP_OPT = true 4 | -------------------------------------------------------------------------------- /examples/himeno/physis.conf: -------------------------------------------------------------------------------- 1 | OPT_OFFSET_COMP = true 2 | OPT_LOOP_OPT = true -------------------------------------------------------------------------------- /examples/test_double_buffering.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "physis/physis.h" 3 | 4 | #define N 8 5 | 6 | void kernel1(const int x, const int y, const int z, 7 | PSGrid3DFloat g1, PSGrid3DFloat 
g2) { 8 | float v = PSGridGet(g1, x, y, z) * 2; 9 | PSGridEmit(g2, v); 10 | return; 11 | } 12 | 13 | void kernel2(const int x, const int y, const int z, 14 | PSGrid3DFloat g1, PSGrid3DFloat g2) { 15 | float v = PSGridGet(g1, x, y, z) * 2; 16 | PSGridEmit(g2, v); 17 | return; 18 | } 19 | 20 | int main(int argc, char *argv[]) { 21 | PSInit(&argc, &argv, 3, N, N, N); 22 | PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N); 23 | PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N); 24 | PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N); 25 | size_t nelms = N*N*N; 26 | 27 | #if 0 28 | float *indata = (float *)malloc(sizeof(float) * nelms); 29 | int i; 30 | for (i = 0; i < nelms; i++) { 31 | indata[i] = i; 32 | } 33 | float *outdata = (float *)malloc(sizeof(float) * nelms); 34 | 35 | PSGridCopyin(g, indata); 36 | #endif 37 | 38 | #if 1 39 | PSStencilRun(PSStencilMap(kernel1, d, g1, g2), 40 | PSStencilMap(kernel1, d, g2, g1)); 41 | #else 42 | PSStencilRun(PSStencilMap(kernel1, d, g1, g2), 43 | PSStencilMap(kernel2, d, g2, g1)); 44 | #endif 45 | 46 | #if 0 47 | PSGridCopyout(g, outdata); 48 | 49 | for (i = 0; i < nelms; i++) { 50 | if (indata[i] * 2 != outdata[i]) { 51 | fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n", 52 | i, indata[i], outdata[i]); 53 | } 54 | } 55 | 56 | PSGridFree(g); 57 | #endif 58 | 59 | PSFinalize(); 60 | return 0; 61 | } 62 | 63 | -------------------------------------------------------------------------------- /examples/test_global_variable.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "physis/physis.h" 3 | 4 | #define N 8 5 | 6 | PSGrid3DFloat g; 7 | 8 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g) { 9 | float v = PSGridGet(g, x, y, z) * 2; 10 | PSGridEmit(g, v); 11 | return; 12 | } 13 | 14 | int main(int argc, char *argv[]) { 15 | PSInit(&argc, &argv, 3, N, N, N); 16 | g = PSGrid3DFloatNew(N, N, N); 17 | PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N); 18 | size_t 
nelms = N*N*N; 19 | 20 | float *indata = (float *)malloc(sizeof(float) * nelms); 21 | int i; 22 | for (i = 0; i < nelms; i++) { 23 | indata[i] = i; 24 | } 25 | float *outdata = (float *)malloc(sizeof(float) * nelms); 26 | 27 | PSGridCopyin(g, indata); 28 | 29 | PSStencilRun(PSStencilMap(kernel, d, g)); 30 | 31 | PSGridCopyout(g, outdata); 32 | 33 | for (i = 0; i < nelms; i++) { 34 | if (indata[i] * 2 != outdata[i]) { 35 | fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n", 36 | i, indata[i], outdata[i]); 37 | } 38 | } 39 | 40 | PSGridFree(g); 41 | PSFinalize(); 42 | free(indata); 43 | free(outdata); 44 | return 0; 45 | } 46 | 47 | -------------------------------------------------------------------------------- /examples/test_set.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "physis/physis.h" 4 | 5 | #define N 2 6 | 7 | #define IDX3(x, y, z) ((x) + (y) * N + (z) * N * N) 8 | 9 | int main(int argc, char *argv[]) { 10 | PSInit(&argc, &argv, 3, N, N, N); 11 | PSGrid3DFloat g = PSGrid3DFloatNew(N, N, N); 12 | size_t nelms = N*N*N; 13 | 14 | int i, j, k; 15 | float v = 0; 16 | for (i = 0; i < N; ++i) { 17 | for (j = 0; j < N; ++j) { 18 | for (k = 0; k < N; ++k) { 19 | PSGridSet(g, i, j, k, v); 20 | ++v; 21 | } 22 | } 23 | } 24 | 25 | float *outdata = (float *)malloc(sizeof(float) * nelms); 26 | PSGridCopyout(g, outdata); 27 | 28 | v = 0; 29 | for (i = 0; i < N; ++i) { 30 | for (j = 0; j < N; ++j) { 31 | for (k = 0; k < N; ++k) { 32 | if (outdata[IDX3(i, j, k)] != v) { 33 | fprintf(stderr, "Error: mismatch at %d:%d:%d, in: %f, out: %f\n", 34 | i, j, k, outdata[IDX3(i,j,k)], v); 35 | } 36 | ++v; 37 | } 38 | } 39 | } 40 | 41 | PSGridFree(g); 42 | PSFinalize(); 43 | free(outdata); 44 | return 0; 45 | } 46 | 47 | -------------------------------------------------------------------------------- /include/physis/.gitignore: 
-------------------------------------------------------------------------------- 1 | config.h 2 | -------------------------------------------------------------------------------- /include/physis/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake 3 | ${CMAKE_CURRENT_SOURCE_DIR}/config.h) -------------------------------------------------------------------------------- /include/physis/config.h.cmake: -------------------------------------------------------------------------------- 1 | #ifndef PHYSIS_CONFIG_H_ 2 | #define PHYSIS_CONFIG_H_ 3 | 4 | // These are duplicates of common/config.h, but need to be here since 5 | // common/config.h is not going to be installed, while this header file 6 | // is installed by make install. 7 | #cmakedefine CUDA_ENABLED 8 | #cmakedefine MPI_ENABLED 9 | #cmakedefine PS_DEBUG 10 | #cmakedefine PS_VERBOSE 11 | #cmakedefine PS_WARNING 12 | 13 | #cmakedefine AUTO_DOUBLE_BUFFERING 14 | 15 | #endif /* PHYSIS_CONFIG_H_ */ 16 | -------------------------------------------------------------------------------- /include/physis/fortran/physis.h: -------------------------------------------------------------------------------- 1 | ! Copyright 2011-2013, RIKEN AICS. 2 | ! All rights reserved. 3 | ! 4 | ! This file is distributed under the BSD license. See LICENSE.txt for 5 | ! details. 6 | #ifndef PHYSIS_PHYSIS_FORTRAN_PHYSIS_H_ 7 | #define PHYSIS_PHYSIS_FORTRAN_PHYSIS_H_ 8 | 9 | #if ! 
defined(PHYSIS_INDEX_INT64) 10 | #define PHYSIS_INDEX_INT32 11 | #endif 12 | 13 | #endif /* PHYSIS_PHYSIS_FORTRAN_PHYSIS_H_ */ 14 | -------------------------------------------------------------------------------- /include/physis/math.h: -------------------------------------------------------------------------------- 1 | #ifndef PHYSIS_MATH_H_ 2 | #define PHYSIS_MATH_H_ 3 | 4 | #ifdef PHYSIS_USER 5 | extern double exp(double x); 6 | extern float expf(float x); 7 | extern long double expl(long double x); 8 | extern double cos(double x); 9 | extern float cosf(float x); 10 | extern double acos(double x); 11 | extern float acosf(float x); 12 | #else 13 | #if defined(PHYSIS_REF) || defined(PHYSIS_MPI) || defined(PHYSIS_MPI_OPENMP) 14 | #include 15 | #endif 16 | #endif 17 | 18 | 19 | 20 | #endif /* PHYSIS_MATH_H_ */ 21 | -------------------------------------------------------------------------------- /include/physis/physis.F90: -------------------------------------------------------------------------------- 1 | ! Copyright 2011-2013, RIKEN AICS. 2 | ! All rights reserved. 3 | ! 4 | ! This file is distributed under the BSD license. See LICENSE.txt for 5 | ! details. 6 | 7 | #ifndef PHYSIS_PHYSIS_F90_ 8 | #define PHYSIS_PHYSIS_F90_ 9 | 10 | #if defined(PHYSIS_USER) 11 | #include "physis/physis_user.F90" 12 | #endif 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /include/physis/physis.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #ifndef PHYSIS_PHYSIS_H_ 4 | #define PHYSIS_PHYSIS_H_ 5 | 6 | #if defined(PHYSIS_USER) 7 | #include "physis/physis_user.h" 8 | #endif 9 | 10 | /* Select exactly one backend header; every backend is included through the "physis/" prefix. */ #if defined(PHYSIS_REF) 11 | #include "physis/physis_ref.h" 12 | #elif defined(PHYSIS_CUDA) 13 | #include "physis/physis_cuda.h" 14 | #elif defined(PHYSIS_CUDA_HM) 15 | #include "physis/physis_cuda_hm.h" 16 | #elif defined(PHYSIS_MPI) 17 | #include "physis/physis_mpi.h" 18 | #elif defined(PHYSIS_MPI_CUDA) 19 | #include "physis/physis_mpi_cuda.h" 20 | #elif defined(PHYSIS_OPENCL) 21 | #include "physis/physis_opencl.h" 22 | #elif defined(PHYSIS_MPI_OPENCL) 23 | #include "physis/physis_mpi_opencl.h" 24 | #elif defined(PHYSIS_MPI_OPENMP) 25 | #include "physis/physis_mpi_openmp.h" 26 | #endif 27 | 28 | 29 | #include "physis/math.h" 30 | 31 | #endif /* PHYSIS_PHYSIS_H_ */ 32 | -------------------------------------------------------------------------------- /include/physis/physis_cuda_hm.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011-2013, RIKEN AICS. 2 | // All rights reserved. 3 | // 4 | // This file is distributed under the BSD license. See LICENSE.txt for 5 | // details. 6 | 7 | #ifndef PHYSIS_PHYSIS_CUDA_HM_H_ 8 | #define PHYSIS_PHYSIS_CUDA_HM_H_ 9 | 10 | #include "physis/physis_cuda.h" 11 | 12 | #endif /* PHYSIS_PHYSIS_CUDA_HM_H_ */ 13 | 14 | -------------------------------------------------------------------------------- /include/physis/reduce.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011, Tokyo Institute of Technology. 2 | // All rights reserved. 3 | // 4 | // This file is distributed under the license described in 5 | // LICENSE.txt. 
6 | // 7 | // Author: Naoya Maruyama (naoya@matsulab.is.titech.ac.jp) 8 | 9 | #ifndef PHYSIS_REDUCTION_H_ 10 | #define PHYSIS_REDUCTION_H_ 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | enum PSReduceOp { 17 | PS_MAX, 18 | PS_MIN, 19 | PS_SUM, 20 | PS_PROD 21 | }; 22 | 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | 28 | 29 | #endif /* PHYSIS_REDUCTION_H_ */ 30 | -------------------------------------------------------------------------------- /include/physis/runtime.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_RUNTIME_H_ 4 | #define PHYSIS_RUNTIME_H_ 5 | 6 | #include 7 | 8 | #include "physis/stopwatch.h" 9 | 10 | #define __PS_PERIODIC(x, y) (((x)+(y))%(y)) 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif /* __cplusplus */ 15 | 16 | extern FILE *__ps_trace; 17 | 18 | static inline void __PSTraceStencilPre(const char *msg) { 19 | if (__ps_trace) { 20 | fprintf(__ps_trace, "Physis: Stencil started (%s)\n", msg); 21 | } 22 | return; 23 | } 24 | 25 | static inline void __PSTraceStencilPost(float time) { 26 | if (__ps_trace) { 27 | fprintf(__ps_trace, "Physis: Stencil finished (time: %f)\n", time); 28 | } 29 | return; 30 | } 31 | 32 | #ifdef AUTO_TUNING 33 | /** initialize random 34 | * @param[in] n ... number of randomized value 35 | * @return random handle 36 | */ 37 | extern void *__PSRandomInit(int n); 38 | /** get randomized value 39 | * @param[in] handle ... random handle 40 | * @param[in] count ... index of randomized value 41 | * @return randomized value 42 | */ 43 | static inline int __PSRandom(void *handle, int count) { 44 | return ((int *)handle)[count]; 45 | } 46 | /** finalize random 47 | * @param[in] handle ... 
random handle 48 | */ 49 | static inline void __PSRandomFini(void *handle) { 50 | free(handle); 51 | } 52 | #endif 53 | 54 | #ifdef __cplusplus 55 | } 56 | #endif /* __cplusplus */ 57 | 58 | 59 | #endif /* PHYSIS_RUNTIME_H_ */ 60 | -------------------------------------------------------------------------------- /include/physis/stopwatch.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011, Tokyo Institute of Technology. 2 | // All rights reserved. 3 | // 4 | // This file is distributed under the license described in 5 | // LICENSE.txt. 6 | // 7 | // Author: Naoya Maruyama (naoya@matsulab.is.titech.ac.jp) 8 | 9 | #ifndef PHYSIS_STOPWATCH_H_ 10 | #define PHYSIS_STOPWATCH_H_ 11 | 12 | #if defined(unix) || defined(__unix__) || defined(__unix) || \ 13 | defined(__APPLE__) 14 | #include 15 | #include 16 | 17 | typedef struct { 18 | struct timeval tv; 19 | } __PSStopwatch; 20 | 21 | #else 22 | #error "Unknown environment" 23 | #endif 24 | 25 | static inline void __PSStopwatchQuery(__PSStopwatch *w) { 26 | gettimeofday(&(w->tv), NULL); 27 | return; 28 | } 29 | 30 | // returns milliseconds 31 | static inline float __PSStopwatchDiff(const __PSStopwatch *begin, 32 | const __PSStopwatch *end) { 33 | return (end->tv.tv_sec - begin->tv.tv_sec) * 1000.0f 34 | + (end->tv.tv_usec - begin->tv.tv_usec) / 1000.0f; 35 | } 36 | 37 | static __inline void __PSStopwatchStart(__PSStopwatch *w) { 38 | __PSStopwatchQuery(w); 39 | return; 40 | } 41 | 42 | static __inline float __PSStopwatchStop(__PSStopwatch *w) { 43 | __PSStopwatch now; 44 | __PSStopwatchQuery(&now); 45 | return __PSStopwatchDiff(w, &now); 46 | } 47 | 48 | #endif /* PHYSIS_STOPWATCH_H_ */ 49 | -------------------------------------------------------------------------------- /include/physis/types.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011, Tokyo Institute of Technology. 2 | // All rights reserved. 
3 | // 4 | // This file is distributed under the license described in 5 | // LICENSE.txt. 6 | // 7 | // Author: Naoya Maruyama (naoya@matsulab.is.titech.ac.jp) 8 | 9 | #ifndef PHYSIS_TYPES_H_ 10 | #define PHYSIS_TYPES_H_ 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | typedef int PSType; 17 | enum PSPrimitiveType { 18 | PS_INT = 0, 19 | PS_LONG = 1, 20 | PS_FLOAT = 2, 21 | PS_DOUBLE = 3, 22 | PS_USER = 4 23 | }; 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | 30 | #endif /* PHYSIS_TYPES_H_ */ 31 | -------------------------------------------------------------------------------- /misc/source_header.txt: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | -------------------------------------------------------------------------------- /misc/valgrind-suppressions.supp: -------------------------------------------------------------------------------- 1 | { 2 | 3 | Memcheck:Addr8 4 | ... 5 | fun:lua_isnumber 6 | ... 7 | fun:_ZN6physis4util13Configuration8LoadFileERKSs 8 | fun:main 9 | } 10 | 11 | { 12 | 13 | Memcheck:Free 14 | ... 15 | fun:_ZN6physis4util13Configuration8LoadFileERKSs 16 | fun:main 17 | } 18 | 19 | { 20 | 21 | Memcheck:Free 22 | ... 23 | fun:exit 24 | ... 25 | } 26 | 27 | -------------------------------------------------------------------------------- /runtime/common.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #include "physis/runtime_common.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif /* __cplusplus */ 8 | 9 | 10 | #ifdef __cplusplus 11 | } 12 | #endif /* __cplusplus */ 13 | -------------------------------------------------------------------------------- /runtime/cub-1.3.2/.settings/org.eclipse.cdt.ui.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | formatter_profile=_B40C 3 | formatter_settings_version=1 4 | -------------------------------------------------------------------------------- /runtime/cub-1.3.2/.settings/org.eclipse.core.runtime.prefs: -------------------------------------------------------------------------------- 1 | content-types/enabled=true 2 | content-types/org.eclipse.cdt.core.cxxHeader/file-extensions=cuh 3 | content-types/org.eclipse.cdt.core.cxxSource/file-extensions=cu 4 | eclipse.preferences.version=1 5 | -------------------------------------------------------------------------------- /runtime/cub-1.3.2/LICENSE.TXT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010-2011, Duane Merrill. All rights reserved. 2 | Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the NVIDIA CORPORATION nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 
14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /runtime/cuda_util.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_RUNTIME_CUDA_UTIL_H_ 4 | #define PHYSIS_RUNTIME_CUDA_UTIL_H_ 5 | 6 | #include 7 | #include 8 | 9 | namespace physis { 10 | namespace runtime { 11 | 12 | 13 | } // namespace runtime 14 | } // namespace physis 15 | 16 | #endif /* PHYSIS_RUNTIME_CUDA_UTIL_H_ */ 17 | 18 | -------------------------------------------------------------------------------- /runtime/grid_util_mpi_openmp.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #ifndef PHYSIS_RUNTIME_GRID_UTIL_MPI_OPENMP_H_ 4 | #define PHYSIS_RUNTIME_GRID_UTIL_MPI_OPENMP_H_ 5 | 6 | #include "physis/physis_common.h" 7 | #include "runtime/runtime_common.h" 8 | 9 | namespace physis { 10 | namespace runtime { 11 | namespace mpiopenmputil { 12 | 13 | void CopyinoutSubgrid_MP( 14 | bool copyout_to_subgrid_mode_p, 15 | size_t elm_size, int num_dims, 16 | void **grid_mp, 17 | const IntArray &grid_size, 18 | const IntArray &grid_division, 19 | const size_t * const *grid_mp_offset, 20 | const size_t * const *grid_mp_width, 21 | void **subgrid_mp, 22 | const IntArray &subgrid_offset, 23 | const IntArray &subgrid_size, 24 | const IntArray &subgrid_division, 25 | const size_t * const *subgrid_mp_offset, 26 | const size_t * const *subgrid_mp_width 27 | ); 28 | 29 | void getMPOffset( 30 | const unsigned int num_dims, 31 | const IntArray &offset, 32 | const IntArray &grid_size, 33 | const IntArray &grid_division, 34 | const size_t * const *grid_mp_offset, 35 | const size_t * const *grid_mp_width, 36 | unsigned int &cpuid_OUT, 37 | size_t &gridid_OUT, 38 | size_t &width_avail_OUT 39 | ); 40 | 41 | } // namespace mpiopenmputil 42 | } // namespace runtime 43 | } // namespace physis 44 | 45 | 46 | #endif /* PHYSIS_RUNTIME_GRID_UTIL_MPI_OPENMP_H_ */ 47 | 48 | -------------------------------------------------------------------------------- /runtime/ipc.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #ifndef PHYSIS_RUNTIME_IPC_H_ 4 | #define PHYSIS_RUNTIME_IPC_H_ 5 | 6 | #include "runtime/runtime_common.h" 7 | 8 | namespace physis { 9 | namespace runtime { 10 | 11 | // Singleton class 12 | class InterProcComm { 13 | protected: 14 | InterProcComm() {} 15 | virtual ~InterProcComm() {} 16 | public: 17 | typedef enum {IPC_SUCCESS = 0, IPC_FAILURE = 1} IPC_ERROR_T; 18 | virtual void *CreateRequest() const = 0; 19 | virtual IPC_ERROR_T Init(int *argc, char ***argv) = 0; 20 | virtual IPC_ERROR_T Finalize() = 0; 21 | virtual int GetRank() const = 0; 22 | virtual int GetNumProcs() const = 0; 23 | virtual IPC_ERROR_T Send(void *buf, size_t len, 24 | int dest) = 0; 25 | virtual IPC_ERROR_T Isend(void *buf, size_t len, 26 | int dest, void *req) = 0; 27 | virtual IPC_ERROR_T Recv(void *buf, size_t len, int src) = 0; 28 | virtual IPC_ERROR_T Irecv(void *buf, size_t len, 29 | int src, void *req) = 0; 30 | virtual IPC_ERROR_T Wait(void *req) = 0; 31 | //virtual IPC_ERROR_T WaitAll() = 0; 32 | virtual IPC_ERROR_T Test(void *req, bool *flag) = 0; 33 | virtual IPC_ERROR_T Bcast(void *buf, size_t len, int root) = 0; 34 | virtual IPC_ERROR_T Reduce(void *src, void *dst, 35 | int count, 36 | PSType type, 37 | PSReduceOp op, int root) = 0; 38 | virtual IPC_ERROR_T Barrier() = 0; 39 | 40 | }; 41 | 42 | } // namespace runtime 43 | } // namespace physis 44 | 45 | #endif /* PHYSIS_RUNTIME_IPC_H_ */ 46 | -------------------------------------------------------------------------------- /runtime/ipc_mpi.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #ifndef PHYSIS_RUNTIME_IPC_MPI_H_ 4 | #define PHYSIS_RUNTIME_IPC_MPI_H_ 5 | 6 | #include "mpi.h" 7 | 8 | #include "runtime/ipc.h" 9 | 10 | 11 | namespace physis { 12 | namespace runtime { 13 | 14 | class InterProcCommMPI: public InterProcComm { 15 | protected: 16 | InterProcCommMPI(): comm_(MPI_COMM_WORLD), initialized_(false) {} 17 | virtual ~InterProcCommMPI() {} 18 | public: 19 | static InterProcCommMPI* GetInstance(); 20 | virtual void *CreateRequest() const; 21 | virtual IPC_ERROR_T Init(int *argc, char ***argv); 22 | virtual IPC_ERROR_T Finalize(); 23 | virtual int GetRank() const; 24 | virtual int GetNumProcs() const; 25 | virtual IPC_ERROR_T Send(void *buf, size_t len, int dest); 26 | virtual IPC_ERROR_T Isend(void *buf, size_t len, 27 | int dest, void *req); 28 | virtual IPC_ERROR_T Recv(void *buf, size_t len, int src); 29 | virtual IPC_ERROR_T Irecv(void *buf, size_t len, 30 | int src, void *req); 31 | virtual IPC_ERROR_T Wait(void *req); 32 | //virtual IPC_ERROR_T WaitAll(); 33 | virtual IPC_ERROR_T Test(void *req, bool *flag); 34 | virtual IPC_ERROR_T Bcast(void *buf, size_t len, int root); 35 | virtual IPC_ERROR_T Reduce(void *src, void *dst, 36 | int count, PSType type, 37 | PSReduceOp op, int root); 38 | virtual IPC_ERROR_T Barrier(); 39 | 40 | protected: 41 | MPI_Comm comm_; 42 | static InterProcCommMPI *singleton_; 43 | bool initialized_; 44 | }; 45 | 46 | } // namespace runtime 47 | } // namespace physis 48 | 49 | #endif /* PHYSIS_RUNTIME_IPC_MPI_H_ */ 50 | 51 | 52 | -------------------------------------------------------------------------------- /runtime/libphysis_rt_mpi_openmp_numa.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #if ! 
defined(_GNU_SOURCE) 4 | #define _GNU_SOURCE 5 | #endif 6 | 7 | #include "runtime/mpi_openmp_runtime.h" 8 | 9 | #ifdef USE_OPENMP_NUMA 10 | #if 0 11 | #include 12 | #endif 13 | #endif 14 | 15 | #include 16 | #include 17 | 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | 22 | void __PSInitLoop_OpenMP(void){ 23 | #ifndef USE_OPENMP_NUMA 24 | return; 25 | #else 26 | #if 1 27 | cpu_set_t mask; 28 | CPU_ZERO(&mask); 29 | int thread_idx = omp_get_thread_num(); 30 | CPU_SET(thread_idx, &mask); 31 | //LOG_DEBUG() << "Resetting using CPU ID to " << thread_idx << "\n"; 32 | if (sched_setaffinity(0, sizeof(mask), &mask) == -1){ 33 | perror("sched_setaffinity "); 34 | LOG_DEBUG() << "Calling sched_setaffinity failed for OpenMP thread " 35 | << thread_idx << "\n"; 36 | } 37 | #endif 38 | #endif /* ifndef USE_OPENMP_NUMA */ 39 | } // __PSInitLoop_OpenMP 40 | 41 | #ifdef __cplusplus 42 | } 43 | #endif 44 | 45 | -------------------------------------------------------------------------------- /runtime/mpi_opencl_runtime.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_RUNTIME_MPI_OPENCL_RUNTIME_H_ 4 | #define PHYSIS_RUNTIME_MPI_OPENCL_RUNTIME_H_ 5 | 6 | #include "runtime/runtime_common.h" 7 | #include "runtime/grid_mpi_opencl.h" 8 | #include "runtime/rpc_mpi_opencl.h" 9 | #include "runtime/rpc_opencl_mpi.h" 10 | #include 11 | 12 | // FIXME 13 | // FIXME 14 | // Get this back later!! 
15 | #define NUM_CLINFO_BOUNDARY_KERNEL 16 16 | 17 | namespace physis { 18 | namespace runtime { 19 | 20 | typedef void (*__PSStencilRunClientFunction)(int, void **); 21 | extern __PSStencilRunClientFunction *__PS_stencils; 22 | 23 | extern ProcInfo *pinfo; 24 | extern MasterMPIOpenCL *master; 25 | extern ClientMPIOpenCL *client; 26 | extern GridSpaceMPIOpenCL *gs; 27 | 28 | 29 | } // namespace runtime 30 | } // namespace physis 31 | 32 | namespace physis { 33 | namespace runtime { 34 | 35 | extern CLMPIbaseinfo *clinfo_generic; 36 | extern CLMPIbaseinfo *clinfo_inner; 37 | extern CLMPIbaseinfo *clinfo_boundary_copy; 38 | extern std::vector clinfo_boundary_kernel; 39 | 40 | extern CLMPIbaseinfo *clinfo_nowusing; 41 | 42 | } // namespace runtime 43 | } // namespace physis 44 | 45 | namespace physis { 46 | namespace runtime { 47 | extern void InitOpenCL( 48 | int my_rank, int num_local_processes, int *argc, char ***argv 49 | ); 50 | extern void DestroyOpenCL(void); 51 | } // namespace runtime 52 | } // namespace physis 53 | 54 | #endif /* PHYSIS_RUNTIME_MPI_OPENCL_RUNTIME_H_ */ 55 | -------------------------------------------------------------------------------- /runtime/mpi_openmp_runtime.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #ifndef PHYSIS_RUNTIME_MPI_OPENMP_RUNTIME_H_ 4 | #define PHYSIS_RUNTIME_MPI_OPENMP_RUNTIME_H_ 5 | 6 | #include "runtime/runtime_common.h" 7 | #include "runtime/grid_mpi_openmp.h" 8 | #include "runtime/rpc_mpi_openmp.h" 9 | 10 | #ifdef USE_OPENMP_NUMA 11 | #define PROCINFO ProcInfoOpenMP 12 | #define MASTER MasterOpenMP 13 | #define CLIENT ClientOpenMP 14 | #define GRIDSPACEMPI GridSpaceMPIOpenMP 15 | #define GRIDMPI GridMPIOpenMP 16 | #else 17 | #define PROCINFO ProcInfo 18 | #define MASTER Master 19 | #define CLIENT Client 20 | #define GRIDSPACEMPI GridSpaceMPI 21 | #define GRIDMPI GridMPI 22 | #endif 23 | 24 | namespace physis { 25 | namespace runtime { 26 | 27 | typedef void (*__PSStencilRunClientFunction)(int, void **); 28 | extern __PSStencilRunClientFunction *__PS_stencils; 29 | 30 | extern PROCINFO *pinfo; 31 | extern MASTER *master; 32 | extern CLIENT *client; 33 | extern GRIDSPACEMPI *gs; 34 | 35 | } // namespace runtime 36 | } // namespace physis 37 | 38 | 39 | #endif /* PHYSIS_RUNTIME_MPI_OPENMP_RUNTIME_H_ */ 40 | -------------------------------------------------------------------------------- /runtime/mpi_runtime_common.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "runtime/mpi_runtime_common.h" 8 | 9 | #include "mpi.h" 10 | 11 | #include "physis/physis_mpi.h" 12 | #include "physis/physis_util.h" 13 | #include "runtime/grid_mpi_debug_util.h" 14 | #include "runtime/mpi_util.h" 15 | #include "runtime/grid_mpi2.h" 16 | #include "runtime/rpc2.h" 17 | #include "runtime/inter_proc_comm_mpi.h" 18 | 19 | using std::map; 20 | using std::string; 21 | 22 | using namespace physis::runtime; 23 | 24 | using physis::IndexArray; 25 | using physis::IntArray; 26 | using physis::SizeArray; 27 | 28 | namespace physis { 29 | namespace runtime { 30 | 31 | __PSStencilRunClientFunction *__PS_stencils; 32 | 33 | ProcInfo *pinfo; 34 | Master *master; 35 | Client *client; 36 | GridSpaceMPI *gs; 37 | 38 | } // namespace runtime 39 | } // namespace physis 40 | 41 | #ifdef __cplusplus 42 | extern "C" { 43 | #endif 44 | 45 | 46 | 47 | 48 | #ifdef __cplusplus 49 | } 50 | #endif 51 | 52 | -------------------------------------------------------------------------------- /runtime/mpi_runtime_common.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_RUNTIME_MPI_RUNTIME_COMMON_H_ 4 | #define PHYSIS_RUNTIME_MPI_RUNTIME_COMMON_H_ 5 | 6 | #include "runtime/runtime_common.h" 7 | #include "runtime/rpc.h" 8 | #include "runtime/grid_mpi.h" 9 | 10 | namespace physis { 11 | namespace runtime { 12 | 13 | 14 | 15 | } // namespace runtime 16 | } // namespace physis 17 | 18 | 19 | #endif /* PHYSIS_RUNTIME_MPI_RUNTIME_COMMON_H_ */ 20 | -------------------------------------------------------------------------------- /runtime/opencl_misc.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | /* physis-OpenCL specific */ 4 | 5 | #define BUFSIZE 1024 6 | 7 | /* C or C++ standard headers */ 8 | #include 9 | #include 10 | #include 11 | 12 | /* physis-OpenCL specific*/ 13 | #include "runtime/rpc_opencl.h" 14 | 15 | namespace physis { 16 | namespace runtime { 17 | 18 | void CLbaseinfo::guess_kernelfile 19 | (const int *argc, char ***argv, std::string &filename, std::string &kernelname) const { 20 | 21 | char buf[BUFSIZE]; 22 | // const char *tail; 23 | const char *argi; 24 | char *pos; 25 | char **argvv = *argv; 26 | int found = 0; 27 | int i = 0; 28 | int len = 0; 29 | 30 | for (i = 0; i < *argc; i++) { 31 | argi = argvv[i]; 32 | len = strlen(argi); 33 | if (len >= BUFSIZE - 4) continue; 34 | strncpy(buf, argi, len); 35 | pos = buf + len; 36 | #if 0 37 | #else 38 | sprintf(pos, "%s", ".c"); 39 | #endif 40 | found = 1; 41 | break; 42 | } 43 | if (found) 44 | filename = buf; 45 | else 46 | filename = ""; 47 | 48 | // At first set the below kernel name, will be updated by __PSSetKernel 49 | kernelname = "__PSStencilRun_kernel"; 50 | } // guess_kernelfile 51 | 52 | std::string CLbaseinfo::physis_opencl_h_include_path(void) const { 53 | std::string ret = ""; 54 | 55 | // FIXME 56 | // Currently no header files are included in kernel code 57 | 58 | #if 0 59 | #endif 60 | 61 | return ret; 62 | 63 | } // physis_opencl_h_dir_path 64 | 65 | } // namespace physis 66 | } // namespace runtime 67 | -------------------------------------------------------------------------------- /runtime/opencl_runtime.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #ifndef PHYSIS_RUNTIME_OPENCL_RUNTIME_H_ 4 | #define PHYSIS_RUNTIME_OPENCL_RUNTIME_H_ 5 | 6 | /* physis-OpenCL specific */ 7 | #include "runtime/rpc_opencl.h" 8 | 9 | namespace physis { 10 | namespace runtime { 11 | extern CLinfo *master; 12 | } 13 | } 14 | #endif // #define PHYSIS_RUNTIME_OPENCL_RUNTIME_H_ 15 | -------------------------------------------------------------------------------- /runtime/proc.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #include "runtime/proc.h" 4 | 5 | namespace physis { 6 | namespace runtime { 7 | 8 | std::ostream &Proc::print(std::ostream &os) const { 9 | os << "Proc {" 10 | << "rank: " << rank_ 11 | << ", #procs: " << num_procs_ 12 | << "}"; 13 | return os; 14 | } 15 | 16 | } // namespace runtime 17 | } // namespace physis 18 | -------------------------------------------------------------------------------- /runtime/proc.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #ifndef PHYSIS_RUNTIME_PROC_H_ 4 | #define PHYSIS_RUNTIME_PROC_H_ 5 | 6 | #include "runtime/runtime_common.h" 7 | #include "runtime/ipc.h" 8 | 9 | namespace physis { 10 | namespace runtime { 11 | 12 | class Proc { 13 | protected: 14 | int rank_; 15 | int num_procs_; 16 | InterProcComm *ipc_; 17 | __PSStencilRunClientFunction *stencil_runs_; 18 | public: 19 | Proc(InterProcComm *ipc, 20 | __PSStencilRunClientFunction *stencil_runs): 21 | rank_(ipc->GetRank()), num_procs_(ipc->GetNumProcs()), ipc_(ipc), 22 | stencil_runs_(stencil_runs) {} 23 | virtual ~Proc() {} 24 | std::ostream &print(std::ostream &os) const; 25 | int rank() const { return rank_; } 26 | int num_procs() const { return num_procs_; } 27 | InterProcComm *ipc() { return ipc_; } 28 | static int GetRootRank() { return 0; } 29 | bool IsRoot() const { return rank_ == GetRootRank(); } 30 | }; 31 | 32 | } // namespace runtime 33 | } // namespace physis 34 | 35 | inline 36 | std::ostream &operator<<(std::ostream &os, const physis::runtime::Proc &proc) { 37 | return proc.print(os); 38 | } 39 | 40 | #endif /* PHYSIS_RUNTIME_PROC_H_ */ 41 | -------------------------------------------------------------------------------- /runtime/reduce_cuda.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_RUNTIME_REDUCE_CUDA_H_ 4 | #define PHYSIS_RUNTIME_REDUCE_CUDA_H_ 5 | #include "runtime/runtime_common.h" 6 | #include "runtime/runtime_common_cuda.h" 7 | #include "runtime/cuda_util.h" 8 | #include "runtime/reduce.h" 9 | //#include "physis/physis_cuda.h" 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace physis { 16 | namespace runtime { 17 | 18 | //! Reduce a grid with binary operation op. 
//!
//! The element type T is not deducible from the arguments, so callers name
//! it explicitly. The template header and the <T> arguments below were lost
//! in extraction and are restored from how T is used in the body.
//!
//! \tparam T element type of the grid and of the result.
//! \param buf host address where the single result of type T is written.
//! \param op reduction operator; any value other than PS_MAX, PS_MIN,
//!        PS_SUM or PS_PROD ends in PSAbort(1).
//! \param dev_grid pointer wrapped in a thrust::device_ptr<T> (assumed to be
//!        device memory -- confirm with callers).
//! \param len number of elements of type T to reduce.
//!
//! NOTE(review): for PS_MAX / PS_MIN the iterator returned by Thrust is
//! dereferenced unconditionally, so len == 0 is not handled here.
template <class T>
void ReduceGridCUDA(void *buf, PSReduceOp op,
                    void *dev_grid, size_t len) {
  thrust::device_ptr<T> first((T*)dev_grid);
  thrust::device_ptr<T> last = first + len;
  T *out = (T*)buf;
  switch (op) {
    case PS_MAX:
      *out = *thrust::max_element(first, last);
      break;
    case PS_MIN:
      *out = *thrust::min_element(first, last);
      break;
    case PS_SUM:
      *out = thrust::reduce(first, last,
                            physis::runtime::GetReductionDefaultValue<T>(op),
                            thrust::plus<T>());
      break;
    case PS_PROD:
      *out = thrust::reduce(first, last,
                            physis::runtime::GetReductionDefaultValue<T>(op),
                            thrust::multiplies<T>());
      break;
    default:
      PSAbort(1);
  }
  return;
}

} // namespace runtime
} // namespace physis

#endif /* PHYSIS_RUNTIME_REDUCE_CUDA_H_ */
--------------------------------------------------------------------------------
/runtime/reduce_grid_mpi_cuda_exp.h:
--------------------------------------------------------------------------------
// Licensed under the BSD license. See LICENSE.txt for more details.

#ifndef PHYSIS_RUNTIME_REDUCE_GRID_MPI_CUDA_EXP_H_
#define PHYSIS_RUNTIME_REDUCE_GRID_MPI_CUDA_EXP_H_

#include "runtime/runtime_common.h"
#include "physis/reduce.h"

namespace physis {
namespace runtime {

// Grid reduction entry point for the MPI-CUDA backend; defined elsewhere.
extern int ReduceGridMPICUDAExp(void *buf, PSType type, PSReduceOp op,
                                void *dev_grid, int dim, const IndexArray &size,
                                const Width2 &width);

} //namespace runtime
} //namespace physis

#endif // PHYSIS_RUNTIME_REDUCE_GRID_MPI_CUDA_EXP_H_
--------------------------------------------------------------------------------
/runtime/rpc_cuda.cc:
--------------------------------------------------------------------------------
// Licensed under the BSD license. See LICENSE.txt for more details.

#include "runtime/rpc_cuda.h"
#include "runtime/mpi_wrapper.h"
#include "runtime/grid_util.h"
#include "runtime/runtime_common_cuda.h"
#include "runtime/grid_space_mpi_cuda.h"

// NOTE(review): the target of this include was lost in extraction.
#include

// This translation unit currently defines nothing inside the namespace.
namespace physis {
namespace runtime {

} // namespace runtime
} // namespace physis
--------------------------------------------------------------------------------
/runtime/rpc_mpi_cuda.h:
--------------------------------------------------------------------------------
// Licensed under the BSD license. See LICENSE.txt for more details.

#ifndef PHYSIS_RUNTIME_RPC_MPI_CUDA_H_
#define PHYSIS_RUNTIME_RPC_MPI_CUDA_H_

#include "runtime/runtime_common.h"
#include "runtime/rpc.h"
#include "runtime/grid_mpi_cuda.h"
#include "runtime/buffer_cuda.h"

namespace physis {
namespace runtime {

// Master specialisation for the MPI-CUDA backend. It overrides finalisation
// and the local grid copy-in / copy-out hooks of Master.
class MasterMPICUDA: public Master {
 public:
  MasterMPICUDA(const ProcInfo &pinfo, GridSpaceMPICUDA *gs,
                MPI_Comm comm);
  virtual ~MasterMPICUDA();
  virtual void Finalize();
  // Copy between the caller's buffer buf and the local part of grid g.
  virtual void GridCopyinLocal(GridMPI *g, const void *buf);
  virtual void GridCopyoutLocal(GridMPI *g, void *buf);
 protected:
  // Host-side buffer; presumably pinned staging memory for the copies
  // above -- confirm in the implementation file.
  BufferCUDAHost *pinned_buf_;
};

// Client specialisation for the MPI-CUDA backend; overrides only Finalize.
class ClientMPICUDA: public Client {
 public:
  ClientMPICUDA(const ProcInfo &pinfo, GridSpaceMPICUDA *gs,
                MPI_Comm comm);
  virtual ~ClientMPICUDA();
  virtual void Finalize();
};

} // namespace runtime
} // namespace physis

#endif /* PHYSIS_RUNTIME_RPC_MPI_CUDA_H_ */
--------------------------------------------------------------------------------
/runtime/rpc_mpi_opencl.h:
--------------------------------------------------------------------------------
// Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #ifndef PHYSIS_RUNTIME_RPC_MPI_OPENCL_H_ 4 | #define PHYSIS_RUNTIME_RPC_MPI_OPENCL_H_ 5 | 6 | #include "runtime/runtime_common.h" 7 | #include "runtime/rpc_mpi.h" 8 | #include "runtime/grid_mpi_opencl.h" 9 | #include "runtime/rpc_opencl_common.h" 10 | #include "runtime/buffer_opencl.h" 11 | 12 | #include 13 | 14 | namespace physis { 15 | namespace runtime { 16 | 17 | class MasterMPIOpenCL: public Master { 18 | public: 19 | MasterMPIOpenCL( 20 | const ProcInfo &pinfo, GridSpaceMPIOpenCL *gs, 21 | MPI_Comm comm, 22 | CLbaseinfo *cl_in); 23 | virtual ~MasterMPIOpenCL(); 24 | virtual void Finalize(); 25 | virtual void GridCopyinLocal(GridMPI *g, const void *buf); 26 | virtual void GridCopyoutLocal(GridMPI *g, void *buf); 27 | protected: 28 | BufferOpenCLHost *pinned_buf_; 29 | CLbaseinfo *cl_generic_; 30 | }; 31 | 32 | class ClientMPIOpenCL: public Client { 33 | public: 34 | ClientMPIOpenCL( 35 | const ProcInfo &pinfo, GridSpaceMPIOpenCL *gs, 36 | MPI_Comm comm, 37 | CLbaseinfo *cl_in); 38 | virtual ~ClientMPIOpenCL(); 39 | virtual void Finalize(); 40 | protected: 41 | CLbaseinfo *cl_generic_; 42 | }; 43 | 44 | } // namespace runtime 45 | } // namespace physis 46 | 47 | #endif /* PHYSIS_RUNTIME_RPC_MPI_OPENCL_H_ */ 48 | -------------------------------------------------------------------------------- /runtime/rpc_opencl.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_RUNTIME_RPC_OPENCL_H_ 4 | #define PHYSIS_RUNTIME_RPC_OPENCL_H_ 5 | 6 | /* physis-OpenCL specific*/ 7 | #include "physis/physis_opencl.h" 8 | #include "runtime/rpc_opencl_common.h" 9 | 10 | /* 11 | Note: 12 | CLinfo: OpenCL information class 13 | Members or functions not using __PSGrid belongs to CLbaseinfo. 14 | Members or functions using __PSGrid should belong to 15 | CLbase, which should inherit CLbaseinfo. 
16 | */ 17 | 18 | namespace physis { 19 | namespace runtime { 20 | 21 | class CLinfo : public CLbaseinfo { 22 | protected: 23 | 24 | public: 25 | CLinfo(): CLbaseinfo() {}; 26 | virtual ~CLinfo() {}; 27 | 28 | virtual __PSGrid* GridNew(int elm_size, int num_dims, PSVectorInt dim, int double_buffering); 29 | virtual void GridFree(__PSGrid *g); 30 | virtual void GridCopyin(__PSGrid *g, const void *src_buf); 31 | virtual void GridCopyout(__PSGrid *g, void *dst_buf); 32 | virtual void GridSet(__PSGrid *g, const void *val_ptr, va_list valst_dim); 33 | 34 | }; // class CLinfo 35 | 36 | } // namespace runtime 37 | } // namespace physis 38 | 39 | #endif /* #define PHYSIS_RUNTIME_RPC_OPENCL_H_ */ 40 | -------------------------------------------------------------------------------- /runtime/rpc_opencl_mpi.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_RUNTIME_RPC_OPENCL_MPI_H 4 | #define PHYSIS_RUNTIME_RPC_OPENCL_MPI_H 5 | 6 | #include "runtime/rpc_opencl_common.h" 7 | 8 | namespace physis { 9 | namespace runtime { 10 | 11 | class CLMPIbaseinfo : public CLbaseinfo { 12 | 13 | protected: 14 | virtual std::string create_kernel_contents(std::string kernelfile) const; 15 | virtual std::string physis_opencl_h_include_path(void) const { return header_path_; } 16 | 17 | std::string header_path_; 18 | std::string kernel_filen_; 19 | int dev_id_; 20 | int save_context_p; 21 | 22 | public: 23 | CLMPIbaseinfo(); 24 | CLMPIbaseinfo( 25 | unsigned int id_default, unsigned int create_queue_p, 26 | unsigned int block_events_p); 27 | CLMPIbaseinfo(CLMPIbaseinfo &master); 28 | virtual ~CLMPIbaseinfo(); 29 | 30 | virtual cl_program get_prog(void) const { return clprog; } 31 | virtual void set_kernel_filen(std::string filen) { kernel_filen_ = filen; } 32 | virtual std::string get_kernel_filen(void) const { return kernel_filen_; } 33 | 34 | virtual void 
set_header_include_path(const char *path) { if (path) header_path_ = path; } 35 | virtual void mark_save_context() { save_context_p = 1; } 36 | 37 | virtual void sync_queue() { if (clqueue) clFinish(clqueue); } 38 | 39 | 40 | }; // class CLMPIbaseinfo 41 | } // namespace runtime 42 | } // namespace physis 43 | 44 | 45 | #endif /* #define PHYSIS_RUNTIME_RPC_OPENCL_MPI_H */ 46 | -------------------------------------------------------------------------------- /runtime/runtime.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_RUNTIME_RUNTIME_H_ 4 | #define PHYSIS_RUNTIME_RUNTIME_H_ 5 | 6 | #include 7 | 8 | #include "runtime/runtime_common.h" 9 | #include "runtime/grid.h" 10 | 11 | namespace physis { 12 | namespace runtime { 13 | 14 | 15 | template 16 | class Runtime { 17 | public: 18 | Runtime(): gs_(NULL) {} 19 | virtual ~Runtime() {} 20 | virtual void Init(int *argc, char ***argv, int grid_num_dims, 21 | va_list vl) { 22 | // Set __ps_trace if physis-trace option is given 23 | __ps_trace = NULL; 24 | string opt_name = "physis-trace"; 25 | vector opts; 26 | if (ParseOption(argc, argv, opt_name, 0, opts)) { 27 | __ps_trace = stderr; 28 | LOG_INFO() << "Tracing enabled\n"; 29 | } 30 | } 31 | 32 | virtual GridSpaceType *gs() { 33 | return gs_; 34 | } 35 | protected: 36 | GridSpaceType *gs_; 37 | 38 | }; 39 | 40 | } // namespace runtime 41 | } // namespace physis 42 | 43 | 44 | 45 | #endif /* PHYSIS_RUNTIME_RUNTIME_H_ */ 46 | -------------------------------------------------------------------------------- /runtime/runtime_common.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #ifndef PHYSIS_RUNTIME_RUNTIME_COMMON_H_ 4 | #define PHYSIS_RUNTIME_RUNTIME_COMMON_H_ 5 | 6 | #define PHYSIS_RUNTIME 7 | 8 | #include "physis/physis_util.h" 9 | #include "physis/physis_common.h" 10 | #include "physis/internal_common.h" 11 | #include "common/config.h" 12 | 13 | namespace physis { 14 | namespace runtime { 15 | 16 | struct Width2 { 17 | UnsignedArray bw; 18 | UnsignedArray fw; 19 | const UnsignedArray &operator()(bool is_fw) const { 20 | return is_fw ? fw : bw; 21 | } 22 | unsigned operator()(int dim, bool is_fw) const { 23 | return operator()(is_fw)[dim]; 24 | } 25 | }; 26 | 27 | typedef void (*__PSStencilRunClientFunction)(int, void **); 28 | 29 | // Returns the number of process grid dimensions. Returns negative 30 | // value on failure. 31 | int GetProcessDim(int *argc, char ***argv, IntArray &proc_size); 32 | 33 | bool ParseOption(int *argc, char ***argv, const string &opt_name, 34 | int num_additional_args, vector &opts); 35 | 36 | 37 | } // namespace runtime 38 | } // namespace physis 39 | 40 | inline 41 | std::ostream &operator<<(std::ostream &os, const physis::runtime::Width2 &w) { 42 | return os << "{bw: " << w.bw << ", fw: " << w.fw << "}"; 43 | } 44 | 45 | #endif /* PHYSIS_RUNTIME_RUNTIME_COMMON_H_ */ 46 | -------------------------------------------------------------------------------- /runtime/runtime_cuda.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #ifndef PHYSIS_RUNTIME_RUNTIME_CUDA_H_ 4 | #define PHYSIS_RUNTIME_RUNTIME_CUDA_H_ 5 | 6 | #include "runtime/runtime_common.h" 7 | #include "runtime/runtime.h" 8 | 9 | namespace physis { 10 | namespace runtime { 11 | 12 | template 13 | class RuntimeCUDA: public Runtime { 14 | public: 15 | RuntimeCUDA() {} 16 | virtual ~RuntimeCUDA() {} 17 | }; 18 | 19 | } // namespace runtime 20 | } // namespace physis 21 | 22 | #endif /* PHYSIS_RUNTIME_RUNTIME_CUDA_H_ */ 23 | 24 | -------------------------------------------------------------------------------- /runtime/runtime_cuda_hm.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #include "runtime/runtime_cuda_hm.h" 4 | 5 | namespace physis { 6 | namespace runtime { 7 | 8 | RuntimeCUDAHM::RuntimeCUDAHM(): RuntimeCUDA() { 9 | } 10 | 11 | RuntimeCUDAHM::~RuntimeCUDAHM() { 12 | } 13 | 14 | } // namespace runtime 15 | } // namespace physis 16 | -------------------------------------------------------------------------------- /runtime/runtime_cuda_hm.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_RUNTIME_RUNTIME_CUDA_HM_H_ 4 | #define PHYSIS_RUNTIME_RUNTIME_CUDA_HM_H_ 5 | 6 | #include "runtime/runtime_common.h" 7 | #include "runtime/runtime_cuda.h" 8 | 9 | namespace physis { 10 | namespace runtime { 11 | 12 | class RuntimeCUDAHM: public RuntimeCUDA { 13 | public: 14 | RuntimeCUDAHM(); 15 | virtual ~RuntimeCUDAHM(); 16 | }; 17 | 18 | } // namespace runtime 19 | } // namespace physis 20 | 21 | #endif /* PHYSIS_RUNTIME_RUNTIME_CUDA_HM_H_ */ 22 | 23 | -------------------------------------------------------------------------------- /runtime/runtime_ref.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. 
See LICENSE.txt for more details. 2 | 3 | #include "runtime/runtime_ref.h" 4 | 5 | 6 | namespace physis { 7 | namespace runtime { 8 | 9 | 10 | } // namespace runtime 11 | } // namespace physis 12 | -------------------------------------------------------------------------------- /runtime/runtime_ref.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_RUNTIME_RUNTIME_REF_H_ 4 | #define PHYSIS_RUNTIME_RUNTIME_REF_H_ 5 | 6 | #include "runtime/runtime_common.h" 7 | #include "runtime/runtime.h" 8 | 9 | namespace physis { 10 | namespace runtime { 11 | 12 | template 13 | class RuntimeRef: public Runtime { 14 | public: 15 | RuntimeRef() {} 16 | virtual ~RuntimeRef() {} 17 | }; 18 | 19 | } // namespace runtime 20 | } // namespace physis 21 | 22 | #endif /* PHYSIS_RUNTIME_RUNTIME_REF_H_ */ 23 | 24 | -------------------------------------------------------------------------------- /runtime/tests/test_physis_rt_mpi.c: -------------------------------------------------------------------------------- 1 | #include "physis_mpi.h" 2 | 3 | #define N (4) 4 | 5 | typedef void (*grid_update_client_t)(); 6 | grid_update_client_t *update_clients; 7 | 8 | int *create_grid() 9 | { 10 | return (int*)calloc(N * N * N, sizeof(int)); 11 | } 12 | 13 | void init_grid(int *g) 14 | { 15 | int i; 16 | for (i = 0; i < N * N * N; i++) { 17 | g[i] = i; 18 | } 19 | return; 20 | } 21 | 22 | void print_grid(int *g, FILE *out) 23 | { 24 | int i; 25 | fprintf(out, "grid: "); 26 | for (i = 0; i < N * N * N; i++) { 27 | fprintf(out, "%d ", g[i]); 28 | } 29 | fprintf(out, "\n"); 30 | return; 31 | } 32 | 33 | int main(int argc, char *argv[]) 34 | { 35 | PhysisInit(&argc, &argv); 36 | unsigned s[3] = {N, N, N}; 37 | uvec_t halo = {1, 1, 1}; 38 | grid *g = grid_new(3, sizeof(int), s, halo, halo); 39 | int *gin = create_grid(); 40 | init_grid(gin); 41 | print_grid(gin, stdout); 42 | int 
*gout = create_grid(); 43 | grid_copyin(g, gin); 44 | grid_copyout(g, gout); 45 | printf("copyout\n"); 46 | print_grid(gout, stdout); 47 | grid_free(g); 48 | PhysisFinalize(); 49 | return 0; 50 | } 51 | 52 | -------------------------------------------------------------------------------- /runtime/timing.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #include "runtime/timing.h" 4 | 5 | namespace physis { 6 | namespace runtime { 7 | 8 | DataCopyProfile::DataCopyProfile(): 9 | gpu_to_cpu(0.0), cpu_in(0.0), cpu_out(0.0), cpu_to_gpu(0.0) {} 10 | 11 | 12 | std::ostream &DataCopyProfile::print(std::ostream &os) const { 13 | StringJoin sj; 14 | sj << "GPU->CPU: " << gpu_to_cpu; 15 | sj << "CPU->MPI: " << cpu_out; 16 | sj << "MPI->CPU: " << cpu_in; 17 | sj << "CPU->GPU: " << cpu_to_gpu; 18 | os << "(" << sj.str() << ")"; 19 | return os; 20 | } 21 | 22 | } // namespace runtime 23 | } // namespace physis 24 | 25 | -------------------------------------------------------------------------------- /runtime/timing.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 

#ifndef PHYSIS_RUNTIME_TIMING_H_
#define PHYSIS_RUNTIME_TIMING_H_

#include "runtime/runtime_common.h"

namespace physis {
namespace runtime {

// Counters for the four stages of a data copy. print() labels them
// "GPU->CPU" (gpu_to_cpu), "CPU->MPI" (cpu_out), "MPI->CPU" (cpu_in) and
// "CPU->GPU" (cpu_to_gpu). The unit is not stated in this header.
struct DataCopyProfile {
  double gpu_to_cpu;
  double cpu_in;
  double cpu_out;
  double cpu_to_gpu;
  DataCopyProfile();
  std::ostream &print(std::ostream &os) const;
};

// Thin C++ wrapper around the __PSStopwatch C interface.
struct Stopwatch {
  __PSStopwatch st;
  void Start() {
    __PSStopwatchStart(&st);
  }
  // Returns whatever __PSStopwatchStop reports for this stopwatch.
  float Stop() {
    return __PSStopwatchStop(&st);
  }
};

} // namespace runtime
} // namespace physis

// Stream insertion for DataCopyProfile; forwards to print().
inline std::ostream& operator<<(
    std::ostream &os,
    const physis::runtime::DataCopyProfile &prof) {
  return prof.print(os);
}

#endif /* PHYSIS_RUNTIME_TIMING_H_ */
--------------------------------------------------------------------------------
/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
add_subdirectory(system_tests)
#add_subdirectory(gmock)
--------------------------------------------------------------------------------
/tests/gmock/COPYING:
--------------------------------------------------------------------------------
Copyright 2008, Google Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc.
nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /tests/gmock/README: -------------------------------------------------------------------------------- 1 | Google Mock version 1.6.0 2 | 3 | Retains only files under the fused-src directory.

--------------------------------------------------------------------------------
/tests/system_tests/CMakeLists.txt:
--------------------------------------------------------------------------------

# Generate the system-test driver script in the build directory; @ONLY
# limits substitution to @VAR@ references.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/run_system_tests.sh.cmake
  ${CMAKE_BINARY_DIR}/run_system_tests.sh @ONLY)

add_subdirectory(test_cases)
--------------------------------------------------------------------------------
/tests/system_tests/test_cases/CMakeLists.txt:
--------------------------------------------------------------------------------

# One executable per hand-written reference implementation.
file(GLOB test_srcs
  "${CMAKE_CURRENT_SOURCE_DIR}/test_*.manual.ref.c")
foreach (src ${test_srcs})
  # NAME_WE drops everything from the first '.', leaving e.g. "test_15".
  get_filename_component(fname ${src} NAME_WE)
  add_executable(${fname}.manual.ref.exe ${src})
endforeach ()

# uses the same manual code as test_redblack
add_executable(test_redblack-separated.manual.ref.exe
  test_redblack.manual.ref.c)

file(GLOB cuda_test_srcs
  "${CMAKE_CURRENT_SOURCE_DIR}/test_*.manual.cuda.cu")

# The hand-written CUDA versions are built only when CUDA was found.
if (CUDA_FOUND)
  foreach (src ${cuda_test_srcs})
    get_filename_component(fname ${src} NAME_WE)
    cuda_add_executable(${fname}.manual.cuda.exe ${src})
    # On Darwin the CUDA executables are linked with -stdlib=libstdc++.
    if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
      set_target_properties(
        ${fname}.manual.cuda.exe PROPERTIES
        LINK_FLAGS "-stdlib=libstdc++"
        )
    endif ()
  endforeach ()
  # For integer code, use the normal C code for testing the CUDA version
  add_executable(test_7-pt-int-type.manual.cuda.exe
    test_7-pt-int-type.manual.ref.c)
  add_executable(test_9-pt-reduction.manual.cuda.exe
    test_9-pt-reduction.manual.ref.c)
  add_executable(test_9-pt-periodic-reduction.manual.cuda.exe
    test_9-pt-periodic-reduction.manual.ref.c)
endif ()
--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_01.c:
--------------------------------------------------------------------------------
/*
 * TEST: copyin and copyout
 * DIM: 3
 * PRIORITY: 1
 */

/* NOTE(review): the target of the next include was lost in extraction. */
#include
#include "physis/physis.h"

#define N 8

/*
 * Copies N*N*N floats into a grid and straight back out, then checks that
 * the data is unchanged. Exits with status 1 on the first mismatch.
 */
int main(int argc, char *argv[]) {
  PSInit(&argc, &argv, 3, N, N, N);
  PSGrid3DFloat g3 = PSGrid3DFloatNew(N, N, N);

  float *indata = (float *)malloc(sizeof(float) * N * N * N);
  int i;
  for (i = 0; i < N*N*N; i++) {
    indata[i] = i;
  }
  float *outdata = (float *)malloc(sizeof(float) * N * N * N);

  PSGridCopyin(g3, indata);
  PSGridCopyout(g3, outdata);

  for (i = 0; i < N*N*N; i++) {
    if (indata[i] != outdata[i]) {
      fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n",
              i, indata[i], outdata[i]);
      exit(1);
    }
  }

  PSGridFree(g3);
  PSFinalize();
  free(indata);
  free(outdata);
  return 0;
}

--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_02.c:
--------------------------------------------------------------------------------
/*
 * TEST: Identity kernel
 * DIM: 3
 * PRIORITY: 1
 */

/* NOTE(review): the target of the next include was lost in extraction. */
#include
#include "physis/physis.h"

#define N 8

/*
 * Reads the value at (x, y, z) and emits twice that value into the same
 * grid. Despite the "Identity kernel" title above, the result is doubled.
 */
void kernel(const int x, const int y, const int z, PSGrid3DFloat g) {
  float v = PSGridGet(g, x, y, z) * 2;
  PSGridEmit(g, v);
  return;
}

/*
 * Runs the kernel over the whole N^3 domain and checks that every output
 * element equals twice the input. Exits with status 1 on a mismatch.
 */
int main(int argc, char *argv[]) {
  PSInit(&argc, &argv, 3, N, N, N);
  PSGrid3DFloat g = PSGrid3DFloatNew(N, N, N);
  PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);
  size_t nelms = N*N*N;

  float *indata = (float *)malloc(sizeof(float) * nelms);
  int i;
  for (i = 0; i < nelms; i++) {
    indata[i] = i;
  }
  float *outdata = (float *)malloc(sizeof(float) * nelms);

  PSGridCopyin(g, indata);

  PSStencilRun(PSStencilMap(kernel, d, g));

  PSGridCopyout(g, outdata);

  for (i = 0; i < nelms; i++) {
    if (indata[i] * 2 != outdata[i]) {
      fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n",
              i, indata[i]*2, outdata[i]);
      exit(1);
    }
  }

  PSGridFree(g);
  PSFinalize();
  free(indata);
  free(outdata);
  return 0;
}

--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_03.c:
--------------------------------------------------------------------------------
/*
 * TEST: Identity kernel using source and destination grids
 * DIM: 3
 * PRIORITY: 1
 */

/* NOTE(review): the target of the next include was lost in extraction. */
#include
#include "physis/physis.h"

#define N 8

/* Copies the value at (x, y, z) of g into g2. */
void kernel1(const int x, const int y, const int z, PSGrid3DFloat g,
             PSGrid3DFloat g2) {
  float v = PSGridGet(g, x, y, z);
  PSGridEmit(g2, v);
  return;
}

/*
 * Runs kernel1 over the whole N^3 domain and checks that g2 ends up equal
 * to the data copied into g. Exits with status 1 on a mismatch.
 */
int main(int argc, char *argv[]) {
  PSInit(&argc, &argv, 3, N, N, N);
  PSGrid3DFloat g = PSGrid3DFloatNew(N, N, N);
  PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);
  PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N);
  size_t nelms = N*N*N;

  float *indata = (float *)malloc(sizeof(float) * nelms);
  int i;
  for (i = 0; i < nelms; i++) {
    indata[i] = i;
  }
  float *outdata = (float *)malloc(sizeof(float) * nelms);

  PSGridCopyin(g, indata);

  PSStencilRun(PSStencilMap(kernel1, d, g, g2));

  PSGridCopyout(g2, outdata);

  for (i = 0; i < nelms; i++) {
    if (indata[i] != outdata[i]) {
      fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n",
              i, indata[i], outdata[i]);
      exit(1);
    }
  }

  PSGridFree(g);
  PSGridFree(g2);
  PSFinalize();
  free(indata);
  free(outdata);
  return 0;
}

--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_09.c:
-------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Accessing a 2D plane in a 3D grid 3 | * DIM: 3 4 | * PRIORITY: 10 5 | */ 6 | 7 | #include 8 | #include "physis/physis.h" 9 | 10 | #define N 4 11 | 12 | void kernel(const int x, const int y, const int z, 13 | PSGrid3DFloat g1, PSGrid3DFloat g2) { 14 | float v = PSGridGet(g2, x, y, 0); 15 | PSGridEmit(g1, v); 16 | return; 17 | } 18 | 19 | #define IDX3(x, y, z) ((x) + (y) * N + (z) * N * N) 20 | #define IDX2(x, y) ((x) + (y) * N) 21 | 22 | int main(int argc, char *argv[]) { 23 | PSInit(&argc, &argv, 3, N, N, N); 24 | PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N); 25 | PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, 1); 26 | 27 | size_t nelms = N*N*N; 28 | int i, j, k; 29 | 30 | float *indata = (float *)malloc(sizeof(float) * nelms); 31 | 32 | for (i = 0; i < N*N; i++) { 33 | indata[i] = i; 34 | } 35 | PSGridCopyin(g2, indata); 36 | 37 | PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N); 38 | PSStencilRun(PSStencilMap(kernel, d, g1, g2), 1); 39 | 40 | float *outdata = (float *)malloc(sizeof(float) * nelms); 41 | PSGridCopyout(g1, outdata); 42 | 43 | for (k = 0; k < N; ++k) { 44 | for (j = 0; j < N; ++j) { 45 | for (i = 0; i < N; ++i) { 46 | if (indata[IDX2(i, j)] != outdata[IDX3(i, j, k)]) { 47 | printf("Error: mismatch at %d,%d,%d, in: %f, out: %f\n", 48 | i, j, k, indata[IDX2(i, j)], outdata[IDX3(i, j, k)]); 49 | exit(1); 50 | } 51 | } 52 | } 53 | } 54 | 55 | PSGridFree(g1); 56 | PSGridFree(g2); 57 | PSFinalize(); 58 | 59 | free(indata); 60 | free(outdata); 61 | return 0; 62 | } 63 | 64 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_10.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Reflecting access 3 | * DIM: 3 4 | * PRIORITY: 10 5 | */ 6 | 7 | #include 8 | #include "physis/physis.h" 9 | 10 | #define N 8 11 | 12 | void kernel(const int x, const int y, 
const int z, 13 | PSGrid3DFloat g1, PSGrid3DFloat g2) { 14 | float v = PSGridGet(g1, N - x -1, y, z); 15 | PSGridEmit(g2, v); 16 | return; 17 | } 18 | 19 | #define IDX3(x, y, z) ((x) + (y) * N + (z) * N * N) 20 | #define IDX2(x, y) ((x) + (y) * N) 21 | 22 | int main(int argc, char *argv[]) { 23 | PSInit(&argc, &argv, 3, N, N, N); 24 | PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N); 25 | PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N); 26 | 27 | size_t nelms = N*N*N; 28 | int i, j, k; 29 | 30 | float *indata = (float *)malloc(sizeof(float) * nelms); 31 | 32 | for (i = 0; i < N*N*N; i++) { 33 | indata[i] = i; 34 | } 35 | PSGridCopyin(g1, indata); 36 | 37 | PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N); 38 | PSStencilRun(PSStencilMap(kernel, d, g1, g2), 1); 39 | 40 | float *outdata = (float *)malloc(sizeof(float) * nelms); 41 | PSGridCopyout(g2, outdata); 42 | 43 | for (i = 0; i < N; ++i) { 44 | for (j = 0; j < N; ++j) { 45 | for (k = 0; k < N; ++k) { 46 | if (indata[IDX3(N-i-1, j, k)] != outdata[IDX3(i, j, k)]) { 47 | printf("Error: mismatch at %d,%d,%d, in: %f, out: %f\n", 48 | i, j, k, indata[IDX3(i, j, k)], outdata[IDX3(i, j, k)]); 49 | exit(1); 50 | } 51 | } 52 | } 53 | } 54 | 55 | PSGridFree(g1); 56 | PSGridFree(g2); 57 | PSFinalize(); 58 | 59 | free(indata); 60 | free(outdata); 61 | return 0; 62 | } 63 | 64 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_15.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | #define REAL float 6 | 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 8 | 9 | void kernel(float *g1, float *g2) { 10 | int x, y, z; 11 | for (z = 0; z < N; ++z) { 12 | for (y = 0; y < N; ++y) { 13 | for (x = 0; x < N; ++x) { 14 | float c = g1[OFFSET(x, y, z)]; 15 | float l = 0.0f; 16 | if (x > 0) { 17 | l = g1[OFFSET(x-1, y, z)]; 18 | } 19 | if (x > 0) { 20 | l += g1[OFFSET(x-1, y, z)]; 
21 | } else { 22 | l += g1[OFFSET(x, y, z)]; 23 | } 24 | if (x > 0) { 25 | l += g1[OFFSET(x-1, y, z)]; 26 | } else { 27 | l += c; 28 | } 29 | if (x > 0 && x < N-1) { 30 | l += g1[OFFSET(x-1, y, z)] + g1[OFFSET(x+1, y, z)]; 31 | } else { 32 | l += g1[OFFSET(x, y, z)]; 33 | } 34 | if (x % 2 == 0) { 35 | l += g1[OFFSET(x, y, z)]; 36 | } 37 | g2[OFFSET(x, y, z)] = c + l; 38 | } 39 | } 40 | } 41 | return; 42 | } 43 | 44 | void dump(float *input) { 45 | int i; 46 | for (i = 0; i < N*N*N; ++i) { 47 | printf("%f\n", input[i]); 48 | } 49 | } 50 | 51 | int main(int argc, char *argv[]) { 52 | REAL *g1, *g2; 53 | size_t nelms = N*N*N; 54 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 55 | g2 = (REAL *)malloc(sizeof(REAL) * nelms); 56 | 57 | int i; 58 | for (i = 0; i < (int)nelms; i++) { 59 | g1[i] = i; 60 | g2[i] = i; 61 | } 62 | 63 | kernel(g1, g2); 64 | dump(g2); 65 | 66 | free(g1); 67 | free(g2); 68 | return 0; 69 | } 70 | 71 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_16.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | #define REAL float 6 | 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 8 | #define PSGridGet(g, x, y, z) ((g)[OFFSET(x, y, z)]) 9 | 10 | void kernel(float *g1, float *g2) { 11 | int x, y, z; 12 | int halo_width = 1; 13 | for (z = 0; z < N; ++z) { 14 | for (y = 0; y < N; ++y) { 15 | for (x = 0; x < N; ++x) { 16 | float c, w, e, n, s, b, t; 17 | c = PSGridGet(g1, x, y, z); 18 | if (x == 0) w = c; else w = PSGridGet(g1, x-1, y, z); 19 | if (x == N-1) e = c ; else e = PSGridGet(g1, x+1, y, z); 20 | if (y == 0) n = c ; else n=PSGridGet(g1, x, y-1, z); 21 | if (y == N-1) s= c ; else s=PSGridGet(g1, x, y+1, z); 22 | if (z == 0) b= c ; else b=PSGridGet(g1, x, y, z-1); 23 | if (z == N-1) t= c ; else t=PSGridGet(g1, x, y, z+1); 24 | g2[OFFSET(x, y, z)] = c + w + e + s + n + b + t; 25 | } 26 
| } 27 | } 28 | return; 29 | } 30 | 31 | void dump(float *input) { 32 | int i; 33 | for (i = 0; i < N*N*N; ++i) { 34 | printf("%f\n", input[i]); 35 | } 36 | } 37 | 38 | int main(int argc, char *argv[]) { 39 | REAL *g1, *g2; 40 | size_t nelms = N*N*N; 41 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 42 | g2 = (REAL *)malloc(sizeof(REAL) * nelms); 43 | 44 | int i; 45 | for (i = 0; i < (int)nelms; i++) { 46 | g1[i] = i; 47 | g2[i] = i; 48 | } 49 | 50 | kernel(g1, g2); 51 | dump(g2); 52 | 53 | free(g1); 54 | free(g2); 55 | return 0; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_3-pt-1d.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: 3-point stencil with 1-D grids 3 | * DIM: 1 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include 8 | #include "physis/physis.h" 9 | 10 | #define N 1024 11 | 12 | static void kernel(const int x, PSGrid1DFloat g, PSGrid1DFloat g2) { 13 | float v = 14 | PSGridGet(g,x-1) + 15 | PSGridGet(g,x) + 16 | PSGridGet(g,x+1); 17 | PSGridEmit(g2, v); 18 | return; 19 | } 20 | 21 | void dump(float *input) { 22 | int i; 23 | for (i = 0; i < N; ++i) { 24 | printf("%f\n", input[i]); 25 | } 26 | } 27 | 28 | int main(int argc, char *argv[]) { 29 | PSInit(&argc, &argv, 1, N); 30 | PSGrid1DFloat g1 = PSGrid1DFloatNew(N); 31 | PSGrid1DFloat g2 = PSGrid1DFloatNew(N); 32 | 33 | PSDomain1D d = PSDomain1DNew(1, N-1); 34 | size_t nelms = N; 35 | 36 | float *indata = (float *)malloc(sizeof(float) * nelms); 37 | int i; 38 | for (i = 0; i < nelms; i++) { 39 | indata[i] = i; 40 | } 41 | float *outdata = (float *)malloc(sizeof(float) * nelms); 42 | 43 | PSGridCopyin(g1, indata); 44 | PSGridCopyin(g2, indata); 45 | 46 | PSStencilRun(PSStencilMap(kernel, d, g1, g2)); 47 | 48 | PSGridCopyout(g2, outdata); 49 | dump(outdata); 50 | 51 | PSGridFree(g1); 52 | PSGridFree(g2); 53 | PSFinalize(); 54 | free(indata); 55 | free(outdata); 56 | return 0; 57 | 
} 58 | 59 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_3-pt-1d.manual.cuda.cu: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include "cuda.h" 3 | #include "cuda_runtime.h" 4 | 5 | #define N 1024 6 | #define REAL float 7 | 8 | #define OFFSET1D(x) (x) 9 | #define OFFSET3D(x, y, z) ((x) + (y) * N + (z) * N * N) 10 | /* 3-point 1-D stencil: g2[x] = g1[x-1] + g1[x] + g1[x+1] for interior x. Expects a 1-D launch with one thread per element; uses no shared memory. */ 11 | __global__ void kernel(REAL *g1, REAL *g2) { 12 | int x = threadIdx.x + blockIdx.x * blockDim.x; 13 | /* Skip both boundary cells and any surplus thread at or beyond the end of the grid. */ 14 | if (x <= 0 || x >= N-1) return; 15 | 16 | float v = g1[OFFSET1D(x-1)] + g1[OFFSET1D(x)] + 17 | g1[OFFSET1D(x+1)]; 18 | g2[OFFSET1D(x)] = v; 19 | return; 20 | } 21 | /* Prints the N values of input, one per line. */ 22 | void dump(float *input) { 23 | int i; 24 | for (i = 0; i < N; ++i) { 25 | printf("%f\n", input[i]); 26 | } 27 | } 28 | /* Host driver: fills the input with 0..N-1, runs the kernel on the device, and prints the result. Returns 1 if CUDA reports an error. */ 29 | int main(int argc, char *argv[]) { 30 | REAL *g1, *g1d; 31 | REAL *g2d; 32 | size_t nelms = N; 33 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 34 | cudaMalloc((void**)&g1d, sizeof(REAL) * nelms); 35 | cudaMalloc((void**)&g2d, sizeof(REAL) * nelms); 36 | 37 | int i; 38 | for (i = 0; i < (int)nelms; i++) { 39 | g1[i] = i; 40 | } 41 | /* g2d starts as a copy of the input so the boundary cells the kernel skips keep their initial values. */ 42 | cudaMemcpy(g1d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice); 43 | cudaMemcpy(g2d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice); 44 | /* Round the block count up so every element is covered even when N is not a multiple of the block size. */ 45 | dim3 block_dim(4); 46 | dim3 grid_dim((N + block_dim.x - 1)/block_dim.x); 47 | 48 | kernel<<<grid_dim, block_dim>>>(g1d, g2d); 49 | cudaMemcpy(g1, g2d, sizeof(REAL) * nelms, cudaMemcpyDeviceToHost); 50 | { cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err)); return 1; } } 51 | dump(g1); 52 | cudaFree(g1d); cudaFree(g2d); free(g1); 53 | cudaDeviceReset(); 54 | return 0; 55 | } 56 | 57 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_3-pt-1d.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | 4 | #define N 1024 5 | #define REAL float 6 | 7 | #define OFFSET(x) (x) 8 | 9 | void kernel(float *g1, float *g2) { 10 | int x; 11 | for (x = 1; x < N-1; ++x) { 12 | float v
= g1[x-1] + g1[x] + g1[x+1]; 13 | g2[x] = v; 14 | } 15 | return; 16 | } 17 | 18 | void dump(float *input) { 19 | int i; 20 | for (i = 0; i < N; ++i) { 21 | printf("%f\n", input[i]); 22 | } 23 | } 24 | 25 | int main(int argc, char *argv[]) { 26 | REAL *g1, *g2; 27 | size_t nelms = N; 28 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 29 | g2 = (REAL *)malloc(sizeof(REAL) * nelms); 30 | 31 | int i; 32 | for (i = 0; i < (int)nelms; i++) { 33 | g1[i] = i; 34 | g2[i] = i; 35 | } 36 | 37 | kernel(g1, g2); 38 | dump(g2); 39 | 40 | free(g1); 41 | free(g2); 42 | return 0; 43 | } 44 | 45 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_3-pt-periodic.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: 3-point periodic-boundary stencil 3 | * DIM: 3 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include 8 | #include "physis/physis.h" 9 | 10 | #define N 32 11 | 12 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g, 13 | PSGrid3DFloat g2) { 14 | float v = 15 | PSGridGetPeriodic(g,x,y,z) + 16 | PSGridGetPeriodic(g,x,y,z+1) + 17 | PSGridGetPeriodic(g,x,y,z-1); 18 | PSGridEmit(g2, v); 19 | return; 20 | } 21 | 22 | void dump(float *input) { 23 | int i; 24 | for (i = 0; i < N*N*N; ++i) { 25 | printf("%f\n", input[i]); 26 | } 27 | } 28 | 29 | int main(int argc, char *argv[]) { 30 | PSInit(&argc, &argv, 3, N, N, N); 31 | PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N); 32 | PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N); 33 | 34 | PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N); 35 | size_t nelms = N*N*N; 36 | 37 | float *indata = (float *)malloc(sizeof(float) * nelms); 38 | int i; 39 | for (i = 0; i < nelms; i++) { 40 | indata[i] = i; 41 | } 42 | float *outdata = (float *)malloc(sizeof(float) * nelms); 43 | 44 | PSGridCopyin(g1, indata); 45 | PSGridCopyin(g2, indata); 46 | 47 | PSStencilRun(PSStencilMap(kernel, d, g1, g2)); 48 | 49 | PSGridCopyout(g2, outdata); 50 | 
dump(outdata); 51 | 52 | PSGridFree(g1); 53 | PSGridFree(g2); 54 | PSFinalize(); 55 | free(indata); 56 | free(outdata); 57 | return 0; 58 | } 59 | 60 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_3-pt-periodic.manual.cuda.cu: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include "cuda.h" 3 | #include "cuda_runtime.h" 4 | 5 | #define N 32 6 | #define REAL float 7 | 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 9 | /* 3-point stencil along z with wrap-around: g2(x,y,z) = g1(x,y,z) + g1(x,y,z+1) + g1(x,y,z-1). Expects a 3-D launch with one thread per cell; uses no shared memory. */ 10 | __global__ void kernel(REAL *g1, REAL *g2) { 11 | int x = threadIdx.x + blockIdx.x * blockDim.x; 12 | int y = threadIdx.y + blockIdx.y * blockDim.y; 13 | int z = threadIdx.z + blockIdx.z * blockDim.z; 14 | if (x >= N || y >= N || z >= N) return; /* surplus threads outside the N^3 grid do nothing */ 15 | int zp = ((z - 1) + N) % N; 16 | int zn = (z + 1) % N; 17 | /* zp and zn wrap around in z, which gives the periodic boundary. */ 18 | float v = 19 | g1[OFFSET(x, y, z)] + 20 | g1[OFFSET(x, y, zn)] + 21 | g1[OFFSET(x, y, zp)]; 22 | g2[OFFSET(x, y, z)] = v; 23 | return; 24 | } 25 | /* Prints the N*N*N values of input, one per line. */ 26 | void dump(float *input) { 27 | int i; 28 | for (i = 0; i < N*N*N; ++i) { 29 | printf("%f\n", input[i]); 30 | } 31 | } 32 | /* Host driver: fills the input with 0..N*N*N-1, runs the kernel on the device, and prints the result. Returns 1 if CUDA reports an error. */ 33 | int main(int argc, char *argv[]) { 34 | REAL *g1, *g1d; 35 | REAL *g2d; 36 | size_t nelms = N*N*N; 37 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 38 | cudaMalloc((void**)&g1d, sizeof(REAL) * nelms); 39 | cudaMalloc((void**)&g2d, sizeof(REAL) * nelms); 40 | 41 | int i; 42 | for (i = 0; i < (int)nelms; i++) { 43 | g1[i] = i; 44 | } 45 | 46 | cudaMemcpy(g1d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice); 47 | cudaMemcpy(g2d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice); 48 | /* Round each grid dimension up so every cell is covered even when N is not a multiple of the block size. */ 49 | dim3 block_dim(4, 4, 4); 50 | dim3 grid_dim((N + block_dim.x - 1)/block_dim.x, (N + block_dim.y - 1)/block_dim.y, (N + block_dim.z - 1)/block_dim.z); 51 | 52 | kernel<<<grid_dim, block_dim>>>(g1d, g2d); 53 | cudaMemcpy(g1, g2d, sizeof(REAL) * nelms, cudaMemcpyDeviceToHost); 54 | { cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err)); return 1; } } 55 | dump(g1); 56 | cudaFree(g1d); cudaFree(g2d); free(g1); 57 | cudaDeviceReset(); 58 | return 0; 59 | } 60 | 61 | --------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_3-pt-periodic.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | #define REAL float 6 | 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 8 | 9 | void kernel(float *g1, float *g2) { 10 | int x, y, z; 11 | for (z = 0; z < N; ++z) { 12 | int zp = ((z - 1) + N) % N; 13 | int zn = (z + 1) % N; 14 | for (y = 0; y < N; ++y) { 15 | int yp = ((y - 1) + N) % N; 16 | int yn = (y + 1) % N; 17 | for (x = 0; x < N; ++x) { 18 | int xp = ((x - 1) + N) % N; 19 | int xn = (x + 1) % N; 20 | float v = 21 | g1[OFFSET(x, y, z)] + 22 | g1[OFFSET(x, y, zn)] + 23 | g1[OFFSET(x, y, zp)]; 24 | g2[OFFSET(x, y, z)] = v; 25 | } 26 | } 27 | } 28 | return; 29 | } 30 | 31 | void dump(float *input) { 32 | int i; 33 | for (i = 0; i < N*N*N; ++i) { 34 | printf("%f\n", input[i]); 35 | } 36 | } 37 | 38 | int main(int argc, char *argv[]) { 39 | REAL *g1, *g2; 40 | size_t nelms = N*N*N; 41 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 42 | g2 = (REAL *)malloc(sizeof(REAL) * nelms); 43 | 44 | int i; 45 | for (i = 0; i < (int)nelms; i++) { 46 | g1[i] = i; 47 | g2[i] = i; 48 | } 49 | 50 | kernel(g1, g2); 51 | dump(g2); 52 | 53 | free(g1); 54 | free(g2); 55 | return 0; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_5-pt-2d.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: 5-point stencil with 2-D grids 3 | * DIM: 2 4 | * PRIORITY: 10 5 | */ 6 | 7 | #include 8 | #include "physis/physis.h" 9 | 10 | #define N 32 11 | 12 | static void kernel(const int x, const int y, PSGrid2DFloat g, PSGrid2DFloat g2) { 13 | float v = 14 | PSGridGet(g,x, y) + 15 | PSGridGet(g,x-1, y) + 16 | PSGridGet(g,x+1, y) + 17 | PSGridGet(g,x, y-1) + 18 | PSGridGet(g,x, y+1); 19 | PSGridEmit(g2, v); 20 | return; 21 | } 22 | 23 | void dump(float *input) { 24 | int i; 
25 | for (i = 0; i < N*N; ++i) { 26 | printf("%f\n", input[i]); 27 | } 28 | } 29 | 30 | int main(int argc, char *argv[]) { 31 | PSInit(&argc, &argv, 2, N, N); 32 | PSGrid2DFloat g1 = PSGrid2DFloatNew(N, N); 33 | PSGrid2DFloat g2 = PSGrid2DFloatNew(N, N); 34 | 35 | PSDomain2D d = PSDomain2DNew(1, N-1, 1, N-1); 36 | size_t nelms = N * N; 37 | 38 | float *indata = (float *)malloc(sizeof(float) * nelms); 39 | int i; 40 | for (i = 0; i < nelms; i++) { 41 | indata[i] = i; 42 | } 43 | float *outdata = (float *)malloc(sizeof(float) * nelms); 44 | 45 | PSGridCopyin(g1, indata); 46 | PSGridCopyin(g2, indata); 47 | 48 | PSStencilRun(PSStencilMap(kernel, d, g1, g2)); 49 | 50 | PSGridCopyout(g2, outdata); 51 | dump(outdata); 52 | 53 | PSGridFree(g1); 54 | PSGridFree(g2); 55 | PSFinalize(); 56 | free(indata); 57 | free(outdata); 58 | return 0; 59 | } 60 | 61 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_5-pt-2d.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | #define REAL float 6 | 7 | #define OFFSET(x, y) ((x) + (y) * N) 8 | 9 | void kernel(float *g1, float *g2) { 10 | int x, y; 11 | for (y = 1; y < N-1; ++y) { 12 | for (x = 1; x < N-1; ++x) { 13 | float v = g1[OFFSET(x, y)] + 14 | g1[OFFSET(x-1, y)] + 15 | g1[OFFSET(x+1, y)] + 16 | g1[OFFSET(x, y-1)] + 17 | g1[OFFSET(x, y+1)]; 18 | g2[OFFSET(x, y)] = v; 19 | } 20 | } 21 | return; 22 | } 23 | 24 | void dump(float *input) { 25 | int i; 26 | for (i = 0; i < N*N; ++i) { 27 | printf("%f\n", input[i]); 28 | } 29 | } 30 | 31 | int main(int argc, char *argv[]) { 32 | REAL *g1, *g2; 33 | size_t nelms = N*N; 34 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 35 | g2 = (REAL *)malloc(sizeof(REAL) * nelms); 36 | 37 | int i; 38 | for (i = 0; i < (int)nelms; i++) { 39 | g1[i] = i; 40 | g2[i] = i; 41 | } 42 | 43 | kernel(g1, g2); 44 | dump(g2); 45 | 46 | free(g1); 47 | 
free(g2); 48 | return 0; 49 | } 50 | 51 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_5-pt-periodic.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: 5-point periodic-boundary stencil 3 | * DIM: 2 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include <stdio.h> 8 | #include "physis/physis.h" 9 | 10 | #define N 32 11 | 12 | void kernel(const int x, const int y, PSGrid2DFloat g, 13 | PSGrid2DFloat g2) { 14 | float v = 15 | PSGridGetPeriodic(g,x,y) + 16 | PSGridGetPeriodic(g,x+1,y) + 17 | PSGridGetPeriodic(g,x-1,y) + 18 | PSGridGetPeriodic(g,x,y+1) + 19 | PSGridGetPeriodic(g,x,y-1); 20 | PSGridEmit(g2, v); 21 | return; 22 | } 23 | 24 | void dump(float *input) { 25 | int i; 26 | for (i = 0; i < N*N; ++i) { 27 | printf("%f\n", input[i]); 28 | } 29 | } 30 | 31 | int main(int argc, char *argv[]) { 32 | PSInit(&argc, &argv, 2, N, N); 33 | PSGrid2DFloat g1 = PSGrid2DFloatNew(N, N); 34 | PSGrid2DFloat g2 = PSGrid2DFloatNew(N, N); 35 | 36 | PSDomain2D d = PSDomain2DNew(0, N, 0, N); 37 | size_t nelms = N*N; 38 | 39 | float *indata = (float *)malloc(sizeof(float) * nelms); 40 | int i; 41 | for (i = 0; i < nelms; i++) { 42 | indata[i] = i; 43 | } 44 | float *outdata = (float *)malloc(sizeof(float) * nelms); 45 | 46 | PSGridCopyin(g1, indata); 47 | PSGridCopyin(g2, indata); 48 | 49 | PSStencilRun(PSStencilMap(kernel, d, g1, g2), 50 | PSStencilMap(kernel, d, g2, g1)); 51 | 52 | PSGridCopyout(g1, outdata); 53 | dump(outdata); 54 | 55 | PSGridFree(g1); 56 | PSGridFree(g2); 57 | PSFinalize(); 58 | free(indata); 59 | free(outdata); 60 | return 0; 61 | } 62 | 63 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_5-pt-periodic.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | 4 | #define N 32 5 | #define REAL float 6 | 7 | #define OFFSET(x, y)
((x) + (y) * N) 8 | 9 | void kernel(float *g1, float *g2) { 10 | int x, y; 11 | for (y = 0; y < N; ++y) { 12 | int yp = ((y - 1) + N) % N; 13 | int yn = (y + 1) % N; 14 | for (x = 0; x < N; ++x) { 15 | int xp = ((x - 1) + N) % N; 16 | int xn = (x + 1) % N; 17 | float v = 18 | g1[OFFSET(x, y)] + 19 | g1[OFFSET(xn, y)] + 20 | g1[OFFSET(xp, y)] + 21 | g1[OFFSET(x, yn)] + 22 | g1[OFFSET(x, yp)]; 23 | g2[OFFSET(x, y)] = v; 24 | } 25 | } 26 | return; 27 | } 28 | 29 | void dump(float *input) { 30 | int i; 31 | for (i = 0; i < N*N; ++i) { 32 | printf("%f\n", input[i]); 33 | } 34 | } 35 | 36 | int main(int argc, char *argv[]) { 37 | REAL *g1, *g2; 38 | size_t nelms = N*N; 39 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 40 | g2 = (REAL *)malloc(sizeof(REAL) * nelms); 41 | 42 | int i; 43 | for (i = 0; i < (int)nelms; i++) { 44 | g1[i] = i; 45 | g2[i] = i; 46 | } 47 | 48 | kernel(g1, g2); 49 | kernel(g2, g1); 50 | 51 | dump(g1); 52 | 53 | free(g1); 54 | free(g2); 55 | return 0; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_7-pt-double-type.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | 6 | #define T double 7 | 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 9 | 10 | void kernel(T *g1, T *g2) { 11 | int x, y, z; 12 | int halo_width = 1; 13 | for (z = halo_width; z < N-halo_width; ++z) { 14 | for (y = halo_width; y < N-halo_width; ++y) { 15 | for (x = halo_width; x < N-halo_width; ++x) { 16 | T v = g1[OFFSET(x, y, z)] + 17 | g1[OFFSET(x+1, y, z)] + g1[OFFSET(x-1, y, z)] + 18 | g1[OFFSET(x, y+1, z)] + g1[OFFSET(x, y-1, z)] + 19 | g1[OFFSET(x, y, z-1)] + g1[OFFSET(x, y, z+1)]; 20 | g2[OFFSET(x, y, z)] = v; 21 | } 22 | } 23 | } 24 | return; 25 | } 26 | 27 | void dump(T *input) { 28 | int i; 29 | for (i = 0; i < N*N*N; ++i) { 30 | printf("%f\n", input[i]); 31 | } 32 | } 33 | 34 | int 
main(int argc, char *argv[]) { 35 | T *g1, *g2; 36 | size_t nelms = N*N*N; 37 | g1 = (T *)malloc(sizeof(T) * nelms); 38 | g2 = (T *)malloc(sizeof(T) * nelms); 39 | 40 | int i; 41 | for (i = 0; i < (int)nelms; i++) { 42 | g1[i] = i; 43 | g2[i] = i; 44 | } 45 | 46 | kernel(g1, g2); 47 | dump(g2); 48 | 49 | free(g1); 50 | free(g2); 51 | return 0; 52 | } 53 | 54 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_7-pt-int-type.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | 6 | #define T int 7 | 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 9 | 10 | void kernel(T *g1, T *g2) { 11 | int x, y, z; 12 | int halo_width = 1; 13 | for (z = halo_width; z < N-halo_width; ++z) { 14 | for (y = halo_width; y < N-halo_width; ++y) { 15 | for (x = halo_width; x < N-halo_width; ++x) { 16 | T v = g1[OFFSET(x, y, z)] + 17 | g1[OFFSET(x+1, y, z)] + g1[OFFSET(x-1, y, z)] + 18 | g1[OFFSET(x, y+1, z)] + g1[OFFSET(x, y-1, z)] + 19 | g1[OFFSET(x, y, z-1)] + g1[OFFSET(x, y, z+1)]; 20 | g2[OFFSET(x, y, z)] = v; 21 | } 22 | } 23 | } 24 | return; 25 | } 26 | 27 | void dump(T *input) { 28 | int i; 29 | for (i = 0; i < N*N*N; ++i) { 30 | printf("%d\n", input[i]); 31 | } 32 | } 33 | 34 | int main(int argc, char *argv[]) { 35 | T *g1, *g2; 36 | size_t nelms = N*N*N; 37 | g1 = (T *)malloc(sizeof(T) * nelms); 38 | g2 = (T *)malloc(sizeof(T) * nelms); 39 | 40 | int i; 41 | for (i = 0; i < (int)nelms; i++) { 42 | g1[i] = i; 43 | g2[i] = i; 44 | } 45 | 46 | kernel(g1, g2); 47 | dump(g2); 48 | 49 | free(g1); 50 | free(g2); 51 | return 0; 52 | } 53 | 54 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_7-pt-multi-iterations.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 
| #define ITER 5 6 | #define REAL float 7 | 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 9 | 10 | void kernel(float *g1, float *g2) { 11 | int x, y, z; 12 | int halo_width = 1; 13 | for (z = halo_width; z < N-halo_width; ++z) { 14 | for (y = halo_width; y < N-halo_width; ++y) { 15 | for (x = halo_width; x < N-halo_width; ++x) { 16 | float v = g1[OFFSET(x, y, z)] + 17 | g1[OFFSET(x+1, y, z)] + g1[OFFSET(x-1, y, z)] + 18 | g1[OFFSET(x, y+1, z)] + g1[OFFSET(x, y-1, z)] + 19 | g1[OFFSET(x, y, z-1)] + g1[OFFSET(x, y, z+1)]; 20 | g2[OFFSET(x, y, z)] = v; 21 | } 22 | } 23 | } 24 | return; 25 | } 26 | 27 | void dump(float *input) { 28 | int i; 29 | for (i = 0; i < N*N*N; ++i) { 30 | printf("%f\n", input[i]); 31 | } 32 | } 33 | 34 | int main(int argc, char *argv[]) { 35 | REAL *g1, *g2; 36 | size_t nelms = N*N*N; 37 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 38 | g2 = (REAL *)malloc(sizeof(REAL) * nelms); 39 | 40 | int i; 41 | for (i = 0; i < (int)nelms; i++) { 42 | g1[i] = i; 43 | g2[i] = i; 44 | } 45 | 46 | for (i = 0; i < ITER; ++i) { 47 | kernel(g1, g2); 48 | kernel(g2, g1); 49 | } 50 | 51 | dump(g1); 52 | 53 | free(g1); 54 | free(g2); 55 | return 0; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_7-pt-neumann-cond.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | 4 | #define N 8 5 | #define ITER 1 6 | #define REAL float 7 | 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 9 | /* 7-point stencil over an nx*ny*nz grid stored x-fastest (index = x + y*nx + z*nx*ny). A neighbour that would fall outside the grid is replaced by the centre cell, so y-neighbours are nx elements away and z-neighbours nx*ny. */ 10 | void kernel(REAL *g1, REAL *g2, 11 | int nx, int ny, int nz) { 12 | int z; 13 | for (z = 0; z < nz; z++) { 14 | int y; 15 | for (y = 0; y < ny; y++) { 16 | int x; 17 | for (x = 0; x < nx; x++) { 18 | int c, w, e, n, s, b, t; 19 | c = x + y * nx + z * nx * ny; 20 | w = (x == 0) ? c : c - 1; 21 | e = (x == nx-1) ? c : c + 1; 22 | n = (y == 0) ? c : c - nx; 23 | s = (y == ny-1) ? c : c + nx; 24 | b = (z == 0) ?
c : c - nx * ny; 25 | t = (z == nz-1) ? c : c + nx * ny; 26 | g2[c] = g1[c] + g1[w] + g1[e] 27 | + g1[s] + g1[n] + g1[b] + g1[t]; 28 | } 29 | } 30 | } 31 | return; 32 | } 33 | 34 | void dump(float *input) { 35 | int i; 36 | for (i = 0; i < N*N*N; ++i) { 37 | printf("%f\n", input[i]); 38 | } 39 | } 40 | 41 | int main(int argc, char *argv[]) { 42 | REAL *g1, *g2; 43 | size_t nelms = N*N*N; 44 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 45 | g2 = (REAL *)malloc(sizeof(REAL) * nelms); 46 | 47 | int i; 48 | for (i = 0; i < nelms; i++) { 49 | g1[i] = i; 50 | } 51 | 52 | int nx = N, ny = N, nz = N; 53 | 54 | for (i = 0; i < ITER; ++i) { 55 | kernel(g1, g2, nx, ny, nz); 56 | REAL *t = g1; 57 | g1 = g2; 58 | g2 = t; 59 | } 60 | dump(g1); 61 | 62 | free(g1); 63 | free(g2); 64 | return 0; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_7-pt-periodic.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: 7-point periodic-boundary stencil 3 | * DIM: 3 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include 8 | #include "physis/physis.h" 9 | 10 | #define N 32 11 | 12 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g, 13 | PSGrid3DFloat g2) { 14 | float v = 15 | PSGridGetPeriodic(g,x,y,z) + 16 | PSGridGetPeriodic(g,x+1,y,z) + 17 | PSGridGetPeriodic(g,x-1,y,z) + 18 | PSGridGetPeriodic(g,x,y+1,z) + 19 | PSGridGetPeriodic(g,x,y-1,z) + 20 | PSGridGetPeriodic(g,x,y,z+1) + 21 | PSGridGetPeriodic(g,x,y,z-1); 22 | PSGridEmit(g2, v); 23 | return; 24 | } 25 | 26 | void dump(float *input) { 27 | int i; 28 | for (i = 0; i < N*N*N; ++i) { 29 | printf("%f\n", input[i]); 30 | } 31 | } 32 | 33 | int main(int argc, char *argv[]) { 34 | PSInit(&argc, &argv, 3, N, N, N); 35 | PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N); 36 | PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N); 37 | 38 | PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N); 39 | size_t nelms = N*N*N; 40 | 
41 | float *indata = (float *)malloc(sizeof(float) * nelms); 42 | int i; 43 | for (i = 0; i < nelms; i++) { 44 | indata[i] = i; 45 | } 46 | float *outdata = (float *)malloc(sizeof(float) * nelms); 47 | 48 | PSGridCopyin(g1, indata); 49 | PSGridCopyin(g2, indata); 50 | 51 | PSStencilRun(PSStencilMap(kernel, d, g1, g2)); 52 | 53 | PSGridCopyout(g2, outdata); 54 | dump(outdata); 55 | 56 | PSGridFree(g1); 57 | PSGridFree(g2); 58 | PSFinalize(); 59 | free(indata); 60 | free(outdata); 61 | return 0; 62 | } 63 | 64 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_7-pt-periodic.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | #define REAL float 6 | 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 8 | 9 | void kernel(float *g1, float *g2) { 10 | int x, y, z; 11 | for (z = 0; z < N; ++z) { 12 | int zp = ((z - 1) + N) % N; 13 | int zn = (z + 1) % N; 14 | for (y = 0; y < N; ++y) { 15 | int yp = ((y - 1) + N) % N; 16 | int yn = (y + 1) % N; 17 | for (x = 0; x < N; ++x) { 18 | int xp = ((x - 1) + N) % N; 19 | int xn = (x + 1) % N; 20 | float v = 21 | g1[OFFSET(x, y, z)] + 22 | g1[OFFSET(xn, y, z)] + 23 | g1[OFFSET(xp, y, z)] + 24 | g1[OFFSET(x, yn, z)] + 25 | g1[OFFSET(x, yp, z)] + 26 | g1[OFFSET(x, y, zn)] + 27 | g1[OFFSET(x, y, zp)]; 28 | g2[OFFSET(x, y, z)] = v; 29 | } 30 | } 31 | } 32 | return; 33 | } 34 | 35 | void dump(float *input) { 36 | int i; 37 | for (i = 0; i < N*N*N; ++i) { 38 | printf("%f\n", input[i]); 39 | } 40 | } 41 | 42 | int main(int argc, char *argv[]) { 43 | REAL *g1, *g2; 44 | size_t nelms = N*N*N; 45 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 46 | g2 = (REAL *)malloc(sizeof(REAL) * nelms); 47 | 48 | int i; 49 | for (i = 0; i < (int)nelms; i++) { 50 | g1[i] = i; 51 | g2[i] = i; 52 | } 53 | 54 | kernel(g1, g2); 55 | dump(g2); 56 | 57 | free(g1); 58 | free(g2); 59 | return 0; 60 | } 
61 | 62 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_7-pt-type-mix.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | 6 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 7 | 8 | void kernel(float *g1, double *g2, int *c) { 9 | int x, y, z; 10 | int halo_width = 1; 11 | for (z = halo_width; z < N-halo_width; ++z) { 12 | for (y = halo_width; y < N-halo_width; ++y) { 13 | for (x = halo_width; x < N-halo_width; ++x) { 14 | double v = (double)g1[OFFSET(x, y, z)] + 15 | (double)g1[OFFSET(x+1, y, z)] + 16 | (double)g1[OFFSET(x-1, y, z)] + 17 | (double)g1[OFFSET(x, y+1, z)] + 18 | (double)g1[OFFSET(x, y-1, z)] + 19 | (double)g1[OFFSET(x, y, z-1)] + 20 | (double)g1[OFFSET(x, y, z+1)]; 21 | g2[OFFSET(x, y, z)] = v * c[OFFSET(x, y, z)]; 22 | } 23 | } 24 | } 25 | return; 26 | } 27 | 28 | void dump(double *input) { 29 | int i; 30 | for (i = 0; i < N*N*N; ++i) { 31 | printf("%f\n", input[i]); 32 | } 33 | } 34 | 35 | int main(int argc, char *argv[]) { 36 | float *g1; 37 | double *g2; 38 | int *c; 39 | size_t nelms = N*N*N; 40 | g1 = (float *)malloc(sizeof(float) * nelms); 41 | g2 = (double *)malloc(sizeof(double) * nelms); 42 | c = (int *)malloc(sizeof(int) * nelms); 43 | 44 | int i; 45 | for (i = 0; i < (int)nelms; i++) { 46 | g1[i] = i; 47 | g2[i] = 0; 48 | c[i] = i % 10; 49 | } 50 | 51 | kernel(g1, g2, c); 52 | dump(g2); 53 | 54 | free(g1); 55 | free(g2); 56 | free(c); 57 | return 0; 58 | } 59 | 60 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_7-pt.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | #define REAL float 6 | 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 8 | 9 | void kernel(float *g1, float *g2) { 10 | int x, y, z; 
11 | int halo_width = 1; 12 | for (z = halo_width; z < N-halo_width; ++z) { 13 | for (y = halo_width; y < N-halo_width; ++y) { 14 | for (x = halo_width; x < N-halo_width; ++x) { 15 | float v = g1[OFFSET(x, y, z)] + 16 | g1[OFFSET(x+1, y, z)] + g1[OFFSET(x-1, y, z)] + 17 | g1[OFFSET(x, y+1, z)] + g1[OFFSET(x, y-1, z)] + 18 | g1[OFFSET(x, y, z-1)] + g1[OFFSET(x, y, z+1)]; 19 | g2[OFFSET(x, y, z)] = v; 20 | } 21 | } 22 | } 23 | return; 24 | } 25 | 26 | void dump(float *input) { 27 | int i; 28 | for (i = 0; i < N*N*N; ++i) { 29 | printf("%f\n", input[i]); 30 | } 31 | } 32 | 33 | int main(int argc, char *argv[]) { 34 | REAL *g1, *g2; 35 | size_t nelms = N*N*N; 36 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 37 | g2 = (REAL *)malloc(sizeof(REAL) * nelms); 38 | 39 | int i; 40 | for (i = 0; i < (int)nelms; i++) { 41 | g1[i] = i; 42 | g2[i] = i; 43 | } 44 | 45 | kernel(g1, g2); 46 | dump(g2); 47 | 48 | free(g1); 49 | free(g2); 50 | return 0; 51 | } 52 | 53 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_7-pt.module.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Module test 3 | * DIM: 3 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include 8 | #include "physis/physis.h" 9 | 10 | static void kernel(const int x, const int y, const int z, PSGrid3DFloat g1, 11 | PSGrid3DFloat g2) { 12 | float v = PSGridGet(g1, x, y, z) + 13 | PSGridGet(g1, x+1, y, z) + PSGridGet(g1, x-1, y, z) + 14 | PSGridGet(g1, x, y+1, z) + PSGridGet(g1, x, y-1, z) + 15 | PSGridGet(g1, x, y, z-1) + PSGridGet(g1, x, y, z+1); 16 | PSGridEmit(g2, v); 17 | return; 18 | } 19 | 20 | #define halo_width (1) 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | int run(PSGrid3DFloat g1, PSGrid3DFloat g2, int n) { 27 | PSDomain3D d = PSDomain3DNew(0+halo_width, n-halo_width, 28 | 0+halo_width, n-halo_width, 29 | 0+halo_width, n-halo_width); 30 | PSStencilRun(PSStencilMap(kernel, d, g1, g2)); 
31 | return 0; 32 | } 33 | 34 | PSGrid3DFloat create_grid(int n) { 35 | PSGrid3DFloat g = PSGrid3DFloatNew(n, n, n); 36 | return g; 37 | } 38 | 39 | void copyin(PSGrid3DFloat g, float *d) { 40 | PSGridCopyin(g, d); 41 | } 42 | 43 | void copyout(PSGrid3DFloat g, float *d) { 44 | PSGridCopyout(g, d); 45 | } 46 | 47 | int test_module_init(int argc, char *argv[], int n) { 48 | PSInit(&argc, &argv, 3, n, n, n); 49 | return 0; 50 | } 51 | 52 | #ifdef __cplusplus 53 | } 54 | #endif 55 | 56 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_7-pt.module_base.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: 7-point stencil 3 | * DIM: 3 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include <stdio.h> 8 | #include <stdlib.h> 9 | //#include "physis/physis.h" 10 | 11 | #define N 32 12 | /* Entry points implemented by test_7-pt.module.c; these prototypes must match its definitions (test_module_init takes the grid size, run returns int). */ 13 | typedef void * PSGrid3DFloat; 14 | extern int test_module_init(int, char *[], int); 15 | extern PSGrid3DFloat create_grid(int); 16 | extern void copyin(PSGrid3DFloat, float*); 17 | extern void copyout(PSGrid3DFloat, float*); 18 | extern int run(PSGrid3DFloat, PSGrid3DFloat, int); 19 | 20 | void dump(float *input) { 21 | int i; 22 | for (i = 0; i < N*N*N; ++i) { 23 | printf("%f\n", input[i]); 24 | } 25 | } 26 | /* Initialises the module for an N^3 grid, runs the stencil from g1 into g2, and prints g2. */ 27 | int main(int argc, char *argv[]) { 28 | test_module_init(argc, argv, N); 29 | size_t nelms = N*N*N; 30 | 31 | float *indata = (float *)malloc(sizeof(float) * nelms); 32 | int i; 33 | for (i = 0; i < nelms; i++) { 34 | indata[i] = i; 35 | } 36 | float *outdata = (float *)malloc(sizeof(float) * nelms); 37 | 38 | PSGrid3DFloat g1 = create_grid(N); 39 | PSGrid3DFloat g2 = create_grid(N); 40 | 41 | copyin(g1, indata); 42 | copyin(g2, indata); 43 | run(g1, g2, N); 44 | copyout(g2, outdata); 45 | 46 | dump(outdata); 47 | 48 | free(indata); 49 | free(outdata); 50 | return 0; 51 | } 52 | 53 | --------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_9-pt-2d.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: 9-point stencil with 2-D grids 3 | * DIM: 2 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include 8 | #include "physis/physis.h" 9 | 10 | #define N 32 11 | 12 | static void kernel(const int x, const int y, PSGrid2DFloat g, PSGrid2DFloat g2) { 13 | float v = 14 | PSGridGet(g,x, y) + 15 | PSGridGet(g,x-1, y) + 16 | PSGridGet(g,x+1, y) + 17 | PSGridGet(g,x, y-1) + 18 | PSGridGet(g,x, y+1) + 19 | PSGridGet(g,x-1, y-1) + 20 | PSGridGet(g,x+1, y-1) + 21 | PSGridGet(g,x-1, y+1) + 22 | PSGridGet(g,x+1, y+1); 23 | PSGridEmit(g2, v); 24 | return; 25 | } 26 | 27 | void dump(float *input) { 28 | int i; 29 | for (i = 0; i < N*N; ++i) { 30 | printf("%f\n", input[i]); 31 | } 32 | } 33 | 34 | int main(int argc, char *argv[]) { 35 | PSInit(&argc, &argv, 2, N, N); 36 | PSGrid2DFloat g1 = PSGrid2DFloatNew(N, N); 37 | PSGrid2DFloat g2 = PSGrid2DFloatNew(N, N); 38 | 39 | PSDomain2D d = PSDomain2DNew(1, N-1, 1, N-1); 40 | size_t nelms = N * N; 41 | 42 | float *indata = (float *)malloc(sizeof(float) * nelms); 43 | int i; 44 | for (i = 0; i < nelms; i++) { 45 | indata[i] = i; 46 | } 47 | float *outdata = (float *)malloc(sizeof(float) * nelms); 48 | 49 | PSGridCopyin(g1, indata); 50 | PSGridCopyin(g2, indata); 51 | 52 | PSStencilRun(PSStencilMap(kernel, d, g1, g2)); 53 | 54 | PSGridCopyout(g2, outdata); 55 | dump(outdata); 56 | 57 | PSGridFree(g1); 58 | PSGridFree(g2); 59 | PSFinalize(); 60 | free(indata); 61 | free(outdata); 62 | return 0; 63 | } 64 | 65 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_9-pt-2d.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | #define REAL float 6 | 7 | #define OFFSET(x, y) ((x) + (y) * N) 8 | 9 | void kernel(float *g1, float *g2) { 10 | int x, y; 11 | for (y = 
1; y < N-1; ++y) { 12 | for (x = 1; x < N-1; ++x) { 13 | float v = g1[OFFSET(x, y)] + 14 | g1[OFFSET(x-1, y)] + 15 | g1[OFFSET(x+1, y)] + 16 | g1[OFFSET(x, y-1)] + 17 | g1[OFFSET(x, y+1)] + 18 | g1[OFFSET(x-1, y-1)] + 19 | g1[OFFSET(x+1, y-1)] + 20 | g1[OFFSET(x-1, y+1)] + 21 | g1[OFFSET(x+1, y+1)]; 22 | g2[OFFSET(x, y)] = v; 23 | } 24 | } 25 | return; 26 | } 27 | 28 | void dump(float *input) { 29 | int i; 30 | for (i = 0; i < N*N; ++i) { 31 | printf("%f\n", input[i]); 32 | } 33 | } 34 | 35 | int main(int argc, char *argv[]) { 36 | REAL *g1, *g2; 37 | size_t nelms = N*N; 38 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 39 | g2 = (REAL *)malloc(sizeof(REAL) * nelms); 40 | 41 | int i; 42 | for (i = 0; i < (int)nelms; i++) { 43 | g1[i] = i; 44 | g2[i] = i; 45 | } 46 | 47 | kernel(g1, g2); 48 | dump(g2); 49 | 50 | free(g1); 51 | free(g2); 52 | return 0; 53 | } 54 | 55 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_9-pt-periodic-reduction.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: 9-point periodic stencil with reduction 3 | * DIM: 2 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include 8 | #include "physis/physis.h" 9 | 10 | #define N 32 11 | #define TYPE int 12 | #define PSGridType PSGrid2DInt 13 | 14 | static void kernel(const int x, const int y, PSGridType g, PSGridType g2) { 15 | float v = 16 | PSGridGetPeriodic(g,x, y) + 17 | PSGridGetPeriodic(g,x-1, y) + 18 | PSGridGetPeriodic(g,x+1, y) + 19 | PSGridGetPeriodic(g,x, y-1) + 20 | PSGridGetPeriodic(g,x, y+1) + 21 | PSGridGetPeriodic(g,x-1, y-1) + 22 | PSGridGetPeriodic(g,x+1, y-1) + 23 | PSGridGetPeriodic(g,x-1, y+1) + 24 | PSGridGetPeriodic(g,x+1, y+1); 25 | PSGridEmit(g2, v); 26 | return; 27 | } 28 | 29 | int main(int argc, char *argv[]) { 30 | PSInit(&argc, &argv, 2, N, N); 31 | PSGridType g1 = PSGrid2DIntNew(N, N); 32 | PSGridType g2 = PSGrid2DIntNew(N, N); 33 | 34 | PSDomain2D d = 
PSDomain2DNew(0, N, 0, N); 35 | size_t nelms = N * N; 36 | 37 | TYPE *indata = (TYPE *)malloc(sizeof(TYPE) * nelms); 38 | int i; 39 | for (i = 0; i < nelms; i++) { 40 | indata[i] = i; 41 | } 42 | TYPE *outdata = (TYPE *)malloc(sizeof(TYPE) * nelms); 43 | 44 | PSGridCopyin(g1, indata); 45 | PSGridCopyin(g2, indata); 46 | 47 | PSStencilRun(PSStencilMap(kernel, d, g1, g2), 48 | PSStencilMap(kernel, d, g2, g1)); 49 | int v; 50 | PSReduce(&v, PS_SUM, g1); 51 | printf("%d\n", v); 52 | 53 | PSGridFree(g1); 54 | PSGridFree(g2); 55 | PSFinalize(); 56 | free(indata); 57 | free(outdata); 58 | return 0; 59 | } 60 | 61 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_9-pt-periodic-reduction.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | #define TYPE int 6 | 7 | #define OFFSET(x, y) ((x) + (y) * N) 8 | 9 | void kernel(TYPE *g1, TYPE *g2) { 10 | int x, y; 11 | for (y = 0; y < N; ++y) { 12 | int yp = ((y - 1) + N) % N; 13 | int yn = ((y + 1) + N) % N; 14 | for (x = 0; x < N; ++x) { 15 | int xp = ((x - 1) + N) % N; 16 | int xn = ((x + 1) + N) % N; 17 | float v = g1[OFFSET(x, y)] + 18 | g1[OFFSET(xp, y)] + 19 | g1[OFFSET(xn, y)] + 20 | g1[OFFSET(x, yp)] + 21 | g1[OFFSET(x, yn)] + 22 | g1[OFFSET(xp, yp)] + 23 | g1[OFFSET(xn, yp)] + 24 | g1[OFFSET(xp, yn)] + 25 | g1[OFFSET(xn, yn)]; 26 | g2[OFFSET(x, y)] = v; 27 | } 28 | } 29 | return; 30 | } 31 | 32 | TYPE reduce(TYPE *g) { 33 | TYPE v = 0; 34 | int i; 35 | for (i = 0; i < N*N; ++i) { 36 | v += g[i]; 37 | } 38 | return v; 39 | } 40 | 41 | int main(int argc, char *argv[]) { 42 | TYPE *g1, *g2; 43 | size_t nelms = N*N; 44 | g1 = (TYPE *)malloc(sizeof(TYPE) * nelms); 45 | g2 = (TYPE *)malloc(sizeof(TYPE) * nelms); 46 | 47 | int i; 48 | for (i = 0; i < (int)nelms; i++) { 49 | g1[i] = i; 50 | g2[i] = i; 51 | } 52 | 53 | kernel(g1, g2); 54 | kernel(g2, g1); 55 | 
printf("%d\n", reduce(g1)); 56 | 57 | free(g1); 58 | free(g2); 59 | return 0; 60 | } 61 | 62 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_9-pt-reduction.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: 9-point stencil with reduction 3 | * DIM: 2 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include 8 | #include "physis/physis.h" 9 | 10 | #define N 32 11 | #define TYPE int 12 | #define PSGridType PSGrid2DInt 13 | 14 | static void kernel(const int x, const int y, PSGridType g, PSGridType g2) { 15 | float v = 16 | PSGridGet(g,x, y) + 17 | PSGridGet(g,x-1, y) + 18 | PSGridGet(g,x+1, y) + 19 | PSGridGet(g,x, y-1) + 20 | PSGridGet(g,x, y+1) + 21 | PSGridGet(g,x-1, y-1) + 22 | PSGridGet(g,x+1, y-1) + 23 | PSGridGet(g,x-1, y+1) + 24 | PSGridGet(g,x+1, y+1); 25 | PSGridEmit(g2, v); 26 | return; 27 | } 28 | 29 | void dump(float *input) { 30 | int i; 31 | for (i = 0; i < N*N; ++i) { 32 | printf("%f\n", input[i]); 33 | } 34 | } 35 | 36 | int main(int argc, char *argv[]) { 37 | PSInit(&argc, &argv, 2, N, N); 38 | PSGridType g1 = PSGrid2DIntNew(N, N); 39 | PSGridType g2 = PSGrid2DIntNew(N, N); 40 | 41 | PSDomain2D d = PSDomain2DNew(1, N-1, 1, N-1); 42 | size_t nelms = N * N; 43 | 44 | TYPE *indata = (TYPE *)malloc(sizeof(TYPE) * nelms); 45 | int i; 46 | for (i = 0; i < nelms; i++) { 47 | indata[i] = i; 48 | } 49 | TYPE *outdata = (TYPE *)malloc(sizeof(TYPE) * nelms); 50 | 51 | PSGridCopyin(g1, indata); 52 | PSGridCopyin(g2, indata); 53 | 54 | PSStencilRun(PSStencilMap(kernel, d, g1, g2), 55 | PSStencilMap(kernel, d, g2, g1)); 56 | int v; 57 | PSReduce(&v, PS_SUM, g1); 58 | printf("%d\n", v); 59 | 60 | PSGridFree(g1); 61 | PSGridFree(g2); 62 | PSFinalize(); 63 | free(indata); 64 | free(outdata); 65 | return 0; 66 | } 67 | 68 | -------------------------------------------------------------------------------- 
/tests/system_tests/test_cases/test_9-pt-reduction.manual.ref.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>

#define N 32
#define TYPE int

#define OFFSET(x, y) ((x) + (y) * N)

/* Hand-written 9-point stencil over the interior [1, N-1) x [1, N-1).
 * The sum is accumulated in a float before being stored into the int
 * output. */
void kernel(TYPE *g1, TYPE *g2) {
  int x, y;
  for (y = 1; y < N-1; ++y) {
    for (x = 1; x < N-1; ++x) {
      float v = g1[OFFSET(x, y)] +
          g1[OFFSET(x-1, y)] +
          g1[OFFSET(x+1, y)] +
          g1[OFFSET(x, y-1)] +
          g1[OFFSET(x, y+1)] +
          g1[OFFSET(x-1, y-1)] +
          g1[OFFSET(x+1, y-1)] +
          g1[OFFSET(x-1, y+1)] +
          g1[OFFSET(x+1, y+1)];
      g2[OFFSET(x, y)] = v;
    }
  }
  return;
}

/* Returns the sum of all N*N elements of g. */
TYPE reduce(TYPE *g) {
  TYPE v = 0;
  int i;
  for (i = 0; i < N*N; ++i) {
    v += g[i];
  }
  return v;
}

/* Applies the stencil g1 -> g2, then g2 -> g1, and prints the sum of g1. */
int main(int argc, char *argv[]) {
  TYPE *g1, *g2;
  size_t nelms = N*N;
  g1 = (TYPE *)malloc(sizeof(TYPE) * nelms);
  g2 = (TYPE *)malloc(sizeof(TYPE) * nelms);

  int i;
  for (i = 0; i < (int)nelms; i++) {
    g1[i] = i;
    g2[i] = i;
  }

  kernel(g1, g2);
  kernel(g2, g1);
  printf("%d\n", reduce(g1));

  free(g1);
  free(g2);
  return 0;
}

--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_asymmetric-periodic.c:
--------------------------------------------------------------------------------
/*
 * TEST: Asymmetric stencil with periodic boundary condition
 * DIM: 3
 * PRIORITY: 1
 */

#include <stdio.h>
#include "physis/physis.h"

#define N 32

/* Asymmetric stencil along x: centre, x-1 (plain access) and x+2 (periodic
 * access). */
void kernel(const int x, const int y, const int z, PSGrid3DFloat g,
            PSGrid3DFloat g2) {
  float v = PSGridGet(g, x, y, z) +
      PSGridGet(g, x-1, y, z) + PSGridGetPeriodic(g, x+2, y, z);
  PSGridEmit(g2, v);
  return;
}

/* Prints all N*N*N values to stdout, one value per line. */
void dump(float *input) {
  int i;
  for (i = 0; i < N*N*N; ++i) {
    printf("%f\n", input[i]);
  }
}

#define halo_width (2)

/* Loads both grids with the linear index, runs the stencil once over the
 * interior (halo_width points excluded on every side) and prints g2. */
int main(int argc, char *argv[]) {
  PSInit(&argc, &argv, 3, N, N, N);
  PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N);
  PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);

  PSDomain3D d = PSDomain3DNew(0+halo_width, N-halo_width,
                               0+halo_width, N-halo_width,
                               0+halo_width, N-halo_width);
  size_t nelms = N*N*N;

  float *indata = (float *)malloc(sizeof(float) * nelms);
  int i;
  for (i = 0; i < (int)nelms; i++) {
    indata[i] = i;
  }
  float *outdata = (float *)malloc(sizeof(float) * nelms);

  PSGridCopyin(g1, indata);
  PSGridCopyin(g2, indata);

  PSStencilRun(PSStencilMap(kernel, d, g1, g2));

  PSGridCopyout(g2, outdata);
  dump(outdata);

  PSGridFree(g1);
  PSGridFree(g2);
  PSFinalize();
  free(indata);
  free(outdata);
  return 0;
}

--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_asymmetric-periodic.manual.cuda.cu:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include "cuda.h"
#include "cuda_runtime.h"

#define N 32
#define REAL float

#define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)

/* Width of the boundary layer that the kernel leaves untouched. */
#define halo_width (2)

/* Prints a diagnostic and exits if a CUDA runtime call fails. */
#define CUDA_CHECK(call)                                            \
  do {                                                              \
    cudaError_t err_ = (call);                                      \
    if (err_ != cudaSuccess) {                                      \
      fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
              cudaGetErrorString(err_));                            \
      exit(1);                                                      \
    }                                                               \
  } while (0)

/* One thread per grid point, indexed from a 3-D launch. Threads inside the
 * halo return immediately; interior threads write centre + (x-1) + wrapped
 * (x+2) into g2. */
__global__ void kernel(REAL *g1, REAL *g2) {
  int x = threadIdx.x + blockIdx.x * blockDim.x;
  int y = threadIdx.y + blockIdx.y * blockDim.y;
  int z = threadIdx.z + blockIdx.z * blockDim.z;

  if (x < halo_width || x >= N-halo_width ||
      y < halo_width || y >= N-halo_width ||
      z < halo_width || z >= N-halo_width) return;

  float v = g1[OFFSET(x, y, z)] +
      g1[OFFSET(x-1, y, z)] + g1[OFFSET((x+2+N)%N, y, z)];
  g2[OFFSET(x, y, z)] = v;
  return;
}

/* Prints all N*N*N values to stdout, one value per line. */
void dump(float *input) {
  int i;
  for (i = 0; i < N*N*N; ++i) {
    printf("%f\n", input[i]);
  }
}

/* Uploads the linear-index data to both device buffers, runs the kernel
 * once, copies the output back and prints it. */
int main(int argc, char *argv[]) {
  REAL *g1, *g1d;
  REAL *g2d;
  size_t nelms = N*N*N;
  g1 = (REAL *)malloc(sizeof(REAL) * nelms);
  if (g1 == NULL) {
    fprintf(stderr, "Error: host allocation failed\n");
    return 1;
  }
  CUDA_CHECK(cudaMalloc((void**)&g1d, sizeof(REAL) * nelms));
  CUDA_CHECK(cudaMalloc((void**)&g2d, sizeof(REAL) * nelms));

  int i;
  for (i = 0; i < (int)nelms; i++) {
    g1[i] = i;
  }

  CUDA_CHECK(cudaMemcpy(g1d, g1, sizeof(REAL) * nelms,
                        cudaMemcpyHostToDevice));
  CUDA_CHECK(cudaMemcpy(g2d, g1, sizeof(REAL) * nelms,
                        cudaMemcpyHostToDevice));

  /* 4x4x4 threads per block; N is a multiple of 4, so the grid covers the
   * N^3 points exactly. */
  dim3 block_dim(4, 4, 4);
  dim3 grid_dim(N/block_dim.x, N/block_dim.y, N/block_dim.z);

  kernel<<<grid_dim, block_dim>>>(g1d, g2d);
  CUDA_CHECK(cudaGetLastError());
  CUDA_CHECK(cudaMemcpy(g1, g2d, sizeof(REAL) * nelms,
                        cudaMemcpyDeviceToHost));

  dump(g1);

  CUDA_CHECK(cudaFree(g1d));
  CUDA_CHECK(cudaFree(g2d));
  free(g1);
  cudaDeviceReset();
  return 0;
}

--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_asymmetric-periodic.manual.ref.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>

#define N 32
#define REAL float

#define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)

/* Hand-written asymmetric stencil over the interior (2 points excluded on
 * every side): centre + (x-1) + (x+2 wrapped modulo N). */
void kernel(float *g1, float *g2) {
  int x, y, z;
  int halo_width = 2;
  for (z = halo_width; z < N-halo_width; ++z) {
    for (y = halo_width; y < N-halo_width; ++y) {
      for (x = halo_width; x < N-halo_width; ++x) {
        float v = g1[OFFSET(x, y, z)] +
            g1[OFFSET(x-1, y, z)] +
            g1[OFFSET(((x+2)+N)%N, y, z)];
        g2[OFFSET(x, y, z)] = v;
      }
    }
  }
  return;
}

/* Prints all N*N*N values to stdout, one value per line. */
void dump(float *input) {
  int i;
  for (i = 0; i < N*N*N; ++i) {
    printf("%f\n", input[i]);
  }
}

/* Fills both buffers with the linear index, applies the stencil once and
 * prints the result. */
int main(int argc, char *argv[]) {
  REAL *g1, *g2;
  size_t nelms = N*N*N;
  g1 = (REAL *)malloc(sizeof(REAL) * nelms);
  g2 = (REAL *)malloc(sizeof(REAL) * nelms);

  int i;
  for (i = 0; i < (int)nelms; i++) {
    g1[i] = i;
    g2[i] = i;
  }

  kernel(g1, g2);
  dump(g2);

  free(g1);
  free(g2);
  return 0;
}

--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_asymmetric.c:
--------------------------------------------------------------------------------
/*
 * TEST: Asymmetric stencil
 * DIM: 3
 * PRIORITY: 1
 */

#include <stdio.h>
#include "physis/physis.h"

#define N 32

/* Asymmetric diagonal stencil: centre, (+1,+1,+1) and (-2,-2,-2). */
void kernel(const int x, const int y, const int z, PSGrid3DFloat g,
            PSGrid3DFloat g2) {
  float v = PSGridGet(g, x, y, z) +
      PSGridGet(g, x+1, y+1, z+1) + PSGridGet(g, x-2, y-2, z-2);
  PSGridEmit(g2, v);
  return;
}

/* Prints all N*N*N values to stdout, one value per line. */
void dump(float *input) {
  int i;
  for (i = 0; i < N*N*N; ++i) {
    printf("%f\n", input[i]);
  }
}

#define halo_width (2)

/* Loads both grids with the linear index, runs the stencil once over the
 * interior (halo_width points excluded on every side) and prints g2. */
int main(int argc, char *argv[]) {
  PSInit(&argc, &argv, 3, N, N, N);
  PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N);
  PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);

  PSDomain3D d = PSDomain3DNew(0+halo_width, N-halo_width,
                               0+halo_width, N-halo_width,
                               0+halo_width, N-halo_width);
  size_t nelms = N*N*N;

  float *indata = (float *)malloc(sizeof(float) * nelms);
  int i;
  for (i = 0; i < (int)nelms; i++) {
    indata[i] = i;
  }
  float *outdata = (float *)malloc(sizeof(float) * nelms);

  PSGridCopyin(g1, indata);
  PSGridCopyin(g2, indata);

  PSStencilRun(PSStencilMap(kernel, d, g1, g2));

  PSGridCopyout(g2, outdata);
  dump(outdata);

  PSGridFree(g1);
  PSGridFree(g2);
  PSFinalize();
  free(indata);
  free(outdata);
  return 0;
}

--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_asymmetric.manual.cuda.cu:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include "cuda.h"
#include "cuda_runtime.h"

#define N 32
#define REAL float

#define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)

/* Width of the boundary layer that the kernel leaves untouched. */
#define halo_width (2)

/* Prints a diagnostic and exits if a CUDA runtime call fails. */
#define CUDA_CHECK(call)                                            \
  do {                                                              \
    cudaError_t err_ = (call);                                      \
    if (err_ != cudaSuccess) {                                      \
      fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
              cudaGetErrorString(err_));                            \
      exit(1);                                                      \
    }                                                               \
  } while (0)

/* One thread per grid point, indexed from a 3-D launch. Threads inside the
 * halo return immediately; interior threads write
 * centre + (+1,+1,+1) + (-2,-2,-2) into g2. */
__global__ void kernel(REAL *g1, REAL *g2) {
  int x = threadIdx.x + blockIdx.x * blockDim.x;
  int y = threadIdx.y + blockIdx.y * blockDim.y;
  int z = threadIdx.z + blockIdx.z * blockDim.z;

  if (x < halo_width || x >= N-halo_width ||
      y < halo_width || y >= N-halo_width ||
      z < halo_width || z >= N-halo_width) return;

  float v = g1[OFFSET(x, y, z)] +
      g1[OFFSET(x+1, y+1, z+1)] + g1[OFFSET(x-2, y-2, z-2)];
  g2[OFFSET(x, y, z)] = v;
  return;
}

/* Prints all N*N*N values to stdout, one value per line. */
void dump(float *input) {
  int i;
  for (i = 0; i < N*N*N; ++i) {
    printf("%f\n", input[i]);
  }
}

/* Uploads the linear-index data to both device buffers, runs the kernel
 * once, copies the output back and prints it. */
int main(int argc, char *argv[]) {
  REAL *g1, *g1d;
  REAL *g2d;
  size_t nelms = N*N*N;
  g1 = (REAL *)malloc(sizeof(REAL) * nelms);
  if (g1 == NULL) {
    fprintf(stderr, "Error: host allocation failed\n");
    return 1;
  }
  CUDA_CHECK(cudaMalloc((void**)&g1d, sizeof(REAL) * nelms));
  CUDA_CHECK(cudaMalloc((void**)&g2d, sizeof(REAL) * nelms));

  int i;
  for (i = 0; i < (int)nelms; i++) {
    g1[i] = i;
  }

  CUDA_CHECK(cudaMemcpy(g1d, g1, sizeof(REAL) * nelms,
                        cudaMemcpyHostToDevice));
  CUDA_CHECK(cudaMemcpy(g2d, g1, sizeof(REAL) * nelms,
                        cudaMemcpyHostToDevice));

  /* 4x4x4 threads per block; N is a multiple of 4, so the grid covers the
   * N^3 points exactly. */
  dim3 block_dim(4, 4, 4);
  dim3 grid_dim(N/block_dim.x, N/block_dim.y, N/block_dim.z);

  kernel<<<grid_dim, block_dim>>>(g1d, g2d);
  CUDA_CHECK(cudaGetLastError());
  CUDA_CHECK(cudaMemcpy(g1, g2d, sizeof(REAL) * nelms,
                        cudaMemcpyDeviceToHost));

  dump(g1);

  CUDA_CHECK(cudaFree(g1d));
  CUDA_CHECK(cudaFree(g2d));
  free(g1);
  cudaDeviceReset();
  return 0;
}

--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_asymmetric.manual.ref.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>

#define N 32
#define REAL float

#define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N)

/* Hand-written asymmetric stencil over the interior (2 points excluded on
 * every side): centre + (+1,+1,+1) + (-2,-2,-2). */
void kernel(float *g1, float *g2) {
  int x, y, z;
  int halo_width = 2;
  for (z = halo_width; z < N-halo_width; ++z) {
    for (y = halo_width; y < N-halo_width; ++y) {
      for (x = halo_width; x < N-halo_width; ++x) {
        float v = g1[OFFSET(x, y, z)] +
            g1[OFFSET(x+1, y+1, z+1)] + g1[OFFSET(x-2, y-2, z-2)];
        g2[OFFSET(x, y, z)] = v;
      }
    }
  }
  return;
}

/* Prints all N*N*N values to stdout, one value per line. */
void dump(float *input) {
  int i;
  for (i = 0; i < N*N*N; ++i) {
    printf("%f\n", input[i]);
  }
}

/* Fills both buffers with the linear index, applies the stencil once and
 * prints the result. */
int main(int argc, char *argv[]) {
  REAL *g1, *g2;
  size_t nelms = N*N*N;
  g1 = (REAL *)malloc(sizeof(REAL) * nelms);
  g2 = (REAL *)malloc(sizeof(REAL) * nelms);

  int i;
  for (i = 0; i < (int)nelms; i++) {
    g1[i] = i;
    g2[i] = i;
  }

  kernel(g1, g2);
  dump(g2);

  free(g1);
  free(g2);
  return 0;
}

--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_mixed-dim.c:
--------------------------------------------------------------------------------
/*
 * TEST: Grids with different dimensions
 * DIM: 3
 * PRIORITY: 2
 */

#include <stdio.h>
#include "physis/physis.h"

#define N 32

/* 3-point stencil along x on the 3-D grid g, each term weighted by the
 * matching element of the 1-D grid k. */
void kernel(const int x, const int y, const int z, PSGrid3DFloat g,
            PSGrid3DFloat g2, PSGrid1DFloat k) {
  float v = PSGridGet(g,x,y,z) * PSGridGet(k, x) +
      PSGridGet(g,x-1,y,z) * PSGridGet(k, x-1) +
      PSGridGet(g,x+1,y,z) * PSGridGet(k, x+1);
  PSGridEmit(g2, v);
  return;
}

/* Prints all N*N*N values to stdout, one value per line. */
void dump(float *input) {
  int i;
  for (i = 0; i < N*N*N; ++i) {
    printf("%f\n", input[i]);
  }
}

/* Loads the 3-D grids with the linear index and the 1-D grid with
 * alternating 1/2 weights, runs the stencil once and prints g2. */
int main(int argc, char *argv[]) {
  PSInit(&argc, &argv, 3, N, N, N);
  PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N);
  PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N);
  PSGrid1DFloat k = PSGrid1DFloatNew(N);

  PSDomain3D d = PSDomain3DNew(1, N-1, 0, N, 0, N);
  size_t nelms = N*N*N;

  float *indata = (float *)malloc(sizeof(float) * nelms);
  int i;
  for (i = 0; i < (int)nelms; i++) {
    indata[i] = i;
  }
  float *outdata = (float *)malloc(sizeof(float) * nelms);

  PSGridCopyin(g1, indata);
  PSGridCopyin(g2, indata);

  /* The first N entries of indata are reused as the source for k. */
  for (i = 0; i < N; ++i) {
    indata[i] = 1 + (i%2); // 1 or 2
  }

  PSGridCopyin(k, indata);

  PSStencilRun(PSStencilMap(kernel, d, g1, g2, k));

  PSGridCopyout(g2, outdata);
  dump(outdata);

  PSGridFree(g1);
  PSGridFree(g2);
  PSGridFree(k);
  PSFinalize();
  free(indata);
  free(outdata);
  return 0;
}

--------------------------------------------------------------------------------
/tests/system_tests/test_cases/test_mixed-dim.manual.ref.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>

#define N 32
#define REAL float

#define OFFSET1D(x) (x)
#define OFFSET3D(x, y, z) ((x) + (y) * N + (z) * N * N)


/* Hand-written weighted 3-point stencil along x. x runs over [1, N-1);
 * y and z cover the full range. */
void kernel(float *g1, float *g2, float *k) {
  int x, y, z;
  for (z = 0; z < N; ++z) {
    for (y = 0; y < N; ++y) {
      for (x = 1; x < N-1; ++x) {
        float v =
            g1[OFFSET3D(x, y, z)] * k[OFFSET1D(x)] +
            g1[OFFSET3D(x-1, y, z)] * k[OFFSET1D(x-1)] +
            g1[OFFSET3D(x+1, y, z)] * k[OFFSET1D(x+1)];
        g2[OFFSET3D(x, y, z)] = v;
      }
    }
  }
  return;
}

/* Prints all N*N*N values to stdout, one value per line. */
void dump(float *input) {
  int i;
  for (i = 0; i < N*N*N; ++i) {
    printf("%f\n", input[i]);
  }
}

/* Fills the 3-D buffers with the linear index and k with alternating 1/2
 * weights, applies the stencil once and prints the result. */
int main(int argc, char *argv[]) {
  REAL *g1, *g2, *k;
  size_t nelms = N*N*N;
  g1 = (REAL *)malloc(sizeof(REAL) * nelms);
  g2 = (REAL *)malloc(sizeof(REAL) * nelms);
  k = (REAL *)malloc(sizeof(REAL) * N);

  int i;
  for (i = 0; i < (int)nelms; i++) {
    g1[i] = i;
    g2[i] = i;
  }

  for (i = 0; i < N; ++i) {
    k[i] = 1 + (i%2); // 1 or 2
  }

  kernel(g1, g2, k);
  dump(g2);

  free(g1);
  free(g2);
  free(k);
  return 0;
}

-------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_mixed-dim2.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | #define REAL float 6 | 7 | #define OFFSET1D(x) (x) 8 | #define OFFSET3D(x, y, z) ((x) + (y) * N + (z) * N * N) 9 | 10 | 11 | void kernel(float *g1, float *g2, 12 | float *i, float *j, float *k) { 13 | int x, y, z; 14 | for (z = 1; z < N-1; ++z) { 15 | for (y = 1; y < N-1; ++y) { 16 | for (x = 1; x < N-1; ++x) { 17 | float v = 18 | g1[OFFSET3D(x, y, z)] + 19 | g1[OFFSET3D(x-1, y, z)] * i[OFFSET1D(x-1)] + 20 | g1[OFFSET3D(x+1, y, z)] * i[OFFSET1D(x+1)] + 21 | g1[OFFSET3D(x, y-1, z)] * j[OFFSET1D(y-1)] + 22 | g1[OFFSET3D(x, y+1, z)] * j[OFFSET1D(y+1)] + 23 | g1[OFFSET3D(x, y, z-1)] * k[OFFSET1D(z-1)] + 24 | g1[OFFSET3D(x, y, z+1)] * k[OFFSET1D(z+1)]; 25 | g2[OFFSET3D(x, y, z)] = v; 26 | } 27 | } 28 | } 29 | return; 30 | } 31 | 32 | void dump(float *input) { 33 | int i; 34 | for (i = 0; i < N*N*N; ++i) { 35 | printf("%f\n", input[i]); 36 | } 37 | } 38 | 39 | int main(int argc, char *argv[]) { 40 | REAL *g1, *g2, *k; 41 | size_t nelms = N*N*N; 42 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 43 | g2 = (REAL *)malloc(sizeof(REAL) * nelms); 44 | k = (REAL *)malloc(sizeof(REAL) * N); 45 | 46 | int i; 47 | for (i = 0; i < (int)nelms; i++) { 48 | g1[i] = i; 49 | g2[i] = i; 50 | } 51 | 52 | for (i = 0; i < N; ++i) { 53 | k[i] = 1 + (i%2); // 1 or 2 54 | } 55 | 56 | kernel(g1, g2, k, k, k); 57 | dump(g2); 58 | 59 | free(g1); 60 | free(g2); 61 | free(k); 62 | return 0; 63 | } 64 | 65 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_multi-kernels.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Combining two kernels 3 | * DIM: 3 4 | * PRIORITY: 2 5 | */ 6 | 7 | #include 8 | #include 
"physis/physis.h" 9 | 10 | #define N 8 11 | 12 | void kernel1(const int x, const int y, const int z, 13 | PSGrid3DFloat g1, PSGrid3DFloat g2) { 14 | float v = PSGridGet(g1, x, y, z) * 2; 15 | PSGridEmit(g2, v); 16 | return; 17 | } 18 | 19 | void kernel2(const int x, const int y, const int z, 20 | PSGrid3DFloat g2, PSGrid3DFloat g1) { 21 | float v = PSGridGet(g2, x, y, z) / 2; 22 | PSGridEmit(g1, v); 23 | return; 24 | } 25 | 26 | int main(int argc, char *argv[]) { 27 | PSInit(&argc, &argv, 3, N, N, N); 28 | PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N); 29 | PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N); 30 | PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N); 31 | size_t nelms = N*N*N; 32 | 33 | float *indata = (float *)malloc(sizeof(float) * nelms); 34 | int i; 35 | for (i = 0; i < nelms; i++) { 36 | indata[i] = i; 37 | } 38 | float *outdata = (float *)malloc(sizeof(float) * nelms); 39 | 40 | PSGridCopyin(g1, indata); 41 | 42 | PSStencilRun(PSStencilMap(kernel1, d, g1, g2), 43 | PSStencilMap(kernel2, d, g2, g1)); 44 | 45 | PSGridCopyout(g1, outdata); 46 | 47 | for (i = 0; i < nelms; i++) { 48 | if (indata[i] != outdata[i]) { 49 | fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n", 50 | i, indata[i], outdata[i]); 51 | exit(1); 52 | } 53 | } 54 | 55 | PSGridFree(g1); 56 | PSGridFree(g2); 57 | PSFinalize(); 58 | free(indata); 59 | free(outdata); 60 | return 0; 61 | } 62 | 63 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_param_name.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Parameter name being the same as an existing function 3 | * DIM: 3 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include 8 | #include // gamma is declared in math.h 9 | #include "physis/physis.h" 10 | 11 | #define N 8 12 | 13 | // Use "gamma" as a parameter. This causes a CUDA translation error in 14 | // commit 61d14b3e6362e7154d7da3dbdd7887a2106240f4. 
15 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g, 16 | PSGrid3DFloat g2, float gamma) { 17 | float v = PSGridGet(g, x, y, z) * gamma; 18 | PSGridEmit(g2, v); 19 | return; 20 | } 21 | 22 | int main(int argc, char *argv[]) { 23 | PSInit(&argc, &argv, 3, N, N, N); 24 | PSGrid3DFloat g = PSGrid3DFloatNew(N, N, N); 25 | PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N); 26 | PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N); 27 | size_t nelms = N*N*N; 28 | 29 | float *indata = (float *)malloc(sizeof(float) * nelms); 30 | int i; 31 | for (i = 0; i < nelms; i++) { 32 | indata[i] = i; 33 | } 34 | float *outdata = (float *)malloc(sizeof(float) * nelms); 35 | 36 | PSGridCopyin(g, indata); 37 | 38 | PSStencilRun(PSStencilMap(kernel, d, g, g2, 1.0f)); 39 | 40 | PSGridCopyout(g2, outdata); 41 | 42 | for (i = 0; i < nelms; i++) { 43 | if (indata[i] != outdata[i]) { 44 | fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n", 45 | i, indata[i], outdata[i]); 46 | exit(1); 47 | } 48 | } 49 | 50 | PSGridFree(g); 51 | PSGridFree(g2); 52 | PSFinalize(); 53 | free(indata); 54 | free(outdata); 55 | return 0; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_redblack-periodic.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: 7-point periodic stencil with red-black ordering 3 | * DIM: 3 4 | * PRIORITY: 1 5 | * TARGETS: ref cuda 6 | */ 7 | 8 | #include 9 | #include "physis/physis.h" 10 | 11 | #define N 32 12 | 13 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g) { 14 | float v = PSGridGetPeriodic(g, x, y, z) + 15 | PSGridGetPeriodic(g, x+1, y, z) + PSGridGetPeriodic(g, x-1, y, z) + 16 | PSGridGetPeriodic(g, x, y+1, z) + PSGridGetPeriodic(g, x, y-1, z) + 17 | PSGridGetPeriodic(g, x, y, z-1) + PSGridGetPeriodic(g, x, y, z+1); 18 | PSGridEmit(g, v); 19 | return; 20 | } 21 | 22 | void dump(float *input) { 23 | int i; 24 
| for (i = 0; i < N*N*N; ++i) { 25 | printf("%f\n", input[i]); 26 | } 27 | } 28 | 29 | #define halo_width (1) 30 | 31 | int main(int argc, char *argv[]) { 32 | PSInit(&argc, &argv, 3, N, N, N); 33 | PSGrid3DFloat g = PSGrid3DFloatNew(N, N, N); 34 | 35 | PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N); 36 | size_t nelms = N*N*N; 37 | 38 | float *indata = (float *)malloc(sizeof(float) * nelms); 39 | int i; 40 | for (i = 0; i < nelms; i++) { 41 | indata[i] = i; 42 | } 43 | float *outdata = (float *)malloc(sizeof(float) * nelms); 44 | 45 | PSGridCopyin(g, indata); 46 | 47 | PSStencilRun(PSStencilMapRedBlack(kernel, d, g)); 48 | 49 | PSGridCopyout(g, outdata); 50 | dump(outdata); 51 | 52 | PSGridFree(g); 53 | PSFinalize(); 54 | free(indata); 55 | free(outdata); 56 | return 0; 57 | } 58 | 59 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_redblack-periodic.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | #define REAL float 6 | 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 8 | 9 | void kernel(float *g, int rb) { 10 | int x, y, z; 11 | for (z = 0; z < N; ++z) { 12 | int zp = ((z - 1) + N) % N; 13 | int zn = (z + 1) % N; 14 | for (y = 0; y < N; ++y) { 15 | int yp = ((y - 1) + N) % N; 16 | int yn = (y + 1) % N; 17 | for (x = (y+z+rb)%2 ; x < N; x+=2) { 18 | int xp = ((x - 1) + N) % N; 19 | int xn = (x + 1) % N; 20 | float v = 21 | g[OFFSET(x, y, z)] + 22 | g[OFFSET(xn, y, z)] + 23 | g[OFFSET(xp, y, z)] + 24 | g[OFFSET(x, yn, z)] + 25 | g[OFFSET(x, yp, z)] + 26 | g[OFFSET(x, y, zn)] + 27 | g[OFFSET(x, y, zp)]; 28 | g[OFFSET(x, y, z)] = v; 29 | } 30 | } 31 | } 32 | return; 33 | } 34 | 35 | void dump(float *input) { 36 | int i; 37 | for (i = 0; i < N*N*N; ++i) { 38 | printf("%f\n", input[i]); 39 | } 40 | } 41 | 42 | int main(int argc, char *argv[]) { 43 | REAL *g; 44 | size_t nelms = N*N*N; 45 | g = 
(REAL *)malloc(sizeof(REAL) * nelms); 46 | 47 | int i; 48 | for (i = 0; i < (int)nelms; i++) { 49 | g[i] = i; 50 | } 51 | 52 | kernel(g, 0); 53 | kernel(g, 1); 54 | dump(g); 55 | 56 | free(g); 57 | return 0; 58 | } 59 | 60 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_redblack-separated.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: 7-point stencil with red-black ordering 3 | * DIM: 3 4 | * PRIORITY: 1 5 | * TARGETS: ref cuda 6 | */ 7 | 8 | #include 9 | #include "physis/physis.h" 10 | 11 | #define N 32 12 | 13 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g) { 14 | float v = PSGridGet(g, x, y, z) + 15 | PSGridGet(g, x+1, y, z) + PSGridGet(g, x-1, y, z) + 16 | PSGridGet(g, x, y+1, z) + PSGridGet(g, x, y-1, z) + 17 | PSGridGet(g, x, y, z-1) + PSGridGet(g, x, y, z+1); 18 | PSGridEmit(g, v); 19 | return; 20 | } 21 | 22 | void dump(float *input) { 23 | int i; 24 | for (i = 0; i < N*N*N; ++i) { 25 | printf("%f\n", input[i]); 26 | } 27 | } 28 | 29 | #define halo_width (1) 30 | 31 | int main(int argc, char *argv[]) { 32 | PSInit(&argc, &argv, 3, N, N, N); 33 | PSGrid3DFloat g = PSGrid3DFloatNew(N, N, N); 34 | 35 | PSDomain3D d = PSDomain3DNew(0+halo_width, N-halo_width, 36 | 0+halo_width, N-halo_width, 37 | 0+halo_width, N-halo_width); 38 | size_t nelms = N*N*N; 39 | 40 | float *indata = (float *)malloc(sizeof(float) * nelms); 41 | int i; 42 | for (i = 0; i < nelms; i++) { 43 | indata[i] = i; 44 | } 45 | float *outdata = (float *)malloc(sizeof(float) * nelms); 46 | 47 | PSGridCopyin(g, indata); 48 | 49 | PSStencilRun(PSStencilMapRed(kernel, d, g), 50 | PSStencilMapBlack(kernel, d, g)); 51 | 52 | PSGridCopyout(g, outdata); 53 | dump(outdata); 54 | 55 | PSGridFree(g); 56 | PSFinalize(); 57 | free(indata); 58 | free(outdata); 59 | return 0; 60 | } 61 | 62 | 
-------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_redblack.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: 7-point stencil with red-black ordering 3 | * DIM: 3 4 | * PRIORITY: 1 5 | * TARGETS: ref cuda 6 | */ 7 | 8 | #include 9 | #include "physis/physis.h" 10 | 11 | #define N 32 12 | 13 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g) { 14 | float v = PSGridGet(g, x, y, z) + 15 | PSGridGet(g, x+1, y, z) + PSGridGet(g, x-1, y, z) + 16 | PSGridGet(g, x, y+1, z) + PSGridGet(g, x, y-1, z) + 17 | PSGridGet(g, x, y, z-1) + PSGridGet(g, x, y, z+1); 18 | PSGridEmit(g, v); 19 | return; 20 | } 21 | 22 | void dump(float *input) { 23 | int i; 24 | for (i = 0; i < N*N*N; ++i) { 25 | printf("%f\n", input[i]); 26 | } 27 | } 28 | 29 | #define halo_width (1) 30 | 31 | int main(int argc, char *argv[]) { 32 | PSInit(&argc, &argv, 3, N, N, N); 33 | PSGrid3DFloat g = PSGrid3DFloatNew(N, N, N); 34 | 35 | PSDomain3D d = PSDomain3DNew(0+halo_width, N-halo_width, 36 | 0+halo_width, N-halo_width, 37 | 0+halo_width, N-halo_width); 38 | size_t nelms = N*N*N; 39 | 40 | float *indata = (float *)malloc(sizeof(float) * nelms); 41 | int i; 42 | for (i = 0; i < nelms; i++) { 43 | indata[i] = i; 44 | } 45 | float *outdata = (float *)malloc(sizeof(float) * nelms); 46 | 47 | PSGridCopyin(g, indata); 48 | 49 | PSStencilRun(PSStencilMapRedBlack(kernel, d, g)); 50 | 51 | PSGridCopyout(g, outdata); 52 | dump(outdata); 53 | 54 | PSGridFree(g); 55 | PSFinalize(); 56 | free(indata); 57 | free(outdata); 58 | return 0; 59 | } 60 | 61 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_redblack.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | #define REAL float 6 | 7 | #define OFFSET(x, y, z) ((x) + (y) * N + 
(z) * N * N) 8 | /* One in-place half sweep: for every interior (y, z) row, x starts at an offset chosen by the parity of y + z + rb and advances by 2; each visited point is replaced by the sum of itself and its six axis neighbours. */ 9 | void kernel(float *g, int rb) { 10 | int x, y, z; 11 | int halo_width = 1; 12 | for (z = halo_width; z < N-halo_width; ++z) { 13 | for (y = halo_width; y < N-halo_width; ++y) { 14 | for (x = halo_width + ((halo_width & 1) ^ (y + z + rb)%2); 15 | x < N-halo_width; x+=2) { 16 | float v = g[OFFSET(x, y, z)] + 17 | g[OFFSET(x+1, y, z)] + g[OFFSET(x-1, y, z)] + 18 | g[OFFSET(x, y+1, z)] + g[OFFSET(x, y-1, z)] + 19 | g[OFFSET(x, y, z-1)] + g[OFFSET(x, y, z+1)]; 20 | g[OFFSET(x, y, z)] = v; 21 | } 22 | } 23 | } 24 | return; 25 | } 26 | /* Prints the N*N*N values of input to stdout, one per line. */ 27 | void dump(float *input) { 28 | int i; 29 | for (i = 0; i < N*N*N; ++i) { 30 | printf("%f\n", input[i]); 31 | } 32 | } 33 | /* Initialises g to 0..N*N*N-1, runs the half sweep with rb = 0 and then rb = 1, and dumps the result. */ 34 | int main(int argc, char *argv[]) { 35 | REAL *g; 36 | size_t nelms = N*N*N; 37 | g = (REAL *)malloc(sizeof(REAL) * nelms); 38 | 39 | int i; 40 | for (i = 0; i < (int)nelms; i++) { 41 | g[i] = i; 42 | } 43 | 44 | kernel(g, 0); 45 | kernel(g, 1); 46 | dump(g); 47 | 48 | free(g); 49 | return 0; 50 | } 51 | 52 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_reduction-2d.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Grid reduction OP=PS_SUM 3 | * DIM: 2 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include 8 | #include 9 | #include "physis/physis.h" 10 | 11 | #define N 4 12 | #define REAL double 13 | #define PSGrid2D PSGrid2DDouble 14 | #define PSGrid2DNew PSGrid2DDoubleNew 15 | /* Host-side reference: sums the N*N elements of g in index order. */ 16 | REAL reduce(REAL *g) { 17 | REAL v = 0.0; 18 | int i; 19 | for (i = 0; i < N*N; ++i) { 20 | v += g[i]; 21 | } 22 | return v; 23 | } 24 | /* Compares PSReduce(PS_SUM) on the grid with the host reference; exits with status 1 if they differ. */ 25 | int main(int argc, char *argv[]) { 26 | PSInit(&argc, &argv, 2, N, N); 27 | PSGrid2D g1 = PSGrid2DNew(N, N); 28 | size_t nelms = N*N; 29 | REAL *indata = (REAL *)malloc(sizeof(REAL) * nelms); 30 | int i; 31 | for (i = 0; i < nelms; i++) { 32 | indata[i] = i; 33 | } 34 | PSGridCopyin(g1, indata); 35 | REAL v; 36 | PSReduce(&v, PS_SUM, g1); 37 | REAL v_ref = 
reduce(indata); 38 | fprintf(stderr, "Reduction result: %f, reference: %f\n", v, v_ref); 39 | if (v != v_ref) { 40 | fprintf(stderr, "Error: Non matching result\n"); 41 | exit(1); 42 | } 43 | PSGridFree(g1); 44 | PSFinalize(); 45 | free(indata); 46 | return 0; 47 | } 48 | 49 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_reduction-3d-int.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Int grid reduction OP=PS_SUM 3 | * DIM: 3 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include 8 | #include 9 | #include "physis/physis.h" 10 | 11 | #define N 8 12 | #define NN (N*N*N) 13 | #define GTYPE int 14 | #define FMT "%d" 15 | #define PSGrid3D PSGrid3DInt 16 | #define PSGrid3DNew PSGrid3DIntNew 17 | /* Host-side reference: sums the NN elements of g in index order. */ 18 | GTYPE reduce(GTYPE *g) { 19 | GTYPE v = 0.0; 20 | int i; 21 | for (i = 0; i < NN; ++i) { 22 | v += g[i]; 23 | } 24 | return v; 25 | } 26 | /* Fills the grid with 0..NN-1, then compares PSReduce(PS_SUM) with the host reference; exits with status 1 if they differ. */ 27 | int main(int argc, char *argv[]) { 28 | PSInit(&argc, &argv, 3, N, N, N); 29 | PSGrid3D g1 = PSGrid3DNew(N, N, N); 30 | GTYPE *indata = (GTYPE *)malloc(sizeof(GTYPE) * NN); 31 | int i; 32 | for (i = 0; i < NN; i++) { 33 | indata[i] = i; 34 | } 35 | PSGridCopyin(g1, indata); 36 | GTYPE v; 37 | PSReduce(&v, PS_SUM, g1); 38 | GTYPE v_ref = reduce(indata); 39 | fprintf(stderr, "Reduction result: " FMT ", reference: " FMT "\n", v, v_ref); 40 | if (v != v_ref) { 41 | fprintf(stderr, "Error: No matching result\n"); 42 | exit(1); 43 | } 44 | PSGridFree(g1); 45 | PSFinalize(); 46 | free(indata); 47 | return 0; 48 | } 49 | 50 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_reduction-3d-long.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Long grid reduction OP=PS_SUM 3 | * DIM: 3 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include 8 | #include 9 | #include "physis/physis.h" 10 | 11 | #define N 8 12 | #define NN 
(N*N*N) 13 | #define GTYPE long 14 | #define FMT "%d" 15 | #define PSGrid3D PSGrid3DLong 16 | #define PSGrid3DNew PSGrid3DLongNew 17 | 18 | GTYPE reduce(GTYPE *g) { 19 | GTYPE v = 0.0; 20 | int i; 21 | for (i = 0; i < NN; ++i) { 22 | v += g[i]; 23 | } 24 | return v; 25 | } 26 | 27 | int main(int argc, char *argv[]) { 28 | PSInit(&argc, &argv, 3, N, N, N); 29 | PSGrid3D g1 = PSGrid3DNew(N, N, N); 30 | GTYPE *indata = (GTYPE *)malloc(sizeof(GTYPE) * NN); 31 | int i; 32 | for (i = 0; i < NN; i++) { 33 | indata[i] = i; 34 | } 35 | PSGridCopyin(g1, indata); 36 | GTYPE v; 37 | PSReduce(&v, PS_SUM, g1); 38 | GTYPE v_ref = reduce(indata); 39 | fprintf(stderr, "Reduction result: " FMT ", reference: " FMT "\n", v, v_ref); 40 | if (v != v_ref) { 41 | fprintf(stderr, "Error: No matching result\n"); 42 | exit(1); 43 | } 44 | PSGridFree(g1); 45 | PSFinalize(); 46 | free(indata); 47 | return 0; 48 | } 49 | 50 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_reduction-3d-max.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Grid reduction OP=PS_MAX 3 | * DIM: 3 4 | * PRIORITY: 2 5 | */ 6 | 7 | #include 8 | #include 9 | #include "physis/physis.h" 10 | 11 | #define N 4 12 | #define REAL float 13 | #define PSGrid3D PSGrid3DFloat 14 | #define PSGrid3DNew PSGrid3DFloatNew 15 | 16 | REAL reduce(REAL *g) { 17 | REAL v = g[0]; 18 | int i; 19 | for (i = 1; i < N*N*N; ++i) { 20 | v = (v > g[i]) ? 
v : g[i]; 21 | } 22 | return v; 23 | } 24 | /* Fills the grid with 0..N*N*N-1, then compares PSReduce(PS_MAX) with the host reference; exits with status 1 if they differ. */ 25 | int main(int argc, char *argv[]) { 26 | PSInit(&argc, &argv, 3, N, N, N); 27 | PSGrid3D g1 = PSGrid3DNew(N, N, N); 28 | size_t nelms = N*N*N; 29 | float *indata = (float *)malloc(sizeof(REAL) * nelms); 30 | int i; 31 | for (i = 0; i < nelms; i++) { 32 | indata[i] = i; 33 | } 34 | PSGridCopyin(g1, indata); 35 | float v; 36 | PSReduce(&v, PS_MAX, g1); 37 | float v_ref = reduce(indata); 38 | fprintf(stderr, "Reduction result: %f, reference: %f\n", v, v_ref); 39 | if (v != v_ref) { 40 | fprintf(stderr, "Error: Non matching result\n"); 41 | exit(1); 42 | } 43 | PSGridFree(g1); 44 | PSFinalize(); 45 | free(indata); 46 | return 0; 47 | } 48 | 49 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_reduction-3d-min.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Grid reduction OP=PS_MIN 3 | * DIM: 3 4 | * PRIORITY: 2 5 | */ 6 | 7 | #include 8 | #include 9 | #include "physis/physis.h" 10 | 11 | #define N 8 12 | #define REAL float 13 | #define PSGrid3D PSGrid3DFloat 14 | #define PSGrid3DNew PSGrid3DFloatNew 15 | /* Host-side reference: returns the smallest of the N*N*N elements of g. */ 16 | REAL reduce(REAL *g) { 17 | REAL v = g[0]; 18 | int i; 19 | for (i = 1; i < N*N*N; ++i) { 20 | v = (v > g[i]) ? 
g[i] : v; 21 | } 22 | return v; 23 | } 24 | /* Fills the grid with 10..N*N*N+9, then compares PSReduce(PS_MIN) with the host reference; exits with status 1 if they differ. */ 25 | int main(int argc, char *argv[]) { 26 | PSInit(&argc, &argv, 3, N, N, N); 27 | PSGrid3D g1 = PSGrid3DNew(N, N, N); 28 | size_t nelms = N*N*N; 29 | float *indata = (float *)malloc(sizeof(REAL) * nelms); 30 | int i; 31 | for (i = 0; i < nelms; i++) { 32 | indata[i] = i+10; 33 | } 34 | PSGridCopyin(g1, indata); 35 | float v; 36 | PSReduce(&v, PS_MIN, g1); 37 | float v_ref = reduce(indata); 38 | fprintf(stderr, "Reduction result: %f, reference: %f\n", v, v_ref); 39 | if (v != v_ref) { 40 | fprintf(stderr, "Error: Non matching result\n"); 41 | exit(1); 42 | } 43 | PSGridFree(g1); 44 | PSFinalize(); 45 | free(indata); 46 | return 0; 47 | } 48 | 49 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_reduction-3d-prod.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Grid reduction OP=PS_PROD 3 | * DIM: 3 4 | * PRIORITY: 2 5 | */ 6 | 7 | #include 8 | #include 9 | #include "physis/physis.h" 10 | 11 | #define N 4 12 | #define REAL float 13 | #define PSGrid3D PSGrid3DFloat 14 | #define PSGrid3DNew PSGrid3DFloatNew 15 | /* Host-side reference: multiplies the N*N*N elements of g together in index order. */ 16 | REAL reduce(REAL *g) { 17 | REAL v = 1.0; 18 | int i; 19 | for (i = 0; i < N*N*N; ++i) { 20 | v *= g[i]; 21 | } 22 | return v; 23 | } 24 | /* Fills the grid with 1.1 everywhere and compares PSReduce(PS_PROD) with the host reference using a relative tolerance of 1.0e-5 instead of exact equality. */ 25 | int main(int argc, char *argv[]) { 26 | PSInit(&argc, &argv, 3, N, N, N); 27 | PSGrid3D g1 = PSGrid3DNew(N, N, N); 28 | size_t nelms = N*N*N; 29 | float *indata = (float *)malloc(sizeof(REAL) * nelms); 30 | int i; 31 | for (i = 0; i < nelms; i++) { 32 | indata[i] = 1.1; 33 | } 34 | PSGridCopyin(g1, indata); 35 | float v; 36 | PSReduce(&v, PS_PROD, g1); 37 | float v_ref = reduce(indata); 38 | fprintf(stderr, "Reduction result: %f, reference: %f\n", v, v_ref); 39 | fprintf(stderr, "Difference: %f\n", fabs(v - v_ref)); 40 | if (fabs(v - v_ref) / v_ref > 1.0e-5) { 41 | fprintf(stderr, "Error: Non matching result\n"); 42 | exit(1); 43 | } 44 | 
PSGridFree(g1); 45 | PSFinalize(); 46 | free(indata); 47 | return 0; 48 | } 49 | 50 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_reduction-3d-sum.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Grid reduction OP=PS_SUM 3 | * DIM: 3 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include <stdio.h> 8 | #include <stdlib.h> 9 | #include "physis/physis.h" 10 | 11 | #define N 16 12 | #define REAL float 13 | #define PSGrid3D PSGrid3DFloat 14 | #define PSGrid3DNew PSGrid3DFloatNew 15 | /* Fills the grid with 0..N*N*N-1 and prints the result of PSReduce(PS_SUM) to stdout. */ 16 | int main(int argc, char *argv[]) { 17 | PSInit(&argc, &argv, 3, N, N, N); 18 | PSGrid3D g1 = PSGrid3DNew(N, N, N); 19 | size_t nelms = N*N*N; 20 | float *indata = (float *)malloc(sizeof(REAL) * nelms); 21 | int i; 22 | for (i = 0; i < nelms; i++) { 23 | indata[i] = i; 24 | } 25 | PSGridCopyin(g1, indata); 26 | float v; 27 | PSReduce(&v, PS_SUM, g1); 28 | printf("%f\n", v); 29 | PSGridFree(g1); 30 | PSFinalize(); 31 | free(indata); 32 | return 0; 33 | } 34 | 35 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_reduction-3d-sum.manual.cuda.cu: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include "cuda.h" 3 | #include "cuda_runtime.h" 4 | 5 | #include <thrust/device_ptr.h> 6 | #include <thrust/reduce.h> 7 | #include <thrust/functional.h> 8 | 9 | #define N 16 10 | #define REAL float 11 | /* Copies 0..N*N*N-1 to the device, sums it there with thrust::reduce, and prints the sum to stdout. */ 12 | int main(int argc, char *argv[]) { 13 | REAL *g1, *g1d; 14 | size_t nelms = N*N*N; 15 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 16 | cudaMalloc((void**)&g1d, sizeof(REAL) * nelms); 17 | 18 | int i; 19 | for (i = 0; i < (int)nelms; i++) { 20 | g1[i] = i; 21 | } 22 | 23 | cudaMemcpy(g1d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice); 24 | /* wrap the raw device pointer so thrust runs the reduction on the device */ 25 | thrust::device_ptr<REAL> dev_ptr((REAL*)g1d); 26 | REAL v = thrust::reduce(dev_ptr, dev_ptr + nelms, 27 | 0.0f, thrust::plus<REAL>()); 28 | 29 | printf("%f\n", v); 30 | 31 | cudaDeviceReset(); 32 | return 0; 33 | } 34 
| 35 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_reduction-3d-sum.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 16 5 | #define REAL float 6 | /* Sums the N*N*N elements of input in index order, accumulating in REAL. */ 7 | REAL reduce(float *input) { 8 | int i; 9 | REAL v = 0; 10 | for (i = 0; i < N*N*N; ++i) { 11 | v += input[i]; 12 | } 13 | return v; 14 | } 15 | /* Initialises the array to 0..N*N*N-1 and prints its sum to stdout. */ 16 | int main(int argc, char *argv[]) { 17 | REAL *g1; 18 | size_t nelms = N*N*N; 19 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 20 | 21 | int i; 22 | for (i = 0; i < (int)nelms; i++) { 23 | g1[i] = i; 24 | } 25 | 26 | REAL v = reduce(g1); 27 | printf("%f\n", v); 28 | 29 | free(g1); 30 | return 0; 31 | } 32 | 33 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_stencil-hole.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Stencil hole 3 | * DIM: 3 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include 8 | #include "physis/physis.h" 9 | 10 | #define N 32 11 | /* Stencil body: emits into g2 the sum of g at x, x+2 and x-2 (same y and z); offsets of 1 are not read. */ 12 | void kernel(const int x, const int y, const int z, PSGrid3DFloat g, 13 | PSGrid3DFloat g2) { 14 | float v = PSGridGet(g, x, y, z) + 15 | PSGridGet(g, x+2, y, z) + PSGridGet(g, x-2, y, z); 16 | PSGridEmit(g2, v); 17 | return; 18 | } 19 | /* Prints the N*N*N values of input to stdout, one per line. */ 20 | void dump(float *input) { 21 | int i; 22 | for (i = 0; i < N*N*N; ++i) { 23 | printf("%f\n", input[i]); 24 | } 25 | } 26 | 27 | #define halo_width (2) 28 | 29 | int main(int argc, char *argv[]) { 30 | PSInit(&argc, &argv, 3, N, N, N); 31 | PSGrid3DFloat g1 = PSGrid3DFloatNew(N, N, N); 32 | PSGrid3DFloat g2 = PSGrid3DFloatNew(N, N, N); 33 | 34 | PSDomain3D d = PSDomain3DNew(0+halo_width, N-halo_width, 35 | 0+halo_width, N-halo_width, 36 | 0+halo_width, N-halo_width); 37 | size_t nelms = N*N*N; 38 | 39 | float *indata = (float *)malloc(sizeof(float) * nelms); 40 | int i; 41 | for (i = 0; i < nelms; i++) { 42 | 
indata[i] = i; 43 | } 44 | float *outdata = (float *)malloc(sizeof(float) * nelms); 45 | 46 | PSGridCopyin(g1, indata); 47 | PSGridCopyin(g2, indata); 48 | 49 | PSStencilRun(PSStencilMap(kernel, d, g1, g2)); 50 | 51 | PSGridCopyout(g2, outdata); 52 | dump(outdata); 53 | 54 | PSGridFree(g1); 55 | PSGridFree(g2); 56 | PSFinalize(); 57 | free(indata); 58 | free(outdata); 59 | return 0; 60 | } 61 | 62 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_stencil-hole.manual.cuda.cu: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include "cuda.h" 3 | #include "cuda_runtime.h" 4 | 5 | #define N 32 6 | #define REAL float 7 | 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 9 | /* One thread per grid point, indexed from the 3D block and thread ids. Threads within two cells of any face return without writing; the rest store g1 at x, x+2 and x-2 summed into g2. */ 10 | __global__ void kernel(REAL *g1, REAL *g2) { 11 | int x = threadIdx.x + blockIdx.x * blockDim.x; 12 | int y = threadIdx.y + blockIdx.y * blockDim.y; 13 | int z = threadIdx.z + blockIdx.z * blockDim.z; 14 | 15 | if (x <= 1 || x >= N-2 || y <= 1 || y >= N-2 || 16 | z <= 1 || z >= N-2) return; 17 | 18 | float v = g1[OFFSET(x, y, z)] + 19 | g1[OFFSET(x+2, y, z)] + g1[OFFSET(x-2, y, z)]; 20 | g2[OFFSET(x, y, z)] = v; 21 | return; 22 | } 23 | /* Prints the N*N*N values of input to stdout, one per line. */ 24 | void dump(float *input) { 25 | int i; 26 | for (i = 0; i < N*N*N; ++i) { 27 | printf("%f\n", input[i]); 28 | } 29 | } 30 | 31 | #define halo_width (2) /* matches the two-cell border skipped by the kernel guard */ 32 | 33 | int main(int argc, char *argv[]) { 34 | REAL *g1, *g1d; 35 | REAL *g2d; 36 | size_t nelms = N*N*N; 37 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 38 | cudaMalloc((void**)&g1d, sizeof(REAL) * nelms); 39 | cudaMalloc((void**)&g2d, sizeof(REAL) * nelms); 40 | 41 | int i; 42 | for (i = 0; i < (int)nelms; i++) { 43 | g1[i] = i; 44 | } 45 | 46 | cudaMemcpy(g1d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice); 47 | cudaMemcpy(g2d, g1, sizeof(REAL) * nelms, cudaMemcpyHostToDevice); 48 | 49 | dim3 block_dim(4, 4, 4); 50 | dim3 grid_dim(N/block_dim.x, N/block_dim.y, N/block_dim.z); 
51 | /* launch with the grid and block shapes declared just above; the launch configuration had been lost */ 52 | kernel<<<grid_dim, block_dim>>>(g1d, g2d); 53 | cudaMemcpy(g1, g2d, sizeof(REAL) * nelms, cudaMemcpyDeviceToHost); 54 | 55 | dump(g1); 56 | 57 | cudaDeviceReset(); 58 | return 0; 59 | } 60 | 61 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_stencil-hole.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | 4 | #define N 32 5 | #define REAL float 6 | 7 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 8 | /* For every point at least halo_width cells from each face, stores g1 at x, x+2 and x-2 summed into g2. */ 9 | void kernel(float *g1, float *g2) { 10 | int x, y, z; 11 | int halo_width = 2; 12 | for (z = halo_width; z < N-halo_width; ++z) { 13 | for (y = halo_width; y < N-halo_width; ++y) { 14 | for (x = halo_width; x < N-halo_width; ++x) { 15 | float v = g1[OFFSET(x, y, z)] + 16 | g1[OFFSET(x+2, y, z)] + g1[OFFSET(x-2, y, z)]; 17 | g2[OFFSET(x, y, z)] = v; 18 | } 19 | } 20 | } 21 | return; 22 | } 23 | /* Prints the N*N*N values of input to stdout, one per line. */ 24 | void dump(float *input) { 25 | int i; 26 | for (i = 0; i < N*N*N; ++i) { 27 | printf("%f\n", input[i]); 28 | } 29 | } 30 | /* Initialises both arrays to 0..N*N*N-1, runs the stencil once from g1 into g2, and dumps g2. */ 31 | int main(int argc, char *argv[]) { 32 | REAL *g1, *g2; 33 | size_t nelms = N*N*N; 34 | g1 = (REAL *)malloc(sizeof(REAL) * nelms); 35 | g2 = (REAL *)malloc(sizeof(REAL) * nelms); 36 | 37 | int i; 38 | for (i = 0; i < (int)nelms; i++) { 39 | g1[i] = i; 40 | g2[i] = i; 41 | } 42 | 43 | kernel(g1, g2); 44 | dump(g2); 45 | 46 | free(g1); 47 | free(g2); 48 | return 0; 49 | } 50 | 51 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_user-defined-type-7-pt-periodic-complex.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define N 32 6 | 7 | typedef struct { 8 | float r; 9 | float i; 10 | } Complex; 11 | 12 | 13 | #define OFFSET(x, y, z) ((((x)+N)%N) + (((y)+N)%N) * N + (((z)+N)%N) * N * N) 14 | 15 | void kernel1(Complex *g1, Complex *g2) { 16 | int x, y, 
z; 17 | for (z = 0; z < N; ++z) { 18 | for (y = 0; y < N; ++y) { 19 | for (x = 0; x < N; ++x) { 20 | Complex t = g1[OFFSET(x, y, z)]; 21 | Complex t1 = g1[OFFSET(x+1, y, z)]; 22 | Complex t2 = g1[OFFSET(x-1, y, z)]; 23 | Complex t3 = g1[OFFSET(x, y+1, z)]; 24 | Complex t4 = g1[OFFSET(x, y-1, z)]; 25 | Complex t5 = g1[OFFSET(x, y, z+1)]; 26 | Complex t6 = g1[OFFSET(x, y, z-1)]; 27 | float r = t.r + t1.r + t2.r + t3.r + t4.r + t5.r + t6.r; 28 | float i = t.i + t1.i + t2.i + t3.i + t4.i + t5.i + t6.i; 29 | Complex v = {r, i}; 30 | g2[OFFSET(x, y, z)] = v; 31 | } 32 | } 33 | } 34 | return; 35 | } 36 | /* Prints the r and i members of each of the N*N*N elements, one element per line. */ 37 | void dump(Complex *input) { 38 | int i; 39 | for (i = 0; i < N*N*N; ++i) { 40 | printf("%f %f\n", input[i].r, input[i].i); 41 | } 42 | } 43 | /* Initialises g1 with r = index and i = index + 1, runs kernel1 once from g1 into g2, and dumps g2. */ 44 | int main(int argc, char *argv[]) { 45 | Complex *g1, *g2; 46 | size_t nelms = N*N*N; 47 | g1 = (Complex *)malloc(sizeof(Complex) * nelms); 48 | g2 = (Complex *)malloc(sizeof(Complex) * nelms); 49 | 50 | int i; 51 | for (i = 0; i < nelms; i++) { 52 | g1[i].r = i; 53 | g1[i].i = i+1; 54 | } 55 | 56 | kernel1(g1, g2); 57 | dump(g2); 58 | free(g1); 59 | free(g2); 60 | return 0; 61 | } 62 | 63 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_user-defined-type-7-pt-periodic.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define N 32 6 | 7 | typedef struct { 8 | float p; 9 | float q; 10 | } Point; 11 | 12 | /* Flattened index with each coordinate wrapped modulo N, so -1 maps to N-1 and N maps to 0. */ 13 | #define OFFSET(x, y, z) ((((x)+N)%N) + (((y)+N)%N) * N + (((z)+N)%N) * N * N) 14 | /* For every point, writes into member q the sum of member p at the point and at its six axis neighbours, with wrap-around at the faces. */ 15 | void kernel1(Point *g) { 16 | int x, y, z; 17 | for (z = 0; z < N; ++z) { 18 | for (y = 0; y < N; ++y) { 19 | for (x = 0; x < N; ++x) { 20 | float v = g[OFFSET(x, y, z)].p + 21 | g[OFFSET(x+1, y, z)].p + 22 | g[OFFSET(x-1, y, z)].p + 23 | g[OFFSET(x, y+1, z)].p + 24 | g[OFFSET(x, y-1, z)].p + 25 | g[OFFSET(x, y, z+1)].p + 26 | g[OFFSET(x, y, z-1)].p; 27 | g[OFFSET(x, y, 
z)].q = v; 28 | } 29 | } 30 | } 31 | return; 32 | } 33 | /* Prints the p and q members of each of the N*N*N elements, one element per line. */ 34 | void dump(Point *input) { 35 | int i; 36 | for (i = 0; i < N*N*N; ++i) { 37 | printf("%f %f\n", input[i].p, input[i].q); 38 | } 39 | } 40 | /* Initialises p = index and q = 0, runs kernel1 once, and dumps the array. */ 41 | int main(int argc, char *argv[]) { 42 | Point *g; 43 | size_t nelms = N*N*N; 44 | g = (Point *)malloc(sizeof(Point) * nelms); 45 | 46 | int i; 47 | for (i = 0; i < nelms; i++) { 48 | g[i].p = i; 49 | g[i].q = 0; 50 | } 51 | 52 | kernel1(g); 53 | dump(g); 54 | free(g); 55 | return 0; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_user-defined-type-7-pt.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define N 32 6 | 7 | typedef struct { 8 | float p; 9 | float q; 10 | } Point; 11 | 12 | 13 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 14 | /* For every point at least one cell from each face, writes into member q the sum of member p at the point and at its six axis neighbours; no wrap-around. */ 15 | void kernel1(Point *g) { 16 | int x, y, z; 17 | for (z = 1; z < N-1; ++z) { 18 | for (y = 1; y < N-1; ++y) { 19 | for (x = 1; x < N-1; ++x) { 20 | float v = g[OFFSET(x, y, z)].p + 21 | g[OFFSET(x+1, y, z)].p + 22 | g[OFFSET(x-1, y, z)].p + 23 | g[OFFSET(x, y+1, z)].p + 24 | g[OFFSET(x, y-1, z)].p + 25 | g[OFFSET(x, y, z+1)].p + 26 | g[OFFSET(x, y, z-1)].p; 27 | g[OFFSET(x, y, z)].q = v; 28 | } 29 | } 30 | } 31 | return; 32 | } 33 | /* Prints the p and q members of each of the N*N*N elements, one element per line. */ 34 | void dump(Point *input) { 35 | int i; 36 | for (i = 0; i < N*N*N; ++i) { 37 | printf("%f %f\n", input[i].p, input[i].q); 38 | } 39 | } 40 | /* Initialises p = index and q = 0, runs kernel1 once, and dumps the array. */ 41 | int main(int argc, char *argv[]) { 42 | Point *g; 43 | size_t nelms = N*N*N; 44 | g = (Point *)malloc(sizeof(Point) * nelms); 45 | 46 | int i; 47 | for (i = 0; i < nelms; i++) { 48 | g[i].p = i; 49 | g[i].q = 0; 50 | } 51 | 52 | kernel1(g); 53 | dump(g); 54 | free(g); 55 | return 0; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_user-defined-type-array-member-copy.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Copy within an array member 3 | * DIM: 3 4 | * PRIORITY: 10 5 | */ 6 | 7 | #include 8 | #include "physis/physis.h" 9 | 10 | #define N 32 11 | #define ITER 10 12 | 13 | struct Point { 14 | float p[2]; 15 | }; 16 | 17 | DeclareGrid3D(Point, struct Point); 18 | /* Stencil body: reads p[0] of the point at (x, y, z) and emits it into p[1] through PSGridEmitUtype. */ 19 | void kernel(const int x, const int y, const int z, 20 | PSGrid3DPoint g) { 21 | float v = PSGridGet(g, x, y, z).p[0]; 22 | PSGridEmitUtype(g.p[1], v); 23 | return; 24 | } 25 | /* Exits with status 1 at the first of the N*N*N elements whose p[0] and p[1] differ. */ 26 | void check(struct Point *p) { 27 | int i; 28 | for (i = 0; i < N*N*N; ++i) { 29 | if (p[i].p[0] != p[i].p[1]) { 30 | fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n", 31 | i, p[i].p[0], p[i].p[1]); 32 | exit(1); 33 | } 34 | } 35 | } 36 | /* Initialises p[0] = index and p[1] = 0, runs kernel over the whole grid, copies the grid back into indata, and checks it. */ 37 | int main(int argc, char *argv[]) { 38 | PSInit(&argc, &argv, 3, N, N, N); 39 | PSGrid3DPoint g = PSGrid3DPointNew(N, N, N); 40 | 41 | PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N); 42 | size_t nelms = N*N*N; 43 | 44 | struct Point *indata = (struct Point *)malloc( 45 | sizeof(struct Point) * nelms); 46 | int i; 47 | for (i = 0; i < nelms; i++) { 48 | indata[i].p[0] = i; 49 | indata[i].p[1] = 0; 50 | } 51 | 52 | PSGridCopyin(g, indata); 53 | 54 | PSStencilRun(PSStencilMap(kernel, d, g)); 55 | 56 | PSGridCopyout(g, indata); 57 | 58 | check(indata); 59 | 60 | PSGridFree(g); 61 | PSFinalize(); 62 | free(indata); 63 | return 0; 64 | } 65 | 66 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_user-defined-type-copyin-copyout-two-members.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Run copyin and copyout on a user-defined type with two members 3 | * DIM: 3 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include 8 | #include "physis/physis.h" 9 | 10 | #define N 32 11 | #define ITER 10 12 | 13 | struct Point { 14 | float x; 15 | float y; 16 | }; 17 | 18 | DeclareGrid3D(Point, 
struct Point); 19 | /* Exits with status 1 at the first element where member x or member y differs between in and out. */ 20 | void check(struct Point *in, struct Point *out) { 21 | int x = 0; 22 | size_t nelms = N*N*N; 23 | for (x = 0; x < nelms; ++x) { 24 | if (in[x].x != out[x].x) { 25 | fprintf(stderr, "Error: x mismatch at %d, in: %f, out: %f\n", 26 | x, in[x].x, out[x].x); 27 | exit(1); 28 | } 29 | if (in[x].y != out[x].y) { 30 | fprintf(stderr, "Error: y mismatch at %d, in: %f, out: %f\n", 31 | x, in[x].y, out[x].y); 32 | exit(1); 33 | } 34 | } 35 | } 36 | /* Copies indata into the grid and straight back out into outdata with no stencil in between, then checks that both buffers match. */ 37 | int main(int argc, char *argv[]) { 38 | PSInit(&argc, &argv, 3, N, N, N); 39 | PSGrid3DPoint g1 = PSGrid3DPointNew(N, N, N); 40 | size_t nelms = N*N*N; 41 | struct Point *indata = (struct Point *)malloc( 42 | sizeof(struct Point) * nelms); 43 | int i; 44 | for (i = 0; i < nelms; i++) { 45 | indata[i].x = i; 46 | indata[i].y = i+1; 47 | } 48 | struct Point *outdata = (struct Point *)malloc( 49 | sizeof(struct Point) * nelms); 50 | 51 | PSGridCopyin(g1, indata); 52 | PSGridCopyout(g1, outdata); 53 | 54 | check(indata, outdata); 55 | 56 | PSGridFree(g1); 57 | PSFinalize(); 58 | free(indata); 59 | free(outdata); 60 | return 0; 61 | } 62 | 63 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_user-defined-type2.c: -------------------------------------------------------------------------------- 1 | /* 2 | * TEST: Copy between user-defined type members 3 | * DIM: 3 4 | * PRIORITY: 1 5 | */ 6 | 7 | #include 8 | #include "physis/physis.h" 9 | 10 | #define N 32 11 | #define ITER 10 12 | 13 | struct Point { 14 | float p; 15 | float q; 16 | }; 17 | 18 | DeclareGrid3D(Point, struct Point); 19 | /* Stencil body: reads member p of the point at (x, y, z) and emits it into member q through PSGridEmitUtype. */ 20 | void kernel(const int x, const int y, const int z, 21 | PSGrid3DPoint g) { 22 | float v = PSGridGet(g, x, y, z).p; 23 | PSGridEmitUtype(g.q, v); 24 | return; 25 | } 26 | /* Exits with status 1 at the first of the N*N*N elements whose p and q differ. */ 27 | void check(struct Point *p) { 28 | int i; 29 | for (i = 0; i < N*N*N; ++i) { 30 | if (p[i].p != p[i].q) { 31 | fprintf(stderr, "Error: mismatch at %d, in: %f, out: %f\n", 32 | i, 
p[i].p, p[i].q); 33 | exit(1); 34 | } 35 | } 36 | } 37 | /* Initialises p = index and q = 0, runs kernel over the whole grid, copies the grid back into indata, and checks it. */ 38 | int main(int argc, char *argv[]) { 39 | PSInit(&argc, &argv, 3, N, N, N); 40 | PSGrid3DPoint g = PSGrid3DPointNew(N, N, N); 41 | 42 | PSDomain3D d = PSDomain3DNew(0, N, 0, N, 0, N); 43 | size_t nelms = N*N*N; 44 | 45 | struct Point *indata = (struct Point *)malloc( 46 | sizeof(struct Point) * nelms); 47 | int i; 48 | for (i = 0; i < nelms; i++) { 49 | indata[i].p = i; 50 | indata[i].q = 0; 51 | } 52 | 53 | PSGridCopyin(g, indata); 54 | 55 | PSStencilRun(PSStencilMap(kernel, d, g)); 56 | 57 | PSGridCopyout(g, indata); 58 | 59 | check(indata); 60 | 61 | PSGridFree(g); 62 | PSFinalize(); 63 | free(indata); 64 | return 0; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /tests/system_tests/test_cases/test_user-defined-type5.manual.ref.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 32 5 | 6 | #define T float 7 | 8 | #define OFFSET(x, y, z) ((x) + (y) * N + (z) * N * N) 9 | /* For every point at least halo_width cells from each face, stores into g2 the sum of g1 at the point and at its six axis neighbours. */ 10 | void kernel(T *g1, T *g2) { 11 | int x, y, z; 12 | int halo_width = 1; 13 | for (z = halo_width; z < N-halo_width; ++z) { 14 | for (y = halo_width; y < N-halo_width; ++y) { 15 | for (x = halo_width; x < N-halo_width; ++x) { 16 | T v = g1[OFFSET(x, y, z)] + 17 | g1[OFFSET(x+1, y, z)] + g1[OFFSET(x-1, y, z)] + 18 | g1[OFFSET(x, y+1, z)] + g1[OFFSET(x, y-1, z)] + 19 | g1[OFFSET(x, y, z-1)] + g1[OFFSET(x, y, z+1)]; 20 | g2[OFFSET(x, y, z)] = v; 21 | } 22 | } 23 | } 24 | return; 25 | } 26 | /* Prints the N*N*N values of input to stdout, one per line. */ 27 | void dump(T *input) { 28 | int i; 29 | for (i = 0; i < N*N*N; ++i) { 30 | printf("%f\n", input[i]); 31 | } 32 | } 33 | /* Initialises g1 to 0..N*N*N-1 and g2 to zero, runs the stencil once from g1 into g2, and dumps g2. */ 34 | int main(int argc, char *argv[]) { 35 | T *g1, *g2; 36 | size_t nelms = N*N*N; 37 | g1 = (T *)malloc(sizeof(T) * nelms); 38 | g2 = (T *)malloc(sizeof(T) * nelms); 39 | 40 | int i; 41 | for (i = 0; i < (int)nelms; i++) { 42 | g1[i] = i; 43 | g2[i] = 0; 44 | } 45 | 46 | kernel(g1, g2); 47 | dump(g2); 
48 | 49 | free(g1); 50 | free(g2); 51 | return 0; 52 | } 53 | 54 | -------------------------------------------------------------------------------- /translator/ast_processing.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_TRANSLATOR_AST_PROCESSING_H_ 4 | #define PHYSIS_TRANSLATOR_AST_PROCESSING_H_ 5 | 6 | #include "translator/translator_common.h" 7 | #include "physis/internal_common.h" 8 | 9 | namespace physis { 10 | namespace translator { 11 | namespace rose_util { 12 | /* AST clean-up passes applied to the subtree rooted at scope; only declared here. NOTE(review): the meaning of the int result is not visible in this header - confirm against the definitions. */ 13 | int RemoveRedundantVariableCopy(SgNode *scope); 14 | int RemoveUnusedFunction(SgNode *scope); 15 | 16 | } // namespace rose_util 17 | } // namespace translator 18 | } // namespace physis 19 | 20 | #endif /* PHYSIS_TRANSLATOR_AST_PROCESSING_H_ */ 21 | 22 | 23 | -------------------------------------------------------------------------------- /translator/ast_traversal.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #ifndef PHYSIS_TRANSLATOR_AST_TRAVERSAL_H_ 4 | #define PHYSIS_TRANSLATOR_AST_TRAVERSAL_H_ 5 | 6 | #include "translator/translator_common.h" 7 | #include "physis/internal_common.h" 8 | 9 | namespace physis { 10 | namespace translator { 11 | namespace rose_util { 12 | /* Returns the nearest strict ancestor of node whose variantT() equals ASTNodeType::static_variant, or NULL when the parent chain ends first. node itself is never returned. */ 13 | template <class ASTNodeType> 14 | ASTNodeType *FindClosestAncestor(SgNode *node) { 15 | SgNode *p = node->get_parent(); 16 | while (p) { 17 | if (p->variantT() == (VariantT)ASTNodeType::static_variant) { 18 | return dynamic_cast<ASTNodeType *>(p); 19 | } 20 | p = p->get_parent(); 21 | } 22 | return NULL; 23 | } 24 | 25 | } // namespace rose_util 26 | } // namespace translator 27 | } // namespace physis 28 | 29 | #endif /* PHYSIS_TRANSLATOR_AST_TRAVERSAL_H_ */ 30 | 31 | 32 | -------------------------------------------------------------------------------- /translator/config.h.cmake: -------------------------------------------------------------------------------- 1 | #ifndef PHYSIS_TRANSLATOR_COMMON_H_ 2 | #define PHYSIS_TRANSLATOR_COMMON_H_ 3 | /* NOTE(review): this guard is named after "common", not "config" - confirm it cannot clash with the guard of translator/translator_common.h. */ 4 | #include "common/config.h" 5 | 6 | #cmakedefine CUDA_TRANSLATOR_ENABLED 7 | #cmakedefine CUDA_HM_TRANSLATOR_ENABLED 8 | #cmakedefine MPI_TRANSLATOR_ENABLED 9 | #cmakedefine MPI_OPENMP_TRANSLATOR_ENABLED 10 | #cmakedefine MPI_CUDA_TRANSLATOR_ENABLED 11 | #cmakedefine OPENCL_TRANSLATOR_ENABLED 12 | #cmakedefine MPI_OPENCL_TRANSLATOR_ENABLED 13 | 14 | #endif /* PHYSIS_TRANSLATOR_COMMON_H_ */ -------------------------------------------------------------------------------- /translator/cuda_hm_runtime_builder.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #include "translator/cuda_hm_runtime_builder.h" 4 | 5 | namespace physis { 6 | namespace translator { 7 | /* Forwards both arguments unchanged to the CUDARuntimeBuilder constructor; the body is empty. */ 8 | CUDAHMRuntimeBuilder::CUDAHMRuntimeBuilder(SgScopeStatement *global_scope, 9 | const Configuration &config): 10 | CUDARuntimeBuilder(global_scope, config) { 11 | } 12 | 13 | } // namespace translator 14 | } // namespace physis 15 | 16 | 17 | -------------------------------------------------------------------------------- /translator/cuda_hm_runtime_builder.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_TRANSLATOR_CUDA_HM_RUNTIME_BUILDER_H_ 4 | #define PHYSIS_TRANSLATOR_CUDA_HM_RUNTIME_BUILDER_H_ 5 | 6 | #include "translator/translator_common.h" 7 | #include "translator/reference_runtime_builder.h" 8 | #include "translator/cuda_runtime_builder.h" 9 | 10 | namespace physis { 11 | namespace translator { 12 | /* Runtime builder for the CUDA-HM target. As declared here it adds no members and overrides nothing from CUDARuntimeBuilder. */ 13 | class CUDAHMRuntimeBuilder : public CUDARuntimeBuilder { 14 | public: 15 | CUDAHMRuntimeBuilder(SgScopeStatement *global_scope, 16 | const Configuration &config); 17 | virtual ~CUDAHMRuntimeBuilder() {} 18 | }; 19 | 20 | } // namespace translator 21 | } // namespace physis 22 | 23 | 24 | 25 | #endif /* PHYSIS_TRANSLATOR_CUDA_HM_RUNTIME_BUILDER_H_ */ 26 | -------------------------------------------------------------------------------- /translator/cuda_hm_translator.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #include "translator/cuda_hm_translator.h" 4 | 5 | namespace pu = physis::util; 6 | namespace sb = SageBuilder; 7 | namespace si = SageInterface; 8 | 9 | namespace physis { 10 | namespace translator { 11 | /* Constructs the CUDATranslator base from config, then sets target_specific_macro_ to "PHYSIS_CUDA_HM". */ 12 | CUDAHMTranslator::CUDAHMTranslator(const Configuration &config): 13 | CUDATranslator(config) { 14 | target_specific_macro_ = "PHYSIS_CUDA_HM"; 15 | } 16 | 17 | } // namespace translator 18 | } // namespace physis 19 | 20 | 21 | -------------------------------------------------------------------------------- /translator/cuda_hm_translator.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_TRANSLATOR_CUDA_HM_TRANSLATOR_H_ 4 | #define PHYSIS_TRANSLATOR_CUDA_HM_TRANSLATOR_H_ 5 | 6 | #include "translator/translator.h" 7 | #include "translator/translator_common.h" 8 | #include "translator/cuda_translator.h" 9 | #include "translator/cuda_runtime_builder.h" 10 | 11 | namespace physis { 12 | namespace translator { 13 | /* Translator for the CUDA-HM target. As declared here it adds only a constructor and a virtual destructor on top of CUDATranslator. */ 14 | class CUDAHMTranslator : public CUDATranslator { 15 | public: 16 | CUDAHMTranslator(const Configuration &config); 17 | virtual ~CUDAHMTranslator() {} 18 | }; 19 | 20 | } // namespace translator 21 | } // namespace physis 22 | 23 | #endif /* PHYSIS_TRANSLATOR_CUDA_HM_TRANSLATOR_H_ */ 24 | -------------------------------------------------------------------------------- /translator/def_analysis.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #ifndef PHYSIS_TRANSLATOR_DEF_ANALYSIS_H_ 4 | #define PHYSIS_TRANSLATOR_DEF_ANALYSIS_H_ 5 | 6 | #include "translator/translator_common.h" 7 | #include "translator/rose_util.h" 8 | 9 | namespace physis { 10 | namespace translator { 11 | 12 | typedef map DefMap; 13 | //! Renders a DefMap as text, one line per entry in the form name -> {expr,expr,...}; null expressions are printed as NULL. 14 | static inline string toString(DefMap &dm) { 15 | ostringstream ss; 16 | FOREACH(it, dm.begin(), dm.end()) { 17 | const SgInitializedName *v = it->first; 18 | StringJoin sj(","); 19 | FOREACH(eit, it->second.begin(), it->second.end()) { 20 | SgExpression *e = *eit; 21 | if (!e) { 22 | sj << "NULL"; 23 | } else { 24 | sj << e->unparseToString(); 25 | } 26 | } 27 | ss << v->get_name().getString() 28 | << " -> {" << sj << "}\n"; 29 | } 30 | 31 | return ss.str(); 32 | } 33 | //! Declared here, defined elsewhere. Presumably collects the definitions found under topLevelNode for the given types -- TODO confirm against the implementation. 34 | std::auto_ptr findDefinitions( 35 | SgNode *topLevelNode, const std::vector &relevantTypes); 36 | 37 | } // namespace translator 38 | } // namespace physis 39 | 40 | 41 | #endif /* PHYSIS_TRANSLATOR_DEF_ANALYSIS_H_ */ 42 | -------------------------------------------------------------------------------- /translator/fortran_output_fix.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | 4 | #ifndef PHYSIS_TRANSLATOR_FORTRAN_OUTPUT_FIX_H_ 5 | #define PHYSIS_TRANSLATOR_FORTRAN_OUTPUT_FIX_H_ 6 | 7 | #include "translator/translator_common.h" 8 | 9 | namespace physis { 10 | namespace translator { 11 | //! Declared here, defined elsewhere; takes the path of the file to fix. 12 | void FixFortranOutput(const string &path); 13 | 14 | } // namespace translator 15 | } // namespace physis 16 | 17 | #endif /* PHYSIS_TRANSLATOR_FORTRAN_OUTPUT_FIX_H_ */ 18 | 19 | -------------------------------------------------------------------------------- /translator/mpi_cuda_optimizer.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #ifndef PHYSIS_TRANSLATOR_MPI_CUDA_OPTIMIZER_H_ 4 | #define PHYSIS_TRANSLATOR_MPI_CUDA_OPTIMIZER_H_ 5 | 6 | #include "translator/translator_common.h" 7 | #include "translator/mpi_cuda_translator.h" 8 | // The whole declaration below is compiled out by the #if 0. 9 | #if 0 // TODO: Merge this to optimizer/mpi_cuda_optimizer.h 10 | namespace physis { 11 | namespace translator { 12 | 13 | class MPICUDAOptimizer { 14 | public: 15 | MPICUDAOptimizer(const MPICUDATranslator &trans); 16 | virtual ~MPICUDAOptimizer() {} 17 | virtual void GridPreCalcAddr(SgFunctionDeclaration *func); 18 | protected: 19 | const MPICUDATranslator &trans_; 20 | }; 21 | 22 | 23 | } // namespace translator 24 | } // namespace physis 25 | 26 | #endif 27 | 28 | #endif /* PHYSIS_TRANSLATOR_MPI_CUDA_OPTIMIZER_H_ */ 29 | -------------------------------------------------------------------------------- /translator/mpi_opencl_optimizer.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_TRANSLATOR_MPI_OPENCL_OPTIMIZER_H_ 4 | #define PHYSIS_TRANSLATOR_MPI_OPENCL_OPTIMIZER_H_ 5 | 6 | #include "translator/translator_common.h" 7 | #include "translator/mpi_opencl_translator.h" 8 | 9 | namespace physis { 10 | namespace translator { 11 | //! Optimizer bound to an MPIOpenCLTranslator. The translator is held by const reference, so it must outlive this object. 12 | class MPIOpenCLOptimizer { 13 | public: 14 | MPIOpenCLOptimizer(const MPIOpenCLTranslator &trans); 15 | virtual ~MPIOpenCLOptimizer() {} 16 | virtual void GridPreCalcAddr(SgFunctionDeclaration *func); 17 | protected: 18 | const MPIOpenCLTranslator &trans_; 19 | }; 20 | 21 | } // namespace translator 22 | } // namespace physis 23 | 24 | #endif /* PHYSIS_TRANSLATOR_MPI_OPENCL_OPTIMIZER_H_ */ 25 | -------------------------------------------------------------------------------- /translator/mpi_opencl_runtime_builder.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #ifndef PHYSIS_TRANSLATOR_MPI_OPENCL_RUNTIME_BUILDER_H_ 4 | #define PHYSIS_TRANSLATOR_MPI_OPENCL_RUNTIME_BUILDER_H_ 5 | 6 | #include "translator/translator_common.h" 7 | 8 | namespace physis { 9 | namespace translator { 10 | // Intentionally empty: this header currently declares nothing. 11 | } // namespace translator 12 | } // namespace physis 13 | 14 | #endif /* PHYSIS_TRANSLATOR_MPI_OPENCL_RUNTIME_BUILDER_H_ */ 15 | 16 | -------------------------------------------------------------------------------- /translator/mpi_openmp_translator.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_TRANSLATOR_MPI_OPENMP_TRANSLATOR_H_ 4 | #define PHYSIS_TRANSLATOR_MPI_OPENMP_TRANSLATOR_H_ 5 | 6 | #include "translator/mpi_translator.h" 7 | #include "translator/reference_translator.h" 8 | // Default X/Y/Z values; presumably the initial contents of division_ and cache_size_ below -- TODO confirm in the .cc file. 9 | #define MPI_OPENMP_DIVISION_X_DEFAULT (1) 10 | #define MPI_OPENMP_DIVISION_Y_DEFAULT (1) 11 | #define MPI_OPENMP_DIVISION_Z_DEFAULT (2) 12 | 13 | #define MPI_OPENMP_CACHESIZE_X_DEFAULT (100) 14 | #define MPI_OPENMP_CACHESIZE_Y_DEFAULT (100) 15 | #define MPI_OPENMP_CACHESIZE_Z_DEFAULT (100) 16 | 17 | namespace physis { 18 | namespace translator { 19 | //! Translator for the MPI+OpenMP target, derived from MPITranslator. 20 | class MPIOpenMPTranslator : public MPITranslator { 21 | private: 22 | 23 | public: 24 | MPIOpenMPTranslator(const Configuration &config); 25 | virtual ~MPIOpenMPTranslator(); 26 | 27 | //virtual void Translate(); 28 | 29 | //virtual void SetUp(SgProject *project, TranslationContext *context); 30 | //virtual void Finish(); 31 | // NOTE(review): MPITranslator declares TranslateInit (capital T). The lowercase translateInit below does not override it unless a base class also declares the lowercase spelling -- confirm which is intended. 32 | protected: 33 | virtual void translateInit(SgFunctionCallExp *node); 34 | 35 | // Nothing performed for this target for now 36 | virtual void FixAST() {} 37 | 38 | public: 39 | virtual SgBasicBlock *BuildRunKernelBody( 40 | StencilMap *s, SgInitializedName *stencil_param); 41 | 42 | protected: 43 | int division_[3]; 44 | int cache_size_[3]; 45 | 46 | 47 | }; 48 | 49 | } // namespace translator 50 | } // namespace physis 51 | 52 | #endif /*
PHYSIS_TRANSLATOR_MPI_OPENMP_TRANSLATOR_H_ */ 53 | -------------------------------------------------------------------------------- /translator/mpi_translator.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #ifndef PHYSIS_TRANSLATOR_MPI_TRANSLATOR_H_ 4 | #define PHYSIS_TRANSLATOR_MPI_TRANSLATOR_H_ 5 | 6 | #include "translator/translator.h" 7 | #include "translator/translator_common.h" 8 | #include "translator/reference_translator.h" 9 | #include "translator/mpi_runtime_builder.h" 10 | 11 | namespace physis { 12 | namespace translator { 13 | //! Translator for the MPI target, derived from ReferenceTranslator. 14 | class MPITranslator: public ReferenceTranslator { 15 | public: 16 | MPITranslator(const Configuration &config); 17 | virtual ~MPITranslator() {} 18 | virtual void Translate(); 19 | protected: 20 | bool flag_mpi_overlap_; 21 | virtual MPIRuntimeBuilder *builder() { 22 | return dynamic_cast<MPIRuntimeBuilder *>(rt_builder_); // yields NULL if rt_builder_ is not an MPIRuntimeBuilder 23 | } 24 | virtual void TranslateInit(SgFunctionCallExp *node); 25 | virtual void TranslateRun(SgFunctionCallExp *node, 26 | Run *run); 27 | virtual void appendNewArgExtra(SgExprListExp *args, Grid *g, 28 | SgVariableDeclaration *dim_decl); 29 | virtual void AppendNewArgStencilMemberInfo(SgExprListExp *args, Grid *g, 30 | SgStatement *prec_stmt); 31 | 32 | #if 0 33 | virtual void CheckSizes(); 34 | #endif 35 | 36 | int global_num_dims_; 37 | //IntArray global_size_; 38 | SgFunctionSymbol *stencil_run_func_; 39 | string get_addr_name_; 40 | string get_addr_no_halo_name_; 41 | string emit_addr_name_; 42 | 43 | virtual void FixAST(); 44 | }; 45 | 46 | } // namespace translator 47 | } // namespace physis 48 | 49 | 50 | #endif /* PHYSIS_TRANSLATOR_MPI_TRANSLATOR_H_ */ 51 | -------------------------------------------------------------------------------- /translator/optimizer/cuda_optimizer.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license.
See LICENSE.txt for more details. 2 | 3 | #include "translator/optimizer/cuda_optimizer.h" 4 | #include "translator/optimizer/optimization_passes.h" 5 | 6 | namespace physis { 7 | namespace translator { 8 | namespace optimizer { 9 | // Stage 1 applies no passes for the CUDA target. 10 | void CUDAOptimizer::DoStage1() { 11 | } 12 | // Stage 2 runs each pass whose configuration flag is set, in the fixed order below. 13 | void CUDAOptimizer::DoStage2() { 14 | if (config_->LookupFlag("OPT_KERNEL_INLINING")) { 15 | pass::kernel_inlining(proj_, tx_, builder_); 16 | } 17 | if (config_->LookupFlag("OPT_LOOP_PEELING")) { 18 | pass::loop_peeling(proj_, tx_, builder_); 19 | } 20 | if (config_->LookupFlag("OPT_REGISTER_BLOCKING")) { // NOTE(review): ReferenceOptimizer::DoStage2 runs unconditional_get before register_blocking and says that order is required; here the order is reversed -- confirm this is intentional. 21 | pass::register_blocking(proj_, tx_, builder_); 22 | } 23 | if (config_->LookupFlag("OPT_UNCONDITIONAL_GET")) { 24 | pass::unconditional_get(proj_, tx_, builder_); 25 | } 26 | if (config_->LookupFlag("OPT_OFFSET_CSE")) { 27 | pass::offset_cse(proj_, tx_, builder_); 28 | } 29 | if (config_->LookupFlag("OPT_OFFSET_SPATIAL_CSE")) { 30 | pass::offset_spatial_cse(proj_, tx_, builder_); 31 | } 32 | if (config_->LookupFlag("OPT_LOOP_OPT")) { 33 | pass::loop_opt(proj_, tx_, builder_); 34 | pass::primitive_optimization(proj_, tx_, builder_); 35 | } 36 | } 37 | 38 | } // namespace optimizer 39 | } // namespace translator 40 | } // namespace physis 41 | 42 | -------------------------------------------------------------------------------- /translator/optimizer/cuda_optimizer.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #ifndef PHYSIS_TRANSLATOR_OPTIMIZER_CUDA_OPTIMIZER_H_ 4 | #define PHYSIS_TRANSLATOR_OPTIMIZER_CUDA_OPTIMIZER_H_ 5 | 6 | #include "translator/optimizer/optimizer.h" 7 | 8 | namespace physis { 9 | namespace translator { 10 | namespace optimizer { 11 | //! Optimizer for the CUDA target; forwards its constructor arguments to Optimizer and overrides the two stage hooks. 12 | class CUDAOptimizer: public Optimizer { 13 | public: 14 | CUDAOptimizer(SgProject *proj, 15 | physis::translator::TranslationContext *tx, 16 | physis::translator::BuilderInterface *builder, 17 | physis::translator::Configuration *config) 18 | : Optimizer(proj, tx, builder, config) {} 19 | virtual ~CUDAOptimizer() {} 20 | protected: 21 | virtual void DoStage1(); 22 | virtual void DoStage2(); 23 | }; 24 | 25 | } // namespace optimizer 26 | } // namespace translator 27 | } // namespace physis 28 | 29 | #endif /* PHYSIS_TRANSLATOR_OPTIMIZER_CUDA_OPTIMIZER_H_ */ 30 | -------------------------------------------------------------------------------- /translator/optimizer/mpi_cuda_optimizer.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #include "translator/optimizer/mpi_cuda_optimizer.h" 4 | #include "translator/optimizer/optimization_passes.h" 5 | 6 | namespace physis { 7 | namespace translator { 8 | namespace optimizer { 9 | // Stage 1 applies no passes for the MPI-CUDA target. 10 | void MPICUDAOptimizer::DoStage1() { 11 | } 12 | // Stage 2 currently applies no passes; the only candidate is compiled out below. 13 | void MPICUDAOptimizer::DoStage2() { 14 | // TODO: support this optimization 15 | #if 0 16 | if (config_->LookupFlag("OPT_UNCONDITIONAL_GET")) { 17 | pass::unconditional_get(proj_, tx_, builder_); 18 | } 19 | #endif 20 | } 21 | 22 | } // namespace optimizer 23 | } // namespace translator 24 | } // namespace physis 25 | 26 | -------------------------------------------------------------------------------- /translator/optimizer/mpi_cuda_optimizer.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #ifndef PHYSIS_TRANSLATOR_OPTIMIZER_MPI_CUDA_OPTIMIZER_H_ 4 | #define PHYSIS_TRANSLATOR_OPTIMIZER_MPI_CUDA_OPTIMIZER_H_ 5 | 6 | #include "translator/optimizer/optimizer.h" 7 | 8 | namespace physis { 9 | namespace translator { 10 | namespace optimizer { 11 | //! Optimizer for the MPI-CUDA target; forwards its constructor arguments to Optimizer and overrides the two stage hooks. 12 | class MPICUDAOptimizer: public Optimizer { 13 | public: 14 | MPICUDAOptimizer(SgProject *proj, 15 | physis::translator::TranslationContext *tx, 16 | physis::translator::BuilderInterface *builder, 17 | physis::translator::Configuration *config) 18 | : Optimizer(proj, tx, builder, config) {} 19 | virtual ~MPICUDAOptimizer() {} 20 | protected: 21 | virtual void DoStage1(); 22 | virtual void DoStage2(); 23 | }; 24 | 25 | } // namespace optimizer 26 | } // namespace translator 27 | } // namespace physis 28 | 29 | #endif /* PHYSIS_TRANSLATOR_OPTIMIZER_MPI_CUDA_OPTIMIZER_H_ */ 30 | -------------------------------------------------------------------------------- /translator/optimizer/mpi_optimizer.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #include "translator/optimizer/mpi_optimizer.h" 4 | #include "translator/optimizer/optimization_passes.h" 5 | 6 | namespace physis { 7 | namespace translator { 8 | namespace optimizer { 9 | // Stage 1 applies no passes for the MPI target. 10 | void MPIOptimizer::DoStage1() { 11 | } 12 | // Stage 2 applies only kernel inlining, and only when OPT_KERNEL_INLINING is set. 13 | void MPIOptimizer::DoStage2() { 14 | if (config_->LookupFlag("OPT_KERNEL_INLINING")) { 15 | pass::kernel_inlining(proj_, tx_, builder_); 16 | } 17 | } 18 | 19 | } // namespace optimizer 20 | } // namespace translator 21 | } // namespace physis 22 | 23 | -------------------------------------------------------------------------------- /translator/optimizer/mpi_optimizer.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #ifndef PHYSIS_TRANSLATOR_OPTIMIZER_MPI_OPTIMIZER_H_ 4 | #define PHYSIS_TRANSLATOR_OPTIMIZER_MPI_OPTIMIZER_H_ 5 | 6 | #include "translator/optimizer/optimizer.h" 7 | 8 | namespace physis { 9 | namespace translator { 10 | namespace optimizer { 11 | //! Optimizer for the MPI target; forwards its constructor arguments to Optimizer and overrides the two stage hooks. 12 | class MPIOptimizer: public Optimizer { 13 | public: 14 | MPIOptimizer(SgProject *proj, 15 | physis::translator::TranslationContext *tx, 16 | physis::translator::BuilderInterface *builder, 17 | physis::translator::Configuration *config) 18 | : Optimizer(proj, tx, builder, config) {} 19 | virtual ~MPIOptimizer() {} 20 | protected: 21 | virtual void DoStage1(); 22 | virtual void DoStage2(); 23 | }; 24 | 25 | } // namespace optimizer 26 | } // namespace translator 27 | } // namespace physis 28 | 29 | #endif /* PHYSIS_TRANSLATOR_OPTIMIZER_MPI_OPTIMIZER_H_ */ 30 | -------------------------------------------------------------------------------- /translator/optimizer/optimization_common.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | // NOTE(review): unlike the other headers in this directory, this one has no include guard; it only contains declarations, so repeated inclusion is harmless today. 3 | #include "translator/translator_common.h" 4 | #include "translator/translation_context.h" 5 | 6 | namespace physis { 7 | namespace translator { 8 | namespace optimizer { 9 | 10 | //! Find innermost kernel loops 11 | extern vector FindInnermostLoops(SgNode *proj); 12 | 13 | //! Find expressions that are assigned to variable v 14 | extern void GetVariableSrc(SgInitializedName *v, 15 | vector &src_exprs); 16 | 17 | //! Simple dead code elimination 18 | extern bool EliminateDeadCode(SgStatement *stmt); 19 | 20 | //!
Returns a single source expression for a variable if statically determined 21 | SgExpression *GetDeterministicDefinition(SgInitializedName *var); 22 | 23 | } // namespace optimizer 24 | } // namespace translator 25 | } // namespace physis 26 | -------------------------------------------------------------------------------- /translator/optimizer/optimization_passes.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #include "translator/optimizer/optimization_passes.h" 4 | #include "translator/rose_util.h" 5 | 6 | namespace si = SageInterface; 7 | namespace sb = SageBuilder; 8 | 9 | namespace physis { 10 | namespace translator { 11 | namespace optimizer { 12 | namespace pass { 13 | //! Pass that performs no transformation; it only calls pre_process and leaves builder unused. NOTE(review): primitive_optimization pairs pre_process with post_process -- confirm the missing post_process here is intentional. 14 | void null_optimization( 15 | SgProject *proj, 16 | physis::translator::TranslationContext *tx, 17 | physis::translator::BuilderInterface *builder) { 18 | pre_process(proj, tx, __FUNCTION__); 19 | } 20 | 21 | } // namespace pass 22 | } // namespace optimizer 23 | } // namespace translator 24 | } // namespace physis 25 | 26 | -------------------------------------------------------------------------------- /translator/optimizer/optimizer.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #include "translator/optimizer/optimizer.h" 4 | #include "translator/optimizer/optimization_passes.h" 5 | #include "translator/ast_processing.h" 6 | 7 | namespace physis { 8 | namespace translator { 9 | namespace optimizer { 10 | // Default stage-1 hook: runs only the null pass. 11 | void Optimizer::DoStage1() { 12 | pass::null_optimization(proj_, tx_, builder_); 13 | } 14 | // Default stage-2 hook: runs only the null pass. 15 | void Optimizer::DoStage2() { 16 | pass::null_optimization(proj_, tx_, builder_); 17 | } 18 | void Optimizer::Stage1() { // Runs the pre-process hook, DoStage1(), then the post-process hook, logging before and after the passes. 19 | Stage1PreProcess(); 20 | LOG_DEBUG() << "Applying Stage 1 optimization passes\n"; 21 | DoStage1(); 22 | LOG_DEBUG() << "Stage 1 optimization done\n"; 23 | Stage1PostProcess(); 24 | } 25 | // Same sequence as Stage1(), using the stage-2 hooks. 26 | void Optimizer::Stage2() { 27 | Stage2PreProcess(); 28 | LOG_DEBUG() << "Applying Stage 2 optimization passes\n"; 29 | DoStage2(); 30 | LOG_DEBUG() << "Stage 2 optimization done\n"; 31 | Stage2PostProcess(); 32 | } 33 | // No work is done before the stage-1 passes. 34 | void Optimizer::Stage1PreProcess() { 35 | } 36 | // No work is done after the stage-1 passes. 37 | void Optimizer::Stage1PostProcess() { 38 | } 39 | // No work is done before the stage-2 passes. 40 | void Optimizer::Stage2PreProcess() { 41 | } 42 | // After the stage-2 passes, calls rose_util::RemoveUnusedFunction on the project. 43 | void Optimizer::Stage2PostProcess() { 44 | rose_util::RemoveUnusedFunction(proj_); 45 | } 46 | 47 | 48 | } // namespace optimizer 49 | } // namespace translator 50 | } // namespace physis 51 | 52 | -------------------------------------------------------------------------------- /translator/optimizer/primitive_optimization.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #include "translator/optimizer/optimization_passes.h" 4 | #include "translator/optimizer/optimization_common.h" 5 | #include "translator/rose_util.h" 6 | #include "translator/builder_interface.h" 7 | #include "translator/translation_util.h" 8 | 9 | namespace si = SageInterface; 10 | namespace sb = SageBuilder; 11 | 12 | namespace physis { 13 | namespace translator { 14 | namespace optimizer { 15 | namespace pass { 16 | //! Runs EliminateDeadCode on every loop returned by FindInnermostLoops(proj), bracketed by pre_process and post_process. tx is only forwarded to those two calls; builder is unused here. 17 | void primitive_optimization( 18 | SgProject *proj, 19 | physis::translator::TranslationContext *tx, 20 | physis::translator::BuilderInterface *builder) { 21 | pre_process(proj, tx, __FUNCTION__); 22 | 23 | vector target_loops = FindInnermostLoops(proj); 24 | FOREACH (it, target_loops.begin(), target_loops.end()) { 25 | SgForStatement *loop = *it; 26 | EliminateDeadCode(loop); // the boolean result is ignored 27 | } 28 | 29 | post_process(proj, tx, __FUNCTION__); 30 | } 31 | 32 | } // namespace pass 33 | } // namespace optimizer 34 | } // namespace translator 35 | } // namespace physis 36 | 37 | -------------------------------------------------------------------------------- /translator/optimizer/reference_optimizer.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #include "translator/optimizer/reference_optimizer.h" 4 | #include "translator/optimizer/optimization_passes.h" 5 | 6 | namespace physis { 7 | namespace translator { 8 | namespace optimizer { 9 | // Stage 1 applies no passes for the reference target. 10 | void ReferenceOptimizer::DoStage1() { 11 | } 12 | // Stage 2 runs each pass whose configuration flag is set, in the fixed order below. 13 | void ReferenceOptimizer::DoStage2() { 14 | if (config_->LookupFlag("OPT_KERNEL_INLINING")) { 15 | pass::kernel_inlining(proj_, tx_, builder_); 16 | } 17 | if (config_->LookupFlag("OPT_LOOP_PEELING")) { 18 | pass::loop_peeling(proj_, tx_, builder_); 19 | } 20 | // Unconditional get should be placed before register blocking 21 | if (config_->LookupFlag("OPT_UNCONDITIONAL_GET")) { 22 | pass::unconditional_get(proj_, tx_, builder_); 23 | } 24 | if (config_->LookupFlag("OPT_REGISTER_BLOCKING")) { 25 | pass::register_blocking(proj_, tx_, builder_); 26 | } 27 | if (config_->LookupFlag("OPT_OFFSET_CSE")) { 28 | pass::offset_cse(proj_, tx_, builder_); 29 | } 30 | if (config_->LookupFlag("OPT_OFFSET_SPATIAL_CSE")) { 31 | pass::offset_spatial_cse(proj_, tx_, builder_); 32 | } 33 | if (config_->LookupFlag("OPT_LOOP_OPT")) { // this one flag enables both loop_opt and primitive_optimization 34 | pass::loop_opt(proj_, tx_, builder_); 35 | pass::primitive_optimization(proj_, tx_, builder_); 36 | } 37 | } 38 | 39 | } // namespace optimizer 40 | } // namespace translator 41 | } // namespace physis 42 | 43 | -------------------------------------------------------------------------------- /translator/optimizer/reference_optimizer.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #ifndef PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_OPTIMIZER_H_ 4 | #define PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_OPTIMIZER_H_ 5 | 6 | #include "translator/optimizer/optimizer.h" 7 | 8 | namespace physis { 9 | namespace translator { 10 | namespace optimizer { 11 | //! Optimizer for the reference target; forwards its constructor arguments to Optimizer and overrides the two stage hooks. 12 | class ReferenceOptimizer: public Optimizer { 13 | public: 14 | ReferenceOptimizer(SgProject *proj, 15 | physis::translator::TranslationContext *tx, 16 | physis::translator::BuilderInterface *builder, 17 | physis::translator::Configuration *config) 18 | : Optimizer(proj, tx, builder, config) {} 19 | virtual ~ReferenceOptimizer() {} 20 | protected: 21 | virtual void DoStage1(); 22 | virtual void DoStage2(); 23 | }; 24 | 25 | } // namespace optimizer 26 | } // namespace translator 27 | } // namespace physis 28 | 29 | #endif /* PHYSIS_TRANSLATOR_OPTIMIZER_REFERENCE_OPTIMIZER_H_ */ 30 | -------------------------------------------------------------------------------- /translator/physis_exception.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #ifndef PHYSIS_TRANSLATOR_PHYSIS_EXCEPTION_H_ 4 | #define PHYSIS_TRANSLATOR_PHYSIS_EXCEPTION_H_ 5 | 6 | #include <exception> 7 | #include <string> 8 | namespace physis { 9 | namespace translator { 10 | //! Exception type that stores a message string and returns it from what(). 11 | class PhysisException 12 | :public std::exception { 13 | std::string msg; 14 | public: 15 | explicit PhysisException(const std::string &msg) throw(): msg(msg) {} 16 | virtual ~PhysisException() throw() {} 17 | virtual const char* what() const throw() { 18 | return msg.c_str(); 19 | } 20 | }; 21 | 22 | } // namespace translator 23 | } // namespace physis 24 | 25 | #endif /* PHYSIS_TRANSLATOR_PHYSIS_EXCEPTION_H_ */ 26 | -------------------------------------------------------------------------------- /translator/physisc-cuda-hm.cmake: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Arguments are forwarded quoted so that each one stays a single word. 3 | ${CMAKE_INSTALL_PREFIX}/bin/physisc --cuda-host-memory -I${CMAKE_INSTALL_PREFIX}/include "$@" 4 | -------------------------------------------------------------------------------- /translator/physisc-cuda.cmake: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Arguments are forwarded quoted so that each one stays a single word. 3 | ${CMAKE_INSTALL_PREFIX}/bin/physisc --cuda -I${CMAKE_INSTALL_PREFIX}/include "$@" -------------------------------------------------------------------------------- /translator/physisc-mpi-cuda.cmake: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Arguments are forwarded quoted so that each one stays a single word. 3 | ${CMAKE_INSTALL_PREFIX}/bin/physisc --mpi-cuda -I${CMAKE_INSTALL_PREFIX}/include "$@" -------------------------------------------------------------------------------- /translator/physisc-mpi-opencl.cmake: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Arguments are forwarded quoted so that each one stays a single word. 3 | ${CMAKE_INSTALL_PREFIX}/bin/physisc --mpi-opencl \ 4 | -I${CMAKE_INSTALL_PREFIX}/include \ 5 | "$@" 6 | -------------------------------------------------------------------------------- /translator/physisc-mpi-openmp.cmake:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Arguments are forwarded quoted so that each one stays a single word. 3 | ${CMAKE_INSTALL_PREFIX}/bin/physisc --mpi-openmp -I${CMAKE_INSTALL_PREFIX}/include "$@" 4 | -------------------------------------------------------------------------------- /translator/physisc-mpi.cmake: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Arguments are forwarded quoted so that each one stays a single word. 3 | ${CMAKE_INSTALL_PREFIX}/bin/physisc --mpi -I${CMAKE_INSTALL_PREFIX}/include "$@" -------------------------------------------------------------------------------- /translator/physisc-mpi2.cmake: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Arguments are forwarded quoted so that each one stays a single word. 3 | ${CMAKE_INSTALL_PREFIX}/bin/physisc --mpi2 -I${CMAKE_INSTALL_PREFIX}/include "$@" -------------------------------------------------------------------------------- /translator/physisc-opencl.cmake: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Arguments are forwarded quoted so that each one stays a single word. 3 | ${CMAKE_INSTALL_PREFIX}/bin/physisc --opencl -DPHYSIS_OPENCL_HEADER_DIR=\"${CMAKE_INSTALL_PREFIX}/include\" -I${CMAKE_INSTALL_PREFIX}/include "$@" 4 | -------------------------------------------------------------------------------- /translator/physisc-ref.cmake: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Arguments are forwarded quoted so that each one stays a single word. 3 | ${CMAKE_INSTALL_PREFIX}/bin/physisc --ref -I${CMAKE_INSTALL_PREFIX}/include "$@" -------------------------------------------------------------------------------- /translator/reduce.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | // Copyright 2011, Tokyo Institute of Technology. 4 | // All rights reserved. 5 | // 6 | // This file is distributed under the license described in 7 | // LICENSE.txt.
8 | // 9 | // Author: Naoya Maruyama (naoya@matsulab.is.titech.ac.jp) 10 | 11 | #ifndef PHYSIS_TRANSLATOR_REDUCE_H_ 12 | #define PHYSIS_TRANSLATOR_REDUCE_H_ 13 | 14 | #include "translator/translator_common.h" 15 | #include "translator/grid.h" 16 | #include "physis/physis_util.h" 17 | 18 | #define REDUCE_NAME ("PSReduce") 19 | 20 | namespace physis { 21 | namespace translator { 22 | //! AST attribute that records a reduce call expression and its kind (GRID or KERNEL). 23 | class Reduce: public AstAttribute { 24 | public: 25 | enum KIND {GRID, KERNEL}; 26 | Reduce(SgFunctionCallExp *fc); 27 | virtual ~Reduce(); 28 | static const std::string name; 29 | Reduce *copy(); 30 | SgFunctionCallExp *reduce_call() const { return reduce_call_; } 31 | bool IsGrid() const; 32 | bool IsKernel() const; 33 | //! Returns the variable referencing the grid to be reduced. 34 | SgVarRefExp *GetGrid() const; 35 | //! Returns true if a call is to the reduce intrinsic. 36 | /*! 37 | \param call A function call. 38 | \return True if the call is to the reduce intrinsic. 39 | */ 40 | static bool IsReduce(SgFunctionCallExp *call); 41 | protected: 42 | SgFunctionCallExp *reduce_call_; 43 | KIND kind_; 44 | }; 45 | 46 | 47 | } // namespace translator 48 | } // namespace physis 49 | 50 | #endif /* PHYSIS_TRANSLATOR_REDUCE_H_ */ 51 | -------------------------------------------------------------------------------- /translator/rose_ast_attribute.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #include "translator/rose_ast_attribute.h" 4 | #include "translator/rose_util.h" 5 | #include "translator/stencil_range.h" 6 | 7 | namespace physis { 8 | namespace translator { 9 | 10 | const std::string GridCallAttribute::name = "GridCall"; 11 | 12 | GridCallAttribute::GridCallAttribute(SgInitializedName *grid_var, 13 | KIND k): 14 | grid_var_(grid_var), kind_(k) { 15 | } 16 | 17 | GridCallAttribute::~GridCallAttribute() {} 18 | // Returns a newly allocated attribute with the same grid variable and kind. 19 | AstAttribute *GridCallAttribute::copy() { 20 | return new GridCallAttribute(grid_var_, kind_); 21 | } 22 | 23 | bool GridCallAttribute::IsGet() { 24 | return kind_ == GET; 25 | } 26 | 27 | bool GridCallAttribute::IsGetPeriodic() { 28 | return kind_ == GET_PERIODIC; 29 | } 30 | 31 | bool GridCallAttribute::IsEmit() { 32 | return kind_ == EMIT; 33 | } 34 | //! Copies the known AST attributes from src to dst; only the attribute checked below is handled. 35 | void CopyAllAttributes(SgNode *dst, SgNode *src) { 36 | // ROSE does not seem to have an API for locating all attached 37 | // attributes or copying them all. So, as an ad-hoc workaround, list 38 | // all potentially attached attributes here to get them copied to 39 | // the destination node. 40 | if (rose_util::GetASTAttribute(src)) { 41 | rose_util::CopyASTAttribute( 42 | dst, src, false); 43 | LOG_DEBUG() << "StencilIndexVarAttribute found at: " 44 | << src->unparseToString() << "\n"; 45 | } 46 | } 47 | 48 | } // namespace translator 49 | } // namespace physis 50 | -------------------------------------------------------------------------------- /translator/rose_ast_attribute.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #ifndef PHYSIS_TRANSLATOR_ROSE_AST_ATTRIBUTE_H_ 4 | #define PHYSIS_TRANSLATOR_ROSE_AST_ATTRIBUTE_H_ 5 | 6 | #include "translator/translator_common.h" 7 | 8 | namespace physis { 9 | namespace translator { 10 | //! AST attribute that records the grid variable and the kind (GET, GET_PERIODIC or EMIT) of a grid call. 11 | class GridCallAttribute: public AstAttribute { 12 | public: 13 | enum KIND {GET, GET_PERIODIC, EMIT}; 14 | GridCallAttribute(SgInitializedName *grid_var, 15 | KIND k); 16 | virtual ~GridCallAttribute(); 17 | static const std::string name; 18 | AstAttribute *copy(); 19 | SgInitializedName *grid_var() { return grid_var_; } 20 | //! Returns true if the node is get. 21 | bool IsGet(); 22 | //! Returns true if the node is get_periodic. 23 | bool IsGetPeriodic(); 24 | bool IsEmit(); //!< Returns true if the node is emit. 25 | protected: 26 | SgInitializedName *grid_var_; 27 | KIND kind_; 28 | }; 29 | //! Copies the AST attributes known to the implementation from src to dst. 30 | void CopyAllAttributes(SgNode *dst, SgNode *src); 31 | 32 | } // namespace translator 33 | } // namespace physis 34 | 35 | #endif /* PHYSIS_TRANSLATOR_ROSE_AST_ATTRIBUTE_H_ */ 36 | -------------------------------------------------------------------------------- /translator/rose_fortran.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #ifndef PHYSIS_TRANSLATOR_ROSE_FORTRAN_H_ 4 | #define PHYSIS_TRANSLATOR_ROSE_FORTRAN_H_ 5 | 6 | #include "translator/translator_common.h" 7 | #include "physis/internal_common.h" 8 | 9 | namespace physis { 10 | namespace translator { 11 | namespace rose_fortran { 12 | // Builder helpers for Fortran AST nodes; declared here, defined elsewhere. 13 | SgDerivedTypeStatement *BuildDerivedTypeStatementAndDefinition( 14 | std::string name, SgScopeStatement *scope); 15 | 16 | SgFortranDo *BuildFortranDo(SgExpression *initialization, 17 | SgExpression *bound, 18 | SgExpression *increment, 19 | SgBasicBlock *body); 20 | 21 | SgAllocateStatement *BuildAllocateStatement(); 22 | 23 | 24 | } // namespace rose_fortran 25 | } // namespace translator 26 | } // namespace physis 27 | 28 | #endif /* PHYSIS_TRANSLATOR_ROSE_FORTRAN_H_ */ 29 | -------------------------------------------------------------------------------- /translator/run.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #ifndef PHYSIS_TRANSLATOR_RUN_H_ 4 | #define PHYSIS_TRANSLATOR_RUN_H_ 5 | 6 | #include 7 | 8 | #include "translator/translator_common.h" 9 | #include "physis/physis_util.h" 10 | #include "translator/map.h" 11 | 12 | namespace physis { 13 | namespace translator { 14 | 15 | class TranslationContext; 16 | //! Describes one stencil-run call: the call expression, a count expression and the stencil-map arguments. 17 | class Run { 18 | SgFunctionCallExp *call; 19 | SgExpression *count_; 20 | typedef std::vector > 21 | StencilMapArgVector; 22 | StencilMapArgVector stencils_; 23 | public: 24 | Run(SgFunctionCallExp *call, TranslationContext *tx); 25 | virtual ~Run() {} 26 | //! Returns the name "__" + PS_STENCIL_RUN_NAME + "_" + id for this run. 27 | string GetName() const { 28 | return "__" + string(PS_STENCIL_RUN_NAME) + "_" + toString(id_); 29 | } 30 | 31 | const StencilMapArgVector &stencils() const { return stencils_; } 32 | bool HasCount() const; 33 | SgExpression *BuildCount() const; 34 | 35 | static bool isRun(SgFunctionCallExp *call); 36 | static SgExpression *findCountArg(SgFunctionCallExp *call); 37 | #ifdef UNUSED_CODE 38 | virtual bool IsRead(Grid *g, TranslationContext *tx); 39 | virtual bool IsReadAny(GridSet *gs, TranslationContext *tx); 40 | virtual bool IsModified(Grid *g, TranslationContext *tx); 41 | virtual bool IsModifiedAny(GridSet *gs, TranslationContext *tx); 42 | #endif 43 | 44 | int id() const { return id_; } 45 | 46 | protected: 47 | int id_; 48 | static Counter c; 49 | }; 50 | 51 | } // namespace translator 52 | } // namespace physis 53 | 54 | #endif /* PHYSIS_TRANSLATOR_RUN_H_ */ 55 | -------------------------------------------------------------------------------- /translator/stencil_analysis.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details.
2 | 3 | #ifndef PHYSIS_TRANSLATOR_STENCIL_ANALYSIS_H_ 4 | #define PHYSIS_TRANSLATOR_STENCIL_ANALYSIS_H_ 5 | 6 | #include "translator/translator_common.h" 7 | #include "translator/map.h" 8 | 9 | namespace physis { 10 | namespace translator { 11 | 12 | bool AnalyzeStencilIndex(SgExpression *arg, StencilIndex &idx, 13 | SgFunctionDeclaration *kernel); 14 | void AnalyzeStencilRange(StencilMap &sm, TranslationContext &tx); 15 | 16 | //void AnalyzeEmit(SgFunctionDeclaration *func); 17 | 18 | void AnalyzeGet(SgNode *top_level_node, 19 | TranslationContext &tx); 20 | void AnalyzeEmit(SgNode *top_level_node, 21 | TranslationContext &tx); 22 | 23 | /*! 24 | 25 | \param get 26 | \param indices 27 | \param parent 28 | \return True upon success; false otherwise. 29 | */ 30 | bool AnalyzeGetArrayMember(SgDotExp *get, SgExpressionVector &indices, 31 | SgExpression *&parent); 32 | 33 | 34 | } // namespace translator 35 | } // namespace physis 36 | 37 | 38 | #endif /* PHYSIS_TRANSLATOR_STENCIL_ANALYSIS_H_ */ 39 | -------------------------------------------------------------------------------- /translator/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(ROSE) 2 | if (NOT ROSE_FOUND) 3 | return() 4 | endif() 5 | 6 | include_directories(${ROSE_INCLUDE_DIR}) 7 | include_directories(${CMAKE_SOURCE_DIR}/tests/gmock) 8 | link_directories(${CMAKE_BINARY_DIR}/tests/gmock) 9 | 10 | set (test_src 11 | test_ast_processing.cc test_grid.cc 12 | test_ast_traversal.cc) 13 | 14 | add_custom_target(test-translator 15 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) 16 | 17 | foreach (i ${test_src}) 18 | get_filename_component(exe ${i} NAME_WE) 19 | add_executable(${exe} ${i} common.cc) 20 | target_link_libraries(${exe} 21 | translator 22 | gmock 23 | ${ROSE_LIBRARIES} 24 | ${JAVA_JVM_LIBRARY} 25 | ${Boost_LIBRARIES}) 26 | file(GLOB input_files 27 | "${CMAKE_CURRENT_SOURCE_DIR}/${exe}_input*.c") 28 | foreach (input ${input_files}) 
29 | get_filename_component(fname ${input} NAME) 30 | add_custom_command( 31 | OUTPUT ${fname} 32 | COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/${fname} ${CMAKE_CURRENT_BINARY_DIR}/${fname} 33 | DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${fname}) 34 | add_custom_target(${fname} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${fname}) 35 | add_dependencies(${exe} ${fname}) 36 | endforeach () 37 | #add_dependencies(test-translator ${exe}) 38 | add_custom_target(test-${exe} 39 | COMMAND ${exe} 40 | DEPENDS ${exe} 41 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) 42 | add_dependencies(test-translator test-${exe}) 43 | unset(input_files) 44 | endforeach () 45 | 46 | -------------------------------------------------------------------------------- /translator/test/common.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 2 | 3 | #include "translator/test/common.h" 4 | 5 | #include 6 | 7 | using namespace ::std; 8 | 9 | namespace physis { 10 | namespace translator { 11 | namespace test { 12 | 13 | /*! Calls frontend() with the argument list {"test", infile}, runs AstTests::runAllTests on the resulting project and returns that project. */ SgProject *FrontEnd(const char *infile) { 14 | vector argv; 15 | argv.push_back("test"); 16 | argv.push_back(infile); 17 | SgProject* proj = frontend(argv); 18 | AstTests::runAllTests(proj); 19 | return proj; 20 | } 21 | 22 | 23 | } // namespace test 24 | } // namespace translator 25 | } // namespace physis 26 | -------------------------------------------------------------------------------- /translator/test/common.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #ifndef PHYSIS_TRANSLATOR_TEST_COMMON_H_ 4 | #define PHYSIS_TRANSLATOR_TEST_COMMON_H_ 5 | 6 | #include "rose.h" 7 | 8 | namespace physis { 9 | namespace translator { 10 | namespace test { 11 | 12 | SgProject *FrontEnd(const char *infile); 13 | 14 | } // namespace test 15 | } // namespace translator 16 | } // namespace physis 17 | 18 | 19 | #endif /* PHYSIS_TRANSLATOR_TEST_COMMON_H_ */ 20 | -------------------------------------------------------------------------------- /translator/test/test_ast_processing_input_remove_redundant_variable_copy.c: -------------------------------------------------------------------------------- 1 | 2 | int DoesNotRemoveNonRedundantVariableCopy() { 3 | int x = 10; 4 | return x; 5 | } 6 | 7 | int RemovesRedundantVariableCopy() { 8 | int x = 10; 9 | int z __attribute__((unused)), y = x; // make sure only y is 10 | // removed and z is left as is 11 | return y; 12 | } 13 | 14 | int DoesNotRemoveVariableCopyWhenSrcReassigned() { 15 | int x = 10; 16 | int y = x; 17 | x = 20; 18 | return y; 19 | } 20 | 21 | int DoesNotRemoveVariableCopyWhenDstReassigned() { 22 | int x = 10; 23 | int y = x; 24 | y = 10; 25 | return y; 26 | } 27 | 28 | int foo(int x) { 29 | return x; 30 | } 31 | 32 | int RemoveRedundantVariableCopyWithFuncCall() { 33 | int x = 10; 34 | int y = x; 35 | return foo(y); 36 | } 37 | 38 | int RemoveWhenAssignedWithUnaryOp() { 39 | int x = 10; 40 | int y = -x; 41 | return foo(y); 42 | } 43 | -------------------------------------------------------------------------------- /translator/test/test_ast_processing_input_remove_unused_func.c: -------------------------------------------------------------------------------- 1 | 2 | static void ToRemove() { 3 | return; 4 | } 5 | 6 | void NotToRemove1() { 7 | return; 8 | } 9 | 10 | static void NotToRemove2() { 11 | return; 12 | } 13 | 14 | void foo() { 15 | NotToRemove2(); 16 | } 17 | 18 | static void PointerReferenced() { 19 | return; 20 | } 21 | 22 | void bar() { 23 | void (*x)() = 
PointerReferenced; 24 | return; 25 | } 26 | 27 | -------------------------------------------------------------------------------- /translator/test/test_ast_traversal_input.c: -------------------------------------------------------------------------------- 1 | 2 | void foo(int param) { 3 | } 4 | -------------------------------------------------------------------------------- /translator/test/test_grid_input.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/naoyam/physis/39ee5250a2d5baa545ca03e7c5c9aa9c81f1ab19/translator/test/test_grid_input.c -------------------------------------------------------------------------------- /translator/tocheck: -------------------------------------------------------------------------------- 1 | kernel.cc: analyzeGridWrites -> tx.getGridEmitCalls 2 | translation_context.cc: getGridCalls, getGridEmits 3 | grid.cc: isGridCall, typespecificcall 4 | -------------------------------------------------------------------------------- /translator/translation_util.h: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #ifndef PHYSIS_TRANSLATOR_TRANSLATOR_UTIL_H_ 4 | #define PHYSIS_TRANSLATOR_TRANSLATOR_UTIL_H_ 5 | 6 | #include "physis/physis_common.h" 7 | #include "translator/translator_common.h" 8 | #include "translator/grid.h" 9 | namespace physis { 10 | namespace translator { 11 | 12 | SgType *BuildInt32Type(SgScopeStatement *scope=NULL); 13 | SgType *BuildInt64Type(SgScopeStatement *scope=NULL); 14 | SgType *BuildIndexType(SgScopeStatement *scope=NULL); 15 | SgType *BuildIndexType2(SgScopeStatement *scope=NULL); 16 | 17 | SgExpression *BuildIndexVal(PSIndex v); 18 | 19 | SgType *BuildPSOffsetsType(); 20 | SgVariableDeclaration *BuildPSOffsets(std::string name, 21 | SgScopeStatement *scope, 22 | __PSOffsets &v); 23 | 24 | SgType *BuildPSGridRangeType(); 25 | SgVariableDeclaration *BuildPSGridRange(std::string name, 26 | SgScopeStatement *block, 27 | __PSGridRange &v); 28 | 29 | SgExpression *BuildFunctionCall(const std::string &name, 30 | SgExpression *arg1); 31 | 32 | SgType *GetBaseType(SgType *ty); 33 | 34 | } // namespace translator 35 | } // namespace physis 36 | 37 | 38 | 39 | #endif /* PHYSIS_TRANSLATOR_TRANSLATOR_UTIL_H_ */ 40 | -------------------------------------------------------------------------------- /util/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(Lua51 REQUIRED) 2 | message(STATUS "Lua include dir: ${LUA_INCLUDE_DIR}") 3 | message(STATUS "Lua libraries dir: ${LUA_LIBRARIES}") 4 | message(STATUS "Lua version: ${LUA_VERSION_STRING}") 5 | include_directories(${LUA_INCLUDE_DIR}) 6 | add_library(configuration configuration.cc lua_loader.cc) 7 | target_link_libraries(configuration ${LUA_LIBRARIES}) -------------------------------------------------------------------------------- /util/configuration.cc: -------------------------------------------------------------------------------- 1 | // Licensed under the BSD license. See LICENSE.txt for more details. 
2 | 3 | #include "util/configuration.h" 4 | 5 | #include 6 | 7 | #include "util/lua_loader.h" 8 | #include "physis/physis_util.h" 9 | 10 | using std::string; 11 | 12 | namespace physis { 13 | namespace util { 14 | 15 | 16 | Configuration::Configuration() {} 17 | 18 | /*! Loads the file at path through a local LuaLoader, merges the returned table into tbl_, logs the current configuration at debug level and always returns 0. NOTE(review): tbl is dereferenced without a null check and is not released here; confirm what LuaLoader::LoadFile returns on failure and who owns the table. */ int Configuration::LoadFile(const std::string &path) { 19 | LuaLoader ll; 20 | LuaTable *tbl = ll.LoadFile(path); 21 | tbl_.Merge(*tbl); 22 | LOG_DEBUG() << "Current config: " << *this << "\n"; 23 | return 0; 24 | } 25 | 26 | /*! Writes "{" + joined entries + "}" to os and returns os. One entry is produced for each key of key_desc_map_ that tbl_ contains: the key, then ": ", then whatever the found value's print() emits; entries are collected in a StringJoin. */ std::ostream &Configuration::print(std::ostream &os) const { 27 | StringJoin sj; 28 | FOREACH (it, key_desc_map_.begin(), key_desc_map_.end()) { 29 | const KeyDesc &key = it->second; 30 | if (tbl_.HasKey(key)) { 31 | tbl_.Find(key)->second->print(sj << key << ": "); 32 | } 33 | } 34 | return os << "{" << sj.str() << "}"; 35 | } 36 | 37 | } // namespace util 38 | } // namespace physis 39 | -------------------------------------------------------------------------------- /util/log4cpp-test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "log4cpp.h" 3 | #include 4 | using namespace std; 5 | 6 | struct foo 7 | { 8 | ostream& print(std::ostream& os) const { 9 | os << "foo"; 10 | return os; 11 | } 12 | }; 13 | 14 | std::ostream &operator<<(std::ostream &os, const foo&x) 15 | { 16 | return x.print(os); 17 | } 18 | 19 | 20 | int main(int argc, char *argv[]) 21 | { 22 | cout << "Hello, world!" 
<< endl; 23 | 24 | LOG_DEBUG("test"); 25 | LOG_DEBUG(1.2); 26 | LOG_DEBUG(string("string")); 27 | foo x; 28 | LOG_DEBUG(x); 29 | 30 | LOG_ERROR(x); 31 | LOG_WARNING("abc"); 32 | TRACE_START; 33 | 34 | return EXIT_SUCCESS; 35 | } 36 | 37 | 38 | -------------------------------------------------------------------------------- /util/log4cpp.cpp: -------------------------------------------------------------------------------- 1 | #include "log4cpp.h" 2 | #include 3 | 4 | using namespace log4cpp; 5 | 6 | const logger log4cpp::cerr_logger(&std::cerr); 7 | --------------------------------------------------------------------------------