├── .gitignore ├── CMakeLists.txt ├── INSTALL.md ├── LICENSE ├── README.md ├── examples ├── CMakeLists.txt ├── README.md ├── example.in ├── random_cube.c ├── random_cube_reproducible.c ├── run_readin.c ├── support_fns.c ├── support_fns.h ├── test_BaryTreeInterface.c ├── zoltan_fns.c └── zoltan_fns.h ├── interfaces ├── README.md ├── fortran │ ├── BaryTreeInterface.fh │ └── example_interface.f03 └── python │ ├── BaryTreeInterface.py │ └── testBaryTreeInterface.py ├── src ├── CMakeLists.txt ├── clusters │ ├── clusters.c │ ├── clusters.h │ └── struct_clusters.h ├── comm_cp │ ├── comm_cp.c │ └── comm_cp.h ├── comm_types │ ├── comm_types.c │ ├── comm_types.h │ └── struct_comm_types.h ├── comm_windows │ ├── comm_windows.c │ ├── comm_windows.h │ └── struct_comm_windows.h ├── drivers │ ├── directdriver.c │ ├── directdriver.h │ ├── treedriver.c │ └── treedriver.h ├── interaction_compute │ ├── interaction_compute.h │ ├── interaction_compute_cc.c │ ├── interaction_compute_correction.c │ ├── interaction_compute_cp.c │ ├── interaction_compute_direct.c │ ├── interaction_compute_downpass.c │ └── interaction_compute_pc.c ├── interaction_lists │ ├── interaction_lists.c │ ├── interaction_lists.h │ └── struct_interaction_lists.h ├── interface │ ├── BaryTreeInterface.c │ └── BaryTreeInterface.h ├── kernels │ ├── README.md │ ├── atan │ │ ├── atan.h │ │ ├── atan_pc.c │ │ ├── atan_pc.h │ │ ├── atan_pp.c │ │ └── atan_pp.h │ ├── coulomb │ │ ├── coulomb.h │ │ ├── coulomb_cc_hermite.c │ │ ├── coulomb_cc_hermite.h │ │ ├── coulomb_cp.c │ │ ├── coulomb_cp.h │ │ ├── coulomb_pc.c │ │ ├── coulomb_pc.h │ │ ├── coulomb_pp.c │ │ ├── coulomb_pp.h │ │ ├── coulomb_ss_cc.c │ │ ├── coulomb_ss_cc.h │ │ ├── coulomb_ss_correction.c │ │ ├── coulomb_ss_correction.h │ │ ├── coulomb_ss_cp.c │ │ ├── coulomb_ss_cp.h │ │ ├── coulomb_ss_pc.c │ │ ├── coulomb_ss_pc.h │ │ ├── coulomb_ss_pp.c │ │ └── coulomb_ss_pp.h │ ├── dcf │ │ ├── dcf.c │ │ └── dcf.h │ ├── mq │ │ ├── mq.h │ │ ├── mq_pc.c │ │ ├── mq_pc.h │ │ ├── mq_pp.c │ │ └── mq_pp.h │ ├── rbs-u │ │ ├── rbs-u.h │ │ ├── rbs-u_cp.c │ │ ├── rbs-u_cp.h │ │ ├── rbs-u_pc.c │ │ ├── rbs-u_pc.h │ │ ├── rbs-u_pp.c │ │ └── rbs-u_pp.h │ ├── rbs-v │ │ ├── rbs-v.h │ │ ├── rbs-v_cp.c │ │ ├── rbs-v_cp.h │ │ ├── rbs-v_pc.c │ │ ├── rbs-v_pc.h │ │ ├── rbs-v_pp.c │ │ └── rbs-v_pp.h │ ├── regularized-coulomb │ │ ├── regularized-coulomb.h │ │ ├── regularized-coulomb_cp.c │ │ ├── regularized-coulomb_cp.h │ │ ├── regularized-coulomb_pc.c │ │ ├── regularized-coulomb_pc.h │ │ ├── regularized-coulomb_pp.c │ │ ├── regularized-coulomb_pp.h │ │ ├── regularized-coulomb_ss_correction.c │ │ ├── regularized-coulomb_ss_correction.h │ │ ├── regularized-coulomb_ss_pc.c │ │ ├── regularized-coulomb_ss_pc.h │ │ ├── regularized-coulomb_ss_pp.c │ │ └── regularized-coulomb_ss_pp.h │ ├── regularized-yukawa │ │ ├── regularized-yukawa.h │ │ ├── regularized-yukawa_cp.c │ │ ├── regularized-yukawa_cp.h │ │ ├── regularized-yukawa_pc.c │ │ ├── regularized-yukawa_pc.h │ │ ├── regularized-yukawa_pp.c │ │ ├── regularized-yukawa_pp.h │ │ ├── regularized-yukawa_ss_correction.c │ │ ├── regularized-yukawa_ss_correction.h │ │ ├── regularized-yukawa_ss_pc.c │ │ ├── regularized-yukawa_ss_pc.h │ │ ├── regularized-yukawa_ss_pp.c │ │ └── regularized-yukawa_ss_pp.h │ ├── sin-over-r │ │ ├── sin-over-r.h │ │ ├── sin-over-r_cp.c │ │ ├── sin-over-r_cp.h │ │ ├── sin-over-r_pc.c │ │ ├── sin-over-r_pc.h │ │ ├── sin-over-r_pp.c │ │ └── sin-over-r_pp.h │ ├── tcf │ │ ├── tcf.c │ │ └── tcf.h │ ├── user_kernel │ │ ├── user_kernel.h │ │ ├── user_kernel_cp.c │ │ ├── 
user_kernel_cp.h │ │ ├── user_kernel_pc.c │ │ ├── user_kernel_pc.h │ │ ├── user_kernel_pp.c │ │ └── user_kernel_pp.h │ └── yukawa │ │ ├── yukawa.h │ │ ├── yukawa_cp.c │ │ ├── yukawa_cp.h │ │ ├── yukawa_pc.c │ │ ├── yukawa_pc.h │ │ ├── yukawa_pp.c │ │ ├── yukawa_pp.h │ │ ├── yukawa_ss_cc.c │ │ ├── yukawa_ss_cc.h │ │ ├── yukawa_ss_correction.c │ │ ├── yukawa_ss_correction.h │ │ ├── yukawa_ss_cp.c │ │ ├── yukawa_ss_cp.h │ │ ├── yukawa_ss_pc.c │ │ ├── yukawa_ss_pc.h │ │ ├── yukawa_ss_pp.c │ │ └── yukawa_ss_pp.h ├── particles │ ├── particles.c │ ├── particles.h │ └── struct_particles.h ├── run_params │ ├── run_params.c │ ├── run_params.h │ └── struct_run_params.h ├── tree │ ├── batches.c │ ├── batches.h │ ├── partition.c │ ├── partition.h │ ├── struct_tree.h │ ├── struct_tree_linked_list_node.h │ ├── tree.c │ ├── tree.h │ ├── tree_linked_list.c │ └── tree_linked_list.h └── utilities │ ├── advanced_timings.c │ ├── advanced_timings.h │ ├── array.h │ ├── enums.h │ ├── timers.c │ ├── timers.h │ ├── tools.c │ ├── tools.h │ ├── xmalloc.c │ └── xmalloc.h └── tests ├── .gitignore ├── CMakeLists.txt ├── minunit.h └── serial_tests.c /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | */.DS_Store 3 | *.ptp* 4 | *.cproject 5 | *org.eclipse* 6 | *.settings* 7 | *.tsv 8 | *.csv 9 | *.xml 10 | *.prefs 11 | build/ 12 | .project 13 | .cproject 14 | .pydevproject 15 | interfaces/python/__pycache__ -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9...3.14) 2 | project(BaryTree VERSION 1.0 LANGUAGES C) 3 | 4 | set(default_build_type "Release") 5 | 6 | if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) 7 | message(STATUS "Setting build type to '${default_build_type}' as none was specified.") 8 | 9 | set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE 10 | STRING "Choose the type of build." FORCE) 11 | 12 | set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS 13 | "Debug" "Release" "None") 14 | endif() 15 | 16 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) 17 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib) 18 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib) 19 | 20 | option(BUILD_EXAMPLES "build examples with Zoltan" ON) 21 | option(ENABLE_GPU_BUILD "build OpenACC GPU version" ON) 22 | option(BUILD_SHARED_LIBS "build libraries as shared objects" ON) 23 | 24 | find_package(MPI REQUIRED) 25 | find_package(OpenMP REQUIRED) 26 | 27 | if(BUILD_EXAMPLES OR BUILD_TESTS) 28 | find_package(Zoltan REQUIRED) 29 | add_library(Zoltan_Interface INTERFACE) 30 | target_link_libraries(Zoltan_Interface INTERFACE zoltan) 31 | target_include_directories(Zoltan_Interface INTERFACE ${Zoltan_INCLUDE_DIRS}) 32 | endif() 33 | 34 | #Some PGI versions seem to require a link flag for OpenMP 35 | set_property(TARGET OpenMP::OpenMP_C APPEND 36 | PROPERTY 37 | INTERFACE_LINK_LIBRARIES 38 | $<$<C_COMPILER_ID:PGI>:-mp>) 39 | 40 | #Creating an interface for OpenACC 41 | add_library(OpenACC_C INTERFACE) 42 | set_property(TARGET OpenACC_C PROPERTY 43 | INTERFACE_COMPILE_OPTIONS 44 | -acc 45 | -ta=tesla:ccall,cc60$<$<CONFIG:Debug>:,time> 46 | -Minfo) 47 | set_property(TARGET OpenACC_C PROPERTY 48 | INTERFACE_LINK_LIBRARIES 49 | -acc 50 | -ta=tesla:ccall,cc60) 51 | 52 | if(ENABLE_GPU_BUILD AND NOT (CMAKE_C_COMPILER_ID MATCHES PGI)) 53 | message(STATUS "GPU build requires PGI compilers!
Turning GPU build off.") 54 | set(ENABLE_GPU_BUILD OFF CACHE BOOL "build OpenACC GPU version" FORCE) 55 | endif() 56 | 57 | enable_testing() 58 | add_subdirectory(src) 59 | add_subdirectory(examples) 60 | add_subdirectory(tests) 61 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | Installing 2 | ---------- 3 | 4 | In a best case scenario, building and installing the libraries and examples should be as simple as this: 5 | 6 | mkdir build; cd build; export CC=; 7 | cmake .. -DCMAKE_INSTALL_PREFIX=; 8 | make -j install; 9 | 10 | This assumes that you have a few things: 11 | 1. a sane C compiler, 12 | 2. a sane MPI installation that agrees with your C compiler, 13 | 3. CMake version 3.9 or newer, 14 | 4. an installed Trilinos Zoltan library (for building examples). 15 | 16 | If you don't have an install of Zoltan, then you must turn off building of 17 | examples with `-DBUILD_EXAMPLES=OFF`. 18 | 19 | Compiling GPU versions requires that a PGI C compiler be used. If another compiler 20 | other than pgcc is used, for instance gcc or icc, support for building GPU versions 21 | will be automatically turned off during configuration. 22 | 23 | CMake Flags 24 | ----------- 25 | The most useful CMake flags to use during configure are listed below. When passing a flag 26 | to `cmake` during configure, recall that it takes the form `-D=value`. 27 | | Flag | Option/ Value | Description 28 | |------------------------|------------------------------|------------ 29 | | `CMAKE_RELEASE_TYPE` | Debug, Release | Build either the Debug or Release version. 30 | | `ENABLE_GPU_BUILD` | ON, OFF | Toggle whether to build the GPU versions. 31 | | `CMAKE_INSTALL_PREFIX` | `` | Specify install location for `make install`. 32 | | `BUILD_EXAMPLES` | ON, OFF | Toggle whether to build examples (requires Zoltan). 33 | | `BUILD_SHARED_LIBS` | ON, OFF | Toggle whether to build libraries as shared or static objects. 34 | | `Zoltan_DIR` | `` | Specify location of Zoltan CMake configuration file if not picked up by CMake automatically (typically `lib/cmake/Zoltan` of wherever Trilinos was installed). 35 | 36 | If the Zoltan install isn't picked up automatically, you can also add the install location of Trilinos or Zoltan to the CMake module search path with `-DCMAKE_PREFIX_PATH=`. This is an alternative to explicitly setting `Zoltan_DIR`. 37 | 38 | Testing 39 | ------- 40 | After building, you can test the build by running `ctest` or `make test` from the build 41 | directory. This performs a series of simple serial tests. 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 The Regents of the University of Michigan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ____ _______ 2 | | _ \ |__ __| 3 | | |_) | __ _ _ __ _ _| |_ __ ___ ___ 4 | | _ < / _` | '__| | | | | '__/ _ \/ _ \ 5 | | |_) | (_| | | | |_| | | | | __| __/ 6 | |____/ \__,_|_| \__, |_|_| \___|\___| 7 | __/ | 8 | |___/ 9 | BaryTree 10 | ======== 11 | 12 | A work-in-progress library for fast computation of N-body interactions on multiple GPUs, 13 | BaryTree implements barycentric Lagrange and Hermite polynomial interpolation fast 14 | summation methods. The current code employs an OpenACC GPU implementation with MPI 15 | for distributed memory parallelization. 16 | 17 | 18 | Authors: 19 | - Leighton W. Wilson (lwwilson@umich.edu) 20 | - Nathan J. Vaughn (njvaughn@umich.edu) 21 | 22 | Department of Mathematics, 23 | University of Michigan, Ann Arbor. 24 | 25 | 26 | Building 27 | -------- 28 | This project uses CMake to manage and configure its build system. In principle, 29 | building this project is as simple as executing the following from the top level 30 | directory of BaryTree: 31 | 32 | mkdir build; cd build; export CC=<your C compiler>; cmake ..; make 33 | 34 | Compiling GPU versions requires that a PGI C compiler be used. For more information 35 | on building and installing, see __INSTALL.md__ in this directory. 36 | 37 | 38 | Examples 39 | -------- 40 | See the __examples__ directory for several example executables that use __BaryTree__ 41 | and the Trilinos __Zoltan__ library for load balancing, in addition to an example 42 | using the library's interface for C programs. See __examples/README.md__ for more 43 | details. 44 | 45 | 46 | Interfaces 47 | ---------- 48 | See the __interfaces__ directory for __BaryTree__ interfaces for non-C programs. 49 | Currently, there are Fortran and Python interfaces and example programs using those interfaces. 50 | See __interfaces/README.md__ for more details. 51 | 52 | 53 | References 54 | ---------- 55 | Please refer to the following references for more background: 56 | 57 | - L. Wilson, N. Vaughn, and R. Krasny, A GPU-accelerated fast 58 | multipole method based on barycentric Lagrange interpolation 59 | and dual tree traversal, 60 | _Comput. Phys. Commun._ __265__ (2021), 108017. 61 | 62 | - N. Vaughn, L. Wilson, and R. Krasny, A GPU-accelerated barycentric 63 | Lagrange treecode, 64 | _Proc. 21st IEEE Int. Workshop Parallel Distrib. Sci. Eng. 65 | Comput._ (PDSEC 2020) (2020). 66 | 67 | - L. Wang, R. Krasny, and S. Tlupova, A kernel-independent treecode 68 | based on barycentric Lagrange interpolation, 69 | _Commun. Comput. Phys._ __28__ (2020), 1415-1436. 70 | 71 | - R. Krasny and L. Wang, A treecode based on barycentric Hermite 72 | interpolation for electrostatic particle interactions, 73 | _Comput. Math. Biophys._ __7__ (2019), 73-84. 74 | 75 | - H. A. Boateng and R.
Krasny, Comparison of treecodes for 76 | computing electrostatic potentials in charged particle 77 | systems with disjoint targets and sources, 78 | _J. Comput. Chem._ __34__ (2013), 2159-2167. 79 | 80 | - J.-P. Berrut and L. N. Trefethen, Barycentric Lagrange interpolation, 81 | _SIAM Rev._ __46__ (2004), 501-517. 82 | 83 | - Z.-H. Duan and R. Krasny, An adaptive treecode for computing 84 | nonbonded potential energy in classical molecular systems, 85 | _J. Comput. Chem._ __22__ (2001), 184–195. 86 | 87 | 88 | License 89 | ------- 90 | Copyright © 2019-2021, The Regents of the University of Michigan. Released under the [MIT License](LICENSE). 91 | 92 | 93 | Disclaimer 94 | ---------- 95 | This material is based upon work supported by the National Science Foundation under grant DMS-1819094, and by the Extreme Science and Engineering Discovery Environment (XSEDE) under grants ACI-1548562 and ASC-190062. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the author(s) and do not necessarily reflect the views of the National Science Foundation. 96 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(BUILD_EXAMPLES) 2 | 3 | set (AUX_SRCS 4 | zoltan_fns.h 5 | zoltan_fns.c 6 | support_fns.h 7 | support_fns.c) 8 | 9 | add_executable(random_cube_cpu random_cube.c ${AUX_SRCS}) 10 | target_link_libraries(random_cube_cpu PRIVATE BaryTree_cpu Zoltan_Interface) 11 | install(TARGETS random_cube_cpu DESTINATION bin) 12 | 13 | add_executable(random_cube_reproducible_cpu random_cube_reproducible.c ${AUX_SRCS}) 14 | target_link_libraries(random_cube_reproducible_cpu PRIVATE BaryTree_cpu Zoltan_Interface) 15 | install(TARGETS random_cube_reproducible_cpu DESTINATION bin) 16 | 17 | add_executable(run_readin_cpu run_readin.c ${AUX_SRCS}) 18 | target_link_libraries(run_readin_cpu PRIVATE BaryTree_cpu Zoltan_Interface) 19 | install(TARGETS run_readin_cpu DESTINATION bin) 20 | 21 | add_executable(test_BaryTreeInterface_cpu test_BaryTreeInterface.c) 22 | target_link_libraries(test_BaryTreeInterface_cpu PRIVATE BaryTree_cpu) 23 | install(TARGETS test_BaryTreeInterface_cpu DESTINATION bin) 24 | 25 | if(ENABLE_GPU_BUILD) 26 | add_executable(random_cube_gpu random_cube.c ${AUX_SRCS}) 27 | target_link_libraries(random_cube_gpu PRIVATE BaryTree_gpu Zoltan_Interface) 28 | install(TARGETS random_cube_gpu DESTINATION bin) 29 | 30 | add_executable(random_cube_reproducible_gpu random_cube_reproducible.c ${AUX_SRCS}) 31 | target_link_libraries(random_cube_reproducible_gpu PRIVATE BaryTree_gpu Zoltan_Interface) 32 | install(TARGETS random_cube_reproducible_gpu DESTINATION bin) 33 | 34 | add_executable(run_readin_gpu run_readin.c ${AUX_SRCS}) 35 | target_link_libraries(run_readin_gpu PRIVATE BaryTree_gpu Zoltan_Interface) 36 | install(TARGETS run_readin_gpu DESTINATION bin) 37 | 38 | add_executable(test_BaryTreeInterface_gpu test_BaryTreeInterface.c) 39 | target_link_libraries(test_BaryTreeInterface_gpu PRIVATE BaryTree_gpu) 40 | install(TARGETS test_BaryTreeInterface_gpu DESTINATION bin) 41 | endif() 42 | 43 | endif() 44 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | Examples 2 | -------- 3 | 4 | This examples folder builds eight executables: 5 | 6 | 1.
__random_cube_cpu__ and __random_cube_gpu__ 7 | 2. __random_cube_reproducible_cpu__ and __random_cube_reproducible_gpu__ 8 | 3. __test_BaryTreeInterface_cpu__ and __test_BaryTreeInterface_gpu__ 9 | 4. __run_readin_cpu__ and __run_readin_gpu__ (not detailed below) 10 | - - - 11 | 12 | #### __random_cube__ and __random_cube_reproducible__ 13 | 14 | All of the random cube examples demonstrate the treecode's performance 15 | using a cube of uniformly distributed random particles, load balanced 16 | with Zoltan's recursive coordinate bisection. 17 | 18 | The argument given to the executable is a parameter file that 19 | specifies the run. An example is given here as __example.in__. For 20 | example, one would run: 21 | 22 | mpirun -n 2 random_cube_cpu example.in 23 | 24 | to run the __random_cube_cpu__ example with the parameters specified in 25 | the file __example.in__ across two ranks. 26 | 27 | The parameters that can be specified in the infile are as follows: 28 | | Parameter | Description 29 | |-------------------|------------------ 30 | | `num_particles` | Number of sources and targets. Mutually exclusive with the `num_sources` and `num_targets` parameters. 31 | | `num_sources` | Number of sources. 32 | | `num_targets` | Number of targets. 33 | | `distribution` | Underlying particle distribution: `UNIFORM`, `GAUSSIAN`, `EXPONENTIAL`, `PLUMMER`, or `PLUMMER_SYMMETRIC`. 34 | | `degree` | Degree of polynomial interpolation. 35 | | `theta` | Multipole acceptance criterion (MAC). 36 | | `max_per_source_leaf` | Maximum number of particles per source tree leaf (or source batch, for `CLUSTER_PARTICLE`). 37 | | `max_per_target_leaf` | Maximum number of particles per target tree leaf (or target batch, for `PARTICLE_CLUSTER`). 38 | | `beta` | Automatic tuning accuracy parameter. Number in [0,1]; higher is more accurate. 39 | | `compute_type` | Type of treecode method: `CLUSTER_PARTICLE`, `PARTICLE_CLUSTER` (i.e. BLTC), or `CLUSTER_CLUSTER` (i.e. BLDTT). 40 | | `approximation` | Type of polynomial: `LAGRANGE` or `HERMITE`. `HERMITE` is incompatible with cluster-cluster. 41 | | `kernel_name` | Name of interaction kernel: `COULOMB`, `YUKAWA`, `REGULARIZED_COULOMB`, `REGULARIZED_YUKAWA`, `SIN_OVER_R`, `USER`. 42 | | `kernel_params` | Comma-separated list of parameters for the given kernel. 43 | | `run_direct` | Run direct calculation for error comparison: `ON` or `OFF`. 44 | | `verbosity` | Determines verbosity level of output. Integer `0`, `1`, `2`, `3`. Higher means more output. 45 | | `slice` | Determines the proportion of target sites at which the direct calculation is performed for error comparison. 10 would mean every 10th target is sampled. 46 | 47 | 48 | Note the difference between these executables: 49 | 50 | - The __random_cube__ examples are designed to test the 51 | problem size limits of the treecode by overcoming limits in Zoltan's 52 | maximum array sizes. Unlike the __random_cube_reproducible__ examples, which first 53 | generate all random particles and then use Zoltan to load balance them, 54 | these examples generate a small number of particles, load balance 55 | them, determine the resulting bounding boxes, and then generate the 56 | specified number of random particles in those bounding boxes. The results 57 | produced in terms of performance and accuracy should be very similar to 58 | the __random_cube_reproducible__ examples. 59 | 60 | - The __random_cube_reproducible__ examples are designed for reproducibility 61 | of results.
Given a total number of particles across all ranks, the 62 | actual random particles will be the same no matter how many ranks 63 | are used (given that the executable is run on the same computational 64 | resource). Additionally, this example requires that the number of sources 65 | and targets be equal. 66 | 67 | - - - 68 | 69 | #### __test_BaryTreeInterface__ 70 | 71 | The __test_BaryTreeInterface__ examples demonstrate how to use the C wrapper 72 | for the treecode. A C program that links to the __BaryTree__ library can, 73 | in fact, directly use the `treedriver` function if the calling program 74 | implements the particle and run-parameter structs used by `treedriver` 75 | (as done in the above examples). The `BaryTreeInterface` function, 76 | however, takes source and target particle arrays directly. 77 | -------------------------------------------------------------------------------- /examples/example.in: -------------------------------------------------------------------------------- 1 | num_sources 20000 2 | num_targets 20000 3 | degree 2 4 | theta 0.9 5 | beta -1.0 6 | size_check 0.0 7 | max_per_source_leaf 100 8 | max_per_target_leaf 100 9 | kernel_name coulomb 10 | kernel_params 1.0 11 | approximation lagrange 12 | compute_type particle-cluster 13 | distribution uniform 14 | run_direct 1 15 | slice 10 16 | verbosity 1 17 | -------------------------------------------------------------------------------- /examples/support_fns.h: -------------------------------------------------------------------------------- 1 | #ifndef H_SUPPORT_FUNCTIONS_H 2 | #define H_SUPPORT_FUNCTIONS_H 3 | 4 | #include <stdio.h> 5 | 6 | #include "../src/run_params/struct_run_params.h" 7 | 8 | 9 | typedef enum DISTRIBUTION 10 | { 11 | NO_DISTRIBUTION, 12 | UNIFORM, 13 | GAUSSIAN, 14 | EXPONENTIAL, 15 | PLUMMER, 16 | PLUMMER_SYMMETRIC, 17 | SLAB_1, 18 | SLAB_2, 19 | SPHERICAL_SHELL 20 | } DISTRIBUTION; 21 | 22 | typedef enum PARTITION 23 | { 24 | NO_PARTITION, 25 | RCB, 26 | HSFC 27 | } PARTITION; 28 | 29 | 30 | void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int *run_direct, int *slice, 31 | double *xyz_limits, DISTRIBUTION *distribution, PARTITION *partition); 32 | 33 | 34 | double Point_Set_Init(DISTRIBUTION distribution); 35 | 36 | double Point_Set(DISTRIBUTION distribution, double xmin, double xmax); 37 | 38 | void Point_Plummer(double R, double *x, double *y, double *z); 39 | 40 | void Point_Plummer_Octant(double R, double *x, double *y, double *z); 41 | 42 | void Point_Gaussian(double *x, double *y, double *z); 43 | 44 | void Point_Exponential(double *x, double *y, double *z); 45 | 46 | void Point_Spherical_Shell(double R, double *x, double *y, double *z); 47 | 48 | 49 | void Timing_Calculate(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], 50 | double time_run[4], double time_tree[13], double time_direct[4]); 51 | 52 | void Timing_Print(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], 53 | int run_direct, struct RunParams *run_params); 54 | 55 | 56 | void Accuracy_Calculate(double *potential_engy_glob, double *potential_engy_direct_glob, 57 | double *glob_inf_err, double *glob_relinf_err, double *glob_n2_err, double *glob_reln2_err, 58 | double *potential, double *potential_direct, int targets_num, int slice); 59 | 60 | void Accuracy_Print(double potential_engy_glob, double potential_engy_direct_glob, 61 | double glob_inf_err, double glob_relinf_err, double glob_n2_err, double glob_reln2_err, 62 | int slice);
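/* Sketch of intent, inferred from the parameter list that follows: CSV_Print
 * gathers the run configuration, global timings, and accuracy results into a
 * CSV summary file; see support_fns.c for the exact columns written. */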
64 | 65 | void CSV_Print(int N, int M, struct RunParams *run_params, 66 | double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], 67 | double potential_engy_glob, double potential_engy_direct_glob, 68 | double glob_inf_err, double glob_relinf_err, double glob_n2_err, double glob_reln2_err); 69 | 70 | 71 | #endif /* H_SUPPORT_FUNCTIONS_H */ 72 | -------------------------------------------------------------------------------- /examples/test_BaryTreeInterface.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "../src/interface/BaryTreeInterface.h" 7 | 8 | int main(int argc, char **argv) 9 | { 10 | int rank, numProcs; 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 13 | MPI_Comm_size(MPI_COMM_WORLD, &numProcs); 14 | 15 | int numSources = 20000; 16 | int numTargets = 20000; 17 | 18 | KERNEL kernel = YUKAWA; 19 | SINGULARITY singularity = SKIPPING; 20 | APPROXIMATION approximation = LAGRANGE; 21 | COMPUTE_TYPE compute_type = PARTICLE_CLUSTER; 22 | 23 | int numParams = 1; 24 | double kernelParams[1] = {0.5}; 25 | 26 | int interpDegree = 5; 27 | double theta = 0.8; 28 | double beta = 1.0; 29 | 30 | int maxPerLeaf = 500; 31 | int maxPerBatch = 500; 32 | double sizeCheck = 1.0; 33 | 34 | int verbosity = 0; 35 | 36 | double *xS = malloc(numSources * sizeof(double)); 37 | double *yS = malloc(numSources * sizeof(double)); 38 | double *zS = malloc(numSources * sizeof(double)); 39 | double *qS = malloc(numSources * sizeof(double)); 40 | double *wS = malloc(numSources * sizeof(double)); 41 | 42 | double *xT = malloc(numTargets * sizeof(double)); 43 | double *yT = malloc(numTargets * sizeof(double)); 44 | double *zT = malloc(numTargets * sizeof(double)); 45 | double *qT = malloc(numTargets * sizeof(double)); 46 | 47 | double *potential = malloc(numTargets * sizeof(double)); 48 | 49 | for (int i = 0; i < numSources; ++i) { 50 | xS[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; 51 | yS[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; 52 | zS[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; 53 | qS[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; 54 | wS[i] = 1.; 55 | } 56 | 57 | for (int i = 0; i < numTargets; ++i) { 58 | xT[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; 59 | yT[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; 60 | zT[i] = ((double)rand()/(double)(RAND_MAX)) * 2. 
- 1.; 61 | qT[i] = 1.; 62 | } 63 | 64 | BaryTreeInterface(numTargets, numSources, xT, yT, zT, qT, 65 | xS, yS, zS, qS, wS, potential, 66 | kernel, numParams, kernelParams, 67 | singularity, approximation, compute_type, 68 | theta, interpDegree, maxPerLeaf, maxPerBatch, 69 | sizeCheck, beta, verbosity); 70 | 71 | printf("[test BaryTree interface] BaryTree has finished.\n"); 72 | 73 | free(xS); 74 | free(yS); 75 | free(zS); 76 | free(qS); 77 | free(wS); 78 | free(xT); 79 | free(yT); 80 | free(zT); 81 | free(qT); 82 | free(potential); 83 | 84 | MPI_Finalize(); 85 | 86 | return 0; 87 | } 88 | -------------------------------------------------------------------------------- /examples/zoltan_fns.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "zoltan_fns.h" 6 | 7 | 8 | int ztn_get_number_of_objects(void *data, int *ierr) 9 | { 10 | MESH_DATA *mesh = (MESH_DATA *)data; 11 | *ierr = ZOLTAN_OK; 12 | return mesh->numMyPoints; 13 | } 14 | 15 | 16 | void ztn_get_object_list(void *data, int sizeGID, int sizeLID, 17 | ZOLTAN_ID_PTR globalID, ZOLTAN_ID_PTR localID, 18 | int wgt_dim, float *obj_wgts, int *ierr) 19 | { 20 | int i; 21 | MESH_DATA *mesh = (MESH_DATA *)data; 22 | *ierr = ZOLTAN_OK; 23 | 24 | /* Return the IDs of our objects, along with per-object 25 | * weights taken from the mesh's b array. 26 | */ 27 | 28 | for (i = 0; i < mesh->numMyPoints; i++) { 29 | globalID[i] = mesh->myGlobalIDs[i]; 30 | localID[i] = i; 31 | obj_wgts[i] = mesh->b[i]; 32 | } 33 | } 34 | 35 | 36 | int ztn_get_num_geometry(void *data, int *ierr) 37 | { 38 | *ierr = ZOLTAN_OK; 39 | return 3; 40 | } 41 | 42 | 43 | void ztn_get_geometry_list(void *data, int sizeGID, int sizeLID, int num_obj, 44 | ZOLTAN_ID_PTR globalID, ZOLTAN_ID_PTR localID, 45 | int num_dim, double *geom_vec, int *ierr) 46 | { 47 | int i; 48 | 49 | MESH_DATA *mesh = (MESH_DATA *)data; 50 | 51 | if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 3)) { 52 | *ierr = ZOLTAN_FATAL; 53 | return; 54 | } 55 | 56 | *ierr = ZOLTAN_OK; 57 | 58 | for (i = 0; i < num_obj ; i++){ 59 | geom_vec[3*i] = (double)mesh->x[i]; 60 | geom_vec[3*i + 1] = (double)mesh->y[i]; 61 | geom_vec[3*i + 2] = (double)mesh->z[i]; 62 | } 63 | 64 | return; 65 | } 66 | 67 | 68 | void ztn_pack(void *data, int num_gid_entries, int num_lid_entries, 69 | ZOLTAN_ID_PTR global_id, ZOLTAN_ID_PTR local_id, 70 | int dest, int size, char *buf, int *ierr) { 71 | 72 | SINGLE_MESH_DATA *mesh_single = (SINGLE_MESH_DATA *)buf; 73 | MESH_DATA *mesh = (MESH_DATA *)data; 74 | 75 | mesh_single->x = mesh->x[(*local_id)]; 76 | mesh_single->y = mesh->y[(*local_id)]; 77 | mesh_single->z = mesh->z[(*local_id)]; 78 | mesh_single->q = mesh->q[(*local_id)]; 79 | mesh_single->w = mesh->w[(*local_id)]; 80 | mesh_single->b = mesh->b[(*local_id)]; 81 | mesh_single->myGlobalID = mesh->myGlobalIDs[(*local_id)]; 82 | 83 | mesh->myGlobalIDs[(*local_id)] = (ZOLTAN_ID_TYPE)(-1); // Mark local particle as exported 84 | 85 | return; 86 | } 87 | 88 | 89 | void ztn_unpack(void *data, int num_gid_entries, 90 | ZOLTAN_ID_PTR global_id, 91 | int size, char *buf, int *ierr) { 92 | 93 | SINGLE_MESH_DATA *mesh_single = (SINGLE_MESH_DATA *)buf; 94 | MESH_DATA *mesh = (MESH_DATA *)data; 95 | 96 | mesh->numMyPoints += 1; 97 | 98 | mesh->myGlobalIDs = (ZOLTAN_ID_TYPE *)realloc(mesh->myGlobalIDs, 99 | sizeof(ZOLTAN_ID_TYPE) * mesh->numMyPoints); 100 | mesh->x = (double *)realloc(mesh->x, sizeof(double) * mesh->numMyPoints); 101 |
mesh->y = (double *)realloc(mesh->y, sizeof(double) * mesh->numMyPoints); 102 | mesh->z = (double *)realloc(mesh->z, sizeof(double) * mesh->numMyPoints); 103 | mesh->q = (double *)realloc(mesh->q, sizeof(double) * mesh->numMyPoints); 104 | mesh->w = (double *)realloc(mesh->w, sizeof(double) * mesh->numMyPoints); 105 | mesh->b = (double *)realloc(mesh->b, sizeof(double) * mesh->numMyPoints); 106 | 107 | mesh->x[mesh->numMyPoints-1] = mesh_single->x; 108 | mesh->y[mesh->numMyPoints-1] = mesh_single->y; 109 | mesh->z[mesh->numMyPoints-1] = mesh_single->z; 110 | mesh->q[mesh->numMyPoints-1] = mesh_single->q; 111 | mesh->w[mesh->numMyPoints-1] = mesh_single->w; 112 | mesh->b[mesh->numMyPoints-1] = mesh_single->b; 113 | mesh->myGlobalIDs[mesh->numMyPoints-1] = mesh_single->myGlobalID; 114 | 115 | return; 116 | } 117 | 118 | 119 | int ztn_obj_size(void *data, int num_gid_entries, int num_lid_entries, 120 | ZOLTAN_ID_PTR global_id, ZOLTAN_ID_PTR local_id, int *ierr) 121 | { 122 | return sizeof(SINGLE_MESH_DATA); 123 | } 124 | -------------------------------------------------------------------------------- /examples/zoltan_fns.h: -------------------------------------------------------------------------------- 1 | #ifndef H_ZOLTAN_SUPPORT_FUNCTIONS_H 2 | #define H_ZOLTAN_SUPPORT_FUNCTIONS_H 3 | 4 | #include <zoltan.h> 5 | 6 | typedef struct{ 7 | int numGlobalPoints; 8 | int numMyPoints; 9 | ZOLTAN_ID_PTR myGlobalIDs; 10 | double *x; 11 | double *y; 12 | double *z; 13 | double *q; 14 | double *w; 15 | double *b; 16 | } MESH_DATA; 17 | 18 | typedef struct{ 19 | ZOLTAN_ID_TYPE myGlobalID; 20 | double x; 21 | double y; 22 | double z; 23 | double q; 24 | double w; 25 | double b; 26 | } SINGLE_MESH_DATA; 27 | 28 | 29 | int ztn_get_number_of_objects(void *data, int *ierr); 30 | 31 | void ztn_get_object_list(void *data, int sizeGID, int sizeLID, 32 | ZOLTAN_ID_PTR globalID, ZOLTAN_ID_PTR localID, 33 | int wgt_dim, float *obj_wgts, int *ierr); 34 | 35 | int ztn_get_num_geometry(void *data, int *ierr); 36 | 37 | void ztn_get_geometry_list(void *data, int sizeGID, int sizeLID, 38 | int num_obj, ZOLTAN_ID_PTR globalID, ZOLTAN_ID_PTR localID, 39 | int num_dim, double *geom_vec, int *ierr); 40 | 41 | void ztn_pack(void *data, int num_gid_entries, int num_lid_entries, 42 | ZOLTAN_ID_PTR global_id, ZOLTAN_ID_PTR local_id, 43 | int dest, int size, char *buf, int *ierr); 44 | 45 | void ztn_unpack(void *data, int num_gid_entries, 46 | ZOLTAN_ID_PTR global_id, 47 | int size, char *buf, int *ierr); 48 | 49 | int ztn_obj_size(void *data, int num_gid_entries, int num_lid_entries, 50 | ZOLTAN_ID_PTR global_id, ZOLTAN_ID_PTR local_id, int *ierr); 51 | 52 | 53 | #endif /* H_ZOLTAN_SUPPORT_FUNCTIONS_H */ 54 | -------------------------------------------------------------------------------- /interfaces/README.md: -------------------------------------------------------------------------------- 1 | Interfaces 2 | ---------- 3 | 4 | This folder contains interfaces between __BaryTree__ and other languages. 5 | The BaryTree library itself contains `BaryTreeInterface`. This function takes as input 6 | pointers to the particle arrays, as well as runtime parameters such as kernel information, 7 | the MAC parameter, and batch and cluster sizes. This function first constructs the particle 8 | structs and then calls `treedriver`. 9 | 10 | The interfaces contained in these subdirectories are responsible for supplying 11 | `BaryTreeInterface` with the necessary pointers to particle arrays and the runtime metadata.
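As a minimal sketch of a direct C call (mirroring __examples/test_BaryTreeInterface.c__; the helper name `call_barytree_sketch` and the assumption that all arrays are already allocated and filled are illustrative only):

```c
#include "BaryTreeInterface.h"

/* Sketch only: xT, yT, zT, qT, and potential have length numTargets;
 * xS, yS, zS, qS, wS have length numSources; MPI is assumed to be
 * initialized by the caller, as in test_BaryTreeInterface.c. */
void call_barytree_sketch(int numTargets, int numSources,
                          double *xT, double *yT, double *zT, double *qT,
                          double *xS, double *yS, double *zS, double *qS, double *wS,
                          double *potential)
{
    double kernelParams[1] = {0.5};         /* Yukawa screening parameter */

    BaryTreeInterface(numTargets, numSources,
                      xT, yT, zT, qT,       /* target coordinates and values */
                      xS, yS, zS, qS, wS,   /* source coordinates, values, quadrature weights */
                      potential,            /* output array, length numTargets */
                      YUKAWA, 1, kernelParams,
                      SKIPPING, LAGRANGE, PARTICLE_CLUSTER,
                      0.8,                  /* theta (MAC) */
                      5,                    /* interpolation degree */
                      500, 500,             /* max per source leaf, max per target batch */
                      1.0,                  /* sizeCheck */
                      1.0,                  /* beta */
                      0);                   /* verbosity */
}
```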
12 | 13 | ---------- 14 | 15 | ### Python 16 | 17 | The Python folder contains __BaryTreeInterface.py__, which uses the `ctypes` module to load 18 | the library, set the argument types, construct pointers to the `numpy` arrays, and call 19 | `BaryTreeInterface`. 20 | 21 | __testBaryTreeInterface.py__ imports the Python wrapper, generates some random particles, 22 | and calls the treecode twice: once with explicit `theta`/`degree` settings, and once using the `beta` auto-tuning parameter. 23 | 24 | The `w` array is for quadrature weights when computing discrete convolution sums; 25 | it is set to ones for particle simulations. 26 | -------------------------------------------------------------------------------- /interfaces/fortran/BaryTreeInterface.fh: -------------------------------------------------------------------------------- 1 | ENUM, BIND(C) 2 | ENUMERATOR :: NO_KERNEL = 0, COULOMB, YUKAWA, & 3 | REGULARIZED_COULOMB, REGULARIZED_YUKAWA, & 4 | ATANF, TCF, DCF, SIN_OVER_R, MQ, RBS_U, RBS_V, USER 5 | 6 | ENUMERATOR :: NO_SINGULARITY = 0, SKIPPING, SUBTRACTION 7 | 8 | ENUMERATOR :: NO_APPROX = 0, LAGRANGE, HERMITE 9 | 10 | ENUMERATOR :: NO_COMPUTE_TYPE = 0, PARTICLE_CLUSTER, & 11 | CLUSTER_PARTICLE, CLUSTER_CLUSTER 12 | END ENUM 13 | 14 | INTERFACE 15 | SUBROUTINE BaryTreeInterface(num_targets, num_sources, & 16 | target_x, target_y, target_z, target_q, & 17 | source_x, source_y, source_z, source_q, source_w, & 18 | potential, kernel, num_kernel_params, kernel_params, & 19 | singularity, approximation, compute_type, & 20 | theta, degree, max_source_leaf, max_target_leaf, & 21 | size_check, beta, verbosity) & 22 | BIND(C, NAME='BaryTreeInterface') 23 | 24 | USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT, C_DOUBLE, C_PTR 25 | IMPLICIT NONE 26 | 27 | INTEGER(KIND=C_INT), VALUE, INTENT(IN) :: num_targets, num_sources 28 | TYPE(C_PTR), VALUE, INTENT(IN) :: target_x, target_y, target_z, target_q 29 | TYPE(C_PTR), VALUE, INTENT(IN) :: source_x, source_y, source_z, source_q, source_w 30 | TYPE(C_PTR), VALUE, INTENT(IN) :: potential 31 | 32 | INTEGER(KIND=C_INT), VALUE, INTENT(IN) :: kernel, num_kernel_params, & 33 | singularity, approximation, compute_type, degree, & 34 | max_source_leaf, max_target_leaf, verbosity 35 | TYPE(C_PTR), VALUE, INTENT(IN) :: kernel_params 36 | REAL(KIND=C_DOUBLE), VALUE, INTENT(IN) :: theta, size_check, beta 37 | 38 | END SUBROUTINE BaryTreeInterface 39 | END INTERFACE 40 | -------------------------------------------------------------------------------- /interfaces/fortran/example_interface.f03: -------------------------------------------------------------------------------- 1 | PROGRAM BaryTree_Fortran_Example 2 | 3 | USE MPI 4 | USE ISO_C_BINDING, ONLY: C_LOC 5 | IMPLICIT NONE 6 | 7 | INCLUDE "BaryTreeInterface.fh" 8 | 9 | INTEGER :: rank, num_proc, ierr 10 | 11 | INTEGER :: num_targets, num_sources, kernel, num_kernel_params, singularity, & 12 | approximation, compute_type, degree, max_source_leaf, max_target_leaf, & 13 | verbosity 14 | 15 | DOUBLE PRECISION, POINTER, DIMENSION(:) :: target_x, target_y, target_z, target_q, & 16 | source_x, source_y, source_z, source_q, source_w, potential 17 | 18 | DOUBLE PRECISION, DIMENSION(2) :: kernel_params 19 | 20 | DOUBLE PRECISION :: theta, size_check, beta 21 | 22 | CALL MPI_INIT(ierr) 23 | CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr) 24 | CALL MPI_COMM_SIZE(MPI_COMM_WORLD, num_proc, ierr) 25 | 26 | kernel = RBS_U 27 | num_kernel_params = 1 28 | kernel_params(1) = 0.5 29 | 30 | singularity = SKIPPING 31 | approximation = LAGRANGE 32 | verbosity = 3 33 | 34 | max_source_leaf = 50 35 | max_target_leaf = 50 36 |
size_check = 1.0 37 | beta = -1 38 | 39 | compute_type = CLUSTER_CLUSTER 40 | theta = 0.7 41 | degree = 3 42 | 43 | num_targets = 10000 44 | num_sources = 10000 45 | 46 | ALLOCATE(target_x(num_targets), target_y(num_targets), target_z(num_targets), & 47 | target_q(num_targets), potential(num_targets), & 48 | source_x(num_sources), source_y(num_sources), source_z(num_sources), & 49 | source_q(num_sources), source_w(num_sources)) 50 | 51 | CALL RANDOM_NUMBER(target_x) 52 | CALL RANDOM_NUMBER(target_y) 53 | CALL RANDOM_NUMBER(target_z) 54 | CALL RANDOM_NUMBER(target_q) 55 | 56 | CALL RANDOM_NUMBER(source_x) 57 | CALL RANDOM_NUMBER(source_y) 58 | CALL RANDOM_NUMBER(source_z) 59 | CALL RANDOM_NUMBER(source_q) 60 | CALL RANDOM_NUMBER(source_w) 61 | 62 | ! Calling with kernel as RBS_U 63 | CALL BaryTreeInterface(num_targets, num_sources, & 64 | C_LOC(target_x), C_LOC(target_y), C_LOC(target_z), C_LOC(target_q), & 65 | C_LOC(source_x), C_LOC(source_y), C_LOC(source_z), C_LOC(source_q), & 66 | C_LOC(source_w), C_LOC(potential), & 67 | kernel, num_kernel_params, C_LOC(kernel_params), & 68 | singularity, approximation, compute_type, theta, degree, & 69 | max_source_leaf, max_target_leaf, size_check, beta, & 70 | verbosity) 71 | 72 | PRINT *, "RBS u total potential is: ", SUM(potential) 73 | 74 | ! Calling with kernel as RBS_V 75 | CALL BaryTreeInterface(num_targets, num_sources, & 76 | C_LOC(target_x), C_LOC(target_y), C_LOC(target_z), C_LOC(target_q), & 77 | C_LOC(source_x), C_LOC(source_y), C_LOC(source_z), C_LOC(source_q), & 78 | C_LOC(source_w), C_LOC(potential), & 79 | RBS_V, num_kernel_params, C_LOC(kernel_params), & 80 | singularity, approximation, compute_type, theta, degree, & 81 | max_source_leaf, max_target_leaf, size_check, beta, & 82 | verbosity) 83 | 84 | PRINT *, "RBS v total potential is: ", SUM(potential) 85 | 86 | DEALLOCATE(target_x, target_y, target_z, target_q, potential, source_x, source_y, & 87 | source_z, source_q, source_w); 88 | 89 | CALL MPI_FINALIZE(ierr) 90 | 91 | END PROGRAM 92 | -------------------------------------------------------------------------------- /interfaces/python/testBaryTreeInterface.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ''' 3 | import os 4 | import sys 5 | import resource 6 | import numpy as np 7 | import mpi4py.MPI as MPI 8 | 9 | 10 | sys.path.append(os.getcwd()) 11 | try: 12 | import BaryTreeInterface as BT 13 | except ImportError: 14 | print('Unable to import BaryTreeInterface due to ImportError') 15 | except OSError: 16 | print('Unable to import BaryTreeInterface due to OSError') 17 | 18 | 19 | if __name__=="__main__": 20 | 21 | # set treecode parameters 22 | N = 5000 23 | maxPerSourceLeaf = 50 24 | maxPerTargetLeaf = 10 25 | GPUpresent = False 26 | theta = 0.8 27 | treecodeDegree = 4 28 | gaussianAlpha = 1.0 29 | verbosity = 0 30 | 31 | approximation = BT.Approximation.LAGRANGE 32 | singularity = BT.Singularity.SUBTRACTION 33 | computeType = BT.ComputeType.PARTICLE_CLUSTER 34 | 35 | kernel = BT.Kernel.YUKAWA 36 | numberOfKernelParameters = 1 37 | kernelParameters = np.array([0.5]) 38 | 39 | 40 | # initialize some random data 41 | np.random.seed(1) 42 | RHO = np.random.rand(N) 43 | X = np.random.rand(N) 44 | Y = np.random.rand(N) 45 | Z = np.random.rand(N) 46 | W = np.ones(N) # W stores quadrature weights for convolution integrals. For particle simulations, set = ones. 47 | 48 | expectedOutput = 588.7432483318685 # using seed of 1, this is expected value of first element of output array. 
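    # Note: the wrapper supports two ways of setting accuracy, both exercised
    # below: pass theta/degree/leaf sizes explicitly (first call), or pass only
    # beta, the automatic tuning parameter in [0,1] (second call).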
49 | 50 | 51 | # call the treecode 52 | 53 | output = BT.callTreedriver( N, N, 54 | X, Y, Z, RHO, 55 | np.copy(X), np.copy(Y), np.copy(Z), np.copy(RHO), np.copy(W), 56 | kernel, numberOfKernelParameters, kernelParameters, 57 | singularity, approximation, computeType, 58 | GPUpresent, verbosity, 59 | theta=theta, degree=treecodeDegree, sourceLeafSize=maxPerSourceLeaf, targetLeafSize=maxPerTargetLeaf, sizeCheck=1.0) 60 | 61 | assert (abs(output[0]-expectedOutput) < 1e-14), "Error: didn't get the expected output using explicit theta/degree." 62 | 63 | 64 | 65 | 66 | 67 | 68 | beta = 0.1 69 | expectedOutput = 588.7445889051367 # this is expected value of first element of output array for beta = 0.1 70 | output = BT.callTreedriver( N, N, 71 | X, Y, Z, RHO, 72 | np.copy(X), np.copy(Y), np.copy(Z), np.copy(RHO), np.copy(W), 73 | kernel, numberOfKernelParameters, kernelParameters, 74 | singularity, approximation, computeType, 75 | GPUpresent, verbosity, beta=beta, sizeCheck=1.0) 76 | assert (abs(output[0]-expectedOutput) < 1e-14), "Error: didn't get the expected output using beta." 77 | 78 | 79 | print("If no errors printed, then the calls to the treecode wrapper worked (one using explicit theta/degree, one using beta)") 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /src/clusters/clusters.h: -------------------------------------------------------------------------------- 1 | #ifndef H_CLUSTER_FUNCTIONS_H 2 | #define H_CLUSTER_FUNCTIONS_H 3 | 4 | #include "../utilities/enums.h" 5 | 6 | #include "../tree/struct_tree.h" 7 | #include "../particles/struct_particles.h" 8 | 9 | #include "struct_clusters.h" 10 | 11 | 12 | void Clusters_Sources_Construct(struct Clusters **clusters, const struct Particles *sources, 13 | const struct Tree *tree, const struct RunParams *run_params); 14 | 15 | void Clusters_Targets_Construct(struct Clusters **clusters, const struct Particles *targets, 16 | const struct Tree *tree, const struct RunParams *run_params); 17 | 18 | void Clusters_Alloc(struct Clusters **clusters_addr, int length, 19 | const struct RunParams *run_params); 20 | 21 | void Clusters_Free(struct Clusters **clusters_addr); 22 | 23 | void Clusters_Free_Win(struct Clusters **clusters_addr); 24 | 25 | 26 | #endif /* H_CLUSTER_FUNCTIONS_H */ 27 | -------------------------------------------------------------------------------- /src/clusters/struct_clusters.h: -------------------------------------------------------------------------------- 1 | #ifndef H_CLUSTERS_H 2 | #define H_CLUSTERS_H 3 | 4 | /* declaration of struct with tag Clusters */ 5 | struct Clusters 6 | { 7 | int num; 8 | int num_weights; 9 | int num_charges; 10 | 11 | double *x; 12 | double *y; 13 | double *z; 14 | double *q; 15 | // Quadrature weights; set to 1 when interacting particles directly rather than performing a convolution integral.
16 | double *w; 17 | }; 18 | 19 | #endif /* H_CLUSTERS_H */ 20 | -------------------------------------------------------------------------------- /src/comm_cp/comm_cp.h: -------------------------------------------------------------------------------- 1 | #ifndef H_COMM_CP_FUNCTIONS_H 2 | #define H_COMM_CP_FUNCTIONS_H 3 | 4 | #include "../tree/struct_tree.h" 5 | #include "../particles/struct_particles.h" 6 | #include "../run_params/struct_run_params.h" 7 | 8 | 9 | void Comm_CP_ConstructAndGetData(struct Tree **remote_batches_addr, struct Particles **remote_sources_addr, 10 | const struct Tree *tree_array, const struct Tree *batches, 11 | const struct Particles *sources, const struct RunParams *run_params); 12 | 13 | 14 | #endif /* H_COMM_CP_FUNCTIONS_H */ 15 | -------------------------------------------------------------------------------- /src/comm_types/comm_types.h: -------------------------------------------------------------------------------- 1 | #ifndef H_COMM_TYPES_FUNCTIONS_H 2 | #define H_COMM_TYPES_FUNCTIONS_H 3 | 4 | #include "../tree/struct_tree.h" 5 | #include "../run_params/struct_run_params.h" 6 | 7 | #include "struct_comm_types.h" 8 | 9 | 10 | void CommTypesAndTrees_Construct(struct CommTypes **comm_types_addr, struct Tree ***let_trees_addr, 11 | struct Tree *tree, struct Tree *batches, 12 | struct RunParams *run_params); 13 | 14 | void CommTypesAndTrees_Free(struct CommTypes **comm_types_addr, struct Tree ***let_trees_addr); 15 | 16 | 17 | #endif /* H_COMM_TYPES_FUNCTIONS_H */ 18 | -------------------------------------------------------------------------------- /src/comm_types/struct_comm_types.h: -------------------------------------------------------------------------------- 1 | #ifndef H_STRUCT_COMM_TYPES_H 2 | #define H_STRUCT_COMM_TYPES_H 3 | 4 | #include <mpi.h> 5 | 6 | 7 | struct CommTypes 8 | { 9 | int *num_remote_approx_array; 10 | int *previous_let_clusters_length_array; 11 | 12 | int let_clusters_length; 13 | int let_clusters_num; 14 | 15 | MPI_Datatype *MPI_approx_type; 16 | MPI_Datatype *MPI_approx_charges_type; 17 | MPI_Datatype *MPI_approx_weights_type; 18 | 19 | int *new_sources_length_array; 20 | int *previous_let_sources_length_array; 21 | 22 | int let_sources_length; 23 | 24 | MPI_Datatype *MPI_direct_type; 25 | }; 26 | 27 | 28 | #endif /* H_STRUCT_COMM_TYPES_H */ 29 | -------------------------------------------------------------------------------- /src/comm_windows/comm_windows.h: -------------------------------------------------------------------------------- 1 | #ifndef H_COMM_WINDOWS_FUNCTIONS_H 2 | #define H_COMM_WINDOWS_FUNCTIONS_H 3 | 4 | #include "../clusters/struct_clusters.h" 5 | #include "../particles/struct_particles.h" 6 | #include "../run_params/struct_run_params.h" 7 | #include "../comm_types/struct_comm_types.h" 8 | 9 | #include "struct_comm_windows.h" 10 | 11 | 12 | void CommWindows_Create(struct CommWindows **comm_windows_addr, 13 | struct Clusters *clusters, struct Particles *sources, struct RunParams *run_params); 14 | 15 | void CommWindows_Free(struct CommWindows **comm_windows_addr, struct RunParams *run_params); 16 | 17 | void CommWindows_Lock(struct CommWindows *comm_windows, int get_from, struct RunParams *run_params); 18 | 19 | void CommWindows_Unlock(struct CommWindows *comm_windows, int get_from, struct RunParams *run_params); 20 | 21 | void CommWindows_GetData(struct Clusters *let_clusters, struct Particles *let_sources, 22 | struct CommTypes *comm_types, struct CommWindows *comm_windows, 23 | int get_from, struct RunParams
*run_params); 24 | 25 | 26 | #endif /* H_COMM_WINDOWS_FUNCTIONS_H */ 27 | -------------------------------------------------------------------------------- /src/comm_windows/struct_comm_windows.h: -------------------------------------------------------------------------------- 1 | #ifndef H_STRUCT_COMM_WINDOWS_H 2 | #define H_STRUCT_COMM_WINDOWS_H 3 | 4 | #include <mpi.h> 5 | 6 | 7 | struct CommWindows 8 | { 9 | MPI_Win win_clusters_x, win_clusters_y, win_clusters_z, win_clusters_q, win_clusters_w; 10 | MPI_Win win_sources_x, win_sources_y, win_sources_z, win_sources_q, win_sources_w; 11 | }; 12 | 13 | 14 | #endif /* H_STRUCT_COMM_WINDOWS_H */ 15 | -------------------------------------------------------------------------------- /src/drivers/directdriver.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../utilities/array.h" 9 | #include "../utilities/tools.h" 10 | #include "../utilities/timers.h" 11 | #include "../utilities/enums.h" 12 | 13 | #include "../particles/struct_particles.h" 14 | #include "../particles/particles.h" 15 | 16 | #include "../run_params/struct_run_params.h" 17 | 18 | #include "../interaction_compute/interaction_compute.h" 19 | 20 | #include "directdriver.h" 21 | 22 | 23 | void directdriver(struct Particles *sources, struct Particles *targets, struct RunParams *run_params, 24 | double *potential, double *time_direct) 25 | { 26 | int rank, num_procs, ierr; 27 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 28 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 29 | 30 | int num_sources = sources->num; 31 | int num_targets = targets->num; 32 | int num_sources_on_proc[num_procs]; 33 | 34 | double *source_x = sources->x; 35 | double *source_y = sources->y; 36 | double *source_z = sources->z; 37 | double *source_q = sources->q; 38 | double *source_w = sources->w; 39 | 40 | double *target_x = targets->x; 41 | double *target_y = targets->y; 42 | double *target_z = targets->z; 43 | double *target_q = targets->q; 44 | 45 | double time1; 46 | time_direct[0] = 0.0; 47 | time_direct[1] = 0.0; 48 | time_direct[2] = 0.0; 49 | time_direct[3] = 0.0; 50 | 51 | 52 | START_TIMER(&time1); 53 | MPI_Allgather(&num_sources, 1, MPI_INT, num_sources_on_proc, 1, MPI_INT, MPI_COMM_WORLD); 54 | MPI_Win win_sources_x, win_sources_y, win_sources_z, win_sources_q, win_sources_w; 55 | MPI_Win_create(source_x, num_sources*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win_sources_x); 56 | MPI_Win_create(source_y, num_sources*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win_sources_y); 57 | MPI_Win_create(source_z, num_sources*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win_sources_z); 58 | MPI_Win_create(source_q, num_sources*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win_sources_q); 59 | MPI_Win_create(source_w, num_sources*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win_sources_w); 60 | STOP_TIMER(&time1); 61 | time_direct[0] += time1; 62 | 63 | for (int proc_id = 1; proc_id < num_procs; ++proc_id) { 64 | 65 | START_TIMER(&time1); 66 | int get_from = (num_procs + rank - proc_id) % num_procs; 67 | 68 | struct Particles *remote_sources = NULL; 69 | Particles_Alloc(&remote_sources, num_sources_on_proc[get_from]); 70 | 71 | MPI_Barrier(MPI_COMM_WORLD); 72 | 73 | MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, win_sources_x); 74 | MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, win_sources_y); 75 |
MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, win_sources_z); 76 | MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, win_sources_q); 77 | MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, win_sources_w); 78 | 79 | MPI_Get(remote_sources->x, num_sources_on_proc[get_from], MPI_DOUBLE, 80 | get_from, 0, num_sources_on_proc[get_from], MPI_DOUBLE, win_sources_x); 81 | MPI_Get(remote_sources->y, num_sources_on_proc[get_from], MPI_DOUBLE, 82 | get_from, 0, num_sources_on_proc[get_from], MPI_DOUBLE, win_sources_y); 83 | MPI_Get(remote_sources->z, num_sources_on_proc[get_from], MPI_DOUBLE, 84 | get_from, 0, num_sources_on_proc[get_from], MPI_DOUBLE, win_sources_z); 85 | MPI_Get(remote_sources->q, num_sources_on_proc[get_from], MPI_DOUBLE, 86 | get_from, 0, num_sources_on_proc[get_from], MPI_DOUBLE, win_sources_q); 87 | MPI_Get(remote_sources->w, num_sources_on_proc[get_from], MPI_DOUBLE, 88 | get_from, 0, num_sources_on_proc[get_from], MPI_DOUBLE, win_sources_w); 89 | 90 | MPI_Win_unlock(get_from, win_sources_x); 91 | MPI_Win_unlock(get_from, win_sources_y); 92 | MPI_Win_unlock(get_from, win_sources_z); 93 | MPI_Win_unlock(get_from, win_sources_q); 94 | MPI_Win_unlock(get_from, win_sources_w); 95 | 96 | MPI_Barrier(MPI_COMM_WORLD); 97 | 98 | STOP_TIMER(&time1); 99 | time_direct[0] += time1; 100 | 101 | 102 | START_TIMER(&time1); 103 | InteractionCompute_Direct(potential, remote_sources, targets, run_params); 104 | 105 | Particles_Free(&remote_sources); 106 | STOP_TIMER(&time1); 107 | time_direct[1] += time1; 108 | } 109 | 110 | 111 | START_TIMER(&time_direct[2]); 112 | InteractionCompute_Direct(potential, sources, targets, run_params); 113 | STOP_TIMER(&time_direct[2]); 114 | 115 | 116 | START_TIMER(&time_direct[3]); 117 | InteractionCompute_SubtractionPotentialCorrection(potential, targets, run_params); 118 | STOP_TIMER(&time_direct[3]); 119 | 120 | return; 121 | 122 | } /* END of function directdriver */ 123 | -------------------------------------------------------------------------------- /src/drivers/directdriver.h: -------------------------------------------------------------------------------- 1 | #ifndef H_DIRECTDRIVER_H 2 | #define H_DIRECTDRIVER_H 3 | 4 | #include "../particles/struct_particles.h" 5 | #include "../run_params/struct_run_params.h" 6 | 7 | 8 | void directdriver(struct Particles *sources, struct Particles *targets, struct RunParams *run_params, 9 | double *potential_array, double *time_direct); 10 | 11 | 12 | #endif /* H_DIRECTDRIVER_H */ 13 | -------------------------------------------------------------------------------- /src/drivers/treedriver.h: -------------------------------------------------------------------------------- 1 | #ifndef H_TREEDRIVER_H 2 | #define H_TREEDRIVER_H 3 | 4 | #include "../particles/struct_particles.h" 5 | #include "../run_params/struct_run_params.h" 6 | 7 | 8 | void treedriver(struct Particles *sources, struct Particles *targets, struct RunParams *run_params, 9 | double *potential_array, double *time_tree); 10 | 11 | 12 | #endif /* H_TREEDRIVER_H */ 13 | -------------------------------------------------------------------------------- /src/interaction_compute/interaction_compute.h: -------------------------------------------------------------------------------- 1 | #ifndef H_INTERACTION_COMPUTE_H 2 | #define H_INTERACTION_COMPUTE_H 3 | 4 | #include "../tree/struct_tree.h" 5 | #include "../particles/struct_particles.h" 6 | #include "../clusters/struct_clusters.h" 7 | #include "../run_params/struct_run_params.h" 8 | #include 
"../interaction_lists/struct_interaction_lists.h" 9 | 10 | 11 | void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *batches, 12 | struct InteractionLists *interaction_list, 13 | struct Particles *sources, struct Particles *targets, 14 | struct Clusters *clusters, struct RunParams *run_params); 15 | 16 | 17 | void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *batches, 18 | struct InteractionLists *interaction_list, 19 | struct Particles *sources, struct Particles *targets, 20 | struct Clusters *clusters, struct RunParams *run_params); 21 | 22 | 23 | void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct Tree *target_tree, 24 | struct InteractionLists *interaction_list, 25 | struct Particles *sources, struct Particles *targets, 26 | struct Clusters *source_clusters, struct Clusters *target_clusters, 27 | struct RunParams *run_params); 28 | 29 | 30 | void InteractionCompute_Downpass(double *potential, struct Tree *tree, 31 | struct Particles *targets, struct Clusters *clusters, 32 | struct RunParams *run_params); 33 | 34 | 35 | void InteractionCompute_Direct(double *potential, 36 | struct Particles *sources, struct Particles *targets, 37 | struct RunParams *run_params); 38 | 39 | 40 | void InteractionCompute_SubtractionPotentialCorrection(double *potential, 41 | struct Particles *targets, struct RunParams *run_params); 42 | 43 | 44 | #endif /* H_INTERACTION_COMPUTE_H */ 45 | -------------------------------------------------------------------------------- /src/interaction_compute/interaction_compute_correction.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "../particles/struct_particles.h" 8 | #include "../run_params/struct_run_params.h" 9 | 10 | #include "../kernels/coulomb/coulomb.h" 11 | #include "../kernels/yukawa/yukawa.h" 12 | #include "../kernels/regularized-coulomb/regularized-coulomb.h" 13 | #include "../kernels/regularized-yukawa/regularized-yukawa.h" 14 | #include "../kernels/atan/atan.h" 15 | #include "../kernels/sin-over-r/sin-over-r.h" 16 | #include "../kernels/mq/mq.h" 17 | #include "../kernels/user_kernel/user_kernel.h" 18 | 19 | #include "interaction_compute.h" 20 | 21 | 22 | void InteractionCompute_SubtractionPotentialCorrection(double *potential, 23 | struct Particles *targets, struct RunParams *run_params) 24 | { 25 | int num_targets = targets->num; 26 | double *target_q = targets->q; 27 | 28 | if (run_params->singularity == SUBTRACTION) { 29 | if (run_params->kernel == COULOMB) { 30 | K_Coulomb_SS_Correction(potential, target_q, num_targets, run_params); 31 | 32 | } else if (run_params->kernel == REGULARIZED_COULOMB) { 33 | K_RegularizedCoulomb_SS_Correction(potential, target_q, num_targets, run_params); 34 | 35 | } else if (run_params->kernel == YUKAWA) { 36 | K_Yukawa_SS_Correction(potential, target_q, num_targets, run_params); 37 | 38 | } else if (run_params->kernel == REGULARIZED_YUKAWA) { 39 | K_RegularizedYukawa_SS_Correction(potential, target_q, num_targets, run_params); 40 | 41 | } 42 | } 43 | 44 | return; 45 | } 46 | -------------------------------------------------------------------------------- /src/interaction_lists/interaction_lists.h: -------------------------------------------------------------------------------- 1 | #ifndef H_INTERACTION_LISTS_H 2 | #define H_INTERACTION_LISTS_H 3 | 4 | #include "../tree/struct_tree.h" 5 | #include 
"../run_params/struct_run_params.h" 6 | #include "struct_interaction_lists.h" 7 | 8 | 9 | void InteractionLists_Make(struct InteractionLists **interaction_list_addr, 10 | const struct Tree *source_tree, const struct Tree *target_tree, 11 | const struct RunParams *run_params); 12 | 13 | void InteractionLists_Free(struct InteractionLists **interaction_list_addr); 14 | 15 | void InteractionLists_MakeRemote(const struct Tree *source_tree, const struct Tree *target_tree, 16 | int *approx_list_packed, int *approx_list_unpacked, int *direct_list, 17 | const struct RunParams *run_params); 18 | 19 | 20 | #endif /* H_INTERACTION_LISTS_H */ 21 | -------------------------------------------------------------------------------- /src/interaction_lists/struct_interaction_lists.h: -------------------------------------------------------------------------------- 1 | #ifndef H_STRUCT_INTERACTION_LISTS_H 2 | #define H_STRUCT_INTERACTION_LISTS_H 3 | 4 | 5 | struct InteractionLists 6 | { 7 | int *num_pp; 8 | int *num_cc; 9 | int *num_pc; 10 | int *num_cp; 11 | 12 | int **pp_interactions; 13 | int **cc_interactions; 14 | int **pc_interactions; 15 | int **cp_interactions; 16 | }; 17 | 18 | 19 | #endif /* H_STRUCT_INTERACTION_LISTS_H */ 20 | -------------------------------------------------------------------------------- /src/interface/BaryTreeInterface.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | #include "../particles/struct_particles.h" 9 | #include "../particles/particles.h" 10 | 11 | #include "../run_params/struct_run_params.h" 12 | #include "../run_params/run_params.h" 13 | 14 | #include "../drivers/treedriver.h" 15 | #include "BaryTreeInterface.h" 16 | 17 | 18 | void BaryTreeInterface(int numTargets, int numSources, 19 | double *targetX, double *targetY, double *targetZ, double *targetValue, 20 | double *sourceX, double *sourceY, double *sourceZ, double *sourceValue, double *sourceWeight, 21 | double *outputArray, 22 | KERNEL kernel, int numKernelParams, double *kernelParams, 23 | SINGULARITY singularity, APPROXIMATION approximation, COMPUTE_TYPE compute_type, 24 | double theta, int interpDegree, int maxPerSourceLeaf, int maxPerTargetLeaf, 25 | double sizeCheck, double beta, int verbosity) 26 | { 27 | 28 | double timing[12]; 29 | memset(outputArray, 0, numTargets * sizeof(double)); 30 | 31 | struct RunParams *run_params = NULL; 32 | RunParams_Setup(&run_params, 33 | kernel, numKernelParams, kernelParams, 34 | approximation, singularity, compute_type, 35 | theta, interpDegree, 36 | maxPerSourceLeaf, maxPerTargetLeaf, sizeCheck, 37 | beta, verbosity); 38 | 39 | struct Particles sources, targets; 40 | 41 | targets.num = numTargets; 42 | targets.x = targetX; 43 | targets.y = targetY; 44 | targets.z = targetZ; 45 | targets.q = targetValue; 46 | 47 | sources.num = numSources; 48 | sources.x = sourceX; 49 | sources.y = sourceY; 50 | sources.z = sourceZ; 51 | sources.q = sourceValue; 52 | sources.w = sourceWeight; 53 | 54 | 55 | treedriver(&sources, &targets, run_params, outputArray, timing); 56 | MPI_Barrier(MPI_COMM_WORLD); 57 | 58 | 59 | RunParams_Free(&run_params); 60 | 61 | return; 62 | } 63 | -------------------------------------------------------------------------------- /src/interface/BaryTreeInterface.h: -------------------------------------------------------------------------------- 1 | #ifndef H_BARYTREE_INTERFACE_H 2 | #define H_BARYTREE_INTERFACE_H 3 | 4 | #ifndef H_BARYTREE_TYPES_H 5 | 
#define H_BARYTREE_TYPES_H 6 | 7 | typedef enum KERNEL 8 | { 9 | NO_KERNEL, 10 | COULOMB, 11 | YUKAWA, 12 | REGULARIZED_COULOMB, 13 | REGULARIZED_YUKAWA, 14 | ATAN, 15 | TCF, 16 | DCF, 17 | SIN_OVER_R, 18 | MQ, 19 | RBS_U, 20 | RBS_V, 21 | USER 22 | } KERNEL; 23 | 24 | 25 | typedef enum SINGULARITY 26 | { 27 | NO_SINGULARITY, 28 | SKIPPING, 29 | SUBTRACTION 30 | } SINGULARITY; 31 | 32 | 33 | typedef enum APPROXIMATION 34 | { 35 | NO_APPROX, 36 | LAGRANGE, 37 | HERMITE 38 | } APPROXIMATION; 39 | 40 | 41 | typedef enum COMPUTE_TYPE 42 | { 43 | NO_COMPUTE_TYPE, 44 | PARTICLE_CLUSTER, 45 | CLUSTER_PARTICLE, 46 | CLUSTER_CLUSTER 47 | } COMPUTE_TYPE; 48 | 49 | 50 | #endif /* H_BARYTREE_TYPES_H */ 51 | 52 | void BaryTreeInterface(int numTargets, int numSources, 53 | double *targetX, double *targetY, double *targetZ, double *targetValue, 54 | double *sourceX, double *sourceY, double *sourceZ, double *sourceValue, double *sourceWeight, 55 | double *outputArray, 56 | KERNEL kernel, int numKernelParams, double *kernelParams, 57 | SINGULARITY singularity, APPROXIMATION approximation, COMPUTE_TYPE compute_type, 58 | double theta, int interpDegree, int maxPerSourceLeaf, int maxPerTargetLeaf, 59 | double sizeCheck, double beta, int verbosity); 60 | 61 | 62 | #endif /* H_BARYTREE_INTERFACE_H */ 63 | -------------------------------------------------------------------------------- /src/kernels/README.md: -------------------------------------------------------------------------------- 1 | Adding New Kernels 2 | ------------------ 3 | 4 | Steps for adding a new kernel named `custom-kernel` with support for particle-cluster are described below. Adding support for cluster-cluster and cluster-particle follows similarly. Consult existing kernel directories for more details. 5 | 6 | 1. Add the name of the new kernel to the end of the KERNEL enum in `src/utilities/enums.h`. If you plan to use the Python wrapper, add it to the Kernel class in `interfaces/python/BaryTreeInterface.py` as well. 7 | 8 | 2. Create a new directory `custom-kernel` in `src/kernels/`. 9 | 10 | 3. Create new source files (`custom-kernel_direct.c`, `custom-kernel_direct.h`, `custom-kernel_pc.c`, `custom-kernel_pc.h`) in `src/kernels/custom-kernel` containing three batch-cluster interaction functions (see the sketch below): 11 | - `K_CustomKernel_Direct( )` 12 | - `K_CustomKernel_PC_Lagrange( )` 13 | - `K_CustomKernel_PC_Hermite( )` 14 | 15 | 4. Create a new source file `custom-kernel.h` in `src/kernels/custom-kernel/`. `#include` in this file all other headers associated with this kernel (`custom-kernel_direct.h` and `custom-kernel_pc.h`). 16 | 17 | 5. Edit `interaction_compute_pc.c`: 18 | 1. Include `custom-kernel.h` in `interaction_compute_pc.c` (`#include "../kernels/custom-kernel/custom-kernel.h"`). 19 | 2. Add your custom kernel in several places, following the format for the already-present kernels, like `if (run_params->kernel == KERNEL_NAME)`: 20 | - In the POTENTIAL FROM APPROX subsection, add your Lagrange and/or Hermite kernels 21 | - In the POTENTIAL FROM DIRECT subsection, add your direct interaction kernel 22 | 23 | 6. Edit `interaction_compute_direct.c`: 24 | 1. Include `custom-kernel.h` in `interaction_compute_direct.c` (`#include "../kernels/custom-kernel/custom-kernel.h"`). 25 | 2. Add your direct interaction kernel, following the format for the already-present kernels. 26 | 27 | 7. Add your files to `src/CMakeLists.txt`: 28 | 1. 
Add `custom-kernel_direct.c`, `custom-kernel_direct.h`, `custom-kernel_pc.c`, `custom-kernel_pc.h`, and `custom-kernel.h` to a new `SRCS_K_CUSTOM_KERNEL` list, following the format of the other kernels. 29 | 2. Add `${SRCS_K_CUSTOM_KERNEL}` to the `SRCS_KERNELS` list. 30 | -------------------------------------------------------------------------------- /src/kernels/atan/atan.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_ATAN_H 3 | #define H_K_ATAN_H 4 | 5 | 6 | #include "atan_pp.h" 7 | #include "atan_pc.h" 8 | 9 | #endif /* H_K_ATAN_H */ 10 | -------------------------------------------------------------------------------- /src/kernels/atan/atan_pc.c: -------------------------------------------------------------------------------- 1 | #ifdef OPENACC_ENABLED 2 | #include 3 | #define M_PI 3.14159265358979323846264338327950288 4 | #else 5 | #include 6 | #endif 7 | #include 8 | 9 | #include "../../run_params/struct_run_params.h" 10 | #include "atan_pc.h" 11 | 12 | 13 | void K_Atan_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 14 | int starting_index_of_target, int starting_index_of_cluster, 15 | double *target_x, double *target_y, double *target_z, 16 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 17 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 18 | { 19 | 20 | double domainLength = run_params->kernel_params[0]; 21 | double delta = run_params->kernel_params[1]; 22 | double wadj = 1. / (1. - delta / sqrt(1. + delta * delta)); 23 | double delta_factor = sqrt(1. + 1.0 / (delta * delta)); 24 | 25 | #ifdef OPENACC_ENABLED 26 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 27 | cluster_x, cluster_y, cluster_z, cluster_charge, potential) 28 | { 29 | #endif 30 | #ifdef OPENACC_ENABLED 31 | #pragma acc loop independent 32 | #endif 33 | for (int i = 0; i < number_of_targets_in_batch; i++) { 34 | 35 | int ii = starting_index_of_target + i; 36 | double temporary_potential = 0.0; 37 | double tz = target_z[ii]; 38 | 39 | #ifdef OPENACC_ENABLED 40 | #pragma acc loop independent reduction(+:temporary_potential) 41 | #endif 42 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 43 | 44 | int jj = starting_index_of_cluster + j; 45 | double dz = (tz - cluster_z[jj]) / domainLength; 46 | if (dz < -0.5) { 47 | dz += 1.0; 48 | } 49 | if (dz > 0.5) { 50 | dz -= 1.0; 51 | } 52 | temporary_potential += cluster_charge[jj] 53 | * (1.0 / M_PI * atan(delta_factor * tan(M_PI * dz)) - dz); 54 | } // end loop over interpolation points 55 | #ifdef OPENACC_ENABLED 56 | #pragma acc atomic 57 | #endif 58 | potential[ii] += wadj * temporary_potential; 59 | } 60 | #ifdef OPENACC_ENABLED 61 | } // end kernel 62 | #endif 63 | return; 64 | } 65 | 66 | 67 | 68 | 69 | void K_Atan_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 70 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 71 | double *target_x, double *target_y, double *target_z, 72 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 73 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 74 | { 75 | printf("[BaryTree] ERROR! ATAN KERNEL NOT IMPLEMENTED FOR HERMITE. 
Exiting.\n"); 76 | return; 77 | } 78 | -------------------------------------------------------------------------------- /src/kernels/atan/atan_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_ATAN_PC_H 3 | #define H_K_ATAN_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Atan_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | void K_Atan_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 16 | double *target_x, double *target_y, double *target_z, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_ATAN_PC_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/atan/atan_pp.c: -------------------------------------------------------------------------------- 1 | #ifdef OPENACC_ENABLED 2 | #include 3 | #define M_PI 3.14159265358979323846264338327950288 4 | #else 5 | #include 6 | #endif 7 | #include 8 | 9 | #include "../../run_params/struct_run_params.h" 10 | #include "atan_pp.h" 11 | 12 | 13 | void K_Atan_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 14 | int starting_index_of_target, int starting_index_of_source, 15 | double *target_x, double *target_y, double *target_z, 16 | double *source_x, double *source_y, double *source_z, double *source_charge, 17 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 18 | { 19 | 20 | double domainLength = run_params->kernel_params[0]; 21 | double delta = run_params->kernel_params[1]; 22 | double wadj = 1. / (1. - delta / sqrt(1. + delta * delta)); 23 | double delta_factor = sqrt(1. 
+ 1.0 / (delta * delta)); 24 | 25 | #ifdef OPENACC_ENABLED 26 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 27 | source_x, source_y, source_z, source_charge, potential) 28 | { 29 | #endif 30 | #ifdef OPENACC_ENABLED 31 | #pragma acc loop independent 32 | #endif 33 | for (int i = 0; i < number_of_targets_in_batch; i++) { 34 | 35 | int ii = starting_index_of_target + i; 36 | double temporary_potential = 0.0; 37 | double tz = target_z[ii]; 38 | 39 | #ifdef OPENACC_ENABLED 40 | #pragma acc loop independent reduction(+:temporary_potential) 41 | #endif 42 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 43 | int jj = starting_index_of_source + j; 44 | double dz = (tz - source_z[jj]) / domainLength; 45 | 46 | if (dz < -0.5) { 47 | dz += 1.0; 48 | } 49 | if (dz > 0.5) { 50 | dz -= 1.0; 51 | } 52 | temporary_potential += source_charge[jj] 53 | * (1.0 / M_PI * atan(delta_factor * tan(M_PI * dz)) - dz); 54 | } // end loop over interpolation points 55 | #ifdef OPENACC_ENABLED 56 | #pragma acc atomic 57 | #endif 58 | potential[ii] += wadj * temporary_potential; 59 | } 60 | #ifdef OPENACC_ENABLED 61 | } // end kernel 62 | #endif 63 | return; 64 | } 65 | -------------------------------------------------------------------------------- /src/kernels/atan/atan_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_ATAN_PP_H 3 | #define H_K_ATAN_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Atan_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_ATAN_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_H 3 | #define H_K_COULOMB_H 4 | 5 | #include "coulomb_pp.h" 6 | #include "coulomb_pc.h" 7 | #include "coulomb_cp.h" 8 | #include "coulomb_cc_hermite.h" 9 | 10 | #include "coulomb_ss_pp.h" 11 | #include "coulomb_ss_correction.h" 12 | #include "coulomb_ss_pc.h" 13 | #include "coulomb_ss_cp.h" 14 | #include "coulomb_ss_cc.h" 15 | 16 | 17 | 18 | #endif /* H_K_COULOMB_H */ 19 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_cc_hermite.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_CC_HERMITE_H 3 | #define H_K_COULOMB_CC_HERMITE_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_CC_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_source_cluster, int starting_index_of_target_cluster, 10 | double *source_cluster_x, double *source_cluster_y, double *source_cluster_z, double *source_cluster_q, 11 | double *source_cluster_w, 12 | double *target_cluster_x, double *target_cluster_y, double *target_cluster_z, double *target_cluster_q, 13 | struct RunParams *run_params, int gpu_async_stream_id); 14 | 15 | 16 | #endif /* H_K_COULOMB_CC_HERMITE_H */ 17 | 
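A minimal sketch of the `K_CustomKernel_Direct( )` function from step 3 of `src/kernels/README.md` above. The `custom-kernel` name and the inverse-square interaction are placeholder assumptions, and the OpenACC pragmas and cache directives used by the real kernels (compare `K_Coulomb_PP` below) are omitted for brevity:

/* src/kernels/custom-kernel/custom-kernel_direct.c -- hypothetical skeleton, not part of the library */
#include <float.h>

#include "../../run_params/struct_run_params.h"
#include "custom-kernel_direct.h"

void K_CustomKernel_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster,
        int starting_index_of_target, int starting_index_of_source,
        double *target_x, double *target_y, double *target_z,
        double *source_x, double *source_y, double *source_z, double *source_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id)
{
    (void)run_params;           /* this toy 1/r^2 kernel reads no kernel_params */
    (void)gpu_async_stream_id;  /* only meaningful in the OpenACC build */

    for (int i = 0; i < number_of_targets_in_batch; i++) {

        int ii = starting_index_of_target + i;
        double temporary_potential = 0.0;

        double tx = target_x[ii];
        double ty = target_y[ii];
        double tz = target_z[ii];

        for (int j = 0; j < number_of_source_points_in_cluster; j++) {

            int jj = starting_index_of_source + j;
            double dx = tx - source_x[jj];
            double dy = ty - source_y[jj];
            double dz = tz - source_z[jj];
            double r2 = dx*dx + dy*dy + dz*dz;

            /* skip the singular self-interaction, as K_Coulomb_PP does */
            if (r2 > DBL_MIN)
                temporary_potential += source_charge[jj] / r2;

        } // end loop over source points
        potential[ii] += temporary_potential;
    }
    return;
}

The corresponding `K_CustomKernel_PC_Lagrange( )` would have the same loop structure but read interpolation-point coordinates and modified charges (`cluster_x`, `cluster_y`, `cluster_z`, `cluster_charge`) in place of the source arrays, as in `K_Coulomb_PC_Lagrange` below.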
-------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_cp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_CP_H 3 | #define H_K_COULOMB_CP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_x, double *source_y, double *source_z, double *source_q, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | void K_Coulomb_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_sources, int starting_index_of_cluster, 16 | double *source_x, double *source_y, double *source_z, double *source_q, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_COULOMB_CP_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_pc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "coulomb_pc.h" 7 | 8 | 9 | void K_Coulomb_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_cluster, 11 | double *target_x, double *target_y, double *target_z, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | #ifdef OPENACC_ENABLED 17 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 18 | cluster_x, cluster_y, cluster_z, cluster_charge, potential) 19 | { 20 | #endif 21 | #ifdef OPENACC_ENABLED 22 | #pragma acc loop independent 23 | #endif 24 | for (int i = 0; i < number_of_targets_in_batch; i++) { 25 | 26 | double temporary_potential = 0.0; 27 | 28 | double tx = target_x[starting_index_of_target + i]; 29 | double ty = target_y[starting_index_of_target + i]; 30 | double tz = target_z[starting_index_of_target + i]; 31 | 32 | #ifdef OPENACC_ENABLED 33 | #pragma acc loop independent reduction(+:temporary_potential) 34 | #endif 35 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc cache(cluster_x[starting_index_of_cluster : starting_index_of_cluster+number_of_interpolation_points_in_cluster], \ 38 | cluster_y[starting_index_of_cluster : starting_index_of_cluster+number_of_interpolation_points_in_cluster], \ 39 | cluster_z[starting_index_of_cluster : starting_index_of_cluster+number_of_interpolation_points_in_cluster], \ 40 | cluster_charge[starting_index_of_cluster : starting_index_of_cluster+number_of_interpolation_points_in_cluster]) 41 | #endif 42 | 43 | int jj = starting_index_of_cluster + j; 44 | double dx = tx - cluster_x[jj]; 45 | double dy = ty - cluster_y[jj]; 46 | double dz = tz - cluster_z[jj]; 47 | double r2 = dx*dx + dy*dy + dz*dz; 48 | 49 | temporary_potential += cluster_charge[starting_index_of_cluster + j] / sqrt(r2); 50 | 51 | } // end loop over interpolation points 
52 | #ifdef OPENACC_ENABLED 53 | #pragma acc atomic 54 | #endif 55 | potential[starting_index_of_target + i] += temporary_potential; 56 | } 57 | #ifdef OPENACC_ENABLED 58 | } // end kernel 59 | #endif 60 | return; 61 | } 62 | 63 | 64 | 65 | 66 | void K_Coulomb_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 67 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 68 | double *target_x, double *target_y, double *target_z, 69 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 70 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 71 | { 72 | 73 | // total_number_interpolation_points is the stride, separating clustersQ, clustersQx, clustersQy, etc. 74 | double *cluster_charge_ = &cluster_charge[8*starting_index_of_cluster + 0*number_of_interpolation_points_in_cluster]; 75 | double *cluster_charge_delta_x = &cluster_charge[8*starting_index_of_cluster + 1*number_of_interpolation_points_in_cluster]; 76 | double *cluster_charge_delta_y = &cluster_charge[8*starting_index_of_cluster + 2*number_of_interpolation_points_in_cluster]; 77 | double *cluster_charge_delta_z = &cluster_charge[8*starting_index_of_cluster + 3*number_of_interpolation_points_in_cluster]; 78 | double *cluster_charge_delta_xy = &cluster_charge[8*starting_index_of_cluster + 4*number_of_interpolation_points_in_cluster]; 79 | double *cluster_charge_delta_yz = &cluster_charge[8*starting_index_of_cluster + 5*number_of_interpolation_points_in_cluster]; 80 | double *cluster_charge_delta_xz = &cluster_charge[8*starting_index_of_cluster + 6*number_of_interpolation_points_in_cluster]; 81 | double *cluster_charge_delta_xyz = &cluster_charge[8*starting_index_of_cluster + 7*number_of_interpolation_points_in_cluster]; 82 | 83 | 84 | 85 | #ifdef OPENACC_ENABLED 86 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 87 | cluster_x, cluster_y, cluster_z, cluster_charge, potential, \ 88 | cluster_charge_, cluster_charge_delta_x, cluster_charge_delta_y, cluster_charge_delta_z, \ 89 | cluster_charge_delta_xy, cluster_charge_delta_yz, cluster_charge_delta_xz, \ 90 | cluster_charge_delta_xyz) 91 | { 92 | #endif 93 | #ifdef OPENACC_ENABLED 94 | #pragma acc loop independent 95 | #endif 96 | for (int i = 0; i < number_of_targets_in_batch; i++) { 97 | 98 | int ii = starting_index_of_target + i; 99 | double temporary_potential = 0.0; 100 | 101 | double tx = target_x[ii]; 102 | double ty = target_y[ii]; 103 | double tz = target_z[ii]; 104 | 105 | #ifdef OPENACC_ENABLED 106 | #pragma acc loop independent reduction(+:temporary_potential) 107 | #endif 108 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 109 | 110 | int jj = starting_index_of_cluster + j; 111 | double dx = tx - cluster_x[jj]; 112 | double dy = ty - cluster_y[jj]; 113 | double dz = tz - cluster_z[jj]; 114 | double r = sqrt(dx*dx + dy*dy + dz*dz); 115 | 116 | double rinv = 1 / r; 117 | double r3inv = rinv*rinv*rinv; 118 | double r5inv = r3inv*rinv*rinv; 119 | double r7inv = r5inv*rinv*rinv; 120 | 121 | temporary_potential += rinv * (cluster_charge_[j]) 122 | + r3inv * (cluster_charge_delta_x[j]*dx + cluster_charge_delta_y[j]*dy 123 | + cluster_charge_delta_z[j]*dz) 124 | + 3 * r5inv * (cluster_charge_delta_xy[j]*dx*dy + cluster_charge_delta_yz[j]*dy*dz 125 | + cluster_charge_delta_xz[j]*dx*dz) 126 | + 15 * r7inv * cluster_charge_delta_xyz[j]*dx*dy*dz; 127 | 128 | } // end loop over interpolation 
points 129 | #ifdef OPENACC_ENABLED 130 | #pragma acc atomic 131 | #endif 132 | potential[starting_index_of_target + i] += temporary_potential; 133 | } 134 | #ifdef OPENACC_ENABLED 135 | } // end kernel 136 | #endif 137 | return; 138 | } 139 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_PC_H 3 | #define H_K_COULOMB_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | void K_Coulomb_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 16 | double *target_x, double *target_y, double *target_z, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_COULOMB_PC_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_pp.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "coulomb_pp.h" 7 | 8 | void K_Coulomb_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 13 | { 14 | 15 | #ifdef OPENACC_ENABLED 16 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 17 | source_x, source_y, source_z, source_charge, potential) 18 | { 19 | #pragma acc loop independent 20 | #endif 21 | for (int i = 0; i < number_of_targets_in_batch; i++) { 22 | 23 | int ii = starting_index_of_target + i; 24 | double temporary_potential = 0.0; 25 | 26 | double tx = target_x[ii]; 27 | double ty = target_y[ii]; 28 | double tz = target_z[ii]; 29 | 30 | #ifdef OPENACC_ENABLED 31 | #pragma acc loop independent reduction(+:temporary_potential) 32 | #endif 33 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 34 | #ifdef OPENACC_ENABLED 35 | #pragma acc cache(source_x[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ 36 | source_y[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ 37 | source_z[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ 38 | source_charge[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster]) 39 | #endif 40 | 41 | 42 | int jj = starting_index_of_source + j; 43 | double dx = tx - source_x[jj]; 44 | double dy = ty - source_y[jj]; 45 | double dz = tz - source_z[jj]; 46 | double r2 = dx*dx + dy*dy + dz*dz; 47 | 48 | 
if (r2 > DBL_MIN) { 49 | temporary_potential += source_charge[jj] / sqrt(r2); 50 | } 51 | } // end loop over interpolation points 52 | #ifdef OPENACC_ENABLED 53 | #pragma acc atomic 54 | #endif 55 | potential[ii] += temporary_potential; 56 | } 57 | #ifdef OPENACC_ENABLED 58 | } // end kernel 59 | #endif 60 | return; 61 | } 62 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_PP_H 3 | #define H_K_COULOMB_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_COULOMB_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_cc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "coulomb_ss_cc.h" 7 | 8 | 9 | void K_Coulomb_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_sources, int starting_index_of_cluster, 11 | double *source_cluster_x, double *source_cluster_y, double *source_cluster_z, double *source_cluster_q, double *source_cluster_w, 12 | double *target_cluster_x, double *target_cluster_y, double *target_cluster_z, double *target_cluster_q, double *target_cluster_w, 13 | struct RunParams *run_params, int gpu_async_stream_id) 14 | { 15 | 16 | double kernel_parameter = run_params->kernel_params[0]; 17 | double kernel_parameter2 = kernel_parameter * kernel_parameter; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(source_cluster_x, source_cluster_y, source_cluster_z, source_cluster_q, source_cluster_w, \ 21 | target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, target_cluster_w) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | double temporary_weight = 0.0; 31 | 32 | double cx = target_cluster_x[starting_index_of_cluster + i]; 33 | double cy = target_cluster_y[starting_index_of_cluster + i]; 34 | double cz = target_cluster_z[starting_index_of_cluster + i]; 35 | 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc loop independent reduction(+:temporary_potential) reduction(+:temporary_weight) 38 | #endif 39 | for (int j = 0; j < number_of_sources_in_batch; j++) { 40 | #ifdef OPENACC_ENABLED 41 | #pragma acc cache(source_cluster_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 42 | source_cluster_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 43 | source_cluster_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 44 | source_cluster_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 45 | 
source_cluster_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) 46 | #endif 47 | 48 | int jj = starting_index_of_sources + j; 49 | double dx = cx - source_cluster_x[jj]; 50 | double dy = cy - source_cluster_y[jj]; 51 | double dz = cz - source_cluster_z[jj]; 52 | double r = sqrt(dx*dx + dy*dy + dz*dz); 53 | 54 | temporary_potential += source_cluster_q[jj] / r; 55 | temporary_weight += exp(-r*r/kernel_parameter2) * source_cluster_w[jj] / r; 56 | 57 | } // end loop over interpolation points 58 | #ifdef OPENACC_ENABLED 59 | #pragma acc atomic 60 | #endif 61 | target_cluster_q[starting_index_of_cluster + i] += temporary_potential; 62 | #ifdef OPENACC_ENABLED 63 | #pragma acc atomic 64 | #endif 65 | target_cluster_w[starting_index_of_cluster + i] += temporary_weight; 66 | } 67 | #ifdef OPENACC_ENABLED 68 | } // end kernel 69 | #endif 70 | return; 71 | } 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_cc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_SS_CC_H 3 | #define H_K_COULOMB_SS_CC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_cluster_x, double *source_cluster_y, double *source_cluster_z, double *source_cluster_q, double *source_cluster_w, 11 | double *target_cluster_x, double *target_cluster_y, double *target_cluster_z, double *target_cluster_charge, double *target_cluster_weight, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_COULOMB_SS_CC_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_correction.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "coulomb_ss_correction.h" 7 | 8 | 9 | void K_Coulomb_SS_Correction(double *potential, double *target_q, 10 | int numTargets, struct RunParams *run_params) 11 | { 12 | double kernel_parameter = run_params->kernel_params[0]; 13 | double param = 2.0 * M_PI * kernel_parameter * kernel_parameter; 14 | for (int i = 0; i < numTargets; i++) potential[i] += param * target_q[i]; 15 | 16 | return; 17 | } 18 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_correction.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_SS_CORRECTION_H 3 | #define H_K_COULOMB_SS_CORRECTION_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_SS_Correction(double *potential, double *target_q, 9 | int numTargets, struct RunParams *run_params); 10 | 11 | 12 | #endif /* H_K_COULOMB_SS_CORRECTION_H */ 13 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_cp.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "coulomb_ss_cp.h" 7 | 8 | 9 | void K_Coulomb_SS_CP_Lagrange(int number_of_sources_in_batch, int 
number_of_interpolation_points_in_cluster, 10 | int starting_index_of_sources, int starting_index_of_cluster, 11 | double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, double *cluster_w, 13 | struct RunParams *run_params, int gpu_async_stream_id) 14 | { 15 | 16 | double kernel_parameter = run_params->kernel_params[0]; 17 | double kernel_parameter2 = kernel_parameter * kernel_parameter; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, source_w, \ 21 | cluster_x, cluster_y, cluster_z, cluster_q, cluster_w) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | double temporary_weight = 0.0; 31 | 32 | double cx = cluster_x[starting_index_of_cluster + i]; 33 | double cy = cluster_y[starting_index_of_cluster + i]; 34 | double cz = cluster_z[starting_index_of_cluster + i]; 35 | 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc loop independent reduction(+:temporary_potential) reduction(+:temporary_weight) 38 | #endif 39 | for (int j = 0; j < number_of_sources_in_batch; j++) { 40 | #ifdef OPENACC_ENABLED 41 | #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 42 | source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 43 | source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 44 | source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 45 | source_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) 46 | #endif 47 | 48 | int jj = starting_index_of_sources + j; 49 | double dx = cx - source_x[jj]; 50 | double dy = cy - source_y[jj]; 51 | double dz = cz - source_z[jj]; 52 | double r = sqrt(dx*dx + dy*dy + dz*dz); 53 | 54 | temporary_potential += source_q[jj] * source_w[jj] / r; 55 | temporary_weight += exp(-r*r/kernel_parameter2) * source_w[jj] / r; 56 | 57 | } // end loop over interpolation points 58 | #ifdef OPENACC_ENABLED 59 | #pragma acc atomic 60 | #endif 61 | cluster_q[starting_index_of_cluster + i] += temporary_potential; 62 | #ifdef OPENACC_ENABLED 63 | #pragma acc atomic 64 | #endif 65 | cluster_w[starting_index_of_cluster + i] += temporary_weight; 66 | } 67 | #ifdef OPENACC_ENABLED 68 | } // end kernel 69 | #endif 70 | return; 71 | } 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_cp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_SS_CP_H 3 | #define H_K_COULOMB_SS_CP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | 15 | 16 | #endif /* H_K_COULOMB_SS_CP_H */ 17 | 
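The constant `2.0 * M_PI * kernel_parameter * kernel_parameter` in `K_Coulomb_SS_Correction` above is the self-term of the subtracted Gaussian screen. As a sketch (assuming the screen `exp(-r*r/kernel_parameter2)` that the `SS` kernels subtract, with delta denoting `kernel_params[0]`), the screen around target charge \(q_i\), integrated over all space against the \(1/r\) kernel, contributes

\[
q_i \int_{\mathbb{R}^3} \frac{e^{-r^2/\delta^2}}{r}\, dV
  \;=\; 4\pi q_i \int_0^\infty r\, e^{-r^2/\delta^2}\, dr
  \;=\; 2\pi \delta^2 q_i ,
\]

which is exactly the `param * target_q[i]` term that the correction adds back to each target's potential after the screened sums.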
-------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_SS_PC_H 3 | #define H_K_COULOMB_SS_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_SS_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, double *target_charge, double *cluster_weight, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | void K_Coulomb_SS_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 16 | double *target_x, double *target_y, double *target_z, double *target_charge, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, 18 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_COULOMB_SS_PC_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_pp.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "coulomb_ss_pp.h" 7 | 8 | void K_Coulomb_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, double *target_charge, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 13 | { 14 | double kernel_parameter = run_params->kernel_params[0]; 15 | double kernel_parameter2 = kernel_parameter * kernel_parameter; 16 | 17 | #ifdef OPENACC_ENABLED 18 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, target_charge, \ 19 | source_x, source_y, source_z, source_charge, source_weight, potential) 20 | { 21 | #endif 22 | #ifdef OPENACC_ENABLED 23 | #pragma acc loop independent 24 | #endif 25 | for (int i = 0; i < number_of_targets_in_batch; i++) { 26 | 27 | int ii = starting_index_of_target + i; 28 | double temporary_potential = 0.0; 29 | 30 | double tx = target_x[ii]; 31 | double ty = target_y[ii]; 32 | double tz = target_z[ii]; 33 | double tq = target_charge[ii]; 34 | 35 | #ifdef OPENACC_ENABLED 36 | #pragma acc loop independent reduction(+:temporary_potential) 37 | #endif 38 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 39 | 40 | int jj = starting_index_of_source + j; 41 | double dx = tx - source_x[jj]; 42 | double dy = ty - source_y[jj]; 43 | double dz = tz - source_z[jj]; 44 | double r = sqrt(dx*dx + dy*dy + dz*dz); 45 | 46 | if (r > DBL_MIN){ 47 | temporary_potential += (source_charge[jj] - tq * exp(-r*r/kernel_parameter2)) 48 | * source_weight[jj] / r; 49 | } 50 | } // end loop over interpolation points 51 | #ifdef OPENACC_ENABLED 52 | #pragma acc atomic 53 | #endif 54 | potential[ii] += temporary_potential; 55 | } 56 | #ifdef 
OPENACC_ENABLED 57 | } // end kernel 58 | #endif 59 | return; 60 | } 61 | 62 | 63 | void coulombSingularitySubtractionApproximationLagrange(int number_of_targets_in_batch, 64 | int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, 65 | double *target_x, double *target_y, double *target_z, double *target_charge, 66 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, 67 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 68 | { 69 | double kernel_parameter = run_params->kernel_params[0]; 70 | double kernel_parameter2 = kernel_parameter * kernel_parameter; 71 | 72 | #ifdef OPENACC_ENABLED 73 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, target_charge, \ 74 | cluster_x, cluster_y, cluster_z, cluster_charge, cluster_weight, potential) 75 | { 76 | #endif 77 | #ifdef OPENACC_ENABLED 78 | #pragma acc loop independent 79 | #endif 80 | for (int i = 0; i < number_of_targets_in_batch; i++) { 81 | 82 | int ii = starting_index_of_target + i; 83 | double temporary_potential = 0.0; 84 | 85 | double tx = target_x[ii]; 86 | double ty = target_y[ii]; 87 | double tz = target_z[ii]; 88 | double tq = target_charge[ii]; 89 | 90 | #ifdef OPENACC_ENABLED 91 | #pragma acc loop independent reduction(+:temporary_potential) 92 | #endif 93 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 94 | 95 | int jj = starting_index_of_cluster + j; 96 | double dx = tx - cluster_x[jj]; 97 | double dy = ty - cluster_y[jj]; 98 | double dz = tz - cluster_z[jj]; 99 | double r = sqrt(dx*dx + dy*dy + dz*dz); 100 | 101 | if (r > DBL_MIN) { 102 | temporary_potential += (cluster_charge[jj] - tq * cluster_weight[jj] * exp(-r*r/kernel_parameter2)) / r; 103 | } 104 | } // end loop over interpolation points 105 | #ifdef OPENACC_ENABLED 106 | #pragma acc atomic 107 | #endif 108 | potential[ii] += temporary_potential; 109 | } 110 | #ifdef OPENACC_ENABLED 111 | } // end kernel 112 | #endif 113 | return; 114 | } 115 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_SS_PP_H 3 | #define H_K_COULOMB_SS_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, double *target_charge, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, double * source_weight, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_COULOMB_SS_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/dcf/dcf.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_DCF_H 3 | #define H_DCF_H 4 | 5 | 6 | void dcfDirect(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 7 | int starting_index_of_target, int starting_index_of_cluster, 8 | double *target_x, double *target_y, double *target_z, 9 | double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, 10 | double kernel_parameter, double 
*potential, int gpu_async_stream_id); 11 | 12 | void dcfApproximationLagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 13 | int starting_index_of_target, int starting_index_of_cluster, 14 | double *target_x, double *target_y, double *target_z, 15 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 16 | double kernel_parameter, double *potential, int gpu_async_stream_id); 17 | 18 | void dcfApproximationHermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 19 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 20 | double *target_x, double *target_y, double *target_z, 21 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 22 | double kernel_parameter, double *potential, int gpu_async_stream_id); 23 | 24 | #endif /* H_DCF_H */ 25 | -------------------------------------------------------------------------------- /src/kernels/mq/mq.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_MQ_H 3 | #define H_K_MQ_H 4 | 5 | 6 | #include "mq_pp.h" 7 | #include "mq_pc.h" 8 | 9 | #endif /* H_K_MQ_H */ 10 | -------------------------------------------------------------------------------- /src/kernels/mq/mq_pc.c: -------------------------------------------------------------------------------- 1 | #ifdef OPENACC_ENABLED 2 | #include 3 | #define M_PI 3.14159265358979323846264338327950288 4 | #else 5 | #include 6 | #endif 7 | #include 8 | 9 | #include "../../run_params/struct_run_params.h" 10 | #include "mq_pc.h" 11 | 12 | 13 | void K_MQ_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 14 | int starting_index_of_target, int starting_index_of_cluster, 15 | double *target_x, double *target_y, double *target_z, 16 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 17 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 18 | { 19 | 20 | double domainLength = run_params->kernel_params[0]; 21 | double delta = run_params->kernel_params[1]; 22 | double deltaLsq = delta * delta / domainLength / domainLength; 23 | double norm_delta_L = sqrt(1 + 4 * deltaLsq); 24 | 25 | #ifdef OPENACC_ENABLED 26 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 27 | cluster_x, cluster_y, cluster_z, cluster_charge, potential) 28 | { 29 | #endif 30 | #ifdef OPENACC_ENABLED 31 | #pragma acc loop independent 32 | #endif 33 | for (int i = 0; i < number_of_targets_in_batch; i++) { 34 | 35 | int ii = starting_index_of_target + i; 36 | double temporary_potential = 0.0; 37 | double tz = target_z[ii]; 38 | 39 | #ifdef OPENACC_ENABLED 40 | #pragma acc loop independent reduction(+:temporary_potential) 41 | #endif 42 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 43 | 44 | int jj = starting_index_of_cluster + j; 45 | double dz = (tz - cluster_z[jj]) / domainLength; 46 | 47 | if (dz < -0.5) { 48 | dz += 1.0; 49 | } 50 | if (dz > 0.5) { 51 | dz -= 1.0; 52 | } 53 | 54 | temporary_potential += cluster_charge[jj] 55 | * (.5 * dz * norm_delta_L / sqrt(dz * dz + deltaLsq) - dz); 56 | } // end loop over interpolation points 57 | #ifdef OPENACC_ENABLED 58 | #pragma acc atomic 59 | #endif 60 | potential[ii] += temporary_potential; 61 | } 62 | #ifdef OPENACC_ENABLED 63 | } // end kernel 64 | #endif 65 | return; 66 | } 67 | 68 | 69 | 70 | 71 | void 
K_MQ_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 72 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 73 | double *target_x, double *target_y, double *target_z, 74 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 75 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 76 | { 77 | printf("[BaryTree] ERROR! MQ KERNEL NOT IMPLEMENTED FOR HERMITE. Exiting.\n"); 78 | return; 79 | } 80 | -------------------------------------------------------------------------------- /src/kernels/mq/mq_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_MQ_PC_H 3 | #define H_K_MQ_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_MQ_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | void K_MQ_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 16 | double *target_x, double *target_y, double *target_z, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_MQ_PC_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/mq/mq_pp.c: -------------------------------------------------------------------------------- 1 | #ifdef OPENACC_ENABLED 2 | #include 3 | #define M_PI 3.14159265358979323846264338327950288 4 | #else 5 | #include 6 | #endif 7 | #include 8 | 9 | #include "../../run_params/struct_run_params.h" 10 | #include "mq_pp.h" 11 | 12 | 13 | void K_MQ_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 14 | int starting_index_of_target, int starting_index_of_source, 15 | double *target_x, double *target_y, double *target_z, 16 | double *source_x, double *source_y, double *source_z, double *source_charge, 17 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 18 | { 19 | 20 | double domainLength = run_params->kernel_params[0]; 21 | double delta = run_params->kernel_params[1]; 22 | double deltaLsq = delta * delta / domainLength / domainLength; 23 | double norm_delta_L = sqrt(1 + 4 * deltaLsq); 24 | 25 | 26 | #ifdef OPENACC_ENABLED 27 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 28 | source_x, source_y, source_z, source_charge, potential) 29 | { 30 | #endif 31 | #ifdef OPENACC_ENABLED 32 | #pragma acc loop independent 33 | #endif 34 | for (int i = 0; i < number_of_targets_in_batch; i++) { 35 | 36 | int ii = starting_index_of_target + i; 37 | double temporary_potential = 0.0; 38 | double tz = target_z[ii]; 39 | 40 | #ifdef OPENACC_ENABLED 41 | #pragma acc loop independent reduction(+:temporary_potential) 42 | #endif 43 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 44 | 45 | int jj = starting_index_of_source + j; 46 | double dz = (tz - source_z[jj]) / domainLength; 47 | 48 | if (dz < -0.5) { 
49 | dz += 1.0; 50 | } 51 | if (dz > 0.5) { 52 | dz -= 1.0; 53 | } 54 | temporary_potential += source_charge[jj] 55 | * (.5 * dz * norm_delta_L / sqrt(dz * dz + deltaLsq) - dz); 56 | } // end loop over interpolation points 57 | #ifdef OPENACC_ENABLED 58 | #pragma acc atomic 59 | #endif 60 | potential[ii] += temporary_potential; 61 | } 62 | #ifdef OPENACC_ENABLED 63 | } // end kernel 64 | #endif 65 | return; 66 | } 67 | -------------------------------------------------------------------------------- /src/kernels/mq/mq_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_MQ_PP_H 3 | #define H_K_MQ_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_MQ_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_MQ_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/rbs-u/rbs-u.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_U_H 3 | #define H_K_RBS_U_H 4 | 5 | #include "rbs-u_pp.h" 6 | #include "rbs-u_pc.h" 7 | #include "rbs-u_cp.h" 8 | 9 | #endif /* H_K_RBS_U_H */ 10 | -------------------------------------------------------------------------------- /src/kernels/rbs-u/rbs-u_cp.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "rbs-u_cp.h" 7 | 8 | 9 | void K_RBSu_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_source, int starting_index_of_cluster, 11 | double *source_x, double *source_y, double *source_z, double *source_q, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 13 | struct RunParams *run_params, int gpu_async_stream_id) 14 | { 15 | 16 | double delta = run_params->kernel_params[0]; 17 | double delta2 = delta * delta; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ 21 | cluster_x, cluster_y, cluster_z, cluster_q) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double cx = cluster_x[starting_index_of_cluster + i]; 32 | double cy = cluster_y[starting_index_of_cluster + i]; 33 | 34 | #ifdef OPENACC_ENABLED 35 | #pragma acc loop independent reduction(+:temporary_potential) 36 | #endif 37 | for (int j = 0; j < number_of_sources_in_batch; j++) { 38 | #ifdef OPENACC_ENABLED 39 | #pragma acc cache(source_x[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch], \ 40 | source_y[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch], \ 41 | source_z[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch], \ 42 | source_q[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch]) 43 | #endif 44 | 45 | int jj = 
starting_index_of_source + j; 46 | double dx = cx - source_x[jj]; 47 | double dy = cy - source_y[jj]; 48 | double r = dx*dx + dy*dy + delta2; 49 | 50 | temporary_potential += 1. / (2. * M_PI) * source_q[jj] * dy / r; 51 | 52 | } // end loop over interpolation points 53 | #ifdef OPENACC_ENABLED 54 | #pragma acc atomic 55 | #endif 56 | cluster_q[starting_index_of_cluster + i] += temporary_potential; 57 | } 58 | #ifdef OPENACC_ENABLED 59 | } // end kernel 60 | #endif 61 | return; 62 | } 63 | -------------------------------------------------------------------------------- /src/kernels/rbs-u/rbs-u_cp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_U_CP_H 3 | #define H_K_RBS_U_CP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RBSu_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_x, double *source_y, double *source_z, double *source_q, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_RBS_U_CP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/rbs-u/rbs-u_pc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "rbs-u_pc.h" 7 | 8 | 9 | void K_RBSu_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_cluster, 11 | double *target_x, double *target_y, double *target_z, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | double delta = run_params->kernel_params[0]; 17 | double delta2 = delta * delta; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 21 | cluster_x, cluster_y, cluster_z, cluster_charge, potential) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_targets_in_batch; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double tx = target_x[starting_index_of_target + i]; 32 | double ty = target_y[starting_index_of_target + i]; 33 | 34 | #ifdef OPENACC_ENABLED 35 | #pragma acc loop independent reduction(+:temporary_potential) 36 | #endif 37 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 38 | 39 | int jj = starting_index_of_cluster + j; 40 | double dx = tx - cluster_x[jj]; 41 | double dy = ty - cluster_y[jj]; 42 | double r = dx*dx + dy*dy + delta2; 43 | 44 | temporary_potential += 1. / (2. 
* M_PI) * cluster_charge[jj] * dy / r; 45 | 46 | } // end loop over interpolation points 47 | #ifdef OPENACC_ENABLED 48 | #pragma acc atomic 49 | #endif 50 | potential[starting_index_of_target + i] += temporary_potential; 51 | } 52 | #ifdef OPENACC_ENABLED 53 | } // end kernel 54 | #endif 55 | return; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /src/kernels/rbs-u/rbs-u_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_U_PC_H 3 | #define H_K_RBS_U_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RBSu_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_RBS_U_PC_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/rbs-u/rbs-u_pp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "rbs-u_pp.h" 7 | 8 | 9 | void K_RBSu_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_source, 11 | double *target_x, double *target_y, double *target_z, 12 | double *source_x, double *source_y, double *source_z, double *source_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | double delta = run_params->kernel_params[0]; 17 | double delta2 = delta * delta; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 21 | source_x, source_y, source_z, source_charge, potential) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_targets_in_batch; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double tx = target_x[starting_index_of_target + i]; 32 | double ty = target_y[starting_index_of_target + i]; 33 | 34 | #ifdef OPENACC_ENABLED 35 | #pragma acc loop independent reduction(+:temporary_potential) 36 | #endif 37 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 38 | 39 | int jj = starting_index_of_source + j; 40 | double dx = tx - source_x[jj]; 41 | double dy = ty - source_y[jj]; 42 | double r = dx*dx + dy*dy + delta2; 43 | 44 | temporary_potential += 1. / (2. 
* M_PI) * source_charge[jj] * dy / r; 45 | 46 | } // end loop over source points 47 | #ifdef OPENACC_ENABLED 48 | #pragma acc atomic 49 | #endif 50 | potential[starting_index_of_target + i] += temporary_potential; 51 | } 52 | #ifdef OPENACC_ENABLED 53 | } // end kernel 54 | #endif 55 | return; 56 | } 57 | -------------------------------------------------------------------------------- /src/kernels/rbs-u/rbs-u_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_U_PP_H 3 | #define H_K_RBS_U_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | void K_RBSu_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 8 | int starting_index_of_target, int starting_index_of_source, 9 | double *target_x, double *target_y, double *target_z, 10 | double *source_x, double *source_y, double *source_z, double *source_charge, 11 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 12 | 13 | 14 | #endif /* H_K_RBS_U_PP_H */ 15 | -------------------------------------------------------------------------------- /src/kernels/rbs-v/rbs-v.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_V_H 3 | #define H_K_RBS_V_H 4 | 5 | #include "rbs-v_pp.h" 6 | #include "rbs-v_pc.h" 7 | #include "rbs-v_cp.h" 8 | 9 | #endif /* H_K_RBS_V_H */ 10 | -------------------------------------------------------------------------------- /src/kernels/rbs-v/rbs-v_cp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "rbs-v_cp.h" 7 | 8 | 9 | void K_RBSv_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_source, int starting_index_of_cluster, 11 | double *source_x, double *source_y, double *source_z, double *source_q, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 13 | struct RunParams *run_params, int gpu_async_stream_id) 14 | { 15 | 16 | double delta = run_params->kernel_params[0]; 17 | double delta2 = delta * delta; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ 21 | cluster_x, cluster_y, cluster_z, cluster_q) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double cx = cluster_x[starting_index_of_cluster + i]; 32 | double cy = cluster_y[starting_index_of_cluster + i]; 33 | 34 | #ifdef OPENACC_ENABLED 35 | #pragma acc loop independent reduction(+:temporary_potential) 36 | #endif 37 | for (int j = 0; j < number_of_sources_in_batch; j++) { 38 | #ifdef OPENACC_ENABLED 39 | #pragma acc cache(source_x[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch], \ 40 | source_y[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch], \ 41 | source_z[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch], \ 42 | source_q[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch]) 43 | #endif 44 | 45 | int jj = starting_index_of_source + j; 46 | double dx = cx - source_x[jj]; 47 | double dy = cy - source_y[jj]; 48 | double r = 
dx*dx + dy*dy + delta2; 49 | 50 | temporary_potential += -1. / (2. * M_PI) * source_q[jj] * dx / r; 51 | 52 | } // end loop over sources in batch 53 | #ifdef OPENACC_ENABLED 54 | #pragma acc atomic 55 | #endif 56 | cluster_q[starting_index_of_cluster + i] += temporary_potential; 57 | } 58 | #ifdef OPENACC_ENABLED 59 | } // end kernel 60 | #endif 61 | return; 62 | } 63 | -------------------------------------------------------------------------------- /src/kernels/rbs-v/rbs-v_cp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_V_CP_H 3 | #define H_K_RBS_V_CP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RBSv_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_x, double *source_y, double *source_z, double *source_q, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_RBS_V_CP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/rbs-v/rbs-v_pc.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "rbs-v_pc.h" 7 | 8 | 9 | void K_RBSv_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_cluster, 11 | double *target_x, double *target_y, double *target_z, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | double delta = run_params->kernel_params[0]; 17 | double delta2 = delta * delta; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 21 | cluster_x, cluster_y, cluster_z, cluster_charge, potential) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_targets_in_batch; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double tx = target_x[starting_index_of_target + i]; 32 | double ty = target_y[starting_index_of_target + i]; 33 | 34 | #ifdef OPENACC_ENABLED 35 | #pragma acc loop independent reduction(+:temporary_potential) 36 | #endif 37 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 38 | 39 | int jj = starting_index_of_cluster + j; 40 | double dx = tx - cluster_x[jj]; 41 | double dy = ty - cluster_y[jj]; 42 | double r = dx*dx + dy*dy + delta2; 43 | 44 | temporary_potential += -1. / (2. 
* M_PI) * cluster_charge[jj] * dx / r; 45 | 46 | } // end loop over interpolation points 47 | #ifdef OPENACC_ENABLED 48 | #pragma acc atomic 49 | #endif 50 | potential[starting_index_of_target + i] += temporary_potential; 51 | } 52 | #ifdef OPENACC_ENABLED 53 | } // end kernel 54 | #endif 55 | return; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /src/kernels/rbs-v/rbs-v_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_V_PC_H 3 | #define H_K_RBS_V_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RBSv_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_RBS_V_PC_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/rbs-v/rbs-v_pp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "rbs-v_pp.h" 7 | 8 | 9 | void K_RBSv_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_source, 11 | double *target_x, double *target_y, double *target_z, 12 | double *source_x, double *source_y, double *source_z, double *source_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | double delta = run_params->kernel_params[0]; 17 | double delta2 = delta * delta; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 21 | source_x, source_y, source_z, source_charge, potential) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_targets_in_batch; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double tx = target_x[starting_index_of_target + i]; 32 | double ty = target_y[starting_index_of_target + i]; 33 | 34 | #ifdef OPENACC_ENABLED 35 | #pragma acc loop independent reduction(+:temporary_potential) 36 | #endif 37 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 38 | 39 | int jj = starting_index_of_source + j; 40 | double dx = tx - source_x[jj]; 41 | double dy = ty - source_y[jj]; 42 | double r = dx*dx + dy*dy + delta2; 43 | 44 | temporary_potential += -1. / (2. 
* M_PI) * source_charge[jj] * dx / r; 45 | 46 | } // end loop over source points 47 | #ifdef OPENACC_ENABLED 48 | #pragma acc atomic 49 | #endif 50 | potential[starting_index_of_target + i] += temporary_potential; 51 | } 52 | #ifdef OPENACC_ENABLED 53 | } // end kernel 54 | #endif 55 | return; 56 | } 57 | -------------------------------------------------------------------------------- /src/kernels/rbs-v/rbs-v_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_V_PP_H 3 | #define H_K_RBS_V_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | void K_RBSv_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 8 | int starting_index_of_target, int starting_index_of_source, 9 | double *target_x, double *target_y, double *target_z, 10 | double *source_x, double *source_y, double *source_z, double *source_charge, 11 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 12 | 13 | 14 | #endif /* H_K_RBS_V_PP_H */ 15 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_COULOMB_H 3 | #define H_K_REGULARIZED_COULOMB_H 4 | 5 | 6 | #include "regularized-coulomb_pp.h" 7 | #include "regularized-coulomb_pc.h" 8 | #include "regularized-coulomb_cp.h" 9 | 10 | #include "regularized-coulomb_ss_pp.h" 11 | #include "regularized-coulomb_ss_correction.h" 12 | #include "regularized-coulomb_ss_pc.h" 13 | //#include "regularized-coulomb_ss_cp.h" 14 | 15 | 16 | #endif /* H_K_REGULARIZED_COULOMB_H */ 17 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_cp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_COULOMB_CP_H 3 | #define H_K_REGULARIZED_COULOMB_CP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedCoulomb_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_x, double *source_y, double *source_z, double *source_q, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | void K_RegularizedCoulomb_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_sources, int starting_index_of_cluster, 16 | double *source_x, double *source_y, double *source_z, double *source_q, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_REGULARIZED_COULOMB_CP_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_COULOMB_PC_H 3 | #define H_K_REGULARIZED_COULOMB_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedCoulomb_PC_Lagrange(int number_of_targets_in_batch, int 
number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | void K_RegularizedCoulomb_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 16 | double *target_x, double *target_y, double *target_z, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_REGULARIZED_COULOMB_PC_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_pp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-coulomb_pp.h" 7 | 8 | 9 | void K_RegularizedCoulomb_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_source, 11 | double *target_x, double *target_y, double *target_z, 12 | double *source_x, double *source_y, double *source_z, double *source_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | double epsilon = run_params->kernel_params[0]; 16 | 17 | #ifdef OPENACC_ENABLED 18 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 19 | source_x, source_y, source_z, source_charge, potential) 20 | { 21 | #pragma acc loop independent 22 | #endif 23 | for (int i = 0; i < number_of_targets_in_batch; i++) { 24 | 25 | int ii = starting_index_of_target + i; 26 | double temporary_potential = 0.0; 27 | 28 | double tx = target_x[ii]; 29 | double ty = target_y[ii]; 30 | double tz = target_z[ii]; 31 | 32 | #ifdef OPENACC_ENABLED 33 | #pragma acc loop independent reduction(+:temporary_potential) 34 | #endif 35 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc cache(source_x[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster-1], \ 38 | source_y[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster-1], \ 39 | source_z[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster-1], \ 40 | source_charge[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster-1]) 41 | #endif 42 | 43 | 44 | int jj = starting_index_of_source + j; 45 | double dx = tx - source_x[jj]; 46 | double dy = ty - source_y[jj]; 47 | double dz = tz - source_z[jj]; 48 | double r2 = dx*dx + dy*dy + dz*dz + epsilon*epsilon; 49 | 50 | temporary_potential += source_charge[jj] / sqrt(r2); 51 | } // end loop over source points 52 | #ifdef OPENACC_ENABLED 53 | #pragma acc atomic 54 | #endif 55 | potential[ii] += temporary_potential; 56 | } 57 | #ifdef OPENACC_ENABLED 58 | } // end kernel 59 | #endif 60 | return; 61 | } 62 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_pp.h: 
-------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_COULOMB_PP_H 3 | #define H_K_REGULARIZED_COULOMB_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedCoulomb_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_REGULARIZED_COULOMB_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_ss_correction.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-coulomb_ss_correction.h" 7 | 8 | 9 | void K_RegularizedCoulomb_SS_Correction(double *potential, double *target_q, 10 | int numTargets, struct RunParams *run_params) 11 | { 12 | double alpha = run_params->kernel_params[0]; 13 | double param = 2.0 * M_PI * alpha * alpha; 14 | for (int i = 0; i < numTargets; i++) potential[i] += param * target_q[i]; 15 | 16 | return; 17 | } 18 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_ss_correction.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_COULOMB_SS_CORRECTION_H 3 | #define H_K_REGULARIZED_COULOMB_SS_CORRECTION_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedCoulomb_SS_Correction(double *potential, double *target_q, 9 | int numTargets, struct RunParams *run_params); 10 | 11 | 12 | #endif /* H_K_REGULARIZED_COULOMB_SS_CORRECTION_H */ 13 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_ss_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_COULOMB_SS_PC_LAGRANGE_H 3 | #define H_K_REGULARIZED_COULOMB_SS_PC_LAGRANGE_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedCoulomb_SS_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, double *target_charge, double *cluster_weight, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | void K_RegularizedCoulomb_SS_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 16 | double *target_x, double *target_y, double *target_z, double *target_charge, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, 18 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_REGULARIZED_COULOMB_SS_PC_LAGRANGE_H */ 22 | 
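
For orientation, the particle-particle (PP) kernels above are plain batched direct sums: for each target in a batch they accumulate contributions from a contiguous run of source points. The following driver is an illustrative sketch, not a file from this repository; it assumes struct RunParams exposes kernel_params as an indexable double pointer, which is how the kernels themselves read run_params->kernel_params[0].

/* Illustrative sketch only (not part of the repository). */
#include <stdio.h>
#include "src/run_params/struct_run_params.h"
#include "src/kernels/regularized-coulomb/regularized-coulomb_pp.h"

int main(void)
{
    /* Four toy particles acting as both sources and targets (values are arbitrary). */
    double x[] = {0.0, 1.0, 0.0, 1.0};
    double y[] = {0.0, 0.0, 1.0, 1.0};
    double z[] = {0.0, 0.0, 0.0, 0.0};
    double q[] = {1.0, -1.0, 1.0, -1.0};
    double potential[4] = {0.0, 0.0, 0.0, 0.0};

    double params[1] = {0.1};          /* kernel_params[0] = regularization epsilon */
    struct RunParams run_params;
    run_params.kernel_params = params; /* assumes a double* member, per the kernels' own usage */

    /* One batch of 4 targets against the same 4 points as sources, both starting
       at index 0; the GPU stream id is ignored in CPU (non-OpenACC) builds. */
    K_RegularizedCoulomb_PP(4, 4, 0, 0, x, y, z, x, y, z, q,
                            &run_params, potential, 0);

    for (int i = 0; i < 4; i++)
        printf("potential[%d] = %f\n", i, potential[i]);
    return 0;
}
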
-------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_ss_pp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-coulomb_ss_pp.h" 7 | 8 | 9 | void K_RegularizedCoulomb_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_source, 11 | double *target_x, double *target_y, double *target_z, double *target_charge, 12 | double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | double alpha2 = run_params->kernel_params[0] * run_params->kernel_params[0]; 16 | double epsilon2 = run_params->kernel_params[1] * run_params->kernel_params[1]; 17 | 18 | #ifdef OPENACC_ENABLED 19 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, target_charge, \ 20 | source_x, source_y, source_z, source_charge, source_weight, potential) 21 | { 22 | #endif 23 | #ifdef OPENACC_ENABLED 24 | #pragma acc loop independent 25 | #endif 26 | for (int i = 0; i < number_of_targets_in_batch; i++) { 27 | 28 | int ii = starting_index_of_target + i; 29 | double temporary_potential = 0.0; 30 | 31 | double tx = target_x[ii]; 32 | double ty = target_y[ii]; 33 | double tz = target_z[ii]; 34 | double tq = target_charge[ii]; 35 | 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc loop independent reduction(+:temporary_potential) 38 | #endif 39 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 40 | 41 | int jj = starting_index_of_source + j; 42 | double dx = tx - source_x[jj]; 43 | double dy = ty - source_y[jj]; 44 | double dz = tz - source_z[jj]; 45 | double r2 = dx*dx + dy*dy + dz*dz; 46 | 47 | temporary_potential += (source_charge[jj] - tq * exp(-r2 / alpha2)) 48 | * source_weight[jj] / sqrt(r2 + epsilon2); 49 | } // end loop over source points 50 | #ifdef OPENACC_ENABLED 51 | #pragma acc atomic 52 | #endif 53 | potential[ii] += temporary_potential; 54 | } 55 | #ifdef OPENACC_ENABLED 56 | } // end kernel 57 | #endif 58 | return; 59 | } 60 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_ss_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_COULOMB_SS_PP_H 3 | #define H_K_REGULARIZED_COULOMB_SS_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedCoulomb_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, double *target_charge, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_REGULARIZED_COULOMB_SS_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_YUKAWA_H 3 | #define H_K_REGULARIZED_YUKAWA_H 4 
| 5 | 6 | #include "regularized-yukawa_pp.h" 7 | #include "regularized-yukawa_pc.h" 8 | #include "regularized-yukawa_cp.h" 9 | 10 | #include "regularized-yukawa_ss_pp.h" 11 | #include "regularized-yukawa_ss_correction.h" 12 | #include "regularized-yukawa_ss_pc.h" 13 | //#include "regularized-yukawa_ss_cp.h" 14 | 15 | 16 | #endif /* H_K_REGULARIZED_YUKAWA_H */ 17 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_cp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-yukawa_cp.h" 7 | 8 | 9 | void K_RegularizedYukawa_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_sources, int starting_index_of_cluster, 11 | double *source_x, double *source_y, double *source_z, double *source_q, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 13 | struct RunParams *run_params, int gpu_async_stream_id) 14 | { 15 | 16 | double kappa = run_params->kernel_params[0]; 17 | double epsilon2 = run_params->kernel_params[1] * run_params->kernel_params[1]; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ 21 | cluster_x, cluster_y, cluster_z, cluster_q) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double cx = cluster_x[starting_index_of_cluster + i]; 32 | double cy = cluster_y[starting_index_of_cluster + i]; 33 | double cz = cluster_z[starting_index_of_cluster + i]; 34 | 35 | #ifdef OPENACC_ENABLED 36 | #pragma acc loop independent reduction(+:temporary_potential) 37 | #endif 38 | for (int j = 0; j < number_of_sources_in_batch; j++) { 39 | #ifdef OPENACC_ENABLED 40 | #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 41 | source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 42 | source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 43 | source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) 44 | #endif 45 | 46 | int jj = starting_index_of_sources + j; 47 | double dx = cx - source_x[jj]; 48 | double dy = cy - source_y[jj]; 49 | double dz = cz - source_z[jj]; 50 | double r2 = dx*dx + dy*dy + dz*dz; 51 | double r = sqrt(r2); 52 | 53 | temporary_potential += source_q[jj] * exp(-kappa * r) / sqrt(r2 + epsilon2); 54 | 55 | } // end loop over sources in batch 56 | #ifdef OPENACC_ENABLED 57 | #pragma acc atomic 58 | #endif 59 | cluster_q[starting_index_of_cluster + i] += temporary_potential; 60 | } 61 | #ifdef OPENACC_ENABLED 62 | 63 | } // end kernel 64 | #endif 65 | return; 66 | } 67 | 68 | 69 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_cp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_YUKAWA_CP_H 3 | #define H_K_REGULARIZED_YUKAWA_CP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedYukawa_CP_Lagrange(int number_of_sources_in_batch, int 
number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_x, double *source_y, double *source_z, double *source_q, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | 15 | 16 | #endif /* H_K_REGULARIZED_YUKAWA_CP_H */ 17 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_pc.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-yukawa_pc.h" 7 | 8 | 9 | void K_RegularizedYukawa_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_cluster, 11 | double *target_x, double *target_y, double *target_z, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | double kappa = run_params->kernel_params[0]; 17 | double epsilon2 = run_params->kernel_params[1] * run_params->kernel_params[1]; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 21 | cluster_x, cluster_y, cluster_z, cluster_charge, potential) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_targets_in_batch; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double tx = target_x[starting_index_of_target + i]; 32 | double ty = target_y[starting_index_of_target + i]; 33 | double tz = target_z[starting_index_of_target + i]; 34 | 35 | #ifdef OPENACC_ENABLED 36 | #pragma acc loop independent reduction(+:temporary_potential) 37 | #endif 38 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 39 | 40 | int jj = starting_index_of_cluster + j; 41 | double dx = tx - cluster_x[jj]; 42 | double dy = ty - cluster_y[jj]; 43 | double dz = tz - cluster_z[jj]; 44 | double r2 = dx*dx + dy*dy + dz*dz; 45 | double r = sqrt(r2); 46 | 47 | temporary_potential += cluster_charge[jj] * exp(-kappa * r) / sqrt(r2 + epsilon2); 48 | } // end loop over interpolation points 49 | #ifdef OPENACC_ENABLED 50 | #pragma acc atomic 51 | #endif 52 | potential[starting_index_of_target + i] += temporary_potential; 53 | } 54 | #ifdef OPENACC_ENABLED 55 | } // end kernel 56 | #endif 57 | return; 58 | } 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_YUKAWA_PC_H 3 | #define H_K_REGULARIZED_YUKAWA_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedYukawa_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | /* 15 | void K_RegularizedYukawa_PC_Hermite(int 
number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 16 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 17 | double *target_x, double *target_y, double *target_z, 18 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 19 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 20 | */ 21 | 22 | #endif /* H_K_REGULARIZED_YUKAWA_PC_H */ 23 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_pp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-yukawa_pp.h" 7 | 8 | 9 | void K_RegularizedYukawa_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_source, 11 | double *target_x, double *target_y, double *target_z, 12 | double *source_x, double *source_y, double *source_z, double *source_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | double kappa = run_params->kernel_params[0]; 17 | double epsilon2 = run_params->kernel_params[1] * run_params->kernel_params[1]; 18 | 19 | 20 | #ifdef OPENACC_ENABLED 21 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 22 | source_x, source_y, source_z, source_charge, potential) 23 | { 24 | #endif 25 | #ifdef OPENACC_ENABLED 26 | #pragma acc loop independent 27 | #endif 28 | for (int i = 0; i < number_of_targets_in_batch; i++) { 29 | 30 | double temporary_potential = 0.0; 31 | 32 | double tx = target_x[starting_index_of_target + i]; 33 | double ty = target_y[starting_index_of_target + i]; 34 | double tz = target_z[starting_index_of_target + i]; 35 | 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc loop independent reduction(+:temporary_potential) 38 | #endif 39 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 40 | 41 | double dx = tx - source_x[starting_index_of_source + j]; 42 | double dy = ty - source_y[starting_index_of_source + j]; 43 | double dz = tz - source_z[starting_index_of_source + j]; 44 | double r = sqrt(dx*dx + dy*dy + dz*dz); 45 | 46 | 47 | temporary_potential += source_charge[starting_index_of_source + j] * exp(-kappa*r) / sqrt(r*r + epsilon2); 48 | } // end loop over source points 49 | #ifdef OPENACC_ENABLED 50 | #pragma acc atomic 51 | #endif 52 | potential[starting_index_of_target + i] += temporary_potential; 53 | } 54 | #ifdef OPENACC_ENABLED 55 | } // end kernel 56 | #endif 57 | return; 58 | } 59 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_YUKAWA_PP_H 3 | #define H_K_REGULARIZED_YUKAWA_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedYukawa_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* 
H_K_REGULARIZED_YUKAWA_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_ss_correction.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-yukawa_ss_correction.h" 7 | 8 | 9 | void K_RegularizedYukawa_SS_Correction(double *potential, double *target_q, 10 | int numTargets, struct RunParams *run_params) 11 | { 12 | double kappa = run_params->kernel_params[0]; 13 | double param = 4.0 * M_PI / kappa / kappa; 14 | for (int i = 0; i < numTargets; i++) potential[i] += param * target_q[i]; 15 | 16 | return; 17 | } 18 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_ss_correction.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_YUKAWA_SS_CORRECTION_H 3 | #define H_K_REGULARIZED_YUKAWA_SS_CORRECTION_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedYukawa_SS_Correction(double *potential, double *target_q, 9 | int numTargets, struct RunParams *run_params); 10 | 11 | 12 | #endif /* H_K_REGULARIZED_YUKAWA_SS_CORRECTION_H */ 13 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_ss_pc.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-yukawa_ss_pc.h" 7 | 8 | 9 | void K_RegularizedYukawa_SS_PC_Lagrange(int number_of_targets_in_batch, 10 | int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, 11 | double *target_x, double *target_y, double *target_z, double *target_charge, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | double kappa = run_params->kernel_params[0]; 16 | double epsilon = run_params->kernel_params[1]; 17 | 18 | #ifdef OPENACC_ENABLED 19 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, target_charge, \ 20 | cluster_x, cluster_y, cluster_z, cluster_charge, cluster_weight, potential) 21 | { 22 | #endif 23 | #ifdef OPENACC_ENABLED 24 | #pragma acc loop independent 25 | #endif 26 | for (int i = 0; i < number_of_targets_in_batch; i++) { 27 | 28 | int ii = starting_index_of_target + i; 29 | double temporary_potential = 0.0; 30 | 31 | double tx = target_x[ii]; 32 | double ty = target_y[ii]; 33 | double tz = target_z[ii]; 34 | double tq = target_charge[ii]; 35 | 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc loop independent reduction(+:temporary_potential) 38 | #endif 39 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 40 | 41 | int jj = starting_index_of_cluster + j; 42 | double dx = tx - cluster_x[jj]; 43 | double dy = ty - cluster_y[jj]; 44 | double dz = tz - cluster_z[jj]; 45 | double r = sqrt(dx*dx + dy*dy + dz*dz); 46 | 47 | temporary_potential += (cluster_charge[jj] - tq * cluster_weight[jj]) * exp(-kappa*r) / sqrt(r*r + epsilon*epsilon); 48 | } // end loop over interpolation points 49 | #ifdef OPENACC_ENABLED 50 | #pragma acc atomic 51 | #endif 52 | 
potential[ii] += temporary_potential; 53 | } 54 | #ifdef OPENACC_ENABLED 55 | } // end kernel 56 | #endif 57 | return; 58 | } 59 | 60 | 61 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_ss_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_YUKAWA_SS_PC_H 3 | #define H_K_REGULARIZED_YUKAWA_SS_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedYukawa_SS_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, double *target_charge, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | 16 | #endif /* H_K_REGULARIZED_YUKAWA_SS_PC_H */ 17 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_ss_pp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | #include "regularized-yukawa_ss_pp.h" 8 | 9 | void K_RegularizedYukawa_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_source, 11 | double *target_x, double *target_y, double *target_z, double *target_charge, 12 | double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | double kappa = run_params->kernel_params[0]; 17 | double epsilon = run_params->kernel_params[1]; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, target_charge, \ 21 | source_x, source_y, source_z, source_charge, source_weight, potential) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_targets_in_batch; i++) { 28 | 29 | int ii = starting_index_of_target + i; 30 | double temporary_potential = 0.0; 31 | 32 | double tx = target_x[ii]; 33 | double ty = target_y[ii]; 34 | double tz = target_z[ii]; 35 | double tq = target_charge[ii]; 36 | 37 | #ifdef OPENACC_ENABLED 38 | #pragma acc loop independent reduction(+:temporary_potential) 39 | #endif 40 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 41 | 42 | int jj = starting_index_of_source + j; 43 | double dx = tx - source_x[jj]; 44 | double dy = ty - source_y[jj]; 45 | double dz = tz - source_z[jj]; 46 | double r = sqrt(dx*dx + dy*dy + dz*dz); 47 | 48 | temporary_potential += (source_charge[jj] - tq) * source_weight[jj] * exp(-kappa*r) / sqrt(r*r + epsilon*epsilon); 49 | } // end loop over source points 50 | #ifdef OPENACC_ENABLED 51 | #pragma acc atomic 52 | #endif 53 | potential[ii] += temporary_potential; 54 | } 55 | #ifdef OPENACC_ENABLED 56 | } // end kernel 57 | #endif 58 | return; 59 | } 60 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_ss_pp.h: -------------------------------------------------------------------------------- 1 | 
/* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_YUKAWA_SS_PP_H 3 | #define H_K_REGULARIZED_YUKAWA_SS_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedYukawa_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, double *target_charge, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_REGULARIZED_YUKAWA_SS_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/sin-over-r/sin-over-r.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_SIN_OVER_R_H 3 | #define H_K_SIN_OVER_R_H 4 | 5 | #include "sin-over-r_pp.h" 6 | #include "sin-over-r_pc.h" 7 | #include "sin-over-r_cp.h" 8 | 9 | 10 | 11 | #endif /* H_K_SIN_OVER_R_H */ 12 | -------------------------------------------------------------------------------- /src/kernels/sin-over-r/sin-over-r_cp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_SIN_OVER_R_CP_H 3 | #define H_K_SIN_OVER_R_CP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_SinOverR_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_x, double *source_y, double *source_z, double *source_q, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | void K_SinOverR_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_sources, int starting_index_of_cluster, 16 | double *source_x, double *source_y, double *source_z, double *source_q, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_SIN_OVER_R_CP_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/sin-over-r/sin-over-r_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_SIN_OVER_R_PC_H 3 | #define H_K_SIN_OVER_R_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_SinOverR_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | void K_SinOverR_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 16 | double *target_x, double *target_y, double *target_z, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 19 | 20 
| 21 | #endif /* H_K_SIN_OVER_R_PC_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/sin-over-r/sin-over-r_pp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "sin-over-r_pp.h" 7 | 8 | void K_SinOverR_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 13 | { 14 | 15 | double kernel_parameter = run_params->kernel_params[0]; 16 | 17 | #ifdef OPENACC_ENABLED 18 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 19 | source_x, source_y, source_z, source_charge, potential) 20 | { 21 | #pragma acc loop independent 22 | #endif 23 | for (int i = 0; i < number_of_targets_in_batch; i++) { 24 | 25 | int ii = starting_index_of_target + i; 26 | double temporary_potential = 0.0; 27 | 28 | double tx = target_x[ii]; 29 | double ty = target_y[ii]; 30 | double tz = target_z[ii]; 31 | 32 | #ifdef OPENACC_ENABLED 33 | #pragma acc loop independent reduction(+:temporary_potential) 34 | #endif 35 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc cache(source_x[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ 38 | source_y[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ 39 | source_z[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ 40 | source_charge[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster]) 41 | #endif 42 | 43 | 44 | int jj = starting_index_of_source + j; 45 | double dx = tx - source_x[jj]; 46 | double dy = ty - source_y[jj]; 47 | double dz = tz - source_z[jj]; 48 | double r = sqrt(dx*dx + dy*dy + dz*dz); 49 | 50 | if (r > DBL_MIN) { 51 | temporary_potential += source_charge[jj] * sin(kernel_parameter * r) / r; 52 | } 53 | } // end loop over source points 54 | #ifdef OPENACC_ENABLED 55 | #pragma acc atomic 56 | #endif 57 | potential[ii] += temporary_potential; 58 | } 59 | #ifdef OPENACC_ENABLED 60 | } // end kernel 61 | #endif 62 | return; 63 | } 64 | -------------------------------------------------------------------------------- /src/kernels/sin-over-r/sin-over-r_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_SIN_OVER_R_PP_H 3 | #define H_K_SIN_OVER_R_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_SinOverR_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_SIN_OVER_R_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/tcf/tcf.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | 
#include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "tcf.h" 6 | 7 | void tcfDirect(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 8 | int starting_index_of_target, int starting_index_of_source, 9 | double *target_x, double *target_y, double *target_z, 10 | double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, 11 | double kernel_parameter1, double kernel_parameter2, double *potential, int gpu_async_stream_id) 12 | { 13 | 14 | double kap_eta_2 = kernel_parameter1 * kernel_parameter2 / 2.0; 15 | 16 | #ifdef OPENACC_ENABLED 17 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 18 | source_x, source_y, source_z, source_q, source_w, potential) 19 | { 20 | #endif 21 | #ifdef OPENACC_ENABLED 22 | #pragma acc loop independent 23 | #endif 24 | for (int i = 0; i < number_of_targets_in_batch; i++) { 25 | 26 | double temporary_potential = 0.0; 27 | 28 | double tx = target_x[starting_index_of_target + i]; 29 | double ty = target_y[starting_index_of_target + i]; 30 | double tz = target_z[starting_index_of_target + i]; 31 | 32 | #ifdef OPENACC_ENABLED 33 | #pragma acc loop independent reduction(+:temporary_potential) 34 | #endif 35 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 36 | 37 | double dx = tx - source_x[starting_index_of_source + j]; 38 | double dy = ty - source_y[starting_index_of_source + j]; 39 | double dz = tz - source_z[starting_index_of_source + j]; 40 | double r = sqrt(dx*dx + dy*dy + dz*dz); 41 | 42 | if (r > DBL_MIN) { 43 | double kap_r = kernel_parameter1 * r; 44 | double r_eta = r / kernel_parameter2; 45 | temporary_potential += source_q[starting_index_of_source + j] 46 | * source_w[starting_index_of_source + j] / r 47 | * (exp(-kap_r) * erfc(kap_eta_2 - r_eta) 48 | - exp( kap_r) * erfc(kap_eta_2 + r_eta)); 49 | } 50 | } // end loop over source points 51 | #ifdef OPENACC_ENABLED 52 | #pragma acc atomic 53 | #endif 54 | potential[starting_index_of_target + i] += temporary_potential; 55 | } 56 | #ifdef OPENACC_ENABLED 57 | } // end kernel 58 | #endif 59 | return; 60 | } 61 | 62 | 63 | 64 | 65 | void tcfApproximationLagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 66 | int starting_index_of_target, int starting_index_of_cluster, 67 | double *target_x, double *target_y, double *target_z, 68 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 69 | double kernel_parameter1, double kernel_parameter2, double *potential, int gpu_async_stream_id) 70 | { 71 | 72 | double kap_eta_2 = kernel_parameter1 * kernel_parameter2 / 2.0; 73 | 74 | #ifdef OPENACC_ENABLED 75 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 76 | cluster_x, cluster_y, cluster_z, cluster_q, potential) 77 | { 78 | #endif 79 | #ifdef OPENACC_ENABLED 80 | #pragma acc loop independent 81 | #endif 82 | for (int i = 0; i < number_of_targets_in_batch; i++) { 83 | 84 | double temporary_potential = 0.0; 85 | 86 | double tx = target_x[starting_index_of_target + i]; 87 | double ty = target_y[starting_index_of_target + i]; 88 | double tz = target_z[starting_index_of_target + i]; 89 | 90 | #ifdef OPENACC_ENABLED 91 | #pragma acc loop independent reduction(+:temporary_potential) 92 | #endif 93 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 94 | 95 | double dx = tx - cluster_x[starting_index_of_cluster + j]; 96 | double dy = ty - cluster_y[starting_index_of_cluster + j]; 97 | double dz = tz - 
cluster_z[starting_index_of_cluster + j]; 98 | double r = sqrt(dx*dx + dy*dy + dz*dz); 99 | 100 | if (r > DBL_MIN) { 101 | double kap_r = kernel_parameter1 * r; 102 | double r_eta = r / kernel_parameter2; 103 | temporary_potential += cluster_q[starting_index_of_cluster + j] / r 104 | * (exp(-kap_r) * erfc(kap_eta_2 - r_eta) 105 | - exp( kap_r) * erfc(kap_eta_2 + r_eta)); 106 | } 107 | } // end loop over interpolation points 108 | #ifdef OPENACC_ENABLED 109 | #pragma acc atomic 110 | #endif 111 | potential[starting_index_of_target + i] += temporary_potential; 112 | } 113 | #ifdef OPENACC_ENABLED 114 | } // end kernel 115 | #endif 116 | return; 117 | } 118 | -------------------------------------------------------------------------------- /src/kernels/tcf/tcf.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_TCF_H 3 | #define H_TCF_H 4 | 5 | 6 | void tcfDirect(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 7 | int starting_index_of_target, int starting_index_of_source, 8 | double *target_x, double *target_y, double *target_z, 9 | double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, 10 | double kernel_parameter1, double kernel_parameter2, double *potential, int gpu_async_stream_id); 11 | 12 | void tcfApproximationLagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 13 | int starting_index_of_target, int starting_index_of_cluster, 14 | double *target_x, double *target_y, double *target_z, 15 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 16 | double kernel_parameter1, double kernel_parameter2, double *potential, int gpu_async_stream_id); 17 | 18 | #endif /* H_TCF_H */ 19 | -------------------------------------------------------------------------------- /src/kernels/user_kernel/user_kernel.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_USER_KERNEL_H 3 | #define H_K_USER_KERNEL_H 4 | 5 | #include "user_kernel_pp.h" 6 | #include "user_kernel_pc.h" 7 | #include "user_kernel_cp.h" 8 | 9 | #endif /* H_K_USER_KERNEL_H */ 10 | -------------------------------------------------------------------------------- /src/kernels/user_kernel/user_kernel_cp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "user_kernel_cp.h" 7 | 8 | 9 | void K_User_Kernel_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_sources, int starting_index_of_cluster, 11 | double *source_x, double *source_y, double *source_z, double *source_q, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 13 | struct RunParams *run_params, int gpu_async_stream_id) 14 | { 15 | 16 | double kernel_parameter = run_params->kernel_params[0]; 17 | 18 | #ifdef OPENACC_ENABLED 19 | #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ 20 | cluster_x, cluster_y, cluster_z, cluster_q) 21 | { 22 | #endif 23 | #ifdef OPENACC_ENABLED 24 | #pragma acc loop independent 25 | #endif 26 | for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { 27 | 28 | double temporary_potential = 0.0; 29 | 30 | double cx = cluster_x[starting_index_of_cluster + i]; 31 | double cy = 
/src/kernels/user_kernel/user_kernel.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_USER_KERNEL_H
#define H_K_USER_KERNEL_H

#include "user_kernel_pp.h"
#include "user_kernel_pc.h"
#include "user_kernel_cp.h"

#endif /* H_K_USER_KERNEL_H */
--------------------------------------------------------------------------------
/src/kernels/user_kernel/user_kernel_cp.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "user_kernel_cp.h"


void K_User_Kernel_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_x, double *source_y, double *source_z, double *source_q,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q,
        struct RunParams *run_params, int gpu_async_stream_id)
{

    double kernel_parameter = run_params->kernel_params[0];

#ifdef OPENACC_ENABLED
    #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \
                        cluster_x, cluster_y, cluster_z, cluster_q)
    {
#endif
#ifdef OPENACC_ENABLED
    #pragma acc loop independent
#endif
    for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) {

        double temporary_potential = 0.0;

        double cx = cluster_x[starting_index_of_cluster + i];
        double cy = cluster_y[starting_index_of_cluster + i];
        double cz = cluster_z[starting_index_of_cluster + i];

#ifdef OPENACC_ENABLED
        #pragma acc loop independent reduction(+:temporary_potential)
#endif
        for (int j = 0; j < number_of_sources_in_batch; j++) {
#ifdef OPENACC_ENABLED
            #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch])
#endif

            int jj = starting_index_of_sources + j;
            double dx = cx - source_x[jj];
            double dy = cy - source_y[jj];
            double dz = cz - source_z[jj];
            double r = sqrt(dx*dx + dy*dy + dz*dz);

            temporary_potential += source_q[jj] * exp(-kernel_parameter * r) / r;

        } // end loop over sources in batch
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        cluster_q[starting_index_of_cluster + i] += temporary_potential;
    }
#ifdef OPENACC_ENABLED
    } // end kernel
#endif
    return;
}
--------------------------------------------------------------------------------
/src/kernels/user_kernel/user_kernel_cp.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_USER_KERNEL_CP_H
#define H_K_USER_KERNEL_CP_H

#include "../../run_params/struct_run_params.h"


void K_User_Kernel_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_x, double *source_y, double *source_z, double *source_q,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, int gpu_async_stream_id);


#endif /* H_K_USER_KERNEL_CP_H */
--------------------------------------------------------------------------------
/src/kernels/user_kernel/user_kernel_pc.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "user_kernel_pc.h"


void K_User_Kernel_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster,
        double *target_x, double *target_y, double *target_z,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id)
{

    double kernel_parameter = run_params->kernel_params[0];

#ifdef OPENACC_ENABLED
    #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \
                        cluster_x, cluster_y, cluster_z, cluster_charge, potential)
    {
#endif
#ifdef OPENACC_ENABLED
    #pragma acc loop independent
#endif
    for (int i = 0; i < number_of_targets_in_batch; i++) {

        double temporary_potential = 0.0;

        double tx = target_x[starting_index_of_target + i];
        double ty = target_y[starting_index_of_target + i];
        double tz = target_z[starting_index_of_target + i];

#ifdef OPENACC_ENABLED
        #pragma acc loop independent reduction(+:temporary_potential)
#endif
        for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) {

            double dx = tx - cluster_x[starting_index_of_cluster + j];
            double dy = ty - cluster_y[starting_index_of_cluster + j];
            double dz = tz - cluster_z[starting_index_of_cluster + j];
            double r = sqrt(dx*dx + dy*dy + dz*dz);

            temporary_potential += cluster_charge[starting_index_of_cluster + j] * exp(-kernel_parameter * r) / r;

        } // end loop over interpolation points
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        potential[starting_index_of_target + i] += temporary_potential;
    }
#ifdef OPENACC_ENABLED
    } // end kernel
#endif
    return;
}
--------------------------------------------------------------------------------
/src/kernels/user_kernel/user_kernel_pc.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_USER_KERNEL_PC_H
#define H_K_USER_KERNEL_PC_H

#include "../../run_params/struct_run_params.h"


void K_User_Kernel_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster,
        double *target_x, double *target_y, double *target_z,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);


#endif /* H_K_USER_KERNEL_PC_H */
--------------------------------------------------------------------------------
/src/kernels/user_kernel/user_kernel_pp.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "user_kernel_pp.h"


void K_User_Kernel_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster,
        int starting_index_of_target, int starting_index_of_source,
        double *target_x, double *target_y, double *target_z,
        double *source_x, double *source_y, double *source_z, double *source_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id)
{

    double kernel_parameter = run_params->kernel_params[0];

#ifdef OPENACC_ENABLED
    #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \
                        source_x, source_y, source_z, source_charge, potential)
    {
#endif
#ifdef OPENACC_ENABLED
    #pragma acc loop independent
#endif
    for (int i = 0; i < number_of_targets_in_batch; i++) {

        double temporary_potential = 0.0;

        double tx = target_x[starting_index_of_target + i];
        double ty = target_y[starting_index_of_target + i];
        double tz = target_z[starting_index_of_target + i];

#ifdef OPENACC_ENABLED
        #pragma acc loop independent reduction(+:temporary_potential)
#endif
        for (int j = 0; j < number_of_source_points_in_cluster; j++) {

            double dx = tx - source_x[starting_index_of_source + j];
            double dy = ty - source_y[starting_index_of_source + j];
            double dz = tz - source_z[starting_index_of_source + j];
            double r = sqrt(dx*dx + dy*dy + dz*dz);

            if (r > DBL_MIN) {
                temporary_potential += source_charge[starting_index_of_source + j] * exp(-kernel_parameter * r) / r;
            }
        } // end loop over source points
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        potential[starting_index_of_target + i] += temporary_potential;
    }
#ifdef OPENACC_ENABLED
    } // end kernel
#endif
    return;
}
--------------------------------------------------------------------------------
/src/kernels/user_kernel/user_kernel_pp.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_USER_KERNEL_PP_H
#define H_K_USER_KERNEL_PP_H

#include "../../run_params/struct_run_params.h"

void K_User_Kernel_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster,
        double *target_x, double *target_y, double *target_z,
        double *source_x, double *source_y, double *source_z, double *source_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);


#endif /* H_K_USER_KERNEL_PP_H */
--------------------------------------------------------------------------------
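As shipped, the three user_kernel files evaluate a Yukawa-style exp(-k*r)/r placeholder; a custom interaction is obtained by editing the one accumulation line in each of user_kernel_pp.c, user_kernel_pc.c, and user_kernel_cp.c. As a sketch only, for a hypothetical Gaussian interaction the particle-particle accumulation inside the r > DBL_MIN guard would become:

    /* hypothetical replacement, not the shipped kernel */
    temporary_potential += source_charge[starting_index_of_source + j]
                         * exp(-r * r / (kernel_parameter * kernel_parameter));

The same substitution must be made consistently in the PC and CP variants so that direct and approximated interactions agree.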
/src/kernels/yukawa/yukawa.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_H
#define H_K_YUKAWA_H

#include "yukawa_pp.h"
#include "yukawa_pc.h"
#include "yukawa_cp.h"

#include "yukawa_ss_pp.h"
#include "yukawa_ss_correction.h"
#include "yukawa_ss_pc.h"
#include "yukawa_ss_cp.h"
#include "yukawa_ss_cc.h"

#endif /* H_K_YUKAWA_H */
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_cp.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_CP_H
#define H_K_YUKAWA_CP_H

#include "../../run_params/struct_run_params.h"


void K_Yukawa_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_x, double *source_y, double *source_z, double *source_q,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, int gpu_async_stream_id);

void K_Yukawa_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_x, double *source_y, double *source_z, double *source_q,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, int gpu_async_stream_id);


#endif /* H_K_YUKAWA_CP_H */
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_pc.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_PC_H
#define H_K_YUKAWA_PC_H

#include "../../run_params/struct_run_params.h"


void K_Yukawa_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster,
        double *target_x, double *target_y, double *target_z,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);

void K_Yukawa_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points,
        double *target_x, double *target_y, double *target_z,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);


#endif /* H_K_YUKAWA_PC_H */
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_pp.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "yukawa_pp.h"


void K_Yukawa_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster,
        int starting_index_of_target, int starting_index_of_source,
        double *target_x, double *target_y, double *target_z,
        double *source_x, double *source_y, double *source_z, double *source_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id)
{

    double kernel_parameter = run_params->kernel_params[0];

#ifdef OPENACC_ENABLED
    #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \
                        source_x, source_y, source_z, source_charge, potential)
    {
#endif
#ifdef OPENACC_ENABLED
    #pragma acc loop independent
#endif
    for (int i = 0; i < number_of_targets_in_batch; i++) {

        double temporary_potential = 0.0;

        double tx = target_x[starting_index_of_target + i];
        double ty = target_y[starting_index_of_target + i];
        double tz = target_z[starting_index_of_target + i];

#ifdef OPENACC_ENABLED
        #pragma acc loop independent reduction(+:temporary_potential)
#endif
        for (int j = 0; j < number_of_source_points_in_cluster; j++) {

            double dx = tx - source_x[starting_index_of_source + j];
            double dy = ty - source_y[starting_index_of_source + j];
            double dz = tz - source_z[starting_index_of_source + j];
            double r = sqrt(dx*dx + dy*dy + dz*dz);

            if (r > DBL_MIN) {
                temporary_potential += source_charge[starting_index_of_source + j] * exp(-kernel_parameter * r) / r;
            }
        } // end loop over source points
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        potential[starting_index_of_target + i] += temporary_potential;
    }
#ifdef OPENACC_ENABLED
    } // end kernel
#endif
    return;
}
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_pp.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_PP_H
#define H_K_YUKAWA_PP_H

#include "../../run_params/struct_run_params.h"

void K_Yukawa_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster,
        double *target_x, double *target_y, double *target_z,
        double *source_x, double *source_y, double *source_z, double *source_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);


#endif /* H_K_YUKAWA_PP_H */
--------------------------------------------------------------------------------
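A minimal sketch of calling the direct Yukawa kernel; the coordinates, charges, and the screening parameter value below are illustrative assumptions, and the RunParams struct is filled by hand only for brevity (normally RunParams_Setup does this).

    #include "yukawa_pp.h"

    /* Illustrative only: direct Yukawa interaction of 2 sources on 1 target. */
    void example_yukawa_pp(void)
    {
        double tx[1] = {0.0}, ty[1] = {0.0}, tz[1] = {0.0};
        double sx[2] = {1.0, 2.0}, sy[2] = {0.0, 0.0}, sz[2] = {0.0, 0.0};
        double sq[2] = {1.0, -1.0};
        double potential[1] = {0.0};

        double kappa = 0.5;                 /* assumed screening parameter */
        struct RunParams run_params = {0};
        run_params.num_kernel_params = 1;
        run_params.kernel_params = &kappa;

        K_Yukawa_PP(1, 2, 0, 0, tx, ty, tz, sx, sy, sz, sq,
                    &run_params, potential, 0);
    }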
/src/kernels/yukawa/yukawa_ss_cc.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "yukawa_ss_cc.h"


void K_Yukawa_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_cluster_x, double *source_cluster_y, double *source_cluster_z, double *source_cluster_q, double *source_cluster_w,
        double *target_cluster_x, double *target_cluster_y, double *target_cluster_z, double *target_cluster_q, double *target_cluster_w,
        struct RunParams *run_params, int gpu_async_stream_id)
{

    double kernel_parameter = run_params->kernel_params[0];

#ifdef OPENACC_ENABLED
    #pragma acc kernels async(gpu_async_stream_id) present(source_cluster_x, source_cluster_y, source_cluster_z, source_cluster_q, source_cluster_w, \
                        target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, target_cluster_w)
    {
#endif
#ifdef OPENACC_ENABLED
    #pragma acc loop independent
#endif
    for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) {

        double temporary_potential = 0.0;
        double temporary_weight = 0.0;

        double cx = target_cluster_x[starting_index_of_cluster + i];
        double cy = target_cluster_y[starting_index_of_cluster + i];
        double cz = target_cluster_z[starting_index_of_cluster + i];

#ifdef OPENACC_ENABLED
        #pragma acc loop independent reduction(+:temporary_potential,temporary_weight)
#endif
        for (int j = 0; j < number_of_sources_in_batch; j++) {
#ifdef OPENACC_ENABLED
            #pragma acc cache(source_cluster_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_cluster_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_cluster_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_cluster_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_cluster_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch])
#endif

            int jj = starting_index_of_sources + j;
            double dx = cx - source_cluster_x[jj];
            double dy = cy - source_cluster_y[jj];
            double dz = cz - source_cluster_z[jj];
            double r = sqrt(dx*dx + dy*dy + dz*dz);

            temporary_potential += source_cluster_q[jj] * exp(-kernel_parameter * r) / r; // source_cluster_q already has source_q * source_w
            temporary_weight += source_cluster_w[jj] * exp(-kernel_parameter * r) / r;

        } // end loop over source cluster points
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        target_cluster_q[starting_index_of_cluster + i] += temporary_potential;
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        target_cluster_w[starting_index_of_cluster + i] += temporary_weight;
    }
#ifdef OPENACC_ENABLED
    } // end kernel
#endif
    return;
}
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_cc.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_SS_CC_H
#define H_K_YUKAWA_SS_CC_H

#include "../../run_params/struct_run_params.h"


void K_Yukawa_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_cluster_x, double *source_cluster_y, double *source_cluster_z, double *source_cluster_q, double *source_cluster_w,
        double *target_cluster_x, double *target_cluster_y, double *target_cluster_z, double *target_cluster_charge, double *target_cluster_w,
        struct RunParams *run_params, int gpu_async_stream_id);


#endif /* H_K_YUKAWA_SS_CC_H */
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_correction.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "yukawa_ss_correction.h"


void K_Yukawa_SS_Correction(double *potential, double *target_q,
        int numTargets, struct RunParams *run_params)
{
    double kernel_parameter = run_params->kernel_params[0];
    double param = 4.0 * M_PI / kernel_parameter / kernel_parameter;
    for (int i = 0; i < numTargets; i++) {
        potential[i] += param * target_q[i];
        // printf("target_q[%i], target_w[%i] = %f, %f\n", i, i, potential[i], target_q[i]);
    }

    return;
}
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_correction.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_SS_CORRECTION_H
#define H_K_YUKAWA_SS_CORRECTION_H

#include "../../run_params/struct_run_params.h"


void K_Yukawa_SS_Correction(double *potential, double *target_q,
        int numTargets, struct RunParams *run_params);


#endif /* H_K_YUKAWA_SS_CORRECTION_H */
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_cp.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "yukawa_ss_cp.h"


void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_x, double *source_y, double *source_z, double *source_q, double *source_w,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, double *cluster_w,
        struct RunParams *run_params, int gpu_async_stream_id)
{

    double kernel_parameter = run_params->kernel_params[0];

#ifdef OPENACC_ENABLED
    #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, source_w, \
                        cluster_x, cluster_y, cluster_z, cluster_q, cluster_w)
    {
#endif
#ifdef OPENACC_ENABLED
    #pragma acc loop independent
#endif
    for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) {

        double temporary_potential = 0.0;
        double temporary_weight = 0.0;

        double cx = cluster_x[starting_index_of_cluster + i];
        double cy = cluster_y[starting_index_of_cluster + i];
        double cz = cluster_z[starting_index_of_cluster + i];

#ifdef OPENACC_ENABLED
        #pragma acc loop independent reduction(+:temporary_potential) reduction(+:temporary_weight)
#endif
        for (int j = 0; j < number_of_sources_in_batch; j++) {
#ifdef OPENACC_ENABLED
            #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch])
#endif

            int jj = starting_index_of_sources + j;
            double dx = cx - source_x[jj];
            double dy = cy - source_y[jj];
            double dz = cz - source_z[jj];
            double r = sqrt(dx*dx + dy*dy + dz*dz);

            temporary_potential += source_q[jj] * source_w[jj] * exp(-kernel_parameter * r) / r;
            temporary_weight += source_w[jj] * exp(-kernel_parameter * r) / r;

        } // end loop over sources in batch
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        cluster_q[starting_index_of_cluster + i] += temporary_potential;
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        cluster_w[starting_index_of_cluster + i] += temporary_weight;
    }
#ifdef OPENACC_ENABLED
    } // end kernel
#endif
    return;
}
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_cp.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_SS_CP_H
#define H_K_YUKAWA_SS_CP_H

#include "../../run_params/struct_run_params.h"


void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_x, double *source_y, double *source_z, double *source_q, double *source_w,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_w,
        struct RunParams *run_params, int gpu_async_stream_id);


#endif /* H_K_YUKAWA_SS_CP_H */
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_pc.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_SS_PC_H
#define H_K_YUKAWA_SS_PC_H

#include "../../run_params/struct_run_params.h"


void K_Yukawa_SS_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster,
        double *target_x, double *target_y, double *target_z, double *target_charge, double *cluster_weight,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);

void K_Yukawa_SS_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points,
        double *target_x, double *target_y, double *target_z, double *target_charge,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);


#endif /* H_K_YUKAWA_SS_PC_H */
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_pp.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "yukawa_ss_pp.h"


void K_Yukawa_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster,
        int starting_index_of_target, int starting_index_of_source,
        double *target_x, double *target_y, double *target_z, double *target_charge,
        double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id)
{

    double kernel_parameter = run_params->kernel_params[0];

#ifdef OPENACC_ENABLED
    #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, target_charge, \
                        source_x, source_y, source_z, source_charge, source_weight, potential)
    {
#endif
#ifdef OPENACC_ENABLED
    #pragma acc loop independent
#endif
    for (int i = 0; i < number_of_targets_in_batch; i++) {

        int ii = starting_index_of_target + i;
        double temporary_potential = 0.0;

        double tx = target_x[ii];
        double ty = target_y[ii];
        double tz = target_z[ii];
        double tq = target_charge[ii];

#ifdef OPENACC_ENABLED
        #pragma acc loop independent reduction(+:temporary_potential)
#endif
        for (int j = 0; j < number_of_source_points_in_cluster; j++) {

            int jj = starting_index_of_source + j;
            double dx = tx - source_x[jj];
            double dy = ty - source_y[jj];
            double dz = tz - source_z[jj];
            double r = sqrt(dx*dx + dy*dy + dz*dz);

            if (r > DBL_MIN) {
                temporary_potential += (source_charge[jj] - tq) * source_weight[jj] * exp(-kernel_parameter * r) / r;
            }
        } // end loop over source points
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        potential[ii] += temporary_potential;
    }
#ifdef OPENACC_ENABLED
    } // end kernel
#endif
    return;
}
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_pp.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_SS_PP_H
#define H_K_YUKAWA_SS_PP_H

#include "../../run_params/struct_run_params.h"


void K_Yukawa_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster,
        int starting_index_of_target, int starting_index_of_source,
        double *target_x, double *target_y, double *target_z, double *target_charge,
        double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);


#endif /* H_K_YUKAWA_SS_PP_H */
--------------------------------------------------------------------------------
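A sketch of how the singularity-subtraction pieces combine, with made-up point data. K_Yukawa_SS_PP accumulates (q_j - q_i) * w_j * exp(-k*r_ij)/r_ij, and K_Yukawa_SS_Correction then adds back q_i * 4*pi/k^2, which is the analytic value of the integral of exp(-k*r)/r over all space. The arrays and the assumption that run_params was already configured with a Yukawa parameter are illustrative.

    #include "yukawa_ss_pp.h"
    #include "yukawa_ss_correction.h"

    /* Illustrative only: subtracted interaction plus analytic correction. */
    void example_yukawa_ss(struct RunParams *run_params)
    {
        double tx[1] = {0.0}, ty[1] = {0.0}, tz[1] = {0.0}, tq[1] = {1.0};
        double sx[2] = {1.0, 2.0}, sy[2] = {0.0, 0.0}, sz[2] = {0.0, 0.0};
        double sq[2] = {1.0, -1.0}, sw[2] = {1.0, 1.0};
        double potential[1] = {0.0};

        K_Yukawa_SS_PP(1, 2, 0, 0, tx, ty, tz, tq,
                       sx, sy, sz, sq, sw, run_params, potential, 0);
        K_Yukawa_SS_Correction(potential, tq, 1, run_params);
    }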
/src/particles/particles.h:
--------------------------------------------------------------------------------
#ifndef H_PARTICLE_FUNCTIONS_H
#define H_PARTICLE_FUNCTIONS_H

#include "../run_params/struct_run_params.h"
#include "struct_particles.h"

void Particles_Alloc(struct Particles **particles_addr, int length);

void Particles_Free(struct Particles **particles_addr);

void Particles_Targets_Reorder(struct Particles *targets, double *potential);

void Particles_Sources_Reorder(struct Particles *sources);

void Particles_ConstructOrder(struct Particles *particles);

void Particles_FreeOrder(struct Particles *particles);

void Particles_Validate(struct Particles *sources, struct Particles *targets,
        struct RunParams *run_params);


#endif /* H_PARTICLE_FUNCTIONS_H */
--------------------------------------------------------------------------------
/src/particles/struct_particles.h:
--------------------------------------------------------------------------------
#ifndef H_PARTICLES_H
#define H_PARTICLES_H

/* declaration of struct with tag particles */
struct Particles
{
    int num;
    double *x;
    double *y;
    double *z;
    double *q;
    // quadrature weights; set to 1 when interacting point particles
    // rather than evaluating a convolution integral
    double *w;

    int *ibeg;
    int *iend;

    int *order;

    int sources_w_dummy;
    int targets_q_dummy;

    int targets_x_duplicate;
    int targets_y_duplicate;
    int targets_z_duplicate;
    int targets_q_duplicate;
};

#endif /* H_PARTICLES_H */
--------------------------------------------------------------------------------
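A sketch of setting up a source set, assuming (see particles.c for the actual behavior) that Particles_Alloc sizes the x/y/z/q/w arrays for the requested number of particles; the coordinate and charge values are illustrative.

    #include "particles.h"

    /* Illustrative only: fill n unit-weight sources along the x axis. */
    void example_sources(int n)
    {
        struct Particles *sources = NULL;
        Particles_Alloc(&sources, n);   /* assumed to allocate the arrays below */

        for (int i = 0; i < n; i++) {
            sources->x[i] = (double) i;
            sources->y[i] = 0.0;
            sources->z[i] = 0.0;
            sources->q[i] = 1.0;
            sources->w[i] = 1.0;        /* unit weights: plain particle interaction */
        }

        Particles_Free(&sources);
    }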
/src/run_params/run_params.h:
--------------------------------------------------------------------------------
#ifndef H_RUN_PARAMS_FUNCTIONS_H
#define H_RUN_PARAMS_FUNCTIONS_H

#include "../utilities/enums.h"
#include "struct_run_params.h"


void RunParams_Setup(struct RunParams **run_params_addr,
        KERNEL kernel, int num_kernel_params, double *kernel_params,
        APPROXIMATION approximation,
        SINGULARITY singularity,
        COMPUTE_TYPE compute_type,
        double theta, int interp_degree,
        int max_per_source_leaf, int max_per_target_leaf, double size_check_factor,
        double beta, int verbosity);

void RunParams_Validate(struct RunParams *run_params);

void RunParams_Free(struct RunParams **run_params_addr);

void RunParams_Print(struct RunParams *run_params);


#endif
--------------------------------------------------------------------------------
/src/run_params/struct_run_params.h:
--------------------------------------------------------------------------------
#ifndef H_RUN_PARAMS_H
#define H_RUN_PARAMS_H

#include "../utilities/enums.h"


struct RunParams
{
    KERNEL kernel;
    int num_kernel_params;
    double *kernel_params;

    APPROXIMATION approximation;
    SINGULARITY singularity;
    COMPUTE_TYPE compute_type;

    double theta;
    double size_check_factor;

    int interp_degree;
    int interp_pts_per_cluster;
    int interp_charges_per_cluster;
    int interp_weights_per_cluster;

    int max_per_source_leaf;
    int max_per_target_leaf;

    double beta;

    int verbosity;
};


#endif /* H_RUN_PARAMS_H */
--------------------------------------------------------------------------------
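A sketch of configuring a run through RunParams_Setup; the enum choices come from enums.h, but every numeric value below is a typical-looking assumption rather than a recommended setting.

    #include "run_params.h"

    /* Illustrative only: a Lagrange particle-cluster Yukawa configuration. */
    void example_run_params(void)
    {
        struct RunParams *run_params = NULL;
        double kernel_params[1] = {0.5};    /* assumed Yukawa screening parameter */

        RunParams_Setup(&run_params, YUKAWA, 1, kernel_params,
                        LAGRANGE, SKIPPING, PARTICLE_CLUSTER,
                        0.7,     /* theta */
                        8,       /* interp_degree */
                        500,     /* max_per_source_leaf */
                        500,     /* max_per_target_leaf */
                        1.0,     /* size_check_factor */
                        -1.0,    /* beta */
                        0);      /* verbosity */

        RunParams_Free(&run_params);
    }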
/src/tree/batches.h:
--------------------------------------------------------------------------------
#ifndef H_BATCH_FUNCTIONS_H
#define H_BATCH_FUNCTIONS_H

#include "../particles/struct_particles.h"
#include "../run_params/struct_run_params.h"

#include "struct_tree.h"


void Batches_Sources_Construct(struct Tree **batches_addr, struct Particles *sources,
        struct RunParams *run_params);

void Batches_Targets_Construct(struct Tree **batches_addr, struct Particles *targets,
        struct RunParams *run_params);

void Batches_Alloc(struct Tree **batches_addr, int length);

void Batches_Free(struct Tree **batches_addr);

void Batches_Free_Win(struct Tree **batches_addr);

void Batches_Print(struct Tree *batches);


#endif
--------------------------------------------------------------------------------
/src/tree/partition.h:
--------------------------------------------------------------------------------
#ifndef H_PARTITION_H
#define H_PARTITION_H

/*
 * declaration of partition functions
 *
 * The partition routines determine the index midind after partitioning the
 * arrays a, b, c, and d in place, such that a(ibeg:midind) <= val and
 * a(midind+1:iend) > val. If, on entry, ibeg > iend or all of a(ibeg:iend)
 * exceeds val, then midind is returned as ibeg - 1.
 */

void cp_partition(double *a, double *b, double *c, double *d, int *indarr,
        int ibeg, int iend, double val, int *midind);

void pc_partition(double *a, double *b, double *c, double *d, double *w, int *indarr,
        int ibeg, int iend, double val, int *midind);

void pc_partition_8(double *x, double *y, double *z, double *q, double *w, int *indarr,
        double xyzmms[6][8], double xl, double yl, double zl,
        int *numposchild, int max_num_children,
        double x_mid, double y_mid, double z_mid,
        int ind[8][2]);

void cp_partition_8(double *x, double *y, double *z, double *q, int *indarr,
        double xyzmms[6][8], double xl, double yl, double zl,
        int *numposchild, int max_num_children,
        double x_mid, double y_mid, double z_mid,
        int ind[8][2]);


#endif /* H_PARTITION_H */
--------------------------------------------------------------------------------
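A worked example of the partition contract stated in the header comment, with illustrative values:

    /* With a = {3, 9, 1, 7, 5}, val = 5, ibeg = 1, iend = 5 (Fortran-style
     * 1-based indices, matching the a(ibeg:iend) notation above), a is permuted
     * in place so that a(1:midind) <= 5 < a(midind+1:5); one valid outcome is
     * a = {3, 1, 5, 9, 7} with midind = 3. The companion arrays b, c, d (and w)
     * are permuted alongside a, and indarr records the permutation so the
     * original ordering can be restored later. */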
/src/tree/struct_tree.h:
--------------------------------------------------------------------------------
#ifndef H_STRUCT_TREE_H
#define H_STRUCT_TREE_H

struct Tree
{
    int numnodes;
    int numleaves;

    int min_leaf_size;
    int max_leaf_size;
    int max_depth;

    int *ibeg;
    int *iend;
    int *numpar;

    int *cluster_ind;

    double *radius;

    double *x_mid;
    double *y_mid;
    double *z_mid;

    double *x_min;
    double *y_min;
    double *z_min;

    double *x_max;
    double *y_max;
    double *z_max;

    int *num_children;
    int *children;
    int *parent;

    int **levels_list;
    int *levels_list_num;

    int *leaves_list;
    int leaves_list_num;

};

#endif /* H_STRUCT_TREE_H */
--------------------------------------------------------------------------------
/src/tree/struct_tree_linked_list_node.h:
--------------------------------------------------------------------------------
#ifndef H_STRUCT_TREE_LINKED_LIST_NODE_H
#define H_STRUCT_TREE_LINKED_LIST_NODE_H

struct TreeLinkedListNode
{
    int numpar, ibeg, iend;

    double x_min, y_min, z_min;
    double x_max, y_max, z_max;
    double x_mid, y_mid, z_mid;

    double radius, aspect;

    int num_children;
    struct TreeLinkedListNode *child[8];
    struct TreeLinkedListNode *parent;

    int node_index;

    int level;
};

#endif /* H_STRUCT_TREE_LINKED_LIST_NODE_H */
--------------------------------------------------------------------------------
/src/tree/tree.h:
--------------------------------------------------------------------------------
#ifndef H_TREE_FUNCTIONS_H
#define H_TREE_FUNCTIONS_H

#include "../particles/struct_particles.h"
#include "../run_params/struct_run_params.h"

#include "struct_tree_linked_list_node.h"
#include "struct_tree.h"


void Tree_Sources_Construct(struct Tree **tree_addr, struct Particles *sources, struct RunParams *run_params);

void Tree_Targets_Construct(struct Tree **tree_addr, struct Particles *targets, struct RunParams *run_params);

void Tree_Set_Leaves_and_Levels(struct Tree *tree);

void Tree_Fill_Levels(struct Tree *tree, int idx, int level, int *sizeof_levels_list, int *sizeof_leaves_list);

void Tree_Alloc(struct Tree **tree_addr, int length);

void Tree_Free(struct Tree **tree_addr);

void Tree_Fill(struct Tree *tree, struct TreeLinkedListNode *p);

void Tree_Print(struct Tree *tree);


#endif /* H_TREE_FUNCTIONS_H */
--------------------------------------------------------------------------------
/src/tree/tree_linked_list.h:
--------------------------------------------------------------------------------
#ifndef H_TREE_LINKED_LIST_FUNCTIONS_H
#define H_TREE_LINKED_LIST_FUNCTIONS_H

#include "../particles/struct_particles.h"
#include "struct_tree_linked_list_node.h"


void TreeLinkedList_Targets_Construct(struct TreeLinkedListNode **p, struct TreeLinkedListNode *parent,
        struct Particles *targets,
        int ibeg, int iend, int maxparnode, double *xyzmm, int *numnodes, int *numleaves,
        int *min_leaf_size, int *max_leaf_size, int *max_depth, int current_level);

void TreeLinkedList_Sources_Construct(struct TreeLinkedListNode **p, struct TreeLinkedListNode *parent,
        struct Particles *sources,
        int ibeg, int iend, int maxparnode, double *xyzmm,
        int *numnodes, int *numleaves, int *min_leaf_size, int *max_leaf_size,
        int *max_depth, int current_level);

int TreeLinkedList_SetIndex(struct TreeLinkedListNode *p, int index);

void TreeLinkedList_Free(struct TreeLinkedListNode **p_addr);


#endif /* H_TREE_LINKED_LIST_FUNCTIONS_H */
--------------------------------------------------------------------------------
/src/utilities/advanced_timings.h:
--------------------------------------------------------------------------------
#ifndef H_ADVANCED_TIMINGS_H
#define H_ADVANCED_TIMINGS_H

#include <mpi.h>

#include "../run_params/struct_run_params.h"


void Timing_Calculate(double time_tree_glob[3][13], double time_tree[13], double total_time_glob[1], double total_time[1]);

void Timing_Print(double time_tree_glob[3][13], double total_time_glob[1], struct RunParams *run_params);


#endif /* H_ADVANCED_TIMINGS_H */
--------------------------------------------------------------------------------
/src/utilities/array.h:
--------------------------------------------------------------------------------
/*
 * C header file containing macros for vector and array creation
 * using the xmalloc.c memory allocation routine
 *
 * This C code written by:
 * Leighton Wilson, University of Michigan, Ann Arbor, MI
 *
 * Based on the work of Rouben Rostamian, presented in
 * "Programming Projects in C for Students of Engineering,
 * Science, and Mathematics"
 *
 * Last modified by Leighton Wilson, 06/23/2016
 */


#ifndef H_ARRAY_H
#define H_ARRAY_H
#include "xmalloc.h"

#define make_vector(v,n) ((v) = xmalloc((n) * sizeof *(v)))

#define free_vector(v) do { free(v); v = NULL; } while (0)

#define realloc_vector(v,n) ((v) = realloc(v, (n) * sizeof *(v)))


#define make_matrix(a, m, n) do { \
    size_t make_matrix_loop_counter; \
    make_vector(a, (m) + 1); \
    for (make_matrix_loop_counter = 0; \
         make_matrix_loop_counter < (size_t)(m); \
         make_matrix_loop_counter++) \
        make_vector((a)[make_matrix_loop_counter], (n)); \
    (a)[m] = NULL; \
} while (0)

#define free_matrix(a) do { \
    if (a != NULL) { \
        size_t make_matrix_loop_counter; \
        for (make_matrix_loop_counter = 0; \
             (a)[make_matrix_loop_counter] != NULL; \
             make_matrix_loop_counter++) \
            free_vector((a)[make_matrix_loop_counter]); \
        free_vector(a); \
        a = NULL; \
    } \
} while (0)

#define make_3array(a, l, m, n) do { \
    size_t make_3array_loop_counter; \
    make_vector(a, (l) + 1); \
    for (make_3array_loop_counter = 0; \
         make_3array_loop_counter < (size_t)(l); \
         make_3array_loop_counter++) \
        make_matrix((a)[make_3array_loop_counter], (m), (n)); \
    (a)[l] = NULL; \
} while (0)

#define free_3array(a) do { \
    if (a != NULL) { \
        size_t make_3array_loop_counter; \
        for (make_3array_loop_counter = 0; \
             (a)[make_3array_loop_counter] != NULL; \
             make_3array_loop_counter++) \
            free_matrix((a)[make_3array_loop_counter]); \
        free_vector(a); \
        a = NULL; \
    } \
} while (0)

#define print_vector(fmt, v, n) do { \
    size_t print_vector_loop_counter; \
    for (print_vector_loop_counter = 0; \
         print_vector_loop_counter < (size_t)(n); \
         print_vector_loop_counter++) \
        printf(fmt, (v)[print_vector_loop_counter]); \
    putchar('\n'); \
} while (0)

#define print_matrix(fmt, a, m, n) do { \
    size_t print_matrix_loop_counter; \
    for (print_matrix_loop_counter = 0; \
         print_matrix_loop_counter < (size_t)(m); \
         print_matrix_loop_counter++) \
        print_vector(fmt, (a)[print_matrix_loop_counter], (n)); \
} while (0)



#endif /*H_ARRAY_H*/
--------------------------------------------------------------------------------
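A sketch of the allocation macros in use. Note that make_matrix appends a NULL sentinel row, which is what lets free_matrix discover the row count on its own; stdio.h and stdlib.h are included here explicitly since the print and free macros rely on them.

    #include <stdio.h>
    #include <stdlib.h>
    #include "array.h"

    /* Illustrative only: allocate, fill, print, and free a vector and matrix. */
    void example_arrays(void)
    {
        double *v = NULL;
        double **m = NULL;

        make_vector(v, 10);     /* v = xmalloc(10 * sizeof *v) */
        make_matrix(m, 3, 4);   /* 3 rows of 4 doubles, NULL-terminated row list */

        v[0] = 1.0;
        m[2][3] = 2.0;

        print_vector("%g ", v, 10);
        print_matrix("%g ", m, 3, 4);

        free_matrix(m);
        free_vector(v);
    }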
/src/utilities/enums.h:
--------------------------------------------------------------------------------
#ifndef H_BARYTREE_TYPES_H
#define H_BARYTREE_TYPES_H


typedef enum KERNEL
{
    NO_KERNEL,
    COULOMB,
    YUKAWA,
    REGULARIZED_COULOMB,
    REGULARIZED_YUKAWA,
    ATAN,
    TCF,
    DCF,
    SIN_OVER_R,
    MQ,
    RBS_U,
    RBS_V,
    USER
} KERNEL;


typedef enum SINGULARITY
{
    NO_SINGULARITY,
    SKIPPING,
    SUBTRACTION
} SINGULARITY;


typedef enum APPROXIMATION
{
    NO_APPROX,
    LAGRANGE,
    HERMITE
} APPROXIMATION;


typedef enum COMPUTE_TYPE
{
    NO_COMPUTE_TYPE,
    PARTICLE_CLUSTER,
    CLUSTER_PARTICLE,
    CLUSTER_CLUSTER,
} COMPUTE_TYPE;


#endif /* H_BARYTREE_TYPES_H */
--------------------------------------------------------------------------------
/src/utilities/timers.c:
--------------------------------------------------------------------------------
#include <mpi.h>


void START_TIMER(double *time)
{
    *time = MPI_Wtime();

    return;
}


void STOP_TIMER(double *time)
{
    *time = MPI_Wtime() - *time;

    return;
}
--------------------------------------------------------------------------------
/src/utilities/timers.h:
--------------------------------------------------------------------------------
#ifndef H_TIMERS_H
#define H_TIMERS_H

void START_TIMER(double *time);

void STOP_TIMER(double *time);

#endif /* H_TIMERS_H */
--------------------------------------------------------------------------------
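The timers wrap MPI_Wtime, so MPI must already be initialized when they are called; the work being timed below is a placeholder.

    #include "timers.h"

    /* Illustrative only: time a code section in seconds. */
    void example_timing(void)
    {
        double time_run;
        START_TIMER(&time_run);
        /* ... work to be timed ... */
        STOP_TIMER(&time_run);   /* time_run now holds the elapsed seconds */
    }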
"%s: line %d: malloc() of %zu bytes failed\n", 23 | file, line, nbytes); 24 | exit(EXIT_FAILURE); 25 | } 26 | else 27 | return x; 28 | } 29 | -------------------------------------------------------------------------------- /src/utilities/xmalloc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * C header file for xmalloc.c memory allocation routine 3 | * 4 | * This C code written by: 5 | * Leighton Wilson, University of Michigan, Ann Arbor, MI 6 | * 7 | * Based on the work of Rouben Rostamian, presented in 8 | * "Programming Projects in C for Students of Engineering, 9 | * Science, and Mathematics" 10 | * 11 | * Last modified by Leighton Wilson, 06/23/2016 12 | */ 13 | 14 | #ifndef H_XMALLOC_H 15 | #define H_XMALLOC_H 16 | #include 17 | void *malloc_or_exit(size_t nbytes, const char *file, int line); 18 | #define xmalloc(nbytes) malloc_or_exit((nbytes), __FILE__, __LINE__) 19 | #endif /*H_XMALLOC_H*/ 20 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | /locally_run_tests.sh 2 | /queue_all_tests.sh 3 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(tests_cpu serial_tests.c) 2 | target_link_libraries(tests_cpu PRIVATE BaryTree_cpu) 3 | 4 | add_test(NAME direct_sum_on_10_particles COMMAND tests_cpu 0) 5 | add_test(NAME treecode_on_100_particles COMMAND tests_cpu 1) 6 | add_test(NAME treecode_on_1_target_10000_sources COMMAND tests_cpu 2) 7 | add_test(NAME treecode_parameters_on_1_target_10000_sources COMMAND tests_cpu 3) 8 | add_test(NAME test_treecode_wrapper COMMAND tests_cpu 4) 9 | add_test(NAME test_BLDTT COMMAND tests_cpu 5) -------------------------------------------------------------------------------- /tests/minunit.h: -------------------------------------------------------------------------------- 1 | /* file: minunit.h */ 2 | #define mu_assert(message, test) do { if (!(test)) return message; } while (0) 3 | #define mu_run_test(test) do { char *message = test(); tests_run++; \ 4 | if (message) return message; } while (0) 5 | extern int tests_run; 6 | --------------------------------------------------------------------------------