├── .gitignore ├── CMakeLists.txt ├── INSTALL.md ├── LICENSE ├── README.md ├── examples ├── CMakeLists.txt ├── README.md ├── example.in ├── random_cube.c ├── random_cube_reproducible.c ├── run_readin.c ├── support_fns.c ├── support_fns.h ├── test_BaryTreeInterface.c ├── zoltan_fns.c └── zoltan_fns.h ├── interfaces ├── README.md ├── fortran │ ├── BaryTreeInterface.fh │ └── example_interface.f03 └── python │ ├── BaryTreeInterface.py │ └── testBaryTreeInterface.py ├── src ├── CMakeLists.txt ├── clusters │ ├── clusters.c │ ├── clusters.h │ └── struct_clusters.h ├── comm_cp │ ├── comm_cp.c │ └── comm_cp.h ├── comm_types │ ├── comm_types.c │ ├── comm_types.h │ └── struct_comm_types.h ├── comm_windows │ ├── comm_windows.c │ ├── comm_windows.h │ └── struct_comm_windows.h ├── drivers │ ├── directdriver.c │ ├── directdriver.h │ ├── treedriver.c │ └── treedriver.h ├── interaction_compute │ ├── interaction_compute.h │ ├── interaction_compute_cc.c │ ├── interaction_compute_correction.c │ ├── interaction_compute_cp.c │ ├── interaction_compute_direct.c │ ├── interaction_compute_downpass.c │ └── interaction_compute_pc.c ├── interaction_lists │ ├── interaction_lists.c │ ├── interaction_lists.h │ └── struct_interaction_lists.h ├── interface │ ├── BaryTreeInterface.c │ └── BaryTreeInterface.h ├── kernels │ ├── README.md │ ├── atan │ │ ├── atan.h │ │ ├── atan_pc.c │ │ ├── atan_pc.h │ │ ├── atan_pp.c │ │ └── atan_pp.h │ ├── coulomb │ │ ├── coulomb.h │ │ ├── coulomb_cc_hermite.c │ │ ├── coulomb_cc_hermite.h │ │ ├── coulomb_cp.c │ │ ├── coulomb_cp.h │ │ ├── coulomb_pc.c │ │ ├── coulomb_pc.h │ │ ├── coulomb_pp.c │ │ ├── coulomb_pp.h │ │ ├── coulomb_ss_cc.c │ │ ├── coulomb_ss_cc.h │ │ ├── coulomb_ss_correction.c │ │ ├── coulomb_ss_correction.h │ │ ├── coulomb_ss_cp.c │ │ ├── coulomb_ss_cp.h │ │ ├── coulomb_ss_pc.c │ │ ├── coulomb_ss_pc.h │ │ ├── coulomb_ss_pp.c │ │ └── coulomb_ss_pp.h │ ├── dcf │ │ ├── dcf.c │ │ └── dcf.h │ ├── mq │ │ ├── mq.h │ │ ├── mq_pc.c │ │ ├── mq_pc.h │ │ ├── mq_pp.c │ │ └── mq_pp.h │ ├── rbs-u │ │ ├── rbs-u.h │ │ ├── rbs-u_cp.c │ │ ├── rbs-u_cp.h │ │ ├── rbs-u_pc.c │ │ ├── rbs-u_pc.h │ │ ├── rbs-u_pp.c │ │ └── rbs-u_pp.h │ ├── rbs-v │ │ ├── rbs-v.h │ │ ├── rbs-v_cp.c │ │ ├── rbs-v_cp.h │ │ ├── rbs-v_pc.c │ │ ├── rbs-v_pc.h │ │ ├── rbs-v_pp.c │ │ └── rbs-v_pp.h │ ├── regularized-coulomb │ │ ├── regularized-coulomb.h │ │ ├── regularized-coulomb_cp.c │ │ ├── regularized-coulomb_cp.h │ │ ├── regularized-coulomb_pc.c │ │ ├── regularized-coulomb_pc.h │ │ ├── regularized-coulomb_pp.c │ │ ├── regularized-coulomb_pp.h │ │ ├── regularized-coulomb_ss_correction.c │ │ ├── regularized-coulomb_ss_correction.h │ │ ├── regularized-coulomb_ss_pc.c │ │ ├── regularized-coulomb_ss_pc.h │ │ ├── regularized-coulomb_ss_pp.c │ │ └── regularized-coulomb_ss_pp.h │ ├── regularized-yukawa │ │ ├── regularized-yukawa.h │ │ ├── regularized-yukawa_cp.c │ │ ├── regularized-yukawa_cp.h │ │ ├── regularized-yukawa_pc.c │ │ ├── regularized-yukawa_pc.h │ │ ├── regularized-yukawa_pp.c │ │ ├── regularized-yukawa_pp.h │ │ ├── regularized-yukawa_ss_correction.c │ │ ├── regularized-yukawa_ss_correction.h │ │ ├── regularized-yukawa_ss_pc.c │ │ ├── regularized-yukawa_ss_pc.h │ │ ├── regularized-yukawa_ss_pp.c │ │ └── regularized-yukawa_ss_pp.h │ ├── sin-over-r │ │ ├── sin-over-r.h │ │ ├── sin-over-r_cp.c │ │ ├── sin-over-r_cp.h │ │ ├── sin-over-r_pc.c │ │ ├── sin-over-r_pc.h │ │ ├── sin-over-r_pp.c │ │ └── sin-over-r_pp.h │ ├── tcf │ │ ├── tcf.c │ │ └── tcf.h │ ├── user_kernel │ │ ├── user_kernel.h │ │ ├── user_kernel_cp.c │ │ ├── 
user_kernel_cp.h │ │ ├── user_kernel_pc.c │ │ ├── user_kernel_pc.h │ │ ├── user_kernel_pp.c │ │ └── user_kernel_pp.h │ └── yukawa │ │ ├── yukawa.h │ │ ├── yukawa_cp.c │ │ ├── yukawa_cp.h │ │ ├── yukawa_pc.c │ │ ├── yukawa_pc.h │ │ ├── yukawa_pp.c │ │ ├── yukawa_pp.h │ │ ├── yukawa_ss_cc.c │ │ ├── yukawa_ss_cc.h │ │ ├── yukawa_ss_correction.c │ │ ├── yukawa_ss_correction.h │ │ ├── yukawa_ss_cp.c │ │ ├── yukawa_ss_cp.h │ │ ├── yukawa_ss_pc.c │ │ ├── yukawa_ss_pc.h │ │ ├── yukawa_ss_pp.c │ │ └── yukawa_ss_pp.h ├── particles │ ├── particles.c │ ├── particles.h │ └── struct_particles.h ├── run_params │ ├── run_params.c │ ├── run_params.h │ └── struct_run_params.h ├── tree │ ├── batches.c │ ├── batches.h │ ├── partition.c │ ├── partition.h │ ├── struct_tree.h │ ├── struct_tree_linked_list_node.h │ ├── tree.c │ ├── tree.h │ ├── tree_linked_list.c │ └── tree_linked_list.h └── utilities │ ├── advanced_timings.c │ ├── advanced_timings.h │ ├── array.h │ ├── enums.h │ ├── timers.c │ ├── timers.h │ ├── tools.c │ ├── tools.h │ ├── xmalloc.c │ └── xmalloc.h └── tests ├── .gitignore ├── CMakeLists.txt ├── minunit.h └── serial_tests.c /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | */.DS_Store 3 | *.ptp* 4 | *.cproject 5 | *org.eclipse* 6 | *.settings* 7 | *.tsv 8 | *.csv 9 | *.xml 10 | *.prefs 11 | build/ 12 | .project 13 | .cproject 14 | .pydevproject 15 | interfaces/python/__pycache__ -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9...3.14) 2 | project(BaryTree VERSION 1.0 LANGUAGES C) 3 | 4 | set(default_build_type "Release") 5 | 6 | if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) 7 | message(STATUS "Setting build type to '${default_build_type}' as none was specified.") 8 | 9 | set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE 10 | STRING "Choose the type of build." FORCE) 11 | 12 | set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS 13 | "Debug" "Release" "None") 14 | endif() 15 | 16 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) 17 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib) 18 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib) 19 | 20 | option(BUILD_EXAMPLES "build examples with Zoltan" ON) 21 | option(ENABLE_GPU_BUILD "build OpenACC GPU version" ON) 22 | option(BUILD_SHARED_LIBS "build libraries as shared objects" ON) 23 | 24 | find_package(MPI REQUIRED) 25 | find_package(OpenMP REQUIRED) 26 | 27 | if(BUILD_EXAMPLES OR BUILD_TESTS) 28 | find_package(Zoltan REQUIRED) 29 | add_library(Zoltan_Interface INTERFACE) 30 | target_link_libraries(Zoltan_Interface INTERFACE zoltan) 31 | target_include_directories(Zoltan_Interface INTERFACE ${Zoltan_INCLUDE_DIRS}) 32 | endif() 33 | 34 | #Some PGI versions seem to require a link flag for OpenMP 35 | set_property(TARGET OpenMP::OpenMP_C APPEND 36 | PROPERTY 37 | INTERFACE_LINK_LIBRARIES 38 | $<$<C_COMPILER_ID:PGI>:-mp>) 39 | 40 | #Creating an interface for OpenACC 41 | add_library(OpenACC_C INTERFACE) 42 | set_property(TARGET OpenACC_C PROPERTY 43 | INTERFACE_COMPILE_OPTIONS 44 | -acc 45 | -ta=tesla:ccall,cc60$<$<CONFIG:Debug>:,time> 46 | -Minfo) 47 | set_property(TARGET OpenACC_C PROPERTY 48 | INTERFACE_LINK_LIBRARIES 49 | -acc 50 | -ta=tesla:ccall,cc60) 51 | 52 | if(ENABLE_GPU_BUILD AND NOT (CMAKE_C_COMPILER_ID MATCHES PGI)) 53 | message(STATUS "GPU build requires PGI compilers!
Turning GPU build off.") 54 | set(ENABLE_GPU_BUILD OFF CACHE BOOL "build OpenACC GPU version" FORCE) 55 | endif() 56 | 57 | enable_testing() 58 | add_subdirectory(src) 59 | add_subdirectory(examples) 60 | add_subdirectory(tests) 61 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | Installing 2 | ---------- 3 | 4 | In a best case scenario, building and installing the libraries and examples should be as simple as this: 5 | 6 | mkdir build; cd build; export CC=; 7 | cmake .. -DCMAKE_INSTALL_PREFIX=; 8 | make -j install; 9 | 10 | This assumes that you have a few things: 11 | 1. a sane C compiler, 12 | 2. a sane MPI installation that agrees with your C compiler, 13 | 3. CMake version 3.9 or newer, 14 | 4. an installed Trilinos Zoltan library (for building examples). 15 | 16 | If you don't have an install of Zoltan, then you must turn off building of 17 | examples with `-DBUILD_EXAMPLES=OFF`. 18 | 19 | Compiling GPU versions requires that a PGI C compiler be used. If another compiler 20 | other than pgcc is used, for instance gcc or icc, support for building GPU versions 21 | will be automatically turned off during configuration. 22 | 23 | CMake Flags 24 | ----------- 25 | The most useful CMake flags to use during configure are listed below. When passing a flag 26 | to `cmake` during configure, recall that it takes the form `-D=value`. 27 | | Flag | Option/ Value | Description 28 | |------------------------|------------------------------|------------ 29 | | `CMAKE_RELEASE_TYPE` | Debug, Release | Build either the Debug or Release version. 30 | | `ENABLE_GPU_BUILD` | ON, OFF | Toggle whether to build the GPU versions. 31 | | `CMAKE_INSTALL_PREFIX` | `` | Specify install location for `make install`. 32 | | `BUILD_EXAMPLES` | ON, OFF | Toggle whether to build examples (requires Zoltan). 33 | | `BUILD_SHARED_LIBS` | ON, OFF | Toggle whether to build libraries as shared or static objects. 34 | | `Zoltan_DIR` | `` | Specify location of Zoltan CMake configuration file if not picked up by CMake automatically (typically `lib/cmake/Zoltan` of wherever Trilinos was installed). 35 | 36 | If the Zoltan install isn't picked up automatically, you can also add the install location of Trilinos or Zoltan to the CMake module search path with `-DCMAKE_PREFIX_PATH=`. This is an alternative to explicitly setting `Zoltan_DIR`. 37 | 38 | Testing 39 | ------- 40 | After building, you can test the build by running `ctest` or `make test` from the build 41 | directory. This performs a series of simple serial tests. 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 The Regents of the University of Michigan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ____ _______ 2 | | _ \ |__ __| 3 | | |_) | __ _ _ __ _ _| |_ __ ___ ___ 4 | | _ < / _` | '__| | | | | '__/ _ \/ _ \ 5 | | |_) | (_| | | | |_| | | | | __| __/ 6 | |____/ \__,_|_| \__, |_|_| \___|\___| 7 | __/ | 8 | |___/ 9 | BaryTree 10 | ======== 11 | 12 | A work-in-progress library for fast computation of N-body interactions on multiple GPUs, 13 | BaryTree implements barycentric Lagrange and Hermite polynomial interpolation fast 14 | summation methods. The current code employs an OpenACC GPU implementation with MPI 15 | for distributed memory parallelization. 16 | 17 | 18 | Authors: 19 | - Leighton W. Wilson (lwwilson@umich.edu) 20 | - Nathan J. Vaughn (njvaughn@umich.edu) 21 | 22 | Department of Mathematics, 23 | University of Michigan, Ann Arbor. 24 | 25 | 26 | Building 27 | -------- 28 | This project uses CMake to manage and configure its build system. In principle, 29 | building this project is as simple as executing the following from the top level 30 | directory of BaryTree: 31 | 32 | mkdir build; cd build; export CC=<your C compiler>; cmake ..; make 33 | 34 | Compiling GPU versions requires that a PGI C compiler be used. For more information 35 | on building and installing, see __INSTALL.md__ in this directory. 36 | 37 | 38 | Examples 39 | -------- 40 | See the __examples__ directory for several example executables that use __BaryTree__ 41 | and the Trilinos __Zoltan__ library for load balancing, in addition to an example 42 | using the library's interface for C programs. See __examples/README.md__ for more 43 | details. 44 | 45 | 46 | Interfaces 47 | ---------- 48 | See the __interfaces__ directory for __BaryTree__ interfaces for non-C programs. 49 | Currently, there are Fortran and Python interfaces and example programs using those interfaces. 50 | See __interfaces/README.md__ for more details. 51 | 52 | 53 | References 54 | ---------- 55 | Please refer to the following references for more background: 56 | 57 | - L. Wilson, N. Vaughn, and R. Krasny, A GPU-accelerated fast 58 | multipole method based on barycentric Lagrange interpolation 59 | and dual tree traversal, 60 | _Comput. Phys. Commun._ __265__ (2021), 108017. 61 | 62 | - N. Vaughn, L. Wilson, and R. Krasny, A GPU-accelerated barycentric 63 | Lagrange treecode, 64 | _Proc. 21st IEEE Int. Workshop Parallel Distrib. Sci. Eng. 65 | Comput._ (PDSEC 2020) (2020). 66 | 67 | - L. Wang, R. Krasny, and S. Tlupova, A kernel-independent treecode 68 | based on barycentric Lagrange interpolation, 69 | _Commun. Comput. Phys._ __28__ (2020), 1415-1436. 70 | 71 | - R. Krasny and L. Wang, A treecode based on barycentric Hermite 72 | interpolation for electrostatic particle interactions, 73 | _Comput. Math. Biophys._ __7__ (2019), 73-84. 74 | 75 | - H. A. Boateng and R.
Krasny, Comparison of treecodes for 76 | computing electrostatic potentials in charged particle 77 | systems with disjoint targets and sources, 78 | _J. Comput. Chem._ __34__ (2013), 2159-2167. 79 | 80 | - J.-P. Berrut and L. N. Trefethen, Barycentric Lagrange interpolation, 81 | _SIAM Rev._ __46__ (2004), 501-517. 82 | 83 | - Z.-H. Duan and R. Krasny, An adaptive treecode for computing 84 | nonbonded potential energy in classical molecular systems, 85 | _J. Comput. Chem._ __22__ (2001), 184–195. 86 | 87 | 88 | License 89 | ------- 90 | Copyright © 2019-2021, The Regents of the University of Michigan. Released under the [MIT License](LICENSE). 91 | 92 | 93 | Disclaimer 94 | ---------- 95 | This material is based upon work supported by the National Science Foundation under grant DMS-1819094, and by the Extreme Science and Engineering Discovery Environment (XSEDE) under grants ACI-1548562 and ASC-190062. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the author(s) and do not necessarily reflect the views of the National Science Foundation. 96 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(BUILD_EXAMPLES) 2 | 3 | set (AUX_SRCS 4 | zoltan_fns.h 5 | zoltan_fns.c 6 | support_fns.h 7 | support_fns.c) 8 | 9 | add_executable(random_cube_cpu random_cube.c ${AUX_SRCS}) 10 | target_link_libraries(random_cube_cpu PRIVATE BaryTree_cpu Zoltan_Interface) 11 | install(TARGETS random_cube_cpu DESTINATION bin) 12 | 13 | add_executable(random_cube_reproducible_cpu random_cube_reproducible.c ${AUX_SRCS}) 14 | target_link_libraries(random_cube_reproducible_cpu PRIVATE BaryTree_cpu Zoltan_Interface) 15 | install(TARGETS random_cube_reproducible_cpu DESTINATION bin) 16 | 17 | add_executable(run_readin_cpu run_readin.c ${AUX_SRCS}) 18 | target_link_libraries(run_readin_cpu PRIVATE BaryTree_cpu Zoltan_Interface) 19 | install(TARGETS run_readin_cpu DESTINATION bin) 20 | 21 | add_executable(test_BaryTreeInterface_cpu test_BaryTreeInterface.c) 22 | target_link_libraries(test_BaryTreeInterface_cpu PRIVATE BaryTree_cpu) 23 | install(TARGETS test_BaryTreeInterface_cpu DESTINATION bin) 24 | 25 | if(ENABLE_GPU_BUILD) 26 | add_executable(random_cube_gpu random_cube.c ${AUX_SRCS}) 27 | target_link_libraries(random_cube_gpu PRIVATE BaryTree_gpu Zoltan_Interface) 28 | install(TARGETS random_cube_gpu DESTINATION bin) 29 | 30 | add_executable(random_cube_reproducible_gpu random_cube_reproducible.c ${AUX_SRCS}) 31 | target_link_libraries(random_cube_reproducible_gpu PRIVATE BaryTree_gpu Zoltan_Interface) 32 | install(TARGETS random_cube_reproducible_gpu DESTINATION bin) 33 | 34 | add_executable(run_readin_gpu run_readin.c ${AUX_SRCS}) 35 | target_link_libraries(run_readin_gpu PRIVATE BaryTree_gpu Zoltan_Interface) 36 | install(TARGETS run_readin_gpu DESTINATION bin) 37 | 38 | add_executable(test_BaryTreeInterface_gpu test_BaryTreeInterface.c) 39 | target_link_libraries(test_BaryTreeInterface_gpu PRIVATE BaryTree_gpu) 40 | install(TARGETS test_BaryTreeInterface_gpu DESTINATION bin) 41 | endif() 42 | 43 | endif() 44 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | Examples 2 | -------- 3 | 4 | This examples folder builds eight executables: 5 | 6 | 1.
__random_cube_cpu__ and __random_cube_gpu__ 7 | 2. __random_cube_reproducible_cpu__ and __random_cube_reproducible_gpu__ 8 | 3. __test_BaryTreeInterface_cpu__ and __test_BaryTreeInterface_gpu__ 9 | 4. __run_readin_cpu__ and __run_readin_gpu__ (not detailed below) 10 | - - - 11 | 12 | #### __random_cube__ and __random_cube_reproducible__ 13 | 14 | All of the random cube examples demonstrate the treecode's performance 15 | using a cube of uniformly distributed random particles, load balanced 16 | with Zoltan's recursive coordinate bisection. 17 | 18 | The argument given to the executable is a parameter file that 19 | specifies the run. An example is given here as __example.in__. For 20 | example, one would run: 21 | 22 | mpirun -n 2 random_cube_cpu example.in 23 | 24 | to run the __random_cube_cpu__ example with the parameters specified in 25 | the file __example.in__ across two ranks. 26 | 27 | The parameters that can be specified in the infile are as follows: 28 | | Parameter | Description 29 | |-------------------|------------------ 30 | | `num_particles` | Number of sources and targets. Mutually exclusive with the `num_sources` and `num_targets` parameters. 31 | | `num_sources` | Number of sources. 32 | | `num_targets` | Number of targets. 33 | | `distribution` | Underlying particle distribution: `UNIFORM`, `GAUSSIAN`, `EXPONENTIAL`, `PLUMMER`, or `PLUMMER_SYMMETRIC`. 34 | | `degree` | Degree of polynomial interpolation. 35 | | `theta` | Multipole acceptance criterion (MAC). 36 | | `max_per_source_leaf` | Maximum number of particles per source tree leaf (or source batch, for `CLUSTER_PARTICLE`). 37 | | `max_per_target_leaf` | Maximum number of particles per target tree leaf (or target batch, for `PARTICLE_CLUSTER`). 38 | | `beta` | Automatic tuning accuracy parameter. Number in [0,1]; higher is more accurate. 39 | | `compute_type` | Type of treecode method: `CLUSTER_PARTICLE`, `PARTICLE_CLUSTER` (i.e. BLTC), or `CLUSTER_CLUSTER` (i.e. BLDTT). 40 | | `approximation` | Type of polynomial: `LAGRANGE` or `HERMITE`. `HERMITE` is incompatible with cluster-cluster. 41 | | `kernel_name` | Name of interaction kernel: `COULOMB`, `YUKAWA`, `REGULARIZED_COULOMB`, `REGULARIZED_YUKAWA`, `SIN_OVER_R`, `USER`. 42 | | `kernel_params` | Comma-separated list of parameters for the given kernel. 43 | | `run_direct` | Run direct calculation for error comparison: `ON` or `OFF`. 44 | | `verbosity` | Determines verbosity level of output. Integer `0`, `1`, `2`, `3`. Higher means more output. 45 | | `slice` | Determines the proportion of target sites at which the direct calculation is performed for error comparison. 10 would mean every 10th target is sampled. 46 | 47 | 48 | Note the difference between these executables: 49 | 50 | - The __random_cube__ examples are designed to test the 51 | problem size limits of the treecode by overcoming limits in Zoltan's 52 | maximum array sizes. Unlike the __random_cube_reproducible__ examples, which first 53 | generate all random particles and then use Zoltan to load balance them, 54 | these examples generate a small number of particles, load balance 55 | them, determine the resulting bounding boxes, and then generate the 56 | specified number of random particles in those bounding boxes. The results 57 | produced in terms of performance and accuracy should be very similar to 58 | the __random_cube_reproducible__ examples. 59 | 60 | - The __random_cube_reproducible__ examples are designed for reproducibility 61 | of results.
Given a total number of particles across all ranks, the 62 | actual random particles will be the same no matter how many ranks 63 | are used (given that the executable is run on the same computational 64 | resource). Additionally, this example requires that the number of sources 65 | and targets be equal. 66 | 67 | - - - 68 | 69 | #### __test_BaryTreeInterface__ 70 | 71 | The __test_BaryTreeInterface__ examples demonstrate how to use the C wrapper 72 | for the treecode. A C program that links to the __BaryTree__ library can, 73 | in fact, directly use the `treedriver` function if the calling program 74 | implements the particle and run-parameter structs used by `treedriver` 75 | (as done in the above examples). The `BaryTreeInterface` function, 76 | however, takes source and target particle arrays directly. 77 | -------------------------------------------------------------------------------- /examples/example.in: -------------------------------------------------------------------------------- 1 | num_sources 20000 2 | num_targets 20000 3 | degree 2 4 | theta 0.9 5 | beta -1.0 6 | size_check 0.0 7 | max_per_source_leaf 100 8 | max_per_target_leaf 100 9 | kernel_name coulomb 10 | kernel_params 1.0 11 | approximation lagrange 12 | compute_type particle-cluster 13 | distribution uniform 14 | run_direct 1 15 | slice 10 16 | verbosity 1 17 | -------------------------------------------------------------------------------- /examples/support_fns.h: -------------------------------------------------------------------------------- 1 | #ifndef H_SUPPORT_FUNCTIONS_H 2 | #define H_SUPPORT_FUNCTIONS_H 3 | 4 | #include <stdio.h> 5 | 6 | #include "../src/run_params/struct_run_params.h" 7 | 8 | 9 | typedef enum DISTRIBUTION 10 | { 11 | NO_DISTRIBUTION, 12 | UNIFORM, 13 | GAUSSIAN, 14 | EXPONENTIAL, 15 | PLUMMER, 16 | PLUMMER_SYMMETRIC, 17 | SLAB_1, 18 | SLAB_2, 19 | SPHERICAL_SHELL 20 | } DISTRIBUTION; 21 | 22 | typedef enum PARTITION 23 | { 24 | NO_PARTITION, 25 | RCB, 26 | HSFC 27 | } PARTITION; 28 | 29 | 30 | void Params_Parse(FILE *fp, struct RunParams **run_params, int *N, int *M, int *run_direct, int *slice, 31 | double *xyz_limits, DISTRIBUTION *distribution, PARTITION *partition); 32 | 33 | 34 | double Point_Set_Init(DISTRIBUTION distribution); 35 | 36 | double Point_Set(DISTRIBUTION distribution, double xmin, double xmax); 37 | 38 | void Point_Plummer(double R, double *x, double *y, double *z); 39 | 40 | void Point_Plummer_Octant(double R, double *x, double *y, double *z); 41 | 42 | void Point_Gaussian(double *x, double *y, double *z); 43 | 44 | void Point_Exponential(double *x, double *y, double *z); 45 | 46 | void Point_Spherical_Shell(double R, double *x, double *y, double *z); 47 | 48 | 49 | void Timing_Calculate(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], 50 | double time_run[4], double time_tree[13], double time_direct[4]); 51 | 52 | void Timing_Print(double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], 53 | int run_direct, struct RunParams *run_params); 54 | 55 | 56 | void Accuracy_Calculate(double *potential_engy_glob, double *potential_engy_direct_glob, 57 | double *glob_inf_err, double *glob_relinf_err, double *glob_n2_err, double *glob_reln2_err, 58 | double *potential, double *potential_direct, int targets_num, int slice); 59 | 60 | void Accuracy_Print(double potential_engy_glob, double potential_engy_direct_glob, 61 | double glob_inf_err, double glob_relinf_err, double glob_n2_err, double glob_reln2_err, 62 | int slice);
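/* Sketch of intent, inferred from the parameter list that follows: CSV_Print
 * gathers the run configuration, global timings, and accuracy results into a
 * CSV summary file; see support_fns.c for the exact columns written. */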
64 | 65 | void CSV_Print(int N, int M, struct RunParams *run_params, 66 | double time_run_glob[3][4], double time_tree_glob[3][13], double time_direct_glob[3][4], 67 | double potential_engy_glob, double potential_engy_direct_glob, 68 | double glob_inf_err, double glob_relinf_err, double glob_n2_err, double glob_reln2_err); 69 | 70 | 71 | #endif /* H_SUPPORT_FUNCTIONS_H */ 72 | -------------------------------------------------------------------------------- /examples/test_BaryTreeInterface.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "../src/interface/BaryTreeInterface.h" 7 | 8 | int main(int argc, char **argv) 9 | { 10 | int rank, numProcs; 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 13 | MPI_Comm_size(MPI_COMM_WORLD, &numProcs); 14 | 15 | int numSources = 20000; 16 | int numTargets = 20000; 17 | 18 | KERNEL kernel = YUKAWA; 19 | SINGULARITY singularity = SKIPPING; 20 | APPROXIMATION approximation = LAGRANGE; 21 | COMPUTE_TYPE compute_type = PARTICLE_CLUSTER; 22 | 23 | int numParams = 1; 24 | double kernelParams[1] = {0.5}; 25 | 26 | int interpDegree = 5; 27 | double theta = 0.8; 28 | double beta = 1.0; 29 | 30 | int maxPerLeaf = 500; 31 | int maxPerBatch = 500; 32 | double sizeCheck = 1.0; 33 | 34 | int verbosity = 0; 35 | 36 | double *xS = malloc(numSources * sizeof(double)); 37 | double *yS = malloc(numSources * sizeof(double)); 38 | double *zS = malloc(numSources * sizeof(double)); 39 | double *qS = malloc(numSources * sizeof(double)); 40 | double *wS = malloc(numSources * sizeof(double)); 41 | 42 | double *xT = malloc(numTargets * sizeof(double)); 43 | double *yT = malloc(numTargets * sizeof(double)); 44 | double *zT = malloc(numTargets * sizeof(double)); 45 | double *qT = malloc(numTargets * sizeof(double)); 46 | 47 | double *potential = malloc(numTargets * sizeof(double)); 48 | 49 | for (int i = 0; i < numSources; ++i) { 50 | xS[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; 51 | yS[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; 52 | zS[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; 53 | qS[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; 54 | wS[i] = 1.; 55 | } 56 | 57 | for (int i = 0; i < numTargets; ++i) { 58 | xT[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; 59 | yT[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.; 60 | zT[i] = ((double)rand()/(double)(RAND_MAX)) * 2. 
- 1.; 61 | qT[i] = 1.; 62 | } 63 | 64 | BaryTreeInterface(numTargets, numSources, xT, yT, zT, qT, 65 | xS, yS, zS, qS, wS, potential, 66 | kernel, numParams, kernelParams, 67 | singularity, approximation, compute_type, 68 | theta, interpDegree, maxPerLeaf, maxPerBatch, 69 | sizeCheck, beta, verbosity); 70 | 71 | printf("[test BaryTree interface] BaryTree has finished.\n"); 72 | 73 | free(xS); 74 | free(yS); 75 | free(zS); 76 | free(qS); 77 | free(wS); 78 | free(xT); 79 | free(yT); 80 | free(zT); 81 | free(qT); 82 | free(potential); 83 | 84 | MPI_Finalize(); 85 | 86 | return 0; 87 | } 88 | -------------------------------------------------------------------------------- /examples/zoltan_fns.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "zoltan_fns.h" 6 | 7 | 8 | int ztn_get_number_of_objects(void *data, int *ierr) 9 | { 10 | MESH_DATA *mesh = (MESH_DATA *)data; 11 | *ierr = ZOLTAN_OK; 12 | return mesh->numMyPoints; 13 | } 14 | 15 | 16 | void ztn_get_object_list(void *data, int sizeGID, int sizeLID, 17 | ZOLTAN_ID_PTR globalID, ZOLTAN_ID_PTR localID, 18 | int wgt_dim, float *obj_wgts, int *ierr) 19 | { 20 | int i; 21 | MESH_DATA *mesh = (MESH_DATA *)data; 22 | *ierr = ZOLTAN_OK; 23 | 24 | /* Return the IDs of our objects, along with per-object 25 | * weights taken from the mesh's b array. 26 | */ 27 | 28 | for (i = 0; i < mesh->numMyPoints; i++) { 29 | globalID[i] = mesh->myGlobalIDs[i]; 30 | localID[i] = i; 31 | obj_wgts[i] = mesh->b[i]; 32 | } 33 | } 34 | 35 | 36 | int ztn_get_num_geometry(void *data, int *ierr) 37 | { 38 | *ierr = ZOLTAN_OK; 39 | return 3; 40 | } 41 | 42 | 43 | void ztn_get_geometry_list(void *data, int sizeGID, int sizeLID, int num_obj, 44 | ZOLTAN_ID_PTR globalID, ZOLTAN_ID_PTR localID, 45 | int num_dim, double *geom_vec, int *ierr) 46 | { 47 | int i; 48 | 49 | MESH_DATA *mesh = (MESH_DATA *)data; 50 | 51 | if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 3)) { 52 | *ierr = ZOLTAN_FATAL; 53 | return; 54 | } 55 | 56 | *ierr = ZOLTAN_OK; 57 | 58 | for (i = 0; i < num_obj ; i++){ 59 | geom_vec[3*i] = (double)mesh->x[i]; 60 | geom_vec[3*i + 1] = (double)mesh->y[i]; 61 | geom_vec[3*i + 2] = (double)mesh->z[i]; 62 | } 63 | 64 | return; 65 | } 66 | 67 | 68 | void ztn_pack(void *data, int num_gid_entries, int num_lid_entries, 69 | ZOLTAN_ID_PTR global_id, ZOLTAN_ID_PTR local_id, 70 | int dest, int size, char *buf, int *ierr) { 71 | 72 | SINGLE_MESH_DATA *mesh_single = (SINGLE_MESH_DATA *)buf; 73 | MESH_DATA *mesh = (MESH_DATA *)data; 74 | 75 | mesh_single->x = mesh->x[(*local_id)]; 76 | mesh_single->y = mesh->y[(*local_id)]; 77 | mesh_single->z = mesh->z[(*local_id)]; 78 | mesh_single->q = mesh->q[(*local_id)]; 79 | mesh_single->w = mesh->w[(*local_id)]; 80 | mesh_single->b = mesh->b[(*local_id)]; 81 | mesh_single->myGlobalID = mesh->myGlobalIDs[(*local_id)]; 82 | 83 | mesh->myGlobalIDs[(*local_id)] = (ZOLTAN_ID_TYPE)(-1); // Mark local particle as exported 84 | 85 | return; 86 | } 87 | 88 | 89 | void ztn_unpack(void *data, int num_gid_entries, 90 | ZOLTAN_ID_PTR global_id, 91 | int size, char *buf, int *ierr) { 92 | 93 | SINGLE_MESH_DATA *mesh_single = (SINGLE_MESH_DATA *)buf; 94 | MESH_DATA *mesh = (MESH_DATA *)data; 95 | 96 | mesh->numMyPoints += 1; 97 | 98 | mesh->myGlobalIDs = (ZOLTAN_ID_TYPE *)realloc(mesh->myGlobalIDs, 99 | sizeof(ZOLTAN_ID_TYPE) * mesh->numMyPoints); 100 | mesh->x = (double *)realloc(mesh->x, sizeof(double) * mesh->numMyPoints); 101 |
mesh->y = (double *)realloc(mesh->y, sizeof(double) * mesh->numMyPoints); 102 | mesh->z = (double *)realloc(mesh->z, sizeof(double) * mesh->numMyPoints); 103 | mesh->q = (double *)realloc(mesh->q, sizeof(double) * mesh->numMyPoints); 104 | mesh->w = (double *)realloc(mesh->w, sizeof(double) * mesh->numMyPoints); 105 | mesh->b = (double *)realloc(mesh->b, sizeof(double) * mesh->numMyPoints); 106 | 107 | mesh->x[mesh->numMyPoints-1] = mesh_single->x; 108 | mesh->y[mesh->numMyPoints-1] = mesh_single->y; 109 | mesh->z[mesh->numMyPoints-1] = mesh_single->z; 110 | mesh->q[mesh->numMyPoints-1] = mesh_single->q; 111 | mesh->w[mesh->numMyPoints-1] = mesh_single->w; 112 | mesh->b[mesh->numMyPoints-1] = mesh_single->b; 113 | mesh->myGlobalIDs[mesh->numMyPoints-1] = mesh_single->myGlobalID; 114 | 115 | return; 116 | } 117 | 118 | 119 | int ztn_obj_size(void *data, int num_gid_entries, int num_lid_entries, 120 | ZOLTAN_ID_PTR global_id, ZOLTAN_ID_PTR local_id, int *ierr) 121 | { 122 | return sizeof(SINGLE_MESH_DATA); 123 | } 124 | -------------------------------------------------------------------------------- /examples/zoltan_fns.h: -------------------------------------------------------------------------------- 1 | #ifndef H_ZOLTAN_SUPPORT_FUNCTIONS_H 2 | #define H_ZOLTAN_SUPPORT_FUNCTIONS_H 3 | 4 | #include <zoltan.h> 5 | 6 | typedef struct{ 7 | int numGlobalPoints; 8 | int numMyPoints; 9 | ZOLTAN_ID_PTR myGlobalIDs; 10 | double *x; 11 | double *y; 12 | double *z; 13 | double *q; 14 | double *w; 15 | double *b; 16 | } MESH_DATA; 17 | 18 | typedef struct{ 19 | ZOLTAN_ID_TYPE myGlobalID; 20 | double x; 21 | double y; 22 | double z; 23 | double q; 24 | double w; 25 | double b; 26 | } SINGLE_MESH_DATA; 27 | 28 | 29 | int ztn_get_number_of_objects(void *data, int *ierr); 30 | 31 | void ztn_get_object_list(void *data, int sizeGID, int sizeLID, 32 | ZOLTAN_ID_PTR globalID, ZOLTAN_ID_PTR localID, 33 | int wgt_dim, float *obj_wgts, int *ierr); 34 | 35 | int ztn_get_num_geometry(void *data, int *ierr); 36 | 37 | void ztn_get_geometry_list(void *data, int sizeGID, int sizeLID, 38 | int num_obj, ZOLTAN_ID_PTR globalID, ZOLTAN_ID_PTR localID, 39 | int num_dim, double *geom_vec, int *ierr); 40 | 41 | void ztn_pack(void *data, int num_gid_entries, int num_lid_entries, 42 | ZOLTAN_ID_PTR global_id, ZOLTAN_ID_PTR local_id, 43 | int dest, int size, char *buf, int *ierr); 44 | 45 | void ztn_unpack(void *data, int num_gid_entries, 46 | ZOLTAN_ID_PTR global_id, 47 | int size, char *buf, int *ierr); 48 | 49 | int ztn_obj_size(void *data, int num_gid_entries, int num_lid_entries, 50 | ZOLTAN_ID_PTR global_id, ZOLTAN_ID_PTR local_id, int *ierr); 51 | 52 | 53 | #endif /* H_ZOLTAN_SUPPORT_FUNCTIONS_H */ 54 | -------------------------------------------------------------------------------- /interfaces/README.md: -------------------------------------------------------------------------------- 1 | Interfaces 2 | ---------- 3 | 4 | This folder contains interfaces between __BaryTree__ and other languages. 5 | The BaryTree library itself contains `BaryTreeInterface`. This function takes as input 6 | pointers to the particle arrays, as well as runtime parameters such as kernel information, 7 | the MAC parameter, and batch and cluster sizes. This function first constructs the particle 8 | structs and then calls `treedriver`. 9 | 10 | The interfaces contained in these subdirectories are responsible for supplying 11 | `BaryTreeInterface` with the necessary pointers to particle arrays and the runtime metadata.
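As a minimal sketch of a direct C call (mirroring __examples/test_BaryTreeInterface.c__; the helper name `call_barytree_sketch` and the assumption that all arrays are already allocated and filled are illustrative only):

```c
#include "BaryTreeInterface.h"

/* Sketch only: xT, yT, zT, qT, and potential have length numTargets;
 * xS, yS, zS, qS, wS have length numSources; MPI is assumed to be
 * initialized by the caller, as in test_BaryTreeInterface.c. */
void call_barytree_sketch(int numTargets, int numSources,
                          double *xT, double *yT, double *zT, double *qT,
                          double *xS, double *yS, double *zS, double *qS, double *wS,
                          double *potential)
{
    double kernelParams[1] = {0.5};         /* Yukawa screening parameter */

    BaryTreeInterface(numTargets, numSources,
                      xT, yT, zT, qT,       /* target coordinates and values */
                      xS, yS, zS, qS, wS,   /* source coordinates, values, quadrature weights */
                      potential,            /* output array, length numTargets */
                      YUKAWA, 1, kernelParams,
                      SKIPPING, LAGRANGE, PARTICLE_CLUSTER,
                      0.8,                  /* theta (MAC) */
                      5,                    /* interpolation degree */
                      500, 500,             /* max per source leaf, max per target batch */
                      1.0,                  /* sizeCheck */
                      1.0,                  /* beta */
                      0);                   /* verbosity */
}
```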
12 | 13 | ---------- 14 | 15 | ### Python 16 | 17 | The Python folder contains __BaryTreeInterface.py__, which uses the `ctypes` module to load 18 | the library, set the argument types, construct pointers to the `numpy` arrays, and call 19 | `BaryTreeInterface`. 20 | 21 | __testBaryTreeInterface.py__ imports the Python wrapper, generates some random particles, 22 | and calls the treecode twice: once with explicit `theta`/`degree` settings, and once using the `beta` auto-tuning parameter. 23 | 24 | The `w` array is for quadrature weights when computing discrete convolution sums; 25 | it is set to ones for particle simulations. 26 | -------------------------------------------------------------------------------- /interfaces/fortran/BaryTreeInterface.fh: -------------------------------------------------------------------------------- 1 | ENUM, BIND(C) 2 | ENUMERATOR :: NO_KERNEL = 0, COULOMB, YUKAWA, & 3 | REGULARIZED_COULOMB, REGULARIZED_YUKAWA, & 4 | ATANF, TCF, DCF, SIN_OVER_R, MQ, RBS_U, RBS_V, USER 5 | 6 | ENUMERATOR :: NO_SINGULARITY = 0, SKIPPING, SUBTRACTION 7 | 8 | ENUMERATOR :: NO_APPROX = 0, LAGRANGE, HERMITE 9 | 10 | ENUMERATOR :: NO_COMPUTE_TYPE = 0, PARTICLE_CLUSTER, & 11 | CLUSTER_PARTICLE, CLUSTER_CLUSTER 12 | END ENUM 13 | 14 | INTERFACE 15 | SUBROUTINE BaryTreeInterface(num_targets, num_sources, & 16 | target_x, target_y, target_z, target_q, & 17 | source_x, source_y, source_z, source_q, source_w, & 18 | potential, kernel, num_kernel_params, kernel_params, & 19 | singularity, approximation, compute_type, & 20 | theta, degree, max_source_leaf, max_target_leaf, & 21 | size_check, beta, verbosity) & 22 | BIND(C, NAME='BaryTreeInterface') 23 | 24 | USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT, C_DOUBLE, C_PTR 25 | IMPLICIT NONE 26 | 27 | INTEGER(KIND=C_INT), VALUE, INTENT(IN) :: num_targets, num_sources 28 | TYPE(C_PTR), VALUE, INTENT(IN) :: target_x, target_y, target_z, target_q 29 | TYPE(C_PTR), VALUE, INTENT(IN) :: source_x, source_y, source_z, source_q, source_w 30 | TYPE(C_PTR), VALUE, INTENT(IN) :: potential 31 | 32 | INTEGER(KIND=C_INT), VALUE, INTENT(IN) :: kernel, num_kernel_params, & 33 | singularity, approximation, compute_type, degree, & 34 | max_source_leaf, max_target_leaf, verbosity 35 | TYPE(C_PTR), VALUE, INTENT(IN) :: kernel_params 36 | REAL(KIND=C_DOUBLE), VALUE, INTENT(IN) :: theta, size_check, beta 37 | 38 | END SUBROUTINE BaryTreeInterface 39 | END INTERFACE 40 | -------------------------------------------------------------------------------- /interfaces/fortran/example_interface.f03: -------------------------------------------------------------------------------- 1 | PROGRAM BaryTree_Fortran_Example 2 | 3 | USE MPI 4 | USE ISO_C_BINDING, ONLY: C_LOC 5 | IMPLICIT NONE 6 | 7 | INCLUDE "BaryTreeInterface.fh" 8 | 9 | INTEGER :: rank, num_proc, ierr 10 | 11 | INTEGER :: num_targets, num_sources, kernel, num_kernel_params, singularity, & 12 | approximation, compute_type, degree, max_source_leaf, max_target_leaf, & 13 | verbosity 14 | 15 | DOUBLE PRECISION, POINTER, DIMENSION(:) :: target_x, target_y, target_z, target_q, & 16 | source_x, source_y, source_z, source_q, source_w, potential 17 | 18 | DOUBLE PRECISION, DIMENSION(2) :: kernel_params 19 | 20 | DOUBLE PRECISION :: theta, size_check, beta 21 | 22 | CALL MPI_INIT(ierr) 23 | CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr) 24 | CALL MPI_COMM_SIZE(MPI_COMM_WORLD, num_proc, ierr) 25 | 26 | kernel = RBS_U 27 | num_kernel_params = 1 28 | kernel_params(1) = 0.5 29 | 30 | singularity = SKIPPING 31 | approximation = LAGRANGE 32 | verbosity = 3 33 | 34 | max_source_leaf = 50 35 | max_target_leaf = 50 36 |
size_check = 1.0 37 | beta = -1 38 | 39 | compute_type = CLUSTER_CLUSTER 40 | theta = 0.7 41 | degree = 3 42 | 43 | num_targets = 10000 44 | num_sources = 10000 45 | 46 | ALLOCATE(target_x(num_targets), target_y(num_targets), target_z(num_targets), & 47 | target_q(num_targets), potential(num_targets), & 48 | source_x(num_sources), source_y(num_sources), source_z(num_sources), & 49 | source_q(num_sources), source_w(num_sources)) 50 | 51 | CALL RANDOM_NUMBER(target_x) 52 | CALL RANDOM_NUMBER(target_y) 53 | CALL RANDOM_NUMBER(target_z) 54 | CALL RANDOM_NUMBER(target_q) 55 | 56 | CALL RANDOM_NUMBER(source_x) 57 | CALL RANDOM_NUMBER(source_y) 58 | CALL RANDOM_NUMBER(source_z) 59 | CALL RANDOM_NUMBER(source_q) 60 | CALL RANDOM_NUMBER(source_w) 61 | 62 | ! Calling with kernel as RBS_U 63 | CALL BaryTreeInterface(num_targets, num_sources, & 64 | C_LOC(target_x), C_LOC(target_y), C_LOC(target_z), C_LOC(target_q), & 65 | C_LOC(source_x), C_LOC(source_y), C_LOC(source_z), C_LOC(source_q), & 66 | C_LOC(source_w), C_LOC(potential), & 67 | kernel, num_kernel_params, C_LOC(kernel_params), & 68 | singularity, approximation, compute_type, theta, degree, & 69 | max_source_leaf, max_target_leaf, size_check, beta, & 70 | verbosity) 71 | 72 | PRINT *, "RBS u total potential is: ", SUM(potential) 73 | 74 | ! Calling with kernel as RBS_V 75 | CALL BaryTreeInterface(num_targets, num_sources, & 76 | C_LOC(target_x), C_LOC(target_y), C_LOC(target_z), C_LOC(target_q), & 77 | C_LOC(source_x), C_LOC(source_y), C_LOC(source_z), C_LOC(source_q), & 78 | C_LOC(source_w), C_LOC(potential), & 79 | RBS_V, num_kernel_params, C_LOC(kernel_params), & 80 | singularity, approximation, compute_type, theta, degree, & 81 | max_source_leaf, max_target_leaf, size_check, beta, & 82 | verbosity) 83 | 84 | PRINT *, "RBS v total potential is: ", SUM(potential) 85 | 86 | DEALLOCATE(target_x, target_y, target_z, target_q, potential, source_x, source_y, & 87 | source_z, source_q, source_w); 88 | 89 | CALL MPI_FINALIZE(ierr) 90 | 91 | END PROGRAM 92 | -------------------------------------------------------------------------------- /interfaces/python/testBaryTreeInterface.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ''' 3 | import os 4 | import sys 5 | import resource 6 | import numpy as np 7 | import mpi4py.MPI as MPI 8 | 9 | 10 | sys.path.append(os.getcwd()) 11 | try: 12 | import BaryTreeInterface as BT 13 | except ImportError: 14 | print('Unable to import BaryTreeInterface due to ImportError') 15 | except OSError: 16 | print('Unable to import BaryTreeInterface due to OSError') 17 | 18 | 19 | if __name__=="__main__": 20 | 21 | # set treecode parameters 22 | N = 5000 23 | maxPerSourceLeaf = 50 24 | maxPerTargetLeaf = 10 25 | GPUpresent = False 26 | theta = 0.8 27 | treecodeDegree = 4 28 | gaussianAlpha = 1.0 29 | verbosity = 0 30 | 31 | approximation = BT.Approximation.LAGRANGE 32 | singularity = BT.Singularity.SUBTRACTION 33 | computeType = BT.ComputeType.PARTICLE_CLUSTER 34 | 35 | kernel = BT.Kernel.YUKAWA 36 | numberOfKernelParameters = 1 37 | kernelParameters = np.array([0.5]) 38 | 39 | 40 | # initialize some random data 41 | np.random.seed(1) 42 | RHO = np.random.rand(N) 43 | X = np.random.rand(N) 44 | Y = np.random.rand(N) 45 | Z = np.random.rand(N) 46 | W = np.ones(N) # W stores quadrature weights for convolution integrals. For particle simulations, set = ones. 47 | 48 | expectedOutput = 588.7432483318685 # using seed of 1, this is expected value of first element of output array. 
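    # Note: the wrapper supports two ways of setting accuracy, both exercised
    # below: pass theta/degree/leaf sizes explicitly (first call), or pass only
    # beta, the automatic tuning parameter in [0,1] (second call).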
49 | 50 | 51 | # call the treecode 52 | 53 | output = BT.callTreedriver( N, N, 54 | X, Y, Z, RHO, 55 | np.copy(X), np.copy(Y), np.copy(Z), np.copy(RHO), np.copy(W), 56 | kernel, numberOfKernelParameters, kernelParameters, 57 | singularity, approximation, computeType, 58 | GPUpresent, verbosity, 59 | theta=theta, degree=treecodeDegree, sourceLeafSize=maxPerSourceLeaf, targetLeafSize=maxPerTargetLeaf, sizeCheck=1.0) 60 | 61 | assert (abs(output[0]-expectedOutput) < 1e-14), "Error: didn't get the expected output using explicit theta/degree." 62 | 63 | 64 | 65 | 66 | 67 | 68 | beta = 0.1 69 | expectedOutput = 588.7445889051367 # this is expected value of first element of output array for beta = 0.1 70 | output = BT.callTreedriver( N, N, 71 | X, Y, Z, RHO, 72 | np.copy(X), np.copy(Y), np.copy(Z), np.copy(RHO), np.copy(W), 73 | kernel, numberOfKernelParameters, kernelParameters, 74 | singularity, approximation, computeType, 75 | GPUpresent, verbosity, beta=beta, sizeCheck=1.0) 76 | assert (abs(output[0]-expectedOutput) < 1e-14), "Error: didn't get the expected output using beta." 77 | 78 | 79 | print("If no errors printed, then the calls to the treecode wrapper worked (one using explicit theta/degree, one using beta)") 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /src/clusters/clusters.h: -------------------------------------------------------------------------------- 1 | #ifndef H_CLUSTER_FUNCTIONS_H 2 | #define H_CLUSTER_FUNCTIONS_H 3 | 4 | #include "../utilities/enums.h" 5 | 6 | #include "../tree/struct_tree.h" 7 | #include "../particles/struct_particles.h" 8 | 9 | #include "struct_clusters.h" 10 | 11 | 12 | void Clusters_Sources_Construct(struct Clusters **clusters, const struct Particles *sources, 13 | const struct Tree *tree, const struct RunParams *run_params); 14 | 15 | void Clusters_Targets_Construct(struct Clusters **clusters, const struct Particles *targets, 16 | const struct Tree *tree, const struct RunParams *run_params); 17 | 18 | void Clusters_Alloc(struct Clusters **clusters_addr, int length, 19 | const struct RunParams *run_params); 20 | 21 | void Clusters_Free(struct Clusters **clusters_addr); 22 | 23 | void Clusters_Free_Win(struct Clusters **clusters_addr); 24 | 25 | 26 | #endif /* H_CLUSTER_FUNCTIONS_H */ 27 | -------------------------------------------------------------------------------- /src/clusters/struct_clusters.h: -------------------------------------------------------------------------------- 1 | #ifndef H_CLUSTERS_H 2 | #define H_CLUSTERS_H 3 | 4 | /* declaration of struct with tag Clusters */ 5 | struct Clusters 6 | { 7 | int num; 8 | int num_weights; 9 | int num_charges; 10 | 11 | double *x; 12 | double *y; 13 | double *z; 14 | double *q; 15 | // Quadrature weights; set to 1 when interacting particles directly rather than performing a convolution integral.
16 | double *w; 17 | }; 18 | 19 | #endif /* H_CLUSTERS_H */ 20 | -------------------------------------------------------------------------------- /src/comm_cp/comm_cp.h: -------------------------------------------------------------------------------- 1 | #ifndef H_COMM_CP_FUNCTIONS_H 2 | #define H_COMM_CP_FUNCTIONS_H 3 | 4 | #include "../tree/struct_tree.h" 5 | #include "../particles/struct_particles.h" 6 | #include "../run_params/struct_run_params.h" 7 | 8 | 9 | void Comm_CP_ConstructAndGetData(struct Tree **remote_batches_addr, struct Particles **remote_sources_addr, 10 | const struct Tree *tree_array, const struct Tree *batches, 11 | const struct Particles *sources, const struct RunParams *run_params); 12 | 13 | 14 | #endif /* H_COMM_CP_FUNCTIONS_H */ 15 | -------------------------------------------------------------------------------- /src/comm_types/comm_types.h: -------------------------------------------------------------------------------- 1 | #ifndef H_COMM_TYPES_FUNCTIONS_H 2 | #define H_COMM_TYPES_FUNCTIONS_H 3 | 4 | #include "../tree/struct_tree.h" 5 | #include "../run_params/struct_run_params.h" 6 | 7 | #include "struct_comm_types.h" 8 | 9 | 10 | void CommTypesAndTrees_Construct(struct CommTypes **comm_types_addr, struct Tree ***let_trees_addr, 11 | struct Tree *tree, struct Tree *batches, 12 | struct RunParams *run_params); 13 | 14 | void CommTypesAndTrees_Free(struct CommTypes **comm_types_addr, struct Tree ***let_trees_addr); 15 | 16 | 17 | #endif /* H_COMM_TYPES_FUNCTIONS_H */ 18 | -------------------------------------------------------------------------------- /src/comm_types/struct_comm_types.h: -------------------------------------------------------------------------------- 1 | #ifndef H_STRUCT_COMM_TYPES_H 2 | #define H_STRUCT_COMM_TYPES_H 3 | 4 | #include <mpi.h> 5 | 6 | 7 | struct CommTypes 8 | { 9 | int *num_remote_approx_array; 10 | int *previous_let_clusters_length_array; 11 | 12 | int let_clusters_length; 13 | int let_clusters_num; 14 | 15 | MPI_Datatype *MPI_approx_type; 16 | MPI_Datatype *MPI_approx_charges_type; 17 | MPI_Datatype *MPI_approx_weights_type; 18 | 19 | int *new_sources_length_array; 20 | int *previous_let_sources_length_array; 21 | 22 | int let_sources_length; 23 | 24 | MPI_Datatype *MPI_direct_type; 25 | }; 26 | 27 | 28 | #endif /* H_STRUCT_COMM_TYPES_H */ 29 | -------------------------------------------------------------------------------- /src/comm_windows/comm_windows.h: -------------------------------------------------------------------------------- 1 | #ifndef H_COMM_WINDOWS_FUNCTIONS_H 2 | #define H_COMM_WINDOWS_FUNCTIONS_H 3 | 4 | #include "../clusters/struct_clusters.h" 5 | #include "../particles/struct_particles.h" 6 | #include "../run_params/struct_run_params.h" 7 | #include "../comm_types/struct_comm_types.h" 8 | 9 | #include "struct_comm_windows.h" 10 | 11 | 12 | void CommWindows_Create(struct CommWindows **comm_windows_addr, 13 | struct Clusters *clusters, struct Particles *sources, struct RunParams *run_params); 14 | 15 | void CommWindows_Free(struct CommWindows **comm_windows_addr, struct RunParams *run_params); 16 | 17 | void CommWindows_Lock(struct CommWindows *comm_windows, int get_from, struct RunParams *run_params); 18 | 19 | void CommWindows_Unlock(struct CommWindows *comm_windows, int get_from, struct RunParams *run_params); 20 | 21 | void CommWindows_GetData(struct Clusters *let_clusters, struct Particles *let_sources, 22 | struct CommTypes *comm_types, struct CommWindows *comm_windows, 23 | int get_from, struct RunParams
*run_params); 24 | 25 | 26 | #endif /* H_COMM_WINDOWS_FUNCTIONS_H */ 27 | -------------------------------------------------------------------------------- /src/comm_windows/struct_comm_windows.h: -------------------------------------------------------------------------------- 1 | #ifndef H_STRUCT_COMM_WINDOWS_H 2 | #define H_STRUCT_COMM_WINDOWS_H 3 | 4 | #include <mpi.h> 5 | 6 | 7 | struct CommWindows 8 | { 9 | MPI_Win win_clusters_x, win_clusters_y, win_clusters_z, win_clusters_q, win_clusters_w; 10 | MPI_Win win_sources_x, win_sources_y, win_sources_z, win_sources_q, win_sources_w; 11 | }; 12 | 13 | 14 | #endif /* H_STRUCT_COMM_WINDOWS_H */ 15 | -------------------------------------------------------------------------------- /src/drivers/directdriver.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../utilities/array.h" 9 | #include "../utilities/tools.h" 10 | #include "../utilities/timers.h" 11 | #include "../utilities/enums.h" 12 | 13 | #include "../particles/struct_particles.h" 14 | #include "../particles/particles.h" 15 | 16 | #include "../run_params/struct_run_params.h" 17 | 18 | #include "../interaction_compute/interaction_compute.h" 19 | 20 | #include "directdriver.h" 21 | 22 | 23 | void directdriver(struct Particles *sources, struct Particles *targets, struct RunParams *run_params, 24 | double *potential, double *time_direct) 25 | { 26 | int rank, num_procs, ierr; 27 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 28 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 29 | 30 | int num_sources = sources->num; 31 | int num_targets = targets->num; 32 | int num_sources_on_proc[num_procs]; 33 | 34 | double *source_x = sources->x; 35 | double *source_y = sources->y; 36 | double *source_z = sources->z; 37 | double *source_q = sources->q; 38 | double *source_w = sources->w; 39 | 40 | double *target_x = targets->x; 41 | double *target_y = targets->y; 42 | double *target_z = targets->z; 43 | double *target_q = targets->q; 44 | 45 | double time1; 46 | time_direct[0] = 0.0; 47 | time_direct[1] = 0.0; 48 | time_direct[2] = 0.0; 49 | time_direct[3] = 0.0; 50 | 51 | 52 | START_TIMER(&time1); 53 | MPI_Allgather(&num_sources, 1, MPI_INT, num_sources_on_proc, 1, MPI_INT, MPI_COMM_WORLD); 54 | MPI_Win win_sources_x, win_sources_y, win_sources_z, win_sources_q, win_sources_w; 55 | MPI_Win_create(source_x, num_sources*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win_sources_x); 56 | MPI_Win_create(source_y, num_sources*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win_sources_y); 57 | MPI_Win_create(source_z, num_sources*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win_sources_z); 58 | MPI_Win_create(source_q, num_sources*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win_sources_q); 59 | MPI_Win_create(source_w, num_sources*sizeof(double), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win_sources_w); 60 | STOP_TIMER(&time1); 61 | time_direct[0] += time1; 62 | 63 | for (int proc_id = 1; proc_id < num_procs; ++proc_id) { 64 | 65 | START_TIMER(&time1); 66 | int get_from = (num_procs + rank - proc_id) % num_procs; 67 | 68 | struct Particles *remote_sources = NULL; 69 | Particles_Alloc(&remote_sources, num_sources_on_proc[get_from]); 70 | 71 | MPI_Barrier(MPI_COMM_WORLD); 72 | 73 | MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, win_sources_x); 74 | MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, win_sources_y); 75 |
MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, win_sources_z); 76 | MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, win_sources_q); 77 | MPI_Win_lock(MPI_LOCK_SHARED, get_from, 0, win_sources_w); 78 | 79 | MPI_Get(remote_sources->x, num_sources_on_proc[get_from], MPI_DOUBLE, 80 | get_from, 0, num_sources_on_proc[get_from], MPI_DOUBLE, win_sources_x); 81 | MPI_Get(remote_sources->y, num_sources_on_proc[get_from], MPI_DOUBLE, 82 | get_from, 0, num_sources_on_proc[get_from], MPI_DOUBLE, win_sources_y); 83 | MPI_Get(remote_sources->z, num_sources_on_proc[get_from], MPI_DOUBLE, 84 | get_from, 0, num_sources_on_proc[get_from], MPI_DOUBLE, win_sources_z); 85 | MPI_Get(remote_sources->q, num_sources_on_proc[get_from], MPI_DOUBLE, 86 | get_from, 0, num_sources_on_proc[get_from], MPI_DOUBLE, win_sources_q); 87 | MPI_Get(remote_sources->w, num_sources_on_proc[get_from], MPI_DOUBLE, 88 | get_from, 0, num_sources_on_proc[get_from], MPI_DOUBLE, win_sources_w); 89 | 90 | MPI_Win_unlock(get_from, win_sources_x); 91 | MPI_Win_unlock(get_from, win_sources_y); 92 | MPI_Win_unlock(get_from, win_sources_z); 93 | MPI_Win_unlock(get_from, win_sources_q); 94 | MPI_Win_unlock(get_from, win_sources_w); 95 | 96 | MPI_Barrier(MPI_COMM_WORLD); 97 | 98 | STOP_TIMER(&time1); 99 | time_direct[0] += time1; 100 | 101 | 102 | START_TIMER(&time1); 103 | InteractionCompute_Direct(potential, remote_sources, targets, run_params); 104 | 105 | Particles_Free(&remote_sources); 106 | STOP_TIMER(&time1); 107 | time_direct[1] += time1; 108 | } 109 | 110 | 111 | START_TIMER(&time_direct[2]); 112 | InteractionCompute_Direct(potential, sources, targets, run_params); 113 | STOP_TIMER(&time_direct[2]); 114 | 115 | 116 | START_TIMER(&time_direct[3]); 117 | InteractionCompute_SubtractionPotentialCorrection(potential, targets, run_params); 118 | STOP_TIMER(&time_direct[3]); 119 | 120 | return; 121 | 122 | } /* END of function directdriver */ 123 | -------------------------------------------------------------------------------- /src/drivers/directdriver.h: -------------------------------------------------------------------------------- 1 | #ifndef H_DIRECTDRIVER_H 2 | #define H_DIRECTDRIVER_H 3 | 4 | #include "../particles/struct_particles.h" 5 | #include "../run_params/struct_run_params.h" 6 | 7 | 8 | void directdriver(struct Particles *sources, struct Particles *targets, struct RunParams *run_params, 9 | double *potential_array, double *time_direct); 10 | 11 | 12 | #endif /* H_DIRECTDRIVER_H */ 13 | -------------------------------------------------------------------------------- /src/drivers/treedriver.h: -------------------------------------------------------------------------------- 1 | #ifndef H_TREEDRIVER_H 2 | #define H_TREEDRIVER_H 3 | 4 | #include "../particles/struct_particles.h" 5 | #include "../run_params/struct_run_params.h" 6 | 7 | 8 | void treedriver(struct Particles *sources, struct Particles *targets, struct RunParams *run_params, 9 | double *potential_array, double *time_tree); 10 | 11 | 12 | #endif /* H_TREEDRIVER_H */ 13 | -------------------------------------------------------------------------------- /src/interaction_compute/interaction_compute.h: -------------------------------------------------------------------------------- 1 | #ifndef H_INTERACTION_COMPUTE_H 2 | #define H_INTERACTION_COMPUTE_H 3 | 4 | #include "../tree/struct_tree.h" 5 | #include "../particles/struct_particles.h" 6 | #include "../clusters/struct_clusters.h" 7 | #include "../run_params/struct_run_params.h" 8 | #include 
"../interaction_lists/struct_interaction_lists.h" 9 | 10 | 11 | void InteractionCompute_PC(double *potential, struct Tree *tree, struct Tree *batches, 12 | struct InteractionLists *interaction_list, 13 | struct Particles *sources, struct Particles *targets, 14 | struct Clusters *clusters, struct RunParams *run_params); 15 | 16 | 17 | void InteractionCompute_CP(double *potential, struct Tree *tree, struct Tree *batches, 18 | struct InteractionLists *interaction_list, 19 | struct Particles *sources, struct Particles *targets, 20 | struct Clusters *clusters, struct RunParams *run_params); 21 | 22 | 23 | void InteractionCompute_CC(double *potential, struct Tree *source_tree, struct Tree *target_tree, 24 | struct InteractionLists *interaction_list, 25 | struct Particles *sources, struct Particles *targets, 26 | struct Clusters *source_clusters, struct Clusters *target_clusters, 27 | struct RunParams *run_params); 28 | 29 | 30 | void InteractionCompute_Downpass(double *potential, struct Tree *tree, 31 | struct Particles *targets, struct Clusters *clusters, 32 | struct RunParams *run_params); 33 | 34 | 35 | void InteractionCompute_Direct(double *potential, 36 | struct Particles *sources, struct Particles *targets, 37 | struct RunParams *run_params); 38 | 39 | 40 | void InteractionCompute_SubtractionPotentialCorrection(double *potential, 41 | struct Particles *targets, struct RunParams *run_params); 42 | 43 | 44 | #endif /* H_INTERACTION_COMPUTE_H */ 45 | -------------------------------------------------------------------------------- /src/interaction_compute/interaction_compute_correction.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "../particles/struct_particles.h" 8 | #include "../run_params/struct_run_params.h" 9 | 10 | #include "../kernels/coulomb/coulomb.h" 11 | #include "../kernels/yukawa/yukawa.h" 12 | #include "../kernels/regularized-coulomb/regularized-coulomb.h" 13 | #include "../kernels/regularized-yukawa/regularized-yukawa.h" 14 | #include "../kernels/atan/atan.h" 15 | #include "../kernels/sin-over-r/sin-over-r.h" 16 | #include "../kernels/mq/mq.h" 17 | #include "../kernels/user_kernel/user_kernel.h" 18 | 19 | #include "interaction_compute.h" 20 | 21 | 22 | void InteractionCompute_SubtractionPotentialCorrection(double *potential, 23 | struct Particles *targets, struct RunParams *run_params) 24 | { 25 | int num_targets = targets->num; 26 | double *target_q = targets->q; 27 | 28 | if (run_params->singularity == SUBTRACTION) { 29 | if (run_params->kernel == COULOMB) { 30 | K_Coulomb_SS_Correction(potential, target_q, num_targets, run_params); 31 | 32 | } else if (run_params->kernel == REGULARIZED_COULOMB) { 33 | K_RegularizedCoulomb_SS_Correction(potential, target_q, num_targets, run_params); 34 | 35 | } else if (run_params->kernel == YUKAWA) { 36 | K_Yukawa_SS_Correction(potential, target_q, num_targets, run_params); 37 | 38 | } else if (run_params->kernel == REGULARIZED_YUKAWA) { 39 | K_RegularizedYukawa_SS_Correction(potential, target_q, num_targets, run_params); 40 | 41 | } 42 | } 43 | 44 | return; 45 | } 46 | -------------------------------------------------------------------------------- /src/interaction_lists/interaction_lists.h: -------------------------------------------------------------------------------- 1 | #ifndef H_INTERACTION_LISTS_H 2 | #define H_INTERACTION_LISTS_H 3 | 4 | #include "../tree/struct_tree.h" 5 | #include 
"../run_params/struct_run_params.h" 6 | #include "struct_interaction_lists.h" 7 | 8 | 9 | void InteractionLists_Make(struct InteractionLists **interaction_list_addr, 10 | const struct Tree *source_tree, const struct Tree *target_tree, 11 | const struct RunParams *run_params); 12 | 13 | void InteractionLists_Free(struct InteractionLists **interaction_list_addr); 14 | 15 | void InteractionLists_MakeRemote(const struct Tree *source_tree, const struct Tree *target_tree, 16 | int *approx_list_packed, int *approx_list_unpacked, int *direct_list, 17 | const struct RunParams *run_params); 18 | 19 | 20 | #endif /* H_INTERACTION_LISTS_H */ 21 | -------------------------------------------------------------------------------- /src/interaction_lists/struct_interaction_lists.h: -------------------------------------------------------------------------------- 1 | #ifndef H_STRUCT_INTERACTION_LISTS_H 2 | #define H_STRUCT_INTERACTION_LISTS_H 3 | 4 | 5 | struct InteractionLists 6 | { 7 | int *num_pp; 8 | int *num_cc; 9 | int *num_pc; 10 | int *num_cp; 11 | 12 | int **pp_interactions; 13 | int **cc_interactions; 14 | int **pc_interactions; 15 | int **cp_interactions; 16 | }; 17 | 18 | 19 | #endif /* H_STRUCT_INTERACTION_LISTS_H */ 20 | -------------------------------------------------------------------------------- /src/interface/BaryTreeInterface.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | #include "../particles/struct_particles.h" 9 | #include "../particles/particles.h" 10 | 11 | #include "../run_params/struct_run_params.h" 12 | #include "../run_params/run_params.h" 13 | 14 | #include "../drivers/treedriver.h" 15 | #include "BaryTreeInterface.h" 16 | 17 | 18 | void BaryTreeInterface(int numTargets, int numSources, 19 | double *targetX, double *targetY, double *targetZ, double *targetValue, 20 | double *sourceX, double *sourceY, double *sourceZ, double *sourceValue, double *sourceWeight, 21 | double *outputArray, 22 | KERNEL kernel, int numKernelParams, double *kernelParams, 23 | SINGULARITY singularity, APPROXIMATION approximation, COMPUTE_TYPE compute_type, 24 | double theta, int interpDegree, int maxPerSourceLeaf, int maxPerTargetLeaf, 25 | double sizeCheck, double beta, int verbosity) 26 | { 27 | 28 | double timing[12]; 29 | memset(outputArray, 0, numTargets * sizeof(double)); 30 | 31 | struct RunParams *run_params = NULL; 32 | RunParams_Setup(&run_params, 33 | kernel, numKernelParams, kernelParams, 34 | approximation, singularity, compute_type, 35 | theta, interpDegree, 36 | maxPerSourceLeaf, maxPerTargetLeaf, sizeCheck, 37 | beta, verbosity); 38 | 39 | struct Particles sources, targets; 40 | 41 | targets.num = numTargets; 42 | targets.x = targetX; 43 | targets.y = targetY; 44 | targets.z = targetZ; 45 | targets.q = targetValue; 46 | 47 | sources.num = numSources; 48 | sources.x = sourceX; 49 | sources.y = sourceY; 50 | sources.z = sourceZ; 51 | sources.q = sourceValue; 52 | sources.w = sourceWeight; 53 | 54 | 55 | treedriver(&sources, &targets, run_params, outputArray, timing); 56 | MPI_Barrier(MPI_COMM_WORLD); 57 | 58 | 59 | RunParams_Free(&run_params); 60 | 61 | return; 62 | } 63 | -------------------------------------------------------------------------------- /src/interface/BaryTreeInterface.h: -------------------------------------------------------------------------------- 1 | #ifndef H_BARYTREE_INTERFACE_H 2 | #define H_BARYTREE_INTERFACE_H 3 | 4 | #ifndef H_BARYTREE_TYPES_H 5 | 
#define H_BARYTREE_TYPES_H 6 | 7 | typedef enum KERNEL 8 | { 9 | NO_KERNEL, 10 | COULOMB, 11 | YUKAWA, 12 | REGULARIZED_COULOMB, 13 | REGULARIZED_YUKAWA, 14 | ATAN, 15 | TCF, 16 | DCF, 17 | SIN_OVER_R, 18 | MQ, 19 | RBS_U, 20 | RBS_V, 21 | USER 22 | } KERNEL; 23 | 24 | 25 | typedef enum SINGULARITY 26 | { 27 | NO_SINGULARITY, 28 | SKIPPING, 29 | SUBTRACTION 30 | } SINGULARITY; 31 | 32 | 33 | typedef enum APPROXIMATION 34 | { 35 | NO_APPROX, 36 | LAGRANGE, 37 | HERMITE 38 | } APPROXIMATION; 39 | 40 | 41 | typedef enum COMPUTE_TYPE 42 | { 43 | NO_COMPUTE_TYPE, 44 | PARTICLE_CLUSTER, 45 | CLUSTER_PARTICLE, 46 | CLUSTER_CLUSTER 47 | } COMPUTE_TYPE; 48 | 49 | 50 | #endif /* H_BARYTREE_TYPES_H */ 51 | 52 | void BaryTreeInterface(int numTargets, int numSources, 53 | double *targetX, double *targetY, double *targetZ, double *targetValue, 54 | double *sourceX, double *sourceY, double *sourceZ, double *sourceValue, double *sourceWeight, 55 | double *outputArray, 56 | KERNEL kernel, int numKernelParams, double *kernelParams, 57 | SINGULARITY singularity, APPROXIMATION approximation, COMPUTE_TYPE compute_type, 58 | double theta, int interpDegree, int maxPerSourceLeaf, int maxPerTargetLeaf, 59 | double sizeCheck, double beta, int verbosity); 60 | 61 | 62 | #endif /* H_BARYTREE_INTERFACE_H */ 63 | -------------------------------------------------------------------------------- /src/kernels/README.md: -------------------------------------------------------------------------------- 1 | Adding New Kernels 2 | ------------------ 3 | 4 | Steps for adding a new kernel named `custom-kernel` with support for particle-cluster are described below. Adding support for cluster-cluster and cluster-particle follows similarly. Consult existing kernel directories for more details. 5 | 6 | 1. Add the name of the new kernel to the end of the KERNEL enum in `src/utilities/enums.h`. If you plan to use the Python wrapper, add it to the Kernel class in `interfaces/python/BaryTreeInterface.py` as well. 7 | 8 | 2. Create a new directory `custom-kernel` in `src/kernels/`. 9 | 10 | 3. Create new source files (`custom-kernel_direct.c`, `custom-kernel_direct.h`, `custom-kernel_pc.c`, `custom-kernel_pc.h`) in `src/kernels/custom-kernel` containing three batch-cluster interaction functions (see the sketch below): 11 | - `K_CustomKernel_Direct( )` 12 | - `K_CustomKernel_PC_Lagrange( )` 13 | - `K_CustomKernel_PC_Hermite( )` 14 | 15 | 4. Create a new source file `custom-kernel.h` in `src/kernels/custom-kernel/`. `#include` in this file all other headers associated with this kernel (`custom-kernel_direct.h` and `custom-kernel_pc.h`). 16 | 17 | 5. Edit `interaction_compute_pc.c`: 18 | 1. Include `custom-kernel.h` in `interaction_compute_pc.c` (`#include "../kernels/custom-kernel/custom-kernel.h"`). 19 | 2. Add your custom kernel in several places, following the format for the already-present kernels, like `if (run_params->kernel == KERNEL_NAME)`: 20 | - In the POTENTIAL FROM APPROX subsection, add your Lagrange and/or Hermite kernels 21 | - In the POTENTIAL FROM DIRECT subsection, add your direct interaction kernel 22 | 23 | 6. Edit `interaction_compute_direct.c`: 24 | 1. Include `custom-kernel.h` in `interaction_compute_direct.c` (`#include "../kernels/custom-kernel/custom-kernel.h"`). 25 | 2. Add your direct interaction kernel, following the format for the already-present kernels. 26 | 27 | 7. Add your files to `src/CMakeLists.txt`: 28 | 1. 
Add `custom-kernel_direct.c`, `custom-kernel_direct.h`, `custom-kernel_pc.c`, `custom-kernel_pc.h`, and `custom-kernel.h` to a new `SRCS_K_CUSTOM_KERNEL` list, following the format of the other kernels. 29 | 2. Add `${SRCS_K_CUSTOM_KERNEL}` to the `SRCS_KERNELS` list. 30 | -------------------------------------------------------------------------------- /src/kernels/atan/atan.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_ATAN_H 3 | #define H_K_ATAN_H 4 | 5 | 6 | #include "atan_pp.h" 7 | #include "atan_pc.h" 8 | 9 | #endif /* H_K_ATAN_H */ 10 | -------------------------------------------------------------------------------- /src/kernels/atan/atan_pc.c: -------------------------------------------------------------------------------- 1 | #ifdef OPENACC_ENABLED 2 | #include 3 | #define M_PI 3.14159265358979323846264338327950288 4 | #else 5 | #include 6 | #endif 7 | #include 8 | 9 | #include "../../run_params/struct_run_params.h" 10 | #include "atan_pc.h" 11 | 12 | 13 | void K_Atan_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 14 | int starting_index_of_target, int starting_index_of_cluster, 15 | double *target_x, double *target_y, double *target_z, 16 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 17 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 18 | { 19 | 20 | double domainLength = run_params->kernel_params[0]; 21 | double delta = run_params->kernel_params[1]; 22 | double wadj = 1. / (1. - delta / sqrt(1. + delta * delta)); 23 | double delta_factor = sqrt(1. + 1.0 / (delta * delta)); 24 | 25 | #ifdef OPENACC_ENABLED 26 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 27 | cluster_x, cluster_y, cluster_z, cluster_charge, potential) 28 | { 29 | #endif 30 | #ifdef OPENACC_ENABLED 31 | #pragma acc loop independent 32 | #endif 33 | for (int i = 0; i < number_of_targets_in_batch; i++) { 34 | 35 | int ii = starting_index_of_target + i; 36 | double temporary_potential = 0.0; 37 | double tz = target_z[ii]; 38 | 39 | #ifdef OPENACC_ENABLED 40 | #pragma acc loop independent reduction(+:temporary_potential) 41 | #endif 42 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 43 | 44 | int jj = starting_index_of_cluster + j; 45 | double dz = (tz - cluster_z[jj]) / domainLength; 46 | if (dz < -0.5) { 47 | dz += 1.0; 48 | } 49 | if (dz > 0.5) { 50 | dz -= 1.0; 51 | } 52 | temporary_potential += cluster_charge[jj] 53 | * (1.0 / M_PI * atan(delta_factor * tan(M_PI * dz)) - dz); 54 | } // end loop over interpolation points 55 | #ifdef OPENACC_ENABLED 56 | #pragma acc atomic 57 | #endif 58 | potential[ii] += wadj * temporary_potential; 59 | } 60 | #ifdef OPENACC_ENABLED 61 | } // end kernel 62 | #endif 63 | return; 64 | } 65 | 66 | 67 | 68 | 69 | void K_Atan_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 70 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 71 | double *target_x, double *target_y, double *target_z, 72 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 73 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 74 | { 75 | printf("[BaryTree] ERROR! ATAN KERNEL NOT IMPLEMENTED FOR HERMITE. 
Exiting.\n"); 76 | return; 77 | } 78 | -------------------------------------------------------------------------------- /src/kernels/atan/atan_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_ATAN_PC_H 3 | #define H_K_ATAN_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Atan_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | void K_Atan_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 16 | double *target_x, double *target_y, double *target_z, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_ATAN_PC_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/atan/atan_pp.c: -------------------------------------------------------------------------------- 1 | #ifdef OPENACC_ENABLED 2 | #include 3 | #define M_PI 3.14159265358979323846264338327950288 4 | #else 5 | #include 6 | #endif 7 | #include 8 | 9 | #include "../../run_params/struct_run_params.h" 10 | #include "atan_pp.h" 11 | 12 | 13 | void K_Atan_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 14 | int starting_index_of_target, int starting_index_of_source, 15 | double *target_x, double *target_y, double *target_z, 16 | double *source_x, double *source_y, double *source_z, double *source_charge, 17 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 18 | { 19 | 20 | double domainLength = run_params->kernel_params[0]; 21 | double delta = run_params->kernel_params[1]; 22 | double wadj = 1. / (1. - delta / sqrt(1. + delta * delta)); 23 | double delta_factor = sqrt(1. 
+ 1.0 / (delta * delta)); 24 | 25 | #ifdef OPENACC_ENABLED 26 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 27 | source_x, source_y, source_z, source_charge, potential) 28 | { 29 | #endif 30 | #ifdef OPENACC_ENABLED 31 | #pragma acc loop independent 32 | #endif 33 | for (int i = 0; i < number_of_targets_in_batch; i++) { 34 | 35 | int ii = starting_index_of_target + i; 36 | double temporary_potential = 0.0; 37 | double tz = target_z[ii]; 38 | 39 | #ifdef OPENACC_ENABLED 40 | #pragma acc loop independent reduction(+:temporary_potential) 41 | #endif 42 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 43 | int jj = starting_index_of_source + j; 44 | double dz = (tz - source_z[jj]) / domainLength; 45 | 46 | if (dz < -0.5) { 47 | dz += 1.0; 48 | } 49 | if (dz > 0.5) { 50 | dz -= 1.0; 51 | } 52 | temporary_potential += source_charge[jj] 53 | * (1.0 / M_PI * atan(delta_factor * tan(M_PI * dz)) - dz); 54 | } // end loop over interpolation points 55 | #ifdef OPENACC_ENABLED 56 | #pragma acc atomic 57 | #endif 58 | potential[ii] += wadj * temporary_potential; 59 | } 60 | #ifdef OPENACC_ENABLED 61 | } // end kernel 62 | #endif 63 | return; 64 | } 65 | -------------------------------------------------------------------------------- /src/kernels/atan/atan_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_ATAN_PP_H 3 | #define H_K_ATAN_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Atan_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_ATAN_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_H 3 | #define H_K_COULOMB_H 4 | 5 | #include "coulomb_pp.h" 6 | #include "coulomb_pc.h" 7 | #include "coulomb_cp.h" 8 | #include "coulomb_cc_hermite.h" 9 | 10 | #include "coulomb_ss_pp.h" 11 | #include "coulomb_ss_correction.h" 12 | #include "coulomb_ss_pc.h" 13 | #include "coulomb_ss_cp.h" 14 | #include "coulomb_ss_cc.h" 15 | 16 | 17 | 18 | #endif /* H_K_COULOMB_H */ 19 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_cc_hermite.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_CC_HERMITE_H 3 | #define H_K_COULOMB_CC_HERMITE_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_CC_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_source_cluster, int starting_index_of_target_cluster, 10 | double *source_cluster_x, double *source_cluster_y, double *source_cluster_z, double *source_cluster_q, 11 | double *source_cluster_w, 12 | double *target_cluster_x, double *target_cluster_y, double *target_cluster_z, double *target_cluster_q, 13 | struct RunParams *run_params, int gpu_async_stream_id); 14 | 15 | 16 | #endif /* H_K_COULOMB_CC_HERMITE_H */ 17 | 
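A minimal sketch of the `K_CustomKernel_Direct( )` function from step 3 of `src/kernels/README.md` above. The `custom-kernel` name and the inverse-square interaction are placeholder assumptions, and the OpenACC pragmas and cache directives used by the real kernels (compare `K_Coulomb_PP` below) are omitted for brevity:

/* src/kernels/custom-kernel/custom-kernel_direct.c -- hypothetical skeleton, not part of the library */
#include <float.h>

#include "../../run_params/struct_run_params.h"
#include "custom-kernel_direct.h"

void K_CustomKernel_Direct(int number_of_targets_in_batch, int number_of_source_points_in_cluster,
        int starting_index_of_target, int starting_index_of_source,
        double *target_x, double *target_y, double *target_z,
        double *source_x, double *source_y, double *source_z, double *source_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id)
{
    (void)run_params;           /* this toy 1/r^2 kernel reads no kernel_params */
    (void)gpu_async_stream_id;  /* only meaningful in the OpenACC build */

    for (int i = 0; i < number_of_targets_in_batch; i++) {

        int ii = starting_index_of_target + i;
        double temporary_potential = 0.0;

        double tx = target_x[ii];
        double ty = target_y[ii];
        double tz = target_z[ii];

        for (int j = 0; j < number_of_source_points_in_cluster; j++) {

            int jj = starting_index_of_source + j;
            double dx = tx - source_x[jj];
            double dy = ty - source_y[jj];
            double dz = tz - source_z[jj];
            double r2 = dx*dx + dy*dy + dz*dz;

            /* skip the singular self-interaction, as K_Coulomb_PP does */
            if (r2 > DBL_MIN)
                temporary_potential += source_charge[jj] / r2;

        } // end loop over source points
        potential[ii] += temporary_potential;
    }
    return;
}

The corresponding `K_CustomKernel_PC_Lagrange( )` would have the same loop structure but read interpolation-point coordinates and modified charges (`cluster_x`, `cluster_y`, `cluster_z`, `cluster_charge`) in place of the source arrays, as in `K_Coulomb_PC_Lagrange` below.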
-------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_cp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_CP_H 3 | #define H_K_COULOMB_CP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_x, double *source_y, double *source_z, double *source_q, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | void K_Coulomb_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_sources, int starting_index_of_cluster, 16 | double *source_x, double *source_y, double *source_z, double *source_q, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_COULOMB_CP_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_pc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "coulomb_pc.h" 7 | 8 | 9 | void K_Coulomb_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_cluster, 11 | double *target_x, double *target_y, double *target_z, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | #ifdef OPENACC_ENABLED 17 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 18 | cluster_x, cluster_y, cluster_z, cluster_charge, potential) 19 | { 20 | #endif 21 | #ifdef OPENACC_ENABLED 22 | #pragma acc loop independent 23 | #endif 24 | for (int i = 0; i < number_of_targets_in_batch; i++) { 25 | 26 | double temporary_potential = 0.0; 27 | 28 | double tx = target_x[starting_index_of_target + i]; 29 | double ty = target_y[starting_index_of_target + i]; 30 | double tz = target_z[starting_index_of_target + i]; 31 | 32 | #ifdef OPENACC_ENABLED 33 | #pragma acc loop independent reduction(+:temporary_potential) 34 | #endif 35 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc cache(cluster_x[starting_index_of_cluster : starting_index_of_cluster+number_of_interpolation_points_in_cluster], \ 38 | cluster_y[starting_index_of_cluster : starting_index_of_cluster+number_of_interpolation_points_in_cluster], \ 39 | cluster_z[starting_index_of_cluster : starting_index_of_cluster+number_of_interpolation_points_in_cluster], \ 40 | cluster_charge[starting_index_of_cluster : starting_index_of_cluster+number_of_interpolation_points_in_cluster]) 41 | #endif 42 | 43 | int jj = starting_index_of_cluster + j; 44 | double dx = tx - cluster_x[jj]; 45 | double dy = ty - cluster_y[jj]; 46 | double dz = tz - cluster_z[jj]; 47 | double r2 = dx*dx + dy*dy + dz*dz; 48 | 49 | temporary_potential += cluster_charge[starting_index_of_cluster + j] / sqrt(r2); 50 | 51 | } // end loop over interpolation points 
52 | #ifdef OPENACC_ENABLED 53 | #pragma acc atomic 54 | #endif 55 | potential[starting_index_of_target + i] += temporary_potential; 56 | } 57 | #ifdef OPENACC_ENABLED 58 | } // end kernel 59 | #endif 60 | return; 61 | } 62 | 63 | 64 | 65 | 66 | void K_Coulomb_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 67 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 68 | double *target_x, double *target_y, double *target_z, 69 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 70 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 71 | { 72 | 73 | // total_number_interpolation_points is the stride, separating clustersQ, clustersQx, clustersQy, etc. 74 | double *cluster_charge_ = &cluster_charge[8*starting_index_of_cluster + 0*number_of_interpolation_points_in_cluster]; 75 | double *cluster_charge_delta_x = &cluster_charge[8*starting_index_of_cluster + 1*number_of_interpolation_points_in_cluster]; 76 | double *cluster_charge_delta_y = &cluster_charge[8*starting_index_of_cluster + 2*number_of_interpolation_points_in_cluster]; 77 | double *cluster_charge_delta_z = &cluster_charge[8*starting_index_of_cluster + 3*number_of_interpolation_points_in_cluster]; 78 | double *cluster_charge_delta_xy = &cluster_charge[8*starting_index_of_cluster + 4*number_of_interpolation_points_in_cluster]; 79 | double *cluster_charge_delta_yz = &cluster_charge[8*starting_index_of_cluster + 5*number_of_interpolation_points_in_cluster]; 80 | double *cluster_charge_delta_xz = &cluster_charge[8*starting_index_of_cluster + 6*number_of_interpolation_points_in_cluster]; 81 | double *cluster_charge_delta_xyz = &cluster_charge[8*starting_index_of_cluster + 7*number_of_interpolation_points_in_cluster]; 82 | 83 | 84 | 85 | #ifdef OPENACC_ENABLED 86 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 87 | cluster_x, cluster_y, cluster_z, cluster_charge, potential, \ 88 | cluster_charge_, cluster_charge_delta_x, cluster_charge_delta_y, cluster_charge_delta_z, \ 89 | cluster_charge_delta_xy, cluster_charge_delta_yz, cluster_charge_delta_xz, \ 90 | cluster_charge_delta_xyz) 91 | { 92 | #endif 93 | #ifdef OPENACC_ENABLED 94 | #pragma acc loop independent 95 | #endif 96 | for (int i = 0; i < number_of_targets_in_batch; i++) { 97 | 98 | int ii = starting_index_of_target + i; 99 | double temporary_potential = 0.0; 100 | 101 | double tx = target_x[ii]; 102 | double ty = target_y[ii]; 103 | double tz = target_z[ii]; 104 | 105 | #ifdef OPENACC_ENABLED 106 | #pragma acc loop independent reduction(+:temporary_potential) 107 | #endif 108 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 109 | 110 | int jj = starting_index_of_cluster + j; 111 | double dx = tx - cluster_x[jj]; 112 | double dy = ty - cluster_y[jj]; 113 | double dz = tz - cluster_z[jj]; 114 | double r = sqrt(dx*dx + dy*dy + dz*dz); 115 | 116 | double rinv = 1 / r; 117 | double r3inv = rinv*rinv*rinv; 118 | double r5inv = r3inv*rinv*rinv; 119 | double r7inv = r5inv*rinv*rinv; 120 | 121 | temporary_potential += rinv * (cluster_charge_[j]) 122 | + r3inv * (cluster_charge_delta_x[j]*dx + cluster_charge_delta_y[j]*dy 123 | + cluster_charge_delta_z[j]*dz) 124 | + 3 * r5inv * (cluster_charge_delta_xy[j]*dx*dy + cluster_charge_delta_yz[j]*dy*dz 125 | + cluster_charge_delta_xz[j]*dx*dz) 126 | + 15 * r7inv * cluster_charge_delta_xyz[j]*dx*dy*dz; 127 | 128 | } // end loop over interpolation 
points 129 | #ifdef OPENACC_ENABLED 130 | #pragma acc atomic 131 | #endif 132 | potential[starting_index_of_target + i] += temporary_potential; 133 | } 134 | #ifdef OPENACC_ENABLED 135 | } // end kernel 136 | #endif 137 | return; 138 | } 139 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_PC_H 3 | #define H_K_COULOMB_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | void K_Coulomb_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 16 | double *target_x, double *target_y, double *target_z, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_COULOMB_PC_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_pp.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "coulomb_pp.h" 7 | 8 | void K_Coulomb_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 13 | { 14 | 15 | #ifdef OPENACC_ENABLED 16 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 17 | source_x, source_y, source_z, source_charge, potential) 18 | { 19 | #pragma acc loop independent 20 | #endif 21 | for (int i = 0; i < number_of_targets_in_batch; i++) { 22 | 23 | int ii = starting_index_of_target + i; 24 | double temporary_potential = 0.0; 25 | 26 | double tx = target_x[ii]; 27 | double ty = target_y[ii]; 28 | double tz = target_z[ii]; 29 | 30 | #ifdef OPENACC_ENABLED 31 | #pragma acc loop independent reduction(+:temporary_potential) 32 | #endif 33 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 34 | #ifdef OPENACC_ENABLED 35 | #pragma acc cache(source_x[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ 36 | source_y[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ 37 | source_z[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ 38 | source_charge[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster]) 39 | #endif 40 | 41 | 42 | int jj = starting_index_of_source + j; 43 | double dx = tx - source_x[jj]; 44 | double dy = ty - source_y[jj]; 45 | double dz = tz - source_z[jj]; 46 | double r2 = dx*dx + dy*dy + dz*dz; 47 | 48 | 
if (r2 > DBL_MIN) { 49 | temporary_potential += source_charge[jj] / sqrt(r2); 50 | } 51 | } // end loop over interpolation points 52 | #ifdef OPENACC_ENABLED 53 | #pragma acc atomic 54 | #endif 55 | potential[ii] += temporary_potential; 56 | } 57 | #ifdef OPENACC_ENABLED 58 | } // end kernel 59 | #endif 60 | return; 61 | } 62 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_PP_H 3 | #define H_K_COULOMB_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_COULOMB_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_cc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "coulomb_ss_cc.h" 7 | 8 | 9 | void K_Coulomb_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_sources, int starting_index_of_cluster, 11 | double *source_cluster_x, double *source_cluster_y, double *source_cluster_z, double *source_cluster_q, double *source_cluster_w, 12 | double *target_cluster_x, double *target_cluster_y, double *target_cluster_z, double *target_cluster_q, double *target_cluster_w, 13 | struct RunParams *run_params, int gpu_async_stream_id) 14 | { 15 | 16 | double kernel_parameter = run_params->kernel_params[0]; 17 | double kernel_parameter2 = kernel_parameter * kernel_parameter; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(source_cluster_x, source_cluster_y, source_cluster_z, source_cluster_q, source_cluster_w, \ 21 | target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, target_cluster_w) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | double temporary_weight = 0.0; 31 | 32 | double cx = target_cluster_x[starting_index_of_cluster + i]; 33 | double cy = target_cluster_y[starting_index_of_cluster + i]; 34 | double cz = target_cluster_z[starting_index_of_cluster + i]; 35 | 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc loop independent reduction(+:temporary_potential) reduction(+:temporary_weight) 38 | #endif 39 | for (int j = 0; j < number_of_sources_in_batch; j++) { 40 | #ifdef OPENACC_ENABLED 41 | #pragma acc cache(source_cluster_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 42 | source_cluster_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 43 | source_cluster_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 44 | source_cluster_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 45 | 
source_cluster_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) 46 | #endif 47 | 48 | int jj = starting_index_of_sources + j; 49 | double dx = cx - source_cluster_x[jj]; 50 | double dy = cy - source_cluster_y[jj]; 51 | double dz = cz - source_cluster_z[jj]; 52 | double r = sqrt(dx*dx + dy*dy + dz*dz); 53 | 54 | temporary_potential += source_cluster_q[jj] / r; 55 | temporary_weight += exp(-r*r/kernel_parameter2) * source_cluster_w[jj] / r; 56 | 57 | } // end loop over interpolation points 58 | #ifdef OPENACC_ENABLED 59 | #pragma acc atomic 60 | #endif 61 | target_cluster_q[starting_index_of_cluster + i] += temporary_potential; 62 | #ifdef OPENACC_ENABLED 63 | #pragma acc atomic 64 | #endif 65 | target_cluster_w[starting_index_of_cluster + i] += temporary_weight; 66 | } 67 | #ifdef OPENACC_ENABLED 68 | } // end kernel 69 | #endif 70 | return; 71 | } 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_cc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_SS_CC_H 3 | #define H_K_COULOMB_SS_CC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_cluster_x, double *source_cluster_y, double *source_cluster_z, double *source_cluster_q, double *source_cluster_w, 11 | double *target_cluster_x, double *target_cluster_y, double *target_cluster_z, double *target_cluster_charge, double *target_cluster_weight, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_COULOMB_SS_CC_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_correction.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "coulomb_ss_correction.h" 7 | 8 | 9 | void K_Coulomb_SS_Correction(double *potential, double *target_q, 10 | int numTargets, struct RunParams *run_params) 11 | { 12 | double kernel_parameter = run_params->kernel_params[0]; 13 | double param = 2.0 * M_PI * kernel_parameter * kernel_parameter; 14 | for (int i = 0; i < numTargets; i++) potential[i] += param * target_q[i]; 15 | 16 | return; 17 | } 18 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_correction.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_SS_CORRECTION_H 3 | #define H_K_COULOMB_SS_CORRECTION_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_SS_Correction(double *potential, double *target_q, 9 | int numTargets, struct RunParams *run_params); 10 | 11 | 12 | #endif /* H_K_COULOMB_SS_CORRECTION_H */ 13 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_cp.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "coulomb_ss_cp.h" 7 | 8 | 9 | void K_Coulomb_SS_CP_Lagrange(int number_of_sources_in_batch, int 
number_of_interpolation_points_in_cluster, 10 | int starting_index_of_sources, int starting_index_of_cluster, 11 | double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, double *cluster_w, 13 | struct RunParams *run_params, int gpu_async_stream_id) 14 | { 15 | 16 | double kernel_parameter = run_params->kernel_params[0]; 17 | double kernel_parameter2 = kernel_parameter * kernel_parameter; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, source_w, \ 21 | cluster_x, cluster_y, cluster_z, cluster_q, cluster_w) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | double temporary_weight = 0.0; 31 | 32 | double cx = cluster_x[starting_index_of_cluster + i]; 33 | double cy = cluster_y[starting_index_of_cluster + i]; 34 | double cz = cluster_z[starting_index_of_cluster + i]; 35 | 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc loop independent reduction(+:temporary_potential) reduction(+:temporary_weight) 38 | #endif 39 | for (int j = 0; j < number_of_sources_in_batch; j++) { 40 | #ifdef OPENACC_ENABLED 41 | #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 42 | source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 43 | source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 44 | source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 45 | source_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) 46 | #endif 47 | 48 | int jj = starting_index_of_sources + j; 49 | double dx = cx - source_x[jj]; 50 | double dy = cy - source_y[jj]; 51 | double dz = cz - source_z[jj]; 52 | double r = sqrt(dx*dx + dy*dy + dz*dz); 53 | 54 | temporary_potential += source_q[jj] * source_w[jj] / r; 55 | temporary_weight += exp(-r*r/kernel_parameter2) * source_w[jj] / r; 56 | 57 | } // end loop over interpolation points 58 | #ifdef OPENACC_ENABLED 59 | #pragma acc atomic 60 | #endif 61 | cluster_q[starting_index_of_cluster + i] += temporary_potential; 62 | #ifdef OPENACC_ENABLED 63 | #pragma acc atomic 64 | #endif 65 | cluster_w[starting_index_of_cluster + i] += temporary_weight; 66 | } 67 | #ifdef OPENACC_ENABLED 68 | } // end kernel 69 | #endif 70 | return; 71 | } 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_cp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_SS_CP_H 3 | #define H_K_COULOMB_SS_CP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | 15 | 16 | #endif /* H_K_COULOMB_SS_CP_H */ 17 | 
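The constant `2.0 * M_PI * kernel_parameter * kernel_parameter` in `K_Coulomb_SS_Correction` above is the self-term of the subtracted Gaussian screen. As a sketch (assuming the screen `exp(-r*r/kernel_parameter2)` that the `SS` kernels subtract, with delta denoting `kernel_params[0]`), the screen around target charge \(q_i\), integrated over all space against the \(1/r\) kernel, contributes

\[
q_i \int_{\mathbb{R}^3} \frac{e^{-r^2/\delta^2}}{r}\, dV
  \;=\; 4\pi q_i \int_0^\infty r\, e^{-r^2/\delta^2}\, dr
  \;=\; 2\pi \delta^2 q_i ,
\]

which is exactly the `param * target_q[i]` term that the correction adds back to each target's potential after the screened sums.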
-------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_SS_PC_H 3 | #define H_K_COULOMB_SS_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_SS_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, double *target_charge, double *cluster_weight, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | void K_Coulomb_SS_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 16 | double *target_x, double *target_y, double *target_z, double *target_charge, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, 18 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_COULOMB_SS_PC_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_pp.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "coulomb_ss_pp.h" 7 | 8 | void K_Coulomb_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, double *target_charge, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 13 | { 14 | double kernel_parameter = run_params->kernel_params[0]; 15 | double kernel_parameter2 = kernel_parameter * kernel_parameter; 16 | 17 | #ifdef OPENACC_ENABLED 18 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, target_charge, \ 19 | source_x, source_y, source_z, source_charge, source_weight, potential) 20 | { 21 | #endif 22 | #ifdef OPENACC_ENABLED 23 | #pragma acc loop independent 24 | #endif 25 | for (int i = 0; i < number_of_targets_in_batch; i++) { 26 | 27 | int ii = starting_index_of_target + i; 28 | double temporary_potential = 0.0; 29 | 30 | double tx = target_x[ii]; 31 | double ty = target_y[ii]; 32 | double tz = target_z[ii]; 33 | double tq = target_charge[ii]; 34 | 35 | #ifdef OPENACC_ENABLED 36 | #pragma acc loop independent reduction(+:temporary_potential) 37 | #endif 38 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 39 | 40 | int jj = starting_index_of_source + j; 41 | double dx = tx - source_x[jj]; 42 | double dy = ty - source_y[jj]; 43 | double dz = tz - source_z[jj]; 44 | double r = sqrt(dx*dx + dy*dy + dz*dz); 45 | 46 | if (r > DBL_MIN){ 47 | temporary_potential += (source_charge[jj] - tq * exp(-r*r/kernel_parameter2)) 48 | * source_weight[jj] / r; 49 | } 50 | } // end loop over interpolation points 51 | #ifdef OPENACC_ENABLED 52 | #pragma acc atomic 53 | #endif 54 | potential[ii] += temporary_potential; 55 | } 56 | #ifdef 
OPENACC_ENABLED 57 | } // end kernel 58 | #endif 59 | return; 60 | } 61 | 62 | 63 | void coulombSingularitySubtractionApproximationLagrange(int number_of_targets_in_batch, 64 | int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, 65 | double *target_x, double *target_y, double *target_z, double *target_charge, 66 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, 67 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 68 | { 69 | double kernel_parameter = run_params->kernel_params[0]; 70 | double kernel_parameter2 = kernel_parameter * kernel_parameter; 71 | 72 | #ifdef OPENACC_ENABLED 73 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, target_charge, \ 74 | cluster_x, cluster_y, cluster_z, cluster_charge, cluster_weight, potential) 75 | { 76 | #endif 77 | #ifdef OPENACC_ENABLED 78 | #pragma acc loop independent 79 | #endif 80 | for (int i = 0; i < number_of_targets_in_batch; i++) { 81 | 82 | int ii = starting_index_of_target + i; 83 | double temporary_potential = 0.0; 84 | 85 | double tx = target_x[ii]; 86 | double ty = target_y[ii]; 87 | double tz = target_z[ii]; 88 | double tq = target_charge[ii]; 89 | 90 | #ifdef OPENACC_ENABLED 91 | #pragma acc loop independent reduction(+:temporary_potential) 92 | #endif 93 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 94 | 95 | int jj = starting_index_of_cluster + j; 96 | double dx = tx - cluster_x[jj]; 97 | double dy = ty - cluster_y[jj]; 98 | double dz = tz - cluster_z[jj]; 99 | double r = sqrt(dx*dx + dy*dy + dz*dz); 100 | 101 | if (r > DBL_MIN) { 102 | temporary_potential += (cluster_charge[jj] - tq * cluster_weight[jj] * exp(-r*r/kernel_parameter2)) / r; 103 | } 104 | } // end loop over interpolation points 105 | #ifdef OPENACC_ENABLED 106 | #pragma acc atomic 107 | #endif 108 | potential[ii] += temporary_potential; 109 | } 110 | #ifdef OPENACC_ENABLED 111 | } // end kernel 112 | #endif 113 | return; 114 | } 115 | -------------------------------------------------------------------------------- /src/kernels/coulomb/coulomb_ss_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_COULOMB_SS_PP_H 3 | #define H_K_COULOMB_SS_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_Coulomb_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, double *target_charge, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, double * source_weight, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_COULOMB_SS_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/dcf/dcf.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_DCF_H 3 | #define H_DCF_H 4 | 5 | 6 | void dcfDirect(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 7 | int starting_index_of_target, int starting_index_of_cluster, 8 | double *target_x, double *target_y, double *target_z, 9 | double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, 10 | double kernel_parameter, double 
*potential, int gpu_async_stream_id); 11 | 12 | void dcfApproximationLagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 13 | int starting_index_of_target, int starting_index_of_cluster, 14 | double *target_x, double *target_y, double *target_z, 15 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 16 | double kernel_parameter, double *potential, int gpu_async_stream_id); 17 | 18 | void dcfApproximationHermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 19 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 20 | double *target_x, double *target_y, double *target_z, 21 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 22 | double kernel_parameter, double *potential, int gpu_async_stream_id); 23 | 24 | #endif /* H_DCF_H */ 25 | -------------------------------------------------------------------------------- /src/kernels/mq/mq.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_MQ_H 3 | #define H_K_MQ_H 4 | 5 | 6 | #include "mq_pp.h" 7 | #include "mq_pc.h" 8 | 9 | #endif /* H_K_MQ_H */ 10 | -------------------------------------------------------------------------------- /src/kernels/mq/mq_pc.c: -------------------------------------------------------------------------------- 1 | #ifdef OPENACC_ENABLED 2 | #include 3 | #define M_PI 3.14159265358979323846264338327950288 4 | #else 5 | #include 6 | #endif 7 | #include 8 | 9 | #include "../../run_params/struct_run_params.h" 10 | #include "mq_pc.h" 11 | 12 | 13 | void K_MQ_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 14 | int starting_index_of_target, int starting_index_of_cluster, 15 | double *target_x, double *target_y, double *target_z, 16 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 17 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 18 | { 19 | 20 | double domainLength = run_params->kernel_params[0]; 21 | double delta = run_params->kernel_params[1]; 22 | double deltaLsq = delta * delta / domainLength / domainLength; 23 | double norm_delta_L = sqrt(1 + 4 * deltaLsq); 24 | 25 | #ifdef OPENACC_ENABLED 26 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 27 | cluster_x, cluster_y, cluster_z, cluster_charge, potential) 28 | { 29 | #endif 30 | #ifdef OPENACC_ENABLED 31 | #pragma acc loop independent 32 | #endif 33 | for (int i = 0; i < number_of_targets_in_batch; i++) { 34 | 35 | int ii = starting_index_of_target + i; 36 | double temporary_potential = 0.0; 37 | double tz = target_z[ii]; 38 | 39 | #ifdef OPENACC_ENABLED 40 | #pragma acc loop independent reduction(+:temporary_potential) 41 | #endif 42 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 43 | 44 | int jj = starting_index_of_cluster + j; 45 | double dz = (tz - cluster_z[jj]) / domainLength; 46 | 47 | if (dz < -0.5) { 48 | dz += 1.0; 49 | } 50 | if (dz > 0.5) { 51 | dz -= 1.0; 52 | } 53 | 54 | temporary_potential += cluster_charge[jj] 55 | * (.5 * dz * norm_delta_L / sqrt(dz * dz + deltaLsq) - dz); 56 | } // end loop over interpolation points 57 | #ifdef OPENACC_ENABLED 58 | #pragma acc atomic 59 | #endif 60 | potential[ii] += temporary_potential; 61 | } 62 | #ifdef OPENACC_ENABLED 63 | } // end kernel 64 | #endif 65 | return; 66 | } 67 | 68 | 69 | 70 | 71 | void 
K_MQ_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 72 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 73 | double *target_x, double *target_y, double *target_z, 74 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 75 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 76 | { 77 | printf("[BaryTree] ERROR! MQ KERNEL NOT IMPLEMENTED FOR HERMITE. Exiting.\n"); 78 | return; 79 | } 80 | -------------------------------------------------------------------------------- /src/kernels/mq/mq_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_MQ_PC_H 3 | #define H_K_MQ_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_MQ_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | void K_MQ_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 16 | double *target_x, double *target_y, double *target_z, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_MQ_PC_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/mq/mq_pp.c: -------------------------------------------------------------------------------- 1 | #ifdef OPENACC_ENABLED 2 | #include 3 | #define M_PI 3.14159265358979323846264338327950288 4 | #else 5 | #include 6 | #endif 7 | #include 8 | 9 | #include "../../run_params/struct_run_params.h" 10 | #include "mq_pp.h" 11 | 12 | 13 | void K_MQ_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 14 | int starting_index_of_target, int starting_index_of_source, 15 | double *target_x, double *target_y, double *target_z, 16 | double *source_x, double *source_y, double *source_z, double *source_charge, 17 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 18 | { 19 | 20 | double domainLength = run_params->kernel_params[0]; 21 | double delta = run_params->kernel_params[1]; 22 | double deltaLsq = delta * delta / domainLength / domainLength; 23 | double norm_delta_L = sqrt(1 + 4 * deltaLsq); 24 | 25 | 26 | #ifdef OPENACC_ENABLED 27 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 28 | source_x, source_y, source_z, source_charge, potential) 29 | { 30 | #endif 31 | #ifdef OPENACC_ENABLED 32 | #pragma acc loop independent 33 | #endif 34 | for (int i = 0; i < number_of_targets_in_batch; i++) { 35 | 36 | int ii = starting_index_of_target + i; 37 | double temporary_potential = 0.0; 38 | double tz = target_z[ii]; 39 | 40 | #ifdef OPENACC_ENABLED 41 | #pragma acc loop independent reduction(+:temporary_potential) 42 | #endif 43 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 44 | 45 | int jj = starting_index_of_source + j; 46 | double dz = (tz - source_z[jj]) / domainLength; 47 | 48 | if (dz < -0.5) { 
49 | dz += 1.0; 50 | } 51 | if (dz > 0.5) { 52 | dz -= 1.0; 53 | } 54 | temporary_potential += source_charge[jj] 55 | * (.5 * dz * norm_delta_L / sqrt(dz * dz + deltaLsq) - dz); 56 | } // end loop over interpolation points 57 | #ifdef OPENACC_ENABLED 58 | #pragma acc atomic 59 | #endif 60 | potential[ii] += temporary_potential; 61 | } 62 | #ifdef OPENACC_ENABLED 63 | } // end kernel 64 | #endif 65 | return; 66 | } 67 | -------------------------------------------------------------------------------- /src/kernels/mq/mq_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_MQ_PP_H 3 | #define H_K_MQ_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_MQ_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_MQ_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/rbs-u/rbs-u.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_U_H 3 | #define H_K_RBS_U_H 4 | 5 | #include "rbs-u_pp.h" 6 | #include "rbs-u_pc.h" 7 | #include "rbs-u_cp.h" 8 | 9 | #endif /* H_K_RBS_U_H */ 10 | -------------------------------------------------------------------------------- /src/kernels/rbs-u/rbs-u_cp.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "rbs-u_cp.h" 7 | 8 | 9 | void K_RBSu_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_source, int starting_index_of_cluster, 11 | double *source_x, double *source_y, double *source_z, double *source_q, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 13 | struct RunParams *run_params, int gpu_async_stream_id) 14 | { 15 | 16 | double delta = run_params->kernel_params[0]; 17 | double delta2 = delta * delta; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ 21 | cluster_x, cluster_y, cluster_z, cluster_q) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double cx = cluster_x[starting_index_of_cluster + i]; 32 | double cy = cluster_y[starting_index_of_cluster + i]; 33 | 34 | #ifdef OPENACC_ENABLED 35 | #pragma acc loop independent reduction(+:temporary_potential) 36 | #endif 37 | for (int j = 0; j < number_of_sources_in_batch; j++) { 38 | #ifdef OPENACC_ENABLED 39 | #pragma acc cache(source_x[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch], \ 40 | source_y[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch], \ 41 | source_z[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch], \ 42 | source_q[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch]) 43 | #endif 44 | 45 | int jj = 
starting_index_of_source + j; 46 | double dx = cx - source_x[jj]; 47 | double dy = cy - source_y[jj]; 48 | double r = dx*dx + dy*dy + delta2; 49 | 50 | temporary_potential += 1. / (2. * M_PI) * source_q[jj] * dy / r; 51 | 52 | } // end loop over interpolation points 53 | #ifdef OPENACC_ENABLED 54 | #pragma acc atomic 55 | #endif 56 | cluster_q[starting_index_of_cluster + i] += temporary_potential; 57 | } 58 | #ifdef OPENACC_ENABLED 59 | } // end kernel 60 | #endif 61 | return; 62 | } 63 | -------------------------------------------------------------------------------- /src/kernels/rbs-u/rbs-u_cp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_U_CP_H 3 | #define H_K_RBS_U_CP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RBSu_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_x, double *source_y, double *source_z, double *source_q, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_RBS_U_CP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/rbs-u/rbs-u_pc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "rbs-u_pc.h" 7 | 8 | 9 | void K_RBSu_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_cluster, 11 | double *target_x, double *target_y, double *target_z, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | double delta = run_params->kernel_params[0]; 17 | double delta2 = delta * delta; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 21 | cluster_x, cluster_y, cluster_z, cluster_charge, potential) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_targets_in_batch; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double tx = target_x[starting_index_of_target + i]; 32 | double ty = target_y[starting_index_of_target + i]; 33 | 34 | #ifdef OPENACC_ENABLED 35 | #pragma acc loop independent reduction(+:temporary_potential) 36 | #endif 37 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 38 | 39 | int jj = starting_index_of_cluster + j; 40 | double dx = tx - cluster_x[jj]; 41 | double dy = ty - cluster_y[jj]; 42 | double r = dx*dx + dy*dy + delta2; 43 | 44 | temporary_potential += 1. / (2. 
* M_PI) * cluster_charge[jj] * dy / r; 45 | 46 | } // end loop over interpolation points 47 | #ifdef OPENACC_ENABLED 48 | #pragma acc atomic 49 | #endif 50 | potential[starting_index_of_target + i] += temporary_potential; 51 | } 52 | #ifdef OPENACC_ENABLED 53 | } // end kernel 54 | #endif 55 | return; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /src/kernels/rbs-u/rbs-u_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_U_PC_H 3 | #define H_K_RBS_U_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RBSu_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_RBS_U_PC_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/rbs-u/rbs-u_pp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "rbs-u_pp.h" 7 | 8 | 9 | void K_RBSu_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_source, 11 | double *target_x, double *target_y, double *target_z, 12 | double *source_x, double *source_y, double *source_z, double *source_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | double delta = run_params->kernel_params[0]; 17 | double delta2 = delta * delta; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 21 | source_x, source_y, source_z, source_charge, potential) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_targets_in_batch; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double tx = target_x[starting_index_of_target + i]; 32 | double ty = target_y[starting_index_of_target + i]; 33 | 34 | #ifdef OPENACC_ENABLED 35 | #pragma acc loop independent reduction(+:temporary_potential) 36 | #endif 37 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 38 | 39 | int jj = starting_index_of_source + j; 40 | double dx = tx - source_x[jj]; 41 | double dy = ty - source_y[jj]; 42 | double r = dx*dx + dy*dy + delta2; 43 | 44 | temporary_potential += 1. / (2. 
* M_PI) * source_charge[jj] * dy / r; 45 | 46 | } // end loop over source points 47 | #ifdef OPENACC_ENABLED 48 | #pragma acc atomic 49 | #endif 50 | potential[starting_index_of_target + i] += temporary_potential; 51 | } 52 | #ifdef OPENACC_ENABLED 53 | } // end kernel 54 | #endif 55 | return; 56 | } 57 | -------------------------------------------------------------------------------- /src/kernels/rbs-u/rbs-u_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_U_PP_H 3 | #define H_K_RBS_U_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | void K_RBSu_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 8 | int starting_index_of_target, int starting_index_of_source, 9 | double *target_x, double *target_y, double *target_z, 10 | double *source_x, double *source_y, double *source_z, double *source_charge, 11 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 12 | 13 | 14 | #endif /* H_K_RBS_U_PP_H */ 15 | -------------------------------------------------------------------------------- /src/kernels/rbs-v/rbs-v.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_V_H 3 | #define H_K_RBS_V_H 4 | 5 | #include "rbs-v_pp.h" 6 | #include "rbs-v_pc.h" 7 | #include "rbs-v_cp.h" 8 | 9 | #endif /* H_K_RBS_V_H */ 10 | -------------------------------------------------------------------------------- /src/kernels/rbs-v/rbs-v_cp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "rbs-v_cp.h" 7 | 8 | 9 | void K_RBSv_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_source, int starting_index_of_cluster, 11 | double *source_x, double *source_y, double *source_z, double *source_q, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 13 | struct RunParams *run_params, int gpu_async_stream_id) 14 | { 15 | 16 | double delta = run_params->kernel_params[0]; 17 | double delta2 = delta * delta; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ 21 | cluster_x, cluster_y, cluster_z, cluster_q) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double cx = cluster_x[starting_index_of_cluster + i]; 32 | double cy = cluster_y[starting_index_of_cluster + i]; 33 | 34 | #ifdef OPENACC_ENABLED 35 | #pragma acc loop independent reduction(+:temporary_potential) 36 | #endif 37 | for (int j = 0; j < number_of_sources_in_batch; j++) { 38 | #ifdef OPENACC_ENABLED 39 | #pragma acc cache(source_x[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch], \ 40 | source_y[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch], \ 41 | source_z[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch], \ 42 | source_q[starting_index_of_source : starting_index_of_source+number_of_sources_in_batch]) 43 | #endif 44 | 45 | int jj = starting_index_of_source + j; 46 | double dx = cx - source_x[jj]; 47 | double dy = cy - source_y[jj]; 48 | double r = 
dx*dx + dy*dy + delta2; 49 | 50 | temporary_potential += -1. / (2. * M_PI) * source_q[jj] * dx / r; 51 | 52 | } // end loop over sources in batch 53 | #ifdef OPENACC_ENABLED 54 | #pragma acc atomic 55 | #endif 56 | cluster_q[starting_index_of_cluster + i] += temporary_potential; 57 | } 58 | #ifdef OPENACC_ENABLED 59 | } // end kernel 60 | #endif 61 | return; 62 | } 63 | -------------------------------------------------------------------------------- /src/kernels/rbs-v/rbs-v_cp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_V_CP_H 3 | #define H_K_RBS_V_CP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RBSv_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_x, double *source_y, double *source_z, double *source_q, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_RBS_V_CP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/rbs-v/rbs-v_pc.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "rbs-v_pc.h" 7 | 8 | 9 | void K_RBSv_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_cluster, 11 | double *target_x, double *target_y, double *target_z, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | double delta = run_params->kernel_params[0]; 17 | double delta2 = delta * delta; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 21 | cluster_x, cluster_y, cluster_z, cluster_charge, potential) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_targets_in_batch; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double tx = target_x[starting_index_of_target + i]; 32 | double ty = target_y[starting_index_of_target + i]; 33 | 34 | #ifdef OPENACC_ENABLED 35 | #pragma acc loop independent reduction(+:temporary_potential) 36 | #endif 37 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 38 | 39 | int jj = starting_index_of_cluster + j; 40 | double dx = tx - cluster_x[jj]; 41 | double dy = ty - cluster_y[jj]; 42 | double r = dx*dx + dy*dy + delta2; 43 | 44 | temporary_potential += -1. / (2. 
* M_PI) * cluster_charge[jj] * dx / r; 45 | 46 | } // end loop over interpolation points 47 | #ifdef OPENACC_ENABLED 48 | #pragma acc atomic 49 | #endif 50 | potential[starting_index_of_target + i] += temporary_potential; 51 | } 52 | #ifdef OPENACC_ENABLED 53 | } // end kernel 54 | #endif 55 | return; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /src/kernels/rbs-v/rbs-v_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_V_PC_H 3 | #define H_K_RBS_V_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RBSv_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_RBS_V_PC_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/rbs-v/rbs-v_pp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "rbs-v_pp.h" 7 | 8 | 9 | void K_RBSv_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_source, 11 | double *target_x, double *target_y, double *target_z, 12 | double *source_x, double *source_y, double *source_z, double *source_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | double delta = run_params->kernel_params[0]; 17 | double delta2 = delta * delta; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 21 | source_x, source_y, source_z, source_charge, potential) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_targets_in_batch; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double tx = target_x[starting_index_of_target + i]; 32 | double ty = target_y[starting_index_of_target + i]; 33 | 34 | #ifdef OPENACC_ENABLED 35 | #pragma acc loop independent reduction(+:temporary_potential) 36 | #endif 37 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 38 | 39 | int jj = starting_index_of_source + j; 40 | double dx = tx - source_x[jj]; 41 | double dy = ty - source_y[jj]; 42 | double r = dx*dx + dy*dy + delta2; 43 | 44 | temporary_potential += -1. / (2. 
* M_PI) * source_charge[jj] * dx / r; 45 | 46 | } // end loop over source points 47 | #ifdef OPENACC_ENABLED 48 | #pragma acc atomic 49 | #endif 50 | potential[starting_index_of_target + i] += temporary_potential; 51 | } 52 | #ifdef OPENACC_ENABLED 53 | } // end kernel 54 | #endif 55 | return; 56 | } 57 | -------------------------------------------------------------------------------- /src/kernels/rbs-v/rbs-v_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_RBS_V_PP_H 3 | #define H_K_RBS_V_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | void K_RBSv_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 8 | int starting_index_of_target, int starting_index_of_source, 9 | double *target_x, double *target_y, double *target_z, 10 | double *source_x, double *source_y, double *source_z, double *source_charge, 11 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 12 | 13 | 14 | #endif /* H_K_RBS_V_PP_H */ 15 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_COULOMB_H 3 | #define H_K_REGULARIZED_COULOMB_H 4 | 5 | 6 | #include "regularized-coulomb_pp.h" 7 | #include "regularized-coulomb_pc.h" 8 | #include "regularized-coulomb_cp.h" 9 | 10 | #include "regularized-coulomb_ss_pp.h" 11 | #include "regularized-coulomb_ss_correction.h" 12 | #include "regularized-coulomb_ss_pc.h" 13 | //#include "regularized-coulomb_ss_cp.h" 14 | 15 | 16 | #endif /* H_K_REGULARIZED_COULOMB_H */ 17 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_cp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_COULOMB_CP_H 3 | #define H_K_REGULARIZED_COULOMB_CP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedCoulomb_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_x, double *source_y, double *source_z, double *source_q, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | void K_RegularizedCoulomb_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_sources, int starting_index_of_cluster, 16 | double *source_x, double *source_y, double *source_z, double *source_q, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_REGULARIZED_COULOMB_CP_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_COULOMB_PC_H 3 | #define H_K_REGULARIZED_COULOMB_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedCoulomb_PC_Lagrange(int number_of_targets_in_batch, int 
number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | void K_RegularizedCoulomb_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 16 | double *target_x, double *target_y, double *target_z, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_REGULARIZED_COULOMB_PC_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_pp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-coulomb_pp.h" 7 | 8 | 9 | void K_RegularizedCoulomb_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_source, 11 | double *target_x, double *target_y, double *target_z, 12 | double *source_x, double *source_y, double *source_z, double *source_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | double epsilon = run_params->kernel_params[0]; 16 | 17 | #ifdef OPENACC_ENABLED 18 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 19 | source_x, source_y, source_z, source_charge, potential) 20 | { 21 | #pragma acc loop independent 22 | #endif 23 | for (int i = 0; i < number_of_targets_in_batch; i++) { 24 | 25 | int ii = starting_index_of_target + i; 26 | double temporary_potential = 0.0; 27 | 28 | double tx = target_x[ii]; 29 | double ty = target_y[ii]; 30 | double tz = target_z[ii]; 31 | 32 | #ifdef OPENACC_ENABLED 33 | #pragma acc loop independent reduction(+:temporary_potential) 34 | #endif 35 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc cache(source_x[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster-1], \ 38 | source_y[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster-1], \ 39 | source_z[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster-1], \ 40 | source_charge[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster-1]) 41 | #endif 42 | 43 | 44 | int jj = starting_index_of_source + j; 45 | double dx = tx - source_x[jj]; 46 | double dy = ty - source_y[jj]; 47 | double dz = tz - source_z[jj]; 48 | double r2 = dx*dx + dy*dy + dz*dz + epsilon*epsilon; 49 | 50 | temporary_potential += source_charge[jj] / sqrt(r2); 51 | } // end loop over source points 52 | #ifdef OPENACC_ENABLED 53 | #pragma acc atomic 54 | #endif 55 | potential[ii] += temporary_potential; 56 | } 57 | #ifdef OPENACC_ENABLED 58 | } // end kernel 59 | #endif 60 | return; 61 | } 62 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_pp.h: 
-------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_COULOMB_PP_H 3 | #define H_K_REGULARIZED_COULOMB_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedCoulomb_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_REGULARIZED_COULOMB_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_ss_correction.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-coulomb_ss_correction.h" 7 | 8 | 9 | void K_RegularizedCoulomb_SS_Correction(double *potential, double *target_q, 10 | int numTargets, struct RunParams *run_params) 11 | { 12 | double alpha = run_params->kernel_params[0]; 13 | double param = 2.0 * M_PI * alpha * alpha; 14 | for (int i = 0; i < numTargets; i++) potential[i] += param * target_q[i]; 15 | 16 | return; 17 | } 18 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_ss_correction.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_COULOMB_SS_CORRECTION_H 3 | #define H_K_REGULARIZED_COULOMB_SS_CORRECTION_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedCoulomb_SS_Correction(double *potential, double *target_q, 9 | int numTargets, struct RunParams *run_params); 10 | 11 | 12 | #endif /* H_K_REGULARIZED_COULOMB_SS_CORRECTION_H */ 13 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_ss_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_COULOMB_SS_PC_LAGRANGE_H 3 | #define H_K_REGULARIZED_COULOMB_SS_PC_LAGRANGE_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedCoulomb_SS_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, double *target_charge, double *cluster_weight, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | void K_RegularizedCoulomb_SS_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 16 | double *target_x, double *target_y, double *target_z, double *target_charge, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, 18 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_REGULARIZED_COULOMB_SS_PC_LAGRANGE_H */ 22 | 
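
For orientation, the particle-particle (PP) kernels above are plain batched direct sums: for each target in a batch they accumulate contributions from a contiguous run of source points. The following driver is an illustrative sketch, not a file from this repository; it assumes struct RunParams exposes kernel_params as an indexable double pointer, which is how the kernels themselves read run_params->kernel_params[0].

/* Illustrative sketch only (not part of the repository). */
#include <stdio.h>
#include "src/run_params/struct_run_params.h"
#include "src/kernels/regularized-coulomb/regularized-coulomb_pp.h"

int main(void)
{
    /* Four toy particles acting as both sources and targets (values are arbitrary). */
    double x[] = {0.0, 1.0, 0.0, 1.0};
    double y[] = {0.0, 0.0, 1.0, 1.0};
    double z[] = {0.0, 0.0, 0.0, 0.0};
    double q[] = {1.0, -1.0, 1.0, -1.0};
    double potential[4] = {0.0, 0.0, 0.0, 0.0};

    double params[1] = {0.1};          /* kernel_params[0] = regularization epsilon */
    struct RunParams run_params;
    run_params.kernel_params = params; /* assumes a double* member, per the kernels' own usage */

    /* One batch of 4 targets against the same 4 points as sources, both starting
       at index 0; the GPU stream id is ignored in CPU (non-OpenACC) builds. */
    K_RegularizedCoulomb_PP(4, 4, 0, 0, x, y, z, x, y, z, q,
                            &run_params, potential, 0);

    for (int i = 0; i < 4; i++)
        printf("potential[%d] = %f\n", i, potential[i]);
    return 0;
}
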
-------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_ss_pp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-coulomb_ss_pp.h" 7 | 8 | 9 | void K_RegularizedCoulomb_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_source, 11 | double *target_x, double *target_y, double *target_z, double *target_charge, 12 | double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | double alpha2 = run_params->kernel_params[0] * run_params->kernel_params[0]; 16 | double epsilon2 = run_params->kernel_params[1] * run_params->kernel_params[1]; 17 | 18 | #ifdef OPENACC_ENABLED 19 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, target_charge, \ 20 | source_x, source_y, source_z, source_charge, source_weight, potential) 21 | { 22 | #endif 23 | #ifdef OPENACC_ENABLED 24 | #pragma acc loop independent 25 | #endif 26 | for (int i = 0; i < number_of_targets_in_batch; i++) { 27 | 28 | int ii = starting_index_of_target + i; 29 | double temporary_potential = 0.0; 30 | 31 | double tx = target_x[ii]; 32 | double ty = target_y[ii]; 33 | double tz = target_z[ii]; 34 | double tq = target_charge[ii]; 35 | 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc loop independent reduction(+:temporary_potential) 38 | #endif 39 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 40 | 41 | int jj = starting_index_of_source + j; 42 | double dx = tx - source_x[jj]; 43 | double dy = ty - source_y[jj]; 44 | double dz = tz - source_z[jj]; 45 | double r2 = dx*dx + dy*dy + dz*dz; 46 | 47 | temporary_potential += (source_charge[jj] - tq * exp(-r2 / alpha2)) 48 | * source_weight[jj] / sqrt(r2 + epsilon2); 49 | } // end loop over source points 50 | #ifdef OPENACC_ENABLED 51 | #pragma acc atomic 52 | #endif 53 | potential[ii] += temporary_potential; 54 | } 55 | #ifdef OPENACC_ENABLED 56 | } // end kernel 57 | #endif 58 | return; 59 | } 60 | -------------------------------------------------------------------------------- /src/kernels/regularized-coulomb/regularized-coulomb_ss_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_COULOMB_SS_PP_H 3 | #define H_K_REGULARIZED_COULOMB_SS_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedCoulomb_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, double *target_charge, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_REGULARIZED_COULOMB_SS_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_YUKAWA_H 3 | #define H_K_REGULARIZED_YUKAWA_H 4 
| 5 | 6 | #include "regularized-yukawa_pp.h" 7 | #include "regularized-yukawa_pc.h" 8 | #include "regularized-yukawa_cp.h" 9 | 10 | #include "regularized-yukawa_ss_pp.h" 11 | #include "regularized-yukawa_ss_correction.h" 12 | #include "regularized-yukawa_ss_pc.h" 13 | //#include "regularized-yukawa_ss_cp.h" 14 | 15 | 16 | #endif /* H_K_REGULARIZED_YUKAWA_H */ 17 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_cp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-yukawa_cp.h" 7 | 8 | 9 | void K_RegularizedYukawa_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_sources, int starting_index_of_cluster, 11 | double *source_x, double *source_y, double *source_z, double *source_q, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 13 | struct RunParams *run_params, int gpu_async_stream_id) 14 | { 15 | 16 | double kappa = run_params->kernel_params[0]; 17 | double epsilon2 = run_params->kernel_params[1] * run_params->kernel_params[1]; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ 21 | cluster_x, cluster_y, cluster_z, cluster_q) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double cx = cluster_x[starting_index_of_cluster + i]; 32 | double cy = cluster_y[starting_index_of_cluster + i]; 33 | double cz = cluster_z[starting_index_of_cluster + i]; 34 | 35 | #ifdef OPENACC_ENABLED 36 | #pragma acc loop independent reduction(+:temporary_potential) 37 | #endif 38 | for (int j = 0; j < number_of_sources_in_batch; j++) { 39 | #ifdef OPENACC_ENABLED 40 | #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 41 | source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 42 | source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \ 43 | source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch]) 44 | #endif 45 | 46 | int jj = starting_index_of_sources + j; 47 | double dx = cx - source_x[jj]; 48 | double dy = cy - source_y[jj]; 49 | double dz = cz - source_z[jj]; 50 | double r2 = dx*dx + dy*dy + dz*dz; 51 | double r = sqrt(r2); 52 | 53 | temporary_potential += source_q[jj] * exp(-kappa * r) / sqrt(r2 + epsilon2); 54 | 55 | } // end loop over sources in batch 56 | #ifdef OPENACC_ENABLED 57 | #pragma acc atomic 58 | #endif 59 | cluster_q[starting_index_of_cluster + i] += temporary_potential; 60 | } 61 | #ifdef OPENACC_ENABLED 62 | 63 | } // end kernel 64 | #endif 65 | return; 66 | } 67 | 68 | 69 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_cp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_YUKAWA_CP_H 3 | #define H_K_REGULARIZED_YUKAWA_CP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedYukawa_CP_Lagrange(int number_of_sources_in_batch, int 
number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_x, double *source_y, double *source_z, double *source_q, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | 15 | 16 | #endif /* H_K_REGULARIZED_YUKAWA_CP_H */ 17 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_pc.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-yukawa_pc.h" 7 | 8 | 9 | void K_RegularizedYukawa_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_cluster, 11 | double *target_x, double *target_y, double *target_z, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | double kappa = run_params->kernel_params[0]; 17 | double epsilon2 = run_params->kernel_params[1] * run_params->kernel_params[1]; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 21 | cluster_x, cluster_y, cluster_z, cluster_charge, potential) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_targets_in_batch; i++) { 28 | 29 | double temporary_potential = 0.0; 30 | 31 | double tx = target_x[starting_index_of_target + i]; 32 | double ty = target_y[starting_index_of_target + i]; 33 | double tz = target_z[starting_index_of_target + i]; 34 | 35 | #ifdef OPENACC_ENABLED 36 | #pragma acc loop independent reduction(+:temporary_potential) 37 | #endif 38 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 39 | 40 | int jj = starting_index_of_cluster + j; 41 | double dx = tx - cluster_x[jj]; 42 | double dy = ty - cluster_y[jj]; 43 | double dz = tz - cluster_z[jj]; 44 | double r2 = dx*dx + dy*dy + dz*dz; 45 | double r = sqrt(r2); 46 | 47 | temporary_potential += cluster_charge[jj] * exp(-kappa * r) / sqrt(r2 + epsilon2); 48 | } // end loop over interpolation points 49 | #ifdef OPENACC_ENABLED 50 | #pragma acc atomic 51 | #endif 52 | potential[starting_index_of_target + i] += temporary_potential; 53 | } 54 | #ifdef OPENACC_ENABLED 55 | } // end kernel 56 | #endif 57 | return; 58 | } 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_YUKAWA_PC_H 3 | #define H_K_REGULARIZED_YUKAWA_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedYukawa_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | /* 15 | void K_RegularizedYukawa_PC_Hermite(int 
number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 16 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 17 | double *target_x, double *target_y, double *target_z, 18 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 19 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 20 | */ 21 | 22 | #endif /* H_K_REGULARIZED_YUKAWA_PC_H */ 23 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_pp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-yukawa_pp.h" 7 | 8 | 9 | void K_RegularizedYukawa_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_source, 11 | double *target_x, double *target_y, double *target_z, 12 | double *source_x, double *source_y, double *source_z, double *source_charge, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | double kappa = run_params->kernel_params[0]; 17 | double epsilon2 = run_params->kernel_params[1] * run_params->kernel_params[1]; 18 | 19 | 20 | #ifdef OPENACC_ENABLED 21 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 22 | source_x, source_y, source_z, source_charge, potential) 23 | { 24 | #endif 25 | #ifdef OPENACC_ENABLED 26 | #pragma acc loop independent 27 | #endif 28 | for (int i = 0; i < number_of_targets_in_batch; i++) { 29 | 30 | double temporary_potential = 0.0; 31 | 32 | double tx = target_x[starting_index_of_target + i]; 33 | double ty = target_y[starting_index_of_target + i]; 34 | double tz = target_z[starting_index_of_target + i]; 35 | 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc loop independent reduction(+:temporary_potential) 38 | #endif 39 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 40 | 41 | double dx = tx - source_x[starting_index_of_source + j]; 42 | double dy = ty - source_y[starting_index_of_source + j]; 43 | double dz = tz - source_z[starting_index_of_source + j]; 44 | double r = sqrt(dx*dx + dy*dy + dz*dz); 45 | 46 | 47 | temporary_potential += source_charge[starting_index_of_source + j] * exp(-kappa*r) / sqrt(r*r + epsilon2); 48 | } // end loop over source points 49 | #ifdef OPENACC_ENABLED 50 | #pragma acc atomic 51 | #endif 52 | potential[starting_index_of_target + i] += temporary_potential; 53 | } 54 | #ifdef OPENACC_ENABLED 55 | } // end kernel 56 | #endif 57 | return; 58 | } 59 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_YUKAWA_PP_H 3 | #define H_K_REGULARIZED_YUKAWA_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedYukawa_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* 
H_K_REGULARIZED_YUKAWA_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_ss_correction.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-yukawa_ss_correction.h" 7 | 8 | 9 | void K_RegularizedYukawa_SS_Correction(double *potential, double *target_q, 10 | int numTargets, struct RunParams *run_params) 11 | { 12 | double kappa = run_params->kernel_params[0]; 13 | double param = 4.0 * M_PI / kappa / kappa; 14 | for (int i = 0; i < numTargets; i++) potential[i] += param * target_q[i]; 15 | 16 | return; 17 | } 18 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_ss_correction.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_YUKAWA_SS_CORRECTION_H 3 | #define H_K_REGULARIZED_YUKAWA_SS_CORRECTION_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedYukawa_SS_Correction(double *potential, double *target_q, 9 | int numTargets, struct RunParams *run_params); 10 | 11 | 12 | #endif /* H_K_REGULARIZED_YUKAWA_SS_CORRECTION_H */ 13 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_ss_pc.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "regularized-yukawa_ss_pc.h" 7 | 8 | 9 | void K_RegularizedYukawa_SS_PC_Lagrange(int number_of_targets_in_batch, 10 | int number_of_interpolation_points_in_cluster, int starting_index_of_target, int starting_index_of_cluster, 11 | double *target_x, double *target_y, double *target_z, double *target_charge, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | double kappa = run_params->kernel_params[0]; 16 | double epsilon = run_params->kernel_params[1]; 17 | 18 | #ifdef OPENACC_ENABLED 19 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, target_charge, \ 20 | cluster_x, cluster_y, cluster_z, cluster_charge, cluster_weight, potential) 21 | { 22 | #endif 23 | #ifdef OPENACC_ENABLED 24 | #pragma acc loop independent 25 | #endif 26 | for (int i = 0; i < number_of_targets_in_batch; i++) { 27 | 28 | int ii = starting_index_of_target + i; 29 | double temporary_potential = 0.0; 30 | 31 | double tx = target_x[ii]; 32 | double ty = target_y[ii]; 33 | double tz = target_z[ii]; 34 | double tq = target_charge[ii]; 35 | 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc loop independent reduction(+:temporary_potential) 38 | #endif 39 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 40 | 41 | int jj = starting_index_of_cluster + j; 42 | double dx = tx - cluster_x[jj]; 43 | double dy = ty - cluster_y[jj]; 44 | double dz = tz - cluster_z[jj]; 45 | double r = sqrt(dx*dx + dy*dy + dz*dz); 46 | 47 | temporary_potential += (cluster_charge[jj] - tq * cluster_weight[jj]) * exp(-kappa*r) / sqrt(r*r + epsilon*epsilon); 48 | } // end loop over interpolation points 49 | #ifdef OPENACC_ENABLED 50 | #pragma acc atomic 51 | #endif 52 | 
potential[ii] += temporary_potential; 53 | } 54 | #ifdef OPENACC_ENABLED 55 | } // end kernel 56 | #endif 57 | return; 58 | } 59 | 60 | 61 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_ss_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_YUKAWA_SS_PC_H 3 | #define H_K_REGULARIZED_YUKAWA_SS_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedYukawa_SS_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, double *target_charge, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | 16 | #endif /* H_K_REGULARIZED_YUKAWA_SS_PC_H */ 17 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_ss_pp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | #include "regularized-yukawa_ss_pp.h" 8 | 9 | void K_RegularizedYukawa_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 10 | int starting_index_of_target, int starting_index_of_source, 11 | double *target_x, double *target_y, double *target_z, double *target_charge, 12 | double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, 13 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 14 | { 15 | 16 | double kappa = run_params->kernel_params[0]; 17 | double epsilon = run_params->kernel_params[1]; 18 | 19 | #ifdef OPENACC_ENABLED 20 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, target_charge, \ 21 | source_x, source_y, source_z, source_charge, source_weight, potential) 22 | { 23 | #endif 24 | #ifdef OPENACC_ENABLED 25 | #pragma acc loop independent 26 | #endif 27 | for (int i = 0; i < number_of_targets_in_batch; i++) { 28 | 29 | int ii = starting_index_of_target + i; 30 | double temporary_potential = 0.0; 31 | 32 | double tx = target_x[ii]; 33 | double ty = target_y[ii]; 34 | double tz = target_z[ii]; 35 | double tq = target_charge[ii]; 36 | 37 | #ifdef OPENACC_ENABLED 38 | #pragma acc loop independent reduction(+:temporary_potential) 39 | #endif 40 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 41 | 42 | int jj = starting_index_of_source + j; 43 | double dx = tx - source_x[jj]; 44 | double dy = ty - source_y[jj]; 45 | double dz = tz - source_z[jj]; 46 | double r = sqrt(dx*dx + dy*dy + dz*dz); 47 | 48 | temporary_potential += (source_charge[jj] - tq) * source_weight[jj] * exp(-kappa*r) / sqrt(r*r + epsilon*epsilon); 49 | } // end loop over source points 50 | #ifdef OPENACC_ENABLED 51 | #pragma acc atomic 52 | #endif 53 | potential[ii] += temporary_potential; 54 | } 55 | #ifdef OPENACC_ENABLED 56 | } // end kernel 57 | #endif 58 | return; 59 | } 60 | -------------------------------------------------------------------------------- /src/kernels/regularized-yukawa/regularized-yukawa_ss_pp.h: -------------------------------------------------------------------------------- 1 | 
/* Interaction Kernels */ 2 | #ifndef H_K_REGULARIZED_YUKAWA_SS_PP_H 3 | #define H_K_REGULARIZED_YUKAWA_SS_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_RegularizedYukawa_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, double *target_charge, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_REGULARIZED_YUKAWA_SS_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/sin-over-r/sin-over-r.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_SIN_OVER_R_H 3 | #define H_K_SIN_OVER_R_H 4 | 5 | #include "sin-over-r_pp.h" 6 | #include "sin-over-r_pc.h" 7 | #include "sin-over-r_cp.h" 8 | 9 | 10 | 11 | #endif /* H_K_SIN_OVER_R_H */ 12 | -------------------------------------------------------------------------------- /src/kernels/sin-over-r/sin-over-r_cp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_SIN_OVER_R_CP_H 3 | #define H_K_SIN_OVER_R_CP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_SinOverR_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_sources, int starting_index_of_cluster, 10 | double *source_x, double *source_y, double *source_z, double *source_q, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, int gpu_async_stream_id); 13 | 14 | void K_SinOverR_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_sources, int starting_index_of_cluster, 16 | double *source_x, double *source_y, double *source_z, double *source_q, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, int gpu_async_stream_id); 19 | 20 | 21 | #endif /* H_K_SIN_OVER_R_CP_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/sin-over-r/sin-over-r_pc.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_SIN_OVER_R_PC_H 3 | #define H_K_SIN_OVER_R_PC_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_SinOverR_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_cluster, 10 | double *target_x, double *target_y, double *target_z, 11 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | void K_SinOverR_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 15 | int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points, 16 | double *target_x, double *target_y, double *target_z, 17 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, 18 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 19 | 20 
| 21 | #endif /* H_K_SIN_OVER_R_PC_H */ 22 | -------------------------------------------------------------------------------- /src/kernels/sin-over-r/sin-over-r_pp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "sin-over-r_pp.h" 7 | 8 | void K_SinOverR_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id) 13 | { 14 | 15 | double kernel_parameter = run_params->kernel_params[0]; 16 | 17 | #ifdef OPENACC_ENABLED 18 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 19 | source_x, source_y, source_z, source_charge, potential) 20 | { 21 | #pragma acc loop independent 22 | #endif 23 | for (int i = 0; i < number_of_targets_in_batch; i++) { 24 | 25 | int ii = starting_index_of_target + i; 26 | double temporary_potential = 0.0; 27 | 28 | double tx = target_x[ii]; 29 | double ty = target_y[ii]; 30 | double tz = target_z[ii]; 31 | 32 | #ifdef OPENACC_ENABLED 33 | #pragma acc loop independent reduction(+:temporary_potential) 34 | #endif 35 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 36 | #ifdef OPENACC_ENABLED 37 | #pragma acc cache(source_x[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ 38 | source_y[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ 39 | source_z[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster], \ 40 | source_charge[starting_index_of_source : starting_index_of_source+number_of_source_points_in_cluster]) 41 | #endif 42 | 43 | 44 | int jj = starting_index_of_source + j; 45 | double dx = tx - source_x[jj]; 46 | double dy = ty - source_y[jj]; 47 | double dz = tz - source_z[jj]; 48 | double r = sqrt(dx*dx + dy*dy + dz*dz); 49 | 50 | if (r > DBL_MIN) { 51 | temporary_potential += source_charge[jj] * sin(kernel_parameter * r) / r; 52 | } 53 | } // end loop over source points 54 | #ifdef OPENACC_ENABLED 55 | #pragma acc atomic 56 | #endif 57 | potential[ii] += temporary_potential; 58 | } 59 | #ifdef OPENACC_ENABLED 60 | } // end kernel 61 | #endif 62 | return; 63 | } 64 | -------------------------------------------------------------------------------- /src/kernels/sin-over-r/sin-over-r_pp.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_SIN_OVER_R_PP_H 3 | #define H_K_SIN_OVER_R_PP_H 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | 7 | 8 | void K_SinOverR_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 9 | int starting_index_of_target, int starting_index_of_source, 10 | double *target_x, double *target_y, double *target_z, 11 | double *source_x, double *source_y, double *source_z, double *source_charge, 12 | struct RunParams *run_params, double *potential, int gpu_async_stream_id); 13 | 14 | 15 | #endif /* H_K_SIN_OVER_R_PP_H */ 16 | -------------------------------------------------------------------------------- /src/kernels/tcf/tcf.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | 
#include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "tcf.h" 6 | 7 | void tcfDirect(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 8 | int starting_index_of_target, int starting_index_of_source, 9 | double *target_x, double *target_y, double *target_z, 10 | double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, 11 | double kernel_parameter1, double kernel_parameter2, double *potential, int gpu_async_stream_id) 12 | { 13 | 14 | double kap_eta_2 = kernel_parameter1 * kernel_parameter2 / 2.0; 15 | 16 | #ifdef OPENACC_ENABLED 17 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 18 | source_x, source_y, source_z, source_q, source_w, potential) 19 | { 20 | #endif 21 | #ifdef OPENACC_ENABLED 22 | #pragma acc loop independent 23 | #endif 24 | for (int i = 0; i < number_of_targets_in_batch; i++) { 25 | 26 | double temporary_potential = 0.0; 27 | 28 | double tx = target_x[starting_index_of_target + i]; 29 | double ty = target_y[starting_index_of_target + i]; 30 | double tz = target_z[starting_index_of_target + i]; 31 | 32 | #ifdef OPENACC_ENABLED 33 | #pragma acc loop independent reduction(+:temporary_potential) 34 | #endif 35 | for (int j = 0; j < number_of_source_points_in_cluster; j++) { 36 | 37 | double dx = tx - source_x[starting_index_of_source + j]; 38 | double dy = ty - source_y[starting_index_of_source + j]; 39 | double dz = tz - source_z[starting_index_of_source + j]; 40 | double r = sqrt(dx*dx + dy*dy + dz*dz); 41 | 42 | if (r > DBL_MIN) { 43 | double kap_r = kernel_parameter1 * r; 44 | double r_eta = r / kernel_parameter2; 45 | temporary_potential += source_q[starting_index_of_source + j] 46 | * source_w[starting_index_of_source + j] / r 47 | * (exp(-kap_r) * erfc(kap_eta_2 - r_eta) 48 | - exp( kap_r) * erfc(kap_eta_2 + r_eta)); 49 | } 50 | } // end loop over source points 51 | #ifdef OPENACC_ENABLED 52 | #pragma acc atomic 53 | #endif 54 | potential[starting_index_of_target + i] += temporary_potential; 55 | } 56 | #ifdef OPENACC_ENABLED 57 | } // end kernel 58 | #endif 59 | return; 60 | } 61 | 62 | 63 | 64 | 65 | void tcfApproximationLagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 66 | int starting_index_of_target, int starting_index_of_cluster, 67 | double *target_x, double *target_y, double *target_z, 68 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 69 | double kernel_parameter1, double kernel_parameter2, double *potential, int gpu_async_stream_id) 70 | { 71 | 72 | double kap_eta_2 = kernel_parameter1 * kernel_parameter2 / 2.0; 73 | 74 | #ifdef OPENACC_ENABLED 75 | #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \ 76 | cluster_x, cluster_y, cluster_z, cluster_q, potential) 77 | { 78 | #endif 79 | #ifdef OPENACC_ENABLED 80 | #pragma acc loop independent 81 | #endif 82 | for (int i = 0; i < number_of_targets_in_batch; i++) { 83 | 84 | double temporary_potential = 0.0; 85 | 86 | double tx = target_x[starting_index_of_target + i]; 87 | double ty = target_y[starting_index_of_target + i]; 88 | double tz = target_z[starting_index_of_target + i]; 89 | 90 | #ifdef OPENACC_ENABLED 91 | #pragma acc loop independent reduction(+:temporary_potential) 92 | #endif 93 | for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) { 94 | 95 | double dx = tx - cluster_x[starting_index_of_cluster + j]; 96 | double dy = ty - cluster_y[starting_index_of_cluster + j]; 97 | double dz = tz - 
cluster_z[starting_index_of_cluster + j]; 98 | double r = sqrt(dx*dx + dy*dy + dz*dz); 99 | 100 | if (r > DBL_MIN) { 101 | double kap_r = kernel_parameter1 * r; 102 | double r_eta = r / kernel_parameter2; 103 | temporary_potential += cluster_q[starting_index_of_cluster + j] / r 104 | * (exp(-kap_r) * erfc(kap_eta_2 - r_eta) 105 | - exp( kap_r) * erfc(kap_eta_2 + r_eta)); 106 | } 107 | } // end loop over interpolation points 108 | #ifdef OPENACC_ENABLED 109 | #pragma acc atomic 110 | #endif 111 | potential[starting_index_of_target + i] += temporary_potential; 112 | } 113 | #ifdef OPENACC_ENABLED 114 | } // end kernel 115 | #endif 116 | return; 117 | } 118 | -------------------------------------------------------------------------------- /src/kernels/tcf/tcf.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_TCF_H 3 | #define H_TCF_H 4 | 5 | 6 | void tcfDirect(int number_of_targets_in_batch, int number_of_source_points_in_cluster, 7 | int starting_index_of_target, int starting_index_of_source, 8 | double *target_x, double *target_y, double *target_z, 9 | double *source_x, double *source_y, double *source_z, double *source_q, double *source_w, 10 | double kernel_parameter1, double kernel_parameter2, double *potential, int gpu_async_stream_id); 11 | 12 | void tcfApproximationLagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster, 13 | int starting_index_of_target, int starting_index_of_cluster, 14 | double *target_x, double *target_y, double *target_z, 15 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 16 | double kernel_parameter1, double kernel_parameter2, double *potential, int gpu_async_stream_id); 17 | 18 | #endif /* H_TCF_H */ 19 | -------------------------------------------------------------------------------- /src/kernels/user_kernel/user_kernel.h: -------------------------------------------------------------------------------- 1 | /* Interaction Kernels */ 2 | #ifndef H_K_USER_KERNEL_H 3 | #define H_K_USER_KERNEL_H 4 | 5 | #include "user_kernel_pp.h" 6 | #include "user_kernel_pc.h" 7 | #include "user_kernel_cp.h" 8 | 9 | #endif /* H_K_USER_KERNEL_H */ 10 | -------------------------------------------------------------------------------- /src/kernels/user_kernel/user_kernel_cp.c: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <float.h> 3 | #include <stdio.h> 4 | 5 | #include "../../run_params/struct_run_params.h" 6 | #include "user_kernel_cp.h" 7 | 8 | 9 | void K_User_Kernel_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster, 10 | int starting_index_of_sources, int starting_index_of_cluster, 11 | double *source_x, double *source_y, double *source_z, double *source_q, 12 | double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, 13 | struct RunParams *run_params, int gpu_async_stream_id) 14 | { 15 | 16 | double kernel_parameter = run_params->kernel_params[0]; 17 | 18 | #ifdef OPENACC_ENABLED 19 | #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \ 20 | cluster_x, cluster_y, cluster_z, cluster_q) 21 | { 22 | #endif 23 | #ifdef OPENACC_ENABLED 24 | #pragma acc loop independent 25 | #endif 26 | for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) { 27 | 28 | double temporary_potential = 0.0; 29 | 30 | double cx = cluster_x[starting_index_of_cluster + i]; 31 | double cy = 
/src/kernels/user_kernel/user_kernel.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_USER_KERNEL_H
#define H_K_USER_KERNEL_H

#include "user_kernel_pp.h"
#include "user_kernel_pc.h"
#include "user_kernel_cp.h"

#endif /* H_K_USER_KERNEL_H */
--------------------------------------------------------------------------------
/src/kernels/user_kernel/user_kernel_cp.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "user_kernel_cp.h"


void K_User_Kernel_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_x, double *source_y, double *source_z, double *source_q,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q,
        struct RunParams *run_params, int gpu_async_stream_id)
{

    double kernel_parameter = run_params->kernel_params[0];

#ifdef OPENACC_ENABLED
    #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, \
                        cluster_x, cluster_y, cluster_z, cluster_q)
    {
#endif
#ifdef OPENACC_ENABLED
    #pragma acc loop independent
#endif
    for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) {

        double temporary_potential = 0.0;

        double cx = cluster_x[starting_index_of_cluster + i];
        double cy = cluster_y[starting_index_of_cluster + i];
        double cz = cluster_z[starting_index_of_cluster + i];

#ifdef OPENACC_ENABLED
        #pragma acc loop independent reduction(+:temporary_potential)
#endif
        for (int j = 0; j < number_of_sources_in_batch; j++) {
#ifdef OPENACC_ENABLED
            #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch])
#endif

            int jj = starting_index_of_sources + j;
            double dx = cx - source_x[jj];
            double dy = cy - source_y[jj];
            double dz = cz - source_z[jj];
            double r = sqrt(dx*dx + dy*dy + dz*dz);

            temporary_potential += source_q[jj] * exp(-kernel_parameter * r) / r;

        } // end loop over sources in batch
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        cluster_q[starting_index_of_cluster + i] += temporary_potential;
    }
#ifdef OPENACC_ENABLED
    } // end kernel
#endif
    return;
}
--------------------------------------------------------------------------------
/src/kernels/user_kernel/user_kernel_cp.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_USER_KERNEL_CP_H
#define H_K_USER_KERNEL_CP_H

#include "../../run_params/struct_run_params.h"


void K_User_Kernel_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_x, double *source_y, double *source_z, double *source_q,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, int gpu_async_stream_id);


#endif /* H_K_USER_KERNEL_CP_H */
--------------------------------------------------------------------------------
/src/kernels/user_kernel/user_kernel_pc.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "user_kernel_pc.h"


void K_User_Kernel_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster,
        double *target_x, double *target_y, double *target_z,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id)
{

    double kernel_parameter = run_params->kernel_params[0];

#ifdef OPENACC_ENABLED
    #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \
                        cluster_x, cluster_y, cluster_z, cluster_charge, potential)
    {
#endif
#ifdef OPENACC_ENABLED
    #pragma acc loop independent
#endif
    for (int i = 0; i < number_of_targets_in_batch; i++) {

        double temporary_potential = 0.0;

        double tx = target_x[starting_index_of_target + i];
        double ty = target_y[starting_index_of_target + i];
        double tz = target_z[starting_index_of_target + i];

#ifdef OPENACC_ENABLED
        #pragma acc loop independent reduction(+:temporary_potential)
#endif
        for (int j = 0; j < number_of_interpolation_points_in_cluster; j++) {

            double dx = tx - cluster_x[starting_index_of_cluster + j];
            double dy = ty - cluster_y[starting_index_of_cluster + j];
            double dz = tz - cluster_z[starting_index_of_cluster + j];
            double r = sqrt(dx*dx + dy*dy + dz*dz);

            temporary_potential += cluster_charge[starting_index_of_cluster + j] * exp(-kernel_parameter * r) / r;

        } // end loop over interpolation points
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        potential[starting_index_of_target + i] += temporary_potential;
    }
#ifdef OPENACC_ENABLED
    } // end kernel
#endif
    return;
}
--------------------------------------------------------------------------------
/src/kernels/user_kernel/user_kernel_pc.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_USER_KERNEL_PC_H
#define H_K_USER_KERNEL_PC_H

#include "../../run_params/struct_run_params.h"


void K_User_Kernel_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster,
        double *target_x, double *target_y, double *target_z,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);


#endif /* H_K_USER_KERNEL_PC_H */
--------------------------------------------------------------------------------
/src/kernels/user_kernel/user_kernel_pp.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "user_kernel_pp.h"


void K_User_Kernel_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster,
        int starting_index_of_target, int starting_index_of_source,
        double *target_x, double *target_y, double *target_z,
        double *source_x, double *source_y, double *source_z, double *source_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id)
{

    double kernel_parameter = run_params->kernel_params[0];

#ifdef OPENACC_ENABLED
    #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \
                        source_x, source_y, source_z, source_charge, potential)
    {
#endif
#ifdef OPENACC_ENABLED
    #pragma acc loop independent
#endif
    for (int i = 0; i < number_of_targets_in_batch; i++) {

        double temporary_potential = 0.0;

        double tx = target_x[starting_index_of_target + i];
        double ty = target_y[starting_index_of_target + i];
        double tz = target_z[starting_index_of_target + i];

#ifdef OPENACC_ENABLED
        #pragma acc loop independent reduction(+:temporary_potential)
#endif
        for (int j = 0; j < number_of_source_points_in_cluster; j++) {

            double dx = tx - source_x[starting_index_of_source + j];
            double dy = ty - source_y[starting_index_of_source + j];
            double dz = tz - source_z[starting_index_of_source + j];
            double r = sqrt(dx*dx + dy*dy + dz*dz);

            if (r > DBL_MIN) {
                temporary_potential += source_charge[starting_index_of_source + j] * exp(-kernel_parameter * r) / r;
            }
        } // end loop over source points
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        potential[starting_index_of_target + i] += temporary_potential;
    }
#ifdef OPENACC_ENABLED
    } // end kernel
#endif
    return;
}
--------------------------------------------------------------------------------
/src/kernels/user_kernel/user_kernel_pp.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_USER_KERNEL_PP_H
#define H_K_USER_KERNEL_PP_H

#include "../../run_params/struct_run_params.h"

void K_User_Kernel_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster,
        double *target_x, double *target_y, double *target_z,
        double *source_x, double *source_y, double *source_z, double *source_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);


#endif /* H_K_USER_KERNEL_PP_H */
--------------------------------------------------------------------------------
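As shipped, the three user_kernel files evaluate a Yukawa-style exp(-k*r)/r placeholder; a custom interaction is obtained by editing the one accumulation line in each of user_kernel_pp.c, user_kernel_pc.c, and user_kernel_cp.c. As a sketch only, for a hypothetical Gaussian interaction the particle-particle accumulation inside the r > DBL_MIN guard would become:

    /* hypothetical replacement, not the shipped kernel */
    temporary_potential += source_charge[starting_index_of_source + j]
                         * exp(-r * r / (kernel_parameter * kernel_parameter));

The same substitution must be made consistently in the PC and CP variants so that direct and approximated interactions agree.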
/src/kernels/yukawa/yukawa.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_H
#define H_K_YUKAWA_H

#include "yukawa_pp.h"
#include "yukawa_pc.h"
#include "yukawa_cp.h"

#include "yukawa_ss_pp.h"
#include "yukawa_ss_correction.h"
#include "yukawa_ss_pc.h"
#include "yukawa_ss_cp.h"
#include "yukawa_ss_cc.h"

#endif /* H_K_YUKAWA_H */
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_cp.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_CP_H
#define H_K_YUKAWA_CP_H

#include "../../run_params/struct_run_params.h"


void K_Yukawa_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_x, double *source_y, double *source_z, double *source_q,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, int gpu_async_stream_id);

void K_Yukawa_CP_Hermite(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_x, double *source_y, double *source_z, double *source_q,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, int gpu_async_stream_id);


#endif /* H_K_YUKAWA_CP_H */
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_pc.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_PC_H
#define H_K_YUKAWA_PC_H

#include "../../run_params/struct_run_params.h"


void K_Yukawa_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster,
        double *target_x, double *target_y, double *target_z,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);

void K_Yukawa_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points,
        double *target_x, double *target_y, double *target_z,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);


#endif /* H_K_YUKAWA_PC_H */
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_pp.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "yukawa_pp.h"


void K_Yukawa_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster,
        int starting_index_of_target, int starting_index_of_source,
        double *target_x, double *target_y, double *target_z,
        double *source_x, double *source_y, double *source_z, double *source_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id)
{

    double kernel_parameter = run_params->kernel_params[0];

#ifdef OPENACC_ENABLED
    #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, \
                        source_x, source_y, source_z, source_charge, potential)
    {
#endif
#ifdef OPENACC_ENABLED
    #pragma acc loop independent
#endif
    for (int i = 0; i < number_of_targets_in_batch; i++) {

        double temporary_potential = 0.0;

        double tx = target_x[starting_index_of_target + i];
        double ty = target_y[starting_index_of_target + i];
        double tz = target_z[starting_index_of_target + i];

#ifdef OPENACC_ENABLED
        #pragma acc loop independent reduction(+:temporary_potential)
#endif
        for (int j = 0; j < number_of_source_points_in_cluster; j++) {

            double dx = tx - source_x[starting_index_of_source + j];
            double dy = ty - source_y[starting_index_of_source + j];
            double dz = tz - source_z[starting_index_of_source + j];
            double r = sqrt(dx*dx + dy*dy + dz*dz);

            if (r > DBL_MIN) {
                temporary_potential += source_charge[starting_index_of_source + j] * exp(-kernel_parameter * r) / r;
            }
        } // end loop over source points
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        potential[starting_index_of_target + i] += temporary_potential;
    }
#ifdef OPENACC_ENABLED
    } // end kernel
#endif
    return;
}
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_pp.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_PP_H
#define H_K_YUKAWA_PP_H

#include "../../run_params/struct_run_params.h"

void K_Yukawa_PP(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster,
        double *target_x, double *target_y, double *target_z,
        double *source_x, double *source_y, double *source_z, double *source_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);


#endif /* H_K_YUKAWA_PP_H */
--------------------------------------------------------------------------------
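A minimal sketch of calling the direct Yukawa kernel; the coordinates, charges, and the screening parameter value below are illustrative assumptions, and the RunParams struct is filled by hand only for brevity (normally RunParams_Setup does this).

    #include "yukawa_pp.h"

    /* Illustrative only: direct Yukawa interaction of 2 sources on 1 target. */
    void example_yukawa_pp(void)
    {
        double tx[1] = {0.0}, ty[1] = {0.0}, tz[1] = {0.0};
        double sx[2] = {1.0, 2.0}, sy[2] = {0.0, 0.0}, sz[2] = {0.0, 0.0};
        double sq[2] = {1.0, -1.0};
        double potential[1] = {0.0};

        double kappa = 0.5;                 /* assumed screening parameter */
        struct RunParams run_params = {0};
        run_params.num_kernel_params = 1;
        run_params.kernel_params = &kappa;

        K_Yukawa_PP(1, 2, 0, 0, tx, ty, tz, sx, sy, sz, sq,
                    &run_params, potential, 0);
    }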
/src/kernels/yukawa/yukawa_ss_cc.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "yukawa_ss_cc.h"


void K_Yukawa_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_cluster_x, double *source_cluster_y, double *source_cluster_z, double *source_cluster_q, double *source_cluster_w,
        double *target_cluster_x, double *target_cluster_y, double *target_cluster_z, double *target_cluster_q, double *target_cluster_w,
        struct RunParams *run_params, int gpu_async_stream_id)
{

    double kernel_parameter = run_params->kernel_params[0];

#ifdef OPENACC_ENABLED
    #pragma acc kernels async(gpu_async_stream_id) present(source_cluster_x, source_cluster_y, source_cluster_z, source_cluster_q, source_cluster_w, \
                        target_cluster_x, target_cluster_y, target_cluster_z, target_cluster_q, target_cluster_w)
    {
#endif
#ifdef OPENACC_ENABLED
    #pragma acc loop independent
#endif
    for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) {

        double temporary_potential = 0.0;
        double temporary_weight = 0.0;

        double cx = target_cluster_x[starting_index_of_cluster + i];
        double cy = target_cluster_y[starting_index_of_cluster + i];
        double cz = target_cluster_z[starting_index_of_cluster + i];

#ifdef OPENACC_ENABLED
        #pragma acc loop independent reduction(+:temporary_potential,temporary_weight)
#endif
        for (int j = 0; j < number_of_sources_in_batch; j++) {
#ifdef OPENACC_ENABLED
            #pragma acc cache(source_cluster_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_cluster_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_cluster_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_cluster_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_cluster_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch])
#endif

            int jj = starting_index_of_sources + j;
            double dx = cx - source_cluster_x[jj];
            double dy = cy - source_cluster_y[jj];
            double dz = cz - source_cluster_z[jj];
            double r = sqrt(dx*dx + dy*dy + dz*dz);

            temporary_potential += source_cluster_q[jj] * exp(-kernel_parameter * r) / r; // source_cluster_q already has source_q * source_w
            temporary_weight += source_cluster_w[jj] * exp(-kernel_parameter * r) / r;

        } // end loop over source cluster points
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        target_cluster_q[starting_index_of_cluster + i] += temporary_potential;
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        target_cluster_w[starting_index_of_cluster + i] += temporary_weight;
    }
#ifdef OPENACC_ENABLED
    } // end kernel
#endif
    return;
}
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_cc.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_SS_CC_H
#define H_K_YUKAWA_SS_CC_H

#include "../../run_params/struct_run_params.h"


void K_Yukawa_SS_CC_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_cluster_x, double *source_cluster_y, double *source_cluster_z, double *source_cluster_q, double *source_cluster_w,
        double *target_cluster_x, double *target_cluster_y, double *target_cluster_z, double *target_cluster_charge, double *target_cluster_w,
        struct RunParams *run_params, int gpu_async_stream_id);


#endif /* H_K_YUKAWA_SS_CC_H */
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_correction.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "yukawa_ss_correction.h"


void K_Yukawa_SS_Correction(double *potential, double *target_q,
        int numTargets, struct RunParams *run_params)
{
    double kernel_parameter = run_params->kernel_params[0];
    double param = 4.0 * M_PI / kernel_parameter / kernel_parameter;
    for (int i = 0; i < numTargets; i++) {
        potential[i] += param * target_q[i];
        // printf("target_q[%i], target_w[%i] = %f, %f\n", i, i, potential[i], target_q[i]);
    }

    return;
}
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_correction.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_SS_CORRECTION_H
#define H_K_YUKAWA_SS_CORRECTION_H

#include "../../run_params/struct_run_params.h"


void K_Yukawa_SS_Correction(double *potential, double *target_q,
        int numTargets, struct RunParams *run_params);


#endif /* H_K_YUKAWA_SS_CORRECTION_H */
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_cp.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "yukawa_ss_cp.h"


void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_x, double *source_y, double *source_z, double *source_q, double *source_w,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_q, double *cluster_w,
        struct RunParams *run_params, int gpu_async_stream_id)
{

    double kernel_parameter = run_params->kernel_params[0];

#ifdef OPENACC_ENABLED
    #pragma acc kernels async(gpu_async_stream_id) present(source_x, source_y, source_z, source_q, source_w, \
                        cluster_x, cluster_y, cluster_z, cluster_q, cluster_w)
    {
#endif
#ifdef OPENACC_ENABLED
    #pragma acc loop independent
#endif
    for (int i = 0; i < number_of_interpolation_points_in_cluster; i++) {

        double temporary_potential = 0.0;
        double temporary_weight = 0.0;

        double cx = cluster_x[starting_index_of_cluster + i];
        double cy = cluster_y[starting_index_of_cluster + i];
        double cz = cluster_z[starting_index_of_cluster + i];

#ifdef OPENACC_ENABLED
        #pragma acc loop independent reduction(+:temporary_potential) reduction(+:temporary_weight)
#endif
        for (int j = 0; j < number_of_sources_in_batch; j++) {
#ifdef OPENACC_ENABLED
            #pragma acc cache(source_x[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_y[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_z[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_q[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch], \
                              source_w[starting_index_of_sources : starting_index_of_sources+number_of_sources_in_batch])
#endif

            int jj = starting_index_of_sources + j;
            double dx = cx - source_x[jj];
            double dy = cy - source_y[jj];
            double dz = cz - source_z[jj];
            double r = sqrt(dx*dx + dy*dy + dz*dz);

            temporary_potential += source_q[jj] * source_w[jj] * exp(-kernel_parameter * r) / r;
            temporary_weight += source_w[jj] * exp(-kernel_parameter * r) / r;

        } // end loop over sources in batch
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        cluster_q[starting_index_of_cluster + i] += temporary_potential;
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        cluster_w[starting_index_of_cluster + i] += temporary_weight;
    }
#ifdef OPENACC_ENABLED
    } // end kernel
#endif
    return;
}
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_cp.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_SS_CP_H
#define H_K_YUKAWA_SS_CP_H

#include "../../run_params/struct_run_params.h"


void K_Yukawa_SS_CP_Lagrange(int number_of_sources_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_sources, int starting_index_of_cluster,
        double *source_x, double *source_y, double *source_z, double *source_q, double *source_w,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_w,
        struct RunParams *run_params, int gpu_async_stream_id);


#endif /* H_K_YUKAWA_SS_CP_H */
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_pc.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_SS_PC_H
#define H_K_YUKAWA_SS_PC_H

#include "../../run_params/struct_run_params.h"


void K_Yukawa_SS_PC_Lagrange(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster,
        double *target_x, double *target_y, double *target_z, double *target_charge, double *cluster_weight,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);

void K_Yukawa_SS_PC_Hermite(int number_of_targets_in_batch, int number_of_interpolation_points_in_cluster,
        int starting_index_of_target, int starting_index_of_cluster, int total_number_interpolation_points,
        double *target_x, double *target_y, double *target_z, double *target_charge,
        double *cluster_x, double *cluster_y, double *cluster_z, double *cluster_charge, double *cluster_weight,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);


#endif /* H_K_YUKAWA_SS_PC_H */
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_pp.c:
--------------------------------------------------------------------------------
#include <math.h>
#include <float.h>
#include <stdio.h>

#include "../../run_params/struct_run_params.h"
#include "yukawa_ss_pp.h"


void K_Yukawa_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster,
        int starting_index_of_target, int starting_index_of_source,
        double *target_x, double *target_y, double *target_z, double *target_charge,
        double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id)
{

    double kernel_parameter = run_params->kernel_params[0];

#ifdef OPENACC_ENABLED
    #pragma acc kernels async(gpu_async_stream_id) present(target_x, target_y, target_z, target_charge, \
                        source_x, source_y, source_z, source_charge, source_weight, potential)
    {
#endif
#ifdef OPENACC_ENABLED
    #pragma acc loop independent
#endif
    for (int i = 0; i < number_of_targets_in_batch; i++) {

        int ii = starting_index_of_target + i;
        double temporary_potential = 0.0;

        double tx = target_x[ii];
        double ty = target_y[ii];
        double tz = target_z[ii];
        double tq = target_charge[ii];

#ifdef OPENACC_ENABLED
        #pragma acc loop independent reduction(+:temporary_potential)
#endif
        for (int j = 0; j < number_of_source_points_in_cluster; j++) {

            int jj = starting_index_of_source + j;
            double dx = tx - source_x[jj];
            double dy = ty - source_y[jj];
            double dz = tz - source_z[jj];
            double r = sqrt(dx*dx + dy*dy + dz*dz);

            if (r > DBL_MIN) {
                temporary_potential += (source_charge[jj] - tq) * source_weight[jj] * exp(-kernel_parameter * r) / r;
            }
        } // end loop over source points
#ifdef OPENACC_ENABLED
        #pragma acc atomic
#endif
        potential[ii] += temporary_potential;
    }
#ifdef OPENACC_ENABLED
    } // end kernel
#endif
    return;
}
--------------------------------------------------------------------------------
/src/kernels/yukawa/yukawa_ss_pp.h:
--------------------------------------------------------------------------------
/* Interaction Kernels */
#ifndef H_K_YUKAWA_SS_PP_H
#define H_K_YUKAWA_SS_PP_H

#include "../../run_params/struct_run_params.h"


void K_Yukawa_SS_PP(int number_of_targets_in_batch, int number_of_source_points_in_cluster,
        int starting_index_of_target, int starting_index_of_source,
        double *target_x, double *target_y, double *target_z, double *target_charge,
        double *source_x, double *source_y, double *source_z, double *source_charge, double *source_weight,
        struct RunParams *run_params, double *potential, int gpu_async_stream_id);


#endif /* H_K_YUKAWA_SS_PP_H */
--------------------------------------------------------------------------------
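A sketch of how the singularity-subtraction pieces combine, with made-up point data. K_Yukawa_SS_PP accumulates (q_j - q_i) * w_j * exp(-k*r_ij)/r_ij, and K_Yukawa_SS_Correction then adds back q_i * 4*pi/k^2, which is the analytic value of the integral of exp(-k*r)/r over all space. The arrays and the assumption that run_params was already configured with a Yukawa parameter are illustrative.

    #include "yukawa_ss_pp.h"
    #include "yukawa_ss_correction.h"

    /* Illustrative only: subtracted interaction plus analytic correction. */
    void example_yukawa_ss(struct RunParams *run_params)
    {
        double tx[1] = {0.0}, ty[1] = {0.0}, tz[1] = {0.0}, tq[1] = {1.0};
        double sx[2] = {1.0, 2.0}, sy[2] = {0.0, 0.0}, sz[2] = {0.0, 0.0};
        double sq[2] = {1.0, -1.0}, sw[2] = {1.0, 1.0};
        double potential[1] = {0.0};

        K_Yukawa_SS_PP(1, 2, 0, 0, tx, ty, tz, tq,
                       sx, sy, sz, sq, sw, run_params, potential, 0);
        K_Yukawa_SS_Correction(potential, tq, 1, run_params);
    }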
/src/particles/particles.h:
--------------------------------------------------------------------------------
#ifndef H_PARTICLE_FUNCTIONS_H
#define H_PARTICLE_FUNCTIONS_H

#include "../run_params/struct_run_params.h"
#include "struct_particles.h"

void Particles_Alloc(struct Particles **particles_addr, int length);

void Particles_Free(struct Particles **particles_addr);

void Particles_Targets_Reorder(struct Particles *targets, double *potential);

void Particles_Sources_Reorder(struct Particles *sources);

void Particles_ConstructOrder(struct Particles *particles);

void Particles_FreeOrder(struct Particles *particles);

void Particles_Validate(struct Particles *sources, struct Particles *targets,
        struct RunParams *run_params);


#endif /* H_PARTICLE_FUNCTIONS_H */
--------------------------------------------------------------------------------
/src/particles/struct_particles.h:
--------------------------------------------------------------------------------
#ifndef H_PARTICLES_H
#define H_PARTICLES_H

/* declaration of struct with tag particles */
struct Particles
{
    int num;
    double *x;
    double *y;
    double *z;
    double *q;
    // quadrature weights; set to 1 when interacting point particles
    // rather than evaluating a convolution integral
    double *w;

    int *ibeg;
    int *iend;

    int *order;

    int sources_w_dummy;
    int targets_q_dummy;

    int targets_x_duplicate;
    int targets_y_duplicate;
    int targets_z_duplicate;
    int targets_q_duplicate;
};

#endif /* H_PARTICLES_H */
--------------------------------------------------------------------------------
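A sketch of setting up a source set, assuming (see particles.c for the actual behavior) that Particles_Alloc sizes the x/y/z/q/w arrays for the requested number of particles; the coordinate and charge values are illustrative.

    #include "particles.h"

    /* Illustrative only: fill n unit-weight sources along the x axis. */
    void example_sources(int n)
    {
        struct Particles *sources = NULL;
        Particles_Alloc(&sources, n);   /* assumed to allocate the arrays below */

        for (int i = 0; i < n; i++) {
            sources->x[i] = (double) i;
            sources->y[i] = 0.0;
            sources->z[i] = 0.0;
            sources->q[i] = 1.0;
            sources->w[i] = 1.0;        /* unit weights: plain particle interaction */
        }

        Particles_Free(&sources);
    }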
/src/run_params/run_params.h:
--------------------------------------------------------------------------------
#ifndef H_RUN_PARAMS_FUNCTIONS_H
#define H_RUN_PARAMS_FUNCTIONS_H

#include "../utilities/enums.h"
#include "struct_run_params.h"


void RunParams_Setup(struct RunParams **run_params_addr,
        KERNEL kernel, int num_kernel_params, double *kernel_params,
        APPROXIMATION approximation,
        SINGULARITY singularity,
        COMPUTE_TYPE compute_type,
        double theta, int interp_degree,
        int max_per_source_leaf, int max_per_target_leaf, double size_check_factor,
        double beta, int verbosity);

void RunParams_Validate(struct RunParams *run_params);

void RunParams_Free(struct RunParams **run_params_addr);

void RunParams_Print(struct RunParams *run_params);


#endif
--------------------------------------------------------------------------------
/src/run_params/struct_run_params.h:
--------------------------------------------------------------------------------
#ifndef H_RUN_PARAMS_H
#define H_RUN_PARAMS_H

#include "../utilities/enums.h"


struct RunParams
{
    KERNEL kernel;
    int num_kernel_params;
    double *kernel_params;

    APPROXIMATION approximation;
    SINGULARITY singularity;
    COMPUTE_TYPE compute_type;

    double theta;
    double size_check_factor;

    int interp_degree;
    int interp_pts_per_cluster;
    int interp_charges_per_cluster;
    int interp_weights_per_cluster;

    int max_per_source_leaf;
    int max_per_target_leaf;

    double beta;

    int verbosity;
};


#endif /* H_RUN_PARAMS_H */
--------------------------------------------------------------------------------
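A sketch of configuring a run through RunParams_Setup; the enum choices come from enums.h, but every numeric value below is a typical-looking assumption rather than a recommended setting.

    #include "run_params.h"

    /* Illustrative only: a Lagrange particle-cluster Yukawa configuration. */
    void example_run_params(void)
    {
        struct RunParams *run_params = NULL;
        double kernel_params[1] = {0.5};    /* assumed Yukawa screening parameter */

        RunParams_Setup(&run_params, YUKAWA, 1, kernel_params,
                        LAGRANGE, SKIPPING, PARTICLE_CLUSTER,
                        0.7,     /* theta */
                        8,       /* interp_degree */
                        500,     /* max_per_source_leaf */
                        500,     /* max_per_target_leaf */
                        1.0,     /* size_check_factor */
                        -1.0,    /* beta */
                        0);      /* verbosity */

        RunParams_Free(&run_params);
    }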
/src/tree/batches.h:
--------------------------------------------------------------------------------
#ifndef H_BATCH_FUNCTIONS_H
#define H_BATCH_FUNCTIONS_H

#include "../particles/struct_particles.h"
#include "../run_params/struct_run_params.h"

#include "struct_tree.h"


void Batches_Sources_Construct(struct Tree **batches_addr, struct Particles *sources,
        struct RunParams *run_params);

void Batches_Targets_Construct(struct Tree **batches_addr, struct Particles *targets,
        struct RunParams *run_params);

void Batches_Alloc(struct Tree **batches_addr, int length);

void Batches_Free(struct Tree **batches_addr);

void Batches_Free_Win(struct Tree **batches_addr);

void Batches_Print(struct Tree *batches);


#endif
--------------------------------------------------------------------------------
/src/tree/partition.h:
--------------------------------------------------------------------------------
#ifndef H_PARTITION_H
#define H_PARTITION_H

/*
 * declaration of partition functions
 *
 * The partition routines determine the index midind after partitioning the
 * arrays a, b, c, and d in place, such that a(ibeg:midind) <= val and
 * a(midind+1:iend) > val. If, on entry, ibeg > iend or all of a(ibeg:iend)
 * exceeds val, then midind is returned as ibeg - 1.
 */

void cp_partition(double *a, double *b, double *c, double *d, int *indarr,
        int ibeg, int iend, double val, int *midind);

void pc_partition(double *a, double *b, double *c, double *d, double *w, int *indarr,
        int ibeg, int iend, double val, int *midind);

void pc_partition_8(double *x, double *y, double *z, double *q, double *w, int *indarr,
        double xyzmms[6][8], double xl, double yl, double zl,
        int *numposchild, int max_num_children,
        double x_mid, double y_mid, double z_mid,
        int ind[8][2]);

void cp_partition_8(double *x, double *y, double *z, double *q, int *indarr,
        double xyzmms[6][8], double xl, double yl, double zl,
        int *numposchild, int max_num_children,
        double x_mid, double y_mid, double z_mid,
        int ind[8][2]);


#endif /* H_PARTITION_H */
--------------------------------------------------------------------------------
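A worked example of the partition contract stated in the header comment, with illustrative values:

    /* With a = {3, 9, 1, 7, 5}, val = 5, ibeg = 1, iend = 5 (Fortran-style
     * 1-based indices, matching the a(ibeg:iend) notation above), a is permuted
     * in place so that a(1:midind) <= 5 < a(midind+1:5); one valid outcome is
     * a = {3, 1, 5, 9, 7} with midind = 3. The companion arrays b, c, d (and w)
     * are permuted alongside a, and indarr records the permutation so the
     * original ordering can be restored later. */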
/src/tree/struct_tree.h:
--------------------------------------------------------------------------------
#ifndef H_STRUCT_TREE_H
#define H_STRUCT_TREE_H

struct Tree
{
    int numnodes;
    int numleaves;

    int min_leaf_size;
    int max_leaf_size;
    int max_depth;

    int *ibeg;
    int *iend;
    int *numpar;

    int *cluster_ind;

    double *radius;

    double *x_mid;
    double *y_mid;
    double *z_mid;

    double *x_min;
    double *y_min;
    double *z_min;

    double *x_max;
    double *y_max;
    double *z_max;

    int *num_children;
    int *children;
    int *parent;

    int **levels_list;
    int *levels_list_num;

    int *leaves_list;
    int leaves_list_num;

};

#endif /* H_STRUCT_TREE_H */
--------------------------------------------------------------------------------
/src/tree/struct_tree_linked_list_node.h:
--------------------------------------------------------------------------------
#ifndef H_STRUCT_TREE_LINKED_LIST_NODE_H
#define H_STRUCT_TREE_LINKED_LIST_NODE_H

struct TreeLinkedListNode
{
    int numpar, ibeg, iend;

    double x_min, y_min, z_min;
    double x_max, y_max, z_max;
    double x_mid, y_mid, z_mid;

    double radius, aspect;

    int num_children;
    struct TreeLinkedListNode *child[8];
    struct TreeLinkedListNode *parent;

    int node_index;

    int level;
};

#endif /* H_STRUCT_TREE_LINKED_LIST_NODE_H */
--------------------------------------------------------------------------------
/src/tree/tree.h:
--------------------------------------------------------------------------------
#ifndef H_TREE_FUNCTIONS_H
#define H_TREE_FUNCTIONS_H

#include "../particles/struct_particles.h"
#include "../run_params/struct_run_params.h"

#include "struct_tree_linked_list_node.h"
#include "struct_tree.h"


void Tree_Sources_Construct(struct Tree **tree_addr, struct Particles *sources, struct RunParams *run_params);

void Tree_Targets_Construct(struct Tree **tree_addr, struct Particles *targets, struct RunParams *run_params);

void Tree_Set_Leaves_and_Levels(struct Tree *tree);

void Tree_Fill_Levels(struct Tree *tree, int idx, int level, int *sizeof_levels_list, int *sizeof_leaves_list);

void Tree_Alloc(struct Tree **tree_addr, int length);

void Tree_Free(struct Tree **tree_addr);

void Tree_Fill(struct Tree *tree, struct TreeLinkedListNode *p);

void Tree_Print(struct Tree *tree);


#endif /* H_TREE_FUNCTIONS_H */
--------------------------------------------------------------------------------
/src/tree/tree_linked_list.h:
--------------------------------------------------------------------------------
#ifndef H_TREE_LINKED_LIST_FUNCTIONS_H
#define H_TREE_LINKED_LIST_FUNCTIONS_H

#include "../particles/struct_particles.h"
#include "struct_tree_linked_list_node.h"


void TreeLinkedList_Targets_Construct(struct TreeLinkedListNode **p, struct TreeLinkedListNode *parent,
        struct Particles *targets,
        int ibeg, int iend, int maxparnode, double *xyzmm, int *numnodes, int *numleaves,
        int *min_leaf_size, int *max_leaf_size, int *max_depth, int current_level);

void TreeLinkedList_Sources_Construct(struct TreeLinkedListNode **p, struct TreeLinkedListNode *parent,
        struct Particles *sources,
        int ibeg, int iend, int maxparnode, double *xyzmm,
        int *numnodes, int *numleaves, int *min_leaf_size, int *max_leaf_size,
        int *max_depth, int current_level);

int TreeLinkedList_SetIndex(struct TreeLinkedListNode *p, int index);

void TreeLinkedList_Free(struct TreeLinkedListNode **p_addr);


#endif /* H_TREE_LINKED_LIST_FUNCTIONS_H */
--------------------------------------------------------------------------------
/src/utilities/advanced_timings.h:
--------------------------------------------------------------------------------
#ifndef H_ADVANCED_TIMINGS_H
#define H_ADVANCED_TIMINGS_H

#include <mpi.h>

#include "../run_params/struct_run_params.h"


void Timing_Calculate(double time_tree_glob[3][13], double time_tree[13], double total_time_glob[1], double total_time[1]);

void Timing_Print(double time_tree_glob[3][13], double total_time_glob[1], struct RunParams *run_params);


#endif /* H_ADVANCED_TIMINGS_H */
--------------------------------------------------------------------------------
/src/utilities/array.h:
--------------------------------------------------------------------------------
/*
 * C header file containing macros for vector and array creation
 * using the xmalloc.c memory allocation routine
 *
 * This C code written by:
 * Leighton Wilson, University of Michigan, Ann Arbor, MI
 *
 * Based on the work of Rouben Rostamian, presented in
 * "Programming Projects in C for Students of Engineering,
 * Science, and Mathematics"
 *
 * Last modified by Leighton Wilson, 06/23/2016
 */


#ifndef H_ARRAY_H
#define H_ARRAY_H
#include "xmalloc.h"

#define make_vector(v,n) ((v) = xmalloc((n) * sizeof *(v)))

#define free_vector(v) do { free(v); v = NULL; } while (0)

#define realloc_vector(v,n) ((v) = realloc(v, (n) * sizeof *(v)))


#define make_matrix(a, m, n) do { \
    size_t make_matrix_loop_counter; \
    make_vector(a, (m) + 1); \
    for (make_matrix_loop_counter = 0; \
         make_matrix_loop_counter < (size_t)(m); \
         make_matrix_loop_counter++) \
        make_vector((a)[make_matrix_loop_counter], (n)); \
    (a)[m] = NULL; \
} while (0)

#define free_matrix(a) do { \
    if (a != NULL) { \
        size_t make_matrix_loop_counter; \
        for (make_matrix_loop_counter = 0; \
             (a)[make_matrix_loop_counter] != NULL; \
             make_matrix_loop_counter++) \
            free_vector((a)[make_matrix_loop_counter]); \
        free_vector(a); \
        a = NULL; \
    } \
} while (0)

#define make_3array(a, l, m, n) do { \
    size_t make_3array_loop_counter; \
    make_vector(a, (l) + 1); \
    for (make_3array_loop_counter = 0; \
         make_3array_loop_counter < (size_t)(l); \
         make_3array_loop_counter++) \
        make_matrix((a)[make_3array_loop_counter], (m), (n)); \
    (a)[l] = NULL; \
} while (0)

#define free_3array(a) do { \
    if (a != NULL) { \
        size_t make_3array_loop_counter; \
        for (make_3array_loop_counter = 0; \
             (a)[make_3array_loop_counter] != NULL; \
             make_3array_loop_counter++) \
            free_matrix((a)[make_3array_loop_counter]); \
        free_vector(a); \
        a = NULL; \
    } \
} while (0)

#define print_vector(fmt, v, n) do { \
    size_t print_vector_loop_counter; \
    for (print_vector_loop_counter = 0; \
         print_vector_loop_counter < (size_t)(n); \
         print_vector_loop_counter++) \
        printf(fmt, (v)[print_vector_loop_counter]); \
    putchar('\n'); \
} while (0)

#define print_matrix(fmt, a, m, n) do { \
    size_t print_matrix_loop_counter; \
    for (print_matrix_loop_counter = 0; \
         print_matrix_loop_counter < (size_t)(m); \
         print_matrix_loop_counter++) \
        print_vector(fmt, (a)[print_matrix_loop_counter], (n)); \
} while (0)



#endif /*H_ARRAY_H*/
--------------------------------------------------------------------------------
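A sketch of the allocation macros in use. Note that make_matrix appends a NULL sentinel row, which is what lets free_matrix discover the row count on its own; stdio.h and stdlib.h are included here explicitly since the print and free macros rely on them.

    #include <stdio.h>
    #include <stdlib.h>
    #include "array.h"

    /* Illustrative only: allocate, fill, print, and free a vector and matrix. */
    void example_arrays(void)
    {
        double *v = NULL;
        double **m = NULL;

        make_vector(v, 10);     /* v = xmalloc(10 * sizeof *v) */
        make_matrix(m, 3, 4);   /* 3 rows of 4 doubles, NULL-terminated row list */

        v[0] = 1.0;
        m[2][3] = 2.0;

        print_vector("%g ", v, 10);
        print_matrix("%g ", m, 3, 4);

        free_matrix(m);
        free_vector(v);
    }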
/src/utilities/enums.h:
--------------------------------------------------------------------------------
#ifndef H_BARYTREE_TYPES_H
#define H_BARYTREE_TYPES_H


typedef enum KERNEL
{
    NO_KERNEL,
    COULOMB,
    YUKAWA,
    REGULARIZED_COULOMB,
    REGULARIZED_YUKAWA,
    ATAN,
    TCF,
    DCF,
    SIN_OVER_R,
    MQ,
    RBS_U,
    RBS_V,
    USER
} KERNEL;


typedef enum SINGULARITY
{
    NO_SINGULARITY,
    SKIPPING,
    SUBTRACTION
} SINGULARITY;


typedef enum APPROXIMATION
{
    NO_APPROX,
    LAGRANGE,
    HERMITE
} APPROXIMATION;


typedef enum COMPUTE_TYPE
{
    NO_COMPUTE_TYPE,
    PARTICLE_CLUSTER,
    CLUSTER_PARTICLE,
    CLUSTER_CLUSTER,
} COMPUTE_TYPE;


#endif /* H_BARYTREE_TYPES_H */
--------------------------------------------------------------------------------
/src/utilities/timers.c:
--------------------------------------------------------------------------------
#include <mpi.h>


void START_TIMER(double *time)
{
    *time = MPI_Wtime();

    return;
}


void STOP_TIMER(double *time)
{
    *time = MPI_Wtime() - *time;

    return;
}
--------------------------------------------------------------------------------
/src/utilities/timers.h:
--------------------------------------------------------------------------------
#ifndef H_TIMERS_H
#define H_TIMERS_H

void START_TIMER(double *time);

void STOP_TIMER(double *time);

#endif /* H_TIMERS_H */
--------------------------------------------------------------------------------
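The timers wrap MPI_Wtime, so MPI must already be initialized when they are called; the work being timed below is a placeholder.

    #include "timers.h"

    /* Illustrative only: time a code section in seconds. */
    void example_timing(void)
    {
        double time_run;
        START_TIMER(&time_run);
        /* ... work to be timed ... */
        STOP_TIMER(&time_run);   /* time_run now holds the elapsed seconds */
    }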
"%s: line %d: malloc() of %zu bytes failed\n", 23 | file, line, nbytes); 24 | exit(EXIT_FAILURE); 25 | } 26 | else 27 | return x; 28 | } 29 | -------------------------------------------------------------------------------- /src/utilities/xmalloc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * C header file for xmalloc.c memory allocation routine 3 | * 4 | * This C code written by: 5 | * Leighton Wilson, University of Michigan, Ann Arbor, MI 6 | * 7 | * Based on the work of Rouben Rostamian, presented in 8 | * "Programming Projects in C for Students of Engineering, 9 | * Science, and Mathematics" 10 | * 11 | * Last modified by Leighton Wilson, 06/23/2016 12 | */ 13 | 14 | #ifndef H_XMALLOC_H 15 | #define H_XMALLOC_H 16 | #include 17 | void *malloc_or_exit(size_t nbytes, const char *file, int line); 18 | #define xmalloc(nbytes) malloc_or_exit((nbytes), __FILE__, __LINE__) 19 | #endif /*H_XMALLOC_H*/ 20 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | /locally_run_tests.sh 2 | /queue_all_tests.sh 3 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(tests_cpu serial_tests.c) 2 | target_link_libraries(tests_cpu PRIVATE BaryTree_cpu) 3 | 4 | add_test(NAME direct_sum_on_10_particles COMMAND tests_cpu 0) 5 | add_test(NAME treecode_on_100_particles COMMAND tests_cpu 1) 6 | add_test(NAME treecode_on_1_target_10000_sources COMMAND tests_cpu 2) 7 | add_test(NAME treecode_parameters_on_1_target_10000_sources COMMAND tests_cpu 3) 8 | add_test(NAME test_treecode_wrapper COMMAND tests_cpu 4) 9 | add_test(NAME test_BLDTT COMMAND tests_cpu 5) -------------------------------------------------------------------------------- /tests/minunit.h: -------------------------------------------------------------------------------- 1 | /* file: minunit.h */ 2 | #define mu_assert(message, test) do { if (!(test)) return message; } while (0) 3 | #define mu_run_test(test) do { char *message = test(); tests_run++; \ 4 | if (message) return message; } while (0) 5 | extern int tests_run; 6 | --------------------------------------------------------------------------------