├── .clang-format ├── .cmake-format ├── .gitattributes ├── .gitignore ├── CMakeLists.txt ├── Config ├── BenchP0.toml ├── LargeP0.toml ├── PlotTestResult.py ├── PointDistribution.py ├── Verify.toml └── VerifyWall.toml ├── Demo ├── CMakeLists.txt ├── Demo.cpp └── Demo_SK.cpp ├── LICENSE ├── Lib ├── CMakeLists.txt ├── include │ └── STKFMM │ │ ├── LaplaceLayerKernel.hpp │ │ ├── RPYKernel.hpp │ │ ├── STKFMM.h │ │ ├── STKFMM.hpp │ │ ├── STKFMM_common.hpp │ │ ├── STKFMM_impl.hpp │ │ ├── StokesDoubleLayerKernel.hpp │ │ ├── StokesLayerKernel.hpp │ │ ├── StokesRegSingleLayerKernel.hpp │ │ ├── StokesSingleLayerKernel.hpp │ │ └── stkfmm_helpers.hpp └── src │ ├── FMMData.cpp │ ├── STKFMM.cpp │ ├── Stk3DFMM-c.cpp │ ├── Stk3DFMM.cpp │ └── StkWallFMM.cpp ├── M2L ├── CMakeLists.txt ├── Laplace │ ├── Laplace1D3D.cpp │ ├── Laplace2D3D.cpp │ ├── Laplace3D3D.cpp │ └── main.cpp ├── SVD_pvfmm.hpp ├── StokesPVel │ ├── StokesPVel1D3D.cpp │ ├── StokesPVel2D3D.cpp │ ├── StokesPVel3D3D.cpp │ └── main.cpp ├── Stokeslet │ ├── Stokes1D3D.cpp │ ├── Stokes2D3D.cpp │ ├── Stokes3D3D.cpp │ └── main.cpp ├── gen.py └── svd_test.cpp ├── Note ├── LaplaceQuadrupole.nb └── StokesSingularity.tex ├── Python ├── CMakeLists.txt ├── PySTKFMM.py ├── check_flux.py ├── example.py ├── kernels.py ├── rpy_test.py └── timer.py ├── README.md ├── README_Tex.md ├── STKFMM_Logo_RGB.svg ├── Test ├── CMakeLists.txt ├── SimpleKernel.cpp ├── SimpleKernel.hpp ├── Test.cpp ├── Test.hpp ├── Util │ ├── CLI11.hpp │ ├── ChebNodal.hpp │ ├── PointDistribution.cpp │ ├── PointDistribution.hpp │ ├── Timer.hpp │ └── json.hpp └── main.cpp ├── do-cmake.sh ├── doc └── Doxyfile.in └── svgs ├── 00ab3fb1d4498352d564322b3d8281ab.svg ├── 07e476cc0252962199ca482cc8788e94.svg ├── 080e940370a8293ef0ea9c02e8836013.svg ├── 0b5c36a960bf1e20da870975949caf38.svg ├── 0b9096df9697907d2beba351423393bb.svg ├── 0ec0e7629c233c51a807937c9c2e0008.svg ├── 1069b8abbb5837aa1e07cd46c48ff62d.svg ├── 12dd280bcf2f8b88954a119a6fe0cc82.svg ├── 
1492e52db7f896468254a8034fcbf840.svg ├── 1519c3ecab000d56ee33670b9426c1ed.svg ├── 194516c014804d683d1ab5a74f8c5647.svg ├── 199f58fd308548442348e4a586184098.svg ├── 1a0ddfa5e81ec1e06ac5c4b3e4530e4e.svg ├── 1abfc937b0f1b385c8c69b2730a6cda6.svg ├── 1fc018edd54a76a01783d1cf35676916.svg ├── 2103f85b8b1477f430fc407cad462224.svg ├── 23776aad854f2d33e83e4f4cad44e1b9.svg ├── 23bbbdfa14b7ee7a030d0c04fd38250a.svg ├── 2f5303a6c997e9f9a4ebdb12d00348c1.svg ├── 34be5e6cbc28b74e8f561c1d527644ce.svg ├── 3ce145d17b292a694572c25966e7805f.svg ├── 3d9f8b131aee6306786da85243ef8109.svg ├── 4bdc8d9bcfb35e1c9bfb51fc69687dfc.svg ├── 4ed9389b413af04e9786fa6e147ddbdb.svg ├── 635bf1b5a0d6b2f1f190d90b7ceb4060.svg ├── 6b6d8b569851a0e4b1e2c3143e9ec356.svg ├── 75d6f59e125d5e0ccc1984cd60ecbaca.svg ├── 79a624f595dfa02aaede80594ce7a077.svg ├── 7ccca27b5ccc533a2dd72dc6fa28ed84.svg ├── 8234ec946e597939f115b30d797912cd.svg ├── 912607c89eae037134fdf3e74d602929.svg ├── 9a709c64f59f11dbeaecdd88b3339783.svg ├── a04fd15dcb86e6e1eadfc08d64cb38d6.svg ├── a4256cdebe78f0dbefcbeef82a7adb35.svg ├── a499cec8684687006cb7f8267c392af8.svg ├── a5a3c89b53bed887e7e194b0670abc9a.svg ├── aab7f6d0d35c1902f2a8b8ac1cc3061a.svg ├── ac9424c220341fa74016e5769014f456.svg ├── b1f2675e5b0e8444482d1bdfac266e90.svg ├── b6d71cb440aa28c10ccfa14da6d8700e.svg ├── bdbf342b57819773421273d508dba586.svg ├── bee1683ac5a86212efac5d2804145b0f.svg ├── c0b7bdafbb8aef85d4275c543c04eeb7.svg ├── c0e8acfba65f15b77a9457b9e727c409.svg ├── c37ded03564c90141c5f1e058edc4ab8.svg ├── c4dd4df1478960c5f0d78f517ad773e5.svg ├── d132b5b7147e3ea90b176f89736f9d8b.svg ├── d4b5cf8f522b37d2d4a1d1ee619261ec.svg ├── d5c18a8ca1894fd3a7d25f242cbe8890.svg ├── df06f340b9915e0682b914a0b1de03b9.svg └── e714d66356b6c29eeee3f7985e73c67f.svg /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | BasedOnStyle: LLVM 4 | TabWidth: 4 5 | ColumnLimit: 120 6 | IndentWidth: 4 7 | AlwaysBreakTemplateDeclarations: 
true 8 | ... 9 | 10 | -------------------------------------------------------------------------------- /.cmake-format: -------------------------------------------------------------------------------- 1 | # -------------------------- 2 | # General Formatting Options 3 | # -------------------------- 4 | # How wide to allow formatted cmake files 5 | line_width = 80 6 | 7 | # How many spaces to tab for indent 8 | tab_size = 2 9 | 10 | # If an argument group contains more than this many sub-groups (parg or kwarg 11 | # groups), then force it to a vertical layout. 12 | max_subgroups_hwrap = 2 13 | 14 | # If a positional argument group contains more than this many arguments, then 15 | # force it to a vertical layout. 16 | max_pargs_hwrap = 6 17 | 18 | # If true, separate flow control names from their parentheses with a space 19 | separate_ctrl_name_with_space = False 20 | 21 | # If true, separate function names from parentheses with a space 22 | separate_fn_name_with_space = False 23 | 24 | # If a statement is wrapped to more than one line, then dangle the closing 25 | # parenthesis on its own line. 26 | dangle_parens = False 27 | 28 | # If the trailing parenthesis must be 'dangled' on its own line, then align it to 29 | # this reference: `prefix`: the start of the statement, `prefix-indent`: the 30 | # start of the statement, plus one indentation level, `child`: align to the 31 | # column of the arguments 32 | dangle_align = 'prefix' 33 | 34 | # If the statement spelling length (including space and parenthesis) is smaller 35 | # than this amount, then force reject nested layouts. 36 | min_prefix_chars = 4 37 | 38 | # If the statement spelling length (including space and parenthesis) is larger 39 | # than the tab width by more than this amount, then force reject un-nested 40 | # layouts. 41 | max_prefix_chars = 10 42 | 43 | # If a candidate layout is wrapped horizontally but it exceeds this many lines, 44 | # then reject the layout. 
45 | max_lines_hwrap = 2 46 | 47 | # What style line endings to use in the output. 48 | line_ending = 'unix' 49 | 50 | # Format command names consistently as 'lower' or 'upper' case 51 | command_case = 'canonical' 52 | 53 | # Format keywords consistently as 'lower' or 'upper' case 54 | keyword_case = 'unchanged' 55 | 56 | # Specify structure for custom cmake functions 57 | additional_commands = {'pkg_find': {'kwargs': {'PKG': '*'}}} 58 | 59 | # A list of command names which should always be wrapped 60 | always_wrap = [] 61 | 62 | # If true, the argument lists which are known to be sortable will be sorted 63 | # lexicographically 64 | enable_sort = True 65 | 66 | # If true, the parsers may infer whether or not an argument list is sortable 67 | # (without annotation). 68 | autosort = False 69 | 70 | # By default, if cmake-format cannot successfully fit everything into the 71 | # desired linewidth it will apply the last, most aggressive attempt that it made. 72 | # If this flag is True, however, cmake-format will print error, exit with non- 73 | # zero status code, and write-out nothing 74 | require_valid_layout = False 75 | 76 | # A dictionary containing any per-command configuration overrides. Currently 77 | # only `command_case` is supported. 78 | per_command = {} 79 | 80 | # A dictionary mapping layout nodes to a list of wrap decisions. See the 81 | # documentation for more information. 
82 | layout_passes = {} 83 | 84 | # ---------------------------- 85 | # Options affecting the linter 86 | # ---------------------------- 87 | with section("linter"): 88 | # regular expression pattern describing valid function names 89 | function_pattern = '[0-9a-z_]+' 90 | 91 | # regular expression pattern describing valid macro names 92 | macro_pattern = '[0-9A-Z_]+' 93 | 94 | # regular expression pattern describing valid names for variables with global 95 | # scope 96 | global_var_pattern = '[0-9A-Z][0-9A-Z_]+' 97 | 98 | internal_var_pattern = '_[0-9A-Z_]+' 99 | 100 | # regular expression pattern describing valid names for variables with local 101 | # scope 102 | local_var_pattern = '[0-9a-z][0-9a-z_]+' 103 | 104 | private_var_pattern = '_[0-9a-z_]+' 105 | 106 | public_var_pattern = '[0-9A-Z][0-9A-Z_]+' 107 | 108 | # regular expression pattern describing valid names for keywords used in 109 | # functions or macros 110 | keyword_pattern = '[0-9A-Z_]+' 111 | 112 | # In the heuristic for C0201, how many conditionals to match within a loop in 113 | # before considering the loop a parser. 114 | max_conditionals_custom_parser = 2 115 | 116 | # Require at least this many newlines between statements 117 | min_statement_spacing = 1 118 | 119 | # Require no more than this many newlines between statements 120 | max_statement_spacing = 1 121 | 122 | max_returns = 6 123 | 124 | max_branches = 12 125 | 126 | max_arguments = 5 127 | 128 | max_localvars = 15 129 | 130 | max_statements = 50 131 | 132 | # ------------------------------------ 133 | # Options affecting comment formatting 134 | # ------------------------------------ 135 | # What character to use for bulleted lists 136 | bullet_char = '*' 137 | 138 | # What character to use as punctuation after numerals in an enumerated list 139 | enum_char = '.' 140 | 141 | # If comment markup is enabled, don't reflow the first comment block in each 142 | # listfile. 
Use this to preserve formatting of your copyright/license 143 | # statements. 144 | first_comment_is_literal = False 145 | 146 | # If comment markup is enabled, don't reflow any comment block which matches 147 | # this (regex) pattern. Default is `None` (disabled). 148 | literal_comment_pattern = None 149 | 150 | # Regular expression to match preformat fences in comments 151 | # default=r'^\s*([`~]{3}[`~]*)(.*)$' 152 | fence_pattern = '^\\s*([`~]{3}[`~]*)(.*)$' 153 | 154 | # Regular expression to match rulers in comments 155 | # default=r'^\s*[^\w\s]{3}.*[^\w\s]{3}$' 156 | ruler_pattern = '^\\s*[^\\w\\s]{3}.*[^\\w\\s]{3}$' 157 | 158 | # If a comment line starts with at least this many consecutive hash characters, 159 | # then don't lstrip() them off. This allows for lazy hash rulers where the first 160 | # hash char is not separated by space 161 | hashruler_min_length = 10 162 | 163 | # If true, then insert a space between the first hash char and remaining hash 164 | # chars in a hash ruler, and normalize its length to fill the column 165 | canonicalize_hashrulers = True 166 | 167 | # enable comment markup parsing and reflow 168 | enable_markup = False 169 | 170 | # --------------------- 171 | # Miscellaneous options 172 | # --------------------- 173 | # If true, emit the unicode byte-order mark (BOM) at the start of the file 174 | emit_byteorder_mark = False 175 | 176 | # Specify the encoding of the input file. Defaults to utf-8. 177 | input_encoding = 'utf-8' 178 | 179 | # Specify the encoding of the output file. Defaults to utf-8. 
Note that cmake 180 | # only claims to support utf-8 so be careful when using anything else 181 | output_encoding = 'utf-8' -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.7z filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # folders 2 | .vscode/ 3 | build/ 4 | *.dSYM/ 5 | 6 | # latex files 7 | *.log 8 | *.aux 9 | *.pdf 10 | *.synctex.gz 11 | 12 | # test logs 13 | 14 | # temporary files 15 | .o-* 16 | .o.* 17 | 18 | # Prerequisites 19 | *.d 20 | 21 | # Compiled Object files 22 | *.slo 23 | *.lo 24 | *.o 25 | *.obj 26 | 27 | # Precompiled Headers 28 | *.gch 29 | *.pch 30 | 31 | # Compiled Dynamic libraries 32 | *.so 33 | *.dylib 34 | *.dll 35 | 36 | # Fortran module files 37 | *.mod 38 | *.smod 39 | 40 | # Compiled Static libraries 41 | *.lai 42 | *.la 43 | *.a 44 | *.lib 45 | 46 | # Executables 47 | *.X 48 | *.exe 49 | *.out 50 | *.app 51 | 52 | # Precompute files 53 | *.data 54 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project( 3 | STKFMM 4 | VERSION 1.0 5 | LANGUAGES CXX) 6 | 7 | set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) 8 | set(CMAKE_CXX_STANDARD 14) 9 | set(CMAKE_CXX_EXTENSIONS OFF) 10 | set(CMAKE_C_STANDARD 99) 11 | set(CMAKE_C_EXTENSIONS OFF) 12 | 13 | # compiler flags 14 | if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") 15 | # using Clang 16 | elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") 17 | # using GCC 18 | elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") 19 | # using intel, disable offloading 20 | set(CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} -qno-offload") 21 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -qno-offload") 22 | elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") 23 | # using Visual Studio C++ 24 | endif() 25 | 26 | # Add all the SCTL relevant flags 27 | add_compile_options(-DSCTL_QUAD_T=__float128 -DSCTL_HAVE_BLAS -DSCTL_HAVE_LAPACK -DSCTL_HAVE_FFTW -I${PROJECT_SOURCE_DIR}/SCTL/include) 28 | 29 | set(MPI_CXX_SKIP_MPICXX 30 | true 31 | CACHE BOOL "The MPI-2 C++ bindings are disabled.") 32 | # required compiler features 33 | find_package(MPI REQUIRED) 34 | find_package(OpenMP REQUIRED) 35 | # library 36 | find_package(pvfmm REQUIRED) 37 | find_package(Eigen3 REQUIRED) 38 | 39 | enable_testing() 40 | 41 | set(BUILD_M2L 42 | OFF 43 | CACHE BOOL "compile M2L data generator") 44 | set(BUILD_TEST 45 | ON 46 | CACHE BOOL "compile c++ test driver") 47 | set(BUILD_DOC 48 | OFF 49 | CACHE BOOL "build doxygen doc") 50 | set(PyInterface 51 | OFF 52 | CACHE BOOL "build python interface") 53 | 54 | add_subdirectory(Lib) 55 | add_subdirectory(Demo) 56 | 57 | if(BUILD_M2L) 58 | add_subdirectory(M2L) 59 | endif() 60 | 61 | if(BUILD_TEST) 62 | add_subdirectory(Test) 63 | endif() 64 | 65 | if(PyInterface) 66 | add_subdirectory(Python) 67 | endif() 68 | 69 | # doxygen documentation 70 | if(BUILD_DOC) 71 | find_package(Doxygen REQUIRED) 72 | set(DOXYGEN_GENERATE_LATEX NO) 73 | set(DOXYGEN_GENERATE_HTML YES) 74 | set(DOXYGEN_GENERATE_MAN NO) 75 | 76 | # set input and output files 77 | set(DOXYGEN_IN ${CMAKE_SOURCE_DIR}/doc/Doxyfile.in) 78 | set(DOXYGEN_OUT ${CMAKE_BINARY_DIR}/Doxyfile) 79 | 80 | # request to configure the file 81 | configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY) 82 | message("Doxygen build started") 83 | 84 | # note the option ALL which allows to build the docs together with the application 85 | add_custom_target( 86 | doc_doxygen ALL 87 | COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT} 88 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 89 | COMMENT "Generating API documentation with 
Doxygen" 90 | VERBATIM) 91 | endif() 92 | -------------------------------------------------------------------------------- /Config/BenchP0.toml: -------------------------------------------------------------------------------- 1 | nsl = 99 2 | ndl = 99 3 | ntrg = 99 4 | box = 32 5 | origin = [1,2,3] 6 | kernel = 0 7 | pbc = 0 8 | seed = 0 9 | eps = 1e-4 10 | max = 2000 11 | direct = false 12 | verify = false 13 | convergence = true 14 | distType = 2 15 | distParam = [-1.0, 0.5] 16 | random = true 17 | wall = false 18 | 19 | -------------------------------------------------------------------------------- /Config/LargeP0.toml: -------------------------------------------------------------------------------- 1 | nsl = 199 2 | ndl = 199 3 | ntrg = 199 4 | box = 64 5 | origin = [1,2,3] 6 | kernel = 0 7 | pbc = 0 8 | seed = 0 9 | eps = 1e-4 10 | max = 2000 11 | direct = false 12 | verify = false 13 | convergence = true 14 | distType = 2 15 | distParam = [-1.0, 0.5] 16 | random = true 17 | wall = false 18 | 19 | -------------------------------------------------------------------------------- /Config/PointDistribution.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | import matplotlib as mpl 4 | import matplotlib.pyplot as plt 5 | import argparse 6 | 7 | # define rc params 8 | params = { 9 | # 'backend': 'Agg', 10 | 'font.family': 'serif', 11 | 'font.size': 9, 12 | 'axes.labelsize': 9, 13 | 'legend.fontsize': 7, 14 | 'xtick.labelsize': 7, 15 | 'ytick.labelsize': 7, 16 | 'text.usetex': True, 17 | 'text.latex.preamble': r'\usepackage{amsmath} \usepackage[notextcomp]{stix} \usepackage[T1]{fontenc} \usepackage{bm}'} 18 | plt.rcParams.update(params) 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("point_file") 22 | args = parser.parse_args() 23 | print("Parsing "+args.point_file) 24 | 25 | pts3D = np.loadtxt(args.point_file, delimiter=',')[:, 0:3] 26 | 27 | npts = 1000 28 | 
azm = -60 29 | skip = int(pts3D.shape[0]/npts) 30 | 31 | x = pts3D[::skip, 0] 32 | y = pts3D[::skip, 1] 33 | z = pts3D[::skip, 2] 34 | 35 | fig = plt.figure(figsize=(4.0, 3.0), dpi=150, constrained_layout=True) 36 | ax1 = fig.add_subplot(111, projection='3d') 37 | ax1.scatter(x, y, z, marker='+') 38 | ax1.set_xlabel('X') 39 | ax1.set_ylabel('Y') 40 | ax1.set_zlabel('Z') 41 | # ax1.set_xlim(0, 1) 42 | # ax1.set_ylim(0, 1) 43 | # ax1.set_zlim(0, 1) 44 | ax1.grid(False) 45 | ax1.view_init(10, azm) 46 | 47 | plt.savefig('PointDist.png', dpi=600) 48 | -------------------------------------------------------------------------------- /Config/Verify.toml: -------------------------------------------------------------------------------- 1 | nsl = 32 2 | ndl = 32 3 | ntrg = 32 4 | box = 16 5 | origin = [1,2,3] 6 | kernel = 0 7 | pbc = 0 8 | seed = 0 9 | eps = 1e-4 10 | max = 1000 11 | direct = false 12 | verify = true 13 | convergence = true 14 | random = true 15 | distType = 2 16 | distParam = [-1.0, 0.5] 17 | wall = false 18 | 19 | -------------------------------------------------------------------------------- /Config/VerifyWall.toml: -------------------------------------------------------------------------------- 1 | nsl = 32 2 | ndl = 0 3 | ntrg = 32 4 | box = 16 5 | origin = [1,2,3] 6 | kernel = 24 7 | pbc = 0 8 | seed = 0 9 | eps = 1e-4 10 | max = 1000 11 | direct = false 12 | verify = true 13 | convergence = false 14 | random = true 15 | distType = 2 16 | distParam = [-1.0, 0.5] 17 | wall = true 18 | 19 | -------------------------------------------------------------------------------- /Demo/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(Demo.X Demo.cpp) 2 | target_link_libraries(Demo.X PRIVATE STKFMM_STATIC OpenMP::OpenMP_CXX 3 | MPI::MPI_CXX) 4 | 5 | add_executable(Demo_SK.X Demo_SK.cpp ../Test/SimpleKernel.cpp) 6 | target_link_libraries(Demo_SK.X PRIVATE OpenMP::OpenMP_CXX MPI::MPI_CXX) 7 | 
-------------------------------------------------------------------------------- /Demo/Demo.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "STKFMM/STKFMM.hpp" 6 | 7 | #include "mpi.h" 8 | #include "omp.h" 9 | 10 | int main(int argc, char **argv) { 11 | MPI_Init(&argc, &argv); 12 | double reps = 1e-4; 13 | std::vector srcCoord = {0.6 + reps, 0.6 + 2 * reps, 0.6 + 3 * reps, 0.6, 0.6, 0.6}; 14 | std::vector trgCoord = srcCoord; 15 | std::vector srcValue = {0.1, 0.2, 0.3, 0.4, -0.1, -0.2, -0.3, -0.4}; 16 | std::vector trgValue(2 * 9, 0.0); 17 | std::vector trgValue2(2 * 9, 0.0); 18 | 19 | double origin[3] = {0, 0, 0}; 20 | double box = 1; 21 | 22 | auto kernel = stkfmm::KERNEL::Traction; 23 | unsigned int kernelComb = stkfmm::asInteger(kernel); 24 | { 25 | auto fmm = stkfmm::Stk3DFMM(16, 2000, stkfmm::PAXIS::PXY, kernelComb, false); 26 | fmm.showActiveKernels(); 27 | fmm.setBox(origin, box); 28 | // first evaluation 29 | { 30 | fmm.clearFMM(kernel); 31 | fmm.setPoints(2, srcCoord.data(), 2, trgCoord.data()); 32 | fmm.setupTree(kernel); 33 | fmm.evaluateFMM(kernel, 2, srcValue.data(), 2, trgValue.data()); 34 | } 35 | 36 | // shift points 37 | double shift[3] = {0.5, 0.5, 0}; 38 | for (int i = 0; i < 2; i++) { 39 | for (int j = 0; j < 3; j++) { 40 | srcCoord[3 * i + j] += shift[j]; 41 | trgCoord[3 * i + j] += shift[j]; 42 | } 43 | } 44 | 45 | // second evaluation 46 | { 47 | fmm.clearFMM(kernel); 48 | fmm.setPoints(2, srcCoord.data(), 2, trgCoord.data()); 49 | fmm.setupTree(kernel); 50 | fmm.evaluateFMM(kernel, 2, srcValue.data(), 2, trgValue2.data()); 51 | } 52 | 53 | for (int i = 0; i < 18; i++) { 54 | printf("%18.16g,%18.16g,%g,%g\n", trgValue[i], trgValue2[i], trgValue[i] - trgValue2[i], 55 | (trgValue[i] - trgValue2[i]) / trgValue2[i]); 56 | } 57 | } 58 | 59 | MPI_Finalize(); 60 | return 0; 61 | } 
-------------------------------------------------------------------------------- /Demo/Demo_SK.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../Test/SimpleKernel.hpp" 6 | 7 | #include "mpi.h" 8 | #include "omp.h" 9 | 10 | int main(int argc, char **argv) { 11 | MPI_Init(&argc, &argv); 12 | double reps = 1e-4; 13 | std::vector srcCoord = {0.6 + reps, 0.6 + 2 * reps, 0.6 + 3 * reps, 0.6, 0.6, 0.6}; 14 | std::vector trgCoord = srcCoord; 15 | std::vector srcValue = {0.1, 0.2, 0.3, 0.4, -0.1, -0.2, -0.3, -0.4}; 16 | std::vector trgValue(2 * 9, 0.0); 17 | std::vector trgValue2(2 * 9, 0.0); 18 | 19 | double origin[3] = {0, 0, 0}; 20 | double box = 1; 21 | 22 | { 23 | StokesSLTraction(srcCoord.data() + 3, trgCoord.data(), srcValue.data() + 4, trgValue.data()); 24 | // shift points 25 | double shift[3] = {0.5, 0.5, 0}; 26 | for (int i = 0; i < 2; i++) { 27 | for (int j = 0; j < 3; j++) { 28 | srcCoord[3 * i + j] += shift[j]; 29 | trgCoord[3 * i + j] += shift[j]; 30 | } 31 | } 32 | 33 | StokesSLTraction(srcCoord.data() + 3, trgCoord.data(), srcValue.data() + 4, trgValue2.data()); 34 | for (int i = 0; i < 18; i++) { 35 | printf("%18.16g,%18.16g,%g,%g\n", trgValue[i], trgValue2[i], trgValue[i] - trgValue2[i], 36 | (trgValue[i] - trgValue2[i]) / trgValue2[i]); 37 | } 38 | } 39 | 40 | MPI_Finalize(); 41 | return 0; 42 | } -------------------------------------------------------------------------------- /Lib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # part 1, core library 2 | 3 | # shared lib 4 | add_library(STKFMM_SHARED SHARED src/FMMData.cpp src/STKFMM.cpp 5 | src/Stk3DFMM.cpp src/StkWallFMM.cpp 6 | src/Stk3DFMM-c.cpp) 7 | target_include_directories( 8 | STKFMM_SHARED 9 | PUBLIC $ 10 | $ 11 | ${PVFMM_INCLUDE_DIR}/pvfmm ${PVFMM_DEP_INCLUDE_DIR}) 12 | target_link_libraries(STKFMM_SHARED PUBLIC 
${PVFMM_LIB_DIR}/${PVFMM_SHARED_LIB} 13 | ${PVFMM_DEP_LIB} OpenMP::OpenMP_CXX MPI::MPI_CXX) 14 | 15 | target_compile_options(STKFMM_SHARED PUBLIC ${OpenMP_CXX_FLAGS}) 16 | # static lib 17 | add_library(STKFMM_STATIC STATIC src/FMMData.cpp src/STKFMM.cpp 18 | src/Stk3DFMM.cpp src/StkWallFMM.cpp) 19 | target_include_directories( 20 | STKFMM_STATIC 21 | PUBLIC $ 22 | $ 23 | ${PVFMM_INCLUDE_DIR}/pvfmm ${PVFMM_DEP_INCLUDE_DIR}) 24 | target_link_libraries(STKFMM_STATIC PUBLIC ${PVFMM_LIB_DIR}/${PVFMM_STATIC_LIB} 25 | ${PVFMM_DEP_LIB}) 26 | 27 | target_compile_options(STKFMM_STATIC PUBLIC ${OpenMP_CXX_FLAGS}) 28 | 29 | # install core library and headers 30 | include(GNUInstallDirs) 31 | install( 32 | TARGETS STKFMM_SHARED STKFMM_STATIC 33 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 34 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) 35 | install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ 36 | DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) 37 | -------------------------------------------------------------------------------- /Lib/include/STKFMM/STKFMM.h: -------------------------------------------------------------------------------- 1 | 2 | typedef struct Stk3DFMM Stk3DFMM; 3 | typedef struct StkWallFMM StkWallFMM; 4 | 5 | Stk3DFMM *Stk3DFMM_create(int mult_order, int max_pts, int pbc, unsigned kernelComb); 6 | 7 | void Stk3DFMM_destroy(Stk3DFMM *fmm); 8 | 9 | void Stk3DFMM_set_points(Stk3DFMM *fmm, const int nSL, double *src_SL_coord, const int nTrg, double *trg_coord, 10 | const int nDL, double *src_DL_coord); 11 | 12 | void Stk3DFMM_set_box(Stk3DFMM *fmm, double *origin, double len); 13 | 14 | void Stk3DFMM_setup_tree(Stk3DFMM *fmm, unsigned kernel); 15 | 16 | void Stk3DFMM_evaluate_fmm(Stk3DFMM *fmm, unsigned kernel, const int nSL, double *src_SL_value, const int nTrg, 17 | double *trg_value, const int nDL, double *src_DL_value); 18 | 19 | void Stk3DFMM_show_active_kernels(Stk3DFMM *fmm); 20 | 21 | 22 | StkWallFMM *StkWallFMM_create(int mult_order, int max_pts, int pbc, 
unsigned kernelComb); 23 | 24 | void StkWallFMM_destroy(StkWallFMM *fmm); 25 | 26 | void StkWallFMM_set_points(StkWallFMM *fmm, const int nSL, double *src_SL_coord, const int nTrg, double *trg_coord, 27 | const int nDL, double *src_DL_coord); 28 | 29 | void StkWallFMM_set_box(StkWallFMM *fmm, double *origin, double len); 30 | 31 | void StkWallFMM_setup_tree(StkWallFMM *fmm, unsigned kernel); 32 | 33 | void StkWallFMM_evaluate_fmm(StkWallFMM *fmm, unsigned kernel, const int nSL, double *src_SL_value, const int nTrg, 34 | double *trg_value, const int nDL, double *src_DL_value); 35 | 36 | void StkWallFMM_show_active_kernels(StkWallFMM *fmm); 37 | -------------------------------------------------------------------------------- /Lib/include/STKFMM/STKFMM_common.hpp: -------------------------------------------------------------------------------- 1 | #ifndef STKFMM_COMMON_ 2 | #define STKFMM_COMMON_ 3 | 4 | #include 5 | #include 6 | 7 | #include "LaplaceLayerKernel.hpp" 8 | #include "RPYKernel.hpp" 9 | #include "StokesLayerKernel.hpp" 10 | #include "StokesRegSingleLayerKernel.hpp" 11 | 12 | #include 13 | 14 | namespace stkfmm { 15 | /** 16 | * @brief Allow printing various FMM information during execution 17 | * 18 | * Set environment variable STKFMM_VERBOSE=1 to enable 19 | */ 20 | extern const bool verbose; 21 | 22 | /** 23 | * @brief choose the periodic boundary condition 24 | * 25 | */ 26 | enum class PAXIS : unsigned { 27 | NONE = 0, ///< non-periodic, free-space 28 | PX = 1, ///< periodic along x axis 29 | PXY = 2, ///< periodic along XY axis 30 | PXYZ = 3 ///< periodic along XYZ axis 31 | }; 32 | 33 | /** 34 | * @brief directly run point-to-point kernels without building FMM tree 35 | * 36 | */ 37 | enum class PPKERNEL : unsigned { 38 | SLS2T = 1, ///< Single Layer S -> T kernel 39 | DLS2T = 2, ///< Double Layer S -> T kernel 40 | L2T = 4, ///< L -> T kernel 41 | }; 42 | 43 | /** 44 | * @brief choose a kernel 45 | */ 46 | enum class KERNEL : unsigned { 47 | 
LapPGrad = 1, ///< Laplace 48 | LapPGradGrad = 2, ///< Laplace 49 | LapQPGradGrad = 4, ///< Laplace quadrupole 50 | 51 | Stokes = 8, ///< Stokeslet 3x3 52 | RPY = 16, ///< RPY 53 | StokesRegVel = 32, ///< Regularized Stokes Velocity 54 | StokesRegVelOmega = 64, ///< Regularized Stokes Velocity/Rotation 55 | 56 | PVel = 128, ///< Stokes 57 | PVelGrad = 256, ///< Stokes 58 | PVelLaplacian = 512, ///< Stokes 59 | Traction = 1024, ///< Stokes 60 | 61 | LapGrad = 2048 62 | }; 63 | 64 | /** 65 | * @brief map of kernel -> kernel function pointer 66 | * 67 | */ 68 | extern const std::unordered_map *> kernelMap; 69 | 70 | /** 71 | * @brief Get kernel dimension 72 | * 73 | * @param kernel_ one of the kernels 74 | * @return [single layer kernel dimension, double layer kernel dimension, 75 | * target kernel dimension] 76 | */ 77 | std::tuple getKernelDimension(KERNEL kernel_); 78 | 79 | /** 80 | * @brief Get the name of the kernel 81 | * 82 | * @param kernel_ 83 | * @return std::string 84 | */ 85 | std::string getKernelName(KERNEL kernel_); 86 | 87 | /** 88 | * @brief Get the Kernel Function Pointer 89 | * 90 | * @param kernel_ 91 | * @return const pvfmm::Kernel* 92 | */ 93 | const pvfmm::Kernel *getKernelFunction(KERNEL kernel_); 94 | 95 | /** 96 | * @brief Enum to integer 97 | * 98 | * @tparam Enumeration 99 | * @param value 100 | * @return std::underlying_type::type 101 | */ 102 | template 103 | auto asInteger(Enumeration const value) -> typename std::underlying_type::type { 104 | return static_cast::type>(value); 105 | } 106 | 107 | } // namespace stkfmm 108 | #endif 109 | -------------------------------------------------------------------------------- /Lib/include/STKFMM/STKFMM_impl.hpp: -------------------------------------------------------------------------------- 1 | #ifndef STKFMM_IMPL_ 2 | #define STKFMM_IMPL_ 3 | 4 | #include "STKFMM_common.hpp" 5 | 6 | #include 7 | #include 8 | 9 | namespace stkfmm { 10 | 11 | namespace impl { 12 | 13 | /** 14 | * @brief Run FMM 
for a chosen kernel 15 | * (1) accept only coordinates within [0,1) box 16 | * (2) handles periodicity 17 | * Remark: this class is not supposed to be used by the user of this library 18 | */ 19 | class FMMData { 20 | public: 21 | const stkfmm::KERNEL kernelChoice; ///< chosen kernel 22 | const stkfmm::PAXIS periodicity; ///< chosen periodicity 23 | bool enableFF; ///< enable periodic Far-Field fix 24 | 25 | int kdimSL; ///< Single Layer kernel dimension 26 | int kdimDL; ///< Double Layer kernel dimension 27 | int kdimTrg; ///< Target kernel dimension 28 | 29 | int multOrder; ///< multipole order 30 | int maxPts; ///< max number of points per octant 31 | 32 | const pvfmm::Kernel *kernelFunctionPtr; ///< pointer to kernel function 33 | 34 | std::vector equivCoord; ///< periodicity L2T equivalent point coord 35 | std::vector M2Ldata; ///< periodicity M2L operator data 36 | std::vector M2Cdata; ///< periodicity M2C operator data 37 | 38 | FMMData() = delete; ///< forbid default constructor 39 | 40 | // forbid copy constructor 41 | FMMData(const FMMData &) = delete; 42 | FMMData &operator=(const FMMData &) = delete; 43 | FMMData(FMMData &&) = delete; 44 | FMMData &operator=(FMMData &&) = delete; 45 | 46 | /** 47 | * @brief Construct a new FMMData object 48 | * 49 | * @param kernelChoice_ 50 | * @param periodicity_ 51 | * @param multOrder_ 52 | * @param maxPts_ 53 | */ 54 | FMMData(KERNEL kernelChoice_, PAXIS periodicity_, int multOrder_, int maxPts_, bool enableFF_ = true); 55 | 56 | /** 57 | * @brief Destroy the FMMData object 58 | * 59 | */ 60 | ~FMMData(); 61 | 62 | /** 63 | * @brief Set kernel function in pvfmm data structure 64 | * 65 | */ 66 | void setKernel(); 67 | 68 | // computation routines 69 | 70 | /** 71 | * @brief setup tree 72 | * 73 | * @param srcSLCoord single layer source coordinate 74 | * @param srcDLCoord double layer source coordinate 75 | * @param trgCoord target coordinate 76 | * @param ntreePts 77 | * @param treePtsPtr 78 | */ 79 | void 
setupTree(const std::vector &srcSLCoord, const std::vector &srcDLCoord, 80 | const std::vector &trgCoord, const int ntreePts = 0, const double *treePtsPtr = nullptr); 81 | 82 | /** 83 | * @brief runFMM 84 | * 85 | * @param srcSLValue [in] single layer source value 86 | * @param srcDLValue [in] double layer source value 87 | * @param trgValue [out] target value 88 | * @param scale 89 | */ 90 | void evaluateFMM(std::vector &srcSLValue, std::vector &srcDLValue, std::vector &trgValue, 91 | const double scale); 92 | 93 | /** 94 | * @brief directly evaluate kernel functions without FMM tree 95 | * 96 | * @param nThreads number of threads to use 97 | * @param chooseSD choose which kernel function to use 98 | * @param nSrc source number of points 99 | * @param srcCoordPtr source coordinate 100 | * @param srcValuePtr source value 101 | * @param nTrg target number of points 102 | * @param trgCoordPtr target coordinate 103 | * @param trgValuePtr target value 104 | */ 105 | void evaluateKernel(int nThreads, PPKERNEL chooseSD, // 106 | const int nSrc, double *srcCoordPtr, 107 | double *srcValuePtr, // 108 | const int nTrg, double *trgCoordPtr, double *trgValuePtr); 109 | 110 | /** 111 | * @brief delete the fmm tree 112 | * 113 | */ 114 | void deleteTree(); 115 | 116 | /** 117 | * @brief clear the FMM data 118 | * 119 | */ 120 | void clear(); 121 | 122 | /** 123 | * @brief if this kernel has DL 124 | * 125 | * @return true 126 | * @return false 127 | */ 128 | bool hasDL() const { return kernelFunctionPtr->dbl_layer_poten; } 129 | 130 | private: 131 | pvfmm::PtFMM *matrixPtr; ///< pvfmm PtFMM pointer 132 | pvfmm::PtFMM_Tree *treePtr; ///< pvfmm PtFMM_Tree pointer 133 | pvfmm::PtFMM_Data *treeDataPtr; ///< pvfmm PtFMM_Data pointer 134 | MPI_Comm comm; ///< MPI_comm communicator 135 | 136 | /** 137 | * @brief scale SrcSl and SrcDL Values before FMM call 138 | * operate on srcSLValue and srcDLValue 139 | * 140 | * @param srcSLValue 141 | * @param srcDLValue 142 | * @param 
scaleFactor 143 | */ 144 | void scaleSrc(std::vector &srcSLValue, std::vector &srcDLValue, const double scaleFactor); 145 | 146 | /** 147 | * @brief scale Trg Values after FMM call 148 | * operate on trgSLValue 149 | * 150 | * @param trgDLValue 151 | * @param scaleFactor 152 | */ 153 | void scaleTrg(std::vector &trgDLValue, const double scaleFactor); 154 | 155 | /** 156 | * @brief read the M2L Matrix from file 157 | * 158 | */ 159 | void readMat(const int kDim, const std::string &dataName, std::vector &data); 160 | 161 | /** 162 | * @brief setup this->M2Ldata, this->M2Cdata 163 | * 164 | */ 165 | void setupPeriodicData(); 166 | 167 | /** 168 | * @brief periodize the target values 169 | * 170 | * 171 | * @param trgValue 172 | */ 173 | void periodizeFMM(std::vector &trgValue); 174 | }; 175 | 176 | } // namespace impl 177 | } // namespace stkfmm 178 | #endif -------------------------------------------------------------------------------- /Lib/include/STKFMM/StokesLayerKernel.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file StokesLayerKernel.hpp 3 | * @author Wen Yan (wenyan4work@gmail.com) 4 | * @brief Stokes kernels 5 | * @version 0.1 6 | * @date 2019-12-23 7 | * 8 | * @copyright Copyright (c) 2019 9 | * 10 | */ 11 | #ifndef STOKESLAYERKERNEL_HPP 12 | #define STOKESLAYERKERNEL_HPP 13 | 14 | #include "StokesDoubleLayerKernel.hpp" 15 | #include "StokesSingleLayerKernel.hpp" 16 | 17 | /** 18 | * @brief inject into pvfmm namespace 19 | * 20 | */ 21 | namespace pvfmm { 22 | 23 | /** 24 | * @brief Stokes Layer Kernels 25 | * 26 | * @tparam T float or double 27 | */ 28 | template 29 | struct StokesLayerKernel { 30 | inline static const Kernel &Vel(); ///< Stokeslet 3x3, SL->Vel 31 | inline static const Kernel &PVel(); ///< SL+DL -> PVel 32 | inline static const Kernel &PVelGrad(); ///< SL+DL -> PVelGrad 33 | inline static const Kernel &PVelLaplacian(); ///< SL+DL -> PVelLaplacian 34 | inline static const Kernel 
&Traction(); ///< SL+DL -> Traction 35 | }; 36 | 37 | 38 | template 39 | inline const Kernel &StokesLayerKernel::Vel() { 40 | static Kernel ker = BuildKernel>("stokes_vel", 3, std::pair(3, 3)); 41 | return ker; 42 | } 43 | 44 | template 45 | inline const Kernel &StokesLayerKernel::PVel() { 46 | static Kernel stokes_pker = BuildKernel, stokes_doublepvel::Eval>( 47 | "stokes_PVel", 3, std::pair(4, 4)); 48 | stokes_pker.surf_dim = 9; 49 | return stokes_pker; 50 | } 51 | 52 | template 53 | inline const Kernel &StokesLayerKernel::PVelGrad() { 54 | static Kernel stokes_pker = BuildKernel, stokes_doublepvel::Eval>( 55 | "stokes_PVel", 3, std::pair(4, 4)); 56 | stokes_pker.surf_dim = 9; 57 | static Kernel stokes_pgker = BuildKernel, stokes_doublepvelgrad::Eval>( 58 | "stokes_PVelGrad", 3, std::pair(4, 16), &stokes_pker, &stokes_pker, NULL, &stokes_pker, &stokes_pker, 59 | NULL, &stokes_pker, NULL); 60 | stokes_pgker.surf_dim = 9; 61 | return stokes_pgker; 62 | } 63 | 64 | template 65 | inline const Kernel &StokesLayerKernel::PVelLaplacian() { 66 | static Kernel stokes_pker = BuildKernel, stokes_doublepvel::Eval>( 67 | "stokes_PVel", 3, std::pair(4, 4)); 68 | stokes_pker.surf_dim = 9; 69 | static Kernel stokes_pgker = 70 | BuildKernel, stokes_doublelaplacian::Eval>( 71 | "stokes_PVelLaplacian", 3, std::pair(4, 7), &stokes_pker, &stokes_pker, NULL, &stokes_pker, 72 | &stokes_pker, NULL, &stokes_pker, NULL); 73 | stokes_pgker.surf_dim = 9; 74 | return stokes_pgker; 75 | } 76 | 77 | template 78 | inline const Kernel &StokesLayerKernel::Traction() { 79 | static Kernel stokes_pker = BuildKernel, stokes_doublepvel::Eval>( 80 | "stokes_PVel", 3, std::pair(4, 4)); 81 | stokes_pker.surf_dim = 9; 82 | static Kernel stokes_pgker = 83 | BuildKernel, stokes_doubletraction::Eval>( 84 | "stokes_Traction", 3, std::pair(4, 9), &stokes_pker, &stokes_pker, NULL, &stokes_pker, 85 | &stokes_pker, NULL, &stokes_pker, NULL); 86 | stokes_pgker.surf_dim = 9; 87 | return stokes_pgker; 88 | } 89 | } 
// namespace pvfmm 90 | 91 | #endif 92 | -------------------------------------------------------------------------------- /Lib/include/STKFMM/stkfmm_helpers.hpp: -------------------------------------------------------------------------------- 1 | #ifndef STKFMM_HELPERS_HPP 2 | #define STKFMM_HELPERS_HPP 3 | 4 | #include 5 | 6 | // clang-format off 7 | // do not format macro 8 | 9 | namespace pvfmm { 10 | constexpr int SRC_BLK = 500; 11 | } // namespace pvfmm 12 | 13 | namespace stkfmm { 14 | 15 | /** 16 | * @brief delete the pointer ptr if not null 17 | * 18 | * @tparam T 19 | * @param ptr 20 | */ 21 | template 22 | void safeDeletePtr(T *&ptr) { 23 | if (ptr != nullptr) { 24 | delete ptr; 25 | ptr = nullptr; 26 | } 27 | } 28 | 29 | /** 30 | * @brief set x to its fractional part 31 | * 32 | * @param x 33 | */ 34 | inline void fracwrap(double &x) { x = x - floor(x); } 35 | 36 | /** 37 | * @brief generate equivalent point coordinate 38 | * 39 | * @tparam Real_t 40 | * @param p 41 | * @param c 42 | * @param alpha 43 | * @param depth 44 | * @return std::vector 45 | */ 46 | template 47 | std::vector surface(int p, Real_t *c, Real_t alpha, int depth) { 48 | int n_ = (6 * (p - 1) * (p - 1) + 2); // Total number of points. 
49 | 50 | std::vector coord(n_ * 3); 51 | coord[0] = coord[1] = coord[2] = -1.0; 52 | int cnt = 1; 53 | for (int i = 0; i < p - 1; i++) 54 | for (int j = 0; j < p - 1; j++) { 55 | coord[cnt * 3] = -1.0; 56 | coord[cnt * 3 + 1] = (2.0 * (i + 1) - p + 1) / (p - 1); 57 | coord[cnt * 3 + 2] = (2.0 * j - p + 1) / (p - 1); 58 | cnt++; 59 | } 60 | for (int i = 0; i < p - 1; i++) 61 | for (int j = 0; j < p - 1; j++) { 62 | coord[cnt * 3] = (2.0 * i - p + 1) / (p - 1); 63 | coord[cnt * 3 + 1] = -1.0; 64 | coord[cnt * 3 + 2] = (2.0 * (j + 1) - p + 1) / (p - 1); 65 | cnt++; 66 | } 67 | for (int i = 0; i < p - 1; i++) 68 | for (int j = 0; j < p - 1; j++) { 69 | coord[cnt * 3] = (2.0 * (i + 1) - p + 1) / (p - 1); 70 | coord[cnt * 3 + 1] = (2.0 * j - p + 1) / (p - 1); 71 | coord[cnt * 3 + 2] = -1.0; 72 | cnt++; 73 | } 74 | for (int i = 0; i < (n_ / 2) * 3; i++) 75 | coord[cnt * 3 + i] = -coord[i]; 76 | 77 | Real_t r = 0.5 * pow(0.5, depth); 78 | Real_t b = alpha * r; 79 | for (int i = 0; i < n_; i++) { 80 | coord[i * 3 + 0] = (coord[i * 3 + 0] + 1.0) * b + c[0]; 81 | coord[i * 3 + 1] = (coord[i * 3 + 1] + 1.0) * b + c[1]; 82 | coord[i * 3 + 2] = (coord[i * 3 + 2] + 1.0) * b + c[2]; 83 | } 84 | return coord; 85 | } 86 | 87 | } // namespace stkfmm 88 | 89 | #endif 90 | -------------------------------------------------------------------------------- /Lib/src/STKFMM.cpp: -------------------------------------------------------------------------------- 1 | #include "STKFMM/STKFMM.hpp" 2 | 3 | // extern pvfmm::PeriodicType pvfmm::periodicType; 4 | 5 | namespace stkfmm { 6 | 7 | bool get_verbosity() { 8 | char *verbose_env; 9 | verbose_env = getenv("STKFMM_VERBOSE"); 10 | if (verbose_env == nullptr || verbose_env[0] == '0') 11 | return false; 12 | 13 | return true; 14 | } 15 | 16 | const bool verbose = get_verbosity(); 17 | 18 | const std::unordered_map *> kernelMap = { 19 | {KERNEL::LapPGrad, &pvfmm::LaplaceLayerKernel::PGrad()}, 20 | {KERNEL::LapPGradGrad, 
&pvfmm::LaplaceLayerKernel::PGradGrad()}, 21 | {KERNEL::LapQPGradGrad, &pvfmm::LaplaceLayerKernel::QPGradGrad()}, 22 | {KERNEL::Stokes, &pvfmm::StokesLayerKernel::Vel()}, 23 | {KERNEL::RPY, &pvfmm::RPYKernel::ulapu()}, 24 | {KERNEL::StokesRegVel, &pvfmm::StokesRegKernel::Vel()}, 25 | {KERNEL::StokesRegVelOmega, &pvfmm::StokesRegKernel::FTVelOmega()}, 26 | {KERNEL::PVel, &pvfmm::StokesLayerKernel::PVel()}, 27 | {KERNEL::PVelGrad, &pvfmm::StokesLayerKernel::PVelGrad()}, 28 | {KERNEL::PVelLaplacian, &pvfmm::StokesLayerKernel::PVelLaplacian()}, 29 | {KERNEL::Traction, &pvfmm::StokesLayerKernel::Traction()}, 30 | // {KERNEL::LapGrad, &pvfmm::LaplaceLayerKernel::Grad()}, // for internal test only 31 | }; 32 | 33 | std::tuple getKernelDimension(KERNEL kernel_) { 34 | using namespace impl; 35 | const pvfmm::Kernel *kernelFunctionPtr = getKernelFunction(kernel_); 36 | int kdimSL = kernelFunctionPtr->ker_dim[0]; 37 | int kdimTrg = kernelFunctionPtr->ker_dim[1]; 38 | int kdimDL = kernelFunctionPtr->surf_dim; 39 | return std::tuple(kdimSL, kdimDL, kdimTrg); 40 | } 41 | 42 | std::string getKernelName(KERNEL kernel_) { 43 | using namespace impl; 44 | const pvfmm::Kernel *kernelFunctionPtr = getKernelFunction(kernel_); 45 | return kernelFunctionPtr->ker_name; 46 | } 47 | 48 | const pvfmm::Kernel *getKernelFunction(KERNEL kernelChoice_) { 49 | auto it = kernelMap.find(kernelChoice_); 50 | if (it != kernelMap.end()) { 51 | return it->second; 52 | } else { 53 | printf("Error: Kernel not found.\n"); 54 | std::exit(1); 55 | return nullptr; 56 | } 57 | } 58 | 59 | // base class STKFMM 60 | 61 | STKFMM::STKFMM(int multOrder_, int maxPts_, PAXIS pbc_, unsigned int kernelComb_, bool enableFF_) 62 | : multOrder(multOrder_), maxPts(maxPts_), pbc(pbc_), kernelComb(kernelComb_) { 63 | using namespace impl; 64 | 65 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 66 | 67 | #ifdef FMMDEBUG 68 | pvfmm::Profile::Enable(true); 69 | if (myRank == 0) 70 | printf("FMM Initialized\n"); 71 | #endif 72 | } 73 | 
74 | void STKFMM::setBox(double origin_[3], double len_) { 75 | origin[0] = origin_[0]; 76 | origin[1] = origin_[1]; 77 | origin[2] = origin_[2]; 78 | len = len_; 79 | // find and calculate scale & shift factor to map the box to [0,1) 80 | scaleFactor = 1.0 / len; 81 | // new coordinate = (pos-origin)*scaleFactor, in [0,1) 82 | 83 | if (stkfmm::verbose && rank == 0) { 84 | std::cout << "scale factor " << scaleFactor << std::endl; 85 | } 86 | }; 87 | 88 | void STKFMM::evaluateKernel(const KERNEL kernel, const int nThreads, const PPKERNEL p2p, const int nSrc, 89 | double *srcCoordPtr, double *srcValuePtr, const int nTrg, double *trgCoordPtr, 90 | double *trgValuePtr) { 91 | using namespace impl; 92 | if (poolFMM.find(kernel) == poolFMM.end()) { 93 | std::cout << "Error: no such FMMData exists for kernel " << getKernelName(kernel) << std::endl; 94 | exit(1); 95 | } 96 | FMMData &fmm = *((*poolFMM.find(kernel)).second); 97 | 98 | fmm.evaluateKernel(nThreads, p2p, nSrc, srcCoordPtr, srcValuePtr, nTrg, trgCoordPtr, trgValuePtr); 99 | } 100 | 101 | void STKFMM::showActiveKernels() const { 102 | if (!rank) { 103 | std::cout << "active kernels: "; 104 | for (auto it : kernelMap) { 105 | if (kernelComb & asInteger(it.first)) { 106 | std::cout << "\t" << it.second->ker_name; 107 | } 108 | } 109 | std::cout << std::endl; 110 | } 111 | } 112 | 113 | void STKFMM::scaleCoord(const int npts, double *coordPtr) const { 114 | // scale and shift points to [0,1) 115 | const double sF = this->scaleFactor; 116 | 117 | #pragma omp parallel for 118 | for (int i = 0; i < npts; i++) { 119 | for (int j = 0; j < 3; j++) { 120 | coordPtr[3 * i + j] = (coordPtr[3 * i + j] - origin[j]) * sF; 121 | } 122 | } 123 | } 124 | 125 | void STKFMM::wrapCoord(const int npts, double *coordPtr) const { 126 | // wrap periodic images 127 | if (pbc == PAXIS::PX) { 128 | #pragma omp parallel for 129 | for (int i = 0; i < npts; i++) { 130 | fracwrap(coordPtr[3 * i]); 131 | } 132 | } else if (pbc == PAXIS::PXY) { 
133 | #pragma omp parallel for 134 | for (int i = 0; i < npts; i++) { 135 | fracwrap(coordPtr[3 * i]); 136 | fracwrap(coordPtr[3 * i + 1]); 137 | } 138 | } else if (pbc == PAXIS::PXYZ) { 139 | #pragma omp parallel for 140 | for (int i = 0; i < npts; i++) { 141 | fracwrap(coordPtr[3 * i]); 142 | fracwrap(coordPtr[3 * i + 1]); 143 | fracwrap(coordPtr[3 * i + 2]); 144 | } 145 | } else { 146 | assert(pbc == PAXIS::NONE); 147 | } 148 | 149 | return; 150 | } 151 | 152 | } // namespace stkfmm 153 | -------------------------------------------------------------------------------- /Lib/src/Stk3DFMM-c.cpp: -------------------------------------------------------------------------------- 1 | #include "STKFMM/STKFMM.hpp" 2 | 3 | extern "C" { 4 | using namespace stkfmm; 5 | Stk3DFMM *Stk3DFMM_create(int mult_order, int max_pts, int pbc, unsigned kernelComb) { 6 | return new Stk3DFMM(mult_order, max_pts, static_cast(pbc), kernelComb); 7 | } 8 | 9 | void Stk3DFMM_destroy(Stk3DFMM *fmm) { 10 | delete fmm; 11 | } 12 | 13 | void Stk3DFMM_set_points(Stk3DFMM *fmm, const int nSL, double *src_SL_coord, const int nTrg, double *trg_coord, 14 | const int nDL, double *src_DL_coord) { 15 | fmm->setPoints(nSL, src_SL_coord, nTrg, trg_coord, nDL, src_DL_coord); 16 | } 17 | 18 | void Stk3DFMM_get_kernel_dimension(unsigned kernel, int *dims) { 19 | std::tie(dims[0], dims[1], dims[2]) = getKernelDimension(static_cast(kernel)); 20 | } 21 | 22 | void Stk3DFMM_set_box(Stk3DFMM *fmm, double *origin, double len) { fmm->setBox(origin, len); } 23 | 24 | void Stk3DFMM_setup_tree(Stk3DFMM *fmm, unsigned kernel) { fmm->setupTree(static_cast(kernel)); } 25 | 26 | void Stk3DFMM_clear_fmm(Stk3DFMM *fmm, unsigned kernel) { fmm->clearFMM(static_cast(kernel)); } 27 | 28 | void Stk3DFMM_evaluate_fmm(Stk3DFMM *fmm, unsigned kernel, const int nSL, double *src_SL_value, const int nTrg, 29 | double *trg_value, const int nDL, double *src_DL_value) { 30 | fmm->evaluateFMM(static_cast(kernel), nSL, src_SL_value, nTrg, 
trg_value, nDL, src_DL_value); 31 | } 32 | 33 | void Stk3DFMM_show_active_kernels(Stk3DFMM *fmm) { 34 | fmm->showActiveKernels(); 35 | } 36 | 37 | StkWallFMM *StkWallFMM_create(int mult_order, int max_pts, int pbc, unsigned kernelComb) { 38 | return new StkWallFMM(mult_order, max_pts, static_cast(pbc), kernelComb); 39 | } 40 | 41 | void StkWallFMM_destroy(StkWallFMM *fmm) { 42 | delete fmm; 43 | } 44 | 45 | void StkWallFMM_set_points(StkWallFMM *fmm, const int nSL, double *src_SL_coord, const int nTrg, double *trg_coord, 46 | const int nDL, double *src_DL_coord) { 47 | fmm->setPoints(nSL, src_SL_coord, nTrg, trg_coord, nDL, src_DL_coord); 48 | } 49 | 50 | void StkWallFMM_get_kernel_dimension(unsigned kernel, int *dims) { 51 | std::tie(dims[0], dims[1], dims[2]) = getKernelDimension(static_cast(kernel)); 52 | } 53 | 54 | void StkWallFMM_set_box(StkWallFMM *fmm, double *origin, double len) { fmm->setBox(origin, len); } 55 | 56 | void StkWallFMM_setup_tree(StkWallFMM *fmm, unsigned kernel) { fmm->setupTree(static_cast(kernel)); } 57 | 58 | void StkWallFMM_clear_fmm(StkWallFMM *fmm, unsigned kernel) { fmm->clearFMM(static_cast(kernel)); } 59 | 60 | void StkWallFMM_evaluate_fmm(StkWallFMM *fmm, unsigned kernel, const int nSL, double *src_SL_value, const int nTrg, 61 | double *trg_value, const int nDL, double *src_DL_value) { 62 | fmm->evaluateFMM(static_cast(kernel), nSL, src_SL_value, nTrg, trg_value, nDL, src_DL_value); 63 | } 64 | 65 | void StkWallFMM_show_active_kernels(StkWallFMM *fmm) { 66 | fmm->showActiveKernels(); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /Lib/src/Stk3DFMM.cpp: -------------------------------------------------------------------------------- 1 | #include "STKFMM/STKFMM.hpp" 2 | #include 3 | 4 | namespace stkfmm { 5 | 6 | Stk3DFMM::Stk3DFMM(int multOrder_, int maxPts_, PAXIS pbc_, unsigned int kernelComb_, bool enableFF_) 7 | : STKFMM(multOrder_, maxPts_, pbc_, kernelComb_) { 8 | using 
namespace impl; 9 | poolFMM.clear(); 10 | 11 | for (const auto &it : kernelMap) { 12 | const auto kernel = it.first; 13 | if (kernelComb & asInteger(kernel)) { 14 | poolFMM[kernel] = new FMMData(kernel, pbc, multOrder, maxPts, enableFF_); 15 | if (!rank) 16 | std::cout << "enable kernel " << it.second->ker_name << std::endl; 17 | } 18 | } 19 | 20 | if (poolFMM.empty()) { 21 | std::cout << "Error: no kernel activated\n"; 22 | } 23 | } 24 | 25 | Stk3DFMM::~Stk3DFMM() { 26 | // delete all FMMData 27 | for (auto &fmm : poolFMM) { 28 | safeDeletePtr(fmm.second); 29 | } 30 | } 31 | 32 | void Stk3DFMM::setPoints(const int nSL, const double *srcSLCoordPtr, const int nTrg, const double *trgCoordPtr, 33 | const int nDL, const double *srcDLCoordPtr) { 34 | 35 | if (!poolFMM.empty()) { 36 | for (auto &fmm : poolFMM) { 37 | // if (rank == 0) 38 | // printf("kernel %u \n", asInteger(fmm.second->kernelChoice)); 39 | fmm.second->deleteTree(); 40 | } 41 | if (stkfmm::verbose && rank == 0) 42 | std::cout << "ALL FMM Tree Cleared\n"; 43 | } 44 | 45 | // setup point coordinates 46 | auto setCoord = [&](const int nPts, const double *coordPtr, std::vector &coord) { 47 | coord.resize(nPts * 3); 48 | std::copy(coordPtr, coordPtr + 3 * nPts, coord.begin()); 49 | scaleCoord(nPts, coord.data()); 50 | wrapCoord(nPts, coord.data()); 51 | }; 52 | 53 | #pragma omp parallel sections 54 | { 55 | #pragma omp section 56 | { setCoord(nSL, srcSLCoordPtr, srcSLCoordInternal); } 57 | #pragma omp section 58 | { 59 | if (nDL > 0 && srcDLCoordPtr != nullptr) 60 | setCoord(nDL, srcDLCoordPtr, srcDLCoordInternal); 61 | } 62 | #pragma omp section 63 | { setCoord(nTrg, trgCoordPtr, trgCoordInternal); } 64 | } 65 | 66 | if (stkfmm::verbose && rank == 0) 67 | std::cout << "points set\n"; 68 | } 69 | 70 | void Stk3DFMM::setupTree(KERNEL kernel) { 71 | auto &fmmPtr = poolFMM[kernel]; 72 | if (fmmPtr->hasDL()) { 73 | poolFMM[kernel]->setupTree(srcSLCoordInternal, srcDLCoordInternal, trgCoordInternal); 74 | } else { 
75 | std::vector empty; 76 | poolFMM[kernel]->setupTree(srcSLCoordInternal, empty, trgCoordInternal); 77 | } 78 | } 79 | 80 | void Stk3DFMM::evaluateFMM(const KERNEL kernel, const int nSL, const double *srcSLValuePtr, const int nTrg, 81 | double *trgValuePtr, const int nDL, const double *srcDLValuePtr) { 82 | 83 | using namespace impl; 84 | if (poolFMM.find(kernel) == poolFMM.end()) { 85 | std::cout << "Error: no such FMMData exists for kernel " << getKernelName(kernel) << std::endl; 86 | exit(1); 87 | } 88 | FMMData &fmm = *((*poolFMM.find(kernel)).second); 89 | 90 | srcSLValueInternal.resize(nSL * fmm.kdimSL); 91 | trgValueInternal.resize(nTrg * fmm.kdimTrg); 92 | std::copy(srcSLValuePtr, srcSLValuePtr + nSL * fmm.kdimSL, srcSLValueInternal.begin()); 93 | 94 | // run FMM with proper scaling 95 | if (fmm.hasDL()) { 96 | srcDLValueInternal.resize(nDL * fmm.kdimDL); 97 | std::copy(srcDLValuePtr, srcDLValuePtr + nDL * fmm.kdimDL, srcDLValueInternal.begin()); 98 | fmm.evaluateFMM(srcSLValueInternal, srcDLValueInternal, trgValueInternal, scaleFactor); 99 | } else { 100 | std::vector empty(0); 101 | fmm.evaluateFMM(srcSLValueInternal, empty, trgValueInternal, scaleFactor); 102 | } 103 | 104 | const int nloop = nTrg * fmm.kdimTrg; 105 | #pragma omp parallel for 106 | for (int i = 0; i < nloop; i++) { 107 | trgValuePtr[i] += trgValueInternal[i]; 108 | } 109 | 110 | return; 111 | } 112 | 113 | void Stk3DFMM::clearFMM(KERNEL kernel) { 114 | trgValueInternal.clear(); 115 | auto it = poolFMM.find(kernel); 116 | if (it != poolFMM.end()) 117 | it->second->clear(); 118 | else { 119 | std::cout << "kernel not found\n"; 120 | std::exit(1); 121 | } 122 | } 123 | 124 | } // namespace stkfmm 125 | -------------------------------------------------------------------------------- /M2L/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(M2LLaplace Laplace/main.cpp Laplace/Laplace1D3D.cpp 2 | Laplace/Laplace2D3D.cpp 
Laplace/Laplace3D3D.cpp) 3 | target_link_libraries(M2LLaplace Eigen3::Eigen OpenMP::OpenMP_CXX MPI::MPI_CXX) 4 | target_include_directories(M2LLaplace PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) 5 | 6 | add_executable(M2LStokeslet Stokeslet/main.cpp Stokeslet/Stokes1D3D.cpp 7 | Stokeslet/Stokes2D3D.cpp Stokeslet/Stokes3D3D.cpp) 8 | target_link_libraries(M2LStokeslet Eigen3::Eigen OpenMP::OpenMP_CXX 9 | MPI::MPI_CXX) 10 | target_include_directories(M2LStokeslet PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) 11 | 12 | add_executable( 13 | M2LStokesPVel StokesPVel/main.cpp StokesPVel/StokesPVel1D3D.cpp 14 | StokesPVel/StokesPVel2D3D.cpp StokesPVel/StokesPVel3D3D.cpp) 15 | target_link_libraries(M2LStokesPVel Eigen3::Eigen OpenMP::OpenMP_CXX 16 | MPI::MPI_CXX) 17 | target_include_directories(M2LStokesPVel PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) 18 | 19 | add_executable(SVD svd_test.cpp) 20 | target_link_libraries(SVD Eigen3::Eigen OpenMP::OpenMP_CXX MPI::MPI_CXX) 21 | # target_include_directories(SVD PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) 22 | 23 | -------------------------------------------------------------------------------- /M2L/Laplace/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace Laplace1D3D { 5 | int main(int argc, char *argv[]); 6 | } 7 | 8 | namespace Laplace2D3D { 9 | int main(int argc, char *argv[]); 10 | } 11 | 12 | namespace Laplace3D3D { 13 | int main(int argc, char *argv[]); 14 | } 15 | 16 | int main(int argc, char *argv[]) { 17 | if (argc != 3) { 18 | std::cerr << "Input: {dim} {N}.\n"; 19 | return 1; 20 | } 21 | 22 | int dim = atoi(argv[1]); 23 | argc--; 24 | argv++; 25 | int status = 0; /* exit code of the selected driver, propagated to the caller */ 26 | switch (dim) { 27 | case 1: 28 | status = Laplace1D3D::main(argc, argv); 29 | break; 30 | case 2: 31 | status = Laplace2D3D::main(argc, argv); 32 | break; 33 | case 3: 34 | status = Laplace3D3D::main(argc, argv); 35 | break; 36 | default: 37 | std::cerr << "Dimension {" << dim << "} not supported for charge.\n"; 38 | return 1; 39 | } 40 | 
return status; 41 | } 42 | -------------------------------------------------------------------------------- /M2L/StokesPVel/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace StokesPVel1D3D { 4 | int main(int argc, char *argv[]); 5 | } 6 | 7 | namespace StokesPVel2D3D { 8 | int main(int argc, char *argv[]); 9 | } 10 | 11 | namespace StokesPVel3D3D { 12 | int main(int argc, char *argv[]); 13 | } 14 | 15 | int main(int argc, char *argv[]) { 16 | if (argc != 3) { 17 | std::cerr << "Input: {dim} {N}.\n"; 18 | return 1; 19 | } 20 | int status = 0; /* exit code of the selected driver, propagated to the caller */ 21 | int dim = atoi(argv[1]); 22 | argc--; 23 | argv++; 24 | switch (dim) { 25 | case 1: 26 | status = StokesPVel1D3D::main(argc, argv); 27 | break; 28 | case 2: 29 | status = StokesPVel2D3D::main(argc, argv); 30 | break; 31 | case 3: 32 | status = StokesPVel3D3D::main(argc, argv); 33 | break; 34 | default: 35 | std::cerr << "Invalid dimension {" << dim << "}.\n"; 36 | return 1; 37 | } 38 | 39 | return status; 40 | } 41 | -------------------------------------------------------------------------------- /M2L/Stokeslet/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace Stokes1D3D { 4 | int main(int argc, char *argv[]); 5 | } 6 | 7 | namespace Stokes2D3D { 8 | int main(int argc, char *argv[]); 9 | } 10 | 11 | namespace Stokes3D3D { 12 | int main(int argc, char *argv[]); 13 | } 14 | 15 | int main(int argc, char *argv[]) { 16 | if (argc != 3) { 17 | std::cerr << "Input: {dim} {N}.\n"; 18 | return 1; 19 | } 20 | int status = 0; /* exit code of the selected driver, propagated to the caller */ 21 | int dim = atoi(argv[1]); 22 | argc--; 23 | argv++; 24 | switch (dim) { 25 | case 1: 26 | status = Stokes1D3D::main(argc, argv); 27 | break; 28 | case 2: 29 | status = Stokes2D3D::main(argc, argv); 30 | break; 31 | case 3: 32 | status = Stokes3D3D::main(argc, argv); 33 | break; 34 | default: 35 | std::cerr << "Invalid dimension {" << dim << "}.\n"; 36 | return 1; 37 | } 38 | 39 | return status; 40 | } 41 | 
-------------------------------------------------------------------------------- /M2L/gen.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | 4 | def runCmd(cmd): 5 | print(cmd) 6 | process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) 7 | process.wait() 8 | print(process.returncode) 9 | return 10 | 11 | 12 | for kernel in ['Laplace', 'Stokeslet', 'StokesPVel']: 13 | for m in [6, 8, 10, 12, 14, 16]: 14 | for dim in [1, 2, 3]: 15 | cmd = './M2L'+kernel+' {:d} {:d} > log'.format(dim, m)+kernel+'_{:d}_{:d}'.format( 16 | dim, m) 17 | runCmd(cmd) 18 | -------------------------------------------------------------------------------- /M2L/svd_test.cpp: -------------------------------------------------------------------------------- 1 | #include "SVD_pvfmm.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | // Assume A=(m,n), m>n 12 | // U = (m,n), S = (n,n), VT = (n,n) 13 | void testSVD(const EMat &U, const EVec &Sdiag, const EMat &VT, const EMat &A, const EVec &x, const EVec &b) { 14 | EMat S(U.cols(), VT.rows()); 15 | S = Sdiag.asDiagonal(); 16 | 17 | // step 1, test if USVT==A 18 | EMat Arecon = U * (S * VT); 19 | EMat Aerror = Arecon - A; 20 | printf("Aerror max min %g, %g\n", Aerror.maxCoeff(), Aerror.minCoeff()); 21 | 22 | // step 2, test backward error 23 | EVec Sdiaginv = Sdiag; 24 | for (int i = 0; i < Sdiaginv.size(); i++) { 25 | Sdiaginv[i] = Sdiaginv[i] < Sdiag[0] * eps ? 
0 : 1.0 / Sdiaginv[i]; 26 | } 27 | 28 | EMat V = VT.transpose(); 29 | for (int i = 0; i < Sdiaginv.size(); i++) { 30 | V.col(i) *= Sdiaginv[i]; 31 | } 32 | 33 | EVec x2 = V * (U.transpose() * b); 34 | EVec b2 = A * x2; 35 | EVec xerror = x2 - x; 36 | EVec berror = b2 - b; 37 | printf("xerror max min %g, %g\n", xerror.maxCoeff(), xerror.minCoeff()); 38 | printf("berror max min %g, %g\n", berror.maxCoeff(), berror.minCoeff()); 39 | } 40 | 41 | inline double pot(const EVec3 &target, const EVec3 &source) { 42 | EVec3 rst = target - source; 43 | double rnorm = rst.norm(); 44 | return rnorm < eps ? 0 : 1 / rnorm; 45 | } 46 | 47 | int main(int argc, char **argv) { 48 | Eigen::initParallel(); 49 | if (argc < 2) { printf("Input: {p}.\n"); return 1; } /* argv[1] is read below; without this guard a missing argument passes a null pointer to atoi */ 50 | const int pEquiv = atoi(argv[1]); // (8-1)^2*6 + 2 points 51 | const int pCheck = atoi(argv[1]); 52 | const double scaleEquiv = 1.05; 53 | const double scaleCheck = 2.95; 54 | const double pCenterEquiv[3] = {-(scaleEquiv - 1) / 2, -(scaleEquiv - 1) / 2, -(scaleEquiv - 1) / 2}; 55 | const double pCenterCheck[3] = {-(scaleCheck - 1) / 2, -(scaleCheck - 1) / 2, -(scaleCheck - 1) / 2}; 56 | 57 | auto pointMEquiv = surface(pEquiv, (double *)&(pCenterEquiv[0]), scaleEquiv, 0); 58 | auto pointMCheck = surface(pCheck, (double *)&(pCenterCheck[0]), scaleCheck, 0); 59 | 60 | // Aup for solving MEquiv 61 | const int equivN = pointMEquiv.size() / 3; 62 | const int checkN = pointMCheck.size() / 3; 63 | EMat Aup(checkN, equivN); 64 | for (int k = 0; k < checkN; k++) { 65 | EVec3 Cpoint(pointMCheck[3 * k], pointMCheck[3 * k + 1], pointMCheck[3 * k + 2]); 66 | for (int l = 0; l < equivN; l++) { 67 | const EVec3 Lpoint(pointMEquiv[3 * l], pointMEquiv[3 * l + 1], pointMEquiv[3 * l + 2]); 68 | Aup(k, l) = pot(Cpoint, Lpoint); 69 | } 70 | } 71 | 72 | EVec x(Aup.cols()); 73 | x.setRandom(); 74 | EVec b = Aup * x; 75 | 76 | // jacobi svd 77 | using std::cout; 78 | using std::endl; 79 | 80 | { 81 | cout << "JacobiSVD" << endl; 82 | Eigen::JacobiSVD svd(Aup, Eigen::ComputeThinU | 
Eigen::ComputeThinV); 83 | EMat U = svd.matrixU(); 84 | EMat VT = svd.matrixV().transpose(); 85 | EVec Svec = svd.singularValues(); 86 | testSVD(U, Svec, VT, Aup, x, b); 87 | } 88 | // this triggers error #13212make -j: Reference to ebx in function requiring stack alignment 89 | // { 90 | // cout << "BDCSVD" << endl; 91 | // Eigen::BDCSVD svd(Aup, Eigen::ComputeThinU | Eigen::ComputeThinV); 92 | // EMat U = svd.matrixU(); 93 | // EMat VT = svd.matrixV().transpose(); 94 | // EVec Svec = svd.singularValues(); 95 | // testSVD(U, Svec, VT, Aup, x, b); 96 | // } 97 | { 98 | cout << "HouseholderQR" << endl; 99 | EVec x2 = Aup.colPivHouseholderQr().solve(b); 100 | EVec b2 = Aup * x2; 101 | EVec xerror = x2 - x; 102 | EVec berror = b2 - b; 103 | printf("xerror max min %g, %g\n", xerror.maxCoeff(), xerror.minCoeff()); 104 | printf("berror max min %g, %g\n", berror.maxCoeff(), berror.minCoeff()); 105 | } 106 | 107 | return 0; 108 | } -------------------------------------------------------------------------------- /Python/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(PyInterface) 2 | # FIXME: We should just move all python stuff to pip 3 | install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/PySTKFMM.py DESTINATION ${CMAKE_INSTALL_LIBDIR}/python) 4 | 5 | endif() 6 | -------------------------------------------------------------------------------- /Python/check_flux.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.integrate import trapz 3 | 4 | vecnames = ["ux.xhat", "uy.yhat", "uz.zhat", "lapux.xhat", "lapuy.yhat", "lapuz.zhat"] 5 | 6 | for p in [6, 8, 10, 12, 14]: 7 | with open("p{}_trg_K16.txt".format(p), "r") as f: 8 | r_vectors = [] 9 | ulapu = [] 10 | data = np.loadtxt(f, delimiter=',') 11 | r_vectors = data[:, :3] 12 | ulapu = data[:, 3:] 13 | 14 | # move periodic vectors back where they belong 15 | # r_vectors[r_vectors==1]=0.0 16 | 17 | # Get number 
of points in each dimension 18 | nPts = int(round((r_vectors.shape[0])**(1./3.))) 19 | assert(nPts**3 == r_vectors.shape[0]) 20 | x = r_vectors[0:nPts, 2] 21 | 22 | print("p: {}".format(p)) 23 | # calc flux for u and lapu 24 | for dim in range(0, 6): 25 | val = ulapu[r_vectors[:, dim % 3] == 0, dim].reshape((nPts, nPts)) 26 | 27 | print("{}: {}".format(vecnames[dim], trapz(trapz(val, x), x))) 28 | -------------------------------------------------------------------------------- /Python/example.py: -------------------------------------------------------------------------------- 1 | from mpi4py import MPI 2 | import numpy as np 3 | import timer 4 | from PySTKFMM import Stk3DFMM, DArray, KERNEL 5 | import kernels as kr 6 | 7 | # Get MPI parameters 8 | comm = MPI.COMM_WORLD 9 | rank = comm.Get_rank() 10 | 11 | 12 | # Convenience wrapper to print only on MPI rank 0 13 | def printer(*args, **kwargs): 14 | if rank == 0: 15 | print(*args, **kwargs) 16 | 17 | 18 | # Convenience wrapper to remove timer.timer() spam 19 | def time_func(name, func, *args, **kwargs): 20 | timer.timer(name) 21 | res = func(*args, **kwargs) 22 | timer.timer(name) 23 | return res 24 | 25 | 26 | def calc_true_value(kernel, src_SL_coord, trg_coord, src_SL_value, src_DL_coord, src_DL_value): 27 | epsilon_distance = 2e-4 28 | if kernel == KERNEL.PVel: 29 | trg_value = kr.StokesSLPVel(src_SL_coord, trg_coord, src_SL_value, epsilon_distance=epsilon_distance) 30 | trg_value += kr.StokesDLPVel(src_DL_coord, trg_coord, src_DL_value, epsilon_distance=epsilon_distance) 31 | elif kernel == KERNEL.PVelGrad: 32 | trg_value = kr.StokesSLPVelGrad(src_SL_coord, trg_coord, src_SL_value, epsilon_distance=epsilon_distance) 33 | trg_value += kr.StokesDLPVelGrad(src_DL_coord, trg_coord, src_DL_value, epsilon_distance=epsilon_distance) 34 | elif kernel == KERNEL.PVelLaplacian: 35 | trg_value = kr.StokesSLPVelLaplacian(src_SL_coord, trg_coord, src_SL_value, epsilon_distance=epsilon_distance) 36 | trg_value += 
kr.StokesDLPVelLaplacian(src_DL_coord, trg_coord, src_DL_value, epsilon_distance=epsilon_distance) 37 | elif kernel == KERNEL.Traction: 38 | trg_value = kr.StokesSLTraction(src_SL_coord, trg_coord, src_SL_value, epsilon_distance=epsilon_distance) 39 | trg_value += kr.StokesDLTraction(src_DL_coord, trg_coord, src_DL_value, epsilon_distance=epsilon_distance) 40 | elif kernel == KERNEL.LapPGrad: 41 | trg_value = kr.LaplaceSLPGrad(src_SL_coord, trg_coord, src_SL_value, epsilon_distance=epsilon_distance) 42 | trg_value += kr.LaplaceDLPGrad(src_DL_coord, trg_coord, src_DL_value, epsilon_distance=epsilon_distance) 43 | else: 44 | trg_value = None 45 | return trg_value 46 | 47 | 48 | # FMM parameters 49 | mult_order = 10 # Multipole order (higher is slower, but more accurate) 50 | max_pts = 128 # Max points per OctTree leaf 51 | pbc = 0 # Number of dimensions to periodize (0, 1, 2, 3) 52 | kernels = [KERNEL.PVel, KERNEL.PVelGrad, KERNEL.PVelLaplacian, KERNEL.Traction, KERNEL.LapPGrad] 53 | verify = True 54 | 55 | # Create sources and targets coordinates 56 | nsrc_SL = 1000 57 | nsrc_DL = 1000 58 | ntrg = 1000 59 | 60 | # Create points on rank = 0, handled by DArray wrapper 61 | # DArray only necessary when using MPI, otherwise a plain numpy object is fine 62 | src_SL_coord = DArray(None if rank else np.random.rand(nsrc_SL, 3)) 63 | src_DL_coord = DArray(None if rank else np.random.rand(nsrc_DL, 3)) 64 | trg_coord = DArray(None if rank else np.random.rand(ntrg, 3)) 65 | 66 | # Distribute points among MPI ranks 67 | src_SL_coord.scatter() 68 | src_DL_coord.scatter() 69 | trg_coord.scatter() 70 | 71 | for kernel in kernels: 72 | # Create FMM 73 | printer("\n\n==============================") 74 | fmm = time_func('create_fmm', Stk3DFMM, mult_order, max_pts, pbc, kernel) 75 | kdimSL, kdimDL, kdimTrg = time_func('get_kernel_dimension', fmm.get_kernel_dimension, kernel) 76 | 77 | printer("kdimSL = {}\nkdimDL = {}\nkdimTrg = {}".format(kdimSL, kdimDL, kdimTrg)) 78 | 79 | # Create 
source and target values 80 | src_SL_value = DArray(None if rank else np.random.randn(nsrc_SL, kdimSL)) 81 | src_DL_value = DArray(None if rank else np.random.randn(nsrc_DL, kdimDL)) 82 | trg_value = DArray(None if rank else np.zeros((ntrg, kdimTrg))) 83 | 84 | src_SL_value.scatter() 85 | src_DL_value.scatter() 86 | trg_value.scatter() 87 | 88 | # Create box, add points, and build tree 89 | # Tree only needs to be built when points move - their values can change 90 | time_func('set_box', fmm.set_box, np.zeros(3), 1.0) 91 | time_func('set_points', fmm.set_points, src_SL_coord.chunk, trg_coord.chunk, src_DL_coord.chunk) 92 | time_func('setup_tree', fmm.setup_tree, kernel) 93 | 94 | # Evaluate FMM 95 | time_func('evaluate_fmm', fmm.evaluate_fmm, kernel, src_SL_value.chunk, trg_value.chunk, src_DL_value.chunk) 96 | 97 | # Clear FMM and evaluate again 98 | trg_value.chunk[:, :] = 0 99 | time_func('clear_fmm', fmm.clear_fmm, kernel) 100 | time_func('evaluate_fmm', fmm.evaluate_fmm, kernel, src_SL_value.chunk, trg_value.chunk, src_DL_value.chunk) 101 | 102 | # Collect target points into rank=0 array (for MPI) 103 | src_SL_value.gather() 104 | src_DL_value.gather() 105 | trg_value.gather() 106 | if verify and rank == 0: 107 | trg_value_true = time_func('true_value', calc_true_value, 108 | kernel, src_SL_coord.data, 109 | trg_coord.data, src_SL_value.data, 110 | src_DL_coord.data, src_DL_value.data) 111 | if trg_value_true is not None: 112 | diff = trg_value.data - trg_value_true 113 | printer('relative L2 error = ', np.linalg.norm(diff) / np.linalg.norm(trg_value_true)) 114 | printer('Linf error = ', np.linalg.norm(diff.flatten(), ord=np.inf)) 115 | 116 | comm.Barrier() 117 | if rank == 0: 118 | timer.timer(None, print_all=True) 119 | printer('# End') 120 | -------------------------------------------------------------------------------- /Python/rpy_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import 
def rotne_prager_tensor(r_vectors, eta, a):
    '''
    Build the free-space Rotne-Prager mobility matrix for spheres of radius a
    centred at the positions in r_vectors, in a fluid of viscosity eta.

    r_vectors is reshaped to (N, 3); the returned dense matrix is 3N x 3N with
    the 3x3 block coupling particles i and j stored at rows 3i..3i+2 and
    columns 3j..3j+2.  Three regimes are used, selected by the pair distance r:
    r > 2a (non-overlapping), 0 < r <= 2a (overlapping) and r == 0 (same
    position, which includes every particle paired with itself).
    '''
    pos = r_vectors.reshape((r_vectors.size // 3, 3))

    # disp[k][i, j] = pos[j, k] - pos[i, k]: pairwise displacement per axis
    disp = [pos[:, k] - pos[:, k][:, None] for k in range(3)]
    dist = np.sqrt(disp[0]**2 + disp[1]**2 + disp[2]**2)

    # Classify every pair into exactly one of the three regimes
    far = dist > 2.0 * a
    coincident = dist == 0.
    overlap = ~far & ~coincident

    # Scalar functions: each 3x3 block is f(r) * I + g(r) * (d outer d)
    prefactor = 1.0 / (6.0 * np.pi * eta)
    f = np.zeros_like(dist)
    g = np.zeros_like(dist)

    r_far = dist[far]
    f[far] = prefactor * (0.75 / r_far + a**2 / (2.0 * r_far**3))
    g[far] = prefactor * (0.75 / r_far**3 - 1.5 * a**2 / r_far**5)

    # Coincident pairs get the isotropic self term; g stays zero there
    f[coincident] = (prefactor / a)

    r_ov = dist[overlap]
    f[overlap] = prefactor * (1.0 / a - 0.28125 * r_ov / a**2)
    g[overlap] = prefactor * (3.0 / (32.0 * a**2 * r_ov))

    # Assemble the 3N x 3N matrix one Cartesian component pair at a time
    n = pos.size
    mobility = np.zeros((n, n))
    for i in range(3):
        for j in range(3):
            block = g * disp[i] * disp[j]
            if i == j:
                block = f + block
            mobility[i::3, j::3] = block
    return mobility
Each particle can have separate diameter, but we'll set it to a const here 70 | a = 0.01 71 | 72 | # Get MPI parameters 73 | comm = MPI.COMM_WORLD 74 | rank = comm.Get_rank() 75 | 76 | # Create source and target coordinates 77 | nsrc = 100 78 | ntrg = nsrc 79 | src_coord = np.random.rand(nsrc, 3) 80 | trg_coord = src_coord 81 | sys.stdout.flush() 82 | comm.Barrier() 83 | 84 | # Setup FMM 85 | myFMM = PySTKFMM.Stk3DFMM(mult_order, max_pts, pbc, kernel) 86 | kdim, _, kdimTrg = myFMM.get_kernel_dimension(kernel) 87 | 88 | # Create sources and target values 89 | src_value = np.random.randn(nsrc, kdim) 90 | src_value[:, 3] = a 91 | trg_value = np.zeros((ntrg, kdimTrg)) 92 | if rank == 0: 93 | myFMM.show_active_kernels() 94 | print('kdimSL = ', kdim) 95 | print('kdimTrg = ', kdimTrg) 96 | 97 | # Set tree 98 | myFMM.set_box(np.array([0.0, 0.0, 0.0]), 2.0) 99 | myFMM.set_points(src_coord, trg_coord, np.empty(shape=(0,0))) 100 | myFMM.setup_tree(kernel) 101 | 102 | # Evaluate FMM 103 | myFMM.evaluate_fmm(kernel, src_value, trg_value, np.empty(shape=(0,0))) 104 | 105 | # Clear FMM and evaluate again 106 | trg_value[:,:] = 0 107 | myFMM.clear_fmm(kernel) 108 | myFMM.evaluate_fmm(kernel, src_value, trg_value, np.empty(shape=(0,0))) 109 | 110 | comm.Barrier() 111 | 112 | # calculate rpy tensor minus self-diffusion matrix 113 | rpy = rotne_prager_tensor(src_coord, 1, a) - np.identity(src_coord.size) / (6.*np.pi*a) 114 | 115 | # unpack FMM result 116 | u_trg = trg_value[:,0:3] 117 | lapu_trg = trg_value[:,3:] 118 | 119 | # Calculate M*F using calculated matrix directly 120 | MFdirect = (np.dot(rpy, src_value[:,0:3].flatten())).reshape(src_value[:,0:3].shape) 121 | 122 | # Calculate M*F using FMM results 123 | # WARNING: THIS DOES NOT FILTER OUT OVERLAPS, WILL SPONTANEOUSLY GENERATE LARGE ERROR 124 | MFstkfmm = u_trg + a * a * lapu_trg / 6.0 125 | 126 | # Print RMS difference of two techniques 127 | L2 = np.sqrt(np.multiply(MFdirect - MFstkfmm, MFdirect - MFstkfmm)) 128 | 129 | 
# Static Variable decorator
def static_var(varname, value):
    '''
    Return a decorator that stores value on the decorated function as the
    attribute varname and returns the function itself.
    '''
    def decorate(func):
        setattr(func, varname, value)
        return func
    return decorate


@static_var('timers', {})
def timer(name, print_one=False, print_all=False, output_file=None):
    '''
    Timer to profile the code. It measures the time elapsed between successive
    calls and accumulates the total time elapsed over successive start/stop
    pairs.

    State lives in timer.timers, a dict mapping each name to a tuple
    (accumulated_seconds, start_timestamp_or_None).

    name:        key of the timer to toggle. The first call (or a call while
                 the timer is stopped) starts it; the next call stops it and
                 adds the elapsed wall-clock time to the total. None toggles
                 nothing, which is useful together with print_all.
    print_one:   if True, print the accumulated total when this call stops
                 the timer.
    print_all:   if True, print the accumulated total of every timer, sorted
                 by name.
    output_file: if not None and print_all is True, the same table is also
                 written to this path (the file is overwritten).
    '''
    if name is None:
        pass
    elif name not in timer.timers:
        # First use of this name: start with a zero total
        timer.timers[name] = (0, time.time())
    elif timer.timers[name][1] is None:
        # Timer is stopped: restart it, keeping the accumulated total
        timer.timers[name] = (timer.timers[name][0], time.time())
    else:
        # Timer is running: stop it and fold the elapsed time into the total
        total, started = timer.timers[name]
        timer.timers[name] = (total + (time.time() - started), None)
        if print_one is True:
            print(name, ' = ', timer.timers[name][0])

    if print_all is True:
        print('\n')
        # "or [0]" keeps max() from raising when no timer has been recorded
        col_width = max([len(key) for key in timer.timers] or [0])
        names = sorted(timer.timers)
        for key in names:
            print(key.ljust(col_width), ' = ', timer.timers[key][0])
        if output_file is not None:
            with open(output_file, 'w') as f:
                for key in names:
                    f.write(key.ljust(col_width) + ' = ' + str(timer.timers[key][0]) + '\n')
    return
47 | 48 | 50 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /Test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(TestFMM.X main.cpp SimpleKernel.cpp Test.cpp 2 | Util/PointDistribution.cpp) 3 | target_include_directories(TestFMM.X PRIVATE ${CMAKE_SOURCE_DIR}/Util) 4 | target_link_libraries(TestFMM.X PRIVATE STKFMM_STATIC Eigen3::Eigen 5 | OpenMP::OpenMP_CXX MPI::MPI_CXX) 6 | -------------------------------------------------------------------------------- /Test/SimpleKernel.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SIMPLEKERNEL_HPP 2 | #define SIMPLEKERNEL_HPP 3 | 4 | // 3 3 4 4/16/9/7 5 | void StokesSLPVel(double *s, double *t, double *f, double *pvel); 6 | void StokesSLPVelGrad(double *s, double *t, double *f, double *pvelGrad); 7 | void StokesSLTraction(double *s, double *t, double *f, double *traction); 8 | void StokesSLPVelLaplacian(double *s, double *t, double *f, double *pvelLaplacian); 9 | 10 | // 3 3 9 4/16/9/7 11 | void StokesDLPVel(double *s, double *t, double *db, double *pvel); 12 | void StokesDLPVelGrad(double *s, double *t, double *db, double *pvelGrad); 13 | void StokesDLTraction(double *s, double *t, double *db, double *traction); 14 | void StokesDLPVelLaplacian(double *s, double *t, double *db, double *pvelLaplacian); 15 | 16 | void LaplaceSLGrad(double *s, double *t, double *q, double *pgrad); 17 | void LaplaceDLGrad(double *s, double *t, double *db, double *pgrad); 18 | 19 | // 3 3 1 4/10 20 | void LaplaceSLPGrad(double *s, double *t, double *q, double *pgrad); 21 | void LaplaceSLPGradGrad(double *s, double *t, double *q, double *pgradgrad); 22 | 23 | // 3 3 3 4/10 24 | void LaplaceDLPGrad(double *s, double *t, double *db, double *pgrad); 25 | void LaplaceDLPGradGrad(double *s, double *t, double *db, double *pgradgrad); 26 | 27 | // 3 3 9 10 28 | void 
LaplaceQPGradGrad(double *s, double *t, double *q, double *pgradgrad); 29 | 30 | // 31 | void StokesRegSLVel(double *s, double *t, double *f, double *vel); 32 | void StokesRegSLVelOmega(double *s, double *t, double *f, double *velomega); 33 | 34 | // 35 | void StokesRegDLVel(double *s, double *t, double *f, double *vel); 36 | void StokesRegDLVelOmega(double *s, double *t, double *f, double *velomega); 37 | 38 | // 39 | void StokesSLRPY(double *s, double *t, double *f, double *vlapv); 40 | void StokesDLRPY(double *s, double *t, double *f, double *vlapv); 41 | 42 | void StokesSL(double *s, double *t, double *f, double *v); 43 | void StokesDL(double *s, double *t, double *f, double *v); 44 | 45 | // 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /Test/Test.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TEST_HPP_ 2 | #define TEST_HPP_ 3 | 4 | #include "STKFMM/STKFMM.hpp" 5 | 6 | #include "SimpleKernel.hpp" 7 | 8 | #include "Util/PointDistribution.hpp" 9 | #include "Util/Timer.hpp" 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | using KERNEL = stkfmm::KERNEL; 16 | using Stk3DFMM = stkfmm::Stk3DFMM; 17 | using StkWallFMM = stkfmm::StkWallFMM; 18 | 19 | struct Config { 20 | int nSL = 1, nDL = 1, nTrg = 1; 21 | int rngseed = 0; 22 | double box = 1; 23 | std::array origin = {0, 0, 0}; 24 | std::array distParam = {-1.0, 0.5}; 25 | int distType = 1; 26 | int K = 1; 27 | int maxOrder = 16; 28 | int pbc = 0; 29 | int maxPoints = 50; 30 | double epsilon = 1e-3; 31 | bool random = true; 32 | bool direct = false; 33 | bool verify = true; 34 | bool convergence = true; 35 | bool wall = false; 36 | bool dump = true; 37 | 38 | Config() = default; 39 | void parse(int argc, char **argv); 40 | void print() const; 41 | }; 42 | 43 | struct Point { 44 | std::vector srcLocalSL; 45 | std::vector srcLocalDL; 46 | std::vector trgLocal; 47 | }; 48 | 49 | struct Source { 50 | 
std::vector srcLocalSL; 51 | std::vector srcLocalDL; 52 | }; 53 | 54 | struct ComponentError { 55 | // error without drift correction 56 | double errorL2 = 0; // L2 error 57 | double errorRMS = 0; // RMS error 58 | double errorMaxRel = 0; // max relative error 59 | 60 | double drift = 0; // mean drift 61 | 62 | ComponentError() = default; 63 | ComponentError(const std::vector &A, const std::vector &B); 64 | }; 65 | 66 | struct Record { 67 | KERNEL kernel; 68 | int multOrder; 69 | double treeTime = 0; 70 | double runTime = 0; 71 | std::vector errorConvergence; // error for each trgValue component 72 | std::vector errorVerify; // error for each trgValue component 73 | std::vector errorTranslate; // error for each trgValue component 74 | }; 75 | 76 | using Input = std::unordered_map; 77 | using Result = std::unordered_map>; 78 | using Timing = std::unordered_map>; 79 | 80 | void genPoint(const Config &config, Point &point, int dim); 81 | void translatePoint(const Config &config, Point &point); 82 | 83 | void genSrcValue(const Config &config, const Point &point, Input &input); 84 | 85 | void runSimpleKernel(const Config &config, const Point &point, Input &input, Result &result); 86 | 87 | void runFMM(const Config &config, const int p, const Point &point, Input &input, Result &result, Timing &timing); 88 | 89 | void checkError(const int dim, const std::vector &A, const std::vector &B, 90 | std::vector &error); 91 | 92 | void appendHistory(std::vector &history, const int p, const Timing &timing, const Result &result, 93 | const Result &verifyResult, const Result &convergeResult, const Result &translateResult); 94 | 95 | void dumpValue(const std::string &tag, const Point &point, const Input &input, const Result &result); 96 | 97 | void recordJson(const Config &config, const std::vector &history); 98 | 99 | template 100 | inline void printf_rank0(Args... 
// Chebyshev nodes on [-1, 1] with Clenshaw-Curtis quadrature weights.
// points belong to -1, 1
// scaling is the user's duty
class ChebNodal {
  public:
    int chebN;       // polynomial order; chebN + 1 nodes are generated
    bool includeEnd; // if false, the last node (x = +1) and its weight are dropped
    std::vector<double> points;  // points[i] = -cos(i * pi / chebN), ascending from -1
    std::vector<double> weights; // quadrature weight associated with points[i]

  public:
    /**
     * @param chebN      order, must be > 0 (checked by assert in calcWeight)
     * @param includeEnd keep (true) or drop (false) the final node and weight
     */
    ChebNodal(int chebN, bool includeEnd = true) : chebN(chebN), includeEnd(includeEnd) {
        points.resize(chebN + 1);
        weights.resize(chebN + 1);

        calcWeight();
        if (!includeEnd) {
            points.resize(points.size() - 1);
            weights.resize(weights.size() - 1);
        }
    }

    ~ChebNodal() = default;

  private:
    /**
     * Fill points and weights for all chebN + 1 nodes.
     *
     * weights[n] = (c_n / chebN) * sum over even k of d_k * cos(k n pi / chebN),
     * where c_n = 1 at the two end nodes and 2 elsewhere, and d_k is the
     * integral of T_k over [-1, 1], i.e. 2 / (1 - k^2), with the k = 0 term
     * and (for even chebN) the k = chebN term halved.  The halving of the
     * last term is what makes the rule exact for polynomials of degree chebN
     * (e.g. chebN = 2 reproduces Simpson's 1/3, 4/3, 1/3).
     */
    void calcWeight() {
        assert(chebN > 0);
        const double Pi = 3.1415926535897932384626433;
        const int nPts = chebN + 1;

        // Moments d_k; odd k integrate to zero and stay 0.
        std::vector<double> dvec(nPts, 0.0);
        for (int k = 0; k < nPts; k += 2) {
            dvec[k] = 2 / (1.0 - static_cast<double>(k) * k);
        }
        dvec[0] = 1;
        if (chebN % 2 == 0) {
            dvec[chebN] *= 0.5; // end term of the cosine sum counts half
        }

        points.resize(nPts);
        weights.resize(nPts);
        for (int n = 0; n < nPts; n++) {
            const double endScale = (n == 0 || n == chebN) ? 1.0 : 2.0;
            double sum = 0;
            for (int k = 0; k < nPts; k += 2) {
                // product formed in double so k * n cannot overflow int
                sum += std::cos(static_cast<double>(k) * n * Pi / chebN) * dvec[k];
            }
            weights[n] = sum * endScale / chebN;
            points[n] = -std::cos(n * Pi / chebN);
        }
    }
};
// Records a sequence of tick()/tock() intervals with a label each.
class Timer {
  private:
    bool work = true; // when false, tick() and tock() record nothing
    struct Block {
        std::chrono::high_resolution_clock::time_point startTime;
        std::chrono::high_resolution_clock::time_point stopTime;
        std::string message;
    };
    std::vector<Block> timing; // one entry per tick(), in call order

    // Length of one recorded interval in seconds (microsecond resolution).
    static double seconds(const Block &event) {
        return std::chrono::duration_cast<std::chrono::microseconds>(event.stopTime - event.startTime).count() /
               1e6;
    }

  public:
    explicit Timer() = default;

    explicit Timer(bool work_) : Timer() { work = work_; }

    ~Timer() = default;

    void enable() { work = true; }
    void disable() { work = false; }

    // Start a new interval (no-op while disabled).
    void tick() {
        if (work) {
            timing.emplace_back();
            auto &recording = timing.back();
            recording.startTime = std::chrono::high_resolution_clock::now();
        }
    }

    // Close the most recent interval and label it with s.
    // No-op while disabled, or when no interval has been started: calling
    // back() on an empty vector would be undefined behavior.
    void tock(const std::string &s) {
        if (work && !timing.empty()) {
            auto &recording = timing.back();
            recording.stopTime = std::chrono::high_resolution_clock::now();
            recording.message = s;
        }
    }

    // Print every recorded interval to std::cout, one per line.
    void dump() const {
        for (const auto &event : timing) {
            std::cout << event.message << " :time " << seconds(event) << " seconds." << std::endl;
        }
    }

    // Return the length in seconds of every recorded interval, in call order.
    std::vector<double> getTime() const {
        std::vector<double> time;
        time.reserve(timing.size());
        for (const auto &event : timing) {
            time.push_back(seconds(event));
        }
        return time;
    }
};
runFMM(config, order, point, input, pResult, timing); 60 | if (config.dump) 61 | dumpValue("direct", point, input, pResult); 62 | appendHistory(history, order, timing, pResult, verifyResult, convResult, transResult); 63 | } else { 64 | for (int p = 6; p < config.maxOrder; p += 2) { 65 | printf_rank0("*********Testing order p = %d*********\n", p); 66 | pResult.clear(); 67 | transResult.clear(); 68 | timing.clear(); 69 | 70 | runFMM(config, p, point, input, pResult, timing); 71 | if (config.dump) 72 | dumpValue("p" + std::to_string(p), point, input, pResult); 73 | 74 | if (config.pbc) { 75 | Timing transTiming; 76 | runFMM(config, p, trans_point, input, transResult, transTiming); 77 | if (config.dump) 78 | dumpValue("trans_p" + std::to_string(p), trans_point, input, transResult); 79 | } 80 | 81 | appendHistory(history, p, timing, pResult, verifyResult, convResult, transResult); 82 | } 83 | } 84 | 85 | recordJson(config, history); 86 | 87 | MPI_Finalize(); 88 | return 0; 89 | } 90 | -------------------------------------------------------------------------------- /do-cmake.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | # change this to find Eigen3Config.cmake in this folder 4 | export EIGENPATH=$HOME/env_intel/share/eigen3/cmake/ 5 | 6 | cmake \ 7 | -D CMAKE_CXX_COMPILER=mpicxx \ 8 | -D CMAKE_BUILD_TYPE=Release \ 9 | -D Eigen3_DIR=${EIGENPATH} \ 10 | -D BUILD_TEST=ON \ 11 | -D BUILD_DOC=OFF \ 12 | -D BUILD_M2L=OFF \ 13 | -D PyInterface=OFF \ 14 | ../ 15 | 16 | -------------------------------------------------------------------------------- /svgs/07e476cc0252962199ca482cc8788e94.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /svgs/0b5c36a960bf1e20da870975949caf38.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /svgs/0ec0e7629c233c51a807937c9c2e0008.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /svgs/1069b8abbb5837aa1e07cd46c48ff62d.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /svgs/12dd280bcf2f8b88954a119a6fe0cc82.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /svgs/194516c014804d683d1ab5a74f8c5647.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- 
/svgs/1abfc937b0f1b385c8c69b2730a6cda6.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /svgs/1fc018edd54a76a01783d1cf35676916.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /svgs/2103f85b8b1477f430fc407cad462224.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /svgs/23776aad854f2d33e83e4f4cad44e1b9.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /svgs/23bbbdfa14b7ee7a030d0c04fd38250a.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /svgs/34be5e6cbc28b74e8f561c1d527644ce.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /svgs/3ce145d17b292a694572c25966e7805f.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /svgs/4bdc8d9bcfb35e1c9bfb51fc69687dfc.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 
-------------------------------------------------------------------------------- /svgs/4ed9389b413af04e9786fa6e147ddbdb.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /svgs/79a624f595dfa02aaede80594ce7a077.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /svgs/7ccca27b5ccc533a2dd72dc6fa28ed84.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /svgs/912607c89eae037134fdf3e74d602929.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /svgs/a5a3c89b53bed887e7e194b0670abc9a.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /svgs/ac9424c220341fa74016e5769014f456.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /svgs/b1f2675e5b0e8444482d1bdfac266e90.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /svgs/bdbf342b57819773421273d508dba586.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /svgs/bee1683ac5a86212efac5d2804145b0f.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /svgs/c0b7bdafbb8aef85d4275c543c04eeb7.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /svgs/c0e8acfba65f15b77a9457b9e727c409.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /svgs/c37ded03564c90141c5f1e058edc4ab8.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /svgs/c4dd4df1478960c5f0d78f517ad773e5.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /svgs/d5c18a8ca1894fd3a7d25f242cbe8890.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /svgs/df06f340b9915e0682b914a0b1de03b9.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 
-------------------------------------------------------------------------------- /svgs/e714d66356b6c29eeee3f7985e73c67f.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | --------------------------------------------------------------------------------