├── .gitattributes ├── CMakeLists.txt ├── FindHOOMD.cmake ├── PSEv1 ├── Brownian.cu ├── Brownian.cuh ├── CMakeLists.txt ├── Helper.cu ├── Helper.cuh ├── Mobility.cu ├── Mobility.cuh ├── ShearFunction.cc ├── ShearFunction.h ├── ShearFunctionWrap.cc ├── ShearFunctionWrap.h ├── SpecificShearFunction.cc ├── SpecificShearFunction.h ├── Stokes.cc ├── Stokes.cu ├── Stokes.cuh ├── Stokes.h ├── VariantShearFunction.cc ├── VariantShearFunction.h ├── __init__.py ├── integrate.py ├── module.cc ├── shear_function.py └── variant.py ├── README.md └── examples └── run.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Set the default behavior, in case people don't have core.autocrlf set.
2 | * text=auto
3 | 
4 | # Explicitly declare text files you want to always be normalized and converted
5 | # to native line endings on checkout.
6 | *.c text
7 | *.h text
8 | *.cc text
9 | *.cu text
10 | *.cuh text
11 | 
12 | 
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | CMAKE_MINIMUM_REQUIRED(VERSION 2.6.2 FATAL_ERROR) # must come before project() so policy defaults are established
2 | project(PSEv1)
3 | 
4 | set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_MODULE_PATH})
5 | 
6 | include(FindHOOMD.cmake)
7 | 
8 | # plugins must be built as shared libraries
9 | if (ENABLE_STATIC)
10 | message(SEND_ERROR "Plugins cannot be built against a statically compiled hoomd")
11 | endif (ENABLE_STATIC)
12 | 
13 | set(BUILD_SHARED_LIBS on)
14 | 
15 | if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
16 | set(CMAKE_INSTALL_PREFIX ${HOOMD_ROOT} CACHE PATH "Installation prefix" FORCE)
17 | endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
18 | 
19 | set(PYTHON_MODULE_BASE_DIR ${CMAKE_INSTALL_PREFIX})
20 | message(STATUS "Install plugin to: " ${PYTHON_MODULE_BASE_DIR})
21 | 
22 | # add subdirectories
23 | add_subdirectory(${PROJECT_NAME})
24 | 
--------------------------------------------------------------------------------
/FindHOOMD.cmake:
--------------------------------------------------------------------------------
1 | # CMake script for finding HOOMD and setting up all needed compile options to create and link a plugin library
2 | #
3 | # Variables taken as input to this module:
4 | # HOOMD_ROOT : location to look for HOOMD, if it is not in the python path
5 | #
6 | # Variables defined by this module:
7 | # HOOMD_FOUND : set to true if HOOMD is found
8 | # HOOMD_LIBRARIES : a list of all libraries needed to link to in order to access hoomd (uncached)
9 | # HOOMD_INCLUDE_DIR : a list of all include directories that need to be set to include HOOMD
10 | # HOOMD_LIB : a cached var locating the hoomd library to link to
11 | #
12 | # various ENABLE_ flags translated from hoomd_config.h so this plugin build can match the ABI of the installed hoomd
13 | #
14 | # as a convenience (for the intended purpose of this find script), all include directories and definitions needed
15 | # to compile with all the various libs (boost, python, winsock, etc...) are set within this script
16 | 
17 | set(HOOMD_ROOT "" CACHE FILEPATH "Directory containing a hoomd installation (i.e. _hoomd.so)")
18 | 
19 | # Let HOOMD_ROOT take precedence, but if unset, try letting Python find a hoomd package in its default paths.
20 | if(HOOMD_ROOT)
21 | set(hoomd_installation_guess ${HOOMD_ROOT})
22 | else(HOOMD_ROOT)
23 | find_package(PythonInterp)
24 | 
25 | set(find_hoomd_script "
26 | from __future__ import print_function;
27 | import sys, os; sys.stdout = open(os.devnull, 'w')
28 | import hoomd
29 | print(os.path.dirname(hoomd.__file__), file=sys.stderr, end='')")
30 | 
31 | execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "${find_hoomd_script}"
32 | ERROR_VARIABLE hoomd_installation_guess)
33 | message(STATUS "Python output: " ${hoomd_installation_guess})
34 | endif(HOOMD_ROOT)
35 | 
36 | message(STATUS "Looking for a HOOMD installation at " ${hoomd_installation_guess})
37 | find_path(FOUND_HOOMD_ROOT
38 | NAMES _hoomd.so __init__.py
39 | HINTS ${hoomd_installation_guess}
40 | )
41 | 
42 | if(FOUND_HOOMD_ROOT)
43 | set(HOOMD_ROOT ${FOUND_HOOMD_ROOT} CACHE FILEPATH "Directory containing a hoomd installation (i.e. _hoomd.so)" FORCE)
44 | message(STATUS "Found hoomd installation at " ${HOOMD_ROOT})
45 | else(FOUND_HOOMD_ROOT)
46 | message(FATAL_ERROR "Could not find hoomd installation, either set HOOMD_ROOT or set PYTHON_EXECUTABLE to a python which can find hoomd")
47 | endif(FOUND_HOOMD_ROOT)
48 | 
49 | # search for the hoomd include directory
50 | find_path(HOOMD_INCLUDE_DIR
51 | NAMES HOOMDVersion.h
52 | HINTS ${HOOMD_ROOT}/include
53 | )
54 | 
55 | if (HOOMD_INCLUDE_DIR)
56 | message(STATUS "Found HOOMD include directory: ${HOOMD_INCLUDE_DIR}")
57 | mark_as_advanced(HOOMD_INCLUDE_DIR)
58 | endif (HOOMD_INCLUDE_DIR)
59 | 
60 | set(HOOMD_FOUND FALSE)
61 | if (HOOMD_INCLUDE_DIR AND HOOMD_ROOT)
62 | set(HOOMD_FOUND TRUE)
63 | mark_as_advanced(HOOMD_ROOT)
64 | endif (HOOMD_INCLUDE_DIR AND HOOMD_ROOT)
65 | 
66 | if (NOT HOOMD_FOUND)
67 | message(SEND_ERROR "HOOMD Not found. Please specify the location of your hoomd installation in HOOMD_ROOT")
68 | endif (NOT HOOMD_FOUND)
69 | 
70 | #############################################################
71 | ## Now that we've found hoomd, lets do some setup
72 | if (HOOMD_FOUND)
73 | 
74 | include_directories(${HOOMD_INCLUDE_DIR})
75 | 
76 | # run all of HOOMD's generic lib setup scripts
77 | set(CMAKE_MODULE_PATH ${HOOMD_ROOT}
78 | ${HOOMD_ROOT}/CMake/hoomd
79 | ${HOOMD_ROOT}/CMake/thrust
80 | ${CMAKE_MODULE_PATH}
81 | )
82 | 
83 | # grab previously-set hoomd configuration
84 | include (hoomd_cache)
85 | 
86 | # Handle user build options
87 | include (CMake_build_options)
88 | include (CMake_preprocessor_flags)
89 | # setup the install directories
90 | include (CMake_install_options)
91 | 
92 | # Find the python executable and libraries
93 | include (HOOMDPythonSetup)
94 | # Find CUDA and set it up
95 | include (HOOMDCUDASetup)
96 | # Set default CFlags
97 | include (HOOMDCFlagsSetup)
98 | # include some os specific options
99 | include (HOOMDOSSpecificSetup)
100 | # setup common libraries used by all targets in this project
101 | include (HOOMDCommonLibsSetup)
102 | # setup macros
103 | include (HOOMDMacros)
104 | # setup MPI support
105 | include (HOOMDMPISetup)
106 | 
107 | set(HOOMD_LIB ${HOOMD_ROOT}/_hoomd${PYTHON_MODULE_EXTENSION})
108 | set(HOOMD_MD_LIB ${HOOMD_ROOT}/md/_md${PYTHON_MODULE_EXTENSION})
109 | set(HOOMD_DEM_LIB ${HOOMD_ROOT}/dem/_dem${PYTHON_MODULE_EXTENSION})
110 | set(HOOMD_HPMC_LIB ${HOOMD_ROOT}/hpmc/_hpmc${PYTHON_MODULE_EXTENSION})
111 | set(HOOMD_CGCMM_LIB ${HOOMD_ROOT}/cgcmm/_cgcmm${PYTHON_MODULE_EXTENSION})
112 | set(HOOMD_METAL_LIB ${HOOMD_ROOT}/metal/_metal${PYTHON_MODULE_EXTENSION})
113 | set(HOOMD_DEPRECATED_LIB ${HOOMD_ROOT}/deprecated/_deprecated${PYTHON_MODULE_EXTENSION})
114 | 
115 | # NOTE(review): a dead assignment of HOOMD_LIBRARIES (without HOOMD_MD_LIB) stood here;
116 | # it was unconditionally overwritten by the plugin link list below, so it was removed.
117 | # NEED THIS ONE FOR THE PLUGIN!!!
118 | set(HOOMD_LIBRARIES ${HOOMD_LIB} ${HOOMD_MD_LIB} ${HOOMD_COMMON_LIBS})
119 | 
120 | endif (HOOMD_FOUND)
121 | 
--------------------------------------------------------------------------------
/PSEv1/Brownian.cu:
--------------------------------------------------------------------------------
1 | /*
2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 | the University of Michigan All rights reserved.
5 | 
6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 | copyright is held, by various Contributors who have granted The Regents of the
8 | University of Michigan the right to modify and/or distribute such Contributions.
9 | 
10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source
11 | and binary forms, provided you abide by the following conditions:
12 | 
13 | * Redistributions of source code must retain the above copyright notice, this
14 | list of conditions, and the following disclaimer both in the code and
15 | prominently in any materials provided with the distribution.
16 | 
17 | * Redistributions in binary form must reproduce the above copyright notice, this
18 | list of conditions, and the following disclaimer in the documentation and/or
19 | other materials provided with the distribution.
20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
48 | */
49 | 
50 | // HOOMD Maintainer: joaander
51 | // Modified by Andrew Fiore
52 | 
53 | #include "Brownian.cuh"
54 | #include "Mobility.cuh"
55 | #include "Helper.cuh"
56 | 
57 | #include "hoomd/Saru.h"
58 | #include "hoomd/TextureTools.h"
59 | using namespace hoomd;
60 | 
61 | #include <stdio.h>  // NOTE(review): include targets were stripped in the dump ("#include" with no header); restored — code below uses printf/sqrtf. Confirm against upstream.
62 | #include <math.h>
63 | 
64 | #include "lapacke.h"
65 | #include "cblas.h"
66 | 
67 | #ifdef WIN32
68 | #include <cassert>  // NOTE(review): restored; standard HOOMD WIN32/else assert pattern — confirm against upstream
69 | #else
70 | #include <assert.h>
71 | #endif
72 | 
73 | 
74 | /*! \file Brownian.cu
75 | \brief Defines functions for PSE calculation of the Brownian Displacements
76 | 
77 | // Uses LAPACKE to perform the final square root of the tridiagonal matrix
78 | resulting from the Lanczos Method
79 | */
80 | 
81 | //! Shared memory array for partial sum of dot product kernel
82 | extern __shared__ Scalar partial_sum[];
83 | extern __shared__ Scalar4 shared_Fpos[];
84 | 
85 | /*!
86 | Generate random numbers on particles
87 | 
88 | \param d_psi random vector
89 | \param group_size number of particles
90 | \param d_group_members index to particle arrays
91 | \param timestep current time step
92 | \param seed seed for random number generation
93 | 
94 | Thread-per-particle operations to generate random numbers
95 | for the real space part of the Brownian calculation. Grid
96 | and blocks are 1-D.
97 | 
98 | */
99 | __global__ void gpu_stokes_BrownianGenerate_kernel(
100 | Scalar4 *d_psi,
101 | unsigned int group_size,
102 | unsigned int *d_group_members,
103 | const unsigned int timestep,
104 | const unsigned int seed
105 | ){
106 | 
107 | // Thread ID
108 | int group_idx = blockDim.x * blockIdx.x + threadIdx.x;
109 | 
110 | // Make sure that thread is in bounds
111 | if (group_idx < group_size) {
112 | 
113 | // Global particle index
114 | unsigned int idx = d_group_members[group_idx];
115 | 
116 | // Initialize random number generator
117 | detail::Saru s(idx, timestep + seed);
118 | 
119 | // Draw numbers from a Uniform distribution on (-sqrt(3),sqrt(3)),
120 | // so that variance = (2*sqrt(3))^2 / 12 = 1
121 | Scalar sqrt3 = 1.73205080757;
122 | Scalar randomx = s.f( -sqrt3, sqrt3 );
123 | Scalar randomy = s.f( -sqrt3, sqrt3 );
124 | Scalar randomz = s.f( -sqrt3, sqrt3 );
125 | 
126 | // Write to global memory, leaving the 4th element unchanged
127 | d_psi[idx] = make_scalar4(randomx, randomy, randomz, d_psi[idx].w);
128 | 
129 | }
130 | }
131 | 
132 | /*!
133 | Generate random numbers for wave space Brownian motion ( random numbers on grid )
134 | - scale forces as they're generated and add directly to the existing grid.
135 | 
136 | \param d_gridX x-component of vectors on grid
137 | \param d_gridY y-component of vectors on grid
138 | \param d_gridZ z-component of vectors on grid
139 | \param d_gridk reciprocal lattice vectors for each grid point
140 | \param NxNyNz total number of grid points
141 | \param Nx number of grid points in x-direction
142 | \param Ny number of grid points in y-direction
143 | \param Nz number of grid points in z-direction
144 | \param timestep current simulation time step
145 | \param seed seed for random number generation
146 | \param T simulation temperature
147 | \param dt simulation time step size
148 | \param quadW quadrature weight for spectral Ewald integration
149 | 
150 | Thread per grid node. 1-D grid of blocks, 1-D block of threads.
151 | 
152 | */
153 | __global__ void gpu_stokes_BrownianGridGenerate_kernel(
154 | CUFFTCOMPLEX *gridX,
155 | CUFFTCOMPLEX *gridY,
156 | CUFFTCOMPLEX *gridZ,
157 | Scalar4 *gridk,
158 | unsigned int NxNyNz,
159 | int Nx,
160 | int Ny,
161 | int Nz,
162 | const unsigned int timestep,
163 | const unsigned int seed,
164 | Scalar T,
165 | Scalar dt,
166 | Scalar quadW
167 | ){
168 | 
169 | // Current thread index
170 | int idx = blockDim.x * blockIdx.x + threadIdx.x;
171 | 
172 | // Check if threads are in bounds
173 | if ( idx < NxNyNz ) {
174 | 
175 | // Random number generator
176 | detail::Saru s(idx, timestep + seed);
177 | 
178 | // Square root of 3.0 / 2.0
179 | Scalar sqrt3d2 = 1.2247448713915889;
180 | 
181 | // Get random numbers from uniform distribution
182 | // on (-sqrt(3/2),sqrt(3/2)) so that variance
183 | // of ( reX + reY ) = 1.0
184 | Scalar reX = s.f( -sqrt3d2, sqrt3d2 );
185 | Scalar reY = s.f( -sqrt3d2, sqrt3d2 );
186 | Scalar reZ = s.f( -sqrt3d2, sqrt3d2 );
187 | Scalar imX = s.f( -sqrt3d2, sqrt3d2 );
188 | Scalar imY = s.f( -sqrt3d2, sqrt3d2 );
189 | Scalar imZ = s.f( -sqrt3d2, sqrt3d2 );
190 | 
191 | // Modulo arithmetic for indices for current grid point
192 | int kk = idx % Nz;
193 | int jj = ( ( idx - kk ) / Nz ) % Ny;
194 | int ii = ( ( idx - kk ) / Nz - jj ) / Ny;
195 | 
196 | // Scaling factor for covariance
197 | Scalar fac = sqrtf(2.0*T/dt/quadW);
198 | 
199 | // Variables required to place values on the grid
200 | Scalar2 fX, fY, fZ; // forces for thread's point
201 | Scalar2 fX_conj, fY_conj, fZ_conj; // forces for thread's conjugate point
202 | Scalar2 kdF, kdF_conj; // dot(k,F) for thread and conjugate point
203 | Scalar B12, B12_conj; // Scaling factors for thread and conjugate point
204 | 
205 | // Only do work on half the grid points because we are simultaneously assigning values
206 | // to each grid point and its conjugate. The following check makes sure we pick all of
207 | // the points without conjugates (zeros and nyquist points) as well as all the points
208 | // in the upper half of the grid. Also, ignore the origin in the wave space sum. (Sum
209 | // is over all k!= 0)
210 | if (
211 | !( 2 * kk >= Nz + 1 ) &&
212 | !( ( kk == 0 ) && ( 2 * jj >= Ny + 1 ) ) &&
213 | !( ( kk == 0 ) && ( jj == 0 ) && ( 2 * ii >= Nx + 1 ) ) &&
214 | !( ( kk == 0 ) && ( jj == 0 ) && ( ii == 0 ) )
215 | ){
216 | 
217 | // Is current grid point a nyquist point
218 | bool ii_nyquist = ( ( ii == Nx/2 ) && ( Nx/2 == (Nx+1)/2 ) );
219 | bool jj_nyquist = ( ( jj == Ny/2 ) && ( Ny/2 == (Ny+1)/2 ) );
220 | bool kk_nyquist = ( ( kk == Nz/2 ) && ( Nz/2 == (Nz+1)/2 ) );
221 | 
222 | // Index of conjugate point
223 | int ii_conj, jj_conj, kk_conj;
224 | if ( ii == 0 ){
225 | ii_conj = ii;
226 | }
227 | else {
228 | ii_conj = Nx - ii;
229 | }
230 | if ( jj == 0 ){
231 | jj_conj = jj;
232 | }
233 | else {
234 | jj_conj = Ny - jj;
235 | }
236 | if ( kk == 0 ){
237 | kk_conj = kk;
238 | }
239 | else {
240 | kk_conj = Nz - kk;
241 | }
242 | 
243 | // Global index of conjugate grid point
244 | int conj_idx = ii_conj * Ny*Nz + jj_conj * Nz + kk_conj;
245 | 
246 | // Current wave-space vector, conjugate wave space vector, and their
247 | // magnitudes
248 | Scalar4 tk = gridk[idx];
249 | Scalar4 tk_conj = gridk[conj_idx];
250 | 
251 | Scalar ksq = tk.x*tk.x + tk.y*tk.y + tk.z*tk.z;
252 | Scalar ksq_conj = tk_conj.x*tk_conj.x + tk_conj.y*tk_conj.y + tk_conj.z*tk_conj.z;
253 | 
254 | // Assign fluctuating values to the Nyquist points (no conjugate points)
255 | if ( ( ii == 0 && jj_nyquist && kk == 0 ) ||
256 | ( ii_nyquist && jj == 0 && kk == 0 ) ||
257 | ( ii_nyquist && jj_nyquist && kk == 0 ) ||
258 | ( ii == 0 && jj == 0 && kk_nyquist ) ||
259 | ( ii == 0 && jj_nyquist && kk_nyquist ) ||
260 | ( ii_nyquist && jj == 0 && kk_nyquist ) ||
261 | ( ii_nyquist && jj_nyquist && kk_nyquist ) ){
262 | 
263 | // At the nyquist point, the random quantity only has a real component. Have to
264 | // multiply by sqrt(2.0) to make sure the variance is still 1
265 | Scalar sqrt2 = 1.4142135623730951;
266 | fX = make_scalar2( sqrt2*reX, 0.0 );
267 | fY = make_scalar2( sqrt2*reY, 0.0 );
268 | fZ = make_scalar2( sqrt2*reZ, 0.0 );
269 | 
270 | // Dot product of wave-vector with stochastic quantity
271 | kdF = make_scalar2( ( tk.x*fX.x + tk.y*fY.x + tk.z*fZ.x ) / ksq, ( tk.x*fX.y + tk.y*fY.y + tk.z*fZ.y ) / ksq );
272 | 
273 | // Scaling factor
274 | B12 = sqrtf( tk.w );
275 | Scalar k = sqrtf( ksq );
276 | B12 *= sinf( k ) / k;
277 | 
278 | // Add random quantity to the grid AND scale by B^(1/2) simultaneously to save effort
279 | gridX[idx].x = gridX[idx].x + fac * ( fX.x - tk.x * kdF.x ) * B12;
280 | gridX[idx].y = gridX[idx].y + fac * ( fX.y - tk.x * kdF.y ) * B12;
281 | 
282 | gridY[idx].x = gridY[idx].x + fac * ( fY.x - tk.y * kdF.x ) * B12;
283 | gridY[idx].y = gridY[idx].y + fac * ( fY.y - tk.y * kdF.y ) * B12;
284 | 
285 | gridZ[idx].x = gridZ[idx].x + fac * ( fZ.x - tk.z * kdF.x ) * B12;
286 | gridZ[idx].y = gridZ[idx].y + fac * ( fZ.y - tk.z * kdF.y ) * B12;
287 | 
288 | }
289 | else {
290 | 
291 | // Construct random force
292 | fX = make_scalar2( reX, imX );
293 | fY = make_scalar2( reY, imY );
294 | fZ = make_scalar2( reZ, imZ );
295 | 
296 | // The random force at the conjugate point is the conjugate of the force at
297 | // the current point
298 | fX_conj = make_scalar2( reX, -imX );
299 | fY_conj = make_scalar2( reY, -imY );
300 | fZ_conj = make_scalar2( reZ, -imZ );
301 | 
302 | // Dot product of force with wave vector at current and conjugate point
303 | kdF = make_scalar2( ( tk.x*fX.x + tk.y*fY.x + tk.z*fZ.x ) / ksq, ( tk.x*fX.y + tk.y*fY.y + tk.z*fZ.y ) / ksq );
304 | kdF_conj = make_scalar2( ( tk_conj.x*fX_conj.x + tk_conj.y*fY_conj.x + tk_conj.z*fZ_conj.x ) / ksq_conj, ( tk_conj.x*fX_conj.y + tk_conj.y*fY_conj.y + tk_conj.z*fZ_conj.y ) / ksq_conj );
305 | 
306 | // Scaling factors at current and conjugate point
307 | B12 = sqrtf( tk.w );
308 | B12_conj = sqrtf( tk_conj.w );
309 | 
310 | Scalar k = sqrtf( ksq );
311 | Scalar kconj = sqrtf( ksq_conj );
312 | B12 *= sinf( k ) / k;
313 | B12_conj *= sinf( kconj ) / kconj;
314 | 
315 | // Add random quantity to the grid AND scale by B^(1/2) simultaneously to save effort
316 | // Current grid point
317 | gridX[idx].x = gridX[idx].x + fac * ( fX.x - tk.x * kdF.x ) * B12;
318 | gridX[idx].y = gridX[idx].y + fac * ( fX.y - tk.x * kdF.y ) * B12;
319 | 
320 | gridY[idx].x = gridY[idx].x + fac * ( fY.x - tk.y * kdF.x ) * B12;
321 | gridY[idx].y = gridY[idx].y + fac * ( fY.y - tk.y * kdF.y ) * B12;
322 | 
323 | gridZ[idx].x = gridZ[idx].x + fac * ( fZ.x - tk.z * kdF.x ) * B12;
324 | gridZ[idx].y = gridZ[idx].y + fac * ( fZ.y - tk.z * kdF.y ) * B12;
325 | 
326 | // Add random quantity to the grid AND scale by B^(1/2) simultaneously to save effort
327 | // Conjugate grid point
328 | gridX[conj_idx].x = gridX[conj_idx].x + fac * ( fX_conj.x - tk_conj.x * kdF_conj.x ) * B12_conj;
329 | gridX[conj_idx].y = gridX[conj_idx].y + fac * ( fX_conj.y - tk_conj.x * kdF_conj.y ) * B12_conj;
330 | 
331 | gridY[conj_idx].x = gridY[conj_idx].x + fac * ( fY_conj.x - tk_conj.y * kdF_conj.x ) * B12_conj;
332 | gridY[conj_idx].y = gridY[conj_idx].y + fac * ( fY_conj.y - tk_conj.y * kdF_conj.y ) * B12_conj;
333 | 
334 | gridZ[conj_idx].x = gridZ[conj_idx].x + fac * ( fZ_conj.x - tk_conj.z * kdF_conj.x ) * B12_conj;
335 | gridZ[conj_idx].y = gridZ[conj_idx].y + fac * ( fZ_conj.y - tk_conj.z * kdF_conj.y ) * B12_conj;
336 | 
337 | }
338 | 
339 | 
340 | 
341 | }
342 | 
343 | 
344 | }
345 | }
346 | 
347 | 
348 | /*!
349 | Use Lanczos method to compute Mreal^0.5 * psi
350 | 
351 | This method is detailed in:
352 | "Preconditioned Krylov Subspace Methods for Sampling Multivariate Gaussian Distributions"
353 | Edmond Chow and Yousef Saad, SIAM J. Sci.
Comput., 36(2), A588–A608
354 | Computes d_vel = psinorm * sqrt(2*T/dt) * Mreal^(1/2) * (d_psi/psinorm) with an adaptive-length
355 | Lanczos iteration that grows m until the relative step norm drops below cheb_error (or m_max).
356 | */
357 | void gpu_stokes_BrealLanczos_wrap(
358 | Scalar4 *d_psi,
359 | Scalar4 *d_pos,
360 | unsigned int *d_group_members,
361 | unsigned int group_size,
362 | const BoxDim& box,
363 | Scalar dt,
364 | Scalar4 *d_vel,
365 | const Scalar T,
366 | const unsigned int timestep,
367 | const unsigned int seed,
368 | Scalar xi,
369 | Scalar ewald_cut,
370 | Scalar ewald_dr,
371 | int ewald_n,
372 | Scalar4 *d_ewaldC1,
373 | const unsigned int *d_n_neigh,
374 | const unsigned int *d_nlist,
375 | const unsigned int *d_headlist,
376 | int& m,
377 | Scalar cheb_error,
378 | dim3 grid,
379 | dim3 threads,
380 | int gridBlockSize,
381 | int gridNBlock,
382 | Scalar3 gridh,
383 | Scalar self
384 | ){
385 | 
386 | // Dot product kernel specifications
387 | unsigned int thread_for_dot = 512; // Must be 2^n
388 | unsigned int grid_for_dot = (group_size/thread_for_dot) + 1;
389 | 
390 | // Temp var for dot product.
391 | Scalar *dot_sum;
392 | cudaMalloc( (void**)&dot_sum, grid_for_dot*sizeof(Scalar) );
393 | 
394 | // Allocate storage
395 | //
396 | int m_in = m;
397 | int m_max = 100;
398 | 
399 | // Storage vectors for tridiagonal factorization
400 | float *alpha, *beta, *alpha_save, *beta_save;
401 | alpha = (float *)malloc( (m_max)*sizeof(float) );
402 | alpha_save = (float *)malloc( (m_max)*sizeof(float) );
403 | beta = (float *)malloc( (m_max+1)*sizeof(float) );
404 | beta_save = (float *)malloc( (m_max+1)*sizeof(float) );
405 | 
406 | // Vectors for Lapacke and square root
407 | float *W;
408 | W = (float *)malloc( (m_max*m_max)*sizeof(float) );
409 | float *W1; // W1 = Lambda^(1/2) * ( W^T * e1 )
410 | W1 = (float *)malloc( (m_max)*sizeof(float) );
411 | float *Tm;
412 | Tm = (float *)malloc( m_max*sizeof(float) );
413 | Scalar *d_Tm;
414 | cudaMalloc( (void**)&d_Tm, m_max * sizeof(Scalar) );
415 | 
416 | // Vectors for Lanczos iterations
417 | Scalar4 *d_v, *d_vj, *d_vjm1;
418 | cudaMalloc( (void**)&d_v, group_size*sizeof(Scalar4) );
419 | cudaMalloc( (void**)&d_vj, group_size*sizeof(Scalar4) );
420 | cudaMalloc( (void**)&d_vjm1, group_size*sizeof(Scalar4) );
421 | 
422 | // Storage vector for M*vj
423 | Scalar4 *d_Mvj;
424 | cudaMalloc( (void**)&d_Mvj, group_size*sizeof(Scalar4) );
425 | 
426 | // Storage array for V
427 | Scalar4 *d_V;
428 | cudaMalloc( (void**)&d_V, m_max*group_size * sizeof(Scalar4) );
429 | 
430 | // Step-norm things
431 | Scalar4 *d_vel_old, *d_Mpsi;
432 | cudaMalloc( (void**)&d_vel_old, group_size*sizeof(Scalar4) );
433 | cudaMalloc( (void**)&d_Mpsi, group_size*sizeof(Scalar4) );
434 | Scalar psiMpsi;
435 | 
436 | // Temporary pointer
437 | Scalar4 *d_temp;
438 | 
439 | // Copy random vector to v0
440 | cudaMemcpy( d_vj, d_psi, group_size*sizeof(Scalar4), cudaMemcpyDeviceToDevice );
441 | 
442 | // Compute the norm of the d_psi (also the norm of basis vector v0)
443 | Scalar vnorm;
444 | gpu_stokes_DotStepOne_kernel<<< grid_for_dot, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(d_vj, d_vj, dot_sum, group_size, d_group_members);
445 | gpu_stokes_DotStepTwo_kernel<<< 1, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(dot_sum, grid_for_dot);
446 | cudaMemcpy(&vnorm, dot_sum, sizeof(Scalar), cudaMemcpyDeviceToHost);
447 | vnorm = sqrtf( vnorm );
448 | 
449 | Scalar psinorm = vnorm;
450 | 
451 | // Compute psi * M * psi ( for step norm )
452 | // NOTE(review): the '<<<grid, threads>>>' launch configurations in this function were garbled to '<<>>' in the dump; restored from the function's dim3 grid/threads parameters.
453 | gpu_stokes_Mreal_kernel<<<grid, threads>>>(d_pos, d_Mpsi, d_psi, group_size, xi, d_ewaldC1, self, ewald_cut, ewald_n, ewald_dr, d_group_members, box, d_n_neigh, d_nlist, d_headlist );
454 | gpu_stokes_DotStepOne_kernel<<< grid_for_dot, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(d_psi, d_Mpsi, dot_sum, group_size, d_group_members);
455 | gpu_stokes_DotStepTwo_kernel<<< 1, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(dot_sum, grid_for_dot);
456 | cudaMemcpy(&psiMpsi, dot_sum, sizeof(Scalar), cudaMemcpyDeviceToHost);
457 | 
458 | psiMpsi = psiMpsi / ( psinorm * psinorm );
459 | 
460 | // First iteration, vjm1 = 0, vj = psi / norm( psi )
461 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_vj, d_vj, d_vjm1, 0.0, 0.0, group_size, d_group_members);
462 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_vj, d_vj, d_vj, 1.0/vnorm, 0.0, group_size, d_group_members);
463 | 
464 | // Start by computing (m-1) iterations, so that the stepnorm for the given
465 | // number of iterations can be computed
466 | m = m_in - 1;
467 | m = m < 1 ? 1 : m;
468 | 
469 | // Values for current alpha and beta in the iteration
470 | Scalar tempalpha;
471 | Scalar tempbeta = 0.0;
472 | 
473 | // Apply the Lanczos method
474 | for ( int jj = 0; jj < m; ++jj ){
475 | 
476 | // Store current basis vector
477 | cudaMemcpy( &d_V[jj*group_size], d_vj, group_size*sizeof(Scalar4), cudaMemcpyDeviceToDevice );
478 | 
479 | // Store beta
480 | beta[jj] = tempbeta;
481 | 
482 | // v = M*vj - betaj*vjm1
483 | gpu_stokes_Mreal_kernel<<<grid, threads>>>(d_pos, d_Mvj, d_vj, group_size, xi, d_ewaldC1, self, ewald_cut, ewald_n, ewald_dr, d_group_members, box, d_n_neigh, d_nlist, d_headlist );
484 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_Mvj, d_vjm1, d_v, 1.0, -1.0*tempbeta, group_size, d_group_members);
485 | 
486 | // vj dot v
487 | gpu_stokes_DotStepOne_kernel<<< grid_for_dot, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(d_vj, d_v, dot_sum, group_size, d_group_members);
488 | gpu_stokes_DotStepTwo_kernel<<< 1, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(dot_sum, grid_for_dot);
489 | cudaMemcpy(&tempalpha, dot_sum, sizeof(Scalar), cudaMemcpyDeviceToHost);
490 | 
491 | // Store updated alpha
492 | alpha[jj] = tempalpha;
493 | 
494 | // v = v - alphaj*vj
495 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_v, d_vj, d_v, 1.0, -1.0*tempalpha, group_size, d_group_members);
496 | 
497 | // v dot v
498 | gpu_stokes_DotStepOne_kernel<<< grid_for_dot, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(d_v, d_v, dot_sum, group_size, d_group_members);
499 | gpu_stokes_DotStepTwo_kernel<<< 1, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(dot_sum, grid_for_dot);
500 | cudaMemcpy(&vnorm, dot_sum, sizeof(Scalar), cudaMemcpyDeviceToHost);
501 | vnorm = sqrtf( vnorm );
502 | 
503 | // betajp1 = norm( v )
504 | tempbeta = vnorm;
505 | 
506 | // Check that the basis vector is not too small. If so, end the iteration
507 | // (If norm is too small, will have numerical trouble)
508 | if ( vnorm < 1E-8 ){
509 | m = jj;
510 | break;
511 | }
512 | 
513 | // vjp1 = v / betajp1
514 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_v, d_v, d_v, 1.0/tempbeta, 0.0, group_size, d_group_members);
515 | 
516 | // Swap pointers
517 | d_temp = d_vjm1;
518 | d_vjm1 = d_vj;
519 | d_vj = d_v;
520 | d_v = d_temp;
521 | 
522 | }
523 | 
524 | // Save alpha, beta vectors (will be overwritten by lapack)
525 | for ( int ii = 0; ii < m; ++ii ){
526 | alpha_save[ii] = alpha[ii];
527 | beta_save[ii] = beta[ii];
528 | }
529 | beta_save[m] = beta[m];
530 | 
531 | // Now that we have alpha, beta, have to compute the square root of the tridiagonal
532 | // matrix Tm. Do this using eigen-decomposition.
533 | //
534 | // Compute eigen-decomposition of tridiagonal matrix
535 | // alpha (input) - vector of entries on main diagonal
536 | // alpha (output) - eigenvalues sorted in descending order
537 | // beta (input) - vector of entries of sub-diagonal
538 | // beta (output) - overwritten (zeros?)
539 | // W - (output) - matrix of eigenvectors. ith column corresponds to ith eigenvalue
540 | // INFO (output) = 0 if operation was successful
541 | int INFO = LAPACKE_spteqr( LAPACK_ROW_MAJOR, 'I', m, alpha, &beta[1], W, m );
542 | 
543 | // Check whether the eigen-decomposition failed, and throw error on failure
544 | if ( INFO != 0 ){
545 | printf("Eigenvalue decomposition #1 failed \n");
546 | printf("INFO = %i \n", INFO);
547 | 
548 | printf("\n alpha: \n");
549 | for( int ii = 0; ii < m; ++ii ){
550 | printf("%f \n", alpha_save[ii]);
551 | }
552 | printf("\n beta: \n");
553 | for( int ii = 0; ii < m; ++ii ){
554 | printf("%f \n", beta_save[ii]);
555 | }
556 | printf("%f \n", beta_save[m]);
557 | 
558 | printf("Note to User: restart simulation and proceed. \n");
559 | 
560 | exit(EXIT_FAILURE);
561 | }
562 | 
563 | // Now, we have to compute Tm^(1/2) * e1
564 | // Tm^(1/2) = W * Lambda^(1/2) * W^T * e1
565 | // = W * Lambda^(1/2) * ( W^T * e1 )
566 | // The quantity in parentheses is the first row of W
567 | // Lambda^(1/2) only has diagonal entries, so its product with the first row of W
568 | // is easy to compute.
569 | for ( int ii = 0; ii < m; ++ii ){
570 | W1[ii] = sqrtf( alpha[ii] ) * W[ii];
571 | }
572 | 
573 | // Tm = W * W1 = W * Lambda^(1/2) * W^T * e1
574 | float tempsum;
575 | for ( int ii = 0; ii < m; ++ii ){
576 | tempsum = 0.0;
577 | for ( int jj = 0; jj < m; ++jj ){
578 | int idx = m*ii + jj;
579 | 
580 | tempsum += W[idx] * W1[jj];
581 | }
582 | Tm[ii] = tempsum;
583 | }
584 | 
585 | // Copy matrix to GPU
586 | cudaMemcpy( d_Tm, Tm, m*sizeof(Scalar), cudaMemcpyHostToDevice );
587 | 
588 | // Multiply basis vectors by Tm, [ V0, V1, ..., Vm-1 ] * Tm
589 | gpu_stokes_MatVecMultiply_kernel<<<grid, threads>>>(d_V, d_Tm, d_vel, group_size, m);
590 | 
591 | // Copy velocity
592 | cudaMemcpy( d_vel_old, d_vel, group_size*sizeof(Scalar4), cudaMemcpyDeviceToDevice );
593 | 
594 | // Restore alpha, beta
595 | for ( int ii = 0; ii < m; ++ii ){
596 | alpha[ii] = alpha_save[ii];
597 | beta[ii] = beta_save[ii];
598 | }
599 | beta[m] = beta_save[m];
600 | 
601 | 
602 | //
603 | // Keep adding to basis vectors until the step norm is small enough
604 | //
605 | Scalar stepnorm = 1.0;
606 | int jj;
607 | while( stepnorm > cheb_error && m < m_max ){
608 | m++;
609 | jj = m - 1;
610 | 
611 | //
612 | // Do another Lanczos iteration
613 | //
614 | 
615 | // Store the current basis vector
616 | cudaMemcpy( &d_V[jj*group_size], d_vj, group_size*sizeof(Scalar4), cudaMemcpyDeviceToDevice );
617 | 
618 | // Store beta
619 | beta[jj] = tempbeta;
620 | 
621 | // v = M*vj - betaj*vjm1
622 | gpu_stokes_Mreal_kernel<<<grid, threads>>>(d_pos, d_Mvj, d_vj, group_size, xi, d_ewaldC1, self, ewald_cut, ewald_n, ewald_dr, d_group_members, box, d_n_neigh, d_nlist, d_headlist );
623 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_Mvj, d_vjm1, d_v, 1.0, -1.0*tempbeta, group_size, d_group_members);
624 | 
625 | // vj dot v
626 | gpu_stokes_DotStepOne_kernel<<< grid_for_dot, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(d_vj, d_v, dot_sum, group_size, d_group_members);
627 | gpu_stokes_DotStepTwo_kernel<<< 1, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(dot_sum, grid_for_dot);
628 | cudaMemcpy(&tempalpha, dot_sum, sizeof(Scalar), cudaMemcpyDeviceToHost);
629 | 
630 | // Store updated alpha
631 | alpha[jj] = tempalpha;
632 | 
633 | // v = v - alphaj*vj
634 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_v, d_vj, d_v, 1.0, -1.0*tempalpha, group_size, d_group_members);
635 | 
636 | // v dot v
637 | gpu_stokes_DotStepOne_kernel<<< grid_for_dot, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(d_v, d_v, dot_sum, group_size, d_group_members);
638 | gpu_stokes_DotStepTwo_kernel<<< 1, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(dot_sum, grid_for_dot);
639 | cudaMemcpy(&vnorm, dot_sum, sizeof(Scalar), cudaMemcpyDeviceToHost);
640 | vnorm = sqrtf( vnorm );
641 | 
642 | // betajp1 = norm( v )
643 | tempbeta = vnorm;
644 | 
645 | // Check if the norm of the basis vector is too small. If
646 | // so, end the iteration.
647 | if ( vnorm < 1E-8 ){
648 | m = jj;
649 | break;
650 | }
651 | 
652 | // vjp1 = v / betajp1
653 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_v, d_v, d_v, 1.0/tempbeta, 0.0, group_size, d_group_members);
654 | 
655 | // Swap pointers
656 | d_temp = d_vjm1;
657 | d_vjm1 = d_vj;
658 | d_vj = d_v;
659 | d_v = d_temp;
660 | 
661 | // Save alpha, beta vectors (will be overwritten by lapack)
662 | for ( int ii = 0; ii < m; ++ii ){
663 | alpha_save[ii] = alpha[ii];
664 | beta_save[ii] = beta[ii];
665 | }
666 | beta_save[m] = beta[m];
667 | 
668 | //
669 | // Square root calculation with addition of latest Lanczos iteration
670 | // (see first implementation above for more description)
671 | //
672 | 
673 | // Compute eigen-decomposition of tridiagonal matrix
674 | int INFO = LAPACKE_spteqr( LAPACK_ROW_MAJOR, 'I', m, alpha, &beta[1], W, m );
675 | 
676 | // Check whether the eigen-decomposition failed, and throw error on failure
677 | if ( INFO != 0 ){
678 | printf("Eigenvalue decomposition #2 failed \n");
679 | printf("INFO = %i \n", INFO);
680 | 
681 | printf("\n alpha: \n");
682 | for( int ii = 0; ii < m; ++ii ){
683 | printf("%f \n", alpha_save[ii]);
684 | }
685 | printf("\n beta: \n");
686 | for( int ii = 0; ii < m; ++ii ){
687 | printf("%f \n", beta_save[ii]);
688 | }
689 | printf("%f \n", beta_save[m]);
690 | 
691 | printf("Note to User: restart simulation and proceed. \n");
692 | 
693 | exit(EXIT_FAILURE);
694 | }
695 | 
696 | // Now, we have to compute Tm^(1/2) * e1
697 | for ( int ii = 0; ii < m; ++ii ){
698 | W1[ii] = sqrtf( alpha[ii] ) * W[ii];
699 | }
700 | 
701 | // Tm = W * W1 = W * Lambda^(1/2) * W^T * e1
702 | float tempsum;
703 | for ( int ii = 0; ii < m; ++ii ){
704 | tempsum = 0.0;
705 | for ( int jj = 0; jj < m; ++jj ){
706 | int idx = m*ii + jj;
707 | 
708 | tempsum += W[idx] * W1[jj];
709 | }
710 | Tm[ii] = tempsum;
711 | }
712 | 
713 | // Copy matrix to GPU
714 | cudaMemcpy( d_Tm, Tm, m*sizeof(Scalar), cudaMemcpyHostToDevice );
715 | 
716 | // Multiply basis vectors by Tm -- velocity = Vm * Tm
717 | gpu_stokes_MatVecMultiply_kernel<<<grid, threads>>>(d_V, d_Tm, d_vel, group_size, m);
718 | 
719 | // Compute step norm error
720 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_vel, d_vel_old, d_vel_old, 1.0, -1.0, group_size, d_group_members);
721 | gpu_stokes_DotStepOne_kernel<<< grid_for_dot, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(d_vel_old, d_vel_old, dot_sum, group_size, d_group_members);
722 | gpu_stokes_DotStepTwo_kernel<<< 1, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(dot_sum, grid_for_dot);
723 | cudaMemcpy(&stepnorm, dot_sum, sizeof(Scalar), cudaMemcpyDeviceToHost);
724 | 
725 | stepnorm = sqrtf( stepnorm / psiMpsi );
726 | 
727 | // Copy velocity
728 | cudaMemcpy( d_vel_old, d_vel, group_size*sizeof(Scalar4), cudaMemcpyDeviceToDevice );
729 | 
730 | // Restore alpha, beta
731 | for ( int ii = 0; ii < m; ++ii ){
732 | alpha[ii] = alpha_save[ii];
733 | beta[ii] = beta_save[ii];
734 | }
735 | beta[m] = beta_save[m];
736 | 
737 | }
738 | 
739 | // Rescale by original norm of Psi and include thermal variance
740 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_vel, d_vel, d_vel, psinorm * sqrtf(2.0*T/dt), 0.0, group_size, d_group_members);
741 | 
742 | //
743 | // Clean up
744 | //
745 | cudaFree(dot_sum);
746 | cudaFree(d_Mvj);
747 | cudaFree(d_v);
748 | cudaFree(d_vj);
749 | cudaFree(d_vjm1);
750 | cudaFree(d_V);
751 | cudaFree(d_Tm);
752 | cudaFree(d_vel_old);
753 | cudaFree(d_Mpsi);
754 | 
755 | d_temp = NULL;
756 | 
757 | free(alpha);
758 | free(beta);
759 | free(alpha_save);
760 | free(beta_save);
761 | 
762 | free(W);
763 | free(W1);
764 | free(Tm);
765 | 
766 | }
767 | 
768 | 
769 | // Wrap up everything to compute mobility AND brownian if necessary
770 | // - Combine Fourier components of Deterministic and Brownian calculation
771 | // in order to save extra FFTs and contraction operations
772 | // - Add deterministic and stochastic real space contributions
773 | void gpu_stokes_CombinedMobilityBrownian_wrap(
774 | Scalar4 *d_pos,
775 | Scalar4 *d_net_force,
776 | unsigned int *d_group_members,
777 | unsigned int group_size,
778 | const BoxDim& box,
779 | Scalar dt,
780 | Scalar4 *d_vel,
781 | const Scalar T,
782 | const unsigned int timestep,
783 | const unsigned int seed,
784 | Scalar xi,
785 | Scalar eta,
786 | Scalar P,
787 | Scalar ewald_cut,
788 | Scalar ewald_dr,
789 | int ewald_n,
790 | Scalar4 *d_ewaldC1,
791 | Scalar4 *d_gridk,
792 | CUFFTCOMPLEX *d_gridX,
793 | CUFFTCOMPLEX *d_gridY,
794 | CUFFTCOMPLEX *d_gridZ,
795 | cufftHandle plan,
796 | const int Nx,
797 | const int Ny,
798 | const int Nz,
799 | const unsigned int *d_n_neigh,
800 | const unsigned int *d_nlist,
801 | const unsigned int *d_headlist,
802 | int& m_Lanczos,
803 | const unsigned int N_total,
804 | unsigned int NxNyNz,
805 | dim3 grid,
806 | dim3 threads,
807 | int gridBlockSize,
808 | int gridNBlock,
809 | Scalar3 gridh,
810 | Scalar cheb_error,
811 | Scalar self ){
812 | 
813 | // Real space velocity to add
814 | Scalar4 *d_vel2;
815 | cudaMalloc( (void**)&d_vel2, group_size*sizeof(Scalar4) );
816 | // Generate uniform distribution (-1,1) on d_psi 817 | Scalar4 *d_psi; 818 | cudaMalloc( (void**)&d_psi, group_size*sizeof(Scalar4) ); 819 | gpu_stokes_BrownianGenerate_kernel<<>>( d_psi, group_size, d_group_members, timestep, seed ); 820 | 821 | // Spreading and contraction grid information and parameters 822 | dim3 Cgrid( group_size, 1, 1); 823 | int B = ( P < 10 ) ? P : 10; 824 | dim3 Cthreads(B, B, B); 825 | 826 | Scalar quadW = gridh.x * gridh.y * gridh.z; 827 | Scalar xisq = xi * xi; 828 | Scalar prefac = ( 2.0 * xisq / 3.1415926536 / eta ) * sqrtf( 2.0 * xisq / 3.1415926536 / eta ); 829 | Scalar expfac = 2.0 * xisq / eta; 830 | 831 | // ******************************************** 832 | // Wave Space Part of Deterministic Calculation 833 | // ******************************************** 834 | 835 | // Reset the grid (remove any previously distributed forces) 836 | gpu_stokes_ZeroGrid_kernel<<>>(d_gridX,NxNyNz); 837 | gpu_stokes_ZeroGrid_kernel<<>>(d_gridY,NxNyNz); 838 | gpu_stokes_ZeroGrid_kernel<<>>(d_gridZ,NxNyNz); 839 | 840 | // Spread forces onto grid 841 | gpu_stokes_Spread_kernel<<>>( d_pos, d_net_force, d_gridX, d_gridY, d_gridZ, group_size, Nx, Ny, Nz, d_group_members, box, P, gridh, xi, eta, prefac, expfac ); 842 | 843 | // Perform FFT on gridded forces 844 | cufftExecC2C(plan, d_gridX, d_gridX, CUFFT_FORWARD); 845 | cufftExecC2C(plan, d_gridY, d_gridY, CUFFT_FORWARD); 846 | cufftExecC2C(plan, d_gridZ, d_gridZ, CUFFT_FORWARD); 847 | 848 | // Apply wave space scaling to FFT'd forces 849 | gpu_stokes_Green_kernel<<>>( d_gridX, d_gridY, d_gridZ, d_gridk, NxNyNz); 850 | 851 | 852 | // *************************************** 853 | // Wave Space Part of Brownian Calculation 854 | // *************************************** 855 | if ( T > 0.0 ){ 856 | 857 | // Apply random fluctuations to wave space grid 858 | gpu_stokes_BrownianGridGenerate_kernel<<>>( d_gridX, d_gridY, d_gridZ, d_gridk, NxNyNz, Nx, Ny, Nz, timestep, seed, T, dt, quadW ); 859 | 860 | 
} 861 | 862 | // ************************************ 863 | // Finish the Wave Space Calculation 864 | // ************************************ 865 | 866 | // Return rescaled forces to real space 867 | cufftExecC2C(plan, d_gridX, d_gridX, CUFFT_INVERSE); 868 | cufftExecC2C(plan, d_gridY, d_gridY, CUFFT_INVERSE); 869 | cufftExecC2C(plan, d_gridZ, d_gridZ, CUFFT_INVERSE); 870 | 871 | // Evaluate contribution of grid velocities at particle centers 872 | gpu_stokes_Contract_kernel<<>>( d_pos, d_vel, d_gridX, d_gridY, d_gridZ, group_size, Nx, Ny, Nz, xi, eta, d_group_members, box, P, gridh, quadW*prefac, expfac ); 873 | 874 | // *************************************** 875 | // Real Space Part of Both Calculations 876 | // *************************************** 877 | 878 | // Deterministic part 879 | gpu_stokes_Mreal_kernel<<>>(d_pos, d_vel2, d_net_force, group_size, xi, d_ewaldC1, self, ewald_cut, ewald_n, ewald_dr, d_group_members, box, d_n_neigh, d_nlist, d_headlist ); 880 | 881 | // Add to velocity 882 | gpu_stokes_LinearCombination_kernel<<>>(d_vel2, d_vel, d_vel, 1.0, 1.0, group_size, d_group_members); 883 | 884 | // Stochastic 885 | if ( T > 0.0 ){ 886 | 887 | gpu_stokes_BrealLanczos_wrap( d_psi, 888 | d_pos, 889 | d_group_members, 890 | group_size, 891 | box, 892 | dt, 893 | d_vel2, 894 | T, 895 | timestep, 896 | seed, 897 | xi, 898 | ewald_cut, 899 | ewald_dr, 900 | ewald_n, 901 | d_ewaldC1, 902 | d_n_neigh, 903 | d_nlist, 904 | d_headlist, 905 | m_Lanczos, 906 | cheb_error, 907 | grid, 908 | threads, 909 | gridBlockSize, 910 | gridNBlock, 911 | gridh, 912 | self ); 913 | 914 | // Add to velocity 915 | gpu_stokes_LinearCombination_kernel<<>>(d_vel2, d_vel, d_vel, 1.0, 1.0, group_size, d_group_members); 916 | 917 | } 918 | 919 | // Free Memory 920 | cudaFree( d_vel2 ); 921 | cudaFree( d_psi ); 922 | 923 | } 924 | 925 | -------------------------------------------------------------------------------- /PSEv1/Brownian.cuh: 
-------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 
33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Andrew Fiore 52 | 53 | /*! \file Brownian.cuh 54 | \brief Declares GPU kernel codes for Brownian Calculations. 55 | */ 56 | #include "hoomd/ParticleData.cuh" 57 | #include "hoomd/HOOMDMath.h" 58 | #include "hoomd/Index1D.h" 59 | 60 | #include 61 | 62 | //! Define the kernel 63 | #ifndef __BROWNIAN_CUH__ 64 | #define __BROWNIAN_CUH__ 65 | 66 | //! 
Definition for complex variable storage 67 | #ifdef SINGLE_PRECISION 68 | #define CUFFTCOMPLEX cufftComplex 69 | #else 70 | #define CUFFTCOMPLEX cufftComplex 71 | #endif 72 | 73 | __global__ void gpu_stokes_BrownianGenerate_kernel( 74 | Scalar4 *d_psi, 75 | unsigned int group_size, 76 | unsigned int *d_group_members, 77 | const unsigned int timestep, 78 | const unsigned int seed 79 | ); 80 | 81 | __global__ void gpu_stokes_BrownianGridGenerate_kernel( 82 | CUFFTCOMPLEX *gridX, 83 | CUFFTCOMPLEX *gridY, 84 | CUFFTCOMPLEX *gridZ, 85 | Scalar4 *gridk, 86 | unsigned int NxNyNz, 87 | int Nx, 88 | int Ny, 89 | int Nz, 90 | const unsigned int timestep, 91 | const unsigned int seed, 92 | Scalar T, 93 | Scalar dt, 94 | Scalar quadW 95 | ); 96 | 97 | void gpu_stokes_CombinedMobilityBrownian_wrap( 98 | Scalar4 *d_pos, 99 | Scalar4 *d_net_force, 100 | unsigned int *d_group_members, 101 | unsigned int group_size, 102 | const BoxDim& box, 103 | Scalar dt, 104 | Scalar4 *d_vel, 105 | const Scalar T, 106 | const unsigned int timestep, 107 | const unsigned int seed, 108 | Scalar xi, 109 | Scalar eta, 110 | Scalar P, 111 | Scalar ewald_cut, 112 | Scalar ewald_dr, 113 | int ewald_n, 114 | Scalar4 *d_ewaldC1, 115 | Scalar4 *d_gridk, 116 | CUFFTCOMPLEX *d_gridX, 117 | CUFFTCOMPLEX *d_gridY, 118 | CUFFTCOMPLEX *d_gridZ, 119 | cufftHandle plan, 120 | const int Nx, 121 | const int Ny, 122 | const int Nz, 123 | const unsigned int *d_n_neigh, 124 | const unsigned int *d_nlist, 125 | const unsigned int *d_headlist, 126 | int& m_Lanczos, 127 | const unsigned int N_total, 128 | unsigned int NxNyNz, 129 | dim3 grid, 130 | dim3 threads, 131 | int gridBlockSize, 132 | int gridNBlock, 133 | Scalar3 gridh, 134 | Scalar cheb_error, 135 | Scalar self 136 | ); 137 | 138 | #endif 139 | -------------------------------------------------------------------------------- /PSEv1/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Maintainer: Andrew M. 
Fiore 2 | 3 | set(COMPONENT_NAME PSEv1) 4 | 5 | set(_${COMPONENT_NAME}_sources 6 | module.cc 7 | Stokes.cc 8 | ShearFunction.cc 9 | ShearFunctionWrap.cc 10 | SpecificShearFunction.cc 11 | VariantShearFunction.cc 12 | ) 13 | 14 | set(_${COMPONENT_NAME}_cu_sources 15 | Stokes.cu 16 | Brownian.cu 17 | Helper.cu 18 | Mobility.cu 19 | ) 20 | 21 | if (ENABLE_CUDA) 22 | CUDA_COMPILE(_CUDA_GENERATED_FILES ${_${COMPONENT_NAME}_cu_sources} OPTIONS ${CUDA_ADDITIONAL_OPTIONS} SHARED) 23 | endif (ENABLE_CUDA) 24 | 25 | pybind11_add_module (_${COMPONENT_NAME} SHARED ${_${COMPONENT_NAME}_sources} ${_CUDA_GENERATED_FILES} NO_EXTRAS) 26 | if (APPLE) 27 | set_target_properties(_${COMPONENT_NAME} PROPERTIES INSTALL_RPATH "@loader_path/..;@loader_path") 28 | else() 29 | set_target_properties(_${COMPONENT_NAME} PROPERTIES INSTALL_RPATH "\$ORIGIN/..;\$ORIGIN") 30 | endif() 31 | 32 | # Find additional libraries to be linked for the plugin 33 | find_library( LAPACKE_LIBRARIES lapacke PATHS /usr/local/lapack-3.6.0/lib/ ) 34 | find_library( LAPACK_LIBRARIES lapack PATHS /usr/local/lapack-3.6.0/lib/ ) 35 | find_library( BLAS_LIBRARIES blas PATHS /usr/local/lapack-3.6.0/lib/ ) 36 | find_library( CBLAS_LIBRARIES cblas PATHS /usr/local/lapack-3.6.0/lib/ ) 37 | if( LAPACKE_LIBRARIES-NOTFOUND ) 38 | message(FATAL_ERROR "lapacke libraries not found") 39 | endif( LAPACKE_LIBRARIES-NOTFOUND ) 40 | message(STATUS "found lapacke libraries: ${LAPACKE_LIBRARIES}") 41 | if( LAPACK_LIBRARIES-NOTFOUND ) 42 | message(FATAL_ERROR "lapack libraries not found") 43 | endif( LAPACK_LIBRARIES-NOTFOUND ) 44 | message(STATUS "found lapack libraries: ${LAPACK_LIBRARIES}") 45 | set( LAPACK_LIBRARIES ${LAPACKE_LIBRARIES} ${LAPACK_LIBRARIES} ${CBLAS_LIBRARIES} ${BLAS_LIBRARIES} ) 46 | 47 | # link the library to its dependencies 48 | target_link_libraries(_${COMPONENT_NAME} PRIVATE ${HOOMD_LIBRARIES} ${LAPACK_LIBRARIES}) 49 | 50 | # if we are compiling with MPI support built in, set appropriate 51 | # compiler/linker 
flags 52 | if (ENABLE_MPI) 53 | if(MPI_COMPILE_FLAGS) 54 | set_target_properties(_${COMPONENT_NAME} PROPERTIES COMPILE_FLAGS "${MPI_CXX_COMPILE_FLAGS}") 55 | endif(MPI_COMPILE_FLAGS) 56 | if(MPI_LINK_FLAGS) 57 | set_target_properties(_${COMPONENT_NAME} PROPERTIES LINK_FLAGS "${MPI_CXX_LINK_FLAGS}") 58 | endif(MPI_LINK_FLAGS) 59 | endif(ENABLE_MPI) 60 | 61 | fix_cudart_rpath(_${COMPONENT_NAME}) 62 | 63 | # install the library 64 | install(TARGETS _${COMPONENT_NAME} 65 | LIBRARY DESTINATION ${PYTHON_MODULE_BASE_DIR}/${COMPONENT_NAME} 66 | ) 67 | 68 | ################ Python only modules 69 | # copy python modules to the build directory to make it a working python package 70 | MACRO(copy_file file) 71 | add_custom_command ( 72 | OUTPUT ${file} 73 | DEPENDS ${file} 74 | POST_BUILD 75 | COMMAND ${CMAKE_COMMAND} 76 | ARGS -E copy ${CMAKE_CURRENT_SOURCE_DIR}/${file} ${CMAKE_CURRENT_BINARY_DIR}/${file} 77 | COMMENT "Copy hoomd/${COMPONENT_NAME}/${file}" 78 | ) 79 | ENDMACRO(copy_file) 80 | 81 | set(files 82 | __init__.py 83 | integrate.py 84 | shear_function.py 85 | variant.py 86 | ) 87 | 88 | install(FILES ${files} 89 | DESTINATION ${PYTHON_MODULE_BASE_DIR}/${COMPONENT_NAME} 90 | ) 91 | 92 | foreach(file ${files}) 93 | copy_file(${file}) 94 | endforeach() 95 | 96 | add_custom_target(copy_${COMPONENT_NAME} ALL DEPENDS ${files}) 97 | 98 | if (BUILD_TESTING) 99 | add_subdirectory(test-py) 100 | endif() 101 | -------------------------------------------------------------------------------- /PSEv1/Helper.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 
5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 
40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Gang Wang 52 | // Modified by Andrew Fiore 53 | 54 | 55 | #include "Helper.cuh" 56 | 57 | #include "hoomd/TextureTools.h" 58 | 59 | #include 60 | 61 | #ifdef WIN32 62 | #include 63 | #else 64 | #include 65 | #endif 66 | 67 | //! command to convert floats or doubles to integers 68 | #ifdef SINGLE_PRECISION 69 | #define __scalar2int_rd __float2int_rd 70 | #else 71 | #define __scalar2int_rd __double2int_rd 72 | #endif 73 | 74 | 75 | /*! \file Helper.cu 76 | \brief Helper functions to perform additions, dot products, etc., for Mobility and Brownian 77 | */ 78 | 79 | //! Shared memory array for partial sum of dot product kernel 80 | extern __shared__ Scalar partial_sum[]; 81 | 82 | //! Zero out the force grid 83 | /*! 84 | \param grid the grid going to be zero out 85 | \param NxNyNz dimension of the grid 86 | */ 87 | __global__ 88 | void gpu_stokes_ZeroGrid_kernel(CUFFTCOMPLEX *grid, unsigned int NxNyNz) { 89 | 90 | unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x; 91 | 92 | if ( tid < NxNyNz ) { 93 | 94 | grid[tid] = make_scalar2( 0.0, 0.0 ); 95 | 96 | } 97 | } 98 | 99 | /*! 100 | Linear combination helper function 101 | C = a*A + b*B 102 | C can be A or B, so that A or B will be overwritten 103 | The fourth element of Scalar4 is not changed! 
104 | 105 | \param d_a input vector, A 106 | \param d_b input vector, B 107 | \param d_c output vector, C 108 | \param coeff_a scaling factor for A, a 109 | \param coeff_b scaling factor for B, b 110 | \param group_size length of vectors 111 | \param d_group_members index into vectors 112 | */ 113 | __global__ void gpu_stokes_LinearCombination_kernel( 114 | Scalar4 *d_a, 115 | Scalar4 *d_b, 116 | Scalar4 *d_c, 117 | Scalar coeff_a, 118 | Scalar coeff_b, 119 | unsigned int group_size, 120 | unsigned int *d_group_members 121 | ){ 122 | 123 | int group_idx = blockDim.x * blockIdx.x + threadIdx.x; 124 | if (group_idx < group_size) { 125 | unsigned int idx = d_group_members[group_idx]; 126 | Scalar4 A4 = d_a[idx]; 127 | Scalar4 B4 = d_b[idx]; 128 | Scalar3 A = make_scalar3(A4.x, A4.y, A4.z); 129 | Scalar3 B = make_scalar3(B4.x, B4.y, B4.z); 130 | A = coeff_a * A + coeff_b * B; 131 | d_c[idx] = make_scalar4(A.x, A.y, A.z, d_c[idx].w); 132 | } 133 | } 134 | 135 | /*! 136 | Dot product helper function: First step 137 | d_a .* d_b -> d_c -> Partial sum 138 | BlockDim of this kernel should be 2^n, which is 512. 
(Based on HOOMD ComputeThermoGPU class) 139 | 140 | \param d_a first vector in dot product 141 | \param d_b second vector in dot product 142 | \param dot_sum partial dot product sum 143 | \param group_size length of vectors a and b 144 | \param d_group_members index into vectors 145 | */ 146 | __global__ void gpu_stokes_DotStepOne_kernel( 147 | Scalar4 *d_a, 148 | Scalar4 *d_b, 149 | Scalar *dot_sum, 150 | unsigned int group_size, 151 | unsigned int *d_group_members 152 | ){ 153 | 154 | int group_idx = blockDim.x * blockIdx.x + threadIdx.x; 155 | Scalar temp; 156 | 157 | if (group_idx < group_size) { 158 | 159 | unsigned int idx = d_group_members[group_idx]; 160 | Scalar4 a4 = d_a[idx]; 161 | Scalar4 b4 = d_b[idx]; 162 | Scalar3 a = make_scalar3(a4.x, a4.y, a4.z); 163 | Scalar3 b = make_scalar3(b4.x, b4.y, b4.z); 164 | 165 | temp = dot(a,b); // Partial sum, each thread, shared memory 166 | 167 | } 168 | else { 169 | temp = 0; 170 | } 171 | 172 | partial_sum[threadIdx.x] = temp; 173 | 174 | __syncthreads(); 175 | 176 | int offs = blockDim.x >> 1; 177 | 178 | while (offs > 0) 179 | { 180 | if (threadIdx.x < offs) 181 | { 182 | partial_sum[threadIdx.x] += partial_sum[threadIdx.x + offs]; 183 | } 184 | offs >>= 1; 185 | __syncthreads(); 186 | } 187 | 188 | if (threadIdx.x == 0){ 189 | dot_sum[blockIdx.x] = partial_sum[0]; 190 | } 191 | } 192 | 193 | 194 | 195 | /*! 
196 | Dot product helper function: Second step 197 | Partial sum -> Final sum 198 | Only one block will be launched for this step 199 | 200 | \param dot_sum partial sum from first dot product kernel 201 | \param num_partial_sums length of dot_sum array 202 | 203 | */ 204 | __global__ void gpu_stokes_DotStepTwo_kernel( 205 | Scalar *dot_sum, 206 | unsigned int num_partial_sums 207 | ){ 208 | 209 | partial_sum[threadIdx.x] = 0.0; 210 | __syncthreads(); 211 | for (unsigned int start = 0; start < num_partial_sums; start += blockDim.x) 212 | { 213 | if (start + threadIdx.x < num_partial_sums) 214 | { 215 | partial_sum[threadIdx.x] += dot_sum[start + threadIdx.x]; 216 | } 217 | } 218 | 219 | int offs = blockDim.x >> 1; 220 | while (offs > 0) 221 | { 222 | __syncthreads(); 223 | if (threadIdx.x < offs) 224 | { 225 | partial_sum[threadIdx.x] += partial_sum[threadIdx.x + offs]; 226 | } 227 | offs >>= 1; 228 | 229 | } 230 | __syncthreads(); 231 | if (threadIdx.x == 0) 232 | { 233 | dot_sum[0] = partial_sum[0]; // Save the dot product to the first element of dot_sum array 234 | } 235 | 236 | } 237 | 238 | 239 | /*! 
240 | 241 | Perform matrix-vector multiply needed for the Lanczos contribution to the Brownian velocity 242 | 243 | \param d_A matrix, N x m 244 | \param d_x multiplying vector, m x 1 245 | \param d_b result vector, A*x, m x 1 246 | \param group_size number of particles 247 | \param m number of iterations ( number of columns of A, length of x ) 248 | 249 | */ 250 | 251 | __global__ void gpu_stokes_MatVecMultiply_kernel( 252 | Scalar4 *d_A, 253 | Scalar *d_x, 254 | Scalar4 *d_b, 255 | unsigned int group_size, 256 | int m 257 | ){ 258 | 259 | int idx = blockDim.x * blockIdx.x + threadIdx.x; 260 | if (idx < group_size) { 261 | 262 | Scalar3 tempprod = make_scalar3( 0.0, 0.0, 0.0 ); 263 | 264 | for ( int ii = 0; ii < m; ++ii ){ 265 | 266 | Scalar4 matidx = d_A[ idx + ii*group_size ]; 267 | 268 | Scalar xcurr = d_x[ii]; 269 | 270 | tempprod.x = tempprod.x + matidx.x * xcurr; 271 | tempprod.y = tempprod.y + matidx.y * xcurr; 272 | tempprod.z = tempprod.z + matidx.z * xcurr; 273 | 274 | } 275 | 276 | d_b[idx] = make_scalar4( tempprod.x, tempprod.y, tempprod.z, d_A[idx].w ); 277 | 278 | } 279 | } 280 | 281 | /*! 282 | Kernel function to calculate position of each grid in reciprocal space: gridk 283 | */ 284 | __global__ 285 | void gpu_stokes_SetGridk_kernel( 286 | Scalar4 *gridk, 287 | int Nx, 288 | int Ny, 289 | int Nz, 290 | unsigned int NxNyNz, 291 | BoxDim box, 292 | Scalar xi, 293 | Scalar eta 294 | ){ 295 | 296 | int tid = blockDim.x * blockIdx.x + threadIdx.x; 297 | 298 | if ( tid < NxNyNz ) { 299 | 300 | int i = tid / (Ny*Nz); 301 | int j = (tid - i * Ny * Nz) / Nz; 302 | int k = tid % Nz; 303 | 304 | Scalar3 L = box.getL(); 305 | Scalar xy = box.getTiltFactorXY(); 306 | Scalar4 gridk_value; 307 | 308 | gridk_value.x = (i < (Nx+1) / 2) ? i : i - Nx; 309 | gridk_value.y = ( ((j < (Ny+1) / 2) ? j : j - Ny) - xy * gridk_value.x * L.y / L.x ) / L.y; // Fixed by Zsigi 2015 310 | gridk_value.x = gridk_value.x / L.x; 311 | gridk_value.z = ((k < (Nz+1) / 2) ? 
k : k - Nz) / L.z; 312 | 313 | gridk_value.x *= 2.0*3.1416926536; 314 | gridk_value.y *= 2.0*3.1416926536; 315 | gridk_value.z *= 2.0*3.1416926536; 316 | 317 | Scalar k2 = gridk_value.x*gridk_value.x + gridk_value.y*gridk_value.y + gridk_value.z*gridk_value.z; 318 | Scalar xisq = xi * xi; 319 | 320 | // Scaling factor used in wave space sum 321 | if (i == 0 && j == 0 && k == 0){ 322 | gridk_value.w = 0.0; 323 | } 324 | else{ 325 | // Have to divide by Nx*Ny*Nz to normalize the FFTs 326 | gridk_value.w = 6.0*3.1415926536 * (1.0 + k2/4.0/xisq) * expf( -(1-eta) * k2/4.0/xisq ) / ( k2 ) / Scalar( Nx*Ny*Nz ); 327 | } 328 | 329 | gridk[tid] = gridk_value; 330 | 331 | } 332 | } 333 | 334 | 335 | -------------------------------------------------------------------------------- /PSEv1/Helper.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 
20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Andrew Fiore 52 | 53 | /*! \file Helper.cuh 54 | \brief Declares GPU kernel code for helper functions for the Brownian and Mobility calculations. 55 | */ 56 | #include "hoomd/ParticleData.cuh" 57 | #include "hoomd/HOOMDMath.h" 58 | #include "hoomd/Index1D.h" 59 | 60 | #include 61 | 62 | //! 
Define the step_one kernel 63 | #ifndef __HELPER_CUH__ 64 | #define __HELPER_CUH__ 65 | 66 | //! Definition for comxplex variable storage 67 | #ifdef SINGLE_PRECISION 68 | #define CUFFTCOMPLEX cufftComplex 69 | #else 70 | #define CUFFTCOMPLEX cufftComplex 71 | #endif 72 | 73 | __global__ void gpu_stokes_ZeroGrid_kernel(CUFFTCOMPLEX *grid, unsigned int NxNyNz); 74 | 75 | __global__ void gpu_stokes_LinearCombination_kernel(Scalar4 *d_a, Scalar4 *d_b, Scalar4 *d_c, Scalar coeff_a, Scalar coeff_b, unsigned int group_size, unsigned int *d_group_members); 76 | 77 | __global__ void gpu_stokes_DotStepOne_kernel(Scalar4 *d_a, Scalar4 *d_b, Scalar *dot_sum, unsigned int group_size, unsigned int *d_group_members); 78 | 79 | __global__ void gpu_stokes_DotStepTwo_kernel(Scalar *dot_sum, unsigned int num_partial_sums); 80 | 81 | __global__ void gpu_stokes_MatVecMultiply_kernel(Scalar4 *d_A, Scalar *d_x, Scalar4 *d_b, unsigned int group_size, int m); 82 | 83 | __global__ void gpu_stokes_SetGridk_kernel(Scalar4 *gridk, int Nx, int Ny, int Nz, unsigned int NxNyNz, BoxDim box, Scalar xi, Scalar eta); 84 | 85 | 86 | #endif 87 | -------------------------------------------------------------------------------- /PSEv1/Mobility.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 
9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 
40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Andrew Fiore 52 | 53 | 54 | #include "Mobility.cuh" 55 | #include "Helper.cuh" 56 | 57 | #include "hoomd/TextureTools.h" 58 | 59 | #include 60 | 61 | #include 62 | #include 63 | 64 | #ifdef WIN32 65 | #include 66 | #else 67 | #include 68 | #endif 69 | 70 | //! command to convert floats or doubles to integers 71 | #ifdef SINGLE_PRECISION 72 | #define __scalar2int_rd __float2int_rd 73 | #else 74 | #define __scalar2int_rd __double2int_rd 75 | #endif 76 | 77 | 78 | /*! \file Mobility.cu 79 | \brief Defines GPU kernel code for Mobility calculations. 80 | */ 81 | 82 | //! Shared memory array for partial sum of dot product kernel 83 | extern __shared__ Scalar partial_sum[]; 84 | extern __shared__ Scalar4 shared_Fpos[]; 85 | 86 | //! Texture for reading table values 87 | scalar4_tex_t tables1_tex; 88 | //! Texture for reading particle positions 89 | scalar4_tex_t pos_tex; 90 | 91 | //! Spread particle quantities to the grid ( ALL PARTICLES SAME SIZE ) -- give one block per particle 92 | /*! \param d_pos positions of the particles, actually they are fetched on texture memory 93 | \param d_net_force net forces on the particles 94 | \param gridX x-component of force moments projected onto grid 95 | \param gridY y-component of force moments projected onto grid 96 | \param gridZ z-component of force moments projected onto grid 97 | \param group_size size of the group, i.e. 
number of particles 98 | \param Nx number of grid nodes in x direction 99 | \param Ny number of grid nodes in y direction 100 | \param Nz number of grid nodes in z direction 101 | \param d_group_members index array to global HOOMD tag on each particle 102 | \param box array containing box dimensions 103 | \param P number of grid nodes in support of spreading Gaussians 104 | \param gridh space between grid nodes in each dimension 105 | \param xi Ewald splitting parameter 106 | \param eta Spectral splitting parameter 107 | \param prefac Spreading function prefactor 108 | \param expfac Spreading function exponential factor 109 | 110 | One 3-D block of threads is launched per particle (block dimension = PxPxP). Max dimension 111 | is 10x10x10. If P > 10, each thread will do more than one grid point worth of work. 112 | 113 | */ 114 | __global__ void gpu_stokes_Spread_kernel( 115 | Scalar4 *d_pos, 116 | Scalar4 *d_net_force, 117 | CUFFTCOMPLEX *gridX, 118 | CUFFTCOMPLEX *gridY, 119 | CUFFTCOMPLEX *gridZ, 120 | int group_size, 121 | int Nx, 122 | int Ny, 123 | int Nz, 124 | unsigned int *d_group_members, 125 | BoxDim box, 126 | const int P, 127 | Scalar3 gridh, 128 | Scalar xi, 129 | Scalar eta, 130 | Scalar prefac, 131 | Scalar expfac 132 | ){ 133 | 134 | // Shared memory for particle force and position, so that each block 135 | // only has to read once 136 | __shared__ Scalar3 shared[2]; // 16 kb max 137 | 138 | Scalar3 *force_shared = shared; 139 | Scalar3 *pos_shared = &shared[1]; 140 | 141 | // Offset for the block (i.e. particle ID within group) 142 | int group_idx = blockIdx.x; 143 | 144 | // Offset for the thread (i.e. 
grid point ID within particle's support) 145 | int thread_offset = threadIdx.z + threadIdx.y * blockDim.z + threadIdx.x * blockDim.z*blockDim.y; 146 | 147 | // Global particle ID 148 | unsigned int idx = d_group_members[group_idx]; 149 | 150 | // Initialize shared memory and get particle position 151 | if ( thread_offset == 0 ){ 152 | Scalar4 tpos = texFetchScalar4(d_pos, pos_tex, idx); 153 | pos_shared[0].x = tpos.x; 154 | pos_shared[0].y = tpos.y; 155 | pos_shared[0].z = tpos.z; 156 | 157 | Scalar4 tforce = d_net_force[idx]; 158 | force_shared[0].x = tforce.x; 159 | force_shared[0].y = tforce.y; 160 | force_shared[0].z = tforce.z; 161 | } 162 | __syncthreads(); 163 | 164 | // Box dimension 165 | Scalar3 L = box.getL(); 166 | Scalar3 Ld2 = L / 2.0; 167 | 168 | // Retrieve position from shared memory 169 | Scalar3 pos = pos_shared[0]; 170 | Scalar3 force = force_shared[0]; 171 | 172 | // Fractional position within box 173 | Scalar3 pos_frac = box.makeFraction(pos); 174 | 175 | pos_frac.x *= (Scalar)Nx; 176 | pos_frac.y *= (Scalar)Ny; 177 | pos_frac.z *= (Scalar)Nz; 178 | 179 | // Grid index of floor of fractional position 180 | int x = int( pos_frac.x ); 181 | int y = int( pos_frac.y ); 182 | int z = int( pos_frac.z ); 183 | 184 | // Amount of work needed for each thread to cover support 185 | // (Required in case support size is larger than grid dimension, 186 | // but in most cases, should have n.x = n.y = n.z = 1 ) 187 | int3 n, t; 188 | n.x = ( P + blockDim.x - 1 ) / blockDim.x; // ceiling 189 | n.y = ( P + blockDim.y - 1 ) / blockDim.y; 190 | n.z = ( P + blockDim.z - 1 ) / blockDim.z; 191 | 192 | // Grid point associated with current thread 193 | int Pd2 = P/2; // integer division does floor 194 | 195 | for ( int ii = 0; ii < n.x; ++ii ){ 196 | 197 | t.x = threadIdx.x + ii*blockDim.x; 198 | 199 | for ( int jj = 0; jj < n.y; ++jj ){ 200 | 201 | t.y = threadIdx.y + jj*blockDim.y; 202 | 203 | for ( int kk = 0; kk < n.z; ++kk ){ 204 | 205 | t.z = threadIdx.z + 
kk*blockDim.z; 206 | 207 | if ( ( t.x < P ) && ( t.y < P ) && ( t.z < P ) ){ 208 | 209 | // x,y,z indices for current thread 210 | // 211 | // Arithmetic with P makes sure distribution is centered on the particle 212 | int x_inp = x + t.x - Pd2 + 1 - (P % 2) * ( pos_frac.x - Scalar( x ) < 0.5 ); 213 | int y_inp = y + t.y - Pd2 + 1 - (P % 2) * ( pos_frac.y - Scalar( y ) < 0.5 ); 214 | int z_inp = z + t.z - Pd2 + 1 - (P % 2) * ( pos_frac.z - Scalar( z ) < 0.5 ); 215 | 216 | // Periodic wrapping of grid point 217 | x_inp = (x_inp<0) ? x_inp+Nx : ( (x_inp>Nx-1) ? x_inp-Nx : x_inp ); 218 | y_inp = (y_inp<0) ? y_inp+Ny : ( (y_inp>Ny-1) ? y_inp-Ny : y_inp ); 219 | z_inp = (z_inp<0) ? z_inp+Nz : ( (z_inp>Nz-1) ? z_inp-Nz : z_inp ); 220 | 221 | // x,y,z coordinates for current thread 222 | Scalar3 pos_grid; 223 | pos_grid.x = gridh.x*x_inp - Ld2.x; 224 | pos_grid.y = gridh.y*y_inp - Ld2.y; 225 | pos_grid.z = gridh.z*z_inp - Ld2.z; 226 | 227 | // Shear the grid position 228 | // !!! This only works for linear shear where the shear gradient is along y 229 | // and the shear direction is along x 230 | pos_grid.x = pos_grid.x + box.getTiltFactorXY() * pos_grid.y; 231 | 232 | // Global index for current grid point 233 | int grid_idx = x_inp * Ny * Nz + y_inp * Nz + z_inp; 234 | 235 | // Distance from particle to grid node 236 | Scalar3 r = pos_grid - pos; 237 | r = box.minImage(r); 238 | Scalar rsq = r.x*r.x + r.y*r.y + r.z*r.z; 239 | 240 | // Magnitude of the force contribution to the current grid node 241 | Scalar3 force_inp = prefac * expf( -expfac * rsq ) * force; 242 | 243 | // Add force to the grid 244 | atomicAdd( &(gridX[grid_idx].x), force_inp.x); 245 | atomicAdd( &(gridY[grid_idx].x), force_inp.y); 246 | atomicAdd( &(gridZ[grid_idx].x), force_inp.z); 247 | }// check thread is within support 248 | }// kk 249 | }// jj 250 | }// ii 251 | 252 | } 253 | 254 | //! 
//! Compute the velocity from the force moments on the grid (Same Size Particles)
//
// This is the operator "B" from the paper
//
/*! \param gridX x-component of force moments projected onto grid
	\param gridY y-component of force moments projected onto grid
	\param gridZ z-component of force moments projected onto grid
	\param gridk wave vector and scaling factor associated with each reciprocal grid node
	\param NxNyNz total number of grid nodes
*/
__global__ void gpu_stokes_Green_kernel(
				CUFFTCOMPLEX *gridX,
				CUFFTCOMPLEX *gridY,
				CUFFTCOMPLEX *gridZ,
				Scalar4 *gridk,
				unsigned int NxNyNz
				) {

	// One thread per reciprocal-space grid node
	int tid = blockDim.x * blockIdx.x + threadIdx.x;

	if ( tid < NxNyNz ) {

		// FFT'd force at this node (real/imaginary parts in .x/.y)
		Scalar2 fX = gridX[tid];
		Scalar2 fY = gridY[tid];
		Scalar2 fZ = gridZ[tid];

		// Wave vector for this node (scaling factor stored in .w)
		Scalar4 tk = gridk[tid];
		Scalar ksq = tk.x*tk.x + tk.y*tk.y + tk.z*tk.z;
		Scalar k = sqrtf( ksq );

		// k.F / |k|^2; the tid==0 (k = 0) node is excluded to avoid division by zero
		Scalar2 kdF = (tid==0) ? make_scalar2(0.0,0.0) : make_scalar2( ( tk.x*fX.x + tk.y*fY.x + tk.z*fZ.x ) / ksq, ( tk.x*fX.y + tk.y*fY.y + tk.z*fZ.y ) / ksq );

		// Scaling factor (zero at the k = 0 node)
		Scalar B = (tid==0) ? 0.0 : tk.w * ( sinf( k ) / k ) * ( sinf( k ) / k );

		// Project out the longitudinal component, scale, and write the velocity back in place
		gridX[tid] = make_scalar2( ( fX.x - tk.x * kdF.x ) * B, ( fX.y - tk.x * kdF.y ) * B );
		gridY[tid] = make_scalar2( ( fY.x - tk.y * kdF.x ) * B, ( fY.y - tk.y * kdF.y ) * B );
		gridZ[tid] = make_scalar2( ( fZ.x - tk.z * kdF.x ) * B, ( fZ.y - tk.z * kdF.y ) * B );

	}
}

//! Add velocity from grid to particles ( Same Size Particles, Block Per Particle (support) )
/*!
\param d_pos positions of the particles, actually they are fetched on texture memory 303 | \param d_net_force net forces on the particles 304 | \param d_vel particle velocity 305 | \param gridX x-component of force moments projected onto grid 306 | \param gridY y-component of force moments projected onto grid 307 | \param gridZ z-component of force moments projected onto grid 308 | \param group_size size of the group, i.e. number of particles 309 | \param Nx number of grid nodes in x direction 310 | \param Ny number of grid nodes in y direction 311 | \param Nz number of grid nodes in z direction 312 | \param xi Ewald splitting parameter 313 | \param eta Spectral splitting parameter 314 | \param d_group_members index array to global HOOMD tag on each particle 315 | \param box array containing box dimensions 316 | \param P number of grid nodes in support of spreading Gaussians 317 | \param gridh space between grid nodes in each dimension 318 | \param prefac Spreading function prefactor 319 | \param expfac Spreading function exponential factor 320 | 321 | One 3-D block of threads is launched per particle (block dimension = PxPxP). Max dimension 322 | is 10x10x10 because of shared memory limitations. If P > 10, each thread will do more 323 | than one grid point worth of work. 
324 | */ 325 | __global__ void gpu_stokes_Contract_kernel( 326 | Scalar4 *d_pos, 327 | Scalar4 *d_vel, 328 | CUFFTCOMPLEX *gridX, 329 | CUFFTCOMPLEX *gridY, 330 | CUFFTCOMPLEX *gridZ, 331 | int group_size, 332 | int Nx, 333 | int Ny, 334 | int Nz, 335 | Scalar xi, 336 | Scalar eta, 337 | unsigned int *d_group_members, 338 | BoxDim box, 339 | const int P, 340 | Scalar3 gridh, 341 | Scalar prefac, 342 | Scalar expfac 343 | ){ 344 | 345 | // Shared memory for particle velocity and position, so that each block 346 | // only has to read one 347 | extern __shared__ Scalar3 shared[]; 348 | 349 | Scalar3 *velocity = shared; 350 | Scalar3 *pos_shared = &shared[blockDim.x*blockDim.y*blockDim.z]; 351 | 352 | // Particle index within each group (block per particle) 353 | int group_idx = blockIdx.x; 354 | 355 | // Thread index within the block (grid point index) 356 | int thread_offset = threadIdx.z + threadIdx.y * blockDim.z + threadIdx.x * blockDim.z*blockDim.y; 357 | 358 | // Total number of threads within the block 359 | int block_size = blockDim.x * blockDim.y * blockDim.z; 360 | 361 | // Global particle ID 362 | unsigned int idx = d_group_members[group_idx]; 363 | 364 | // Initialize shared memory and get particle position 365 | velocity[thread_offset] = make_scalar3(0.0,0.0,0.0); 366 | if ( thread_offset == 0 ){ 367 | Scalar4 tpos = texFetchScalar4(d_pos, pos_tex, idx); 368 | pos_shared[0] = make_scalar3( tpos.x, tpos.y, tpos.z ); 369 | } 370 | __syncthreads(); 371 | 372 | // Box dimension 373 | Scalar3 L = box.getL(); 374 | Scalar3 Ld2 = L / 2.0; 375 | 376 | // Retrieve position from shared memory 377 | Scalar3 pos = pos_shared[0]; 378 | 379 | // Fractional position within box 380 | Scalar3 pos_frac = box.makeFraction(pos); 381 | 382 | pos_frac.x *= (Scalar)Nx; 383 | pos_frac.y *= (Scalar)Ny; 384 | pos_frac.z *= (Scalar)Nz; 385 | 386 | int x = int( pos_frac.x ); 387 | int y = int( pos_frac.y ); 388 | int z = int( pos_frac.z ); 389 | 390 | // Amount of work needed for 
each thread to cover support 391 | // (Required in case support size is larger than grid dimension, 392 | // but in most cases, should have n.x = n.y = n.z = 1 ) 393 | int3 n, t; 394 | n.x = ( P + blockDim.x - 1 ) / blockDim.x; // ceiling 395 | n.y = ( P + blockDim.y - 1 ) / blockDim.y; 396 | n.z = ( P + blockDim.z - 1 ) / blockDim.z; 397 | 398 | // Grid point associated with current thread 399 | int Pd2 = P / 2; // integer division does floor 400 | 401 | for ( int ii = 0; ii < n.x; ++ii ){ 402 | 403 | t.x = threadIdx.x + ii*blockDim.x; 404 | 405 | for ( int jj = 0; jj < n.y; ++jj ){ 406 | 407 | t.y = threadIdx.y + jj*blockDim.y; 408 | 409 | for ( int kk = 0; kk < n.z; ++kk ){ 410 | 411 | t.z = threadIdx.z + kk*blockDim.z; 412 | 413 | if( ( t.x < P ) && ( t.y < P ) && ( t.z < P ) ){ 414 | 415 | // x,y,z indices for current thread 416 | // 417 | // Arithmetic with P makes sure distribution is centered on the particle 418 | int x_inp = x + t.x - Pd2 + 1 - (P % 2) * ( pos_frac.x - Scalar( x ) < 0.5 ); 419 | int y_inp = y + t.y - Pd2 + 1 - (P % 2) * ( pos_frac.y - Scalar( y ) < 0.5 ); 420 | int z_inp = z + t.z - Pd2 + 1 - (P % 2) * ( pos_frac.z - Scalar( z ) < 0.5 ); 421 | 422 | // Periodic wrapping of grid point 423 | x_inp = (x_inp<0) ? x_inp+Nx : ( (x_inp>Nx-1) ? x_inp-Nx : x_inp ); 424 | y_inp = (y_inp<0) ? y_inp+Ny : ( (y_inp>Ny-1) ? y_inp-Ny : y_inp ); 425 | z_inp = (z_inp<0) ? z_inp+Nz : ( (z_inp>Nz-1) ? z_inp-Nz : z_inp ); 426 | 427 | // x,y,z coordinates for current thread 428 | Scalar3 pos_grid; 429 | pos_grid.x = gridh.x*x_inp - Ld2.x; 430 | pos_grid.y = gridh.y*y_inp - Ld2.y; 431 | pos_grid.z = gridh.z*z_inp - Ld2.z; 432 | 433 | // Shear the grid position 434 | // !!! 
This only works for linear shear where the shear gradient is along y 435 | // and the shear direction is along x 436 | pos_grid.x = pos_grid.x + box.getTiltFactorXY() * pos_grid.y; 437 | 438 | // Global index for current grid point 439 | int grid_idx = x_inp * Ny * Nz + y_inp * Nz + z_inp; 440 | 441 | // Distance from particle to grid node 442 | Scalar3 r = pos_grid - pos; 443 | r = box.minImage(r); 444 | Scalar rsq = r.x*r.x + r.y*r.y + r.z*r.z; 445 | 446 | // Spreading Factor 447 | Scalar Cfac = prefac * expf( -expfac * rsq ); 448 | 449 | // Get velocity from reduction (THIS IS THE SLOW STEP): 450 | velocity[thread_offset] += Cfac * make_scalar3( gridX[grid_idx].x, gridY[grid_idx].x, gridZ[grid_idx].x ); 451 | } 452 | }//kk 453 | }//jj 454 | }//ii 455 | 456 | // Intra-block reduction for the total particle velocity 457 | // (add contributions from all grid points) 458 | int offs = block_size; 459 | int offs_prev; 460 | while (offs > 1) 461 | { 462 | offs_prev = offs; 463 | offs = ( offs + 1 ) / 2; 464 | __syncthreads(); 465 | if (thread_offset + offs < offs_prev) 466 | { 467 | velocity[thread_offset] += velocity[thread_offset + offs]; 468 | } 469 | 470 | } 471 | 472 | // Write out to global memory 473 | if (thread_offset == 0){ 474 | d_vel[idx] = make_scalar4(velocity[0].x, velocity[0].y, velocity[0].z, d_vel[idx].w); 475 | } 476 | 477 | } 478 | 479 | /*! 480 | Wrapper to drive all the kernel functions used to compute 481 | the wave space part of Mobility ( Same Size Particles ) 482 | 483 | */ 484 | /*! \param d_pos positions of the particles, actually they are fetched on texture memory 485 | \param d_vel particle velocity 486 | \param d_net_force net forces on the particles 487 | \param group_size size of the group, i.e. 
number of particles 488 | \param d_group_members index array to global HOOMD tag on each particle 489 | \param box array containing box dimensions 490 | \param xi Ewald splitting parameter 491 | \param eta Spectral splitting parameter 492 | \param ewald_cut Cut-off distance for real-space interaction 493 | \param ewald_dr Distance spacing using in computing the pre-tabulated tables 494 | \param ewald_n Number of entries in the Ewald tables 495 | \param d_ewaldC Pre-tabulated form of the real-space Ewald sum for the Velocity-Force coupling 496 | \param d_gridX x-component of force moments projected onto grid 497 | \param d_gridY y-component of force moments projected onto grid 498 | \param d_gridZ z-component of force moments projected onto grid 499 | \param d_gridk wave vector and scaling factor associated with each reciprocal grid node 500 | \param plan Plan for cufft 501 | \param Nx Number of grid/FFT nodes in x-direction 502 | \param Ny Number of grid/FFT nodes in y-direction 503 | \param Nz Number of grid/FFT nodes in z-direction 504 | \param d_n_neigh list containing number of neighbors for each particle 505 | \param d_nlist list containing neighbors of each particle 506 | \param nli index into nlist 507 | \param NxNyNz total number of grid/FFT nodes 508 | \param grid block grid to use when launching kernels 509 | \param threads number of threads per block for kernels 510 | \param gridBlockSize number of threads per block 511 | \param gridNBlock number of blocks 512 | \param P number of nodes in support of each gaussian for k-space sum 513 | \param gridh distance between grid nodes 514 | */ 515 | void gpu_stokes_Mwave_wrap( 516 | Scalar4 *d_pos, 517 | Scalar4 *d_vel, 518 | Scalar4 *d_net_force, 519 | unsigned int *d_group_members, 520 | unsigned int group_size, 521 | const BoxDim& box, 522 | Scalar xi, 523 | Scalar eta, 524 | Scalar4 *d_gridk, 525 | CUFFTCOMPLEX *d_gridX, 526 | CUFFTCOMPLEX *d_gridY, 527 | CUFFTCOMPLEX *d_gridZ, 528 | cufftHandle plan, 529 | 
const int Nx, 530 | const int Ny, 531 | const int Nz, 532 | unsigned int NxNyNz, 533 | dim3 grid, 534 | dim3 threads, 535 | int gridBlockSize, 536 | int gridNBlock, 537 | const int P, 538 | Scalar3 gridh 539 | ){ 540 | 541 | // Spreading and contraction stuff 542 | dim3 Cgrid( group_size, 1, 1); 543 | int B = ( P < 10 ) ? P : 10; 544 | dim3 Cthreads(B, B, B); 545 | 546 | Scalar quadW = gridh.x * gridh.y * gridh.z; 547 | Scalar xisq = xi * xi; 548 | Scalar prefac = ( 2.0 * xisq / 3.1415926536 / eta ) * sqrtf( 2.0 * xisq / 3.1415926536 / eta ); 549 | Scalar expfac = 2.0 * xisq / eta; 550 | 551 | // Reset the grid ( remove any previously distributed forces ) 552 | gpu_stokes_ZeroGrid_kernel<<>>(d_gridX,NxNyNz); 553 | gpu_stokes_ZeroGrid_kernel<<>>(d_gridY,NxNyNz); 554 | gpu_stokes_ZeroGrid_kernel<<>>(d_gridZ,NxNyNz); 555 | 556 | // Spread forces onto grid 557 | gpu_stokes_Spread_kernel<<>>( d_pos, d_net_force, d_gridX, d_gridY, d_gridZ, group_size, Nx, Ny, Nz, d_group_members, box, P, gridh, xi, eta, prefac, expfac ); 558 | 559 | // Perform FFT on gridded forces 560 | cufftExecC2C(plan, d_gridX, d_gridX, CUFFT_FORWARD); 561 | cufftExecC2C(plan, d_gridY, d_gridY, CUFFT_FORWARD); 562 | cufftExecC2C(plan, d_gridZ, d_gridZ, CUFFT_FORWARD); 563 | 564 | // Apply wave space scaling to FFT'd forces 565 | gpu_stokes_Green_kernel<<>>( d_gridX, d_gridY, d_gridZ, d_gridk, NxNyNz); 566 | 567 | // Return rescaled forces to real space 568 | cufftExecC2C(plan, d_gridX, d_gridX, CUFFT_INVERSE); 569 | cufftExecC2C(plan, d_gridY, d_gridY, CUFFT_INVERSE); 570 | cufftExecC2C(plan, d_gridZ, d_gridZ, CUFFT_INVERSE); 571 | 572 | // Evaluate contribution of grid velocities at particle centers 573 | gpu_stokes_Contract_kernel<<>>( d_pos, d_vel, d_gridX, d_gridY, d_gridZ, group_size, Nx, Ny, Nz, xi, eta, d_group_members, box, P, gridh, quadW*prefac, expfac ); 574 | 575 | } 576 | 577 | // Add real space Ewald summation to velocity of each particle 578 | // NLIST Method 579 | /*! 
\param d_pos positions of the particles, actually they are fetched on texture memory 580 | \param d_vel particle velocity 581 | \param d_net_force net forces on the particles 582 | \param group_size size of the group, i.e. number of particles 583 | \param xi Ewald splitting parameter 584 | \param d_ewaldC Pre-tabulated form of the real-space Ewald sum for the Velocity-Force coupling 585 | \param ewald_cut Cut-off distance for real-space interaction 586 | \param ewald_n Number of entries in the Ewald tables 587 | \param ewald_dr Distance spacing using in computing the pre-tabulated tables 588 | \param d_group_members index array to global HOOMD tag on each particle 589 | \param box array containing box dimensions 590 | \param d_n_neigh list containing number of neighbors for each particle 591 | \param d_nlist list containing neighbors of all particles 592 | \param d_headlist list of particle offsets into d_nlist 593 | */ 594 | __global__ void gpu_stokes_Mreal_kernel( 595 | Scalar4 *d_pos, 596 | Scalar4 *d_vel, 597 | Scalar4 *d_net_force, 598 | int group_size, 599 | Scalar xi, 600 | Scalar4 *d_ewaldC1, 601 | Scalar self, 602 | Scalar ewald_cut, 603 | int ewald_n, 604 | Scalar ewald_dr, 605 | unsigned int *d_group_members, 606 | BoxDim box, 607 | const unsigned int *d_n_neigh, 608 | const unsigned int *d_nlist, 609 | const unsigned int *d_headlist 610 | ){ 611 | 612 | // Index for current thread 613 | int group_idx = blockDim.x * blockIdx.x + threadIdx.x; 614 | 615 | // Initialize contribution to velocity 616 | Scalar4 u = make_scalar4( 0.0, 0.0, 0.0, 0.0 ); 617 | 618 | if (group_idx < group_size) { 619 | 620 | // Particle for this thread 621 | unsigned int idx = d_group_members[group_idx]; 622 | 623 | // Number of neighbors for current particle 624 | unsigned int n_neigh = d_n_neigh[idx]; 625 | unsigned int head_idx = d_headlist[idx]; 626 | 627 | // Particle position and table ID 628 | Scalar4 posi = texFetchScalar4(d_pos, pos_tex, idx); 629 | 630 | // Self 
contribution 631 | Scalar4 F = d_net_force[idx]; 632 | u = make_scalar4( self * F.x, self * F.y, self * F.z, 0.0 ); 633 | 634 | // Minimum and maximum distance for pair calculation 635 | Scalar mindistSq = ewald_dr * ewald_dr; 636 | Scalar maxdistSq = ewald_cut * ewald_cut; 637 | 638 | for (int neigh_idx = 0; neigh_idx < n_neigh; neigh_idx++) { 639 | 640 | // Get index for current neightbor 641 | unsigned int cur_j = d_nlist[ head_idx + neigh_idx ]; 642 | 643 | // Position and size of neighbor particle 644 | Scalar4 posj = texFetchScalar4(d_pos, pos_tex, cur_j); 645 | 646 | // Distance vector between current particle and neighbor 647 | Scalar3 r = make_scalar3( posi.x - posj.x, posi.y - posj.y, posi.z - posj.z ); 648 | r = box.minImage(r); 649 | Scalar distSqr = dot(r,r); 650 | 651 | // Add neighbor contribution if it is within the real space cutoff radius 652 | if ( ( distSqr < maxdistSq ) && ( distSqr >= mindistSq ) ) { 653 | 654 | // Need distance 655 | Scalar dist = sqrtf( distSqr ); 656 | 657 | // Force on neighbor particle 658 | Scalar4 Fj = d_net_force[cur_j]; 659 | 660 | // Fetch relevant elements from textured table for real space interaction 661 | int r_ind = __scalar2int_rd( ewald_n * ( dist - ewald_dr ) / ( ewald_cut - ewald_dr ) ); 662 | int offset = r_ind; 663 | 664 | Scalar4 tewaldC1 = texFetchScalar4(d_ewaldC1, tables1_tex, offset); 665 | 666 | // Linear interpolation of table 667 | Scalar fac = dist / ewald_dr - r_ind - Scalar(1.0); 668 | 669 | Scalar Imrr = tewaldC1.x + ( tewaldC1.z - tewaldC1.x ) * fac; 670 | Scalar rr = tewaldC1.y + ( tewaldC1.w - tewaldC1.y ) * fac; 671 | 672 | // Update velocity 673 | Scalar rdotf = ( r.x*Fj.x + r.y*Fj.y + r.z*Fj.z ) / distSqr; 674 | 675 | u.x += Imrr * Fj.x + ( rr - Imrr ) * rdotf * r.x; 676 | u.y += Imrr * Fj.y + ( rr - Imrr ) * rdotf * r.y; 677 | u.z += Imrr * Fj.z + ( rr - Imrr ) * rdotf * r.z; 678 | 679 | } 680 | 681 | } 682 | 683 | // Write to output 684 | d_vel[idx] = u; 685 | 686 | } 687 | } 688 | 689 
/*!
	Wrap all the functions to compute U = M * F ( SAME SIZE PARTICLES )
	Drive GPU kernel functions

	d_vel = M * d_net_force

*/
/*! \param d_pos positions of the particles, actually they are fetched on texture memory
	\param d_vel particle velocity
	\param d_net_force net forces on the particles
	\param group_size size of the group, i.e. number of particles
	\param d_group_members index array to global HOOMD tag on each particle
	\param box array containing box dimensions
	\param xi Ewald splitting parameter
	\param eta Spectral splitting parameter
	\param ewald_cut Cut-off distance for real-space interaction
	\param ewald_dr Distance spacing using in computing the pre-tabulated tables
	\param ewald_n Number of entries in the Ewald tables
	\param d_ewaldC1 Pre-tabulated form of the real-space Ewald sum for the Velocity-Force coupling
	\param self self-mobility coefficient
	\param d_gridX x-component of force moments projected onto grid
	\param d_gridY y-component of force moments projected onto grid
	\param d_gridZ z-component of force moments projected onto grid
	\param d_gridk wave vector and scaling factor associated with each reciprocal grid node
	\param plan Plan for cufft
	\param Nx Number of grid/FFT nodes in x-direction
	\param Ny Number of grid/FFT nodes in y-direction
	\param Nz Number of grid/FFT nodes in z-direction
	\param d_n_neigh list containing number of neighbors for each particle
	\param d_nlist list containing neighbors of each particle
	\param d_headlist list of particle offsets into d_nlist
	\param NxNyNz total number of grid/FFT nodes
	\param grid block grid to use when launching kernels
	\param threads number of threads per block for kernels
	\param gridBlockSize number of threads per block
	\param gridNBlock number of blocks
	\param P number of nodes in support of each gaussian for k-space sum
	\param gridh distance between grid nodes
*/
void gpu_stokes_Mobility_wrap(
				Scalar4 *d_pos,
				Scalar4 *d_vel,
				Scalar4 *d_net_force,
				unsigned int *d_group_members,
				unsigned int group_size,
				const BoxDim& box,
				Scalar xi,
				Scalar eta,
				Scalar ewald_cut,
				Scalar ewald_dr,
				int ewald_n,
				Scalar4 *d_ewaldC1,
				Scalar self,
				Scalar4 *d_gridk,
				CUFFTCOMPLEX *d_gridX,
				CUFFTCOMPLEX *d_gridY,
				CUFFTCOMPLEX *d_gridZ,
				cufftHandle plan,
				const int Nx,
				const int Ny,
				const int Nz,
				const unsigned int *d_n_neigh,
				const unsigned int *d_nlist,
				const unsigned int *d_headlist,
				unsigned int NxNyNz,
				dim3 grid,
				dim3 threads,
				int gridBlockSize,
				int gridNBlock,
				const int P,
				Scalar3 gridh ){

	// Scratch buffers for the real-space and wave-space parts of the velocity.
	// NOTE(review): allocations are unchecked and made on every call — consider
	// caching/checking upstream; left as-is to preserve behavior.
	Scalar4 *d_vel1, *d_vel2;
	cudaMalloc( &d_vel1, group_size*sizeof(Scalar4) );
	cudaMalloc( &d_vel2, group_size*sizeof(Scalar4) );

	// Wave space contribution to the velocity
	gpu_stokes_Mwave_wrap( d_pos, d_vel1, d_net_force, d_group_members, group_size, box, xi, eta, d_gridk, d_gridX, d_gridY, d_gridZ, plan, Nx, Ny, Nz, NxNyNz, grid, threads, gridBlockSize, gridNBlock, P, gridh );

	// Real space contribution to the velocity
	//
	// Real space calculation takes care of self contributions
	//
	// NOTE(review): the <<<...>>> launch configurations below were stripped to
	// "<<>>" in the pasted source; reconstructed from the grid/threads
	// parameters — confirm against the original file.
	gpu_stokes_Mreal_kernel<<<grid, threads>>>(d_pos, d_vel2, d_net_force, group_size, xi, d_ewaldC1, self, ewald_cut, ewald_n, ewald_dr, d_group_members, box, d_n_neigh, d_nlist, d_headlist );

	// Add real and wave space parts together: d_vel = 1.0*d_vel1 + 1.0*d_vel2
	gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_vel1, d_vel2, d_vel, 1.0, 1.0, group_size, d_group_members);

	// Free the scratch buffers
	cudaFree(d_vel1);
	cudaFree(d_vel2);

}

// -----------------------------------------------------------------------------
/PSEv1/Mobility.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 
33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Andrew Fiore 52 | 53 | /*! \file Stokes.cuh 54 | \brief Declares GPU kernel code for integration considering hydrodynamic interactions on the GPU. Used by Stokes. 55 | */ 56 | #include "hoomd/ParticleData.cuh" 57 | #include "hoomd/HOOMDMath.h" 58 | #include "hoomd/Index1D.h" 59 | #include 60 | 61 | //! Define the step_one kernel 62 | #ifndef __MOBILITYSAMESIZE_CUH__ 63 | #define __MOBILITYSAMESIZE_CUH__ 64 | 65 | //! 
Definition for comxplex variable storage 66 | #ifdef SINGLE_PRECISION 67 | #define CUFFTCOMPLEX cufftComplex 68 | #else 69 | #define CUFFTCOMPLEX cufftComplex 70 | #endif 71 | 72 | 73 | void gpu_stokes_Mobility_wrap( Scalar4 *d_pos, 74 | Scalar4 *d_vel, 75 | Scalar4 *d_net_force, 76 | unsigned int *d_group_members, 77 | unsigned int group_size, 78 | const BoxDim& box, 79 | Scalar xi, 80 | Scalar eta, 81 | Scalar ewald_cut, 82 | Scalar ewald_dr, 83 | int ewald_n, 84 | Scalar4 *d_ewaldC1, 85 | Scalar self, 86 | Scalar4 *d_gridk, 87 | CUFFTCOMPLEX *d_gridX, 88 | CUFFTCOMPLEX *d_gridY, 89 | CUFFTCOMPLEX *d_gridZ, 90 | cufftHandle plan, 91 | const int Nx, 92 | const int Ny, 93 | const int Nz, 94 | const unsigned int *d_n_neigh, 95 | const unsigned int *d_nlist, 96 | const unsigned int *d_headlist, 97 | unsigned int NxNyNz, 98 | dim3 grid, 99 | dim3 threads, 100 | int gridBlockSize, 101 | int gridNBlock, 102 | const int P, 103 | Scalar3 gridh ); 104 | 105 | __global__ 106 | void gpu_stokes_Mreal_kernel( Scalar4 *d_pos, 107 | Scalar4 *d_vel, 108 | Scalar4 *d_net_force, 109 | int group_size, 110 | Scalar xi, 111 | Scalar4 *d_ewaldC1, 112 | Scalar self, 113 | Scalar ewald_cut, 114 | int ewald_n, 115 | Scalar ewald_dr, 116 | unsigned int *d_group_members, 117 | BoxDim box, 118 | const unsigned int *d_n_neigh, 119 | const unsigned int *d_nlist, 120 | const unsigned int *d_headlist ); 121 | 122 | __global__ void gpu_stokes_Spread_kernel( Scalar4 *d_pos, 123 | Scalar4 *d_net_force, 124 | CUFFTCOMPLEX *gridX, 125 | CUFFTCOMPLEX *gridY, 126 | CUFFTCOMPLEX *gridZ, 127 | int group_size, 128 | int Nx, 129 | int Ny, 130 | int Nz, 131 | unsigned int *d_group_members, 132 | BoxDim box, 133 | const int P, 134 | Scalar3 gridh, 135 | Scalar xi, 136 | Scalar eta, 137 | Scalar prefac, 138 | Scalar expfac ); 139 | 140 | __global__ void gpu_stokes_Green_kernel(CUFFTCOMPLEX *gridX, CUFFTCOMPLEX *gridY, CUFFTCOMPLEX *gridZ, Scalar4 *gridk, unsigned int NxNyNz); 141 | 142 | __global__ void 
gpu_stokes_Contract_kernel( Scalar4 *d_pos, 143 | Scalar4 *d_vel, 144 | CUFFTCOMPLEX *gridX, 145 | CUFFTCOMPLEX *gridY, 146 | CUFFTCOMPLEX *gridZ, 147 | int group_size, 148 | int Nx, 149 | int Ny, 150 | int Nz, 151 | Scalar xi, 152 | Scalar eta, 153 | unsigned int *d_group_members, 154 | BoxDim box, 155 | const int P, 156 | Scalar3 gridh, 157 | Scalar prefac, 158 | Scalar expfac ); 159 | 160 | #endif 161 | -------------------------------------------------------------------------------- /PSEv1/ShearFunction.cc: -------------------------------------------------------------------------------- 1 | // Maintainer: Gang Wang 2 | // Updated to HOOMD2.x compatibility by Andrew M. Fiore 3 | 4 | /*! \file ShearFunction.cc 5 | \brief Defines ShearFunction class and relevant functions 6 | */ 7 | 8 | #ifdef WIN32 9 | #pragma warning( push ) 10 | #pragma warning( disable : 4103 4244 ) 11 | #endif 12 | 13 | #include "ShearFunction.h" 14 | 15 | using namespace std; 16 | 17 | void export_ShearFunction(pybind11::module& m) 18 | { 19 | 20 | pybind11::class_ >( m, "ShearFunction" ) 21 | .def(pybind11::init< >()) 22 | .def("getShearRate", &ShearFunction::getShearRate) 23 | .def("getStrain", &ShearFunction::getStrain) 24 | .def("getOffset", &ShearFunction::getOffset); 25 | 26 | } 27 | 28 | #ifdef WIN32 29 | #pragma warning( pop ) 30 | #endif 31 | -------------------------------------------------------------------------------- /PSEv1/ShearFunction.h: -------------------------------------------------------------------------------- 1 | #ifdef NVCC 2 | #error This header cannot be compiled by nvcc 3 | #endif 4 | 5 | #include 6 | 7 | #ifndef __SHEAR_FUNCTION_H__ 8 | #define __SHEAR_FUNCTION_H__ 9 | 10 | #include 11 | 12 | //! Abstract class representing the function of shear rate and shear strain 13 | /*! 
ShearFunction class, having three public pure virtual functions: 14 | 1) getShearRate; 2) getStrain; and 3) getOffset 15 | This interface can make it easier to add new shear functionality to HOOMD. 16 | Compared with previous approach, we can simply subclass this interface without 17 | changing any existing code or creating a new plugin. 18 | */ 19 | class ShearFunction 20 | { 21 | public: 22 | 23 | //! Get shear rate at certain timestep 24 | /*! \param timestep the timestep 25 | */ 26 | virtual double getShearRate(unsigned int timestep){ return double(0.0); } 27 | 28 | //! Get strain at certain timestep (unwrapped) 29 | /*! \param timestep the timestep 30 | */ 31 | virtual double getStrain(unsigned int timestep){ return double(0.0); } 32 | 33 | //! Get the offset of timestep (typically offset is the timestep when the shear starts) 34 | virtual unsigned int getOffset(){ return int(0); } 35 | 36 | }; 37 | 38 | //! Export the ShearFunction class to python 39 | void export_ShearFunction(pybind11::module& m); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /PSEv1/ShearFunctionWrap.cc: -------------------------------------------------------------------------------- 1 | // Maintainer: Gang Wang 2 | // Updated to HOOMD2.x compatibility by Andrew M. Fiore 3 | 4 | /*! 
\file ShearFunction.cc 5 | \brief Defines ShearFunction class and relevant functions 6 | */ 7 | 8 | #ifdef WIN32 9 | #pragma warning( push ) 10 | #pragma warning( disable : 4103 4244 ) 11 | #endif 12 | 13 | #include "ShearFunctionWrap.h" 14 | 15 | using namespace std; 16 | 17 | void export_ShearFunctionWrap(pybind11::module& m) 18 | { 19 | 20 | pybind11::class_ >( m, "ShearFunctionWrap", pybind11::base() ) 21 | .def(pybind11::init< >()) 22 | .def("getShearRate", &ShearFunction::getShearRate) 23 | .def("getStrain", &ShearFunction::getStrain) 24 | .def("getOffset", &ShearFunction::getOffset); 25 | 26 | } 27 | 28 | #ifdef WIN32 29 | #pragma warning( pop ) 30 | #endif 31 | -------------------------------------------------------------------------------- /PSEv1/ShearFunctionWrap.h: -------------------------------------------------------------------------------- 1 | #ifdef NVCC 2 | #error This header cannot be compiled by nvcc 3 | #endif 4 | 5 | #include 6 | 7 | #ifndef __SHEAR_FUNCTION_WRAP_H__ 8 | #define __SHEAR_FUNCTION_WRAP_H__ 9 | 10 | #include "ShearFunction.h" 11 | 12 | #include 13 | 14 | //! Abstract class representing the function of shear rate and shear strain 15 | /*! ShearFunction class, having three public pure virtual functions: 16 | 1) getShearRate; 2) getStrain; and 3) getOffset 17 | This interface can make it easier to add new shear functionality to HOOMD. 18 | Compared with previous approach, we can simply subclass this interface without 19 | changing any existing code or creating a new plugin. 20 | */ 21 | class ShearFunctionWrap : public ShearFunction 22 | { 23 | public: 24 | 25 | //! Get shear rate at certain timestep 26 | /*! \param timestep the timestep 27 | */ 28 | virtual double getShearRate(unsigned int timestep){ return double(0.0); } 29 | 30 | //! Get strain at certain timestep (unwrapped) 31 | /*! \param timestep the timestep 32 | */ 33 | virtual double getStrain(unsigned int timestep){ return double(0.0); } 34 | 35 | //! 
Get the offset of timestep (typically offset is the timestep when the shear starts) 36 | virtual unsigned int getOffset(){ return int(0); } 37 | 38 | }; 39 | 40 | 41 | void export_ShearFunctionWrap(pybind11::module& m); 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /PSEv1/SpecificShearFunction.cc: -------------------------------------------------------------------------------- 1 | // Maintainer: Gang Wang 2 | // Updated to HOOMD2.x compatibility by Andrew M. Fiore 3 | 4 | /*! \file ShearFunction.cc 5 | \brief Defines ShearFunction class and relevant functions 6 | */ 7 | 8 | #ifdef WIN32 9 | #pragma warning( push ) 10 | #pragma warning( disable : 4103 4244 ) 11 | #endif 12 | 13 | #include "SpecificShearFunction.h" 14 | 15 | using namespace std; 16 | 17 | void export_SpecificShearFunction(pybind11::module& m) 18 | { 19 | 20 | pybind11::class_ >( m, "SinShearFunction", pybind11::base()) 21 | .def(pybind11::init< double, double, unsigned int, double >()); 22 | 23 | pybind11::class_ > (m, "SteadyShearFunction", pybind11::base()) 24 | .def(pybind11::init< double, unsigned int, double >()); 25 | 26 | pybind11::class_ >(m, "ChirpShearFunction", pybind11::base()) 27 | .def(pybind11::init< double, double, double, double, unsigned int, double >()); 28 | 29 | pybind11::class_ >( m, "TukeyWindowFunction", pybind11::base()) 30 | .def(pybind11::init< double, double, unsigned int, double >()); 31 | 32 | pybind11::class_ >(m, "WindowedFunction", pybind11::base()) 33 | .def(pybind11::init< std::shared_ptr, std::shared_ptr >()); 34 | } 35 | 36 | #ifdef WIN32 37 | #pragma warning( pop ) 38 | #endif 39 | -------------------------------------------------------------------------------- /PSEv1/SpecificShearFunction.h: -------------------------------------------------------------------------------- 1 | #ifdef NVCC 2 | #error This header cannot be compiled by nvcc 3 | #endif 4 | 5 | #include 6 | 7 | #include "ShearFunction.h" 8 | 9 | 
#ifndef __SPECIFIC_SHEAR_FUNCTION_H__ 10 | #define __SPECIFIC_SHEAR_FUNCTION_H__ 11 | 12 | #include 13 | 14 | 15 | //! Simple sinusoidal shear implementing the abstract class ShearFunction 16 | class SinShearFunction : public ShearFunction 17 | { 18 | public: 19 | //! Constructor of SinShearFunction class 20 | /*! \param max_shear_rate maximum shear rate 21 | \param frequency real (NOT angular) frequency of oscillatory shear 22 | \param offset the offset of oscillatory shear 23 | \param dt the time interval 24 | */ 25 | SinShearFunction(double max_shear_rate, double frequency, unsigned int offset, double dt) : 26 | ShearFunction(), 27 | m_max_shear_rate(max_shear_rate), 28 | m_frequency(frequency), 29 | m_offset(offset), 30 | m_dt(dt) { } 31 | double getShearRate(unsigned int timestep) { 32 | return m_max_shear_rate * cos( m_frequency * 2 * m_pi * ( (timestep - m_offset) * m_dt ) ); 33 | } 34 | double getStrain(unsigned int timestep) { 35 | return m_max_shear_rate * sin( m_frequency * 2 * m_pi * ( (timestep - m_offset) * m_dt ) ) / m_frequency / 2 / m_pi; 36 | } 37 | unsigned int getOffset() { 38 | return m_offset; 39 | } 40 | private: 41 | const double m_max_shear_rate; //!< maximum shear rate 42 | const double m_frequency; //!< Real frequency, not angular frequency 43 | const unsigned int m_offset; //!< offset of the sinusoidal oscillatory shear 44 | const double m_dt; //!< time step 45 | static constexpr double m_pi = 3.1415926536; 46 | }; 47 | 48 | //! Simple steady shear implementing the abstract class ShearFunction 49 | class SteadyShearFunction : public ShearFunction 50 | { 51 | public: 52 | //! Constructor of SteadyShearFunction 53 | /*! 
\param shear_rate the shear rate 54 | \param offset the offset of the steady shear 55 | \param the time interval between each timestep 56 | */ 57 | SteadyShearFunction(double shear_rate, unsigned int offset, double dt) : 58 | ShearFunction(), 59 | m_shear_rate(shear_rate), 60 | m_offset(offset), 61 | m_dt(dt) { } 62 | double getShearRate(unsigned int timestep) { 63 | return m_shear_rate; 64 | } 65 | double getStrain(unsigned int timestep) { 66 | return m_shear_rate * (timestep - m_offset) * m_dt; 67 | } 68 | unsigned int getOffset() { 69 | return m_offset; 70 | } 71 | private: 72 | const double m_shear_rate; //!< constant shear rate 73 | const unsigned int m_offset; //!< offset of the steady shear 74 | const double m_dt; //!< time step 75 | }; 76 | 77 | //! Chirp oscillatory shear implementing abstract class ShearFunction 78 | /*! Adjusted from code of Zsigmond Varga, plugin PSEv1a_chirpv2 79 | */ 80 | class ChirpShearFunction : public ShearFunction 81 | { 82 | public: 83 | //! Constructor of ChirpShearFunction class 84 | /*! 
\param amp the strain amplitude of the chirp shear 85 | \param omega_0 the starting ANGULAR frequency of the shear 86 | \param omega_f the ending ANGULAR frequency of the shear 87 | \param periodT the total time of the chirp run 88 | \param offset the offset of the chirp return 89 | \param dt the time interval between each timestep 90 | */ 91 | ChirpShearFunction(double amp, double omega_0, double omega_f, double periodT, unsigned int offset, double dt) : 92 | ShearFunction(), 93 | m_amp(amp), 94 | m_omega_0(omega_0), 95 | m_omega_f(omega_f), 96 | m_periodT(periodT), 97 | m_offset(offset), 98 | m_dt(dt) { } 99 | double getShearRate(unsigned int timestep) { 100 | double current_omega = getCurrentOmega(timestep); 101 | double current_phase = getCurrentPhase(timestep); 102 | return m_amp * current_omega * cos(current_phase); 103 | } 104 | double getStrain(unsigned int timestep) { 105 | double current_phase = getCurrentPhase(timestep); 106 | return m_amp * sin( current_phase ); 107 | } 108 | unsigned int getOffset() { 109 | return m_offset; 110 | } 111 | private: 112 | double getCurrentOmega(unsigned int timestep) { 113 | return m_omega_0 * exp( m_dt * (timestep - m_offset) * logf(m_omega_f / m_omega_0) / m_periodT ); 114 | } 115 | double getCurrentPhase(unsigned int timestep) { 116 | return m_periodT * m_omega_0 / logf( m_omega_f / m_omega_0 ) * ( exp( m_dt * (timestep - m_offset) * logf(m_omega_f / m_omega_0) / m_periodT ) - 1 ); 117 | } 118 | const double m_amp; //!< Amplitude 119 | const double m_omega_0; //!< Minimum angular frequency 120 | const double m_omega_f; //!< Maximum angular frequency 121 | const double m_periodT; //!< Final time of Chirp 122 | const unsigned int m_offset; //!< offset of the shear 123 | const double m_dt; //!< time step 124 | }; 125 | 126 | 127 | //! Tukey Window Function implementing abstract class ShearFunction 128 | /*! 
Strictly speaking, this function is not a ShearFunction since it will only be 129 | used as a window function and applied to other ShearFunctions. This class should 130 | never be used by itself. However, since ShearFunction provides all the abstract 131 | functions it needs. We will call this a ShearFunction to avoid duplicate base classes 132 | TODO: Change the names of ShearFunction/getShearRate/getStrain to more general descriptions. 133 | */ 134 | class TukeyWindowFunction : public ShearFunction 135 | { 136 | public: 137 | //! Constructor of TukeyWindowFunction class 138 | /*! \param periodT the total time of the window 139 | \param tukey_param the parameter of Tukey window function, must be within (0, 1] 140 | \param offset the offset of the window 141 | \param dt the time interval between each timestep 142 | */ 143 | TukeyWindowFunction(double periodT, double tukey_param, unsigned int offset, double dt) : 144 | ShearFunction(), 145 | m_periodT(periodT), 146 | m_tukey_param(tukey_param), 147 | m_offset(offset), 148 | m_dt(dt) { 149 | m_omega_value = 2 * m_pi / tukey_param; 150 | } 151 | double getShearRate(unsigned int timestep) { 152 | double rel_time = (timestep - m_offset) * m_dt / m_periodT; // supposed to be within [0,1] 153 | if (rel_time <= 0 || rel_time >= 1) { 154 | return 0; 155 | } 156 | else if (rel_time >= m_tukey_param / 2 && rel_time <= 1 - m_tukey_param / 2) { 157 | return 0; 158 | } 159 | else if (rel_time < 0.5) { 160 | return -( sin( m_omega_value * (rel_time - m_tukey_param / 2) ) ) / 2 * m_omega_value / m_periodT; 161 | } 162 | else { 163 | return -( sin( m_omega_value * (rel_time - 1 + m_tukey_param / 2) ) ) / 2 * m_omega_value / m_periodT; 164 | } 165 | } 166 | double getStrain(unsigned int timestep) { 167 | double rel_time = (timestep - m_offset) * m_dt / m_periodT; // supposed to be within [0,1] 168 | if (rel_time <= 0 || rel_time >= 1) { 169 | return 0; 170 | } 171 | else if (rel_time >= m_tukey_param / 2 && rel_time <= 1 - 
m_tukey_param / 2) { 172 | return 1; 173 | } 174 | else if (rel_time < 0.5) { 175 | return ( 1 + cos( m_omega_value * (rel_time - m_tukey_param / 2) ) ) / 2; 176 | } 177 | else { 178 | return ( 1 + cos( m_omega_value * (rel_time - 1 + m_tukey_param / 2) ) ) / 2; 179 | } 180 | } 181 | unsigned int getOffset() { 182 | return m_offset; 183 | } 184 | private: 185 | const double m_periodT; //!< The time period of the window 186 | const double m_tukey_param; //!< The parameter of Tukey window function (scales the cosine lobe) 187 | const unsigned int m_offset; //!< offset of the window function 188 | const double m_dt; //!< time step 189 | static constexpr double m_pi = 3.1415926536; 190 | double m_omega_value; //!< omega value of the cosine function 191 | }; 192 | 193 | 194 | //! Windowed ShearFunction: A ShearFunction windowed by a window function (which is also a ShearFunction subclass) 195 | /*! WindowedFunction represents a strain field whose strain is the product of original ShearFunction and the window 196 | function. Therefore, the shear rate satisfies the product rule of derivative. 197 | */ 198 | class WindowedFunction : public ShearFunction 199 | { 200 | public: 201 | //! Constructor of WindowedFunction class 202 | /*! 
It is recommended to use the same offset for base shear function and window function 203 | \param base_shear_func the base shear function 204 | \param window_func the window function 205 | */ 206 | WindowedFunction(std::shared_ptr base_shear_func, std::shared_ptr window_func) : 207 | ShearFunction(), 208 | m_base_shear_func(base_shear_func), 209 | m_window_func(window_func) { } 210 | double getShearRate(unsigned int timestep) { 211 | return ( m_base_shear_func -> getShearRate(timestep) ) * ( m_window_func -> getStrain(timestep) ) + 212 | ( m_base_shear_func -> getStrain(timestep) ) * ( m_window_func -> getShearRate(timestep) ); 213 | } 214 | double getStrain(unsigned int timestep) { 215 | return ( m_base_shear_func -> getStrain(timestep) ) * ( m_window_func -> getStrain(timestep) ); 216 | } 217 | unsigned int getOffset() { 218 | return m_base_shear_func -> getOffset(); 219 | } 220 | private: 221 | const std::shared_ptr m_base_shear_func; //!< Base shear function 222 | const std::shared_ptr m_window_func; //!< Window function 223 | }; 224 | 225 | 226 | void export_SpecificShearFunction(pybind11::module& m); 227 | 228 | #endif 229 | -------------------------------------------------------------------------------- /PSEv1/Stokes.cc: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 
9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 
40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Gang Wang 52 | // Modified by Andrew Fiore 53 | 54 | #ifdef WIN32 55 | #pragma warning( push ) 56 | #pragma warning( disable : 4244 ) 57 | #endif 58 | 59 | using namespace std; 60 | 61 | #include 62 | #include 63 | 64 | #include "Stokes.h" 65 | #include "Stokes.cuh" 66 | 67 | #include 68 | #include 69 | #include 70 | 71 | /*! \file Stokes.cc 72 | \brief Contains code for the Stokes class 73 | */ 74 | 75 | /*! 76 | \param sysdef SystemDefinition this method will act on. Must not be NULL. 
77 | \param group The group of particles this integration method is to work on 78 | \param T temperature 79 | \param seed seed for random number generator 80 | \param nlist neighbor list 81 | \param xi Ewald parameter 82 | \param m_error Tolerance for all calculations 83 | 84 | */ 85 | Stokes::Stokes( std::shared_ptr sysdef, 86 | std::shared_ptr group, 87 | std::shared_ptr T, 88 | unsigned int seed, 89 | std::shared_ptr nlist, 90 | Scalar xi, 91 | Scalar error ) 92 | : IntegrationMethodTwoStep(sysdef, group), 93 | m_T(T), 94 | m_seed(seed), 95 | m_nlist(nlist), 96 | m_xi(xi), 97 | m_error(error) 98 | { 99 | m_exec_conf->msg->notice(5) << "Constructing Stokes" << endl; 100 | 101 | // Hash the User's Seed to make it less likely to be a low positive integer 102 | m_seed = m_seed * 0x12345677 + 0x12345; m_seed ^= (m_seed >> 16); m_seed *= 0x45679; 103 | 104 | // only one GPU is supported 105 | if (!m_exec_conf->isCUDAEnabled()) 106 | { 107 | m_exec_conf->msg->error() << "Creating a Stokes when CUDA is disabled" << endl; 108 | throw std::runtime_error("Error initializing Stokes"); 109 | } 110 | 111 | } 112 | 113 | //! Destructor for the Stokes class 114 | Stokes::~Stokes() 115 | { 116 | m_exec_conf->msg->notice(5) << "Destroying Stokes" << endl; 117 | cufftDestroy(plan); 118 | } 119 | 120 | 121 | /*! 122 | Set the parameters for Hydrodynamic Calculation. 
Do once at the beginning 123 | of the simulation and then reuse computed values 124 | 125 | - Pre-tabulate real space interaction functions (f and g) 126 | - Set wave space vectors 127 | 128 | */ 129 | void Stokes::setParams() 130 | { 131 | // Try two Lanczos iterations to start (number of iterations will adapt as needed) 132 | m_m_Lanczos = 2; 133 | 134 | // Real space cutoff 135 | m_ewald_cut = sqrtf( - logf( m_error ) ) / m_xi; 136 | 137 | // Number of grid points 138 | int kmax = int( 2.0 * sqrtf( - logf( m_error ) ) * m_xi ) + 1; 139 | 140 | const BoxDim& box = m_pdata->getBox(); // Only for box not changing with time. 141 | Scalar3 L = box.getL(); 142 | 143 | m_Nx = int( kmax * L.x / (2.0 * 3.1415926536 ) * 2.0 ) + 1; 144 | m_Ny = int( kmax * L.y / (2.0 * 3.1415926536 ) * 2.0 ) + 1; 145 | m_Nz = int( kmax * L.z / (2.0 * 3.1415926536 ) * 2.0 ) + 1; 146 | 147 | // Get list of int values between 8 and 4096 that can be written as 148 | // (2^a)*(3^b)*(5^c) 149 | // Then sort list from low to high 150 | // 151 | // Go to such large values so as to be able simulate boxes with large 152 | // aspect ratios 153 | std::vector Mlist; 154 | for ( int ii = 0; ii < 13; ++ii ){ 155 | int pow2 = 1; 156 | for ( int i = 0; i < ii; ++i ){ 157 | pow2 *= 2; 158 | } 159 | for ( int jj = 0; jj < 8; ++jj ){ 160 | int pow3 = 1; 161 | for ( int j = 0; j < jj; ++j ){ 162 | pow3 *= 3; 163 | } 164 | for ( int kk = 0; kk < 6; ++kk ){ 165 | int pow5 = 1; 166 | for ( int k = 0; k < kk; ++k ){ 167 | pow5 *= 5; 168 | } 169 | int Mcurr = pow2 * pow3 * pow5; 170 | if ( Mcurr >= 8 && Mcurr <= 4096 ){ 171 | Mlist.push_back(Mcurr); 172 | } 173 | } 174 | } 175 | } 176 | std::sort(Mlist.begin(), Mlist.end()); 177 | const int nmult = Mlist.size(); 178 | 179 | // Compute the number of grid points in each direction 180 | // 181 | // Number of grid points should be a power of 2,3,5 for most efficient FFTs 182 | for ( int ii = 0; ii < nmult; ++ii ){ 183 | if (m_Nx <= Mlist[ii]){ 184 | m_Nx = Mlist[ii]; 
185 | break; 186 | } 187 | } 188 | for ( int ii = 0; ii < nmult; ++ii ){ 189 | if (m_Ny <= Mlist[ii]){ 190 | m_Ny = Mlist[ii]; 191 | break; 192 | } 193 | } 194 | for ( int ii = 0; ii < nmult; ++ii ){ 195 | if (m_Nz <= Mlist[ii]){ 196 | m_Nz = Mlist[ii]; 197 | break; 198 | } 199 | } 200 | 201 | // Check that we haven't asked for too many grid points 202 | // Max allowable by cuFFT is 512^3 203 | if ( m_Nx * m_Ny * m_Nz > 512*512*512 ){ 204 | 205 | printf("Requested Number of Fourier Nodes Exceeds Max Dimension of 512^3\n"); 206 | printf("Mx = %i \n", m_Nx); 207 | printf("My = %i \n", m_Ny); 208 | printf("Mz = %i \n", m_Nz); 209 | printf("Mx*My*Mz = %i \n", m_Nx * m_Ny * m_Nz); 210 | printf("\n"); 211 | printf("Note to User: Fix is to reduce xi and try again. \n"); 212 | 213 | exit(EXIT_FAILURE); 214 | } 215 | 216 | // Maximum eigenvalue of A'*A to scale P 217 | Scalar gamma = m_max_strain; 218 | Scalar gamma2 = gamma*gamma; 219 | Scalar lambda = 1.0 + gamma2/2.0 + gamma*sqrtf(1.0 + gamma2/4.0); 220 | 221 | // Grid spacing 222 | m_gridh = L / make_scalar3(m_Nx,m_Ny,m_Nz); 223 | 224 | // Parameters for the Spectral Ewald Method (Lindbo and Tornberg, J. Comp. 
Phys., 2011) 225 | m_gaussm = 1.0; 226 | while ( erfcf( m_gaussm / sqrtf(2.0*lambda) ) > m_error ){ 227 | m_gaussm = m_gaussm + 0.01; 228 | } 229 | m_gaussP = int( m_gaussm*m_gaussm / 3.1415926536 ) + 1; 230 | 231 | if (m_gaussP > m_Nx) m_gaussP = m_Nx; // Can't be supported beyond grid 232 | if (m_gaussP > m_Ny) m_gaussP = m_Ny; 233 | if (m_gaussP > m_Nz) m_gaussP = m_Nz; 234 | Scalar w = m_gaussP*m_gridh.x / 2.0; // Gaussian width in simulation units 235 | Scalar xisq = m_xi * m_xi; 236 | m_eta = (2.0*w/m_gaussm)*(2.0*w/m_gaussm) * ( xisq ); // Gaussian splitting parameter 237 | 238 | // Print summary to command line output 239 | printf("\n"); 240 | printf("\n"); 241 | m_exec_conf->msg->notice(2) << "--- NUFFT Hydrodynamics Statistics ---" << endl; 242 | m_exec_conf->msg->notice(2) << "Mx: " << m_Nx << endl; 243 | m_exec_conf->msg->notice(2) << "My: " << m_Ny << endl; 244 | m_exec_conf->msg->notice(2) << "Mz: " << m_Nz << endl; 245 | m_exec_conf->msg->notice(2) << "rcut: " << m_ewald_cut << endl; 246 | m_exec_conf->msg->notice(2) << "Points per radius (x,y,z): " << m_Nx / L.x << ", " << m_Ny / L.y << ", " << m_Nz / L.z << endl; 247 | m_exec_conf->msg->notice(2) << "--- Gaussian Spreading Parameters ---" << endl; 248 | m_exec_conf->msg->notice(2) << "gauss_m: " << m_gaussm << endl; 249 | m_exec_conf->msg->notice(2) << "gauss_P: " << m_gaussP << endl; 250 | m_exec_conf->msg->notice(2) << "gauss_eta: " << m_eta << endl; 251 | m_exec_conf->msg->notice(2) << "gauss_w: " << w << endl; 252 | m_exec_conf->msg->notice(2) << "gauss_gridh (x,y,z): " << L.x/m_Nx << ", " << L.y/m_Ny << ", " << L.z/m_Nz << endl; 253 | printf("\n"); 254 | printf("\n"); 255 | 256 | // Create plan for CUFFT on the GPU 257 | cufftPlan3d(&plan, m_Nx, m_Ny, m_Nz, CUFFT_C2C); 258 | 259 | // Prepare GPUArrays for grid vectors and gridded forces 260 | GPUArray n_gridk(m_Nx*m_Ny*m_Nz, m_exec_conf); 261 | m_gridk.swap(n_gridk); 262 | GPUArray n_gridX(m_Nx*m_Ny*m_Nz, m_exec_conf); 263 | 
m_gridX.swap(n_gridX); 264 | GPUArray n_gridY(m_Nx*m_Ny*m_Nz, m_exec_conf); 265 | m_gridY.swap(n_gridY); 266 | GPUArray n_gridZ(m_Nx*m_Ny*m_Nz, m_exec_conf); 267 | m_gridZ.swap(n_gridZ); 268 | 269 | // Get list of reciprocal space vectors, and scaling factor for the wave space calculation at each grid point 270 | ArrayHandle h_gridk(m_gridk, access_location::host, access_mode::readwrite); 271 | for (int i = 0; i < m_Nx; i++) { 272 | for (int j = 0; j < m_Ny; j++) { 273 | for (int k = 0; k < m_Nz; k++) { 274 | 275 | // Index into grid vector storage array 276 | int idx = i * m_Ny*m_Nz + j * m_Nz + k; 277 | 278 | // k goes from -N/2 to N/2 279 | h_gridk.data[idx].x = 2.0*3.1415926536 * ((i < ( m_Nx + 1 ) / 2) ? i : i - m_Nx) / L.x; 280 | h_gridk.data[idx].y = 2.0*3.1415926536 * ((j < ( m_Ny + 1 ) / 2) ? j : j - m_Ny) / L.y; 281 | h_gridk.data[idx].z = 2.0*3.1415926536 * ((k < ( m_Nz + 1 ) / 2) ? k : k - m_Nz) / L.z; 282 | 283 | // k dot k 284 | Scalar k2 = h_gridk.data[idx].x*h_gridk.data[idx].x + h_gridk.data[idx].y*h_gridk.data[idx].y + h_gridk.data[idx].z*h_gridk.data[idx].z; 285 | 286 | // Scaling factor used in wave space sum 287 | // 288 | // Can't include k=0 term in the Ewald sum 289 | if (i == 0 && j == 0 && k == 0){ 290 | h_gridk.data[idx].w = 0; 291 | } 292 | else{ 293 | // Have to divide by Nx*Ny*Nz to normalize the FFTs 294 | h_gridk.data[idx].w = 6.0*3.1415926536 * (1.0 + k2/4.0/xisq) * expf( -(1-m_eta) * k2/4.0/xisq ) / ( k2 ) / Scalar( m_Nx*m_Ny*m_Nz ); 295 | } 296 | 297 | } 298 | } 299 | } 300 | 301 | // Store the coefficients for the real space part of Ewald summation 302 | // 303 | // Will precompute scaling factors for real space component of summation for a given 304 | // discretization to speed up GPU calculations 305 | // 306 | // Do calculation in double precision, then truncate and tabulate, because the 307 | // expressions don't behave very well numerically, and double precision ensures 308 | // it works. 
309 | m_ewald_dr = 0.001; // Distance resolution 310 | m_ewald_n = m_ewald_cut / m_ewald_dr - 1; // Number of entries in tabulation 311 | 312 | double dr = 0.0010000000000000; 313 | 314 | // Assume all particles have radius of 1.0 315 | Scalar pi12 = 1.77245385091; 316 | Scalar aa = 1.0; 317 | Scalar axi = aa * m_xi; 318 | Scalar axi2 = axi * axi; 319 | m_self = (1. + 4.*pi12*axi*erfc(2.*axi) - exp(-4.*axi2))/(4.*pi12*axi*aa); 320 | 321 | // Allocate storage for real space Ewald table 322 | int nR = m_ewald_n + 1; // number of entries in ewald table 323 | GPUArray n_ewaldC1( nR, m_exec_conf); 324 | m_ewaldC1.swap(n_ewaldC1); 325 | ArrayHandle h_ewaldC1(m_ewaldC1, access_location::host, access_mode::readwrite); 326 | 327 | // Functions are complicated so calculation should be done in double precision, then truncated to single precision 328 | // in order to ensure accurate evaluation 329 | double xi = m_xi; 330 | double Pi = 3.141592653589793; 331 | double a = aa; 332 | 333 | // Fill tables 334 | for ( int kk = 0; kk < nR; kk++ ) 335 | { 336 | 337 | // Initialize entries 338 | h_ewaldC1.data[ kk ].x = 0.0; // UF1 at r 339 | h_ewaldC1.data[ kk ].y = 0.0; // UF2 at r 340 | h_ewaldC1.data[ kk ].z = 0.0; // UF1 at r + dr 341 | h_ewaldC1.data[ kk ].w = 0.0; // UF2 at r + dr 342 | 343 | // Distance for current entry 344 | double r = double( kk ) * dr + dr; 345 | double Imrr = 0, rr = 0; 346 | 347 | // Expression have been simplified assuming no overlap, touching, and overlap 348 | if ( r > 2.0*a ){ 349 | 350 | Imrr = -pow(a,-1) + (pow(a,2)*pow(r,-3))/2. + (3*pow(r,-1))/4. + (3*erfc(r*xi)*pow(a,-2)*pow(r,-3)*(-12*pow(r,4) + pow(xi,-4)))/128. + 351 | pow(a,-2)*((9*r)/32. - (3*pow(r,-3)*pow(xi,-4))/128.) + 352 | (erfc((2*a + r)*xi)*(128*pow(a,-1) + 64*pow(a,2)*pow(r,-3) + 96*pow(r,-1) + pow(a,-2)*(36*r - 3*pow(r,-3)*pow(xi,-4))))/256. + 353 | (erfc(2*a*xi - r*xi)*(128*pow(a,-1) - 64*pow(a,2)*pow(r,-3) - 96*pow(r,-1) + pow(a,-2)*(-36*r + 3*pow(r,-3)*pow(xi,-4))))/ 354 | 256. 
+ (3*exp(-(pow(r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-2)*pow(xi,-3)*(1 + 6*pow(r,2)*pow(xi,2)))/64. + 355 | (exp(-(pow(2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 356 | (8*r*pow(a,2)*pow(xi,2) - 16*pow(a,3)*pow(xi,2) + a*(2 - 28*pow(r,2)*pow(xi,2)) - 3*(r + 6*pow(r,3)*pow(xi,2))))/128. + 357 | (exp(-(pow(-2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 358 | (8*r*pow(a,2)*pow(xi,2) + 16*pow(a,3)*pow(xi,2) + a*(-2 + 28*pow(r,2)*pow(xi,2)) - 3*(r + 6*pow(r,3)*pow(xi,2))))/128.; 359 | 360 | rr = -pow(a,-1) - pow(a,2)*pow(r,-3) + (3*pow(r,-1))/2. + (3*pow(a,-2)*pow(r,-3)*(4*pow(r,4) + pow(xi,-4)))/64. + 361 | (erfc(2*a*xi - r*xi)*(64*pow(a,-1) + 64*pow(a,2)*pow(r,-3) - 96*pow(r,-1) + pow(a,-2)*(-12*r - 3*pow(r,-3)*pow(xi,-4))))/128. + 362 | (erfc((2*a + r)*xi)*(64*pow(a,-1) - 64*pow(a,2)*pow(r,-3) + 96*pow(r,-1) + pow(a,-2)*(12*r + 3*pow(r,-3)*pow(xi,-4))))/128. + 363 | (3*exp(-(pow(r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-2)*pow(xi,-3)*(-1 + 2*pow(r,2)*pow(xi,2)))/32. - 364 | ((2*a + 3*r)*exp(-(pow(-2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 365 | (-1 - 8*a*r*pow(xi,2) + 8*pow(a,2)*pow(xi,2) + 2*pow(r,2)*pow(xi,2)))/64. + 366 | ((2*a - 3*r)*exp(-(pow(2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 367 | (-1 + 8*a*r*pow(xi,2) + 8*pow(a,2)*pow(xi,2) + 2*pow(r,2)*pow(xi,2)))/64. - 368 | (3*erfc(r*xi)*pow(a,-2)*pow(r,-3)*pow(xi,-4)*(1 + 4*pow(r,4)*pow(xi,4)))/64.; 369 | 370 | } 371 | else if ( r == 2.0*a ){ 372 | 373 | Imrr = -(pow(a,-5)*(3 + 16*a*xi*pow(Pi,-0.5))*pow(xi,-4))/2048. + (3*erfc(2*a*xi)*pow(a,-5)*(-192*pow(a,4) + pow(xi,-4)))/1024. + 374 | erfc(4*a*xi)*(pow(a,-1) - (3*pow(a,-5)*pow(xi,-4))/2048.) + 375 | (exp(-16*pow(a,2)*pow(xi,2))*pow(a,-4)*pow(Pi,-0.5)*pow(xi,-3)*(-1 - 64*pow(a,2)*pow(xi,2)))/256. 
+ 376 | (3*exp(-4*pow(a,2)*pow(xi,2))*pow(a,-4)*pow(Pi,-0.5)*pow(xi,-3)*(1 + 24*pow(a,2)*pow(xi,2)))/256.; 377 | 378 | rr = (pow(a,-5)*(3 + 16*a*xi*pow(Pi,-0.5))*pow(xi,-4))/1024. + erfc(2*a*xi)*((-3*pow(a,-1))/8. - (3*pow(a,-5)*pow(xi,-4))/512.) + 379 | erfc(4*a*xi)*(pow(a,-1) + (3*pow(a,-5)*pow(xi,-4))/1024.) + 380 | (exp(-16*pow(a,2)*pow(xi,2))*pow(a,-4)*pow(Pi,-0.5)*pow(xi,-3)*(1 - 32*pow(a,2)*pow(xi,2)))/128. + 381 | (3*exp(-4*pow(a,2)*pow(xi,2))*pow(a,-4)*pow(Pi,-0.5)*pow(xi,-3)*(-1 + 8*pow(a,2)*pow(xi,2)))/128.; 382 | 383 | } 384 | else if ( r < 2*a){ 385 | 386 | Imrr = (-9*r*pow(a,-2))/32. + pow(a,-1) - (pow(a,2)*pow(r,-3))/2. - (3*pow(r,-1))/4. + 387 | (3*erfc(r*xi)*pow(a,-2)*pow(r,-3)*(-12*pow(r,4) + pow(xi,-4)))/128. + 388 | (erfc((-2*a + r)*xi)*(-128*pow(a,-1) + 64*pow(a,2)*pow(r,-3) + 96*pow(r,-1) + pow(a,-2)*(36*r - 3*pow(r,-3)*pow(xi,-4))))/ 389 | 256. + (erfc((2*a + r)*xi)*(128*pow(a,-1) + 64*pow(a,2)*pow(r,-3) + 96*pow(r,-1) + pow(a,-2)*(36*r - 3*pow(r,-3)*pow(xi,-4))))/ 390 | 256. + (3*exp(-(pow(r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-2)*pow(xi,-3)*(1 + 6*pow(r,2)*pow(xi,2)))/64. + 391 | (exp(-(pow(2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 392 | (8*r*pow(a,2)*pow(xi,2) - 16*pow(a,3)*pow(xi,2) + a*(2 - 28*pow(r,2)*pow(xi,2)) - 3*(r + 6*pow(r,3)*pow(xi,2))))/128. + 393 | (exp(-(pow(-2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 394 | (8*r*pow(a,2)*pow(xi,2) + 16*pow(a,3)*pow(xi,2) + a*(-2 + 28*pow(r,2)*pow(xi,2)) - 3*(r + 6*pow(r,3)*pow(xi,2))))/128.; 395 | 396 | rr = ((2*a + 3*r)*pow(a,-2)*pow(2*a - r,3)*pow(r,-3))/16. + 397 | (erfc((-2*a + r)*xi)*(-64*pow(a,-1) - 64*pow(a,2)*pow(r,-3) + 96*pow(r,-1) + pow(a,-2)*(12*r + 3*pow(r,-3)*pow(xi,-4))))/128. + 398 | (erfc((2*a + r)*xi)*(64*pow(a,-1) - 64*pow(a,2)*pow(r,-3) + 96*pow(r,-1) + pow(a,-2)*(12*r + 3*pow(r,-3)*pow(xi,-4))))/128. + 399 | (3*exp(-(pow(r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-2)*pow(xi,-3)*(-1 + 2*pow(r,2)*pow(xi,2)))/32. 
- 400 | ((2*a + 3*r)*exp(-(pow(-2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 401 | (-1 - 8*a*r*pow(xi,2) + 8*pow(a,2)*pow(xi,2) + 2*pow(r,2)*pow(xi,2)))/64. + 402 | ((2*a - 3*r)*exp(-(pow(2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 403 | (-1 + 8*a*r*pow(xi,2) + 8*pow(a,2)*pow(xi,2) + 2*pow(r,2)*pow(xi,2)))/64. - 404 | (3*erfc(r*xi)*pow(a,-2)*pow(r,-3)*pow(xi,-4)*(1 + 4*pow(r,4)*pow(xi,4)))/64.; 405 | 406 | } 407 | 408 | // Save values to table 409 | h_ewaldC1.data[ kk ].x = Scalar( Imrr ); // UF1 410 | h_ewaldC1.data[ kk ].y = Scalar( rr ); // UF2 411 | 412 | } // kk loop over distances 413 | 414 | // Both pieces of UF data for faster interpolation (r and r+dr stored in same Scalar4) 415 | for ( int kk = 0; kk < (nR-1); kk++ ){ 416 | 417 | int offset1 = kk; 418 | int offset2 = (kk+1); 419 | 420 | h_ewaldC1.data[ offset1 ].z = h_ewaldC1.data[ offset2 ].x; 421 | h_ewaldC1.data[ offset1 ].w = h_ewaldC1.data[ offset2 ].y; 422 | } 423 | 424 | } 425 | 426 | /*! 
\param timestep Current time step 427 | \post Particle positions and velocities are moved forward to timestep+1 428 | */ 429 | void Stokes::integrateStepOne(unsigned int timestep) 430 | { 431 | 432 | // Recompute neighborlist ( if needed ) 433 | m_nlist->compute(timestep); 434 | 435 | // access the neighbor list 436 | ArrayHandle d_n_neigh(m_nlist->getNNeighArray(), access_location::device, access_mode::read); 437 | ArrayHandle d_nlist(m_nlist->getNListArray(), access_location::device, access_mode::read); 438 | ArrayHandle d_headlist(m_nlist->getHeadList(), access_location::device, access_mode::read); 439 | 440 | // Consistency check 441 | unsigned int group_size = m_group->getNumMembers(); 442 | assert(group_size <= m_pdata->getN()); 443 | if (group_size == 0) 444 | return; 445 | 446 | // Get particle forces 447 | const GPUArray< Scalar4 >& net_force = m_pdata->getNetForce(); 448 | 449 | // profile this step 450 | if (m_prof) 451 | m_prof->push(m_exec_conf, "Stokes step 1 (no step 2)"); 452 | 453 | // Access all the needed data for the calculation 454 | ArrayHandle d_pos(m_pdata->getPositions(), access_location::device, access_mode::readwrite); 455 | ArrayHandle d_vel(m_pdata->getVelocities(), access_location::device, access_mode::readwrite); 456 | ArrayHandle d_accel(m_pdata->getAccelerations(), access_location::device, access_mode::readwrite); 457 | ArrayHandle d_net_force(net_force, access_location::device, access_mode::read); 458 | ArrayHandle d_image(m_pdata->getImages(), access_location::device, access_mode::readwrite); 459 | 460 | BoxDim box = m_pdata->getBox(); 461 | ArrayHandle< unsigned int > d_index_array(m_group->getIndexArray(), access_location::device, access_mode::read); 462 | 463 | // Grid vectors 464 | ArrayHandle d_gridk(m_gridk, access_location::device, access_mode::readwrite); 465 | ArrayHandle d_gridX(m_gridX, access_location::device, access_mode::readwrite); 466 | ArrayHandle d_gridY(m_gridY, access_location::device, access_mode::readwrite); 
467 | ArrayHandle d_gridZ(m_gridZ, access_location::device, access_mode::readwrite); 468 | 469 | // Real space interaction tabulation 470 | ArrayHandle d_ewaldC1(m_ewaldC1, access_location::device, access_mode::read); 471 | 472 | // Calculate the shear rate of the current timestep 473 | Scalar current_shear_rate = m_shear_func -> getShearRate(timestep); 474 | 475 | // perform the update on the GPU 476 | gpu_stokes_step_one( 477 | d_pos.data, 478 | d_vel.data, 479 | d_accel.data, 480 | d_image.data, 481 | d_index_array.data, 482 | group_size, 483 | box, 484 | m_deltaT, 485 | 256, 486 | d_net_force.data, 487 | m_T->getValue(timestep), 488 | timestep, 489 | m_seed, 490 | m_xi, 491 | m_eta, 492 | m_ewald_cut, 493 | m_ewald_dr, 494 | m_ewald_n, 495 | d_ewaldC1.data, 496 | m_self, 497 | d_gridk.data, 498 | d_gridX.data, 499 | d_gridY.data, 500 | d_gridZ.data, 501 | plan, 502 | m_Nx, 503 | m_Ny, 504 | m_Nz, 505 | d_n_neigh.data, 506 | d_nlist.data, 507 | d_headlist.data, 508 | m_m_Lanczos, 509 | m_pdata->getN(), 510 | m_gaussP, 511 | m_gridh, 512 | m_error, 513 | current_shear_rate 514 | ); 515 | 516 | if (m_exec_conf->isCUDAErrorCheckingEnabled()) 517 | CHECK_CUDA_ERROR(); 518 | 519 | // done profiling 520 | if (m_prof) 521 | m_prof->pop(m_exec_conf); 522 | 523 | } 524 | 525 | /*! \param timestep Current time step 526 | \post Nothing is done. 
527 | */ 528 | void Stokes::integrateStepTwo(unsigned int timestep) 529 | { 530 | } 531 | 532 | void export_Stokes(pybind11::module& m) 533 | { 534 | pybind11::class_ > (m, "Stokes", pybind11::base()) 535 | .def(pybind11::init< std::shared_ptr, std::shared_ptr, std::shared_ptr, unsigned int, std::shared_ptr, Scalar, Scalar >()) 536 | .def("setT", &Stokes::setT) 537 | .def("setParams", &Stokes::setParams) 538 | .def("setShear", &Stokes::setShear) 539 | ; 540 | } 541 | 542 | #ifdef WIN32 543 | #pragma warning( pop ) 544 | #endif 545 | -------------------------------------------------------------------------------- /PSEv1/Stokes.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 
20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Gang Wang 52 | // Modified by Andrew Fiore 53 | 54 | 55 | #include "Stokes.cuh" 56 | #include "Mobility.cuh" 57 | #include "Brownian.cuh" 58 | #include "Helper.cuh" 59 | 60 | #include "hoomd/Saru.h" 61 | #include "hoomd/TextureTools.h" 62 | 63 | #include 64 | 65 | #include 66 | #include 67 | 68 | #ifdef WIN32 69 | #include 70 | #else 71 | #include 72 | #endif 73 | 74 | //! command to convert floats or doubles to integers 75 | #ifdef SINGLE_PRECISION 76 | #define __scalar2int_rd __float2int_rd 77 | #else 78 | #define __scalar2int_rd __double2int_rd 79 | #endif 80 | 81 | #ifndef __ERRCHK_CUH__ 82 | #define __ERRCHK_CUH__ 83 | //! Function to check for errors 84 | #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } 85 | /*! 86 | \param code returned error code 87 | \param file which file the error occured in 88 | \param line which line error check was tripped 89 | \param abort whether to kill code upon error trigger 90 | */ 91 | inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) 92 | { 93 | if (code != cudaSuccess) 94 | { 95 | fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); 96 | if (abort) exit(code); 97 | } 98 | } 99 | #endif 100 | 101 | /*! \file Stokes.cu 102 | \brief Defines GPU kernel code for integration considering hydrodynamic interactions on the GPU. Used by Stokes.cc. 103 | */ 104 | 105 | 106 | //! Shared memory array for partial sum of dot product kernel 107 | extern __shared__ Scalar partial_sum[]; 108 | extern __shared__ Scalar4 shared_Fpos[]; 109 | 110 | //! Texture for reading table values 111 | scalar4_tex_t tables1_tex; 112 | //! Texture for reading particle positions 113 | scalar4_tex_t pos_tex; 114 | 115 | //! Takes the integration on a group of particles 116 | /*! 
\param d_pos array of particle positions 117 | \param d_vel array of particle velocities 118 | \param d_delu1 first 4 components of gradient of particle velocity 119 | \param d_delu2 second 4 components of gradient of particle velocity 120 | \param d_accel array of particle "accelerations" (This is an overdamped integrator, so accelerations don't have physical meaning) 121 | \param d_image array of particle images 122 | \param d_group_members Device array listing the indicies of the mebers of the group to integrate 123 | \param group_size Number of members in the group 124 | \param box Box dimensions for periodic boundary condition handling 125 | \param deltaT timestep 126 | \param d_net_force net force on each particle, only used to set "accelerations" 127 | 128 | This kernel must be executed with a 1D grid of any block size such that the number of threads is greater than or 129 | equal to the number of members in the group. The kernel's implementation simply reads one particle in each thread 130 | and updates that particle. (Not necessary true for Stokesian Dynamics simulation) 131 | 132 | Performance notes: 133 | Particle properties are read via the texture cache to optimize the bandwidth obtained with sparse groups. The writes 134 | in sparse groups will not be coalesced. However, because ParticleGroup sorts the index list the writes will be as 135 | contiguous as possible leading to fewer memory transactions on compute 1.3 hardware and more cache hits on Fermi. (Not sure about this..) 
136 | */ 137 | extern "C" __global__ 138 | void gpu_stokes_step_one_kernel( 139 | Scalar4 *d_pos, 140 | Scalar4 *d_vel, 141 | Scalar3 *d_accel, 142 | int3 *d_image, 143 | unsigned int *d_group_members, 144 | unsigned int group_size, 145 | BoxDim box, 146 | Scalar deltaT, 147 | Scalar4 *d_net_force, 148 | Scalar shear_rate 149 | ){ 150 | 151 | // determine which particle this thread works on (MEM TRANSFER: 4 bytes) 152 | int group_idx = blockIdx.x * blockDim.x + threadIdx.x; 153 | 154 | if (group_idx < group_size){ 155 | 156 | unsigned int idx = d_group_members[group_idx]; 157 | 158 | // read the particle's posision (MEM TRANSFER: 16 bytes) 159 | Scalar4 postype = d_pos[idx]; 160 | Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z); 161 | 162 | // read the particle's velocity and acceleration (MEM TRANSFER: 32 bytes) 163 | Scalar4 velmass = d_vel[idx]; 164 | Scalar mass = velmass.w; 165 | Scalar3 vel = make_scalar3(velmass.x, velmass.y, velmass.z); 166 | 167 | // Add the shear 168 | vel.x += shear_rate * pos.y; 169 | 170 | Scalar4 net_force = d_net_force[idx]; 171 | Scalar3 accel = make_scalar3(net_force.x, net_force.y, net_force.z); 172 | 173 | // update the position 174 | Scalar3 dx = vel * deltaT; 175 | 176 | // FLOPS: 3 177 | pos += dx; 178 | 179 | accel = accel/mass; 180 | 181 | // read in the particle's image (MEM TRANSFER: 16 bytes) 182 | int3 image = d_image[idx]; 183 | 184 | // fix the periodic boundary conditions (FLOPS: 15) 185 | box.wrap(pos, image); 186 | 187 | // write out the results (MEM_TRANSFER: 48 bytes) 188 | d_accel[idx] = accel; 189 | d_pos[idx] = make_scalar4(pos.x, pos.y, pos.z, postype.w); 190 | d_image[idx] = image; 191 | } 192 | } 193 | 194 | /*! 
\param d_pos array of particle positions 195 | \param d_vel array of particle velocities 196 | \param d_accel array of particle accelerations 197 | \param d_image array of particle images 198 | \param d_group_members Device array listing the indicies of the mebers of the group to integrate 199 | \param group_size Number of members in the group ( i.e. number of particles to consider ) 200 | \param box Box dimensions for periodic boundary condition handling 201 | \param dt timestep 202 | \param block_size optimum block size returned by an autotuner 203 | \param d_net_force net force on the particles 204 | \param T temperature 205 | \param timestep time step 206 | \param seed seed for random number generation 207 | \param xi splitting coefficient for Ewald summation 208 | \param eta Spectral splitting parameter 209 | \param P number of nodes in support of each gaussian for k-space sum 210 | \param ewald_cut cut off radius for Ewald summation 211 | \param ewald_dr discretization of look up tables 212 | \param ewald_n number of elements in look up tables 213 | \param d_ewaldC Ewald coefficients for real space sum 214 | \param d_gridk reciprocal lattice vectors and parameters for Ewald reciprocal space sum 215 | \param d_gridX x-component of force moment projection onto the grid 216 | \param d_gridY y-component of force moment projection onto the grid 217 | \param d_gridZ z-component of force moment projection onto the grid 218 | \param plan cudaFFT plan 219 | \param Nx number of grid nodes in the x-direction 220 | \param Ny number of grid nodes in the y-direction 221 | \param Nz number of grid nodes in the z-direction 222 | \param d_n_neigh Number of neighbors for every particle 223 | \param d_nlist Neighbor list of every particle, 2D array, can be accessed by nli 224 | \param nli Index lookup helper for d_nlist 225 | \param cheb_an Chebychev coefficients 226 | \param n_cheb Order of Chebyshev approximation 227 | \param N_total total number of particles ( should be same 
as group_size ) 228 | \param gridh Spacing between grid ndoes 229 | \param cheb_recompute whether to recompute chebyshev approximation 230 | \param eig_recompute whether to recompute eigenvalues of matrix approximation 231 | \param stored_eigenvalue previous max eigenvalue 232 | \param cheb_error error tolerance in chebyshev approximation 233 | */ 234 | cudaError_t gpu_stokes_step_one( 235 | Scalar4 *d_pos, 236 | Scalar4 *d_vel, 237 | Scalar3 *d_accel, 238 | int3 *d_image, 239 | unsigned int *d_group_members, 240 | unsigned int group_size, 241 | const BoxDim& box, 242 | Scalar dt, 243 | unsigned int block_size, 244 | Scalar4 *d_net_force, 245 | const Scalar T, 246 | const unsigned int timestep, 247 | const unsigned int seed, 248 | Scalar xi, 249 | Scalar eta, 250 | Scalar ewald_cut, 251 | Scalar ewald_dr, 252 | int ewald_n, 253 | Scalar4 *d_ewaldC1, 254 | Scalar self, 255 | Scalar4 *d_gridk, 256 | CUFFTCOMPLEX *d_gridX, 257 | CUFFTCOMPLEX *d_gridY, 258 | CUFFTCOMPLEX *d_gridZ, 259 | cufftHandle plan, 260 | const int Nx, 261 | const int Ny, 262 | const int Nz, 263 | const unsigned int *d_n_neigh, 264 | const unsigned int *d_nlist, 265 | const unsigned int *d_headlist, 266 | int& m_Lanczos, 267 | const unsigned int N_total, 268 | const int P, 269 | Scalar3 gridh, 270 | Scalar cheb_error, 271 | Scalar shear_rate 272 | ){ 273 | 274 | // Total number of grid points 275 | unsigned int NxNyNz = Nx*Ny*Nz; 276 | 277 | // setup the grid to run the kernel 278 | // block for particle calculation 279 | dim3 grid( (group_size/block_size) + 1, 1, 1); 280 | dim3 threads(block_size, 1, 1); 281 | 282 | // block for grid calculation 283 | int gridBlockSize = ( NxNyNz > block_size ) ? 
block_size : NxNyNz; 284 | int gridNBlock = ( NxNyNz + gridBlockSize - 1 ) / gridBlockSize ; 285 | 286 | // Get the textured tables for real space Ewald sum tabulation 287 | tables1_tex.normalized = false; // Not normalized 288 | tables1_tex.filterMode = cudaFilterModeLinear; // Filter mode: floor of the index 289 | // One dimension, Read mode: ElementType(Get what we write) 290 | cudaBindTexture(0, tables1_tex, d_ewaldC1, sizeof(Scalar4) * (ewald_n+1)); // This was a bug in former versions! 291 | 292 | // Same for the positions and forces 293 | pos_tex.normalized = false; // Not normalized 294 | pos_tex.filterMode = cudaFilterModePoint; // Filter mode: floor of the index 295 | cudaBindTexture(0, pos_tex, d_pos, sizeof(Scalar4) * N_total); 296 | 297 | // Get sheared grid vectors 298 | gpu_stokes_SetGridk_kernel<<>>(d_gridk,Nx,Ny,Nz,NxNyNz,box,xi,eta); 299 | 300 | // Do Mobility and Brownian Calculations (compute the velocity from the forces) 301 | gpu_stokes_CombinedMobilityBrownian_wrap( 302 | d_pos, 303 | d_net_force, 304 | d_group_members, 305 | group_size, 306 | box, 307 | dt, 308 | d_vel, // output 309 | T, 310 | timestep, 311 | seed, 312 | xi, 313 | eta, 314 | P, 315 | ewald_cut, 316 | ewald_dr, 317 | ewald_n, 318 | d_ewaldC1, 319 | d_gridk, 320 | d_gridX, 321 | d_gridY, 322 | d_gridZ, 323 | plan, 324 | Nx, 325 | Ny, 326 | Nz, 327 | d_n_neigh, 328 | d_nlist, 329 | d_headlist, 330 | m_Lanczos, 331 | N_total, 332 | NxNyNz, 333 | grid, 334 | threads, 335 | gridBlockSize, 336 | gridNBlock, 337 | gridh, 338 | cheb_error, 339 | self ); 340 | 341 | 342 | // Use forward Euler integration to move the particles according the velocity 343 | // computed from the Mobility and Brownian calculations 344 | gpu_stokes_step_one_kernel<<< grid, threads >>>( 345 | d_pos, 346 | d_vel, 347 | d_accel, 348 | d_image, 349 | d_group_members, 350 | group_size, 351 | box, 352 | dt, 353 | d_net_force, 354 | shear_rate 355 | ); 356 | 357 | // Quick error check 358 | 
gpuErrchk(cudaPeekAtLastError()); 359 | 360 | // Cleanup 361 | cudaUnbindTexture(tables1_tex); 362 | cudaUnbindTexture(pos_tex); 363 | 364 | return cudaSuccess; 365 | } 366 | -------------------------------------------------------------------------------- /PSEv1/Stokes.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 
20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Gang Wang 52 | // Modified by Andrew Fiore 53 | 54 | /*! \file Stokes.cuh 55 | \brief Declares GPU kernel code for integration considering hydrodynamic interactions on the GPU. Used by Stokes. 
56 | */ 57 | #include "hoomd/ParticleData.cuh" 58 | #include "hoomd/HOOMDMath.h" 59 | #include "hoomd/Index1D.h" 60 | #include 61 | 62 | //! Define the step_one kernel 63 | #ifndef __STOKES_CUH__ 64 | #define __STOKES_CUH__ 65 | 66 | //! Definition for comxplex variable storage 67 | #ifdef SINGLE_PRECISION 68 | #define CUFFTCOMPLEX cufftComplex 69 | #else 70 | #define CUFFTCOMPLEX cufftComplex 71 | #endif 72 | 73 | 74 | //! Kernel driver for the first part (no second part) of the Stokes update called by Stokes.cc 75 | cudaError_t gpu_stokes_step_one(Scalar4 *d_pos, 76 | Scalar4 *d_vel, 77 | Scalar3 *d_accel, 78 | int3 *d_image, 79 | unsigned int *d_group_members, 80 | unsigned int group_size, 81 | const BoxDim& box, 82 | Scalar deltaT, 83 | unsigned int block_size, 84 | Scalar4 *d_net_force, 85 | const Scalar T, 86 | const unsigned int timestep, 87 | const unsigned int seed, 88 | Scalar xi, 89 | Scalar eta, 90 | Scalar ewald_cut, 91 | Scalar ewald_dr, 92 | int ewald_n, 93 | Scalar4 *d_ewald1, 94 | Scalar self, 95 | Scalar4 *d_gridk, 96 | CUFFTCOMPLEX *d_gridX, 97 | CUFFTCOMPLEX *d_gridY, 98 | CUFFTCOMPLEX *d_gridZ, 99 | cufftHandle plan, 100 | const int Nx, 101 | const int Ny, 102 | const int Nz, 103 | const unsigned int *d_n_neigh, 104 | const unsigned int *d_nlist, 105 | const unsigned int *d_headlist, 106 | int& m_Lanczos, 107 | const unsigned int N_total, 108 | const int P, 109 | Scalar3 gridh, 110 | Scalar cheb_error, 111 | Scalar current_shear_rate); 112 | 113 | 114 | #endif 115 | -------------------------------------------------------------------------------- /PSEv1/Stokes.h: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 
5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 
40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Gang Wang 52 | // Modified by Andrew Fiore 53 | 54 | #ifndef SINGLE_PRECISION 55 | #define CUFFTCOMPLEX cufftComplex 56 | #else 57 | #define CUFFTCOMPLEX cufftComplex 58 | #endif 59 | 60 | #ifndef __STOKES_H__ 61 | #define __STOKES_H__ 62 | 63 | /*! \file Stokes.h 64 | \brief Declares the Stokes class 65 | */ 66 | 67 | #include 68 | #include 69 | #include 70 | 71 | #include 72 | 73 | #include "ShearFunction.h" 74 | 75 | 76 | #ifdef NVCC 77 | #error This header cannot be compiled by nvcc 78 | #endif 79 | 80 | #include 81 | 82 | //! Integrates the system forward considering hydrodynamic interactions by GPU 83 | /*! Implements overdamped integration (one step) through IntegrationMethodTwoStep interface, runs on the GPU 84 | */ 85 | 86 | class Stokes : public IntegrationMethodTwoStep 87 | { 88 | public: 89 | 90 | //! Constructs the integration method and associates it with the system 91 | Stokes( std::shared_ptr sysdef, 92 | std::shared_ptr group, 93 | std::shared_ptr T, 94 | unsigned int seed, 95 | std::shared_ptr nlist, 96 | Scalar xi, 97 | Scalar error); 98 | 99 | virtual ~Stokes(); 100 | 101 | //! Set a new temperature 102 | /*! \param T new temperature to set */ 103 | void setT(std::shared_ptr T) 104 | { 105 | m_T = T; 106 | } 107 | 108 | //! Performs the first step of the integration 109 | virtual void integrateStepOne(unsigned int timestep); 110 | 111 | //! 
Performs the second step of the integration 112 | virtual void integrateStepTwo(unsigned int timestep); 113 | 114 | //! Set the parameters for Ewald summation 115 | void setParams(); 116 | 117 | //! Set the shear rate and shear frequency 118 | void setShear(std::shared_ptr shear_func, Scalar max_strain) { 119 | m_shear_func = shear_func; 120 | m_max_strain = max_strain; 121 | } 122 | 123 | protected: 124 | 125 | std::shared_ptr m_T; //!< The Temperature of the Stochastic Bath 126 | unsigned int m_seed; //!< The seed for the RNG of the Stochastic Bath 127 | 128 | cufftHandle plan; //!< Used for the Fast Fourier Transformations performed on the GPU 129 | 130 | std::shared_ptr m_nlist; //!< The neighborlist to use for the computation 131 | 132 | std::shared_ptr m_shear_func; //!< mutable shared pointer towards a ShearFunction object 133 | Scalar m_max_strain; //!< Maximum total strain before box resizing 134 | 135 | Scalar m_xi; //!< ewald splitting parameter xi 136 | Scalar m_ewald_cut; //!< Real space cutoff 137 | GPUArray m_ewaldC1; //!< Real space Ewald coefficients table 138 | int m_ewald_n; //!< Number of entries in table of Ewald coefficients 139 | Scalar m_ewald_dr; //!< Real space Ewald table spacing 140 | 141 | Scalar m_self; //!< self piece 142 | 143 | int m_Nx; //!< Number of grid points in x direction 144 | int m_Ny; //!< Number of grid points in y direction 145 | int m_Nz; //!< Number of grid points in z direction 146 | 147 | GPUArray m_gridk; //!< k-vectors for each grid point 148 | GPUArray m_gridX; //!< x component of the grid based force 149 | GPUArray m_gridY; //!< x component of the grid based force 150 | GPUArray m_gridZ; //!< x component of the grid based force 151 | 152 | Scalar m_gaussm; //!< Gaussian width in standard deviations for wave space spreading/contraction 153 | int m_gaussP; //!< Number of points in each dimension for Gaussian support 154 | Scalar m_eta; //!< Gaussian spreading parameter 155 | Scalar3 m_gridh; //!< Size of the grid 
box in 3 direction 156 | 157 | int m_m_Lanczos; //!< Number of Lanczos Iterations to use for calculation of Brownian displacement 158 | 159 | Scalar m_error; //!< Error tolerance for all calculations 160 | 161 | }; 162 | 163 | //! Exports the Stokes class to python 164 | void export_Stokes(pybind11::module& m); 165 | 166 | #endif 167 | -------------------------------------------------------------------------------- /PSEv1/VariantShearFunction.cc: -------------------------------------------------------------------------------- 1 | // Maintainer: Gang Wang 2 | 3 | /*! \file VariantShearFunction.cc 4 | \brief Defines VariantShearFunction class 5 | */ 6 | 7 | #ifdef WIN32 8 | #pragma warning( push ) 9 | #pragma warning( disable : 4103 4244 ) 10 | #endif 11 | 12 | #include "VariantShearFunction.h" 13 | 14 | using namespace std; 15 | 16 | 17 | VariantShearFunction::VariantShearFunction(std::shared_ptr shear_func, 18 | unsigned int total_timestep, 19 | double min_value, 20 | double max_value) : 21 | m_shear_func(shear_func), 22 | m_total_timestep(total_timestep), 23 | m_min_value(min_value), 24 | m_max_value(max_value) 25 | { 26 | setOffset( m_shear_func -> getOffset() ); // This line ensures the offsets of ShearFunction and Variant class are equal 27 | m_value_range = m_max_value - m_min_value; 28 | m_end_value = wrapValue( m_shear_func -> getStrain( m_offset + m_total_timestep ) ); 29 | } 30 | 31 | /*! 
\param timestep Timestep to get the value at 32 | \return value by the user-specified function 33 | */ 34 | double VariantShearFunction::getValue(unsigned int timestep) 35 | { 36 | if (timestep < m_offset) { 37 | return 0; 38 | } 39 | else if (timestep >= m_offset + m_total_timestep) { 40 | return m_end_value; 41 | } 42 | return wrapValue( m_shear_func -> getStrain(timestep) ); 43 | } 44 | 45 | void export_VariantShearFunction(pybind11::module& m) 46 | { 47 | pybind11::class_ >(m, "VariantShearFunction", pybind11::base()) 48 | .def(pybind11::init< std::shared_ptr, unsigned int, double, double >()); 49 | } 50 | 51 | #ifdef WIN32 52 | #pragma warning( pop ) 53 | #endif 54 | -------------------------------------------------------------------------------- /PSEv1/VariantShearFunction.h: -------------------------------------------------------------------------------- 1 | // Maintainer: Gang Wang 2 | 3 | /*! \file VariantShearFunction.h 4 | \brief Declares the VariantShearFunction class 5 | */ 6 | 7 | #ifdef NVCC 8 | #error This header cannot be compiled by nvcc 9 | #endif 10 | 11 | #include 12 | 13 | #ifndef __VARIANT_SHEAR_FUNCTION_H__ 14 | #define __VARIANT_SHEAR_FUNCTION_H__ 15 | 16 | #include 17 | #include 18 | #include "ShearFunction.h" 19 | 20 | //! Variant class for shear flowfield described by a function 21 | /*! This variant gives the strain value based on a function (which is ShearFunction type) 22 | The strain is wrapped based on the min_value and max_value since HOOMD cannot deal with 23 | very thin box. In most cases, max_value - min_value is an integer (and the recommended value 24 | is [-0.5, 0.5]). If the timestep is smaller than offset, 0 is returned when calling 25 | getValue; if the timestep is larger than offset + total_timestep, the strain of the last 26 | time point is returned. 27 | */ 28 | class VariantShearFunction : public Variant 29 | { 30 | public: 31 | //! 
Constructs a VariantShearFunction type with a shared_ptr to ShearFunction and total timestep 32 | /*! \param shear_func the shared pointer to the ShearFunction object 33 | \param total_timestep total time step this Variant is going to be effective 34 | \param min_value the minimal value of this Variant 35 | \param max_value the maximal value of this Variant 36 | */ 37 | VariantShearFunction(std::shared_ptr shear_func, 38 | unsigned int total_timestep, 39 | double min_value, 40 | double max_value); 41 | 42 | //! Gets the value at a given time step 43 | virtual double getValue(unsigned int timestep); 44 | 45 | //! Wrap the value between m_min_value and m_max_value 46 | double wrapValue(double functionValue) { 47 | return functionValue - m_value_range * floor( (functionValue - m_min_value) / m_value_range ); 48 | } 49 | 50 | private: 51 | const std::shared_ptr m_shear_func; 52 | const unsigned int m_total_timestep; //!< the total timestep for the Variant class 53 | const double m_min_value; //!< minimum value of the output of the Variant class 54 | const double m_max_value; //!< maximum value of the output of the Variant class 55 | double m_end_value; //!< the last value of output after time > m_offset + m_total_timestep 56 | double m_value_range; //!< max_value - min_value 57 | }; 58 | 59 | //! 
Exports VariantShearFunction class to python 60 | void export_VariantShearFunction(pybind11::module& m); 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /PSEv1/__init__.py: -------------------------------------------------------------------------------- 1 | # this file exists to mark this directory as a python module 2 | # need to import all submodules defined in this directory 3 | from hoomd.PSEv1 import integrate 4 | from hoomd.PSEv1 import shear_function 5 | from hoomd.PSEv1 import variant 6 | -------------------------------------------------------------------------------- /PSEv1/integrate.py: -------------------------------------------------------------------------------- 1 | # First, we need to import the C++ module. It has the same name as this module (plugin_template) but with an underscore 2 | # in front 3 | from hoomd.PSEv1 import _PSEv1 4 | from hoomd.PSEv1 import shear_function 5 | 6 | # Next, since we are extending an integrator, we need to bring in the base class integrator and some other parts from 7 | # hoomd_script 8 | import hoomd 9 | from hoomd import _hoomd 10 | from hoomd import compute 11 | from hoomd.md import _md 12 | import math 13 | 14 | ## One step overdamped integration with hydrodynamic interactions 15 | class PSEv1(hoomd.md.integrate._integration_method): 16 | ## Specifies the Stokes integrator 17 | # 18 | # \param group Group of particles on which to apply this method. 19 | # \param T Temperature of the simulation (in energy units) 20 | # \param seed Random seed to use for the run. Simulations that are identical, except for the seed, will follow 21 | # different trajectories. 22 | # \param xi Ewald splitting parameter 23 | # \param error Relative error for all calculations 24 | # \param function_form Functional form for shear 25 | # \param max_strain Maximum box deformation for shear 26 | # 27 | # 28 | # T can be a variant type, allowing for temperature ramps in simulation runs. 
29 | # 30 | # Internally, a compute.thermo is automatically specified and associated with \a group. 31 | 32 | def __init__(self, group, T, seed=0, xi = 0.5, error = 0.001, function_form = None, max_strain = 0.5, nlist_type = "cell" ): 33 | 34 | # Print the status of the initialization 35 | hoomd.util.print_status_line(); 36 | 37 | # initialize base class 38 | hoomd.md.integrate._integration_method.__init__(self); 39 | 40 | # setup the variant inputs 41 | T = hoomd.variant._setup_variant_input(T); 42 | 43 | # create the compute thermo 44 | compute._get_unique_thermo(group=group); 45 | 46 | # Real space neighborlist cutoff based on error estimate for spectral sums 47 | self.rcut = math.sqrt( - math.log( error ) ) / xi; 48 | # If this line is changed, remember to change in C++ code as well!! 49 | 50 | # initialize the reflected c++ class 51 | if not hoomd.context.exec_conf.isCUDAEnabled(): 52 | hoomd.context.msg.error("Sorry, we have not written CPU code for PSE RPY simulation. \n"); 53 | raise RuntimeError('Error creating Stokes'); 54 | else: 55 | 56 | # Create a neighborlist exclusively for real space interactions. 
Use cell lists by 57 | # default, but also allow the user to specify 58 | if ( nlist_type.upper() == "CELL" ): 59 | 60 | cl_stokes = _hoomd.CellListGPU(hoomd.context.current.system_definition); 61 | hoomd.context.current.system.addCompute(cl_stokes, "stokes_cl") 62 | self.neighbor_list = _md.NeighborListGPUBinned(hoomd.context.current.system_definition, self.rcut, 0.4, cl_stokes); 63 | 64 | elif ( nlist_type.upper() == "TREE" ): 65 | 66 | self.neighbor_list = _md.NeighborListGPUTree(hoomd.context.current.system_definition, self.rcut, 0.4) 67 | 68 | elif ( nlist_type.upper() == "STENCIL" ): 69 | 70 | cl_stokes = _hoomd.CellListGPU(hoomd.context.current.system_definition) 71 | hoomd.context.current.system.addCompute(cl_stokes, "stokes_cl") 72 | cls_stokes = _hoomd.CellListStencil( hoomd.context.current.system_definition, cl_stokes ) 73 | hoomd.context.current.system.addCompute( cls_stokes, "stokes_cls") 74 | self.neighbor_list = _md.NeighborListGPUStencil(hoomd.context.current.system_definition, self.rcut, 0.4, cl_stokes, cls_stokes) 75 | 76 | else: 77 | hoomd.context.msg.error("Invalid neighborlist method specified. Valid options are: cell, tree, stencil. 
\n"); 78 | raise RuntimeError('Error constructing neighborlist'); 79 | 80 | # Set neighborlist properties 81 | self.neighbor_list.setEvery(1, True); 82 | hoomd.context.current.system.addCompute(self.neighbor_list, "stokes_nlist") 83 | self.neighbor_list.countExclusions(); 84 | 85 | # Call the stokes integrator 86 | self.cpp_method = _PSEv1.Stokes(hoomd.context.current.system_definition, group.cpp_group, T.cpp_variant, seed, self.neighbor_list, xi, error); 87 | 88 | self.cpp_method.validateGroup() 89 | 90 | if function_form is not None: 91 | self.cpp_method.setShear(function_form.cpp_function, max_strain) 92 | else: 93 | no_shear_function = shear_function.steady(dt = 0) 94 | self.cpp_method.setShear(no_shear_function.cpp_function, max_strain) 95 | 96 | self.cpp_method.setParams() 97 | 98 | ## Changes parameters of an existing integrator 99 | # \param self self 100 | # \param T Temperature 101 | # 102 | # To change the parameters of an existing integrator, you must save it in a variable when it is 103 | # specified, like so: 104 | # \code 105 | # integrator = integrate.nve(group=all) 106 | # \endcode 107 | 108 | def set_params(self, T=None, function_form = None, max_strain=0.5): 109 | util.print_status_line(); 110 | self.check_initialization(); 111 | 112 | if T is not None: 113 | # setup the variant inputs 114 | T = hoomd.variant._setup_variant_input(T); 115 | self.cpp_method.setT(T.cpp_variant); 116 | 117 | if function_form is not None: 118 | self.cpp_method.setShear(function_form.cpp_function, max_strain) 119 | 120 | ## Stop any shear 121 | def stop_shear(self, max_strain = 0.5): 122 | no_shear_function = shear_function.steady(dt = 0) 123 | self.cpp_method.setShear(no_shear_function.cpp_function, max_strain) 124 | 125 | 126 | -------------------------------------------------------------------------------- /PSEv1/module.cc: -------------------------------------------------------------------------------- 1 | // Include the defined classes that are to be exported to 
python 2 | #include "Stokes.h" 3 | #include "VariantShearFunction.h" 4 | #include "ShearFunction.h" 5 | #include "ShearFunctionWrap.h" 6 | #include "SpecificShearFunction.h" 7 | 8 | // Include pybind11 9 | #include 10 | 11 | // specify the python module. Note that the name must explicitly match the PROJECT() name provided in CMakeLists 12 | // (with an underscore in front) 13 | PYBIND11_MODULE(_PSEv1, m) 14 | { 15 | #ifdef ENABLE_CUDA 16 | export_Stokes(m); 17 | #endif 18 | export_ShearFunction(m); 19 | export_ShearFunctionWrap(m); 20 | export_VariantShearFunction(m); 21 | export_SpecificShearFunction(m); 22 | } 23 | -------------------------------------------------------------------------------- /PSEv1/shear_function.py: -------------------------------------------------------------------------------- 1 | ## \package PSEv1.shear_function 2 | # classes representing shear functions, which can be input of an integrator and variant 3 | # to shear the box of a simulation 4 | 5 | from hoomd.PSEv1 import _PSEv1 6 | 7 | import hoomd 8 | 9 | ## shear function interface representing shear flow field described by a function 10 | class _shear_function: 11 | ## Constructor and check the validity of zero param 12 | # \param zero Specify absolute time step number location for 0 in \a points. Use 'now' to indicate the current step. 
13 | def __init__(self, zero = 'now'): 14 | self.cpp_function = None 15 | 16 | if zero == 'now': 17 | self._offset = hoomd.context.current.system.getCurrentTimeStep() 18 | else: 19 | # validate zero 20 | if zero < 0: 21 | hoomd.context.msg.error("Cannot create a shear_function variant with a negative zero\n") 22 | raise RuntimeError('Error creating shear function') 23 | if zero > hoomd.context.current.system.getCurrentTimeStep(): 24 | hoomd.context.msg.error("Cannot create a shear_function variant with a zero in the future\n") 25 | raise RuntimeError('Error creating shear function') 26 | self._offset = zero 27 | 28 | ## Get shear rate at a certain time step, might be useful when switching strain field 29 | # \param timestep the timestep 30 | def get_shear_rate(self, timestep): 31 | return self.cpp_function.getShearRate(timestep) 32 | 33 | ## Get the strain at a certain time step. The strain is not wrapped 34 | # \param timestep the timestep 35 | def get_strain(self, timestep): 36 | return self.cpp_function.getStrain(timestep) 37 | 38 | ## Get the offset of this shear function 39 | def get_offset(self): 40 | return self.cpp_function.getOffset() 41 | 42 | 43 | ## concrete class representing steady shear, no shear by default if shear_rate is not provided 44 | class steady(_shear_function): 45 | ## Constructor of steady shear function 46 | # \param dt the time interval between each timestep, must be the same with the global timestep 47 | # \param shear_rate the shear rate of the shear, default is zero, should be zero or positive 48 | # \param zero the time offset 49 | def __init__(self, dt, shear_rate = 0, zero = 'now'): 50 | _shear_function.__init__(self, zero) 51 | self.cpp_function = _PSEv1.SteadyShearFunction(shear_rate, self._offset, dt) 52 | 53 | 54 | ## concrete class representing simple sinusoidal oscillatory shear 55 | class sine(_shear_function): 56 | ## Constructor of simple sinusoidal oscillatory shear 57 | # \param dt the time interval between each 
timestep, must be the same with the global timestep 58 | # \param shear_rate the maximum shear rate of the ocsillatory shear, must be positive 59 | # \param shear_freq the frequency (real frequency, not angular frequency) of the ocsillatory shear, must be positive 60 | # \param zero the time offset 61 | def __init__(self, dt, shear_rate, shear_freq, zero = 'now'): 62 | 63 | if shear_rate <= 0: 64 | hoomd.context.msg.error("Shear rate must be positive (use steady class instead for zero shear)\n") 65 | raise RuntimeError("Error creating shear function") 66 | if shear_freq <= 0: 67 | hoomd.context.msg.error("Shear frequency must be positive (use steady class instead for steady shear)\n") 68 | raise RuntimeError("Error creating shear function") 69 | 70 | _shear_function.__init__(self, zero) 71 | self.cpp_function = _PSEv1.SinShearFunction(shear_rate, shear_freq, self._offset, dt) 72 | 73 | 74 | ## concrete class representing chirp oscillatory shear 75 | class chirp(_shear_function): 76 | ## Constructor of chirp oscillatory shear 77 | # \param dt the time interval between each timestep, must be the same with the global timestep 78 | # \param amplitude the strain amplitude of Chirp oscillatory shear, must be positive 79 | # \param omega_0 minimum angular frequency, must be positive 80 | # \param omega_f maximum angular frequency, must be positive and larger than omega_0 81 | # \param periodT final time of chirp 82 | # \param zero the time offset 83 | def __init__(self, dt, amplitude, omega_0, omega_f, periodT, zero = 'now'): 84 | _shear_function.__init__(self, zero) 85 | self.cpp_function = _PSEv1.ChirpShearFunction(amplitude, omega_0, omega_f, periodT, self._offset, dt) 86 | 87 | 88 | ## concrete class representing Tukey window function 89 | class tukey_window(_shear_function): 90 | ## Constructor of Tukey window function 91 | # \param dt the time interval between each timestep, must be the same with the global timestep 92 | # \param periodT time length of the Tukey 
window function 93 | # \param tukey_param Tukey window function parameter, must be within (0, 1] 94 | # \param zero the time offset 95 | def __init__(self, dt, periodT, tukey_param, zero = 'now'): 96 | 97 | if tukey_param <= 0 or tukey_param > 1: 98 | hoomd.context.msg.error("Tukey parameter must be within (0, 1]") 99 | raise RuntimeError("Error creating Tukey window function") 100 | 101 | _shear_function.__init__(self, zero) 102 | self.cpp_function = _PSEv1.TukeyWindowFunction(periodT, tukey_param, self._offset, dt) 103 | 104 | 105 | ## concrete class represeting a windowed shear function 106 | class windowed(_shear_function): 107 | ## Constructor of a windowed shear function 108 | # The strain of the resulting windowed shear function will be the product of the original shear function and 109 | # the provided window function 110 | # \param function_form the original shear function 111 | # \param window the window function. It is recommended to make sure the offset (zero) of the window function is the same with shear function 112 | def __init__(self, function_form, window): 113 | _shear_function.__init__(self, 'now') # zero parameter is not used in windowed class anyways 114 | self.cpp_function = _PSEv1.WindowedFunction(function_form.cpp_function, window.cpp_function) 115 | -------------------------------------------------------------------------------- /PSEv1/variant.py: -------------------------------------------------------------------------------- 1 | ## \package PSEv1.variant 2 | # classes representing the variant class to facilitate box_resize 3 | 4 | from hoomd.PSEv1 import _PSEv1 5 | from hoomd.PSEv1 import shear_function 6 | 7 | from hoomd import variant 8 | 9 | from hoomd import _hoomd 10 | import hoomd 11 | import sys 12 | 13 | ## Variant class holding a functional form of shear field 14 | # Used as an argument for box_resize class to deform the box 15 | class shear_variant(hoomd.variant._variant): 16 | ## Specify shear field represented by a function 
form with a limited timesteps 17 | # 18 | # \param function_form the functional form of the sinusoidal shear 19 | # \param total_timestep the total timesteps of the shear, equal to shear_end_timestep - shear_start_timestep, must be positive 20 | # \param max_strain the maximum absolute value of the strain, use 0.5 in almost all the cases 21 | def __init__(self, function_form, total_timestep, max_strain = 0.5): 22 | 23 | # initialize the base class 24 | _variant.__init__(self) 25 | 26 | # check total_timestep is positive 27 | if total_timestep <= 0: 28 | hoomd.context.msg.error("Cannot create a shear_variant with 0 or negative points\n") 29 | raise RuntimeError('Error creating variant') 30 | 31 | # create the c++ mirror class 32 | self.cpp_variant = _PSEv1.VariantShearFunction(function_form.cpp_function, int(total_timestep), -max_strain, max_strain) 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Positively Split Ewald (PSE) 2 | PSE is a HOOMD plugin by Andrew M. Fiore containing a GPU implemention of the Positively Split Ewald 3 | (PSE) algorithm for calculation of the Rotne-Prager-Yamakawa (RPY) 4 | hydrodynamic mobility and stochastic thermal displacements. This repository is no longer maintained. 5 | 6 | An alternative maintained GPU implementation can be found in [UAMMD](https://github.com/RaulPPelaez/UAMMD) and is 7 | accessible using a simple [python interface](https://github.com/RaulPPelaez/UAMMD_PSE_Python). 8 | 9 | The theory behind the PSE method is described in the reference: 10 | 11 | 1. **Rapid Sampling of Stochastic Displacements in Brownian Dynamics 12 | Simulations**, Andrew M. Fiore, Florencio Balboa Usabiaga, Aleksandar 13 | Donev, and James W. 
Swan, The Journal of Chemical Physics, **146**, 14 | 124116 (2017).[DOI](http://doi.org/10.1063/1.4978242) [arXiv](https://arxiv.org/abs/1611.09322) 15 | 16 | 17 | ## Files that come in this template 18 | - doc/TUTORIAL.pdf : a tutorial to use PSE. 19 | - CMakeLists.txt : main CMake configuration file for the plugin 20 | - FindHOOMD.cmake : script to find a HOOMD-Blue installation to link against 21 | - README : This file 22 | - PSEv1 : Directory containing C++ and CUDA source code that interacts with HOOMD. Also contains python UI level source code that drives the C++ module 23 | - cppmodule : Directory containing C++ and CUDA source code that interacts with HOOMD 24 | - examples/run.py : python example to use PSE. 25 | 26 | ## Software requirements 27 | 28 | The PSE plugin requires the following additional software: 29 | - HOOMD, compiled with CUDA (tested with version 2.3.3). 30 | - CUDA (tested with version 9.2). 31 | - LAPACKE (tested with version 3.6.1). 32 | - CBLAS (tested with version 3.6.1). 33 | 34 | ## Software Installation 35 | 36 | HOOMD can be installed following the instructions given in the [documentation](http://hoomd-blue.readthedocs.io/en/stable/compiling.html). HOOMD must be compiled with CUDA enabled. It is recommended to use the following cmake command 37 | ``` 38 | cmake ../ -DCMAKE_INSTALL_PREFIX=${SOFTWARE_ROOT}/lib/python -DCMAKE_CXX_FLAGS=-march=native -DCMAKE_C_FLAGS=-march=native -DENABLE_CUDA=ON -DENABLE_MPI=ON 39 | ``` 40 | where `${SOFTWARE_ROOT}` is the path variable specifying the installation location for HOOMD. 41 | 42 | LAPACKE and CBLAS can be install manually after downloading the source code from [netlib](http://www.netlib.org/lapacke) and [openblas](https://www.openblas.net) or from repositorities. 
In Ubuntu, the simplest method is via repository: 43 | ``` 44 | sudo apt-get install liblapack3 liblapack-dev liblapacke liblapacke-dev 45 | sudo apt-get install libblas3 libblas-dev libopenblas-dev libatlas-base-dev 46 | ``` 47 | 48 | ## Plugin Compilation 49 | To compile this example plugin, follow steps similar to those in compiling HOOMD-Blue. The process of finding a HOOMD 50 | installation to link to will be fully automatic IF you have hoomd_install_dir/bin in your PATH when running cmake. 51 | 52 | Note that plugins can only be built against a HOOMD build that has been installed via a package or compiled and then 53 | installed via 'make install'. HOOMD must be built with CUDA enabled -DENABLE_CUDA=ON in order for the package to work. 54 | Plugins can only be built against hoomd when it is built as a shared library. 55 | 56 | From the root PSE folder do: 57 | 58 | ``` 59 | $ mkdir plugin_build 60 | $ cd plugin_build 61 | $ cmake ../ 62 | $ make -j6 63 | $ make install 64 | ``` 65 | 66 | If hoomd is not in your PATH, you can specify the root using 67 | 68 | `$ cmake -DHOOMD_ROOT=/path/to/hoomd ../` 69 | 70 | You can also provide to `cmake` the location of `LAPACKE`, `LAPACK`, `CBLAS`, 71 | `BLAS` and the `python` version with the options 72 | 73 | ``` 74 | $ cmake -DHOOMD_ROOT=/path/to/hoomd \ 75 | -DCBLAS_LIBRARIES=/path/to/cblas \ 76 | -DBLAS_LIBRARIES=/path/to/blas \ 77 | -DLAPACKE_LIBRARIES=/path/to/lapacke \ 78 | -DLAPACK_LIBRARIES=/path/to/lapack \ 79 | -DPYTHON_EXECUTABLE=`which python` \ 80 | ../ 81 | ``` 82 | however, these options are unecessary if these libraries have been installed into the standard directories. 83 | 84 | By default, make install will install the plugin into 85 | 86 | `${HOOMD_ROOT}/lib/python/hoomd/PSEv1` 87 | 88 | This works if you have `make install`ed hoomd into your home directory. 89 | 90 | ### Using the Plugin 91 | A sample script demonstrating how the plugin is used can be found in examples/run.py. 
You can 92 | call this script with the command 93 | ``` 94 | python3 run.py 95 | ``` 96 | -------------------------------------------------------------------------------- /examples/run.py: -------------------------------------------------------------------------------- 1 | import hoomd; 2 | from hoomd import _hoomd 3 | from hoomd.md import _md 4 | import hoomd.PSEv1 5 | import os; 6 | import math 7 | hoomd.context.initialize(''); 8 | 9 | # Time stepping information 10 | dt = 1e-3 # time step 11 | tf = 1e0 # the final time of the simulation (in units of bare particle diffusion time) 12 | nrun = tf / dt # number of steps 13 | 14 | # Particle size 15 | # 16 | # Changing this won't change the PSE hydrodynamics, which assumes that all particles 17 | # have radius = 1.0, and ignores HOOMD's size data. However, might be necessary if 18 | # hydrodynamic radius is different from other radii needed. 19 | radius = 1.0 20 | diameter = 2.0 * radius 21 | 22 | # File output location 23 | loc = 'Data/' 24 | if not os.path.isdir( loc ): 25 | os.mkdir( loc ) 26 | 27 | # Simple cubic crystal of 1000 particles 28 | N = 1000; 29 | L = 64 30 | n = math.ceil(N ** (1.0/3.0)) # number of particles along 1D 31 | a = L / n # spacing between particles 32 | 33 | # Create the box and particles 34 | hoomd.init.create_lattice(unitcell=hoomd.lattice.sc(a=a),n=n) 35 | 36 | # Shear function form, using sinusoidal oscillatory shear as example 37 | # 38 | # Options are: none (no shear. 
default if left unspecified in integrator call) 39 | # steady (steady shear) 40 | # sine (sinusoidal oscillatory shear) 41 | # chirp (chirp frequency sweep) 42 | function_form = hoomd.PSEv1.shear_function.sine( dt = dt, shear_rate = 1.0, shear_freq = 1.0 ) 43 | 44 | # Set up PSE integrator 45 | # 46 | # Arguments to PSE integrator (default values given in parentheses): 47 | # group -- group of particle to act on (should be all) 48 | # seed (1) -- Seed for the random number generator used in Brownian calculations 49 | # T (1.0) -- Temperature 50 | # xi (0.5) -- Ewald splitting parameter. Changing value will not affect results, only speed. 51 | # error (1E-3) -- Calculation error tolerance 52 | # function_form (none) -- Functional form for shearing. See above (or source code) for valid options. 53 | hoomd.md.integrate.mode_standard(dt=dt) 54 | pse = hoomd.PSEv1.integrate.PSEv1( group = hoomd.group.all(), seed = 1, T = 1.0, xi = 0.5, error = 1E-3, function_form = function_form ) 55 | 56 | # Run the simulation 57 | hoomd.run( nrun ) 58 | 59 | 60 | --------------------------------------------------------------------------------