├── .gitattributes ├── CMakeLists.txt ├── FindHOOMD.cmake ├── PSEv1 ├── Brownian.cu ├── Brownian.cuh ├── CMakeLists.txt ├── Helper.cu ├── Helper.cuh ├── Mobility.cu ├── Mobility.cuh ├── ShearFunction.cc ├── ShearFunction.h ├── ShearFunctionWrap.cc ├── ShearFunctionWrap.h ├── SpecificShearFunction.cc ├── SpecificShearFunction.h ├── Stokes.cc ├── Stokes.cu ├── Stokes.cuh ├── Stokes.h ├── VariantShearFunction.cc ├── VariantShearFunction.h ├── __init__.py ├── integrate.py ├── module.cc ├── shear_function.py └── variant.py ├── README.md └── examples └── run.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Set the default behavior, in case people don't have core.autocrlf set.
2 | * text=auto
3 | 
4 | # Explicitly declare text files you want to always be normalized and converted
5 | # to native line endings on checkout.
6 | *.c text
7 | *.h text
8 | *.cc text
9 | *.cu text
10 | *.cuh text
11 | 
12 | 
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | CMAKE_MINIMUM_REQUIRED(VERSION 2.6.2 FATAL_ERROR) # must come before project() so policy defaults are established
2 | project(PSEv1)
3 | 
4 | set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_MODULE_PATH})
5 | 
6 | include(FindHOOMD.cmake)
7 | 
8 | # plugins must be built as shared libraries
9 | if (ENABLE_STATIC)
10 | message(SEND_ERROR "Plugins cannot be built against a statically compiled hoomd")
11 | endif (ENABLE_STATIC)
12 | 
13 | set(BUILD_SHARED_LIBS on)
14 | 
15 | if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
16 | set(CMAKE_INSTALL_PREFIX ${HOOMD_ROOT} CACHE PATH "Installation prefix" FORCE)
17 | endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
18 | 
19 | set(PYTHON_MODULE_BASE_DIR ${CMAKE_INSTALL_PREFIX})
20 | message(STATUS "Install plugin to: " ${PYTHON_MODULE_BASE_DIR})
21 | 
22 | # add subdirectories
23 | add_subdirectory(${PROJECT_NAME})
24 | 
--------------------------------------------------------------------------------
/FindHOOMD.cmake:
--------------------------------------------------------------------------------
1 | # CMake script for finding HOOMD and setting up all needed compile options to create and link a plugin library
2 | #
3 | # Variables taken as input to this module:
4 | # HOOMD_ROOT : location to look for HOOMD, if it is not in the python path
5 | #
6 | # Variables defined by this module:
7 | # HOOMD_FOUND : set to true if HOOMD is found
8 | # HOOMD_LIBRARIES : a list of all libraries needed to link to in order to access hoomd (uncached)
9 | # HOOMD_INCLUDE_DIR : a list of all include directories that need to be set to include HOOMD
10 | # HOOMD_LIB : a cached var locating the hoomd library to link to
11 | #
12 | # various ENABLE_ flags translated from hoomd_config.h so this plugin build can match the ABI of the installed hoomd
13 | #
14 | # as a convenience (for the intended purpose of this find script), all include directories and definitions needed
15 | # to compile with all the various libs (boost, python, winsock, etc...) are set within this script
16 | 
17 | set(HOOMD_ROOT "" CACHE FILEPATH "Directory containing a hoomd installation (i.e. _hoomd.so)")
18 | 
19 | # Let HOOMD_ROOT take precedence, but if unset, try letting Python find a hoomd package in its default paths.
20 | if(HOOMD_ROOT)
21 | set(hoomd_installation_guess ${HOOMD_ROOT})
22 | else(HOOMD_ROOT)
23 | find_package(PythonInterp)
24 | 
25 | set(find_hoomd_script "
26 | from __future__ import print_function;
27 | import sys, os; sys.stdout = open(os.devnull, 'w')
28 | import hoomd
29 | print(os.path.dirname(hoomd.__file__), file=sys.stderr, end='')")
30 | 
31 | execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "${find_hoomd_script}"
32 | ERROR_VARIABLE hoomd_installation_guess)
33 | message(STATUS "Python output: " ${hoomd_installation_guess})
34 | endif(HOOMD_ROOT)
35 | 
36 | message(STATUS "Looking for a HOOMD installation at " ${hoomd_installation_guess})
37 | find_path(FOUND_HOOMD_ROOT
38 | NAMES _hoomd.so __init__.py
39 | HINTS ${hoomd_installation_guess}
40 | )
41 | 
42 | if(FOUND_HOOMD_ROOT)
43 | set(HOOMD_ROOT ${FOUND_HOOMD_ROOT} CACHE FILEPATH "Directory containing a hoomd installation (i.e. _hoomd.so)" FORCE)
44 | message(STATUS "Found hoomd installation at " ${HOOMD_ROOT})
45 | else(FOUND_HOOMD_ROOT)
46 | message(FATAL_ERROR "Could not find hoomd installation, either set HOOMD_ROOT or set PYTHON_EXECUTABLE to a python which can find hoomd")
47 | endif(FOUND_HOOMD_ROOT)
48 | 
49 | # search for the hoomd include directory
50 | find_path(HOOMD_INCLUDE_DIR
51 | NAMES HOOMDVersion.h
52 | HINTS ${HOOMD_ROOT}/include
53 | )
54 | 
55 | if (HOOMD_INCLUDE_DIR)
56 | message(STATUS "Found HOOMD include directory: ${HOOMD_INCLUDE_DIR}")
57 | mark_as_advanced(HOOMD_INCLUDE_DIR)
58 | endif (HOOMD_INCLUDE_DIR)
59 | 
60 | set(HOOMD_FOUND FALSE)
61 | if (HOOMD_INCLUDE_DIR AND HOOMD_ROOT)
62 | set(HOOMD_FOUND TRUE)
63 | mark_as_advanced(HOOMD_ROOT)
64 | endif (HOOMD_INCLUDE_DIR AND HOOMD_ROOT)
65 | 
66 | if (NOT HOOMD_FOUND)
67 | message(SEND_ERROR "HOOMD Not found. Please specify the location of your hoomd installation in HOOMD_ROOT")
68 | endif (NOT HOOMD_FOUND)
69 | 
70 | #############################################################
71 | ## Now that we've found hoomd, lets do some setup
72 | if (HOOMD_FOUND)
73 | 
74 | include_directories(${HOOMD_INCLUDE_DIR})
75 | 
76 | # run all of HOOMD's generic lib setup scripts
77 | set(CMAKE_MODULE_PATH ${HOOMD_ROOT}
78 | ${HOOMD_ROOT}/CMake/hoomd
79 | ${HOOMD_ROOT}/CMake/thrust
80 | ${CMAKE_MODULE_PATH}
81 | )
82 | 
83 | # grab previously-set hoomd configuration
84 | include (hoomd_cache)
85 | 
86 | # Handle user build options
87 | include (CMake_build_options)
88 | include (CMake_preprocessor_flags)
89 | # setup the install directories
90 | include (CMake_install_options)
91 | 
92 | # Find the python executable and libraries
93 | include (HOOMDPythonSetup)
94 | # Find CUDA and set it up
95 | include (HOOMDCUDASetup)
96 | # Set default CFlags
97 | include (HOOMDCFlagsSetup)
98 | # include some os specific options
99 | include (HOOMDOSSpecificSetup)
100 | # setup common libraries used by all targets in this project
101 | include (HOOMDCommonLibsSetup)
102 | # setup macros
103 | include (HOOMDMacros)
104 | # setup MPI support
105 | include (HOOMDMPISetup)
106 | 
107 | set(HOOMD_LIB ${HOOMD_ROOT}/_hoomd${PYTHON_MODULE_EXTENSION})
108 | set(HOOMD_MD_LIB ${HOOMD_ROOT}/md/_md${PYTHON_MODULE_EXTENSION})
109 | set(HOOMD_DEM_LIB ${HOOMD_ROOT}/dem/_dem${PYTHON_MODULE_EXTENSION})
110 | set(HOOMD_HPMC_LIB ${HOOMD_ROOT}/hpmc/_hpmc${PYTHON_MODULE_EXTENSION})
111 | set(HOOMD_CGCMM_LIB ${HOOMD_ROOT}/cgcmm/_cgcmm${PYTHON_MODULE_EXTENSION})
112 | set(HOOMD_METAL_LIB ${HOOMD_ROOT}/metal/_metal${PYTHON_MODULE_EXTENSION})
113 | set(HOOMD_DEPRECATED_LIB ${HOOMD_ROOT}/deprecated/_deprecated${PYTHON_MODULE_EXTENSION})
114 | 
115 | # NOTE(review): a dead assignment of HOOMD_LIBRARIES (without HOOMD_MD_LIB) stood here;
116 | # it was unconditionally overwritten by the plugin link list below, so it was removed.
117 | # NEED THIS ONE FOR THE PLUGIN!!!
118 | set(HOOMD_LIBRARIES ${HOOMD_LIB} ${HOOMD_MD_LIB} ${HOOMD_COMMON_LIBS})
119 | 
120 | endif (HOOMD_FOUND)
121 | 
--------------------------------------------------------------------------------
/PSEv1/Brownian.cu:
--------------------------------------------------------------------------------
1 | /*
2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 | the University of Michigan All rights reserved.
5 | 
6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 | copyright is held, by various Contributors who have granted The Regents of the
8 | University of Michigan the right to modify and/or distribute such Contributions.
9 | 
10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source
11 | and binary forms, provided you abide by the following conditions:
12 | 
13 | * Redistributions of source code must retain the above copyright notice, this
14 | list of conditions, and the following disclaimer both in the code and
15 | prominently in any materials provided with the distribution.
16 | 
17 | * Redistributions in binary form must reproduce the above copyright notice, this
18 | list of conditions, and the following disclaimer in the documentation and/or
19 | other materials provided with the distribution.
20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
48 | */
49 | 
50 | // HOOMD Maintainer: joaander
51 | // Modified by Andrew Fiore
52 | 
53 | #include "Brownian.cuh"
54 | #include "Mobility.cuh"
55 | #include "Helper.cuh"
56 | 
57 | #include "hoomd/Saru.h"
58 | #include "hoomd/TextureTools.h"
59 | using namespace hoomd;
60 | 
61 | #include <stdio.h>  // NOTE(review): include targets were stripped in the dump ("#include" with no header); restored — code below uses printf/sqrtf. Confirm against upstream.
62 | #include <math.h>
63 | 
64 | #include "lapacke.h"
65 | #include "cblas.h"
66 | 
67 | #ifdef WIN32
68 | #include <cassert>  // NOTE(review): restored; standard HOOMD WIN32/else assert pattern — confirm against upstream
69 | #else
70 | #include <assert.h>
71 | #endif
72 | 
73 | 
74 | /*! \file Brownian.cu
75 | \brief Defines functions for PSE calculation of the Brownian Displacements
76 | 
77 | // Uses LAPACKE to perform the final square root of the tridiagonal matrix
78 | resulting from the Lanczos Method
79 | */
80 | 
81 | //! Shared memory array for partial sum of dot product kernel
82 | extern __shared__ Scalar partial_sum[];
83 | extern __shared__ Scalar4 shared_Fpos[];
84 | 
85 | /*!
86 | Generate random numbers on particles
87 | 
88 | \param d_psi random vector
89 | \param group_size number of particles
90 | \param d_group_members index to particle arrays
91 | \param timestep current time step
92 | \param seed seed for random number generation
93 | 
94 | Thread-per-particle operations to generate random numbers
95 | for the real space part of the Brownian calculation. Grid
96 | and blocks are 1-D.
97 | 
98 | */
99 | __global__ void gpu_stokes_BrownianGenerate_kernel(
100 | Scalar4 *d_psi,
101 | unsigned int group_size,
102 | unsigned int *d_group_members,
103 | const unsigned int timestep,
104 | const unsigned int seed
105 | ){
106 | 
107 | // Thread ID
108 | int group_idx = blockDim.x * blockIdx.x + threadIdx.x;
109 | 
110 | // Make sure that thread is in bounds
111 | if (group_idx < group_size) {
112 | 
113 | // Global particle index
114 | unsigned int idx = d_group_members[group_idx];
115 | 
116 | // Initialize random number generator
117 | detail::Saru s(idx, timestep + seed);
118 | 
119 | // Draw numbers from a Uniform distribution on (-sqrt(3),sqrt(3)),
120 | // so that variance = (2*sqrt(3))^2 / 12 = 1
121 | Scalar sqrt3 = 1.73205080757;
122 | Scalar randomx = s.f( -sqrt3, sqrt3 );
123 | Scalar randomy = s.f( -sqrt3, sqrt3 );
124 | Scalar randomz = s.f( -sqrt3, sqrt3 );
125 | 
126 | // Write to global memory, leaving the 4th element unchanged
127 | d_psi[idx] = make_scalar4(randomx, randomy, randomz, d_psi[idx].w);
128 | 
129 | }
130 | }
131 | 
132 | /*!
133 | Generate random numbers for wave space Brownian motion ( random numbers on grid )
134 | - scale forces as they're generated and add directly to the existing grid.
135 | 
136 | \param d_gridX x-component of vectors on grid
137 | \param d_gridY y-component of vectors on grid
138 | \param d_gridZ z-component of vectors on grid
139 | \param d_gridk reciprocal lattice vectors for each grid point
140 | \param NxNyNz total number of grid points
141 | \param Nx number of grid points in x-direction
142 | \param Ny number of grid points in y-direction
143 | \param Nz number of grid points in z-direction
144 | \param timestep current simulation time step
145 | \param seed seed for random number generation
146 | \param T simulation temperature
147 | \param dt simulation time step size
148 | \param quadW quadrature weight for spectral Ewald integration
149 | 
150 | Thread per grid node. 1-D grid of blocks, 1-D block of threads.
151 | 
152 | */
153 | __global__ void gpu_stokes_BrownianGridGenerate_kernel(
154 | CUFFTCOMPLEX *gridX,
155 | CUFFTCOMPLEX *gridY,
156 | CUFFTCOMPLEX *gridZ,
157 | Scalar4 *gridk,
158 | unsigned int NxNyNz,
159 | int Nx,
160 | int Ny,
161 | int Nz,
162 | const unsigned int timestep,
163 | const unsigned int seed,
164 | Scalar T,
165 | Scalar dt,
166 | Scalar quadW
167 | ){
168 | 
169 | // Current thread index
170 | int idx = blockDim.x * blockIdx.x + threadIdx.x;
171 | 
172 | // Check if threads are in bounds
173 | if ( idx < NxNyNz ) {
174 | 
175 | // Random number generator
176 | detail::Saru s(idx, timestep + seed);
177 | 
178 | // Square root of 3.0 / 2.0
179 | Scalar sqrt3d2 = 1.2247448713915889;
180 | 
181 | // Get random numbers from uniform distribution
182 | // on (-sqrt(3/2),sqrt(3/2)) so that variance
183 | // of ( reX + reY ) = 1.0
184 | Scalar reX = s.f( -sqrt3d2, sqrt3d2 );
185 | Scalar reY = s.f( -sqrt3d2, sqrt3d2 );
186 | Scalar reZ = s.f( -sqrt3d2, sqrt3d2 );
187 | Scalar imX = s.f( -sqrt3d2, sqrt3d2 );
188 | Scalar imY = s.f( -sqrt3d2, sqrt3d2 );
189 | Scalar imZ = s.f( -sqrt3d2, sqrt3d2 );
190 | 
191 | // Modulo arithmetic for indices for current grid point
192 | int kk = idx % Nz;
193 | int jj = ( ( idx - kk ) / Nz ) % Ny;
194 | int ii = ( ( idx - kk ) / Nz - jj ) / Ny;
195 | 
196 | // Scaling factor for covariance
197 | Scalar fac = sqrtf(2.0*T/dt/quadW);
198 | 
199 | // Variables required to place values on the grid
200 | Scalar2 fX, fY, fZ; // forces for thread's point
201 | Scalar2 fX_conj, fY_conj, fZ_conj; // forces for thread's conjugate point
202 | Scalar2 kdF, kdF_conj; // dot(k,F) for thread and conjugate point
203 | Scalar B12, B12_conj; // Scaling factors for thread and conjugate point
204 | 
205 | // Only do work on half the grid points because we are simultaneously assigning values
206 | // to each grid point and its conjugate. The following check makes sure we pick all of
207 | // the points without conjugates (zeros and nyquist points) as well as all the points
208 | // in the upper half of the grid. Also, ignore the origin in the wave space sum. (Sum
209 | // is over all k!= 0)
210 | if (
211 | !( 2 * kk >= Nz + 1 ) &&
212 | !( ( kk == 0 ) && ( 2 * jj >= Ny + 1 ) ) &&
213 | !( ( kk == 0 ) && ( jj == 0 ) && ( 2 * ii >= Nx + 1 ) ) &&
214 | !( ( kk == 0 ) && ( jj == 0 ) && ( ii == 0 ) )
215 | ){
216 | 
217 | // Is current grid point a nyquist point
218 | bool ii_nyquist = ( ( ii == Nx/2 ) && ( Nx/2 == (Nx+1)/2 ) );
219 | bool jj_nyquist = ( ( jj == Ny/2 ) && ( Ny/2 == (Ny+1)/2 ) );
220 | bool kk_nyquist = ( ( kk == Nz/2 ) && ( Nz/2 == (Nz+1)/2 ) );
221 | 
222 | // Index of conjugate point
223 | int ii_conj, jj_conj, kk_conj;
224 | if ( ii == 0 ){
225 | ii_conj = ii;
226 | }
227 | else {
228 | ii_conj = Nx - ii;
229 | }
230 | if ( jj == 0 ){
231 | jj_conj = jj;
232 | }
233 | else {
234 | jj_conj = Ny - jj;
235 | }
236 | if ( kk == 0 ){
237 | kk_conj = kk;
238 | }
239 | else {
240 | kk_conj = Nz - kk;
241 | }
242 | 
243 | // Global index of conjugate grid point
244 | int conj_idx = ii_conj * Ny*Nz + jj_conj * Nz + kk_conj;
245 | 
246 | // Current wave-space vector, conjugate wave space vector, and their
247 | // magnitudes
248 | Scalar4 tk = gridk[idx];
249 | Scalar4 tk_conj = gridk[conj_idx];
250 | 
251 | Scalar ksq = tk.x*tk.x + tk.y*tk.y + tk.z*tk.z;
252 | Scalar ksq_conj = tk_conj.x*tk_conj.x + tk_conj.y*tk_conj.y + tk_conj.z*tk_conj.z;
253 | 
254 | // Assign fluctuating values to the Nyquist points (no conjugate points)
255 | if ( ( ii == 0 && jj_nyquist && kk == 0 ) ||
256 | ( ii_nyquist && jj == 0 && kk == 0 ) ||
257 | ( ii_nyquist && jj_nyquist && kk == 0 ) ||
258 | ( ii == 0 && jj == 0 && kk_nyquist ) ||
259 | ( ii == 0 && jj_nyquist && kk_nyquist ) ||
260 | ( ii_nyquist && jj == 0 && kk_nyquist ) ||
261 | ( ii_nyquist && jj_nyquist && kk_nyquist ) ){
262 | 
263 | // At the nyquist point, the random quantity only has a real component. Have to
264 | // multiply by sqrt(2.0) to make sure the variance is still 1
265 | Scalar sqrt2 = 1.4142135623730951;
266 | fX = make_scalar2( sqrt2*reX, 0.0 );
267 | fY = make_scalar2( sqrt2*reY, 0.0 );
268 | fZ = make_scalar2( sqrt2*reZ, 0.0 );
269 | 
270 | // Dot product of wave-vector with stochastic quantity
271 | kdF = make_scalar2( ( tk.x*fX.x + tk.y*fY.x + tk.z*fZ.x ) / ksq, ( tk.x*fX.y + tk.y*fY.y + tk.z*fZ.y ) / ksq );
272 | 
273 | // Scaling factor
274 | B12 = sqrtf( tk.w );
275 | Scalar k = sqrtf( ksq );
276 | B12 *= sinf( k ) / k;
277 | 
278 | // Add random quantity to the grid AND scale by B^(1/2) simultaneously to save effort
279 | gridX[idx].x = gridX[idx].x + fac * ( fX.x - tk.x * kdF.x ) * B12;
280 | gridX[idx].y = gridX[idx].y + fac * ( fX.y - tk.x * kdF.y ) * B12;
281 | 
282 | gridY[idx].x = gridY[idx].x + fac * ( fY.x - tk.y * kdF.x ) * B12;
283 | gridY[idx].y = gridY[idx].y + fac * ( fY.y - tk.y * kdF.y ) * B12;
284 | 
285 | gridZ[idx].x = gridZ[idx].x + fac * ( fZ.x - tk.z * kdF.x ) * B12;
286 | gridZ[idx].y = gridZ[idx].y + fac * ( fZ.y - tk.z * kdF.y ) * B12;
287 | 
288 | }
289 | else {
290 | 
291 | // Construct random force
292 | fX = make_scalar2( reX, imX );
293 | fY = make_scalar2( reY, imY );
294 | fZ = make_scalar2( reZ, imZ );
295 | 
296 | // The random force at the conjugate point is the conjugate of the force at
297 | // the current point
298 | fX_conj = make_scalar2( reX, -imX );
299 | fY_conj = make_scalar2( reY, -imY );
300 | fZ_conj = make_scalar2( reZ, -imZ );
301 | 
302 | // Dot product of force with wave vector at current and conjugate point
303 | kdF = make_scalar2( ( tk.x*fX.x + tk.y*fY.x + tk.z*fZ.x ) / ksq, ( tk.x*fX.y + tk.y*fY.y + tk.z*fZ.y ) / ksq );
304 | kdF_conj = make_scalar2( ( tk_conj.x*fX_conj.x + tk_conj.y*fY_conj.x + tk_conj.z*fZ_conj.x ) / ksq_conj, ( tk_conj.x*fX_conj.y + tk_conj.y*fY_conj.y + tk_conj.z*fZ_conj.y ) / ksq_conj );
305 | 
306 | // Scaling factors at current and conjugate point
307 | B12 = sqrtf( tk.w );
308 | B12_conj = sqrtf( tk_conj.w );
309 | 
310 | Scalar k = sqrtf( ksq );
311 | Scalar kconj = sqrtf( ksq_conj );
312 | B12 *= sinf( k ) / k;
313 | B12_conj *= sinf( kconj ) / kconj;
314 | 
315 | // Add random quantity to the grid AND scale by B^(1/2) simultaneously to save effort
316 | // Current grid point
317 | gridX[idx].x = gridX[idx].x + fac * ( fX.x - tk.x * kdF.x ) * B12;
318 | gridX[idx].y = gridX[idx].y + fac * ( fX.y - tk.x * kdF.y ) * B12;
319 | 
320 | gridY[idx].x = gridY[idx].x + fac * ( fY.x - tk.y * kdF.x ) * B12;
321 | gridY[idx].y = gridY[idx].y + fac * ( fY.y - tk.y * kdF.y ) * B12;
322 | 
323 | gridZ[idx].x = gridZ[idx].x + fac * ( fZ.x - tk.z * kdF.x ) * B12;
324 | gridZ[idx].y = gridZ[idx].y + fac * ( fZ.y - tk.z * kdF.y ) * B12;
325 | 
326 | // Add random quantity to the grid AND scale by B^(1/2) simultaneously to save effort
327 | // Conjugate grid point
328 | gridX[conj_idx].x = gridX[conj_idx].x + fac * ( fX_conj.x - tk_conj.x * kdF_conj.x ) * B12_conj;
329 | gridX[conj_idx].y = gridX[conj_idx].y + fac * ( fX_conj.y - tk_conj.x * kdF_conj.y ) * B12_conj;
330 | 
331 | gridY[conj_idx].x = gridY[conj_idx].x + fac * ( fY_conj.x - tk_conj.y * kdF_conj.x ) * B12_conj;
332 | gridY[conj_idx].y = gridY[conj_idx].y + fac * ( fY_conj.y - tk_conj.y * kdF_conj.y ) * B12_conj;
333 | 
334 | gridZ[conj_idx].x = gridZ[conj_idx].x + fac * ( fZ_conj.x - tk_conj.z * kdF_conj.x ) * B12_conj;
335 | gridZ[conj_idx].y = gridZ[conj_idx].y + fac * ( fZ_conj.y - tk_conj.z * kdF_conj.y ) * B12_conj;
336 | 
337 | }
338 | 
339 | 
340 | 
341 | }
342 | 
343 | 
344 | }
345 | }
346 | 
347 | 
348 | /*!
349 | Use Lanczos method to compute Mreal^0.5 * psi
350 | 
351 | This method is detailed in:
352 | "Preconditioned Krylov Subspace Methods for Sampling Multivariate Gaussian Distributions"
353 | Edmond Chow and Yousef Saad, SIAM J. Sci.
Comput., 36(2), A588–A608
354 | Computes d_vel = psinorm * sqrt(2*T/dt) * Mreal^(1/2) * (d_psi/psinorm) with an adaptive-length
355 | Lanczos iteration that grows m until the relative step norm drops below cheb_error (or m_max).
356 | */
357 | void gpu_stokes_BrealLanczos_wrap(
358 | Scalar4 *d_psi,
359 | Scalar4 *d_pos,
360 | unsigned int *d_group_members,
361 | unsigned int group_size,
362 | const BoxDim& box,
363 | Scalar dt,
364 | Scalar4 *d_vel,
365 | const Scalar T,
366 | const unsigned int timestep,
367 | const unsigned int seed,
368 | Scalar xi,
369 | Scalar ewald_cut,
370 | Scalar ewald_dr,
371 | int ewald_n,
372 | Scalar4 *d_ewaldC1,
373 | const unsigned int *d_n_neigh,
374 | const unsigned int *d_nlist,
375 | const unsigned int *d_headlist,
376 | int& m,
377 | Scalar cheb_error,
378 | dim3 grid,
379 | dim3 threads,
380 | int gridBlockSize,
381 | int gridNBlock,
382 | Scalar3 gridh,
383 | Scalar self
384 | ){
385 | 
386 | // Dot product kernel specifications
387 | unsigned int thread_for_dot = 512; // Must be 2^n
388 | unsigned int grid_for_dot = (group_size/thread_for_dot) + 1;
389 | 
390 | // Temp var for dot product.
391 | Scalar *dot_sum;
392 | cudaMalloc( (void**)&dot_sum, grid_for_dot*sizeof(Scalar) );
393 | 
394 | // Allocate storage
395 | //
396 | int m_in = m;
397 | int m_max = 100;
398 | 
399 | // Storage vectors for tridiagonal factorization
400 | float *alpha, *beta, *alpha_save, *beta_save;
401 | alpha = (float *)malloc( (m_max)*sizeof(float) );
402 | alpha_save = (float *)malloc( (m_max)*sizeof(float) );
403 | beta = (float *)malloc( (m_max+1)*sizeof(float) );
404 | beta_save = (float *)malloc( (m_max+1)*sizeof(float) );
405 | 
406 | // Vectors for Lapacke and square root
407 | float *W;
408 | W = (float *)malloc( (m_max*m_max)*sizeof(float) );
409 | float *W1; // W1 = Lambda^(1/2) * ( W^T * e1 )
410 | W1 = (float *)malloc( (m_max)*sizeof(float) );
411 | float *Tm;
412 | Tm = (float *)malloc( m_max*sizeof(float) );
413 | Scalar *d_Tm;
414 | cudaMalloc( (void**)&d_Tm, m_max * sizeof(Scalar) );
415 | 
416 | // Vectors for Lanczos iterations
417 | Scalar4 *d_v, *d_vj, *d_vjm1;
418 | cudaMalloc( (void**)&d_v, group_size*sizeof(Scalar4) );
419 | cudaMalloc( (void**)&d_vj, group_size*sizeof(Scalar4) );
420 | cudaMalloc( (void**)&d_vjm1, group_size*sizeof(Scalar4) );
421 | 
422 | // Storage vector for M*vj
423 | Scalar4 *d_Mvj;
424 | cudaMalloc( (void**)&d_Mvj, group_size*sizeof(Scalar4) );
425 | 
426 | // Storage array for V
427 | Scalar4 *d_V;
428 | cudaMalloc( (void**)&d_V, m_max*group_size * sizeof(Scalar4) );
429 | 
430 | // Step-norm things
431 | Scalar4 *d_vel_old, *d_Mpsi;
432 | cudaMalloc( (void**)&d_vel_old, group_size*sizeof(Scalar4) );
433 | cudaMalloc( (void**)&d_Mpsi, group_size*sizeof(Scalar4) );
434 | Scalar psiMpsi;
435 | 
436 | // Temporary pointer
437 | Scalar4 *d_temp;
438 | 
439 | // Copy random vector to v0
440 | cudaMemcpy( d_vj, d_psi, group_size*sizeof(Scalar4), cudaMemcpyDeviceToDevice );
441 | 
442 | // Compute the norm of the d_psi (also the norm of basis vector v0)
443 | Scalar vnorm;
444 | gpu_stokes_DotStepOne_kernel<<< grid_for_dot, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(d_vj, d_vj, dot_sum, group_size, d_group_members);
445 | gpu_stokes_DotStepTwo_kernel<<< 1, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(dot_sum, grid_for_dot);
446 | cudaMemcpy(&vnorm, dot_sum, sizeof(Scalar), cudaMemcpyDeviceToHost);
447 | vnorm = sqrtf( vnorm );
448 | 
449 | Scalar psinorm = vnorm;
450 | 
451 | // Compute psi * M * psi ( for step norm )
452 | // NOTE(review): the '<<<grid, threads>>>' launch configurations in this function were garbled to '<<>>' in the dump; restored from the function's dim3 grid/threads parameters.
453 | gpu_stokes_Mreal_kernel<<<grid, threads>>>(d_pos, d_Mpsi, d_psi, group_size, xi, d_ewaldC1, self, ewald_cut, ewald_n, ewald_dr, d_group_members, box, d_n_neigh, d_nlist, d_headlist );
454 | gpu_stokes_DotStepOne_kernel<<< grid_for_dot, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(d_psi, d_Mpsi, dot_sum, group_size, d_group_members);
455 | gpu_stokes_DotStepTwo_kernel<<< 1, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(dot_sum, grid_for_dot);
456 | cudaMemcpy(&psiMpsi, dot_sum, sizeof(Scalar), cudaMemcpyDeviceToHost);
457 | 
458 | psiMpsi = psiMpsi / ( psinorm * psinorm );
459 | 
460 | // First iteration, vjm1 = 0, vj = psi / norm( psi )
461 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_vj, d_vj, d_vjm1, 0.0, 0.0, group_size, d_group_members);
462 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_vj, d_vj, d_vj, 1.0/vnorm, 0.0, group_size, d_group_members);
463 | 
464 | // Start by computing (m-1) iterations, so that the stepnorm for the given
465 | // number of iterations can be computed
466 | m = m_in - 1;
467 | m = m < 1 ? 1 : m;
468 | 
469 | // Values for current alpha and beta in the iteration
470 | Scalar tempalpha;
471 | Scalar tempbeta = 0.0;
472 | 
473 | // Apply the Lanczos method
474 | for ( int jj = 0; jj < m; ++jj ){
475 | 
476 | // Store current basis vector
477 | cudaMemcpy( &d_V[jj*group_size], d_vj, group_size*sizeof(Scalar4), cudaMemcpyDeviceToDevice );
478 | 
479 | // Store beta
480 | beta[jj] = tempbeta;
481 | 
482 | // v = M*vj - betaj*vjm1
483 | gpu_stokes_Mreal_kernel<<<grid, threads>>>(d_pos, d_Mvj, d_vj, group_size, xi, d_ewaldC1, self, ewald_cut, ewald_n, ewald_dr, d_group_members, box, d_n_neigh, d_nlist, d_headlist );
484 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_Mvj, d_vjm1, d_v, 1.0, -1.0*tempbeta, group_size, d_group_members);
485 | 
486 | // vj dot v
487 | gpu_stokes_DotStepOne_kernel<<< grid_for_dot, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(d_vj, d_v, dot_sum, group_size, d_group_members);
488 | gpu_stokes_DotStepTwo_kernel<<< 1, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(dot_sum, grid_for_dot);
489 | cudaMemcpy(&tempalpha, dot_sum, sizeof(Scalar), cudaMemcpyDeviceToHost);
490 | 
491 | // Store updated alpha
492 | alpha[jj] = tempalpha;
493 | 
494 | // v = v - alphaj*vj
495 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_v, d_vj, d_v, 1.0, -1.0*tempalpha, group_size, d_group_members);
496 | 
497 | // v dot v
498 | gpu_stokes_DotStepOne_kernel<<< grid_for_dot, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(d_v, d_v, dot_sum, group_size, d_group_members);
499 | gpu_stokes_DotStepTwo_kernel<<< 1, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(dot_sum, grid_for_dot);
500 | cudaMemcpy(&vnorm, dot_sum, sizeof(Scalar), cudaMemcpyDeviceToHost);
501 | vnorm = sqrtf( vnorm );
502 | 
503 | // betajp1 = norm( v )
504 | tempbeta = vnorm;
505 | 
506 | // Check that the basis vector is not too small. If so, end the iteration
507 | // (If norm is too small, will have numerical trouble)
508 | if ( vnorm < 1E-8 ){
509 | m = jj;
510 | break;
511 | }
512 | 
513 | // vjp1 = v / betajp1
514 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_v, d_v, d_v, 1.0/tempbeta, 0.0, group_size, d_group_members);
515 | 
516 | // Swap pointers
517 | d_temp = d_vjm1;
518 | d_vjm1 = d_vj;
519 | d_vj = d_v;
520 | d_v = d_temp;
521 | 
522 | }
523 | 
524 | // Save alpha, beta vectors (will be overwritten by lapack)
525 | for ( int ii = 0; ii < m; ++ii ){
526 | alpha_save[ii] = alpha[ii];
527 | beta_save[ii] = beta[ii];
528 | }
529 | beta_save[m] = beta[m];
530 | 
531 | // Now that we have alpha, beta, have to compute the square root of the tridiagonal
532 | // matrix Tm. Do this using eigen-decomposition.
533 | //
534 | // Compute eigen-decomposition of tridiagonal matrix
535 | // alpha (input) - vector of entries on main diagonal
536 | // alpha (output) - eigenvalues sorted in descending order
537 | // beta (input) - vector of entries of sub-diagonal
538 | // beta (output) - overwritten (zeros?)
539 | // W - (output) - matrix of eigenvectors. ith column corresponds to ith eigenvalue
540 | // INFO (output) = 0 if operation was successful
541 | int INFO = LAPACKE_spteqr( LAPACK_ROW_MAJOR, 'I', m, alpha, &beta[1], W, m );
542 | 
543 | // Check whether the eigen-decomposition failed, and throw error on failure
544 | if ( INFO != 0 ){
545 | printf("Eigenvalue decomposition #1 failed \n");
546 | printf("INFO = %i \n", INFO);
547 | 
548 | printf("\n alpha: \n");
549 | for( int ii = 0; ii < m; ++ii ){
550 | printf("%f \n", alpha_save[ii]);
551 | }
552 | printf("\n beta: \n");
553 | for( int ii = 0; ii < m; ++ii ){
554 | printf("%f \n", beta_save[ii]);
555 | }
556 | printf("%f \n", beta_save[m]);
557 | 
558 | printf("Note to User: restart simulation and proceed. \n");
559 | 
560 | exit(EXIT_FAILURE);
561 | }
562 | 
563 | // Now, we have to compute Tm^(1/2) * e1
564 | // Tm^(1/2) = W * Lambda^(1/2) * W^T * e1
565 | // = W * Lambda^(1/2) * ( W^T * e1 )
566 | // The quantity in parentheses is the first row of W
567 | // Lambda^(1/2) only has diagonal entries, so its product with the first row of W
568 | // is easy to compute.
569 | for ( int ii = 0; ii < m; ++ii ){
570 | W1[ii] = sqrtf( alpha[ii] ) * W[ii];
571 | }
572 | 
573 | // Tm = W * W1 = W * Lambda^(1/2) * W^T * e1
574 | float tempsum;
575 | for ( int ii = 0; ii < m; ++ii ){
576 | tempsum = 0.0;
577 | for ( int jj = 0; jj < m; ++jj ){
578 | int idx = m*ii + jj;
579 | 
580 | tempsum += W[idx] * W1[jj];
581 | }
582 | Tm[ii] = tempsum;
583 | }
584 | 
585 | // Copy matrix to GPU
586 | cudaMemcpy( d_Tm, Tm, m*sizeof(Scalar), cudaMemcpyHostToDevice );
587 | 
588 | // Multiply basis vectors by Tm, [ V0, V1, ..., Vm-1 ] * Tm
589 | gpu_stokes_MatVecMultiply_kernel<<<grid, threads>>>(d_V, d_Tm, d_vel, group_size, m);
590 | 
591 | // Copy velocity
592 | cudaMemcpy( d_vel_old, d_vel, group_size*sizeof(Scalar4), cudaMemcpyDeviceToDevice );
593 | 
594 | // Restore alpha, beta
595 | for ( int ii = 0; ii < m; ++ii ){
596 | alpha[ii] = alpha_save[ii];
597 | beta[ii] = beta_save[ii];
598 | }
599 | beta[m] = beta_save[m];
600 | 
601 | 
602 | //
603 | // Keep adding to basis vectors until the step norm is small enough
604 | //
605 | Scalar stepnorm = 1.0;
606 | int jj;
607 | while( stepnorm > cheb_error && m < m_max ){
608 | m++;
609 | jj = m - 1;
610 | 
611 | //
612 | // Do another Lanczos iteration
613 | //
614 | 
615 | // Store the current basis vector
616 | cudaMemcpy( &d_V[jj*group_size], d_vj, group_size*sizeof(Scalar4), cudaMemcpyDeviceToDevice );
617 | 
618 | // Store beta
619 | beta[jj] = tempbeta;
620 | 
621 | // v = M*vj - betaj*vjm1
622 | gpu_stokes_Mreal_kernel<<<grid, threads>>>(d_pos, d_Mvj, d_vj, group_size, xi, d_ewaldC1, self, ewald_cut, ewald_n, ewald_dr, d_group_members, box, d_n_neigh, d_nlist, d_headlist );
623 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_Mvj, d_vjm1, d_v, 1.0, -1.0*tempbeta, group_size, d_group_members);
624 | 
625 | // vj dot v
626 | gpu_stokes_DotStepOne_kernel<<< grid_for_dot, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(d_vj, d_v, dot_sum, group_size, d_group_members);
627 | gpu_stokes_DotStepTwo_kernel<<< 1, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(dot_sum, grid_for_dot);
628 | cudaMemcpy(&tempalpha, dot_sum, sizeof(Scalar), cudaMemcpyDeviceToHost);
629 | 
630 | // Store updated alpha
631 | alpha[jj] = tempalpha;
632 | 
633 | // v = v - alphaj*vj
634 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_v, d_vj, d_v, 1.0, -1.0*tempalpha, group_size, d_group_members);
635 | 
636 | // v dot v
637 | gpu_stokes_DotStepOne_kernel<<< grid_for_dot, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(d_v, d_v, dot_sum, group_size, d_group_members);
638 | gpu_stokes_DotStepTwo_kernel<<< 1, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(dot_sum, grid_for_dot);
639 | cudaMemcpy(&vnorm, dot_sum, sizeof(Scalar), cudaMemcpyDeviceToHost);
640 | vnorm = sqrtf( vnorm );
641 | 
642 | // betajp1 = norm( v )
643 | tempbeta = vnorm;
644 | 
645 | // Check if the norm of the basis vector is too small. If
646 | // so, end the iteration.
647 | if ( vnorm < 1E-8 ){
648 | m = jj;
649 | break;
650 | }
651 | 
652 | // vjp1 = v / betajp1
653 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_v, d_v, d_v, 1.0/tempbeta, 0.0, group_size, d_group_members);
654 | 
655 | // Swap pointers
656 | d_temp = d_vjm1;
657 | d_vjm1 = d_vj;
658 | d_vj = d_v;
659 | d_v = d_temp;
660 | 
661 | // Save alpha, beta vectors (will be overwritten by lapack)
662 | for ( int ii = 0; ii < m; ++ii ){
663 | alpha_save[ii] = alpha[ii];
664 | beta_save[ii] = beta[ii];
665 | }
666 | beta_save[m] = beta[m];
667 | 
668 | //
669 | // Square root calculation with addition of latest Lanczos iteration
670 | // (see first implementation above for more description)
671 | //
672 | 
673 | // Compute eigen-decomposition of tridiagonal matrix
674 | int INFO = LAPACKE_spteqr( LAPACK_ROW_MAJOR, 'I', m, alpha, &beta[1], W, m );
675 | 
676 | // Check whether the eigen-decomposition failed, and throw error on failure
677 | if ( INFO != 0 ){
678 | printf("Eigenvalue decomposition #2 failed \n");
679 | printf("INFO = %i \n", INFO);
680 | 
681 | printf("\n alpha: \n");
682 | for( int ii = 0; ii < m; ++ii ){
683 | printf("%f \n", alpha_save[ii]);
684 | }
685 | printf("\n beta: \n");
686 | for( int ii = 0; ii < m; ++ii ){
687 | printf("%f \n", beta_save[ii]);
688 | }
689 | printf("%f \n", beta_save[m]);
690 | 
691 | printf("Note to User: restart simulation and proceed. \n");
692 | 
693 | exit(EXIT_FAILURE);
694 | }
695 | 
696 | // Now, we have to compute Tm^(1/2) * e1
697 | for ( int ii = 0; ii < m; ++ii ){
698 | W1[ii] = sqrtf( alpha[ii] ) * W[ii];
699 | }
700 | 
701 | // Tm = W * W1 = W * Lambda^(1/2) * W^T * e1
702 | float tempsum;
703 | for ( int ii = 0; ii < m; ++ii ){
704 | tempsum = 0.0;
705 | for ( int jj = 0; jj < m; ++jj ){
706 | int idx = m*ii + jj;
707 | 
708 | tempsum += W[idx] * W1[jj];
709 | }
710 | Tm[ii] = tempsum;
711 | }
712 | 
713 | // Copy matrix to GPU
714 | cudaMemcpy( d_Tm, Tm, m*sizeof(Scalar), cudaMemcpyHostToDevice );
715 | 
716 | // Multiply basis vectors by Tm -- velocity = Vm * Tm
717 | gpu_stokes_MatVecMultiply_kernel<<<grid, threads>>>(d_V, d_Tm, d_vel, group_size, m);
718 | 
719 | // Compute step norm error
720 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_vel, d_vel_old, d_vel_old, 1.0, -1.0, group_size, d_group_members);
721 | gpu_stokes_DotStepOne_kernel<<< grid_for_dot, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(d_vel_old, d_vel_old, dot_sum, group_size, d_group_members);
722 | gpu_stokes_DotStepTwo_kernel<<< 1, thread_for_dot, thread_for_dot*sizeof(Scalar) >>>(dot_sum, grid_for_dot);
723 | cudaMemcpy(&stepnorm, dot_sum, sizeof(Scalar), cudaMemcpyDeviceToHost);
724 | 
725 | stepnorm = sqrtf( stepnorm / psiMpsi );
726 | 
727 | // Copy velocity
728 | cudaMemcpy( d_vel_old, d_vel, group_size*sizeof(Scalar4), cudaMemcpyDeviceToDevice );
729 | 
730 | // Restore alpha, beta
731 | for ( int ii = 0; ii < m; ++ii ){
732 | alpha[ii] = alpha_save[ii];
733 | beta[ii] = beta_save[ii];
734 | }
735 | beta[m] = beta_save[m];
736 | 
737 | }
738 | 
739 | // Rescale by original norm of Psi and include thermal variance
740 | gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_vel, d_vel, d_vel, psinorm * sqrtf(2.0*T/dt), 0.0, group_size, d_group_members);
741 | 
742 | //
743 | // Clean up
744 | //
745 | cudaFree(dot_sum);
746 | cudaFree(d_Mvj);
747 | cudaFree(d_v);
748 | cudaFree(d_vj);
749 | cudaFree(d_vjm1);
750 | cudaFree(d_V);
751 | cudaFree(d_Tm);
752 | cudaFree(d_vel_old);
753 | cudaFree(d_Mpsi);
754 | 
755 | d_temp = NULL;
756 | 
757 | free(alpha);
758 | free(beta);
759 | free(alpha_save);
760 | free(beta_save);
761 | 
762 | free(W);
763 | free(W1);
764 | free(Tm);
765 | 
766 | }
767 | 
768 | 
769 | // Wrap up everything to compute mobility AND brownian if necessary
770 | // - Combine Fourier components of Deterministic and Brownian calculation
771 | // in order to save extra FFTs and contraction operations
772 | // - Add deterministic and stochastic real space contributions
773 | void gpu_stokes_CombinedMobilityBrownian_wrap(
774 | Scalar4 *d_pos,
775 | Scalar4 *d_net_force,
776 | unsigned int *d_group_members,
777 | unsigned int group_size,
778 | const BoxDim& box,
779 | Scalar dt,
780 | Scalar4 *d_vel,
781 | const Scalar T,
782 | const unsigned int timestep,
783 | const unsigned int seed,
784 | Scalar xi,
785 | Scalar eta,
786 | Scalar P,
787 | Scalar ewald_cut,
788 | Scalar ewald_dr,
789 | int ewald_n,
790 | Scalar4 *d_ewaldC1,
791 | Scalar4 *d_gridk,
792 | CUFFTCOMPLEX *d_gridX,
793 | CUFFTCOMPLEX *d_gridY,
794 | CUFFTCOMPLEX *d_gridZ,
795 | cufftHandle plan,
796 | const int Nx,
797 | const int Ny,
798 | const int Nz,
799 | const unsigned int *d_n_neigh,
800 | const unsigned int *d_nlist,
801 | const unsigned int *d_headlist,
802 | int& m_Lanczos,
803 | const unsigned int N_total,
804 | unsigned int NxNyNz,
805 | dim3 grid,
806 | dim3 threads,
807 | int gridBlockSize,
808 | int gridNBlock,
809 | Scalar3 gridh,
810 | Scalar cheb_error,
811 | Scalar self ){
812 | 
813 | // Real space velocity to add
814 | Scalar4 *d_vel2;
815 | cudaMalloc( (void**)&d_vel2, group_size*sizeof(Scalar4) );
816 | // Generate uniform distribution (-1,1) on d_psi 817 | Scalar4 *d_psi; 818 | cudaMalloc( (void**)&d_psi, group_size*sizeof(Scalar4) ); 819 | gpu_stokes_BrownianGenerate_kernel<<>>( d_psi, group_size, d_group_members, timestep, seed ); 820 | 821 | // Spreading and contraction grid information and parameters 822 | dim3 Cgrid( group_size, 1, 1); 823 | int B = ( P < 10 ) ? P : 10; 824 | dim3 Cthreads(B, B, B); 825 | 826 | Scalar quadW = gridh.x * gridh.y * gridh.z; 827 | Scalar xisq = xi * xi; 828 | Scalar prefac = ( 2.0 * xisq / 3.1415926536 / eta ) * sqrtf( 2.0 * xisq / 3.1415926536 / eta ); 829 | Scalar expfac = 2.0 * xisq / eta; 830 | 831 | // ******************************************** 832 | // Wave Space Part of Deterministic Calculation 833 | // ******************************************** 834 | 835 | // Reset the grid (remove any previously distributed forces) 836 | gpu_stokes_ZeroGrid_kernel<<>>(d_gridX,NxNyNz); 837 | gpu_stokes_ZeroGrid_kernel<<>>(d_gridY,NxNyNz); 838 | gpu_stokes_ZeroGrid_kernel<<>>(d_gridZ,NxNyNz); 839 | 840 | // Spread forces onto grid 841 | gpu_stokes_Spread_kernel<<>>( d_pos, d_net_force, d_gridX, d_gridY, d_gridZ, group_size, Nx, Ny, Nz, d_group_members, box, P, gridh, xi, eta, prefac, expfac ); 842 | 843 | // Perform FFT on gridded forces 844 | cufftExecC2C(plan, d_gridX, d_gridX, CUFFT_FORWARD); 845 | cufftExecC2C(plan, d_gridY, d_gridY, CUFFT_FORWARD); 846 | cufftExecC2C(plan, d_gridZ, d_gridZ, CUFFT_FORWARD); 847 | 848 | // Apply wave space scaling to FFT'd forces 849 | gpu_stokes_Green_kernel<<>>( d_gridX, d_gridY, d_gridZ, d_gridk, NxNyNz); 850 | 851 | 852 | // *************************************** 853 | // Wave Space Part of Brownian Calculation 854 | // *************************************** 855 | if ( T > 0.0 ){ 856 | 857 | // Apply random fluctuations to wave space grid 858 | gpu_stokes_BrownianGridGenerate_kernel<<>>( d_gridX, d_gridY, d_gridZ, d_gridk, NxNyNz, Nx, Ny, Nz, timestep, seed, T, dt, quadW ); 859 | 860 | 
} 861 | 862 | // ************************************ 863 | // Finish the Wave Space Calculation 864 | // ************************************ 865 | 866 | // Return rescaled forces to real space 867 | cufftExecC2C(plan, d_gridX, d_gridX, CUFFT_INVERSE); 868 | cufftExecC2C(plan, d_gridY, d_gridY, CUFFT_INVERSE); 869 | cufftExecC2C(plan, d_gridZ, d_gridZ, CUFFT_INVERSE); 870 | 871 | // Evaluate contribution of grid velocities at particle centers 872 | gpu_stokes_Contract_kernel<<>>( d_pos, d_vel, d_gridX, d_gridY, d_gridZ, group_size, Nx, Ny, Nz, xi, eta, d_group_members, box, P, gridh, quadW*prefac, expfac ); 873 | 874 | // *************************************** 875 | // Real Space Part of Both Calculations 876 | // *************************************** 877 | 878 | // Deterministic part 879 | gpu_stokes_Mreal_kernel<<>>(d_pos, d_vel2, d_net_force, group_size, xi, d_ewaldC1, self, ewald_cut, ewald_n, ewald_dr, d_group_members, box, d_n_neigh, d_nlist, d_headlist ); 880 | 881 | // Add to velocity 882 | gpu_stokes_LinearCombination_kernel<<>>(d_vel2, d_vel, d_vel, 1.0, 1.0, group_size, d_group_members); 883 | 884 | // Stochastic 885 | if ( T > 0.0 ){ 886 | 887 | gpu_stokes_BrealLanczos_wrap( d_psi, 888 | d_pos, 889 | d_group_members, 890 | group_size, 891 | box, 892 | dt, 893 | d_vel2, 894 | T, 895 | timestep, 896 | seed, 897 | xi, 898 | ewald_cut, 899 | ewald_dr, 900 | ewald_n, 901 | d_ewaldC1, 902 | d_n_neigh, 903 | d_nlist, 904 | d_headlist, 905 | m_Lanczos, 906 | cheb_error, 907 | grid, 908 | threads, 909 | gridBlockSize, 910 | gridNBlock, 911 | gridh, 912 | self ); 913 | 914 | // Add to velocity 915 | gpu_stokes_LinearCombination_kernel<<>>(d_vel2, d_vel, d_vel, 1.0, 1.0, group_size, d_group_members); 916 | 917 | } 918 | 919 | // Free Memory 920 | cudaFree( d_vel2 ); 921 | cudaFree( d_psi ); 922 | 923 | } 924 | 925 | -------------------------------------------------------------------------------- /PSEv1/Brownian.cuh: 
-------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 
33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Andrew Fiore 52 | 53 | /*! \file Brownian.cuh 54 | \brief Declares GPU kernel codes for Brownian Calculations. 55 | */ 56 | #include "hoomd/ParticleData.cuh" 57 | #include "hoomd/HOOMDMath.h" 58 | #include "hoomd/Index1D.h" 59 | 60 | #include 61 | 62 | //! Define the kernel 63 | #ifndef __BROWNIAN_CUH__ 64 | #define __BROWNIAN_CUH__ 65 | 66 | //! 
Definition for complex variable storage 67 | #ifdef SINGLE_PRECISION 68 | #define CUFFTCOMPLEX cufftComplex 69 | #else 70 | #define CUFFTCOMPLEX cufftComplex 71 | #endif 72 | 73 | __global__ void gpu_stokes_BrownianGenerate_kernel( 74 | Scalar4 *d_psi, 75 | unsigned int group_size, 76 | unsigned int *d_group_members, 77 | const unsigned int timestep, 78 | const unsigned int seed 79 | ); 80 | 81 | __global__ void gpu_stokes_BrownianGridGenerate_kernel( 82 | CUFFTCOMPLEX *gridX, 83 | CUFFTCOMPLEX *gridY, 84 | CUFFTCOMPLEX *gridZ, 85 | Scalar4 *gridk, 86 | unsigned int NxNyNz, 87 | int Nx, 88 | int Ny, 89 | int Nz, 90 | const unsigned int timestep, 91 | const unsigned int seed, 92 | Scalar T, 93 | Scalar dt, 94 | Scalar quadW 95 | ); 96 | 97 | void gpu_stokes_CombinedMobilityBrownian_wrap( 98 | Scalar4 *d_pos, 99 | Scalar4 *d_net_force, 100 | unsigned int *d_group_members, 101 | unsigned int group_size, 102 | const BoxDim& box, 103 | Scalar dt, 104 | Scalar4 *d_vel, 105 | const Scalar T, 106 | const unsigned int timestep, 107 | const unsigned int seed, 108 | Scalar xi, 109 | Scalar eta, 110 | Scalar P, 111 | Scalar ewald_cut, 112 | Scalar ewald_dr, 113 | int ewald_n, 114 | Scalar4 *d_ewaldC1, 115 | Scalar4 *d_gridk, 116 | CUFFTCOMPLEX *d_gridX, 117 | CUFFTCOMPLEX *d_gridY, 118 | CUFFTCOMPLEX *d_gridZ, 119 | cufftHandle plan, 120 | const int Nx, 121 | const int Ny, 122 | const int Nz, 123 | const unsigned int *d_n_neigh, 124 | const unsigned int *d_nlist, 125 | const unsigned int *d_headlist, 126 | int& m_Lanczos, 127 | const unsigned int N_total, 128 | unsigned int NxNyNz, 129 | dim3 grid, 130 | dim3 threads, 131 | int gridBlockSize, 132 | int gridNBlock, 133 | Scalar3 gridh, 134 | Scalar cheb_error, 135 | Scalar self 136 | ); 137 | 138 | #endif 139 | -------------------------------------------------------------------------------- /PSEv1/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Maintainer: Andrew M. 
Fiore 2 | 3 | set(COMPONENT_NAME PSEv1) 4 | 5 | set(_${COMPONENT_NAME}_sources 6 | module.cc 7 | Stokes.cc 8 | ShearFunction.cc 9 | ShearFunctionWrap.cc 10 | SpecificShearFunction.cc 11 | VariantShearFunction.cc 12 | ) 13 | 14 | set(_${COMPONENT_NAME}_cu_sources 15 | Stokes.cu 16 | Brownian.cu 17 | Helper.cu 18 | Mobility.cu 19 | ) 20 | 21 | if (ENABLE_CUDA) 22 | CUDA_COMPILE(_CUDA_GENERATED_FILES ${_${COMPONENT_NAME}_cu_sources} OPTIONS ${CUDA_ADDITIONAL_OPTIONS} SHARED) 23 | endif (ENABLE_CUDA) 24 | 25 | pybind11_add_module (_${COMPONENT_NAME} SHARED ${_${COMPONENT_NAME}_sources} ${_CUDA_GENERATED_FILES} NO_EXTRAS) 26 | if (APPLE) 27 | set_target_properties(_${COMPONENT_NAME} PROPERTIES INSTALL_RPATH "@loader_path/..;@loader_path") 28 | else() 29 | set_target_properties(_${COMPONENT_NAME} PROPERTIES INSTALL_RPATH "\$ORIGIN/..;\$ORIGIN") 30 | endif() 31 | 32 | # Find additional libraries to be linked for the plugin 33 | find_library( LAPACKE_LIBRARIES lapacke PATHS /usr/local/lapack-3.6.0/lib/ ) 34 | find_library( LAPACK_LIBRARIES lapack PATHS /usr/local/lapack-3.6.0/lib/ ) 35 | find_library( BLAS_LIBRARIES blas PATHS /usr/local/lapack-3.6.0/lib/ ) 36 | find_library( CBLAS_LIBRARIES cblas PATHS /usr/local/lapack-3.6.0/lib/ ) 37 | if( LAPACKE_LIBRARIES-NOTFOUND ) 38 | message(FATAL_ERROR "lapacke libraries not found") 39 | endif( LAPACKE_LIBRARIES-NOTFOUND ) 40 | message(STATUS "found lapacke libraries: ${LAPACKE_LIBRARIES}") 41 | if( LAPACK_LIBRARIES-NOTFOUND ) 42 | message(FATAL_ERROR "lapack libraries not found") 43 | endif( LAPACK_LIBRARIES-NOTFOUND ) 44 | message(STATUS "found lapack libraries: ${LAPACK_LIBRARIES}") 45 | set( LAPACK_LIBRARIES ${LAPACKE_LIBRARIES} ${LAPACK_LIBRARIES} ${CBLAS_LIBRARIES} ${BLAS_LIBRARIES} ) 46 | 47 | # link the library to its dependencies 48 | target_link_libraries(_${COMPONENT_NAME} PRIVATE ${HOOMD_LIBRARIES} ${LAPACK_LIBRARIES}) 49 | 50 | # if we are compiling with MPI support built in, set appropriate 51 | # compiler/linker 
flags 52 | if (ENABLE_MPI) 53 | if(MPI_COMPILE_FLAGS) 54 | set_target_properties(_${COMPONENT_NAME} PROPERTIES COMPILE_FLAGS "${MPI_CXX_COMPILE_FLAGS}") 55 | endif(MPI_COMPILE_FLAGS) 56 | if(MPI_LINK_FLAGS) 57 | set_target_properties(_${COMPONENT_NAME} PROPERTIES LINK_FLAGS "${MPI_CXX_LINK_FLAGS}") 58 | endif(MPI_LINK_FLAGS) 59 | endif(ENABLE_MPI) 60 | 61 | fix_cudart_rpath(_${COMPONENT_NAME}) 62 | 63 | # install the library 64 | install(TARGETS _${COMPONENT_NAME} 65 | LIBRARY DESTINATION ${PYTHON_MODULE_BASE_DIR}/${COMPONENT_NAME} 66 | ) 67 | 68 | ################ Python only modules 69 | # copy python modules to the build directory to make it a working python package 70 | MACRO(copy_file file) 71 | add_custom_command ( 72 | OUTPUT ${file} 73 | DEPENDS ${file} 74 | POST_BUILD 75 | COMMAND ${CMAKE_COMMAND} 76 | ARGS -E copy ${CMAKE_CURRENT_SOURCE_DIR}/${file} ${CMAKE_CURRENT_BINARY_DIR}/${file} 77 | COMMENT "Copy hoomd/${COMPONENT_NAME}/${file}" 78 | ) 79 | ENDMACRO(copy_file) 80 | 81 | set(files 82 | __init__.py 83 | integrate.py 84 | shear_function.py 85 | variant.py 86 | ) 87 | 88 | install(FILES ${files} 89 | DESTINATION ${PYTHON_MODULE_BASE_DIR}/${COMPONENT_NAME} 90 | ) 91 | 92 | foreach(file ${files}) 93 | copy_file(${file}) 94 | endforeach() 95 | 96 | add_custom_target(copy_${COMPONENT_NAME} ALL DEPENDS ${files}) 97 | 98 | if (BUILD_TESTING) 99 | add_subdirectory(test-py) 100 | endif() 101 | -------------------------------------------------------------------------------- /PSEv1/Helper.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 
5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 
40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Gang Wang 52 | // Modified by Andrew Fiore 53 | 54 | 55 | #include "Helper.cuh" 56 | 57 | #include "hoomd/TextureTools.h" 58 | 59 | #include 60 | 61 | #ifdef WIN32 62 | #include 63 | #else 64 | #include 65 | #endif 66 | 67 | //! command to convert floats or doubles to integers 68 | #ifdef SINGLE_PRECISION 69 | #define __scalar2int_rd __float2int_rd 70 | #else 71 | #define __scalar2int_rd __double2int_rd 72 | #endif 73 | 74 | 75 | /*! \file Helper.cu 76 | \brief Helper functions to perform additions, dot products, etc., for Mobility and Brownian 77 | */ 78 | 79 | //! Shared memory array for partial sum of dot product kernel 80 | extern __shared__ Scalar partial_sum[]; 81 | 82 | //! Zero out the force grid 83 | /*! 84 | \param grid the grid going to be zero out 85 | \param NxNyNz dimension of the grid 86 | */ 87 | __global__ 88 | void gpu_stokes_ZeroGrid_kernel(CUFFTCOMPLEX *grid, unsigned int NxNyNz) { 89 | 90 | unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x; 91 | 92 | if ( tid < NxNyNz ) { 93 | 94 | grid[tid] = make_scalar2( 0.0, 0.0 ); 95 | 96 | } 97 | } 98 | 99 | /*! 100 | Linear combination helper function 101 | C = a*A + b*B 102 | C can be A or B, so that A or B will be overwritten 103 | The fourth element of Scalar4 is not changed! 
104 | 105 | \param d_a input vector, A 106 | \param d_b input vector, B 107 | \param d_c output vector, C 108 | \param coeff_a scaling factor for A, a 109 | \param coeff_b scaling factor for B, b 110 | \param group_size length of vectors 111 | \param d_group_members index into vectors 112 | */ 113 | __global__ void gpu_stokes_LinearCombination_kernel( 114 | Scalar4 *d_a, 115 | Scalar4 *d_b, 116 | Scalar4 *d_c, 117 | Scalar coeff_a, 118 | Scalar coeff_b, 119 | unsigned int group_size, 120 | unsigned int *d_group_members 121 | ){ 122 | 123 | int group_idx = blockDim.x * blockIdx.x + threadIdx.x; 124 | if (group_idx < group_size) { 125 | unsigned int idx = d_group_members[group_idx]; 126 | Scalar4 A4 = d_a[idx]; 127 | Scalar4 B4 = d_b[idx]; 128 | Scalar3 A = make_scalar3(A4.x, A4.y, A4.z); 129 | Scalar3 B = make_scalar3(B4.x, B4.y, B4.z); 130 | A = coeff_a * A + coeff_b * B; 131 | d_c[idx] = make_scalar4(A.x, A.y, A.z, d_c[idx].w); 132 | } 133 | } 134 | 135 | /*! 136 | Dot product helper function: First step 137 | d_a .* d_b -> d_c -> Partial sum 138 | BlockDim of this kernel should be 2^n, which is 512. 
(Based on HOOMD ComputeThermoGPU class) 139 | 140 | \param d_a first vector in dot product 141 | \param d_b second vector in dot product 142 | \param dot_sum partial dot product sum 143 | \param group_size length of vectors a and b 144 | \param d_group_members index into vectors 145 | */ 146 | __global__ void gpu_stokes_DotStepOne_kernel( 147 | Scalar4 *d_a, 148 | Scalar4 *d_b, 149 | Scalar *dot_sum, 150 | unsigned int group_size, 151 | unsigned int *d_group_members 152 | ){ 153 | 154 | int group_idx = blockDim.x * blockIdx.x + threadIdx.x; 155 | Scalar temp; 156 | 157 | if (group_idx < group_size) { 158 | 159 | unsigned int idx = d_group_members[group_idx]; 160 | Scalar4 a4 = d_a[idx]; 161 | Scalar4 b4 = d_b[idx]; 162 | Scalar3 a = make_scalar3(a4.x, a4.y, a4.z); 163 | Scalar3 b = make_scalar3(b4.x, b4.y, b4.z); 164 | 165 | temp = dot(a,b); // Partial sum, each thread, shared memory 166 | 167 | } 168 | else { 169 | temp = 0; 170 | } 171 | 172 | partial_sum[threadIdx.x] = temp; 173 | 174 | __syncthreads(); 175 | 176 | int offs = blockDim.x >> 1; 177 | 178 | while (offs > 0) 179 | { 180 | if (threadIdx.x < offs) 181 | { 182 | partial_sum[threadIdx.x] += partial_sum[threadIdx.x + offs]; 183 | } 184 | offs >>= 1; 185 | __syncthreads(); 186 | } 187 | 188 | if (threadIdx.x == 0){ 189 | dot_sum[blockIdx.x] = partial_sum[0]; 190 | } 191 | } 192 | 193 | 194 | 195 | /*! 
196 | Dot product helper function: Second step 197 | Partial sum -> Final sum 198 | Only one block will be launched for this step 199 | 200 | \param dot_sum partial sum from first dot product kernel 201 | \param num_partial_sums length of dot_sum array 202 | 203 | */ 204 | __global__ void gpu_stokes_DotStepTwo_kernel( 205 | Scalar *dot_sum, 206 | unsigned int num_partial_sums 207 | ){ 208 | 209 | partial_sum[threadIdx.x] = 0.0; 210 | __syncthreads(); 211 | for (unsigned int start = 0; start < num_partial_sums; start += blockDim.x) 212 | { 213 | if (start + threadIdx.x < num_partial_sums) 214 | { 215 | partial_sum[threadIdx.x] += dot_sum[start + threadIdx.x]; 216 | } 217 | } 218 | 219 | int offs = blockDim.x >> 1; 220 | while (offs > 0) 221 | { 222 | __syncthreads(); 223 | if (threadIdx.x < offs) 224 | { 225 | partial_sum[threadIdx.x] += partial_sum[threadIdx.x + offs]; 226 | } 227 | offs >>= 1; 228 | 229 | } 230 | __syncthreads(); 231 | if (threadIdx.x == 0) 232 | { 233 | dot_sum[0] = partial_sum[0]; // Save the dot product to the first element of dot_sum array 234 | } 235 | 236 | } 237 | 238 | 239 | /*! 
240 | 241 | Perform matrix-vector multiply needed for the Lanczos contribution to the Brownian velocity 242 | 243 | \param d_A matrix, N x m 244 | \param d_x multiplying vector, m x 1 245 | \param d_b result vector, A*x, m x 1 246 | \param group_size number of particles 247 | \param m number of iterations ( number of columns of A, length of x ) 248 | 249 | */ 250 | 251 | __global__ void gpu_stokes_MatVecMultiply_kernel( 252 | Scalar4 *d_A, 253 | Scalar *d_x, 254 | Scalar4 *d_b, 255 | unsigned int group_size, 256 | int m 257 | ){ 258 | 259 | int idx = blockDim.x * blockIdx.x + threadIdx.x; 260 | if (idx < group_size) { 261 | 262 | Scalar3 tempprod = make_scalar3( 0.0, 0.0, 0.0 ); 263 | 264 | for ( int ii = 0; ii < m; ++ii ){ 265 | 266 | Scalar4 matidx = d_A[ idx + ii*group_size ]; 267 | 268 | Scalar xcurr = d_x[ii]; 269 | 270 | tempprod.x = tempprod.x + matidx.x * xcurr; 271 | tempprod.y = tempprod.y + matidx.y * xcurr; 272 | tempprod.z = tempprod.z + matidx.z * xcurr; 273 | 274 | } 275 | 276 | d_b[idx] = make_scalar4( tempprod.x, tempprod.y, tempprod.z, d_A[idx].w ); 277 | 278 | } 279 | } 280 | 281 | /*! 282 | Kernel function to calculate position of each grid in reciprocal space: gridk 283 | */ 284 | __global__ 285 | void gpu_stokes_SetGridk_kernel( 286 | Scalar4 *gridk, 287 | int Nx, 288 | int Ny, 289 | int Nz, 290 | unsigned int NxNyNz, 291 | BoxDim box, 292 | Scalar xi, 293 | Scalar eta 294 | ){ 295 | 296 | int tid = blockDim.x * blockIdx.x + threadIdx.x; 297 | 298 | if ( tid < NxNyNz ) { 299 | 300 | int i = tid / (Ny*Nz); 301 | int j = (tid - i * Ny * Nz) / Nz; 302 | int k = tid % Nz; 303 | 304 | Scalar3 L = box.getL(); 305 | Scalar xy = box.getTiltFactorXY(); 306 | Scalar4 gridk_value; 307 | 308 | gridk_value.x = (i < (Nx+1) / 2) ? i : i - Nx; 309 | gridk_value.y = ( ((j < (Ny+1) / 2) ? j : j - Ny) - xy * gridk_value.x * L.y / L.x ) / L.y; // Fixed by Zsigi 2015 310 | gridk_value.x = gridk_value.x / L.x; 311 | gridk_value.z = ((k < (Nz+1) / 2) ? 
k : k - Nz) / L.z; 312 | 313 | gridk_value.x *= 2.0*3.1416926536; 314 | gridk_value.y *= 2.0*3.1416926536; 315 | gridk_value.z *= 2.0*3.1416926536; 316 | 317 | Scalar k2 = gridk_value.x*gridk_value.x + gridk_value.y*gridk_value.y + gridk_value.z*gridk_value.z; 318 | Scalar xisq = xi * xi; 319 | 320 | // Scaling factor used in wave space sum 321 | if (i == 0 && j == 0 && k == 0){ 322 | gridk_value.w = 0.0; 323 | } 324 | else{ 325 | // Have to divide by Nx*Ny*Nz to normalize the FFTs 326 | gridk_value.w = 6.0*3.1415926536 * (1.0 + k2/4.0/xisq) * expf( -(1-eta) * k2/4.0/xisq ) / ( k2 ) / Scalar( Nx*Ny*Nz ); 327 | } 328 | 329 | gridk[tid] = gridk_value; 330 | 331 | } 332 | } 333 | 334 | 335 | -------------------------------------------------------------------------------- /PSEv1/Helper.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 
20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Andrew Fiore 52 | 53 | /*! \file Helper.cuh 54 | \brief Declares GPU kernel code for helper functions for the Brownian and Mobility calculations. 55 | */ 56 | #include "hoomd/ParticleData.cuh" 57 | #include "hoomd/HOOMDMath.h" 58 | #include "hoomd/Index1D.h" 59 | 60 | #include 61 | 62 | //! 
Define the step_one kernel 63 | #ifndef __HELPER_CUH__ 64 | #define __HELPER_CUH__ 65 | 66 | //! Definition for comxplex variable storage 67 | #ifdef SINGLE_PRECISION 68 | #define CUFFTCOMPLEX cufftComplex 69 | #else 70 | #define CUFFTCOMPLEX cufftComplex 71 | #endif 72 | 73 | __global__ void gpu_stokes_ZeroGrid_kernel(CUFFTCOMPLEX *grid, unsigned int NxNyNz); 74 | 75 | __global__ void gpu_stokes_LinearCombination_kernel(Scalar4 *d_a, Scalar4 *d_b, Scalar4 *d_c, Scalar coeff_a, Scalar coeff_b, unsigned int group_size, unsigned int *d_group_members); 76 | 77 | __global__ void gpu_stokes_DotStepOne_kernel(Scalar4 *d_a, Scalar4 *d_b, Scalar *dot_sum, unsigned int group_size, unsigned int *d_group_members); 78 | 79 | __global__ void gpu_stokes_DotStepTwo_kernel(Scalar *dot_sum, unsigned int num_partial_sums); 80 | 81 | __global__ void gpu_stokes_MatVecMultiply_kernel(Scalar4 *d_A, Scalar *d_x, Scalar4 *d_b, unsigned int group_size, int m); 82 | 83 | __global__ void gpu_stokes_SetGridk_kernel(Scalar4 *gridk, int Nx, int Ny, int Nz, unsigned int NxNyNz, BoxDim box, Scalar xi, Scalar eta); 84 | 85 | 86 | #endif 87 | -------------------------------------------------------------------------------- /PSEv1/Mobility.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 
9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 
40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Andrew Fiore 52 | 53 | 54 | #include "Mobility.cuh" 55 | #include "Helper.cuh" 56 | 57 | #include "hoomd/TextureTools.h" 58 | 59 | #include 60 | 61 | #include 62 | #include 63 | 64 | #ifdef WIN32 65 | #include 66 | #else 67 | #include 68 | #endif 69 | 70 | //! command to convert floats or doubles to integers 71 | #ifdef SINGLE_PRECISION 72 | #define __scalar2int_rd __float2int_rd 73 | #else 74 | #define __scalar2int_rd __double2int_rd 75 | #endif 76 | 77 | 78 | /*! \file Mobility.cu 79 | \brief Defines GPU kernel code for Mobility calculations. 80 | */ 81 | 82 | //! Shared memory array for partial sum of dot product kernel 83 | extern __shared__ Scalar partial_sum[]; 84 | extern __shared__ Scalar4 shared_Fpos[]; 85 | 86 | //! Texture for reading table values 87 | scalar4_tex_t tables1_tex; 88 | //! Texture for reading particle positions 89 | scalar4_tex_t pos_tex; 90 | 91 | //! Spread particle quantities to the grid ( ALL PARTICLES SAME SIZE ) -- give one block per particle 92 | /*! \param d_pos positions of the particles, actually they are fetched on texture memory 93 | \param d_net_force net forces on the particles 94 | \param gridX x-component of force moments projected onto grid 95 | \param gridY y-component of force moments projected onto grid 96 | \param gridZ z-component of force moments projected onto grid 97 | \param group_size size of the group, i.e. 
number of particles 98 | \param Nx number of grid nodes in x direction 99 | \param Ny number of grid nodes in y direction 100 | \param Nz number of grid nodes in z direction 101 | \param d_group_members index array to global HOOMD tag on each particle 102 | \param box array containing box dimensions 103 | \param P number of grid nodes in support of spreading Gaussians 104 | \param gridh space between grid nodes in each dimension 105 | \param xi Ewald splitting parameter 106 | \param eta Spectral splitting parameter 107 | \param prefac Spreading function prefactor 108 | \param expfac Spreading function exponential factor 109 | 110 | One 3-D block of threads is launched per particle (block dimension = PxPxP). Max dimension 111 | is 10x10x10. If P > 10, each thread will do more than one grid point worth of work. 112 | 113 | */ 114 | __global__ void gpu_stokes_Spread_kernel( 115 | Scalar4 *d_pos, 116 | Scalar4 *d_net_force, 117 | CUFFTCOMPLEX *gridX, 118 | CUFFTCOMPLEX *gridY, 119 | CUFFTCOMPLEX *gridZ, 120 | int group_size, 121 | int Nx, 122 | int Ny, 123 | int Nz, 124 | unsigned int *d_group_members, 125 | BoxDim box, 126 | const int P, 127 | Scalar3 gridh, 128 | Scalar xi, 129 | Scalar eta, 130 | Scalar prefac, 131 | Scalar expfac 132 | ){ 133 | 134 | // Shared memory for particle force and position, so that each block 135 | // only has to read once 136 | __shared__ Scalar3 shared[2]; // 16 kb max 137 | 138 | Scalar3 *force_shared = shared; 139 | Scalar3 *pos_shared = &shared[1]; 140 | 141 | // Offset for the block (i.e. particle ID within group) 142 | int group_idx = blockIdx.x; 143 | 144 | // Offset for the thread (i.e. 
grid point ID within particle's support) 145 | int thread_offset = threadIdx.z + threadIdx.y * blockDim.z + threadIdx.x * blockDim.z*blockDim.y; 146 | 147 | // Global particle ID 148 | unsigned int idx = d_group_members[group_idx]; 149 | 150 | // Initialize shared memory and get particle position 151 | if ( thread_offset == 0 ){ 152 | Scalar4 tpos = texFetchScalar4(d_pos, pos_tex, idx); 153 | pos_shared[0].x = tpos.x; 154 | pos_shared[0].y = tpos.y; 155 | pos_shared[0].z = tpos.z; 156 | 157 | Scalar4 tforce = d_net_force[idx]; 158 | force_shared[0].x = tforce.x; 159 | force_shared[0].y = tforce.y; 160 | force_shared[0].z = tforce.z; 161 | } 162 | __syncthreads(); 163 | 164 | // Box dimension 165 | Scalar3 L = box.getL(); 166 | Scalar3 Ld2 = L / 2.0; 167 | 168 | // Retrieve position from shared memory 169 | Scalar3 pos = pos_shared[0]; 170 | Scalar3 force = force_shared[0]; 171 | 172 | // Fractional position within box 173 | Scalar3 pos_frac = box.makeFraction(pos); 174 | 175 | pos_frac.x *= (Scalar)Nx; 176 | pos_frac.y *= (Scalar)Ny; 177 | pos_frac.z *= (Scalar)Nz; 178 | 179 | // Grid index of floor of fractional position 180 | int x = int( pos_frac.x ); 181 | int y = int( pos_frac.y ); 182 | int z = int( pos_frac.z ); 183 | 184 | // Amount of work needed for each thread to cover support 185 | // (Required in case support size is larger than grid dimension, 186 | // but in most cases, should have n.x = n.y = n.z = 1 ) 187 | int3 n, t; 188 | n.x = ( P + blockDim.x - 1 ) / blockDim.x; // ceiling 189 | n.y = ( P + blockDim.y - 1 ) / blockDim.y; 190 | n.z = ( P + blockDim.z - 1 ) / blockDim.z; 191 | 192 | // Grid point associated with current thread 193 | int Pd2 = P/2; // integer division does floor 194 | 195 | for ( int ii = 0; ii < n.x; ++ii ){ 196 | 197 | t.x = threadIdx.x + ii*blockDim.x; 198 | 199 | for ( int jj = 0; jj < n.y; ++jj ){ 200 | 201 | t.y = threadIdx.y + jj*blockDim.y; 202 | 203 | for ( int kk = 0; kk < n.z; ++kk ){ 204 | 205 | t.z = threadIdx.z + 
kk*blockDim.z; 206 | 207 | if ( ( t.x < P ) && ( t.y < P ) && ( t.z < P ) ){ 208 | 209 | // x,y,z indices for current thread 210 | // 211 | // Arithmetic with P makes sure distribution is centered on the particle 212 | int x_inp = x + t.x - Pd2 + 1 - (P % 2) * ( pos_frac.x - Scalar( x ) < 0.5 ); 213 | int y_inp = y + t.y - Pd2 + 1 - (P % 2) * ( pos_frac.y - Scalar( y ) < 0.5 ); 214 | int z_inp = z + t.z - Pd2 + 1 - (P % 2) * ( pos_frac.z - Scalar( z ) < 0.5 ); 215 | 216 | // Periodic wrapping of grid point 217 | x_inp = (x_inp<0) ? x_inp+Nx : ( (x_inp>Nx-1) ? x_inp-Nx : x_inp ); 218 | y_inp = (y_inp<0) ? y_inp+Ny : ( (y_inp>Ny-1) ? y_inp-Ny : y_inp ); 219 | z_inp = (z_inp<0) ? z_inp+Nz : ( (z_inp>Nz-1) ? z_inp-Nz : z_inp ); 220 | 221 | // x,y,z coordinates for current thread 222 | Scalar3 pos_grid; 223 | pos_grid.x = gridh.x*x_inp - Ld2.x; 224 | pos_grid.y = gridh.y*y_inp - Ld2.y; 225 | pos_grid.z = gridh.z*z_inp - Ld2.z; 226 | 227 | // Shear the grid position 228 | // !!! This only works for linear shear where the shear gradient is along y 229 | // and the shear direction is along x 230 | pos_grid.x = pos_grid.x + box.getTiltFactorXY() * pos_grid.y; 231 | 232 | // Global index for current grid point 233 | int grid_idx = x_inp * Ny * Nz + y_inp * Nz + z_inp; 234 | 235 | // Distance from particle to grid node 236 | Scalar3 r = pos_grid - pos; 237 | r = box.minImage(r); 238 | Scalar rsq = r.x*r.x + r.y*r.y + r.z*r.z; 239 | 240 | // Magnitude of the force contribution to the current grid node 241 | Scalar3 force_inp = prefac * expf( -expfac * rsq ) * force; 242 | 243 | // Add force to the grid 244 | atomicAdd( &(gridX[grid_idx].x), force_inp.x); 245 | atomicAdd( &(gridY[grid_idx].x), force_inp.y); 246 | atomicAdd( &(gridZ[grid_idx].x), force_inp.z); 247 | }// check thread is within support 248 | }// kk 249 | }// jj 250 | }// ii 251 | 252 | } 253 | 254 | //! 
//! Compute the velocity from the force moments on the grid (Same Size Particles)
//
// This is the operator "B" from the paper
//
/*! \param gridX x-component of force moments projected onto grid
	\param gridY y-component of force moments projected onto grid
	\param gridZ z-component of force moments projected onto grid
	\param gridk wave vector and scaling factor associated with each reciprocal grid node
	\param NxNyNz total number of grid nodes
*/
__global__ void gpu_stokes_Green_kernel(
				CUFFTCOMPLEX *gridX,
				CUFFTCOMPLEX *gridY,
				CUFFTCOMPLEX *gridZ,
				Scalar4 *gridk,
				unsigned int NxNyNz
				) {

	// One thread per reciprocal-space grid node
	int tid = blockDim.x * blockIdx.x + threadIdx.x;

	if ( tid < NxNyNz ) {

		// FFT'd force at this node (real/imaginary parts in .x/.y)
		Scalar2 fX = gridX[tid];
		Scalar2 fY = gridY[tid];
		Scalar2 fZ = gridZ[tid];

		// Wave vector for this node (scaling factor stored in .w)
		Scalar4 tk = gridk[tid];
		Scalar ksq = tk.x*tk.x + tk.y*tk.y + tk.z*tk.z;
		Scalar k = sqrtf( ksq );

		// k.F / |k|^2; the tid==0 (k = 0) node is excluded to avoid division by zero
		Scalar2 kdF = (tid==0) ? make_scalar2(0.0,0.0) : make_scalar2( ( tk.x*fX.x + tk.y*fY.x + tk.z*fZ.x ) / ksq, ( tk.x*fX.y + tk.y*fY.y + tk.z*fZ.y ) / ksq );

		// Scaling factor (zero at the k = 0 node)
		Scalar B = (tid==0) ? 0.0 : tk.w * ( sinf( k ) / k ) * ( sinf( k ) / k );

		// Project out the longitudinal component, scale, and write the velocity back in place
		gridX[tid] = make_scalar2( ( fX.x - tk.x * kdF.x ) * B, ( fX.y - tk.x * kdF.y ) * B );
		gridY[tid] = make_scalar2( ( fY.x - tk.y * kdF.x ) * B, ( fY.y - tk.y * kdF.y ) * B );
		gridZ[tid] = make_scalar2( ( fZ.x - tk.z * kdF.x ) * B, ( fZ.y - tk.z * kdF.y ) * B );

	}
}

//! Add velocity from grid to particles ( Same Size Particles, Block Per Particle (support) )
/*!
\param d_pos positions of the particles, actually they are fetched on texture memory 303 | \param d_net_force net forces on the particles 304 | \param d_vel particle velocity 305 | \param gridX x-component of force moments projected onto grid 306 | \param gridY y-component of force moments projected onto grid 307 | \param gridZ z-component of force moments projected onto grid 308 | \param group_size size of the group, i.e. number of particles 309 | \param Nx number of grid nodes in x direction 310 | \param Ny number of grid nodes in y direction 311 | \param Nz number of grid nodes in z direction 312 | \param xi Ewald splitting parameter 313 | \param eta Spectral splitting parameter 314 | \param d_group_members index array to global HOOMD tag on each particle 315 | \param box array containing box dimensions 316 | \param P number of grid nodes in support of spreading Gaussians 317 | \param gridh space between grid nodes in each dimension 318 | \param prefac Spreading function prefactor 319 | \param expfac Spreading function exponential factor 320 | 321 | One 3-D block of threads is launched per particle (block dimension = PxPxP). Max dimension 322 | is 10x10x10 because of shared memory limitations. If P > 10, each thread will do more 323 | than one grid point worth of work. 
324 | */ 325 | __global__ void gpu_stokes_Contract_kernel( 326 | Scalar4 *d_pos, 327 | Scalar4 *d_vel, 328 | CUFFTCOMPLEX *gridX, 329 | CUFFTCOMPLEX *gridY, 330 | CUFFTCOMPLEX *gridZ, 331 | int group_size, 332 | int Nx, 333 | int Ny, 334 | int Nz, 335 | Scalar xi, 336 | Scalar eta, 337 | unsigned int *d_group_members, 338 | BoxDim box, 339 | const int P, 340 | Scalar3 gridh, 341 | Scalar prefac, 342 | Scalar expfac 343 | ){ 344 | 345 | // Shared memory for particle velocity and position, so that each block 346 | // only has to read one 347 | extern __shared__ Scalar3 shared[]; 348 | 349 | Scalar3 *velocity = shared; 350 | Scalar3 *pos_shared = &shared[blockDim.x*blockDim.y*blockDim.z]; 351 | 352 | // Particle index within each group (block per particle) 353 | int group_idx = blockIdx.x; 354 | 355 | // Thread index within the block (grid point index) 356 | int thread_offset = threadIdx.z + threadIdx.y * blockDim.z + threadIdx.x * blockDim.z*blockDim.y; 357 | 358 | // Total number of threads within the block 359 | int block_size = blockDim.x * blockDim.y * blockDim.z; 360 | 361 | // Global particle ID 362 | unsigned int idx = d_group_members[group_idx]; 363 | 364 | // Initialize shared memory and get particle position 365 | velocity[thread_offset] = make_scalar3(0.0,0.0,0.0); 366 | if ( thread_offset == 0 ){ 367 | Scalar4 tpos = texFetchScalar4(d_pos, pos_tex, idx); 368 | pos_shared[0] = make_scalar3( tpos.x, tpos.y, tpos.z ); 369 | } 370 | __syncthreads(); 371 | 372 | // Box dimension 373 | Scalar3 L = box.getL(); 374 | Scalar3 Ld2 = L / 2.0; 375 | 376 | // Retrieve position from shared memory 377 | Scalar3 pos = pos_shared[0]; 378 | 379 | // Fractional position within box 380 | Scalar3 pos_frac = box.makeFraction(pos); 381 | 382 | pos_frac.x *= (Scalar)Nx; 383 | pos_frac.y *= (Scalar)Ny; 384 | pos_frac.z *= (Scalar)Nz; 385 | 386 | int x = int( pos_frac.x ); 387 | int y = int( pos_frac.y ); 388 | int z = int( pos_frac.z ); 389 | 390 | // Amount of work needed for 
each thread to cover support 391 | // (Required in case support size is larger than grid dimension, 392 | // but in most cases, should have n.x = n.y = n.z = 1 ) 393 | int3 n, t; 394 | n.x = ( P + blockDim.x - 1 ) / blockDim.x; // ceiling 395 | n.y = ( P + blockDim.y - 1 ) / blockDim.y; 396 | n.z = ( P + blockDim.z - 1 ) / blockDim.z; 397 | 398 | // Grid point associated with current thread 399 | int Pd2 = P / 2; // integer division does floor 400 | 401 | for ( int ii = 0; ii < n.x; ++ii ){ 402 | 403 | t.x = threadIdx.x + ii*blockDim.x; 404 | 405 | for ( int jj = 0; jj < n.y; ++jj ){ 406 | 407 | t.y = threadIdx.y + jj*blockDim.y; 408 | 409 | for ( int kk = 0; kk < n.z; ++kk ){ 410 | 411 | t.z = threadIdx.z + kk*blockDim.z; 412 | 413 | if( ( t.x < P ) && ( t.y < P ) && ( t.z < P ) ){ 414 | 415 | // x,y,z indices for current thread 416 | // 417 | // Arithmetic with P makes sure distribution is centered on the particle 418 | int x_inp = x + t.x - Pd2 + 1 - (P % 2) * ( pos_frac.x - Scalar( x ) < 0.5 ); 419 | int y_inp = y + t.y - Pd2 + 1 - (P % 2) * ( pos_frac.y - Scalar( y ) < 0.5 ); 420 | int z_inp = z + t.z - Pd2 + 1 - (P % 2) * ( pos_frac.z - Scalar( z ) < 0.5 ); 421 | 422 | // Periodic wrapping of grid point 423 | x_inp = (x_inp<0) ? x_inp+Nx : ( (x_inp>Nx-1) ? x_inp-Nx : x_inp ); 424 | y_inp = (y_inp<0) ? y_inp+Ny : ( (y_inp>Ny-1) ? y_inp-Ny : y_inp ); 425 | z_inp = (z_inp<0) ? z_inp+Nz : ( (z_inp>Nz-1) ? z_inp-Nz : z_inp ); 426 | 427 | // x,y,z coordinates for current thread 428 | Scalar3 pos_grid; 429 | pos_grid.x = gridh.x*x_inp - Ld2.x; 430 | pos_grid.y = gridh.y*y_inp - Ld2.y; 431 | pos_grid.z = gridh.z*z_inp - Ld2.z; 432 | 433 | // Shear the grid position 434 | // !!! 
This only works for linear shear where the shear gradient is along y 435 | // and the shear direction is along x 436 | pos_grid.x = pos_grid.x + box.getTiltFactorXY() * pos_grid.y; 437 | 438 | // Global index for current grid point 439 | int grid_idx = x_inp * Ny * Nz + y_inp * Nz + z_inp; 440 | 441 | // Distance from particle to grid node 442 | Scalar3 r = pos_grid - pos; 443 | r = box.minImage(r); 444 | Scalar rsq = r.x*r.x + r.y*r.y + r.z*r.z; 445 | 446 | // Spreading Factor 447 | Scalar Cfac = prefac * expf( -expfac * rsq ); 448 | 449 | // Get velocity from reduction (THIS IS THE SLOW STEP): 450 | velocity[thread_offset] += Cfac * make_scalar3( gridX[grid_idx].x, gridY[grid_idx].x, gridZ[grid_idx].x ); 451 | } 452 | }//kk 453 | }//jj 454 | }//ii 455 | 456 | // Intra-block reduction for the total particle velocity 457 | // (add contributions from all grid points) 458 | int offs = block_size; 459 | int offs_prev; 460 | while (offs > 1) 461 | { 462 | offs_prev = offs; 463 | offs = ( offs + 1 ) / 2; 464 | __syncthreads(); 465 | if (thread_offset + offs < offs_prev) 466 | { 467 | velocity[thread_offset] += velocity[thread_offset + offs]; 468 | } 469 | 470 | } 471 | 472 | // Write out to global memory 473 | if (thread_offset == 0){ 474 | d_vel[idx] = make_scalar4(velocity[0].x, velocity[0].y, velocity[0].z, d_vel[idx].w); 475 | } 476 | 477 | } 478 | 479 | /*! 480 | Wrapper to drive all the kernel functions used to compute 481 | the wave space part of Mobility ( Same Size Particles ) 482 | 483 | */ 484 | /*! \param d_pos positions of the particles, actually they are fetched on texture memory 485 | \param d_vel particle velocity 486 | \param d_net_force net forces on the particles 487 | \param group_size size of the group, i.e. 
number of particles 488 | \param d_group_members index array to global HOOMD tag on each particle 489 | \param box array containing box dimensions 490 | \param xi Ewald splitting parameter 491 | \param eta Spectral splitting parameter 492 | \param ewald_cut Cut-off distance for real-space interaction 493 | \param ewald_dr Distance spacing using in computing the pre-tabulated tables 494 | \param ewald_n Number of entries in the Ewald tables 495 | \param d_ewaldC Pre-tabulated form of the real-space Ewald sum for the Velocity-Force coupling 496 | \param d_gridX x-component of force moments projected onto grid 497 | \param d_gridY y-component of force moments projected onto grid 498 | \param d_gridZ z-component of force moments projected onto grid 499 | \param d_gridk wave vector and scaling factor associated with each reciprocal grid node 500 | \param plan Plan for cufft 501 | \param Nx Number of grid/FFT nodes in x-direction 502 | \param Ny Number of grid/FFT nodes in y-direction 503 | \param Nz Number of grid/FFT nodes in z-direction 504 | \param d_n_neigh list containing number of neighbors for each particle 505 | \param d_nlist list containing neighbors of each particle 506 | \param nli index into nlist 507 | \param NxNyNz total number of grid/FFT nodes 508 | \param grid block grid to use when launching kernels 509 | \param threads number of threads per block for kernels 510 | \param gridBlockSize number of threads per block 511 | \param gridNBlock number of blocks 512 | \param P number of nodes in support of each gaussian for k-space sum 513 | \param gridh distance between grid nodes 514 | */ 515 | void gpu_stokes_Mwave_wrap( 516 | Scalar4 *d_pos, 517 | Scalar4 *d_vel, 518 | Scalar4 *d_net_force, 519 | unsigned int *d_group_members, 520 | unsigned int group_size, 521 | const BoxDim& box, 522 | Scalar xi, 523 | Scalar eta, 524 | Scalar4 *d_gridk, 525 | CUFFTCOMPLEX *d_gridX, 526 | CUFFTCOMPLEX *d_gridY, 527 | CUFFTCOMPLEX *d_gridZ, 528 | cufftHandle plan, 529 | 
const int Nx, 530 | const int Ny, 531 | const int Nz, 532 | unsigned int NxNyNz, 533 | dim3 grid, 534 | dim3 threads, 535 | int gridBlockSize, 536 | int gridNBlock, 537 | const int P, 538 | Scalar3 gridh 539 | ){ 540 | 541 | // Spreading and contraction stuff 542 | dim3 Cgrid( group_size, 1, 1); 543 | int B = ( P < 10 ) ? P : 10; 544 | dim3 Cthreads(B, B, B); 545 | 546 | Scalar quadW = gridh.x * gridh.y * gridh.z; 547 | Scalar xisq = xi * xi; 548 | Scalar prefac = ( 2.0 * xisq / 3.1415926536 / eta ) * sqrtf( 2.0 * xisq / 3.1415926536 / eta ); 549 | Scalar expfac = 2.0 * xisq / eta; 550 | 551 | // Reset the grid ( remove any previously distributed forces ) 552 | gpu_stokes_ZeroGrid_kernel<<>>(d_gridX,NxNyNz); 553 | gpu_stokes_ZeroGrid_kernel<<>>(d_gridY,NxNyNz); 554 | gpu_stokes_ZeroGrid_kernel<<>>(d_gridZ,NxNyNz); 555 | 556 | // Spread forces onto grid 557 | gpu_stokes_Spread_kernel<<>>( d_pos, d_net_force, d_gridX, d_gridY, d_gridZ, group_size, Nx, Ny, Nz, d_group_members, box, P, gridh, xi, eta, prefac, expfac ); 558 | 559 | // Perform FFT on gridded forces 560 | cufftExecC2C(plan, d_gridX, d_gridX, CUFFT_FORWARD); 561 | cufftExecC2C(plan, d_gridY, d_gridY, CUFFT_FORWARD); 562 | cufftExecC2C(plan, d_gridZ, d_gridZ, CUFFT_FORWARD); 563 | 564 | // Apply wave space scaling to FFT'd forces 565 | gpu_stokes_Green_kernel<<>>( d_gridX, d_gridY, d_gridZ, d_gridk, NxNyNz); 566 | 567 | // Return rescaled forces to real space 568 | cufftExecC2C(plan, d_gridX, d_gridX, CUFFT_INVERSE); 569 | cufftExecC2C(plan, d_gridY, d_gridY, CUFFT_INVERSE); 570 | cufftExecC2C(plan, d_gridZ, d_gridZ, CUFFT_INVERSE); 571 | 572 | // Evaluate contribution of grid velocities at particle centers 573 | gpu_stokes_Contract_kernel<<>>( d_pos, d_vel, d_gridX, d_gridY, d_gridZ, group_size, Nx, Ny, Nz, xi, eta, d_group_members, box, P, gridh, quadW*prefac, expfac ); 574 | 575 | } 576 | 577 | // Add real space Ewald summation to velocity of each particle 578 | // NLIST Method 579 | /*! 
\param d_pos positions of the particles, actually they are fetched on texture memory 580 | \param d_vel particle velocity 581 | \param d_net_force net forces on the particles 582 | \param group_size size of the group, i.e. number of particles 583 | \param xi Ewald splitting parameter 584 | \param d_ewaldC Pre-tabulated form of the real-space Ewald sum for the Velocity-Force coupling 585 | \param ewald_cut Cut-off distance for real-space interaction 586 | \param ewald_n Number of entries in the Ewald tables 587 | \param ewald_dr Distance spacing using in computing the pre-tabulated tables 588 | \param d_group_members index array to global HOOMD tag on each particle 589 | \param box array containing box dimensions 590 | \param d_n_neigh list containing number of neighbors for each particle 591 | \param d_nlist list containing neighbors of all particles 592 | \param d_headlist list of particle offsets into d_nlist 593 | */ 594 | __global__ void gpu_stokes_Mreal_kernel( 595 | Scalar4 *d_pos, 596 | Scalar4 *d_vel, 597 | Scalar4 *d_net_force, 598 | int group_size, 599 | Scalar xi, 600 | Scalar4 *d_ewaldC1, 601 | Scalar self, 602 | Scalar ewald_cut, 603 | int ewald_n, 604 | Scalar ewald_dr, 605 | unsigned int *d_group_members, 606 | BoxDim box, 607 | const unsigned int *d_n_neigh, 608 | const unsigned int *d_nlist, 609 | const unsigned int *d_headlist 610 | ){ 611 | 612 | // Index for current thread 613 | int group_idx = blockDim.x * blockIdx.x + threadIdx.x; 614 | 615 | // Initialize contribution to velocity 616 | Scalar4 u = make_scalar4( 0.0, 0.0, 0.0, 0.0 ); 617 | 618 | if (group_idx < group_size) { 619 | 620 | // Particle for this thread 621 | unsigned int idx = d_group_members[group_idx]; 622 | 623 | // Number of neighbors for current particle 624 | unsigned int n_neigh = d_n_neigh[idx]; 625 | unsigned int head_idx = d_headlist[idx]; 626 | 627 | // Particle position and table ID 628 | Scalar4 posi = texFetchScalar4(d_pos, pos_tex, idx); 629 | 630 | // Self 
contribution 631 | Scalar4 F = d_net_force[idx]; 632 | u = make_scalar4( self * F.x, self * F.y, self * F.z, 0.0 ); 633 | 634 | // Minimum and maximum distance for pair calculation 635 | Scalar mindistSq = ewald_dr * ewald_dr; 636 | Scalar maxdistSq = ewald_cut * ewald_cut; 637 | 638 | for (int neigh_idx = 0; neigh_idx < n_neigh; neigh_idx++) { 639 | 640 | // Get index for current neightbor 641 | unsigned int cur_j = d_nlist[ head_idx + neigh_idx ]; 642 | 643 | // Position and size of neighbor particle 644 | Scalar4 posj = texFetchScalar4(d_pos, pos_tex, cur_j); 645 | 646 | // Distance vector between current particle and neighbor 647 | Scalar3 r = make_scalar3( posi.x - posj.x, posi.y - posj.y, posi.z - posj.z ); 648 | r = box.minImage(r); 649 | Scalar distSqr = dot(r,r); 650 | 651 | // Add neighbor contribution if it is within the real space cutoff radius 652 | if ( ( distSqr < maxdistSq ) && ( distSqr >= mindistSq ) ) { 653 | 654 | // Need distance 655 | Scalar dist = sqrtf( distSqr ); 656 | 657 | // Force on neighbor particle 658 | Scalar4 Fj = d_net_force[cur_j]; 659 | 660 | // Fetch relevant elements from textured table for real space interaction 661 | int r_ind = __scalar2int_rd( ewald_n * ( dist - ewald_dr ) / ( ewald_cut - ewald_dr ) ); 662 | int offset = r_ind; 663 | 664 | Scalar4 tewaldC1 = texFetchScalar4(d_ewaldC1, tables1_tex, offset); 665 | 666 | // Linear interpolation of table 667 | Scalar fac = dist / ewald_dr - r_ind - Scalar(1.0); 668 | 669 | Scalar Imrr = tewaldC1.x + ( tewaldC1.z - tewaldC1.x ) * fac; 670 | Scalar rr = tewaldC1.y + ( tewaldC1.w - tewaldC1.y ) * fac; 671 | 672 | // Update velocity 673 | Scalar rdotf = ( r.x*Fj.x + r.y*Fj.y + r.z*Fj.z ) / distSqr; 674 | 675 | u.x += Imrr * Fj.x + ( rr - Imrr ) * rdotf * r.x; 676 | u.y += Imrr * Fj.y + ( rr - Imrr ) * rdotf * r.y; 677 | u.z += Imrr * Fj.z + ( rr - Imrr ) * rdotf * r.z; 678 | 679 | } 680 | 681 | } 682 | 683 | // Write to output 684 | d_vel[idx] = u; 685 | 686 | } 687 | } 688 | 689 
/*!
	Wrap all the functions to compute U = M * F ( SAME SIZE PARTICLES )
	Drive GPU kernel functions

	d_vel = M * d_net_force

*/
/*! \param d_pos positions of the particles, actually they are fetched on texture memory
	\param d_vel particle velocity
	\param d_net_force net forces on the particles
	\param group_size size of the group, i.e. number of particles
	\param d_group_members index array to global HOOMD tag on each particle
	\param box array containing box dimensions
	\param xi Ewald splitting parameter
	\param eta Spectral splitting parameter
	\param ewald_cut Cut-off distance for real-space interaction
	\param ewald_dr Distance spacing using in computing the pre-tabulated tables
	\param ewald_n Number of entries in the Ewald tables
	\param d_ewaldC1 Pre-tabulated form of the real-space Ewald sum for the Velocity-Force coupling
	\param self self-mobility coefficient
	\param d_gridX x-component of force moments projected onto grid
	\param d_gridY y-component of force moments projected onto grid
	\param d_gridZ z-component of force moments projected onto grid
	\param d_gridk wave vector and scaling factor associated with each reciprocal grid node
	\param plan Plan for cufft
	\param Nx Number of grid/FFT nodes in x-direction
	\param Ny Number of grid/FFT nodes in y-direction
	\param Nz Number of grid/FFT nodes in z-direction
	\param d_n_neigh list containing number of neighbors for each particle
	\param d_nlist list containing neighbors of each particle
	\param d_headlist list of particle offsets into d_nlist
	\param NxNyNz total number of grid/FFT nodes
	\param grid block grid to use when launching kernels
	\param threads number of threads per block for kernels
	\param gridBlockSize number of threads per block
	\param gridNBlock number of blocks
	\param P number of nodes in support of each gaussian for k-space sum
	\param gridh distance between grid nodes
*/
void gpu_stokes_Mobility_wrap(
				Scalar4 *d_pos,
				Scalar4 *d_vel,
				Scalar4 *d_net_force,
				unsigned int *d_group_members,
				unsigned int group_size,
				const BoxDim& box,
				Scalar xi,
				Scalar eta,
				Scalar ewald_cut,
				Scalar ewald_dr,
				int ewald_n,
				Scalar4 *d_ewaldC1,
				Scalar self,
				Scalar4 *d_gridk,
				CUFFTCOMPLEX *d_gridX,
				CUFFTCOMPLEX *d_gridY,
				CUFFTCOMPLEX *d_gridZ,
				cufftHandle plan,
				const int Nx,
				const int Ny,
				const int Nz,
				const unsigned int *d_n_neigh,
				const unsigned int *d_nlist,
				const unsigned int *d_headlist,
				unsigned int NxNyNz,
				dim3 grid,
				dim3 threads,
				int gridBlockSize,
				int gridNBlock,
				const int P,
				Scalar3 gridh ){

	// Scratch buffers for the real-space and wave-space parts of the velocity.
	// NOTE(review): allocations are unchecked and made on every call — consider
	// caching/checking upstream; left as-is to preserve behavior.
	Scalar4 *d_vel1, *d_vel2;
	cudaMalloc( &d_vel1, group_size*sizeof(Scalar4) );
	cudaMalloc( &d_vel2, group_size*sizeof(Scalar4) );

	// Wave space contribution to the velocity
	gpu_stokes_Mwave_wrap( d_pos, d_vel1, d_net_force, d_group_members, group_size, box, xi, eta, d_gridk, d_gridX, d_gridY, d_gridZ, plan, Nx, Ny, Nz, NxNyNz, grid, threads, gridBlockSize, gridNBlock, P, gridh );

	// Real space contribution to the velocity
	//
	// Real space calculation takes care of self contributions
	//
	// NOTE(review): the <<<...>>> launch configurations below were stripped to
	// "<<>>" in the pasted source; reconstructed from the grid/threads
	// parameters — confirm against the original file.
	gpu_stokes_Mreal_kernel<<<grid, threads>>>(d_pos, d_vel2, d_net_force, group_size, xi, d_ewaldC1, self, ewald_cut, ewald_n, ewald_dr, d_group_members, box, d_n_neigh, d_nlist, d_headlist );

	// Add real and wave space parts together: d_vel = 1.0*d_vel1 + 1.0*d_vel2
	gpu_stokes_LinearCombination_kernel<<<grid, threads>>>(d_vel1, d_vel2, d_vel, 1.0, 1.0, group_size, d_group_members);

	// Free the scratch buffers
	cudaFree(d_vel1);
	cudaFree(d_vel2);

}

// -----------------------------------------------------------------------------
/PSEv1/Mobility.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 
33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Andrew Fiore 52 | 53 | /*! \file Stokes.cuh 54 | \brief Declares GPU kernel code for integration considering hydrodynamic interactions on the GPU. Used by Stokes. 55 | */ 56 | #include "hoomd/ParticleData.cuh" 57 | #include "hoomd/HOOMDMath.h" 58 | #include "hoomd/Index1D.h" 59 | #include 60 | 61 | //! Define the step_one kernel 62 | #ifndef __MOBILITYSAMESIZE_CUH__ 63 | #define __MOBILITYSAMESIZE_CUH__ 64 | 65 | //! 
Definition for comxplex variable storage 66 | #ifdef SINGLE_PRECISION 67 | #define CUFFTCOMPLEX cufftComplex 68 | #else 69 | #define CUFFTCOMPLEX cufftComplex 70 | #endif 71 | 72 | 73 | void gpu_stokes_Mobility_wrap( Scalar4 *d_pos, 74 | Scalar4 *d_vel, 75 | Scalar4 *d_net_force, 76 | unsigned int *d_group_members, 77 | unsigned int group_size, 78 | const BoxDim& box, 79 | Scalar xi, 80 | Scalar eta, 81 | Scalar ewald_cut, 82 | Scalar ewald_dr, 83 | int ewald_n, 84 | Scalar4 *d_ewaldC1, 85 | Scalar self, 86 | Scalar4 *d_gridk, 87 | CUFFTCOMPLEX *d_gridX, 88 | CUFFTCOMPLEX *d_gridY, 89 | CUFFTCOMPLEX *d_gridZ, 90 | cufftHandle plan, 91 | const int Nx, 92 | const int Ny, 93 | const int Nz, 94 | const unsigned int *d_n_neigh, 95 | const unsigned int *d_nlist, 96 | const unsigned int *d_headlist, 97 | unsigned int NxNyNz, 98 | dim3 grid, 99 | dim3 threads, 100 | int gridBlockSize, 101 | int gridNBlock, 102 | const int P, 103 | Scalar3 gridh ); 104 | 105 | __global__ 106 | void gpu_stokes_Mreal_kernel( Scalar4 *d_pos, 107 | Scalar4 *d_vel, 108 | Scalar4 *d_net_force, 109 | int group_size, 110 | Scalar xi, 111 | Scalar4 *d_ewaldC1, 112 | Scalar self, 113 | Scalar ewald_cut, 114 | int ewald_n, 115 | Scalar ewald_dr, 116 | unsigned int *d_group_members, 117 | BoxDim box, 118 | const unsigned int *d_n_neigh, 119 | const unsigned int *d_nlist, 120 | const unsigned int *d_headlist ); 121 | 122 | __global__ void gpu_stokes_Spread_kernel( Scalar4 *d_pos, 123 | Scalar4 *d_net_force, 124 | CUFFTCOMPLEX *gridX, 125 | CUFFTCOMPLEX *gridY, 126 | CUFFTCOMPLEX *gridZ, 127 | int group_size, 128 | int Nx, 129 | int Ny, 130 | int Nz, 131 | unsigned int *d_group_members, 132 | BoxDim box, 133 | const int P, 134 | Scalar3 gridh, 135 | Scalar xi, 136 | Scalar eta, 137 | Scalar prefac, 138 | Scalar expfac ); 139 | 140 | __global__ void gpu_stokes_Green_kernel(CUFFTCOMPLEX *gridX, CUFFTCOMPLEX *gridY, CUFFTCOMPLEX *gridZ, Scalar4 *gridk, unsigned int NxNyNz); 141 | 142 | __global__ void 
gpu_stokes_Contract_kernel( Scalar4 *d_pos, 143 | Scalar4 *d_vel, 144 | CUFFTCOMPLEX *gridX, 145 | CUFFTCOMPLEX *gridY, 146 | CUFFTCOMPLEX *gridZ, 147 | int group_size, 148 | int Nx, 149 | int Ny, 150 | int Nz, 151 | Scalar xi, 152 | Scalar eta, 153 | unsigned int *d_group_members, 154 | BoxDim box, 155 | const int P, 156 | Scalar3 gridh, 157 | Scalar prefac, 158 | Scalar expfac ); 159 | 160 | #endif 161 | -------------------------------------------------------------------------------- /PSEv1/ShearFunction.cc: -------------------------------------------------------------------------------- 1 | // Maintainer: Gang Wang 2 | // Updated to HOOMD2.x compatibility by Andrew M. Fiore 3 | 4 | /*! \file ShearFunction.cc 5 | \brief Defines ShearFunction class and relevant functions 6 | */ 7 | 8 | #ifdef WIN32 9 | #pragma warning( push ) 10 | #pragma warning( disable : 4103 4244 ) 11 | #endif 12 | 13 | #include "ShearFunction.h" 14 | 15 | using namespace std; 16 | 17 | void export_ShearFunction(pybind11::module& m) 18 | { 19 | 20 | pybind11::class_ >( m, "ShearFunction" ) 21 | .def(pybind11::init< >()) 22 | .def("getShearRate", &ShearFunction::getShearRate) 23 | .def("getStrain", &ShearFunction::getStrain) 24 | .def("getOffset", &ShearFunction::getOffset); 25 | 26 | } 27 | 28 | #ifdef WIN32 29 | #pragma warning( pop ) 30 | #endif 31 | -------------------------------------------------------------------------------- /PSEv1/ShearFunction.h: -------------------------------------------------------------------------------- 1 | #ifdef NVCC 2 | #error This header cannot be compiled by nvcc 3 | #endif 4 | 5 | #include 6 | 7 | #ifndef __SHEAR_FUNCTION_H__ 8 | #define __SHEAR_FUNCTION_H__ 9 | 10 | #include 11 | 12 | //! Abstract class representing the function of shear rate and shear strain 13 | /*! 
ShearFunction class, having three public pure virtual functions: 14 | 1) getShearRate; 2) getStrain; and 3) getOffset 15 | This interface can make it easier to add new shear functionality to HOOMD. 16 | Compared with previous approach, we can simply subclass this interface without 17 | changing any existing code or creating a new plugin. 18 | */ 19 | class ShearFunction 20 | { 21 | public: 22 | 23 | //! Get shear rate at certain timestep 24 | /*! \param timestep the timestep 25 | */ 26 | virtual double getShearRate(unsigned int timestep){ return double(0.0); } 27 | 28 | //! Get strain at certain timestep (unwrapped) 29 | /*! \param timestep the timestep 30 | */ 31 | virtual double getStrain(unsigned int timestep){ return double(0.0); } 32 | 33 | //! Get the offset of timestep (typically offset is the timestep when the shear starts) 34 | virtual unsigned int getOffset(){ return int(0); } 35 | 36 | }; 37 | 38 | //! Export the ShearFunction class to python 39 | void export_ShearFunction(pybind11::module& m); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /PSEv1/ShearFunctionWrap.cc: -------------------------------------------------------------------------------- 1 | // Maintainer: Gang Wang 2 | // Updated to HOOMD2.x compatibility by Andrew M. Fiore 3 | 4 | /*! 
\file ShearFunction.cc 5 | \brief Defines ShearFunction class and relevant functions 6 | */ 7 | 8 | #ifdef WIN32 9 | #pragma warning( push ) 10 | #pragma warning( disable : 4103 4244 ) 11 | #endif 12 | 13 | #include "ShearFunctionWrap.h" 14 | 15 | using namespace std; 16 | 17 | void export_ShearFunctionWrap(pybind11::module& m) 18 | { 19 | 20 | pybind11::class_ >( m, "ShearFunctionWrap", pybind11::base() ) 21 | .def(pybind11::init< >()) 22 | .def("getShearRate", &ShearFunction::getShearRate) 23 | .def("getStrain", &ShearFunction::getStrain) 24 | .def("getOffset", &ShearFunction::getOffset); 25 | 26 | } 27 | 28 | #ifdef WIN32 29 | #pragma warning( pop ) 30 | #endif 31 | -------------------------------------------------------------------------------- /PSEv1/ShearFunctionWrap.h: -------------------------------------------------------------------------------- 1 | #ifdef NVCC 2 | #error This header cannot be compiled by nvcc 3 | #endif 4 | 5 | #include 6 | 7 | #ifndef __SHEAR_FUNCTION_WRAP_H__ 8 | #define __SHEAR_FUNCTION_WRAP_H__ 9 | 10 | #include "ShearFunction.h" 11 | 12 | #include 13 | 14 | //! Abstract class representing the function of shear rate and shear strain 15 | /*! ShearFunction class, having three public pure virtual functions: 16 | 1) getShearRate; 2) getStrain; and 3) getOffset 17 | This interface can make it easier to add new shear functionality to HOOMD. 18 | Compared with previous approach, we can simply subclass this interface without 19 | changing any existing code or creating a new plugin. 20 | */ 21 | class ShearFunctionWrap : public ShearFunction 22 | { 23 | public: 24 | 25 | //! Get shear rate at certain timestep 26 | /*! \param timestep the timestep 27 | */ 28 | virtual double getShearRate(unsigned int timestep){ return double(0.0); } 29 | 30 | //! Get strain at certain timestep (unwrapped) 31 | /*! \param timestep the timestep 32 | */ 33 | virtual double getStrain(unsigned int timestep){ return double(0.0); } 34 | 35 | //! 
Get the offset of timestep (typically offset is the timestep when the shear starts) 36 | virtual unsigned int getOffset(){ return int(0); } 37 | 38 | }; 39 | 40 | 41 | void export_ShearFunctionWrap(pybind11::module& m); 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /PSEv1/SpecificShearFunction.cc: -------------------------------------------------------------------------------- 1 | // Maintainer: Gang Wang 2 | // Updated to HOOMD2.x compatibility by Andrew M. Fiore 3 | 4 | /*! \file ShearFunction.cc 5 | \brief Defines ShearFunction class and relevant functions 6 | */ 7 | 8 | #ifdef WIN32 9 | #pragma warning( push ) 10 | #pragma warning( disable : 4103 4244 ) 11 | #endif 12 | 13 | #include "SpecificShearFunction.h" 14 | 15 | using namespace std; 16 | 17 | void export_SpecificShearFunction(pybind11::module& m) 18 | { 19 | 20 | pybind11::class_ >( m, "SinShearFunction", pybind11::base()) 21 | .def(pybind11::init< double, double, unsigned int, double >()); 22 | 23 | pybind11::class_ > (m, "SteadyShearFunction", pybind11::base()) 24 | .def(pybind11::init< double, unsigned int, double >()); 25 | 26 | pybind11::class_ >(m, "ChirpShearFunction", pybind11::base()) 27 | .def(pybind11::init< double, double, double, double, unsigned int, double >()); 28 | 29 | pybind11::class_ >( m, "TukeyWindowFunction", pybind11::base()) 30 | .def(pybind11::init< double, double, unsigned int, double >()); 31 | 32 | pybind11::class_ >(m, "WindowedFunction", pybind11::base()) 33 | .def(pybind11::init< std::shared_ptr, std::shared_ptr >()); 34 | } 35 | 36 | #ifdef WIN32 37 | #pragma warning( pop ) 38 | #endif 39 | -------------------------------------------------------------------------------- /PSEv1/SpecificShearFunction.h: -------------------------------------------------------------------------------- 1 | #ifdef NVCC 2 | #error This header cannot be compiled by nvcc 3 | #endif 4 | 5 | #include 6 | 7 | #include "ShearFunction.h" 8 | 9 | 
#ifndef __SPECIFIC_SHEAR_FUNCTION_H__ 10 | #define __SPECIFIC_SHEAR_FUNCTION_H__ 11 | 12 | #include 13 | 14 | 15 | //! Simple sinusoidal shear implementing the abstract class ShearFunction 16 | class SinShearFunction : public ShearFunction 17 | { 18 | public: 19 | //! Constructor of SinShearFunction class 20 | /*! \param max_shear_rate maximum shear rate 21 | \param frequency real (NOT angular) frequency of oscillatory shear 22 | \param offset the offset of oscillatory shear 23 | \param dt the time interval 24 | */ 25 | SinShearFunction(double max_shear_rate, double frequency, unsigned int offset, double dt) : 26 | ShearFunction(), 27 | m_max_shear_rate(max_shear_rate), 28 | m_frequency(frequency), 29 | m_offset(offset), 30 | m_dt(dt) { } 31 | double getShearRate(unsigned int timestep) { 32 | return m_max_shear_rate * cos( m_frequency * 2 * m_pi * ( (timestep - m_offset) * m_dt ) ); 33 | } 34 | double getStrain(unsigned int timestep) { 35 | return m_max_shear_rate * sin( m_frequency * 2 * m_pi * ( (timestep - m_offset) * m_dt ) ) / m_frequency / 2 / m_pi; 36 | } 37 | unsigned int getOffset() { 38 | return m_offset; 39 | } 40 | private: 41 | const double m_max_shear_rate; //!< maximum shear rate 42 | const double m_frequency; //!< Real frequency, not angular frequency 43 | const unsigned int m_offset; //!< offset of the sinusoidal oscillatory shear 44 | const double m_dt; //!< time step 45 | static constexpr double m_pi = 3.1415926536; 46 | }; 47 | 48 | //! Simple steady shear implementing the abstract class ShearFunction 49 | class SteadyShearFunction : public ShearFunction 50 | { 51 | public: 52 | //! Constructor of SteadyShearFunction 53 | /*! 
\param shear_rate the shear rate 54 | \param offset the offset of the steady shear 55 | \param the time interval between each timestep 56 | */ 57 | SteadyShearFunction(double shear_rate, unsigned int offset, double dt) : 58 | ShearFunction(), 59 | m_shear_rate(shear_rate), 60 | m_offset(offset), 61 | m_dt(dt) { } 62 | double getShearRate(unsigned int timestep) { 63 | return m_shear_rate; 64 | } 65 | double getStrain(unsigned int timestep) { 66 | return m_shear_rate * (timestep - m_offset) * m_dt; 67 | } 68 | unsigned int getOffset() { 69 | return m_offset; 70 | } 71 | private: 72 | const double m_shear_rate; //!< constant shear rate 73 | const unsigned int m_offset; //!< offset of the steady shear 74 | const double m_dt; //!< time step 75 | }; 76 | 77 | //! Chirp oscillatory shear implementing abstract class ShearFunction 78 | /*! Adjusted from code of Zsigmond Varga, plugin PSEv1a_chirpv2 79 | */ 80 | class ChirpShearFunction : public ShearFunction 81 | { 82 | public: 83 | //! Constructor of ChirpShearFunction class 84 | /*! 
\param amp the strain amplitude of the chirp shear 85 | \param omega_0 the starting ANGULAR frequency of the shear 86 | \param omega_f the ending ANGULAR frequency of the shear 87 | \param periodT the total time of the chirp run 88 | \param offset the offset of the chirp return 89 | \param dt the time interval between each timestep 90 | */ 91 | ChirpShearFunction(double amp, double omega_0, double omega_f, double periodT, unsigned int offset, double dt) : 92 | ShearFunction(), 93 | m_amp(amp), 94 | m_omega_0(omega_0), 95 | m_omega_f(omega_f), 96 | m_periodT(periodT), 97 | m_offset(offset), 98 | m_dt(dt) { } 99 | double getShearRate(unsigned int timestep) { 100 | double current_omega = getCurrentOmega(timestep); 101 | double current_phase = getCurrentPhase(timestep); 102 | return m_amp * current_omega * cos(current_phase); 103 | } 104 | double getStrain(unsigned int timestep) { 105 | double current_phase = getCurrentPhase(timestep); 106 | return m_amp * sin( current_phase ); 107 | } 108 | unsigned int getOffset() { 109 | return m_offset; 110 | } 111 | private: 112 | double getCurrentOmega(unsigned int timestep) { 113 | return m_omega_0 * exp( m_dt * (timestep - m_offset) * logf(m_omega_f / m_omega_0) / m_periodT ); 114 | } 115 | double getCurrentPhase(unsigned int timestep) { 116 | return m_periodT * m_omega_0 / logf( m_omega_f / m_omega_0 ) * ( exp( m_dt * (timestep - m_offset) * logf(m_omega_f / m_omega_0) / m_periodT ) - 1 ); 117 | } 118 | const double m_amp; //!< Amplitude 119 | const double m_omega_0; //!< Minimum angular frequency 120 | const double m_omega_f; //!< Maximum angular frequency 121 | const double m_periodT; //!< Final time of Chirp 122 | const unsigned int m_offset; //!< offset of the shear 123 | const double m_dt; //!< time step 124 | }; 125 | 126 | 127 | //! Tukey Window Function implementing abstract class ShearFunction 128 | /*! 
Strictly speaking, this function is not a ShearFunction since it will only be 129 | used as a window function and applied to other ShearFunctions. This class should 130 | never be used by itself. However, since ShearFunction provides all the abstract 131 | functions it needs. We will call this a ShearFunction to avoid duplicate base classes 132 | TODO: Change the names of ShearFunction/getShearRate/getStrain to more general descriptions. 133 | */ 134 | class TukeyWindowFunction : public ShearFunction 135 | { 136 | public: 137 | //! Constructor of TukeyWindowFunction class 138 | /*! \param periodT the total time of the window 139 | \param tukey_param the parameter of Tukey window function, must be within (0, 1] 140 | \param offset the offset of the window 141 | \param dt the time interval between each timestep 142 | */ 143 | TukeyWindowFunction(double periodT, double tukey_param, unsigned int offset, double dt) : 144 | ShearFunction(), 145 | m_periodT(periodT), 146 | m_tukey_param(tukey_param), 147 | m_offset(offset), 148 | m_dt(dt) { 149 | m_omega_value = 2 * m_pi / tukey_param; 150 | } 151 | double getShearRate(unsigned int timestep) { 152 | double rel_time = (timestep - m_offset) * m_dt / m_periodT; // supposed to be within [0,1] 153 | if (rel_time <= 0 || rel_time >= 1) { 154 | return 0; 155 | } 156 | else if (rel_time >= m_tukey_param / 2 && rel_time <= 1 - m_tukey_param / 2) { 157 | return 0; 158 | } 159 | else if (rel_time < 0.5) { 160 | return -( sin( m_omega_value * (rel_time - m_tukey_param / 2) ) ) / 2 * m_omega_value / m_periodT; 161 | } 162 | else { 163 | return -( sin( m_omega_value * (rel_time - 1 + m_tukey_param / 2) ) ) / 2 * m_omega_value / m_periodT; 164 | } 165 | } 166 | double getStrain(unsigned int timestep) { 167 | double rel_time = (timestep - m_offset) * m_dt / m_periodT; // supposed to be within [0,1] 168 | if (rel_time <= 0 || rel_time >= 1) { 169 | return 0; 170 | } 171 | else if (rel_time >= m_tukey_param / 2 && rel_time <= 1 - 
m_tukey_param / 2) { 172 | return 1; 173 | } 174 | else if (rel_time < 0.5) { 175 | return ( 1 + cos( m_omega_value * (rel_time - m_tukey_param / 2) ) ) / 2; 176 | } 177 | else { 178 | return ( 1 + cos( m_omega_value * (rel_time - 1 + m_tukey_param / 2) ) ) / 2; 179 | } 180 | } 181 | unsigned int getOffset() { 182 | return m_offset; 183 | } 184 | private: 185 | const double m_periodT; //!< The time period of the window 186 | const double m_tukey_param; //!< The parameter of Tukey window function (scales the cosine lobe) 187 | const unsigned int m_offset; //!< offset of the window function 188 | const double m_dt; //!< time step 189 | static constexpr double m_pi = 3.1415926536; 190 | double m_omega_value; //!< omega value of the cosine function 191 | }; 192 | 193 | 194 | //! Windowed ShearFunction: A ShearFunction windowed by a window function (which is also a ShearFunction subclass) 195 | /*! WindowedFunction represents a strain field whose strain is the product of original ShearFunction and the window 196 | function. Therefore, the shear rate satisfies the product rule of derivative. 197 | */ 198 | class WindowedFunction : public ShearFunction 199 | { 200 | public: 201 | //! Constructor of WindowedFunction class 202 | /*! 
It is recommended to use the same offset for base shear function and window function 203 | \param base_shear_func the base shear function 204 | \param window_func the window function 205 | */ 206 | WindowedFunction(std::shared_ptr base_shear_func, std::shared_ptr window_func) : 207 | ShearFunction(), 208 | m_base_shear_func(base_shear_func), 209 | m_window_func(window_func) { } 210 | double getShearRate(unsigned int timestep) { 211 | return ( m_base_shear_func -> getShearRate(timestep) ) * ( m_window_func -> getStrain(timestep) ) + 212 | ( m_base_shear_func -> getStrain(timestep) ) * ( m_window_func -> getShearRate(timestep) ); 213 | } 214 | double getStrain(unsigned int timestep) { 215 | return ( m_base_shear_func -> getStrain(timestep) ) * ( m_window_func -> getStrain(timestep) ); 216 | } 217 | unsigned int getOffset() { 218 | return m_base_shear_func -> getOffset(); 219 | } 220 | private: 221 | const std::shared_ptr m_base_shear_func; //!< Base shear function 222 | const std::shared_ptr m_window_func; //!< Window function 223 | }; 224 | 225 | 226 | void export_SpecificShearFunction(pybind11::module& m); 227 | 228 | #endif 229 | -------------------------------------------------------------------------------- /PSEv1/Stokes.cc: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 
9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 
40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Gang Wang 52 | // Modified by Andrew Fiore 53 | 54 | #ifdef WIN32 55 | #pragma warning( push ) 56 | #pragma warning( disable : 4244 ) 57 | #endif 58 | 59 | using namespace std; 60 | 61 | #include 62 | #include 63 | 64 | #include "Stokes.h" 65 | #include "Stokes.cuh" 66 | 67 | #include 68 | #include 69 | #include 70 | 71 | /*! \file Stokes.cc 72 | \brief Contains code for the Stokes class 73 | */ 74 | 75 | /*! 76 | \param sysdef SystemDefinition this method will act on. Must not be NULL. 
77 | \param group The group of particles this integration method is to work on 78 | \param T temperature 79 | \param seed seed for random number generator 80 | \param nlist neighbor list 81 | \param xi Ewald parameter 82 | \param m_error Tolerance for all calculations 83 | 84 | */ 85 | Stokes::Stokes( std::shared_ptr sysdef, 86 | std::shared_ptr group, 87 | std::shared_ptr T, 88 | unsigned int seed, 89 | std::shared_ptr nlist, 90 | Scalar xi, 91 | Scalar error ) 92 | : IntegrationMethodTwoStep(sysdef, group), 93 | m_T(T), 94 | m_seed(seed), 95 | m_nlist(nlist), 96 | m_xi(xi), 97 | m_error(error) 98 | { 99 | m_exec_conf->msg->notice(5) << "Constructing Stokes" << endl; 100 | 101 | // Hash the User's Seed to make it less likely to be a low positive integer 102 | m_seed = m_seed * 0x12345677 + 0x12345; m_seed ^= (m_seed >> 16); m_seed *= 0x45679; 103 | 104 | // only one GPU is supported 105 | if (!m_exec_conf->isCUDAEnabled()) 106 | { 107 | m_exec_conf->msg->error() << "Creating a Stokes when CUDA is disabled" << endl; 108 | throw std::runtime_error("Error initializing Stokes"); 109 | } 110 | 111 | } 112 | 113 | //! Destructor for the Stokes class 114 | Stokes::~Stokes() 115 | { 116 | m_exec_conf->msg->notice(5) << "Destroying Stokes" << endl; 117 | cufftDestroy(plan); 118 | } 119 | 120 | 121 | /*! 122 | Set the parameters for Hydrodynamic Calculation. 
Do once at the beginning 123 | of the simulation and then reuse computed values 124 | 125 | - Pre-tabulate real space interaction functions (f and g) 126 | - Set wave space vectors 127 | 128 | */ 129 | void Stokes::setParams() 130 | { 131 | // Try two Lanczos iterations to start (number of iterations will adapt as needed) 132 | m_m_Lanczos = 2; 133 | 134 | // Real space cutoff 135 | m_ewald_cut = sqrtf( - logf( m_error ) ) / m_xi; 136 | 137 | // Number of grid points 138 | int kmax = int( 2.0 * sqrtf( - logf( m_error ) ) * m_xi ) + 1; 139 | 140 | const BoxDim& box = m_pdata->getBox(); // Only for box not changing with time. 141 | Scalar3 L = box.getL(); 142 | 143 | m_Nx = int( kmax * L.x / (2.0 * 3.1415926536 ) * 2.0 ) + 1; 144 | m_Ny = int( kmax * L.y / (2.0 * 3.1415926536 ) * 2.0 ) + 1; 145 | m_Nz = int( kmax * L.z / (2.0 * 3.1415926536 ) * 2.0 ) + 1; 146 | 147 | // Get list of int values between 8 and 4096 that can be written as 148 | // (2^a)*(3^b)*(5^c) 149 | // Then sort list from low to high 150 | // 151 | // Go to such large values so as to be able simulate boxes with large 152 | // aspect ratios 153 | std::vector Mlist; 154 | for ( int ii = 0; ii < 13; ++ii ){ 155 | int pow2 = 1; 156 | for ( int i = 0; i < ii; ++i ){ 157 | pow2 *= 2; 158 | } 159 | for ( int jj = 0; jj < 8; ++jj ){ 160 | int pow3 = 1; 161 | for ( int j = 0; j < jj; ++j ){ 162 | pow3 *= 3; 163 | } 164 | for ( int kk = 0; kk < 6; ++kk ){ 165 | int pow5 = 1; 166 | for ( int k = 0; k < kk; ++k ){ 167 | pow5 *= 5; 168 | } 169 | int Mcurr = pow2 * pow3 * pow5; 170 | if ( Mcurr >= 8 && Mcurr <= 4096 ){ 171 | Mlist.push_back(Mcurr); 172 | } 173 | } 174 | } 175 | } 176 | std::sort(Mlist.begin(), Mlist.end()); 177 | const int nmult = Mlist.size(); 178 | 179 | // Compute the number of grid points in each direction 180 | // 181 | // Number of grid points should be a power of 2,3,5 for most efficient FFTs 182 | for ( int ii = 0; ii < nmult; ++ii ){ 183 | if (m_Nx <= Mlist[ii]){ 184 | m_Nx = Mlist[ii]; 
185 | break; 186 | } 187 | } 188 | for ( int ii = 0; ii < nmult; ++ii ){ 189 | if (m_Ny <= Mlist[ii]){ 190 | m_Ny = Mlist[ii]; 191 | break; 192 | } 193 | } 194 | for ( int ii = 0; ii < nmult; ++ii ){ 195 | if (m_Nz <= Mlist[ii]){ 196 | m_Nz = Mlist[ii]; 197 | break; 198 | } 199 | } 200 | 201 | // Check that we haven't asked for too many grid points 202 | // Max allowable by cuFFT is 512^3 203 | if ( m_Nx * m_Ny * m_Nz > 512*512*512 ){ 204 | 205 | printf("Requested Number of Fourier Nodes Exceeds Max Dimension of 512^3\n"); 206 | printf("Mx = %i \n", m_Nx); 207 | printf("My = %i \n", m_Ny); 208 | printf("Mz = %i \n", m_Nz); 209 | printf("Mx*My*Mz = %i \n", m_Nx * m_Ny * m_Nz); 210 | printf("\n"); 211 | printf("Note to User: Fix is to reduce xi and try again. \n"); 212 | 213 | exit(EXIT_FAILURE); 214 | } 215 | 216 | // Maximum eigenvalue of A'*A to scale P 217 | Scalar gamma = m_max_strain; 218 | Scalar gamma2 = gamma*gamma; 219 | Scalar lambda = 1.0 + gamma2/2.0 + gamma*sqrtf(1.0 + gamma2/4.0); 220 | 221 | // Grid spacing 222 | m_gridh = L / make_scalar3(m_Nx,m_Ny,m_Nz); 223 | 224 | // Parameters for the Spectral Ewald Method (Lindbo and Tornberg, J. Comp. 
Phys., 2011) 225 | m_gaussm = 1.0; 226 | while ( erfcf( m_gaussm / sqrtf(2.0*lambda) ) > m_error ){ 227 | m_gaussm = m_gaussm + 0.01; 228 | } 229 | m_gaussP = int( m_gaussm*m_gaussm / 3.1415926536 ) + 1; 230 | 231 | if (m_gaussP > m_Nx) m_gaussP = m_Nx; // Can't be supported beyond grid 232 | if (m_gaussP > m_Ny) m_gaussP = m_Ny; 233 | if (m_gaussP > m_Nz) m_gaussP = m_Nz; 234 | Scalar w = m_gaussP*m_gridh.x / 2.0; // Gaussian width in simulation units 235 | Scalar xisq = m_xi * m_xi; 236 | m_eta = (2.0*w/m_gaussm)*(2.0*w/m_gaussm) * ( xisq ); // Gaussian splitting parameter 237 | 238 | // Print summary to command line output 239 | printf("\n"); 240 | printf("\n"); 241 | m_exec_conf->msg->notice(2) << "--- NUFFT Hydrodynamics Statistics ---" << endl; 242 | m_exec_conf->msg->notice(2) << "Mx: " << m_Nx << endl; 243 | m_exec_conf->msg->notice(2) << "My: " << m_Ny << endl; 244 | m_exec_conf->msg->notice(2) << "Mz: " << m_Nz << endl; 245 | m_exec_conf->msg->notice(2) << "rcut: " << m_ewald_cut << endl; 246 | m_exec_conf->msg->notice(2) << "Points per radius (x,y,z): " << m_Nx / L.x << ", " << m_Ny / L.y << ", " << m_Nz / L.z << endl; 247 | m_exec_conf->msg->notice(2) << "--- Gaussian Spreading Parameters ---" << endl; 248 | m_exec_conf->msg->notice(2) << "gauss_m: " << m_gaussm << endl; 249 | m_exec_conf->msg->notice(2) << "gauss_P: " << m_gaussP << endl; 250 | m_exec_conf->msg->notice(2) << "gauss_eta: " << m_eta << endl; 251 | m_exec_conf->msg->notice(2) << "gauss_w: " << w << endl; 252 | m_exec_conf->msg->notice(2) << "gauss_gridh (x,y,z): " << L.x/m_Nx << ", " << L.y/m_Ny << ", " << L.z/m_Nz << endl; 253 | printf("\n"); 254 | printf("\n"); 255 | 256 | // Create plan for CUFFT on the GPU 257 | cufftPlan3d(&plan, m_Nx, m_Ny, m_Nz, CUFFT_C2C); 258 | 259 | // Prepare GPUArrays for grid vectors and gridded forces 260 | GPUArray n_gridk(m_Nx*m_Ny*m_Nz, m_exec_conf); 261 | m_gridk.swap(n_gridk); 262 | GPUArray n_gridX(m_Nx*m_Ny*m_Nz, m_exec_conf); 263 | 
m_gridX.swap(n_gridX); 264 | GPUArray n_gridY(m_Nx*m_Ny*m_Nz, m_exec_conf); 265 | m_gridY.swap(n_gridY); 266 | GPUArray n_gridZ(m_Nx*m_Ny*m_Nz, m_exec_conf); 267 | m_gridZ.swap(n_gridZ); 268 | 269 | // Get list of reciprocal space vectors, and scaling factor for the wave space calculation at each grid point 270 | ArrayHandle h_gridk(m_gridk, access_location::host, access_mode::readwrite); 271 | for (int i = 0; i < m_Nx; i++) { 272 | for (int j = 0; j < m_Ny; j++) { 273 | for (int k = 0; k < m_Nz; k++) { 274 | 275 | // Index into grid vector storage array 276 | int idx = i * m_Ny*m_Nz + j * m_Nz + k; 277 | 278 | // k goes from -N/2 to N/2 279 | h_gridk.data[idx].x = 2.0*3.1415926536 * ((i < ( m_Nx + 1 ) / 2) ? i : i - m_Nx) / L.x; 280 | h_gridk.data[idx].y = 2.0*3.1415926536 * ((j < ( m_Ny + 1 ) / 2) ? j : j - m_Ny) / L.y; 281 | h_gridk.data[idx].z = 2.0*3.1415926536 * ((k < ( m_Nz + 1 ) / 2) ? k : k - m_Nz) / L.z; 282 | 283 | // k dot k 284 | Scalar k2 = h_gridk.data[idx].x*h_gridk.data[idx].x + h_gridk.data[idx].y*h_gridk.data[idx].y + h_gridk.data[idx].z*h_gridk.data[idx].z; 285 | 286 | // Scaling factor used in wave space sum 287 | // 288 | // Can't include k=0 term in the Ewald sum 289 | if (i == 0 && j == 0 && k == 0){ 290 | h_gridk.data[idx].w = 0; 291 | } 292 | else{ 293 | // Have to divide by Nx*Ny*Nz to normalize the FFTs 294 | h_gridk.data[idx].w = 6.0*3.1415926536 * (1.0 + k2/4.0/xisq) * expf( -(1-m_eta) * k2/4.0/xisq ) / ( k2 ) / Scalar( m_Nx*m_Ny*m_Nz ); 295 | } 296 | 297 | } 298 | } 299 | } 300 | 301 | // Store the coefficients for the real space part of Ewald summation 302 | // 303 | // Will precompute scaling factors for real space component of summation for a given 304 | // discretization to speed up GPU calculations 305 | // 306 | // Do calculation in double precision, then truncate and tabulate, because the 307 | // expressions don't behave very well numerically, and double precision ensures 308 | // it works. 
309 | m_ewald_dr = 0.001; // Distance resolution 310 | m_ewald_n = m_ewald_cut / m_ewald_dr - 1; // Number of entries in tabulation 311 | 312 | double dr = 0.0010000000000000; 313 | 314 | // Assume all particles have radius of 1.0 315 | Scalar pi12 = 1.77245385091; 316 | Scalar aa = 1.0; 317 | Scalar axi = aa * m_xi; 318 | Scalar axi2 = axi * axi; 319 | m_self = (1. + 4.*pi12*axi*erfc(2.*axi) - exp(-4.*axi2))/(4.*pi12*axi*aa); 320 | 321 | // Allocate storage for real space Ewald table 322 | int nR = m_ewald_n + 1; // number of entries in ewald table 323 | GPUArray n_ewaldC1( nR, m_exec_conf); 324 | m_ewaldC1.swap(n_ewaldC1); 325 | ArrayHandle h_ewaldC1(m_ewaldC1, access_location::host, access_mode::readwrite); 326 | 327 | // Functions are complicated so calculation should be done in double precision, then truncated to single precision 328 | // in order to ensure accurate evaluation 329 | double xi = m_xi; 330 | double Pi = 3.141592653589793; 331 | double a = aa; 332 | 333 | // Fill tables 334 | for ( int kk = 0; kk < nR; kk++ ) 335 | { 336 | 337 | // Initialize entries 338 | h_ewaldC1.data[ kk ].x = 0.0; // UF1 at r 339 | h_ewaldC1.data[ kk ].y = 0.0; // UF2 at r 340 | h_ewaldC1.data[ kk ].z = 0.0; // UF1 at r + dr 341 | h_ewaldC1.data[ kk ].w = 0.0; // UF2 at r + dr 342 | 343 | // Distance for current entry 344 | double r = double( kk ) * dr + dr; 345 | double Imrr = 0, rr = 0; 346 | 347 | // Expression have been simplified assuming no overlap, touching, and overlap 348 | if ( r > 2.0*a ){ 349 | 350 | Imrr = -pow(a,-1) + (pow(a,2)*pow(r,-3))/2. + (3*pow(r,-1))/4. + (3*erfc(r*xi)*pow(a,-2)*pow(r,-3)*(-12*pow(r,4) + pow(xi,-4)))/128. + 351 | pow(a,-2)*((9*r)/32. - (3*pow(r,-3)*pow(xi,-4))/128.) + 352 | (erfc((2*a + r)*xi)*(128*pow(a,-1) + 64*pow(a,2)*pow(r,-3) + 96*pow(r,-1) + pow(a,-2)*(36*r - 3*pow(r,-3)*pow(xi,-4))))/256. + 353 | (erfc(2*a*xi - r*xi)*(128*pow(a,-1) - 64*pow(a,2)*pow(r,-3) - 96*pow(r,-1) + pow(a,-2)*(-36*r + 3*pow(r,-3)*pow(xi,-4))))/ 354 | 256. 
+ (3*exp(-(pow(r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-2)*pow(xi,-3)*(1 + 6*pow(r,2)*pow(xi,2)))/64. + 355 | (exp(-(pow(2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 356 | (8*r*pow(a,2)*pow(xi,2) - 16*pow(a,3)*pow(xi,2) + a*(2 - 28*pow(r,2)*pow(xi,2)) - 3*(r + 6*pow(r,3)*pow(xi,2))))/128. + 357 | (exp(-(pow(-2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 358 | (8*r*pow(a,2)*pow(xi,2) + 16*pow(a,3)*pow(xi,2) + a*(-2 + 28*pow(r,2)*pow(xi,2)) - 3*(r + 6*pow(r,3)*pow(xi,2))))/128.; 359 | 360 | rr = -pow(a,-1) - pow(a,2)*pow(r,-3) + (3*pow(r,-1))/2. + (3*pow(a,-2)*pow(r,-3)*(4*pow(r,4) + pow(xi,-4)))/64. + 361 | (erfc(2*a*xi - r*xi)*(64*pow(a,-1) + 64*pow(a,2)*pow(r,-3) - 96*pow(r,-1) + pow(a,-2)*(-12*r - 3*pow(r,-3)*pow(xi,-4))))/128. + 362 | (erfc((2*a + r)*xi)*(64*pow(a,-1) - 64*pow(a,2)*pow(r,-3) + 96*pow(r,-1) + pow(a,-2)*(12*r + 3*pow(r,-3)*pow(xi,-4))))/128. + 363 | (3*exp(-(pow(r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-2)*pow(xi,-3)*(-1 + 2*pow(r,2)*pow(xi,2)))/32. - 364 | ((2*a + 3*r)*exp(-(pow(-2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 365 | (-1 - 8*a*r*pow(xi,2) + 8*pow(a,2)*pow(xi,2) + 2*pow(r,2)*pow(xi,2)))/64. + 366 | ((2*a - 3*r)*exp(-(pow(2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 367 | (-1 + 8*a*r*pow(xi,2) + 8*pow(a,2)*pow(xi,2) + 2*pow(r,2)*pow(xi,2)))/64. - 368 | (3*erfc(r*xi)*pow(a,-2)*pow(r,-3)*pow(xi,-4)*(1 + 4*pow(r,4)*pow(xi,4)))/64.; 369 | 370 | } 371 | else if ( r == 2.0*a ){ 372 | 373 | Imrr = -(pow(a,-5)*(3 + 16*a*xi*pow(Pi,-0.5))*pow(xi,-4))/2048. + (3*erfc(2*a*xi)*pow(a,-5)*(-192*pow(a,4) + pow(xi,-4)))/1024. + 374 | erfc(4*a*xi)*(pow(a,-1) - (3*pow(a,-5)*pow(xi,-4))/2048.) + 375 | (exp(-16*pow(a,2)*pow(xi,2))*pow(a,-4)*pow(Pi,-0.5)*pow(xi,-3)*(-1 - 64*pow(a,2)*pow(xi,2)))/256. 
+ 376 | (3*exp(-4*pow(a,2)*pow(xi,2))*pow(a,-4)*pow(Pi,-0.5)*pow(xi,-3)*(1 + 24*pow(a,2)*pow(xi,2)))/256.; 377 | 378 | rr = (pow(a,-5)*(3 + 16*a*xi*pow(Pi,-0.5))*pow(xi,-4))/1024. + erfc(2*a*xi)*((-3*pow(a,-1))/8. - (3*pow(a,-5)*pow(xi,-4))/512.) + 379 | erfc(4*a*xi)*(pow(a,-1) + (3*pow(a,-5)*pow(xi,-4))/1024.) + 380 | (exp(-16*pow(a,2)*pow(xi,2))*pow(a,-4)*pow(Pi,-0.5)*pow(xi,-3)*(1 - 32*pow(a,2)*pow(xi,2)))/128. + 381 | (3*exp(-4*pow(a,2)*pow(xi,2))*pow(a,-4)*pow(Pi,-0.5)*pow(xi,-3)*(-1 + 8*pow(a,2)*pow(xi,2)))/128.; 382 | 383 | } 384 | else if ( r < 2*a){ 385 | 386 | Imrr = (-9*r*pow(a,-2))/32. + pow(a,-1) - (pow(a,2)*pow(r,-3))/2. - (3*pow(r,-1))/4. + 387 | (3*erfc(r*xi)*pow(a,-2)*pow(r,-3)*(-12*pow(r,4) + pow(xi,-4)))/128. + 388 | (erfc((-2*a + r)*xi)*(-128*pow(a,-1) + 64*pow(a,2)*pow(r,-3) + 96*pow(r,-1) + pow(a,-2)*(36*r - 3*pow(r,-3)*pow(xi,-4))))/ 389 | 256. + (erfc((2*a + r)*xi)*(128*pow(a,-1) + 64*pow(a,2)*pow(r,-3) + 96*pow(r,-1) + pow(a,-2)*(36*r - 3*pow(r,-3)*pow(xi,-4))))/ 390 | 256. + (3*exp(-(pow(r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-2)*pow(xi,-3)*(1 + 6*pow(r,2)*pow(xi,2)))/64. + 391 | (exp(-(pow(2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 392 | (8*r*pow(a,2)*pow(xi,2) - 16*pow(a,3)*pow(xi,2) + a*(2 - 28*pow(r,2)*pow(xi,2)) - 3*(r + 6*pow(r,3)*pow(xi,2))))/128. + 393 | (exp(-(pow(-2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 394 | (8*r*pow(a,2)*pow(xi,2) + 16*pow(a,3)*pow(xi,2) + a*(-2 + 28*pow(r,2)*pow(xi,2)) - 3*(r + 6*pow(r,3)*pow(xi,2))))/128.; 395 | 396 | rr = ((2*a + 3*r)*pow(a,-2)*pow(2*a - r,3)*pow(r,-3))/16. + 397 | (erfc((-2*a + r)*xi)*(-64*pow(a,-1) - 64*pow(a,2)*pow(r,-3) + 96*pow(r,-1) + pow(a,-2)*(12*r + 3*pow(r,-3)*pow(xi,-4))))/128. + 398 | (erfc((2*a + r)*xi)*(64*pow(a,-1) - 64*pow(a,2)*pow(r,-3) + 96*pow(r,-1) + pow(a,-2)*(12*r + 3*pow(r,-3)*pow(xi,-4))))/128. + 399 | (3*exp(-(pow(r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-2)*pow(xi,-3)*(-1 + 2*pow(r,2)*pow(xi,2)))/32. 
- 400 | ((2*a + 3*r)*exp(-(pow(-2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 401 | (-1 - 8*a*r*pow(xi,2) + 8*pow(a,2)*pow(xi,2) + 2*pow(r,2)*pow(xi,2)))/64. + 402 | ((2*a - 3*r)*exp(-(pow(2*a + r,2)*pow(xi,2)))*pow(a,-2)*pow(Pi,-0.5)*pow(r,-3)*pow(xi,-3)* 403 | (-1 + 8*a*r*pow(xi,2) + 8*pow(a,2)*pow(xi,2) + 2*pow(r,2)*pow(xi,2)))/64. - 404 | (3*erfc(r*xi)*pow(a,-2)*pow(r,-3)*pow(xi,-4)*(1 + 4*pow(r,4)*pow(xi,4)))/64.; 405 | 406 | } 407 | 408 | // Save values to table 409 | h_ewaldC1.data[ kk ].x = Scalar( Imrr ); // UF1 410 | h_ewaldC1.data[ kk ].y = Scalar( rr ); // UF2 411 | 412 | } // kk loop over distances 413 | 414 | // Both pieces of UF data for faster interpolation (r and r+dr stored in same Scalar4) 415 | for ( int kk = 0; kk < (nR-1); kk++ ){ 416 | 417 | int offset1 = kk; 418 | int offset2 = (kk+1); 419 | 420 | h_ewaldC1.data[ offset1 ].z = h_ewaldC1.data[ offset2 ].x; 421 | h_ewaldC1.data[ offset1 ].w = h_ewaldC1.data[ offset2 ].y; 422 | } 423 | 424 | } 425 | 426 | /*! 
\param timestep Current time step 427 | \post Particle positions and velocities are moved forward to timestep+1 428 | */ 429 | void Stokes::integrateStepOne(unsigned int timestep) 430 | { 431 | 432 | // Recompute neighborlist ( if needed ) 433 | m_nlist->compute(timestep); 434 | 435 | // access the neighbor list 436 | ArrayHandle d_n_neigh(m_nlist->getNNeighArray(), access_location::device, access_mode::read); 437 | ArrayHandle d_nlist(m_nlist->getNListArray(), access_location::device, access_mode::read); 438 | ArrayHandle d_headlist(m_nlist->getHeadList(), access_location::device, access_mode::read); 439 | 440 | // Consistency check 441 | unsigned int group_size = m_group->getNumMembers(); 442 | assert(group_size <= m_pdata->getN()); 443 | if (group_size == 0) 444 | return; 445 | 446 | // Get particle forces 447 | const GPUArray< Scalar4 >& net_force = m_pdata->getNetForce(); 448 | 449 | // profile this step 450 | if (m_prof) 451 | m_prof->push(m_exec_conf, "Stokes step 1 (no step 2)"); 452 | 453 | // Access all the needed data for the calculation 454 | ArrayHandle d_pos(m_pdata->getPositions(), access_location::device, access_mode::readwrite); 455 | ArrayHandle d_vel(m_pdata->getVelocities(), access_location::device, access_mode::readwrite); 456 | ArrayHandle d_accel(m_pdata->getAccelerations(), access_location::device, access_mode::readwrite); 457 | ArrayHandle d_net_force(net_force, access_location::device, access_mode::read); 458 | ArrayHandle d_image(m_pdata->getImages(), access_location::device, access_mode::readwrite); 459 | 460 | BoxDim box = m_pdata->getBox(); 461 | ArrayHandle< unsigned int > d_index_array(m_group->getIndexArray(), access_location::device, access_mode::read); 462 | 463 | // Grid vectors 464 | ArrayHandle d_gridk(m_gridk, access_location::device, access_mode::readwrite); 465 | ArrayHandle d_gridX(m_gridX, access_location::device, access_mode::readwrite); 466 | ArrayHandle d_gridY(m_gridY, access_location::device, access_mode::readwrite); 
467 | ArrayHandle d_gridZ(m_gridZ, access_location::device, access_mode::readwrite); 468 | 469 | // Real space interaction tabulation 470 | ArrayHandle d_ewaldC1(m_ewaldC1, access_location::device, access_mode::read); 471 | 472 | // Calculate the shear rate of the current timestep 473 | Scalar current_shear_rate = m_shear_func -> getShearRate(timestep); 474 | 475 | // perform the update on the GPU 476 | gpu_stokes_step_one( 477 | d_pos.data, 478 | d_vel.data, 479 | d_accel.data, 480 | d_image.data, 481 | d_index_array.data, 482 | group_size, 483 | box, 484 | m_deltaT, 485 | 256, 486 | d_net_force.data, 487 | m_T->getValue(timestep), 488 | timestep, 489 | m_seed, 490 | m_xi, 491 | m_eta, 492 | m_ewald_cut, 493 | m_ewald_dr, 494 | m_ewald_n, 495 | d_ewaldC1.data, 496 | m_self, 497 | d_gridk.data, 498 | d_gridX.data, 499 | d_gridY.data, 500 | d_gridZ.data, 501 | plan, 502 | m_Nx, 503 | m_Ny, 504 | m_Nz, 505 | d_n_neigh.data, 506 | d_nlist.data, 507 | d_headlist.data, 508 | m_m_Lanczos, 509 | m_pdata->getN(), 510 | m_gaussP, 511 | m_gridh, 512 | m_error, 513 | current_shear_rate 514 | ); 515 | 516 | if (m_exec_conf->isCUDAErrorCheckingEnabled()) 517 | CHECK_CUDA_ERROR(); 518 | 519 | // done profiling 520 | if (m_prof) 521 | m_prof->pop(m_exec_conf); 522 | 523 | } 524 | 525 | /*! \param timestep Current time step 526 | \post Nothing is done. 
527 | */ 528 | void Stokes::integrateStepTwo(unsigned int timestep) 529 | { 530 | } 531 | 532 | void export_Stokes(pybind11::module& m) 533 | { 534 | pybind11::class_ > (m, "Stokes", pybind11::base()) 535 | .def(pybind11::init< std::shared_ptr, std::shared_ptr, std::shared_ptr, unsigned int, std::shared_ptr, Scalar, Scalar >()) 536 | .def("setT", &Stokes::setT) 537 | .def("setParams", &Stokes::setParams) 538 | .def("setShear", &Stokes::setShear) 539 | ; 540 | } 541 | 542 | #ifdef WIN32 543 | #pragma warning( pop ) 544 | #endif 545 | -------------------------------------------------------------------------------- /PSEv1/Stokes.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 
20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Gang Wang 52 | // Modified by Andrew Fiore 53 | 54 | 55 | #include "Stokes.cuh" 56 | #include "Mobility.cuh" 57 | #include "Brownian.cuh" 58 | #include "Helper.cuh" 59 | 60 | #include "hoomd/Saru.h" 61 | #include "hoomd/TextureTools.h" 62 | 63 | #include 64 | 65 | #include 66 | #include 67 | 68 | #ifdef WIN32 69 | #include 70 | #else 71 | #include 72 | #endif 73 | 74 | //! command to convert floats or doubles to integers 75 | #ifdef SINGLE_PRECISION 76 | #define __scalar2int_rd __float2int_rd 77 | #else 78 | #define __scalar2int_rd __double2int_rd 79 | #endif 80 | 81 | #ifndef __ERRCHK_CUH__ 82 | #define __ERRCHK_CUH__ 83 | //! Function to check for errors 84 | #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } 85 | /*! 86 | \param code returned error code 87 | \param file which file the error occured in 88 | \param line which line error check was tripped 89 | \param abort whether to kill code upon error trigger 90 | */ 91 | inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) 92 | { 93 | if (code != cudaSuccess) 94 | { 95 | fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); 96 | if (abort) exit(code); 97 | } 98 | } 99 | #endif 100 | 101 | /*! \file Stokes.cu 102 | \brief Defines GPU kernel code for integration considering hydrodynamic interactions on the GPU. Used by Stokes.cc. 103 | */ 104 | 105 | 106 | //! Shared memory array for partial sum of dot product kernel 107 | extern __shared__ Scalar partial_sum[]; 108 | extern __shared__ Scalar4 shared_Fpos[]; 109 | 110 | //! Texture for reading table values 111 | scalar4_tex_t tables1_tex; 112 | //! Texture for reading particle positions 113 | scalar4_tex_t pos_tex; 114 | 115 | //! Takes the integration on a group of particles 116 | /*! 
\param d_pos array of particle positions 117 | \param d_vel array of particle velocities 118 | \param d_delu1 first 4 components of gradient of particle velocity 119 | \param d_delu2 second 4 components of gradient of particle velocity 120 | \param d_accel array of particle "accelerations" (This is an overdamped integrator, so accelerations don't have physical meaning) 121 | \param d_image array of particle images 122 | \param d_group_members Device array listing the indicies of the mebers of the group to integrate 123 | \param group_size Number of members in the group 124 | \param box Box dimensions for periodic boundary condition handling 125 | \param deltaT timestep 126 | \param d_net_force net force on each particle, only used to set "accelerations" 127 | 128 | This kernel must be executed with a 1D grid of any block size such that the number of threads is greater than or 129 | equal to the number of members in the group. The kernel's implementation simply reads one particle in each thread 130 | and updates that particle. (Not necessary true for Stokesian Dynamics simulation) 131 | 132 | Performance notes: 133 | Particle properties are read via the texture cache to optimize the bandwidth obtained with sparse groups. The writes 134 | in sparse groups will not be coalesced. However, because ParticleGroup sorts the index list the writes will be as 135 | contiguous as possible leading to fewer memory transactions on compute 1.3 hardware and more cache hits on Fermi. (Not sure about this..) 
136 | */ 137 | extern "C" __global__ 138 | void gpu_stokes_step_one_kernel( 139 | Scalar4 *d_pos, 140 | Scalar4 *d_vel, 141 | Scalar3 *d_accel, 142 | int3 *d_image, 143 | unsigned int *d_group_members, 144 | unsigned int group_size, 145 | BoxDim box, 146 | Scalar deltaT, 147 | Scalar4 *d_net_force, 148 | Scalar shear_rate 149 | ){ 150 | 151 | // determine which particle this thread works on (MEM TRANSFER: 4 bytes) 152 | int group_idx = blockIdx.x * blockDim.x + threadIdx.x; 153 | 154 | if (group_idx < group_size){ 155 | 156 | unsigned int idx = d_group_members[group_idx]; 157 | 158 | // read the particle's posision (MEM TRANSFER: 16 bytes) 159 | Scalar4 postype = d_pos[idx]; 160 | Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z); 161 | 162 | // read the particle's velocity and acceleration (MEM TRANSFER: 32 bytes) 163 | Scalar4 velmass = d_vel[idx]; 164 | Scalar mass = velmass.w; 165 | Scalar3 vel = make_scalar3(velmass.x, velmass.y, velmass.z); 166 | 167 | // Add the shear 168 | vel.x += shear_rate * pos.y; 169 | 170 | Scalar4 net_force = d_net_force[idx]; 171 | Scalar3 accel = make_scalar3(net_force.x, net_force.y, net_force.z); 172 | 173 | // update the position 174 | Scalar3 dx = vel * deltaT; 175 | 176 | // FLOPS: 3 177 | pos += dx; 178 | 179 | accel = accel/mass; 180 | 181 | // read in the particle's image (MEM TRANSFER: 16 bytes) 182 | int3 image = d_image[idx]; 183 | 184 | // fix the periodic boundary conditions (FLOPS: 15) 185 | box.wrap(pos, image); 186 | 187 | // write out the results (MEM_TRANSFER: 48 bytes) 188 | d_accel[idx] = accel; 189 | d_pos[idx] = make_scalar4(pos.x, pos.y, pos.z, postype.w); 190 | d_image[idx] = image; 191 | } 192 | } 193 | 194 | /*! 
\param d_pos array of particle positions 195 | \param d_vel array of particle velocities 196 | \param d_accel array of particle accelerations 197 | \param d_image array of particle images 198 | \param d_group_members Device array listing the indicies of the mebers of the group to integrate 199 | \param group_size Number of members in the group ( i.e. number of particles to consider ) 200 | \param box Box dimensions for periodic boundary condition handling 201 | \param dt timestep 202 | \param block_size optimum block size returned by an autotuner 203 | \param d_net_force net force on the particles 204 | \param T temperature 205 | \param timestep time step 206 | \param seed seed for random number generation 207 | \param xi splitting coefficient for Ewald summation 208 | \param eta Spectral splitting parameter 209 | \param P number of nodes in support of each gaussian for k-space sum 210 | \param ewald_cut cut off radius for Ewald summation 211 | \param ewald_dr discretization of look up tables 212 | \param ewald_n number of elements in look up tables 213 | \param d_ewaldC Ewald coefficients for real space sum 214 | \param d_gridk reciprocal lattice vectors and parameters for Ewald reciprocal space sum 215 | \param d_gridX x-component of force moment projection onto the grid 216 | \param d_gridY y-component of force moment projection onto the grid 217 | \param d_gridZ z-component of force moment projection onto the grid 218 | \param plan cudaFFT plan 219 | \param Nx number of grid nodes in the x-direction 220 | \param Ny number of grid nodes in the y-direction 221 | \param Nz number of grid nodes in the z-direction 222 | \param d_n_neigh Number of neighbors for every particle 223 | \param d_nlist Neighbor list of every particle, 2D array, can be accessed by nli 224 | \param nli Index lookup helper for d_nlist 225 | \param cheb_an Chebychev coefficients 226 | \param n_cheb Order of Chebyshev approximation 227 | \param N_total total number of particles ( should be same 
as group_size ) 228 | \param gridh Spacing between grid ndoes 229 | \param cheb_recompute whether to recompute chebyshev approximation 230 | \param eig_recompute whether to recompute eigenvalues of matrix approximation 231 | \param stored_eigenvalue previous max eigenvalue 232 | \param cheb_error error tolerance in chebyshev approximation 233 | */ 234 | cudaError_t gpu_stokes_step_one( 235 | Scalar4 *d_pos, 236 | Scalar4 *d_vel, 237 | Scalar3 *d_accel, 238 | int3 *d_image, 239 | unsigned int *d_group_members, 240 | unsigned int group_size, 241 | const BoxDim& box, 242 | Scalar dt, 243 | unsigned int block_size, 244 | Scalar4 *d_net_force, 245 | const Scalar T, 246 | const unsigned int timestep, 247 | const unsigned int seed, 248 | Scalar xi, 249 | Scalar eta, 250 | Scalar ewald_cut, 251 | Scalar ewald_dr, 252 | int ewald_n, 253 | Scalar4 *d_ewaldC1, 254 | Scalar self, 255 | Scalar4 *d_gridk, 256 | CUFFTCOMPLEX *d_gridX, 257 | CUFFTCOMPLEX *d_gridY, 258 | CUFFTCOMPLEX *d_gridZ, 259 | cufftHandle plan, 260 | const int Nx, 261 | const int Ny, 262 | const int Nz, 263 | const unsigned int *d_n_neigh, 264 | const unsigned int *d_nlist, 265 | const unsigned int *d_headlist, 266 | int& m_Lanczos, 267 | const unsigned int N_total, 268 | const int P, 269 | Scalar3 gridh, 270 | Scalar cheb_error, 271 | Scalar shear_rate 272 | ){ 273 | 274 | // Total number of grid points 275 | unsigned int NxNyNz = Nx*Ny*Nz; 276 | 277 | // setup the grid to run the kernel 278 | // block for particle calculation 279 | dim3 grid( (group_size/block_size) + 1, 1, 1); 280 | dim3 threads(block_size, 1, 1); 281 | 282 | // block for grid calculation 283 | int gridBlockSize = ( NxNyNz > block_size ) ? 
block_size : NxNyNz; 284 | int gridNBlock = ( NxNyNz + gridBlockSize - 1 ) / gridBlockSize ; 285 | 286 | // Get the textured tables for real space Ewald sum tabulation 287 | tables1_tex.normalized = false; // Not normalized 288 | tables1_tex.filterMode = cudaFilterModeLinear; // Filter mode: floor of the index 289 | // One dimension, Read mode: ElementType(Get what we write) 290 | cudaBindTexture(0, tables1_tex, d_ewaldC1, sizeof(Scalar4) * (ewald_n+1)); // This was a bug in former versions! 291 | 292 | // Same for the positions and forces 293 | pos_tex.normalized = false; // Not normalized 294 | pos_tex.filterMode = cudaFilterModePoint; // Filter mode: floor of the index 295 | cudaBindTexture(0, pos_tex, d_pos, sizeof(Scalar4) * N_total); 296 | 297 | // Get sheared grid vectors 298 | gpu_stokes_SetGridk_kernel<<>>(d_gridk,Nx,Ny,Nz,NxNyNz,box,xi,eta); 299 | 300 | // Do Mobility and Brownian Calculations (compute the velocity from the forces) 301 | gpu_stokes_CombinedMobilityBrownian_wrap( 302 | d_pos, 303 | d_net_force, 304 | d_group_members, 305 | group_size, 306 | box, 307 | dt, 308 | d_vel, // output 309 | T, 310 | timestep, 311 | seed, 312 | xi, 313 | eta, 314 | P, 315 | ewald_cut, 316 | ewald_dr, 317 | ewald_n, 318 | d_ewaldC1, 319 | d_gridk, 320 | d_gridX, 321 | d_gridY, 322 | d_gridZ, 323 | plan, 324 | Nx, 325 | Ny, 326 | Nz, 327 | d_n_neigh, 328 | d_nlist, 329 | d_headlist, 330 | m_Lanczos, 331 | N_total, 332 | NxNyNz, 333 | grid, 334 | threads, 335 | gridBlockSize, 336 | gridNBlock, 337 | gridh, 338 | cheb_error, 339 | self ); 340 | 341 | 342 | // Use forward Euler integration to move the particles according the velocity 343 | // computed from the Mobility and Brownian calculations 344 | gpu_stokes_step_one_kernel<<< grid, threads >>>( 345 | d_pos, 346 | d_vel, 347 | d_accel, 348 | d_image, 349 | d_group_members, 350 | group_size, 351 | box, 352 | dt, 353 | d_net_force, 354 | shear_rate 355 | ); 356 | 357 | // Quick error check 358 | 
gpuErrchk(cudaPeekAtLastError()); 359 | 360 | // Cleanup 361 | cudaUnbindTexture(tables1_tex); 362 | cudaUnbindTexture(pos_tex); 363 | 364 | return cudaSuccess; 365 | } 366 | -------------------------------------------------------------------------------- /PSEv1/Stokes.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 
20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Gang Wang 52 | // Modified by Andrew Fiore 53 | 54 | /*! \file Stokes.cuh 55 | \brief Declares GPU kernel code for integration considering hydrodynamic interactions on the GPU. Used by Stokes. 
56 | */ 57 | #include "hoomd/ParticleData.cuh" 58 | #include "hoomd/HOOMDMath.h" 59 | #include "hoomd/Index1D.h" 60 | #include 61 | 62 | //! Define the step_one kernel 63 | #ifndef __STOKES_CUH__ 64 | #define __STOKES_CUH__ 65 | 66 | //! Definition for comxplex variable storage 67 | #ifdef SINGLE_PRECISION 68 | #define CUFFTCOMPLEX cufftComplex 69 | #else 70 | #define CUFFTCOMPLEX cufftComplex 71 | #endif 72 | 73 | 74 | //! Kernel driver for the first part (no second part) of the Stokes update called by Stokes.cc 75 | cudaError_t gpu_stokes_step_one(Scalar4 *d_pos, 76 | Scalar4 *d_vel, 77 | Scalar3 *d_accel, 78 | int3 *d_image, 79 | unsigned int *d_group_members, 80 | unsigned int group_size, 81 | const BoxDim& box, 82 | Scalar deltaT, 83 | unsigned int block_size, 84 | Scalar4 *d_net_force, 85 | const Scalar T, 86 | const unsigned int timestep, 87 | const unsigned int seed, 88 | Scalar xi, 89 | Scalar eta, 90 | Scalar ewald_cut, 91 | Scalar ewald_dr, 92 | int ewald_n, 93 | Scalar4 *d_ewald1, 94 | Scalar self, 95 | Scalar4 *d_gridk, 96 | CUFFTCOMPLEX *d_gridX, 97 | CUFFTCOMPLEX *d_gridY, 98 | CUFFTCOMPLEX *d_gridZ, 99 | cufftHandle plan, 100 | const int Nx, 101 | const int Ny, 102 | const int Nz, 103 | const unsigned int *d_n_neigh, 104 | const unsigned int *d_nlist, 105 | const unsigned int *d_headlist, 106 | int& m_Lanczos, 107 | const unsigned int N_total, 108 | const int P, 109 | Scalar3 gridh, 110 | Scalar cheb_error, 111 | Scalar current_shear_rate); 112 | 113 | 114 | #endif 115 | -------------------------------------------------------------------------------- /PSEv1/Stokes.h: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 
5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 
40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Gang Wang 52 | // Modified by Andrew Fiore 53 | 54 | #ifndef SINGLE_PRECISION 55 | #define CUFFTCOMPLEX cufftComplex 56 | #else 57 | #define CUFFTCOMPLEX cufftComplex 58 | #endif 59 | 60 | #ifndef __STOKES_H__ 61 | #define __STOKES_H__ 62 | 63 | /*! \file Stokes.h 64 | \brief Declares the Stokes class 65 | */ 66 | 67 | #include 68 | #include 69 | #include 70 | 71 | #include 72 | 73 | #include "ShearFunction.h" 74 | 75 | 76 | #ifdef NVCC 77 | #error This header cannot be compiled by nvcc 78 | #endif 79 | 80 | #include 81 | 82 | //! Integrates the system forward considering hydrodynamic interactions by GPU 83 | /*! Implements overdamped integration (one step) through IntegrationMethodTwoStep interface, runs on the GPU 84 | */ 85 | 86 | class Stokes : public IntegrationMethodTwoStep 87 | { 88 | public: 89 | 90 | //! Constructs the integration method and associates it with the system 91 | Stokes( std::shared_ptr sysdef, 92 | std::shared_ptr group, 93 | std::shared_ptr T, 94 | unsigned int seed, 95 | std::shared_ptr nlist, 96 | Scalar xi, 97 | Scalar error); 98 | 99 | virtual ~Stokes(); 100 | 101 | //! Set a new temperature 102 | /*! \param T new temperature to set */ 103 | void setT(std::shared_ptr T) 104 | { 105 | m_T = T; 106 | } 107 | 108 | //! Performs the first step of the integration 109 | virtual void integrateStepOne(unsigned int timestep); 110 | 111 | //! 
Performs the second step of the integration 112 | virtual void integrateStepTwo(unsigned int timestep); 113 | 114 | //! Set the parameters for Ewald summation 115 | void setParams(); 116 | 117 | //! Set the shear rate and shear frequency 118 | void setShear(std::shared_ptr shear_func, Scalar max_strain) { 119 | m_shear_func = shear_func; 120 | m_max_strain = max_strain; 121 | } 122 | 123 | protected: 124 | 125 | std::shared_ptr m_T; //!< The Temperature of the Stochastic Bath 126 | unsigned int m_seed; //!< The seed for the RNG of the Stochastic Bath 127 | 128 | cufftHandle plan; //!< Used for the Fast Fourier Transformations performed on the GPU 129 | 130 | std::shared_ptr m_nlist; //!< The neighborlist to use for the computation 131 | 132 | std::shared_ptr m_shear_func; //!< mutable shared pointer towards a ShearFunction object 133 | Scalar m_max_strain; //!< Maximum total strain before box resizing 134 | 135 | Scalar m_xi; //!< ewald splitting parameter xi 136 | Scalar m_ewald_cut; //!< Real space cutoff 137 | GPUArray m_ewaldC1; //!< Real space Ewald coefficients table 138 | int m_ewald_n; //!< Number of entries in table of Ewald coefficients 139 | Scalar m_ewald_dr; //!< Real space Ewald table spacing 140 | 141 | Scalar m_self; //!< self piece 142 | 143 | int m_Nx; //!< Number of grid points in x direction 144 | int m_Ny; //!< Number of grid points in y direction 145 | int m_Nz; //!< Number of grid points in z direction 146 | 147 | GPUArray m_gridk; //!< k-vectors for each grid point 148 | GPUArray m_gridX; //!< x component of the grid based force 149 | GPUArray m_gridY; //!< x component of the grid based force 150 | GPUArray m_gridZ; //!< x component of the grid based force 151 | 152 | Scalar m_gaussm; //!< Gaussian width in standard deviations for wave space spreading/contraction 153 | int m_gaussP; //!< Number of points in each dimension for Gaussian support 154 | Scalar m_eta; //!< Gaussian spreading parameter 155 | Scalar3 m_gridh; //!< Size of the grid 
box in 3 direction 156 | 157 | int m_m_Lanczos; //!< Number of Lanczos Iterations to use for calculation of Brownian displacement 158 | 159 | Scalar m_error; //!< Error tolerance for all calculations 160 | 161 | }; 162 | 163 | //! Exports the Stokes class to python 164 | void export_Stokes(pybind11::module& m); 165 | 166 | #endif 167 | -------------------------------------------------------------------------------- /PSEv1/VariantShearFunction.cc: -------------------------------------------------------------------------------- 1 | // Maintainer: Gang Wang 2 | 3 | /*! \file VariantShearFunction.cc 4 | \brief Defines VariantShearFunction class 5 | */ 6 | 7 | #ifdef WIN32 8 | #pragma warning( push ) 9 | #pragma warning( disable : 4103 4244 ) 10 | #endif 11 | 12 | #include "VariantShearFunction.h" 13 | 14 | using namespace std; 15 | 16 | 17 | VariantShearFunction::VariantShearFunction(std::shared_ptr shear_func, 18 | unsigned int total_timestep, 19 | double min_value, 20 | double max_value) : 21 | m_shear_func(shear_func), 22 | m_total_timestep(total_timestep), 23 | m_min_value(min_value), 24 | m_max_value(max_value) 25 | { 26 | setOffset( m_shear_func -> getOffset() ); // This line ensures the offsets of ShearFunction and Variant class are equal 27 | m_value_range = m_max_value - m_min_value; 28 | m_end_value = wrapValue( m_shear_func -> getStrain( m_offset + m_total_timestep ) ); 29 | } 30 | 31 | /*! 
\param timestep Timestep to get the value at 32 | \return value by the user-specified function 33 | */ 34 | double VariantShearFunction::getValue(unsigned int timestep) 35 | { 36 | if (timestep < m_offset) { 37 | return 0; 38 | } 39 | else if (timestep >= m_offset + m_total_timestep) { 40 | return m_end_value; 41 | } 42 | return wrapValue( m_shear_func -> getStrain(timestep) ); 43 | } 44 | 45 | void export_VariantShearFunction(pybind11::module& m) 46 | { 47 | pybind11::class_ >(m, "VariantShearFunction", pybind11::base()) 48 | .def(pybind11::init< std::shared_ptr, unsigned int, double, double >()); 49 | } 50 | 51 | #ifdef WIN32 52 | #pragma warning( pop ) 53 | #endif 54 | -------------------------------------------------------------------------------- /PSEv1/VariantShearFunction.h: -------------------------------------------------------------------------------- 1 | // Maintainer: Gang Wang 2 | 3 | /*! \file VariantShearFunction.h 4 | \brief Declares the VariantShearFunction class 5 | */ 6 | 7 | #ifdef NVCC 8 | #error This header cannot be compiled by nvcc 9 | #endif 10 | 11 | #include 12 | 13 | #ifndef __VARIANT_SHEAR_FUNCTION_H__ 14 | #define __VARIANT_SHEAR_FUNCTION_H__ 15 | 16 | #include 17 | #include 18 | #include "ShearFunction.h" 19 | 20 | //! Variant class for shear flowfield described by a function 21 | /*! This variant gives the strain value based on a function (which is ShearFunction type) 22 | The strain is wrapped based on the min_value and max_value since HOOMD cannot deal with 23 | very thin box. In most cases, max_value - min_value is an integer (and the recommended value 24 | is [-0.5, 0.5]). If the timestep is smaller than offset, 0 is returned when calling 25 | getValue; if the timestep is larger than offset + total_timestep, the strain of the last 26 | time point is returned. 27 | */ 28 | class VariantShearFunction : public Variant 29 | { 30 | public: 31 | //! 
Constructs a VariantShearFunction type with a shared_ptr to ShearFunction and total timestep 32 | /*! \param shear_func the shared pointer to the ShearFunction object 33 | \param total_timestep total time step this Variant is going to be effective 34 | \param min_value the minimal value of this Variant 35 | \param max_value the maximal value of this Variant 36 | */ 37 | VariantShearFunction(std::shared_ptr shear_func, 38 | unsigned int total_timestep, 39 | double min_value, 40 | double max_value); 41 | 42 | //! Gets the value at a given time step 43 | virtual double getValue(unsigned int timestep); 44 | 45 | //! Wrap the value between m_min_value and m_max_value 46 | double wrapValue(double functionValue) { 47 | return functionValue - m_value_range * floor( (functionValue - m_min_value) / m_value_range ); 48 | } 49 | 50 | private: 51 | const std::shared_ptr m_shear_func; 52 | const unsigned int m_total_timestep; //!< the total timestep for the Variant class 53 | const double m_min_value; //!< minimum value of the output of the Variant class 54 | const double m_max_value; //!< maximum value of the output of the Variant class 55 | double m_end_value; //!< the last value of output after time > m_offset + m_total_timestep 56 | double m_value_range; //!< max_value - min_value 57 | }; 58 | 59 | //! 
Exports VariantShearFunction class to python 60 | void export_VariantShearFunction(pybind11::module& m); 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /PSEv1/__init__.py: -------------------------------------------------------------------------------- 1 | # this file exists to mark this directory as a python module 2 | # need to import all submodules defined in this directory 3 | from hoomd.PSEv1 import integrate 4 | from hoomd.PSEv1 import shear_function 5 | from hoomd.PSEv1 import variant 6 | -------------------------------------------------------------------------------- /PSEv1/integrate.py: -------------------------------------------------------------------------------- 1 | # First, we need to import the C++ module. It has the same name as this module (plugin_template) but with an underscore 2 | # in front 3 | from hoomd.PSEv1 import _PSEv1 4 | from hoomd.PSEv1 import shear_function 5 | 6 | # Next, since we are extending an integrator, we need to bring in the base class integrator and some other parts from 7 | # hoomd_script 8 | import hoomd 9 | from hoomd import _hoomd 10 | from hoomd import compute 11 | from hoomd.md import _md 12 | import math 13 | 14 | ## One step overdamped integration with hydrodynamic interactions 15 | class PSEv1(hoomd.md.integrate._integration_method): 16 | ## Specifies the Stokes integrator 17 | # 18 | # \param group Group of particles on which to apply this method. 19 | # \param T Temperature of the simulation (in energy units) 20 | # \param seed Random seed to use for the run. Simulations that are identical, except for the seed, will follow 21 | # different trajectories. 22 | # \param xi Ewald splitting parameter 23 | # \param error Relative error for all calculations 24 | # \param function_form Functional form for shear 25 | # \param max_strain Maximum box deformation for shear 26 | # 27 | # 28 | # T can be a variant type, allowing for temperature ramps in simulation runs. 
29 | # 30 | # Internally, a compute.thermo is automatically specified and associated with \a group. 31 | 32 | def __init__(self, group, T, seed=0, xi = 0.5, error = 0.001, function_form = None, max_strain = 0.5, nlist_type = "cell" ): 33 | 34 | # Print the status of the initialization 35 | hoomd.util.print_status_line(); 36 | 37 | # initialize base class 38 | hoomd.md.integrate._integration_method.__init__(self); 39 | 40 | # setup the variant inputs 41 | T = hoomd.variant._setup_variant_input(T); 42 | 43 | # create the compute thermo 44 | compute._get_unique_thermo(group=group); 45 | 46 | # Real space neighborlist cutoff based on error estimate for spectral sums 47 | self.rcut = math.sqrt( - math.log( error ) ) / xi; 48 | # If this line is changed, remember to change in C++ code as well!! 49 | 50 | # initialize the reflected c++ class 51 | if not hoomd.context.exec_conf.isCUDAEnabled(): 52 | hoomd.context.msg.error("Sorry, we have not written CPU code for PSE RPY simulation. \n"); 53 | raise RuntimeError('Error creating Stokes'); 54 | else: 55 | 56 | # Create a neighborlist exclusively for real space interactions. 
Use cell lists by 57 | # default, but also allow the user to specify 58 | if ( nlist_type.upper() == "CELL" ): 59 | 60 | cl_stokes = _hoomd.CellListGPU(hoomd.context.current.system_definition); 61 | hoomd.context.current.system.addCompute(cl_stokes, "stokes_cl") 62 | self.neighbor_list = _md.NeighborListGPUBinned(hoomd.context.current.system_definition, self.rcut, 0.4, cl_stokes); 63 | 64 | elif ( nlist_type.upper() == "TREE" ): 65 | 66 | self.neighbor_list = _md.NeighborListGPUTree(hoomd.context.current.system_definition, self.rcut, 0.4) 67 | 68 | elif ( nlist_type.upper() == "STENCIL" ): 69 | 70 | cl_stokes = _hoomd.CellListGPU(hoomd.context.current.system_definition) 71 | hoomd.context.current.system.addCompute(cl_stokes, "stokes_cl") 72 | cls_stokes = _hoomd.CellListStencil( hoomd.context.current.system_definition, cl_stokes ) 73 | hoomd.context.current.system.addCompute( cls_stokes, "stokes_cls") 74 | self.neighbor_list = _md.NeighborListGPUStencil(hoomd.context.current.system_definition, self.rcut, 0.4, cl_stokes, cls_stokes) 75 | 76 | else: 77 | hoomd.context.msg.error("Invalid neighborlist method specified. Valid options are: cell, tree, stencil. 
\n"); 78 | raise RuntimeError('Error constructing neighborlist'); 79 | 80 | # Set neighborlist properties 81 | self.neighbor_list.setEvery(1, True); 82 | hoomd.context.current.system.addCompute(self.neighbor_list, "stokes_nlist") 83 | self.neighbor_list.countExclusions(); 84 | 85 | # Call the stokes integrator 86 | self.cpp_method = _PSEv1.Stokes(hoomd.context.current.system_definition, group.cpp_group, T.cpp_variant, seed, self.neighbor_list, xi, error); 87 | 88 | self.cpp_method.validateGroup() 89 | 90 | if function_form is not None: 91 | self.cpp_method.setShear(function_form.cpp_function, max_strain) 92 | else: 93 | no_shear_function = shear_function.steady(dt = 0) 94 | self.cpp_method.setShear(no_shear_function.cpp_function, max_strain) 95 | 96 | self.cpp_method.setParams() 97 | 98 | ## Changes parameters of an existing integrator 99 | # \param self self 100 | # \param T Temperature 101 | # 102 | # To change the parameters of an existing integrator, you must save it in a variable when it is 103 | # specified, like so: 104 | # \code 105 | # integrator = integrate.nve(group=all) 106 | # \endcode 107 | 108 | def set_params(self, T=None, function_form = None, max_strain=0.5): 109 | util.print_status_line(); 110 | self.check_initialization(); 111 | 112 | if T is not None: 113 | # setup the variant inputs 114 | T = hoomd.variant._setup_variant_input(T); 115 | self.cpp_method.setT(T.cpp_variant); 116 | 117 | if function_form is not None: 118 | self.cpp_method.setShear(function_form.cpp_function, max_strain) 119 | 120 | ## Stop any shear 121 | def stop_shear(self, max_strain = 0.5): 122 | no_shear_function = shear_function.steady(dt = 0) 123 | self.cpp_method.setShear(no_shear_function.cpp_function, max_strain) 124 | 125 | 126 | -------------------------------------------------------------------------------- /PSEv1/module.cc: -------------------------------------------------------------------------------- 1 | // Include the defined classes that are to be exported to 
python 2 | #include "Stokes.h" 3 | #include "VariantShearFunction.h" 4 | #include "ShearFunction.h" 5 | #include "ShearFunctionWrap.h" 6 | #include "SpecificShearFunction.h" 7 | 8 | // Include pybind11 9 | #include 10 | 11 | // specify the python module. Note that the name must explicitly match the PROJECT() name provided in CMakeLists 12 | // (with an underscore in front) 13 | PYBIND11_MODULE(_PSEv1, m) 14 | { 15 | #ifdef ENABLE_CUDA 16 | export_Stokes(m); 17 | #endif 18 | export_ShearFunction(m); 19 | export_ShearFunctionWrap(m); 20 | export_VariantShearFunction(m); 21 | export_SpecificShearFunction(m); 22 | } 23 | -------------------------------------------------------------------------------- /PSEv1/shear_function.py: -------------------------------------------------------------------------------- 1 | ## \package PSEv1.shear_function 2 | # classes representing shear functions, which can be input of an integrator and variant 3 | # to shear the box of a simulation 4 | 5 | from hoomd.PSEv1 import _PSEv1 6 | 7 | import hoomd 8 | 9 | ## shear function interface representing shear flow field described by a function 10 | class _shear_function: 11 | ## Constructor and check the validity of zero param 12 | # \param zero Specify absolute time step number location for 0 in \a points. Use 'now' to indicate the current step. 
13 | def __init__(self, zero = 'now'): 14 | self.cpp_function = None 15 | 16 | if zero == 'now': 17 | self._offset = hoomd.context.current.system.getCurrentTimeStep() 18 | else: 19 | # validate zero 20 | if zero < 0: 21 | hoomd.context.msg.error("Cannot create a shear_function variant with a negative zero\n") 22 | raise RuntimeError('Error creating shear function') 23 | if zero > hoomd.context.current.system.getCurrentTimeStep(): 24 | hoomd.context.msg.error("Cannot create a shear_function variant with a zero in the future\n") 25 | raise RuntimeError('Error creating shear function') 26 | self._offset = zero 27 | 28 | ## Get shear rate at a certain time step, might be useful when switching strain field 29 | # \param timestep the timestep 30 | def get_shear_rate(self, timestep): 31 | return self.cpp_function.getShearRate(timestep) 32 | 33 | ## Get the strain at a certain time step. The strain is not wrapped 34 | # \param timestep the timestep 35 | def get_strain(self, timestep): 36 | return self.cpp_function.getStrain(timestep) 37 | 38 | ## Get the offset of this shear function 39 | def get_offset(self): 40 | return self.cpp_function.getOffset() 41 | 42 | 43 | ## concrete class representing steady shear, no shear by default if shear_rate is not provided 44 | class steady(_shear_function): 45 | ## Constructor of steady shear function 46 | # \param dt the time interval between each timestep, must be the same with the global timestep 47 | # \param shear_rate the shear rate of the shear, default is zero, should be zero or positive 48 | # \param zero the time offset 49 | def __init__(self, dt, shear_rate = 0, zero = 'now'): 50 | _shear_function.__init__(self, zero) 51 | self.cpp_function = _PSEv1.SteadyShearFunction(shear_rate, self._offset, dt) 52 | 53 | 54 | ## concrete class representing simple sinusoidal oscillatory shear 55 | class sine(_shear_function): 56 | ## Constructor of simple sinusoidal oscillatory shear 57 | # \param dt the time interval between each 
timestep, must be the same with the global timestep 58 | # \param shear_rate the maximum shear rate of the ocsillatory shear, must be positive 59 | # \param shear_freq the frequency (real frequency, not angular frequency) of the ocsillatory shear, must be positive 60 | # \param zero the time offset 61 | def __init__(self, dt, shear_rate, shear_freq, zero = 'now'): 62 | 63 | if shear_rate <= 0: 64 | hoomd.context.msg.error("Shear rate must be positive (use steady class instead for zero shear)\n") 65 | raise RuntimeError("Error creating shear function") 66 | if shear_freq <= 0: 67 | hoomd.context.msg.error("Shear frequency must be positive (use steady class instead for steady shear)\n") 68 | raise RuntimeError("Error creating shear function") 69 | 70 | _shear_function.__init__(self, zero) 71 | self.cpp_function = _PSEv1.SinShearFunction(shear_rate, shear_freq, self._offset, dt) 72 | 73 | 74 | ## concrete class representing chirp oscillatory shear 75 | class chirp(_shear_function): 76 | ## Constructor of chirp oscillatory shear 77 | # \param dt the time interval between each timestep, must be the same with the global timestep 78 | # \param amplitude the strain amplitude of Chirp oscillatory shear, must be positive 79 | # \param omega_0 minimum angular frequency, must be positive 80 | # \param omega_f maximum angular frequency, must be positive and larger than omega_0 81 | # \param periodT final time of chirp 82 | # \param zero the time offset 83 | def __init__(self, dt, amplitude, omega_0, omega_f, periodT, zero = 'now'): 84 | _shear_function.__init__(self, zero) 85 | self.cpp_function = _PSEv1.ChirpShearFunction(amplitude, omega_0, omega_f, periodT, self._offset, dt) 86 | 87 | 88 | ## concrete class representing Tukey window function 89 | class tukey_window(_shear_function): 90 | ## Constructor of Tukey window function 91 | # \param dt the time interval between each timestep, must be the same with the global timestep 92 | # \param periodT time length of the Tukey 
window function 93 | # \param tukey_param Tukey window function parameter, must be within (0, 1] 94 | # \param zero the time offset 95 | def __init__(self, dt, periodT, tukey_param, zero = 'now'): 96 | 97 | if tukey_param <= 0 or tukey_param > 1: 98 | hoomd.context.msg.error("Tukey parameter must be within (0, 1]") 99 | raise RuntimeError("Error creating Tukey window function") 100 | 101 | _shear_function.__init__(self, zero) 102 | self.cpp_function = _PSEv1.TukeyWindowFunction(periodT, tukey_param, self._offset, dt) 103 | 104 | 105 | ## concrete class represeting a windowed shear function 106 | class windowed(_shear_function): 107 | ## Constructor of a windowed shear function 108 | # The strain of the resulting windowed shear function will be the product of the original shear function and 109 | # the provided window function 110 | # \param function_form the original shear function 111 | # \param window the window function. It is recommended to make sure the offset (zero) of the window function is the same with shear function 112 | def __init__(self, function_form, window): 113 | _shear_function.__init__(self, 'now') # zero parameter is not used in windowed class anyways 114 | self.cpp_function = _PSEv1.WindowedFunction(function_form.cpp_function, window.cpp_function) 115 | -------------------------------------------------------------------------------- /PSEv1/variant.py: -------------------------------------------------------------------------------- 1 | ## \package PSEv1.variant 2 | # classes representing the variant class to facilitate box_resize 3 | 4 | from hoomd.PSEv1 import _PSEv1 5 | from hoomd.PSEv1 import shear_function 6 | 7 | from hoomd import variant 8 | 9 | from hoomd import _hoomd 10 | import hoomd 11 | import sys 12 | 13 | ## Variant class holding a functional form of shear field 14 | # Used as an argument for box_resize class to deform the box 15 | class shear_variant(hoomd.variant._variant): 16 | ## Specify shear field represented by a function 
form with a limited timesteps 17 | # 18 | # \param function_form the functional form of the sinusoidal shear 19 | # \param total_timestep the total timesteps of the shear, equal to shear_end_timestep - shear_start_timestep, must be positive 20 | # \param max_strain the maximum absolute value of the strain, use 0.5 in almost all the cases 21 | def __init__(self, function_form, total_timestep, max_strain = 0.5): 22 | 23 | # initialize the base class 24 | _variant.__init__(self) 25 | 26 | # check total_timestep is positive 27 | if total_timestep <= 0: 28 | hoomd.context.msg.error("Cannot create a shear_variant with 0 or negative points\n") 29 | raise RuntimeError('Error creating variant') 30 | 31 | # create the c++ mirror class 32 | self.cpp_variant = _PSEv1.VariantShearFunction(function_form.cpp_function, int(total_timestep), -max_strain, max_strain) 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Positively Split Ewald (PSE) 2 | PSE is a HOOMD plugin by Andrew M. Fiore containing a GPU implemention of the Positively Split Ewald 3 | (PSE) algorithm for calculation of the Rotne-Prager-Yamakawa (RPY) 4 | hydrodynamic mobility and stochastic thermal displacements. This repository is no longer maintained. 5 | 6 | An alternative maintained GPU implementation can be found in [UAMMD](https://github.com/RaulPPelaez/UAMMD) and is 7 | accessible using a simple [python interface](https://github.com/RaulPPelaez/UAMMD_PSE_Python). 8 | 9 | The theory behind the PSE method is described in the reference: 10 | 11 | 1. **Rapid Sampling of Stochastic Displacements in Brownian Dynamics 12 | Simulations**, Andrew M. Fiore, Florencio Balboa Usabiaga, Aleksandar 13 | Donev, and James W. 
Swan, The Journal of Chemical Physics, **146**, 14 | 124116 (2017).[DOI](http://doi.org/10.1063/1.4978242) [arXiv](https://arxiv.org/abs/1611.09322) 15 | 16 | 17 | ## Files that come in this template 18 | - doc/TUTORIAL.pdf : a tutorial to use PSE. 19 | - CMakeLists.txt : main CMake configuration file for the plugin 20 | - FindHOOMD.cmake : script to find a HOOMD-Blue installation to link against 21 | - README : This file 22 | - PSEv1 : Directory containing C++ and CUDA source code that interacts with HOOMD. Also contains python UI level source code that drives the C++ module 23 | - cppmodule : Directory containing C++ and CUDA source code that interacts with HOOMD 24 | - examples/run.py : python example to use PSE. 25 | 26 | ## Software requirements 27 | 28 | The PSE plugin requires the following additional software: 29 | - HOOMD, compiled with CUDA (tested with version 2.3.3). 30 | - CUDA (tested with version 9.2). 31 | - LAPACKE (tested with version 3.6.1). 32 | - CBLAS (tested with version 3.6.1). 33 | 34 | ## Software Installation 35 | 36 | HOOMD can be installed following the instructions given in the [documentation](http://hoomd-blue.readthedocs.io/en/stable/compiling.html). HOOMD must be compiled with CUDA enabled. It is recommended to use the following cmake command 37 | ``` 38 | cmake ../ -DCMAKE_INSTALL_PREFIX=${SOFTWARE_ROOT}/lib/python -DCMAKE_CXX_FLAGS=-march=native -DCMAKE_C_FLAGS=-march=native -DENABLE_CUDA=ON -DENABLE_MPI=ON 39 | ``` 40 | where `${SOFTWARE_ROOT}` is the path variable specifying the installation location for HOOMD. 41 | 42 | LAPACKE and CBLAS can be install manually after downloading the source code from [netlib](http://www.netlib.org/lapacke) and [openblas](https://www.openblas.net) or from repositorities. 
In Ubuntu, the simplest method is via repository: 43 | ``` 44 | sudo apt-get install liblapack3 liblapack-dev liblapacke liblapacke-dev 45 | sudo apt-get install libblas3 libblas-dev libopenblas-dev libatlas-base-dev 46 | ``` 47 | 48 | ## Plugin Compilation 49 | To compile this example plugin, follow steps similar to those in compiling HOOMD-Blue. The process of finding a HOOMD 50 | installation to link to will be fully automatic IF you have hoomd_install_dir/bin in your PATH when running cmake. 51 | 52 | Note that plugins can only be built against a HOOMD build that has been installed via a package or compiled and then 53 | installed via 'make install'. HOOMD must be built with CUDA enabled -DENABLE_CUDA=ON in order for the package to work. 54 | Plugins can only be built against hoomd when it is built as a shared library. 55 | 56 | From the root PSE folder do: 57 | 58 | ``` 59 | $ mkdir plugin_build 60 | $ cd plugin_build 61 | $ cmake ../ 62 | $ make -j6 63 | $ make install 64 | ``` 65 | 66 | If hoomd is not in your PATH, you can specify the root using 67 | 68 | `$ cmake -DHOOMD_ROOT=/path/to/hoomd ../` 69 | 70 | You can also provide to `cmake` the location of `LAPACKE`, `LAPACK`, `CBLAS`, 71 | `BLAS` and the `python` version with the options 72 | 73 | ``` 74 | $ cmake -DHOOMD_ROOT=/path/to/hoomd \ 75 | -DCBLAS_LIBRARIES=/path/to/cblas \ 76 | -DBLAS_LIBRARIES=/path/to/blas \ 77 | -DLAPACKE_LIBRARIES=/path/to/lapacke \ 78 | -DLAPACK_LIBRARIES=/path/to/lapack \ 79 | -DPYTHON_EXECUTABLE=`which python` \ 80 | ../ 81 | ``` 82 | however, these options are unecessary if these libraries have been installed into the standard directories. 83 | 84 | By default, make install will install the plugin into 85 | 86 | `${HOOMD_ROOT}/lib/python/hoomd/PSEv1` 87 | 88 | This works if you have `make install`ed hoomd into your home directory. 89 | 90 | ### Using the Plugin 91 | A sample script demonstrating how the plugin is used can be found in examples/run.py. 
You can 92 | call this script with the command 93 | ``` 94 | python3 run.py 95 | ``` 96 | -------------------------------------------------------------------------------- /examples/run.py: -------------------------------------------------------------------------------- 1 | import hoomd; 2 | from hoomd import _hoomd 3 | from hoomd.md import _md 4 | import hoomd.PSEv1 5 | import os; 6 | import math 7 | hoomd.context.initialize(''); 8 | 9 | # Time stepping information 10 | dt = 1e-3 # time step 11 | tf = 1e0 # the final time of the simulation (in units of bare particle diffusion time) 12 | nrun = tf / dt # number of steps 13 | 14 | # Particle size 15 | # 16 | # Changing this won't change the PSE hydrodynamics, which assumes that all particles 17 | # have radius = 1.0, and ignores HOOMD's size data. However, might be necessary if 18 | # hydrodynamic radius is different from other radii needed. 19 | radius = 1.0 20 | diameter = 2.0 * radius 21 | 22 | # File output location 23 | loc = 'Data/' 24 | if not os.path.isdir( loc ): 25 | os.mkdir( loc ) 26 | 27 | # Simple cubic crystal of 1000 particles 28 | N = 1000; 29 | L = 64 30 | n = math.ceil(N ** (1.0/3.0)) # number of particles along 1D 31 | a = L / n # spacing between particles 32 | 33 | # Create the box and particles 34 | hoomd.init.create_lattice(unitcell=hoomd.lattice.sc(a=a),n=n) 35 | 36 | # Shear function form, using sinusoidal oscillatory shear as example 37 | # 38 | # Options are: none (no shear. 
default if left unspecified in integrator call) 39 | # steady (steady shear) 40 | # sine (sinusoidal oscillatory shear) 41 | # chirp (chirp frequency sweep) 42 | function_form = hoomd.PSEv1.shear_function.sine( dt = dt, shear_rate = 1.0, shear_freq = 1.0 ) 43 | 44 | # Set up PSE integrator 45 | # 46 | # Arguments to PSE integrator (default values given in parentheses): 47 | # group -- group of particle to act on (should be all) 48 | # seed (1) -- Seed for the random number generator used in Brownian calculations 49 | # T (1.0) -- Temperature 50 | # xi (0.5) -- Ewald splitting parameter. Changing value will not affect results, only speed. 51 | # error (1E-3) -- Calculation error tolerance 52 | # function_form (none) -- Functional form for shearing. See above (or source code) for valid options. 53 | hoomd.md.integrate.mode_standard(dt=dt) 54 | pse = hoomd.PSEv1.integrate.PSEv1( group = hoomd.group.all(), seed = 1, T = 1.0, xi = 0.5, error = 1E-3, function_form = function_form ) 55 | 56 | # Run the simulation 57 | hoomd.run( nrun ) 58 | 59 | 60 | --------------------------------------------------------------------------------