├── README.md └── source ├── Brownian_FarField.cu ├── Brownian_FarField.cuh ├── Brownian_NearField.cu ├── Brownian_NearField.cuh ├── CMakeLists.txt ├── DataStruct.h ├── Helper_Brownian.cu ├── Helper_Brownian.cuh ├── Helper_Debug.cu ├── Helper_Debug.cuh ├── Helper_Integrator.cu ├── Helper_Integrator.cuh ├── Helper_Mobility.cu ├── Helper_Mobility.cuh ├── Helper_Precondition.cu ├── Helper_Precondition.cuh ├── Helper_Saddle.cu ├── Helper_Saddle.cuh ├── Helper_Stokes.cu ├── Helper_Stokes.cuh ├── Integrator.cu ├── Integrator.cuh ├── Lubrication.cu ├── Lubrication.cuh ├── Mobility.cu ├── Mobility.cuh ├── Precondition.cu ├── Precondition.cuh ├── Saddle.cu ├── Saddle.cuh ├── ShearFunction.cc ├── ShearFunction.h ├── ShearFunctionWrap.cc ├── ShearFunctionWrap.h ├── Solvers.cu ├── Solvers.cuh ├── SpecificShearFunction.cc ├── SpecificShearFunction.h ├── Stokes.cc ├── Stokes.cu ├── Stokes.cuh ├── Stokes.h ├── Stokes_ResistanceTable.cc ├── Stokes_SparseMath.cc ├── VariantShearFunction.cc ├── VariantShearFunction.h ├── Wrappers.cuh ├── __init__.py ├── integrate.py ├── module.cc ├── rcm.cpp ├── rcm.hpp ├── shear_function.py └── variant.py /README.md: -------------------------------------------------------------------------------- 1 | ## Fast Stokesian Dynamics (FSD) 2 | 3 | Original authors: Andrew M. Fiore & James W. Swan (MIT) [1]. 4 | 5 | > Note: The original FSD code contains errors in the lubrication, mobility, precondition and Brownian calculations. 6 | > This version fixes those issues and tries to improve the overall performance and clarity. 7 | > Furthermore, the solver has been adapted to simulate active suspensions of squirmers based on the *Active Stokesian Dynamics* framework, 8 | > see Refs. [2-3] for details. 9 | 10 | A brief summary of the main files is given below. Main structure: 11 | 12 | > - Stokes.cc : C++ module to set up the method and run the integrator 13 | > - Stokes.cu : Driver function for integration (RK2, Euler-Maruyama, etc.) 
14 | > - Precondition.cu : Preconditioners for the saddle point and near-field Brownian solves 15 | > - Integrator.cu : Velocity computation and integrators, including the RFD 16 | > - Solvers.cu : Methods to perform required matrix inversions 17 | > - Wrappers.cuh : C++ wrapper definitions for CUSP operations 18 | > - Saddle.cu : Saddle point multiplication and solution 19 | 20 | Deterministic hydrodynamics: 21 | 22 | > - Mobility.cu : Far-field mobility calculations 23 | > - Lubrication.cu : Near-field resistance (lubrication) functions (RFU, RFE, RSU, RSE) 24 | > - Stokes_ResistanceTable.cc : Values for pre-computed tabulation of lubrication functions 25 | 26 | Brownian motion: 27 | 28 | > - Brownian_FarField.cu : Methods to compute the far-field Brownian displacements 29 | > - Brownian_NearField.cu : Methods to compute the near-field Brownian forces 30 | 31 | Auxiliary functions: 32 | 33 | > - Helper_Stokes.cu : Helper functions for the Stokes integrator 34 | > - Helper_Integrator.cu : Helper functions to simplify code in Integrator.cu 35 | > - Helper_Saddle.cu : Helper functions for saddle point matrix calculations 36 | > - Helper_Mobility.cu : Helper functions for mobility calculations in Mobility.cu 37 | > - Helper_Precondition.cu : Helper functions for preconditioning calculations 38 | > - Helper_Brownian.cu : Helper functions used in Brownian_FarField.cu and Brownian_NearField.cu 39 | > - Helper_Debug.cu : Functions for debugging and code checking, printing output, etc. 40 | 41 | Despite our effort to verify the solver and reduce the number of mistakes, there may still be bugs. 42 | So, if you find any, please do not hesitate to contact me. 43 | 44 | ### Acknowledgements 45 | 46 | I would like to thank Boyuan Chen (Caltech) for extensive help in debugging the code. 47 | I would also like to thank William Torre (Utrecht) for discussions about the solver. 48 | 49 | ### References 50 | 51 | 1. Fiore, A. M., & Swan, J. W. (2019). 
[Fast Stokesian dynamics](https://www.cambridge.org/core/journals/journal-of-fluid-mechanics/article/abs/fast-stokesian-dynamics/970BD1B80B43E21CD355C7BAD4644D46). *Journal of Fluid Mechanics*, 878, 544-597. 52 | 2. Elfring, G. J., & Brady, J. F. (2022). [Active Stokesian dynamics](https://www.cambridge.org/core/journals/journal-of-fluid-mechanics/article/active-stokesian-dynamics/4FAE47B1A6F0531AE9B6C8F1EAC6D95C). *Journal of Fluid Mechanics*, 952, A19. 53 | 3. Ge, Z., & Elfring, G. J. (2025). [Hydrodynamic diffusion in apolar active suspensions of squirmers](https://www.cambridge.org/core/journals/journal-of-fluid-mechanics/article/hydrodynamic-diffusion-in-apolar-active-suspensions-of-squirmers/8596439F68F3E3D6B5A194EB005E992A). *Journal of Fluid Mechanics*, 1003, A17. 54 | -------------------------------------------------------------------------------- /source/Brownian_FarField.cuh: -------------------------------------------------------------------------------- 1 | // This file is part of the PSEv3 plugin, released under the BSD 3-Clause License 2 | // 3 | // Andrew Fiore 4 | 5 | /*! \file Brownian_FarField.cuh 6 | \brief Declares GPU kernel code for far-field Brownian Calculation. 7 | */ 8 | #include "hoomd/ParticleData.cuh" 9 | #include "hoomd/HOOMDMath.h" 10 | 11 | #include 12 | 13 | #include "DataStruct.h" 14 | 15 | //! Define the kernel 16 | #ifndef __BROWNIAN_FARFIELD_CUH__ 17 | #define __BROWNIAN_FARFIELD_CUH__ 18 | 19 | //! 
Definition for complex variable storage 20 | #ifdef SINGLE_PRECISION 21 | #define CUFFTCOMPLEX cufftComplex 22 | #else 23 | #define CUFFTCOMPLEX cufftComplex 24 | #endif 25 | 26 | 27 | void Brownian_FarField_SlipVelocity( 28 | float *d_Uslip_ff, 29 | Scalar4 *d_pos, 30 | unsigned int *d_group_members, 31 | unsigned int group_size, 32 | const BoxDim& box, 33 | Scalar dt, 34 | BrownianData *bro_data, 35 | MobilityData *mob_data, 36 | KernelData *ker_data, 37 | WorkData *work_data 38 | ); 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /source/Brownian_NearField.cu: -------------------------------------------------------------------------------- 1 | // This file is part of the PSEv3 plugin, released under the BSD 3-Clause License 2 | // 3 | // Andrew Fiore 4 | // Zhouyang Ge 5 | 6 | #include "Brownian_NearField.cuh" 7 | #include "Precondition.cuh" 8 | #include "Lubrication.cuh" 9 | 10 | #include "Helper_Brownian.cuh" 11 | #include "Helper_Debug.cuh" 12 | #include "Helper_Precondition.cuh" 13 | 14 | #include "hoomd/Saru.h" 15 | using namespace hoomd; 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | // LAPACK and CBLAS 24 | #include "lapacke.h" 25 | #include "cublas_v2.h" 26 | 27 | #ifdef WIN32 28 | #include 29 | #else 30 | #include 31 | #endif 32 | 33 | /*! 34 | \file Brownian_NearField.cu 35 | \brief Defines functions to compute the near-field Brownian Forces 36 | */ 37 | 38 | /*! 
39 | Generate uniform random numbers on particles for Near-field calculation (each in-bounds thread handles one particle and writes six values) 40 | 41 | d_Psi_nf (output) uniform random vector, six entries per particle at indices 6*idx ... 6*idx+5 42 | group_size (input) number of particles 43 | seed (input) seed for random number generation (passed to Saru together with the thread index) 44 | T (input) Temperature 45 | dt (input) Time step 46 | */ 47 | __global__ void Brownian_NearField_RNG_kernel( 48 | float *d_Psi_nf, 49 | unsigned int group_size, 50 | const unsigned int seed, 51 | const float T, 52 | const float dt 53 | ){ 54 | 55 | // Thread index 56 | int idx = blockDim.x * blockIdx.x + threadIdx.x; 57 | 58 | // Check if thread is in bounds, and if so do work 59 | if (idx < group_size) { 60 | 61 | // Initialize random number generator 62 | detail::Saru s(idx, seed); 63 | 64 | // Half-width of the sampling interval, chosen to get the variance right 65 | // 66 | // Fluctuation dissipation says variance is ( 2 * T / dt ) 67 | // 68 | // Variance of uniform random numbers on [ -a, a ] is a^2 / 3 (1/3 for [ -1.0, 1.0 ]) 69 | // so the target variance has to be multiplied by 3 before taking the square root 70 | // 71 | // Therefore the right half-width is sqrt( 3 * ( 2 * T / dt ) ) 72 | float fac = sqrtf( 3.0 * ( 2.0 * T / dt ) ); 73 | 74 | // Generate random numbers and assign to global output (six consecutive draws from the same generator) 75 | d_Psi_nf[ 6 * idx ] = s.f( -fac, fac ); 76 | d_Psi_nf[ 6 * idx + 1 ] = s.f( -fac, fac ); 77 | d_Psi_nf[ 6 * idx + 2 ] = s.f( -fac, fac ); 78 | d_Psi_nf[ 6 * idx + 3 ] = s.f( -fac, fac ); 79 | d_Psi_nf[ 6 * idx + 4 ] = s.f( -fac, fac ); 80 | d_Psi_nf[ 6 * idx + 5 ] = s.f( -fac, fac ); 81 | 82 | } // Check if thread is in bounds 83 | 84 | } 85 | 86 | 87 | /*! 88 | Use Lanczos method to compute RFU^0.5 * psi 89 | 90 | This method is detailed in the publication: 91 | Edmond Chow and Yousef Saad, PRECONDITIONED KRYLOV SUBSPACE METHODS FOR 92 | SAMPLING MULTIVARIATE GAUSSIAN DISTRIBUTIONS, SIAM J. Sci. 
Comput., 2014 93 | 94 | d_FBnf (output) near-field Brownian force (named d_y in Brownian_NearField_Chow_Saad) 95 | d_psi (input) random vector (named d_x in Brownian_NearField_Chow_Saad) 96 | d_group_members (input) ID of particle within integration group 97 | group_size (input) number of particles 98 | box (input) periodic box information 99 | dt (input) integration timestep (no longer a parameter: commented out in the Brownian_NearField_Chow_Saad signature) 100 | pBuffer (input) scratch buffer space for preconditioner 101 | ker_data (input) structure containing kernel launch information 102 | bro_data (input) structure containing Brownian calculation information 103 | res_data (input) structure containing lubrication calculation information 104 | work_data (input) structure containing workspaces 105 | 106 | */ 107 | 108 | 109 | 110 | //zhoge: Re-implemented Chow & Saad (2014) method to sample correlated noise (near-field). 111 | // Lanczos_process performs one Lanczos step: w = A*d_v - beta[0]*d_vm, alpha[0] = d_v . w, d_vp = w - alpha[0]*d_v, beta[1] = ||d_vp||; d_vp is then normalized, or overwritten with d_v if beta[1] < tol_beta. 112 | void Lanczos_process( float *d_vm, //input: previous basis vector 113 | float *d_v, //input: current basis vector 114 | float *d_vp, //output: next basis vector 115 | float *alpha, //output: alpha[0], read back on the host below 116 | float *beta, //output/input: beta[0] is read, beta[1] is written; both are accessed on the host 117 | float tol_beta, //threshold below which beta[1] is treated as a breakdown 118 | int numel, //number of elements in d_vm, d_v and d_vp 119 | const Scalar4 *d_pos, 120 | unsigned int *d_group_members, 121 | const int group_size, 122 | const BoxDim box, 123 | void *pBuffer, 124 | KernelData *ker_data, 125 | ResistanceData *res_data, 126 | WorkData *work_data ) 127 | { 128 | // cuBLAS handle 129 | cublasHandle_t blasHandle = work_data->blasHandle; 130 | 131 | // Apply the preconditioned A to d_v (d_vp = G * A * G^T * d_v, where G^T * G = A^{-1} is the preconditioner) 132 | Precondition_Brownian_RFUmultiply( d_vp, // output 133 | d_v, // input 134 | d_pos, 135 | d_group_members, 136 | group_size, 137 | box, 138 | pBuffer, 139 | ker_data, 140 | res_data ); 141 | 142 | // Project out d_vm (d_vp = d_vp - beta * d_vm) 143 | float scale = -1.0 * beta[0]; 144 | cublasSaxpy( blasHandle, numel, &scale, d_vm, 1, d_vp, 1 ); //d_vp is modified in place 145 | 146 | // The diagonal value associated with d_v (alpha = d_v \cdot d_vp) 147 | cublasSdot( blasHandle, numel, d_v, 1, d_vp, 1, alpha ); 148 | 149 | // Project out d_v (d_vp = d_vp - alpha * 
d_v) 150 | scale = -1.0 * alpha[0]; 151 | cublasSaxpy( blasHandle, numel, &scale, d_v, 1, d_vp, 1 ); //d_vp is modified in place 152 | 153 | // The norm of d_vp, stored as the next sub-diagonal value (beta[1] = || d_vp ||) 154 | cublasSnrm2( blasHandle, numel, d_vp, 1, &beta[1] ); 155 | 156 | // Check if the norm has become very small and if so, set d_vp = d_v (avoids dividing by a near-zero norm) 157 | if ( beta[1] < tol_beta ) 158 | { 159 | cudaMemcpy( d_vp, d_v, numel*sizeof(float), cudaMemcpyDeviceToDevice ); 160 | } 161 | else //otherwise normalize d_vp 162 | { 163 | scale = 1.0 / beta[1]; 164 | cublasSscal( blasHandle, numel, &scale, d_vp, 1 ); //d_vp is modified in place 165 | } 166 | 167 | } 168 | 169 | 170 | 171 | 172 | 173 | 174 | void Brownian_NearField_Chow_Saad( Scalar *d_y, // output: near-field Brownian force 175 | Scalar *d_x, // input: random Gaussian variables 176 | Scalar4 *d_pos, 177 | unsigned int *d_group_members, 178 | unsigned int group_size, 179 | const BoxDim& box, 180 | //Scalar dt, 181 | void *pBuffer, 182 | KernelData *ker_data, 183 | BrownianData *bro_data, 184 | ResistanceData *res_data, 185 | WorkData *work_data) 186 | { 187 | // cuBLAS handle 188 | cublasHandle_t blasHandle = work_data->blasHandle; 189 | 190 | // Constants 191 | int numel = 6 * group_size; //size of v1,v2,...,vm_max, d_x, d_y 192 | int m = bro_data->m_Lanczos_nf; //number of Lanczos iterations in step 1 (either same as last time or reset in Stokes.cc) 193 | int m_max = 100; //m_max-1 is the maximum size of Tm at the end of step 2 (set to 100 in Stokes.cc); NOTE(review): hard-coded here, so it presumably has to match the allocation of work_data->bro_nf_V - confirm 194 | 195 | //debug 196 | if ( m >= m_max-1 ) 197 | { 198 | printf("Illegal condition: m >= m_max-1. 
Program aborted."); 199 | exit(1); 200 | } 201 | 202 | // Host vectors for the main and sub-diagonal values of Tm (m_max entries each, freed at the end of the function) 203 | float *h_alpha = (float *)malloc( (m_max)*sizeof(float) ); 204 | float *h_beta = (float *)malloc( (m_max)*sizeof(float) ); 205 | float *h_alpha1 = (float *)malloc( (m_max)*sizeof(float) ); //buffer 206 | float *h_beta1 = (float *)malloc( (m_max)*sizeof(float) ); //buffer 207 | 208 | // Set the first element of beta to 0 (so the first Lanczos step subtracts nothing from v0) 209 | h_beta[0] = 0.0; 210 | 211 | // Set the tolerance for beta (less than 1e-6 even for single precision because ||vm|| can be << 1) 212 | float tol_beta = 1e-8; 213 | 214 | // Buffer vector for checking convergence (holds the previous approximate solution) 215 | Scalar *d_y0 = work_data->bro_nf_FB_old; 216 | 217 | // Lanczos basis vectors V = [v0, v1, v2, ..., vm_max], v0 is a placeholder; each vector has numel entries 218 | Scalar *d_V = work_data->bro_nf_V; 219 | 220 | // Zero out v0 221 | float scale = 0.0; 222 | cublasSscal( blasHandle, numel, &scale, d_V, 1 ); 223 | 224 | // Initialize v1 = d_x / ||d_x|| (NOTE(review): xnorm is not checked for zero, so an all-zero d_x would make scale infinite - confirm callers never pass one) 225 | float xnorm; 226 | cublasSnrm2( blasHandle, numel, d_x, 1, &xnorm ); 227 | 228 | cudaMemcpy( &d_V[numel], d_x, numel*sizeof(float), cudaMemcpyDeviceToDevice ); 229 | 230 | scale = 1.0 / xnorm; 231 | cublasSscal( blasHandle, numel, &scale, &d_V[numel], 1 ); 232 | 233 | // 234 | // Step 1: Build Vm and Tm via the Lanczos process 235 | // 236 | for ( int j = 0; j < m; ++j ) //iterate at most m times 237 | { 238 | // Find Vm and Tm that approximately satisfy Vm^T * A * Vm = Tm 239 | Lanczos_process( &d_V[ j *numel ], //input 240 | &d_V[ (j+1)*numel ], //input 241 | &d_V[ (j+2)*numel ], //output 242 | &h_alpha[ j ], //output 243 | &h_beta[ j ], //input [j] / output [j+1] 244 | tol_beta, 245 | numel, 246 | d_pos, 247 | d_group_members, 248 | group_size, 249 | box, 250 | pBuffer, 251 | ker_data, 252 | res_data, 253 | work_data ); 254 | 255 | // Stop if beta becomes very small 256 | if ( h_beta[j+1] < tol_beta ) 257 | { 258 | m = j+1; //plus 1 because one iteration was done when j=0 259 | break; 
260 | } 261 | } 262 | 263 | ////debug 264 | //printf("m = %i\n",m); 265 | //for (int i=0; i bro_data->tol and m < m_max-1 ) 300 | { 301 | // Iteratively increase m (one more Lanczos step per pass) 302 | Lanczos_process( &d_V[ m *numel ], //input 303 | &d_V[ (m+1)*numel ], //input 304 | &d_V[ (m+2)*numel ], //output 305 | &h_alpha[ m ], //output 306 | &h_beta[ m ], //input [m] / output [m+1] 307 | tol_beta, 308 | numel, 309 | d_pos, 310 | d_group_members, 311 | group_size, 312 | box, 313 | pBuffer, 314 | ker_data, 315 | res_data, 316 | work_data ); 317 | 318 | // Compute the new approximate solution, d_y 319 | Sqrt_multiply( &d_V[ numel ], //input 320 | h_alpha, //input 321 | h_beta, //input 322 | h_alpha1, //input (buffer) 323 | h_beta1, //input (buffer) 324 | m+1, //input 325 | d_y, //output 326 | numel, 327 | group_size, 328 | ker_data, 329 | work_data ); 330 | 331 | 332 | // Compute relative error = || d_y0 - d_y || / || d_y || 333 | scale = -1.0; 334 | cublasSaxpy( blasHandle, numel, &scale, d_y, 1, d_y0, 1 ); //d_y0 is modified in place 335 | cublasSnrm2( blasHandle, numel, d_y0, 1, &error ); 336 | cublasSnrm2( blasHandle, numel, d_y, 1, &ynorm ); 337 | error /= ynorm; 338 | 339 | ////debug 340 | //printf("Chow & Saad (near-field) iteration %3i, relative error %13.6e (norm of d_y %13.6e)\n",m,error,ynorm); 341 | 342 | // Update solution (d_y0 becomes the reference for the next convergence check) 343 | cudaMemcpy( d_y0, d_y, numel*sizeof(float), cudaMemcpyDeviceToDevice ); 344 | 345 | // Stop if beta becomes very small (even if the error is not small enough) 346 | if ( h_beta[m+1] < tol_beta ) 347 | { 348 | ++m; 349 | break; 350 | } 351 | 352 | // Increment m 353 | ++m; 354 | 355 | } 356 | 357 | ////debug 358 | //printf("\n"); 359 | 360 | // Finalize (only a warning is printed if the tolerance was not reached; the abort is commented out) 361 | if ( error > bro_data->tol ) 362 | { 363 | printf("\nChow & Saad (near-field) didn't converge after %i iterations.\n",m-1); 364 | printf("Final relative error %13.6e\n",error); 365 | printf("Last beta %13.6e\n",h_beta[m]); 366 | //printf("\nProgram aborted.\n"); 367 | //exit(1); 368 | } 369 | 370 | 
// Save the number of required iterations (minus 1 because incremented at the end); this is the starting count for the next call 371 | bro_data->m_Lanczos_nf = m-1; 372 | 373 | //// Undo the preconditioning so that the result has the proper variance 374 | //Precondition_Brownian_Undo( d_y, //input/output 375 | // group_size, 376 | // ker_data, 377 | // res_data ); 378 | 379 | // Rescale by original norm of d_x (v1 was normalized by xnorm before the Lanczos process) 380 | cublasSscal( blasHandle, numel, &xnorm, d_y, 1 ); 381 | 382 | 383 | // Clean up 384 | free(h_alpha); 385 | free(h_alpha1); 386 | free(h_beta); 387 | free(h_beta1); 388 | 389 | } 390 | 391 | 392 | 393 | /* 394 | Wrap all the functions required to compute the near-field Brownian force: fill work_data->bro_nf_psi with Gaussian random numbers, then call Brownian_NearField_Chow_Saad. 395 | 396 | d_FBnf (output) near-field Brownian force 397 | d_pos (input) particle positions 398 | d_group_members (input) ID of particle within integration group 399 | group_size (input) number of particles 400 | box (input) periodic box information 401 | dt (input) integration timestep; pBuffer (input) scratch buffer forwarded to Brownian_NearField_Chow_Saad 402 | ker_data (input) structure containing kernel launch information 403 | bro_data (input) structure containing Brownian calculation information 404 | res_data (input) structure containing lubrication calculation information 405 | work_data (input) structure containing workspaces 406 | */ 407 | void Brownian_NearField_Force(Scalar *d_FBnf, // output 408 | Scalar4 *d_pos, 409 | unsigned int *d_group_members, 410 | unsigned int group_size, 411 | const BoxDim& box, 412 | Scalar dt, 413 | void *pBuffer, 414 | KernelData *ker_data, 415 | BrownianData *bro_data, 416 | ResistanceData *res_data, 417 | WorkData *work_data 418 | ) 419 | { 420 | 421 | //// Kernel Information 422 | //dim3 grid = ker_data->particle_grid; 423 | //dim3 threads = ker_data->particle_threads; 424 | 425 | 426 | // Initialize vectors 427 | float *d_Psi_nf = work_data->bro_nf_psi; 428 | 429 | //// Generate the random vectors on each particle 430 | //Brownian_NearField_RNG_kernel<<>>( 431 | // d_Psi_nf, //output 432 | // group_size, 433 | // bro_data->seed_nf, 434 | // 
bro_data->T, 435 | // dt); 436 | 437 | //zhoge: use cuRand to generate Gaussian variables 438 | curandGenerator_t gen; 439 | unsigned int N_random = 6*group_size; //6 because force (3) and torque (3); always even, which curandGenerateNormal requires for pseudo-random generators 440 | float std_bro = sqrtf( 2.0 * bro_data->T / dt ); //standard deviation of the Brownian force 441 | curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_PHILOX4_32_10); //fastest generator 442 | curandSetPseudoRandomGeneratorSeed(gen, bro_data->seed_nf); //set the seed (different from the ff); NOTE(review): the generator is created and re-seeded on every call, so this presumably relies on seed_nf changing between calls - confirm in the caller 443 | curandGenerateNormal(gen, d_Psi_nf, N_random, 0.0f, std_bro); //mean 0, std as specified 444 | curandDestroyGenerator(gen); 445 | 446 | 447 | // Apply the Chow & Saad method to sample the near-field force 448 | Brownian_NearField_Chow_Saad( d_FBnf, //output 449 | d_Psi_nf, //input 450 | d_pos, 451 | d_group_members, 452 | group_size, 453 | box, 454 | //dt, 455 | pBuffer, 456 | ker_data, 457 | bro_data, 458 | res_data, 459 | work_data); 460 | 461 | // Clean Up (only resets the local pointer; work_data->bro_nf_psi is untouched) 462 | d_Psi_nf = NULL; 463 | 464 | } 465 | -------------------------------------------------------------------------------- /source/Brownian_NearField.cuh: -------------------------------------------------------------------------------- 1 | // This file is part of the PSEv3 plugin, released under the BSD 3-Clause License 2 | // 3 | // Andrew Fiore 4 | 5 | /*! \file Brownian_NearField.cuh 6 | \brief Declares GPU kernel code for Near-Field Brownian Calculation 7 | */ 8 | 9 | #include "hoomd/ParticleData.cuh" 10 | #include "hoomd/HOOMDMath.h" 11 | 12 | #include 13 | 14 | #include "DataStruct.h" 15 | 16 | #include 17 | #include 18 | 19 | //! Define the kernel 20 | #ifndef __BROWNIAN_NEARFIELD_CUH__ 21 | #define __BROWNIAN_NEARFIELD_CUH__ 22 | 23 | //! 
Definition for complex variable storage 24 | #ifdef SINGLE_PRECISION 25 | #define CUFFTCOMPLEX cufftComplex 26 | #else 27 | #define CUFFTCOMPLEX cufftComplex 28 | #endif 29 | 30 | __global__ void Brownian_NearField_RNG_kernel( 31 | Scalar *d_Psi_nf, 32 | unsigned int N, 33 | const unsigned int seed, 34 | const float T, 35 | const float dt 36 | ); 37 | 38 | 39 | void Brownian_NearField_Force( 40 | Scalar *d_FBnf, // output 41 | Scalar4 *d_pos, 42 | unsigned int *d_group_members, 43 | unsigned int group_size, 44 | const BoxDim& box, 45 | Scalar dt, 46 | void *pBuffer, 47 | KernelData *ker_data, 48 | BrownianData *bro_data, 49 | ResistanceData *res_data, 50 | WorkData *work_data 51 | ); 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /source/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Maintainer: Andrew M. Fiore 2 | 3 | set(COMPONENT_NAME PSEv3) 4 | 5 | set(_${COMPONENT_NAME}_sources 6 | module.cc 7 | Stokes.cc 8 | Stokes_ResistanceTable.cc 9 | Stokes_SparseMath.cc 10 | ShearFunction.cc 11 | ShearFunctionWrap.cc 12 | SpecificShearFunction.cc 13 | VariantShearFunction.cc 14 | rcm.cpp 15 | ) 16 | 17 | set(_${COMPONENT_NAME}_cu_sources 18 | Brownian_FarField.cu 19 | Brownian_NearField.cu 20 | Helper_Brownian.cu 21 | Helper_Debug.cu 22 | Helper_Integrator.cu 23 | Helper_Mobility.cu 24 | Helper_Precondition.cu 25 | Helper_Stokes.cu 26 | Helper_Saddle.cu 27 | Integrator.cu 28 | Lubrication.cu 29 | Mobility.cu 30 | Precondition.cu 31 | Saddle.cu 32 | Stokes.cu 33 | Solvers.cu 34 | ) 35 | 36 | if (ENABLE_CUDA) 37 | CUDA_COMPILE(_CUDA_GENERATED_FILES ${_${COMPONENT_NAME}_cu_sources} OPTIONS ${CUDA_ADDITIONAL_OPTIONS} SHARED) 38 | endif (ENABLE_CUDA) 39 | 40 | pybind11_add_module (_${COMPONENT_NAME} SHARED ${_${COMPONENT_NAME}_sources} ${_CUDA_GENERATED_FILES} NO_EXTRAS) 41 | if (APPLE) 42 | set_target_properties(_${COMPONENT_NAME} PROPERTIES INSTALL_RPATH 
"@loader_path/..;@loader_path") 43 | else() 44 | set_target_properties(_${COMPONENT_NAME} PROPERTIES INSTALL_RPATH "\$ORIGIN/..;\$ORIGIN") 45 | endif() 46 | 47 | # Find additional libraries to be linked for the plugin (on failure find_library sets <VAR> to "<VAR>-NOTFOUND", which if(NOT <VAR>) treats as false) 48 | find_library( LAPACKE_LIBRARIES lapacke PATHS /usr/local/lapack-3.6.0/lib/ ) 49 | find_library( LAPACK_LIBRARIES lapack PATHS /usr/local/lapack-3.6.0/lib/ ) 50 | find_library( BLAS_LIBRARIES blas PATHS /usr/local/lapack-3.6.0/lib/ ) 51 | find_library( CBLAS_LIBRARIES cblas PATHS /usr/local/lapack-3.6.0/lib/ ) 52 | if( NOT LAPACKE_LIBRARIES ) 53 | message(FATAL_ERROR "lapacke libraries not found") 54 | endif() 55 | message(STATUS "found lapacke libraries: ${LAPACKE_LIBRARIES}") 56 | if( NOT LAPACK_LIBRARIES ) 57 | message(FATAL_ERROR "lapack libraries not found") 58 | endif() 59 | message(STATUS "found lapack libraries: ${LAPACK_LIBRARIES}") 60 | set( LAPACK_LIBRARIES ${LAPACKE_LIBRARIES} ${LAPACK_LIBRARIES} ${CBLAS_LIBRARIES} ${BLAS_LIBRARIES} ) 61 | 62 | # Find CUBLAS 63 | find_library( CUBLAS_LIBRARY cublas PATHS /usr/local/cuda/lib64 ) 64 | if( NOT CUBLAS_LIBRARY ) 65 | message(FATAL_ERROR "CUBLAS Library not found") 66 | endif() 67 | message(STATUS "found CUBLAS library: ${CUBLAS_LIBRARY}") 68 | set( CUBLAS_LIBRARY ${CUBLAS_LIBRARY} ) 69 | 70 | # link the library to its dependencies 71 | target_link_libraries(_${COMPONENT_NAME} PRIVATE ${HOOMD_LIBRARIES} ${LAPACK_LIBRARIES} ${CUBLAS_LIBRARY}) 72 | 73 | # if we are compiling with MPI support built in, set appropriate 74 | # compiler/linker flags 75 | if (ENABLE_MPI) 76 | if(MPI_COMPILE_FLAGS) 77 | set_target_properties(_${COMPONENT_NAME} PROPERTIES COMPILE_FLAGS "${MPI_CXX_COMPILE_FLAGS}") 78 | endif(MPI_COMPILE_FLAGS) 79 | if(MPI_LINK_FLAGS) 80 | set_target_properties(_${COMPONENT_NAME} PROPERTIES LINK_FLAGS "${MPI_CXX_LINK_FLAGS}") 81 | endif(MPI_LINK_FLAGS) 82 | endif(ENABLE_MPI) 83 | 84 | 
fix_cudart_rpath(_${COMPONENT_NAME}) 85 | 86 | # install the library 87 | install(TARGETS _${COMPONENT_NAME} 88 | LIBRARY DESTINATION ${PYTHON_MODULE_BASE_DIR}/${COMPONENT_NAME} 89 | ) 90 | 91 | ################ Python only modules 92 | # copy python modules to the build directory to make it a working python package 93 | MACRO(copy_file file) 94 | add_custom_command ( 95 | OUTPUT ${file} 96 | DEPENDS ${file} 97 | POST_BUILD 98 | COMMAND ${CMAKE_COMMAND} 99 | ARGS -E copy ${CMAKE_CURRENT_SOURCE_DIR}/${file} ${CMAKE_CURRENT_BINARY_DIR}/${file} 100 | COMMENT "Copy hoomd/${COMPONENT_NAME}/${file}" 101 | ) 102 | ENDMACRO(copy_file) 103 | 104 | set(files 105 | __init__.py 106 | integrate.py 107 | shear_function.py 108 | variant.py 109 | ) 110 | 111 | install(FILES ${files} 112 | DESTINATION ${PYTHON_MODULE_BASE_DIR}/${COMPONENT_NAME} 113 | ) 114 | 115 | foreach(file ${files}) 116 | copy_file(${file}) 117 | endforeach() 118 | 119 | add_custom_target(copy_${COMPONENT_NAME} ALL DEPENDS ${files}) 120 | 121 | if (BUILD_TESTING) 122 | add_subdirectory(test-py) 123 | endif() 124 | -------------------------------------------------------------------------------- /source/DataStruct.h: -------------------------------------------------------------------------------- 1 | // Maintainer: Andrew Fiore 2 | // Modified by Zhouyang Ge 3 | 4 | /*! \file DataStruct.h 5 | \brief Defines data structures to hold related variables for the different 6 | parts of the calculation 7 | */ 8 | 9 | 10 | #include "hoomd/HOOMDMath.h" 11 | 12 | #include 13 | #include 14 | #include 15 | #include "cublas_v2.h" 16 | 17 | #ifndef __DATA_STRUCT_H__ 18 | #define __DATA_STRUCT_H__ 19 | 20 | #ifdef SINGLE_PRECISION 21 | #define CUFFTCOMPLEX cufftComplex 22 | #else 23 | #define CUFFTCOMPLEX cufftComplex 24 | #endif 25 | 26 | //! 
Declare a structure to hold all of the kernel parameters 27 | struct KernelData 28 | { 29 | 30 | dim3 particle_grid; //!< Particle-based calculations CUDA kernel grid dimension 31 | dim3 particle_threads; //!< Particle-based calculations CUDA kernel block dimension 32 | 33 | int grid_grid; //!< FFT Grid-based calculations CUDA kernel grid dimension 34 | int grid_threads; //!< FFT Grid-based calculations CUDA kernel block dimension 35 | 36 | unsigned int NxNyNz; //!< Total number of FFT grid points 37 | 38 | }; 39 | 40 | //! Declare a structure to hold all of the Brownian calculation information 41 | struct BrownianData 42 | { 43 | float tol; //!< Tolerance for the Brownian approximation (should be same as all other errors); compared with the relative change between successive iterates in the near-field Chow & Saad iteration 44 | 45 | unsigned int timestep; //!< Simulation time step (used by RNG) 46 | 47 | unsigned int seed_ff_rs; //!< Seed for the RNG for far-field Brownian calculation, real space 48 | unsigned int seed_ff_ws; //!< Seed for the RNG for far-field Brownian calculation, wave space 49 | unsigned int seed_nf; //!< Seed for the RNG for near-field Brownian calculation 50 | unsigned int seed_rfd; //!< Seed for the RNG for RFD 51 | 52 | int m_Lanczos_ff; //!< Number of Lanczos iterations for the far-field Brownian calculation 53 | int m_Lanczos_nf; //!< Number of Lanczos iterations for the near-field Brownian calculation (read as the starting count and overwritten with the count actually used by Brownian_NearField_Chow_Saad) 54 | 55 | float T; //!< Temperature 56 | 57 | float rfd_epsilon; //!< epsilon for RFD approximation 58 | 59 | Scalar *rfd_rhs; //!< (DEVICE) RFD right-hand side 60 | Scalar *rfd_sol; //!< (DEVICE) RFD solution vector 61 | 62 | }; 63 | 64 | //! 
Declare a structure to hold all of the mobility calculation information 65 | struct MobilityData 66 | { 67 | Scalar xi; //!< Ewald splitting parameter 68 | 69 | Scalar ewald_cut; //!< Ewald sum real space cutoff distance 70 | Scalar ewald_dr; //!< Ewald sum real space tabulation discretization 71 | int ewald_n; //!< Ewald sum real space tabulation number of entries 72 | Scalar4 *ewald_table; //!< Ewald sum real space table 73 | 74 | Scalar2 self; //!< Ewald sum self piece 75 | 76 | unsigned int *nneigh; //!< Ewald sum real space number of neighbors 77 | unsigned int *nlist; //!< Ewald sum real space neighbor list 78 | unsigned int *headlist; //!< Ewald sum real space headlist 79 | 80 | Scalar eta; //!< Ewald sum wave space spectral Ewald decay parameter 81 | int P; //!< Ewald sum wave space spectral Ewald support size 82 | Scalar3 gridh; //!< Ewald sum wave space grid spacing (in real space) 83 | Scalar4 *gridk; //!< Ewald sum wave space grid vectors 84 | CUFFTCOMPLEX *gridX; //!< Ewald sum wave space gridded force, x-component 85 | CUFFTCOMPLEX *gridY; //!< presumably the y-component of the gridded force (by analogy with gridX) - confirm 86 | CUFFTCOMPLEX *gridZ; //!< presumably the z-component of the gridded force (by analogy with gridX) - confirm 87 | CUFFTCOMPLEX *gridXX; //!< Ewald sum wave space gridded couplet, xx-component 88 | CUFFTCOMPLEX *gridXY; //!< presumably the remaining couplet components, named by their indices (gridXY ... gridZY); no gridZZ member is declared - confirm 89 | CUFFTCOMPLEX *gridXZ; 90 | CUFFTCOMPLEX *gridYX; 91 | CUFFTCOMPLEX *gridYY; 92 | CUFFTCOMPLEX *gridYZ; 93 | CUFFTCOMPLEX *gridZX; 94 | CUFFTCOMPLEX *gridZY; 95 | cufftHandle plan; //!< Ewald sum wave space CUFFT plan 96 | int Nx; //!< Ewald sum wave space number of grid vectors in each direction 97 | int Ny; //!< Ewald sum wave space number of grid vectors in each direction 98 | int Nz; //!< Ewald sum wave space number of grid vectors in each direction 99 | 100 | }; 101 | 102 | //! 
Declare a structure to hold all of the resistance calculation information 103 | struct ResistanceData 104 | { 105 | 106 | float rlub; //!< cutoff distance for lubrication 107 | float rp; //!< cutoff distance for preconditioner 108 | 109 | unsigned int *nneigh; //!< Lubrication interaction number of neighbors 110 | unsigned int *nlist; //!< Lubrication interaction neighbor list 111 | unsigned int *headlist; //!< Lubrication interaction headlist 112 | 113 | unsigned int *nneigh_pruned; //!< Number of neighbors for pruned neighborlist 114 | unsigned int *headlist_pruned; //!< Headlist for pruned neighborlist 115 | unsigned int *nlist_pruned; //!< Pruned neighborlist 116 | 117 | int nnz; //!< Lubrication preconditioner Number of non-zero entries 118 | unsigned int *nneigh_less; //!< Lubrication preconditioner Number of neighbors with index less than particle 119 | unsigned int *NEPP; //!< Lubrication preconditioner Number of entries per-particle 120 | unsigned int *offset; //!< Lubrication preconditioner Offset into array 121 | 122 | int *L_RowInd; //!< Lubrication preconditioner, sparse storage, row indices 123 | int *L_RowPtr; //!< Lubrication preconditioner, sparse storage, row pointers 124 | int *L_ColInd; //!< Lubrication preconditioner, sparse storage, column indices 125 | float *L_Val; //!< Lubrication preconditioner, sparse storage, values 126 | 127 | float *table_dist; //!< Resistance tabulation distances 128 | float *table_vals; //!< Resistance tabulation values 129 | float table_min; //!< Resistance tabulation shortest distance 130 | float table_dr; //!< Resistance tabulation discretization 131 | 132 | cusolverSpHandle_t soHandle; //!< Opaque handle to cuSOLVER 133 | cusparseHandle_t spHandle; //!< Opaque handle to cuSPARSE 134 | cusparseStatus_t spStatus; //!< Status output for cuSPARSE operations 135 | cusparseMatDescr_t descr_R; //!< Matrix description for the resistance tensor (preconditioner) 136 | cusparseMatDescr_t descr_L; //!< Matrix description for 
the IChol of resistance tensor 137 | cusparseOperation_t trans_L; //!< Specify to not transpose IChol 138 | cusparseOperation_t trans_Lt; //!< Specify to transpose IChol 139 | csric02Info_t info_R; //!< Info on the resistance tensor 140 | csrsv2Info_t info_L; //!< Info on the IChol matrix 141 | csrsv2Info_t info_Lt; //!< Info on the transpose of the IChol matrix 142 | cusparseSolvePolicy_t policy_R; //!< Solver policy for R 143 | cusparseSolvePolicy_t policy_L; //!< Solver policy for L 144 | cusparseSolvePolicy_t policy_Lt; //!< Solver policy for L^T 145 | 146 | int pBufferSize; //!< Buffer size for cuSPARSE operations 147 | 148 | float *Scratch1; //!< Scratch vector for in-place calculations (size 6*N) 149 | float *Scratch2; //!< Scratch vector for in-place calculations (size 17*N) 150 | float *Scratch3; //!< Scratch vector for re-ordering values (size nnz) 151 | 152 | int *prcm; //!< Reverse-Cuthill-McKee permutation vector 153 | 154 | int *HasNeigh; //!< List for whether a particle has neighbors or not 155 | float *Diag; //!< Diagonal preconditioner for Brownian calculation 156 | 157 | float ichol_relaxer; //!< magnitude of term to add to diagonal for IChol 158 | bool ichol_converged; //!< flag for whether the incomplete Cholesky converged 159 | 160 | 161 | // Interparticle force parameters 162 | float m_ndsr; //non-dimensional shear rate 163 | float m_k_n; //collision spring constant 164 | float m_kappa; //inverse Debye length for electrostatic repulsion 165 | float m_beta; // ratio of Hamaker constant and electrostatic force scale 166 | float m_epsq; // square root of the regularization term for vdW 167 | // float m_sqm_B1; // coef for the B1 mode of spherical squirmers 168 | // float m_sqm_B2; // coef for the B2 mode of spherical squirmers 169 | 170 | }; 171 | 172 | //! 
//! Declare a structure to hold work spaces required throughout the calculations
//
// Collects the cuBLAS handle and the scratch/work arrays used by the RK2
// integrator, the far- and near-field Brownian (Lanczos) calculations, the
// saddle point solve, the mobility calculations and the preconditioner.
// Entries tagged (DEVICE) are documented by the original authors as device
// arrays. NOTE(review): array lengths and ownership are not visible from here.
struct WorkData
{

    cublasHandle_t blasHandle; //!< Opaque handle for cuBLAS operations //zhoge: was in res_data

    //zhoge: RK2 midstep storage
    Scalar4 *pos_rk1;
    Scalar3 *ori_rk1;

    // Dot product partial sum
    Scalar *dot_sum;  //!< Partial dot product sum
    float *bro_gauss; //zhoge: Gaussian random variables

    // Variables for far-field Lanczos iteration
    Scalar4 *bro_ff_psi;    //!< (DEVICE) random vector for far-field real space
    Scalar4 *bro_ff_UBreal; //!< (DEVICE) real space far-field Brownian displacement
    Scalar4 *bro_ff_Mpsi;   //!< (DEVICE) Product of mobility with the random vector

    //zhoge: re-implement ff Chow & Saad
    Scalar *bro_ff_V1;      //!< (DEVICE) Basis vectors for Lanczos iteration
    Scalar *bro_ff_UB_new1; //!< (DEVICE) New value of displacement -- NOTE(review): the original comment said "Old"; "New" is inferred from the field name, confirm
    Scalar *bro_ff_UB_old1; //!< (DEVICE) Old value of displacement

    // Variables for near-field Lanczos iteration
    Scalar *bro_nf_Tm;     //!< (DEVICE) Tri-diagonal matrix for square root calculation
    Scalar *bro_nf_V;      //!< (DEVICE) Basis vectors for Lanczos iteration
    Scalar *bro_nf_FB_old; //!< (DEVICE) Old value of displacement
    Scalar *bro_nf_psi;    //!< (DEVICE) Random vector for near-field Brownian calculation

    Scalar *saddle_psi;       //!< (DEVICE) Random vector for RFD
    Scalar4 *saddle_posPrime; //!< (DEVICE) Displaced position for RFD
    Scalar *saddle_rhs;       //!< (DEVICE) Saddle point solve right-hand side
    Scalar *saddle_solution;  //!< (DEVICE) Saddle point solve solution vector

    Scalar4 *mob_couplet;      //!< (DEVICE) Placeholder for couplet
    Scalar4 *mob_delu;         //!< (DEVICE) Placeholder for velocity gradient
    Scalar4 *mob_vel1;         //!< (DEVICE) Placeholder for velocity
    Scalar4 *mob_vel2;         //!< (DEVICE) Another
    Scalar4 *mob_delu1;        //!< (DEVICE) Placeholder for velocity gradient
    Scalar4 *mob_delu2;        //!< (DEVICE) Another
    Scalar4 *mob_vel;          //!< (DEVICE) Storage for velocity
    Scalar4 *mob_AngvelStrain; //!< (DEVICE) Storage for angular velocity and rate of strain
    Scalar4 *mob_net_force;    //!< (DEVICE) Storage for net force
    Scalar4 *mob_TorqueStress; //!< (DEVICE) Storage for torque and stresslet


    int *precond_scratch;   //!< (DEVICE) Placeholder for preconditioning copies
    int *precond_map;       //!< (DEVICE) Map for RCM reordering
    Scalar *precond_backup; //!< (DEVICE) Backup IChol values if need to increase diagonal

};


#endif

--------------------------------------------------------------------------------
/source/Helper_Brownian.cuh:
--------------------------------------------------------------------------------
// This file is part of the PSEv3 plugin, released under the BSD 3-Clause License
//
// Andrew Fiore
// Zhouyang Ge

/*! \file Helper_Brownian.cuh
    \brief Declares GPU kernel code for helper functions in Brownian calculations.
*/
#include "hoomd/ParticleData.cuh"
#include "hoomd/HOOMDMath.h"
#include "DataStruct.h"

// NOTE(review): the header name of the next #include is missing in this copy of
// the file (presumably an angle-bracket name lost in extraction) -- restore it
// from the upstream source.
#include

//! Define the step_one kernel
#ifndef __HELPER_BROWNIAN_CUH__
#define __HELPER_BROWNIAN_CUH__

//!
//! Definition for complex variable storage
// Both branches expand to cufftComplex, so CUFFTCOMPLEX is the same type
// whether or not SINGLE_PRECISION is defined.
// NOTE(review): a double-precision build would presumably need
// cufftDoubleComplex in the #else branch -- confirm before changing.
#ifdef SINGLE_PRECISION
#define CUFFTCOMPLEX cufftComplex
#else
#define CUFFTCOMPLEX cufftComplex
#endif

// Prototypes of the Brownian helper kernels and host routines.
// NOTE(review): the definitions are not visible from this header, so the
// behavior of each routine is not described here; see the matching .cu file.

__global__ void Brownian_FarField_Dot1of2_kernel(Scalar4 *d_a, Scalar4 *d_b, Scalar *dot_sum, unsigned int group_size, unsigned int *d_group_members);

__global__ void Brownian_FarField_Dot2of2_kernel(Scalar *dot_sum, unsigned int num_partial_sums);

__global__ void Brownian_FarField_LanczosMatrixMultiply_kernel(Scalar4 *d_A, Scalar *d_x, Scalar4 *d_b, unsigned int group_size, int m);

__global__ void Brownian_NearField_LanczosMatrixMultiply_kernel(
    Scalar *d_A,
    Scalar *d_x,
    Scalar *d_b,
    unsigned int group_size,
    int numel,
    int m
    );

__global__ void Brownian_FarField_AddGrids_kernel(CUFFTCOMPLEX *d_a, CUFFTCOMPLEX *d_b, CUFFTCOMPLEX *d_c, unsigned int NxNyNz);

__global__ void Brownian_Farfield_LinearCombinationFTS_kernel(Scalar4 *d_a, Scalar4 *d_b, Scalar4 *d_c, Scalar coeff_a, Scalar coeff_b, unsigned int group_size, unsigned int *d_group_members);

//void Brownian_Sqrt(
//    int m,
//    float *alpha,
//    float *beta,
//    float *alpha_save,
//    float *beta_save,
//    float *W,
//    float *W1,
//    float *Tm,
//    float *d_Tm
//    );


//zhoge
// Host-side routine; the d_/h_ prefixes presumably mark device and host
// pointers respectively -- confirm against the definition.
void Sqrt_multiply( float *d_V,      //input
                    float *h_alpha,  //input
                    float *h_beta,   //input
                    float *h_alpha1, //input
                    float *h_beta1,  //input
                    int m,           //input
                    float *d_y,      //output
                    int numel,
                    int group_size,
                    KernelData *ker_data,
                    WorkData *work_data );


#endif

--------------------------------------------------------------------------------
/source/Helper_Debug.cuh:
--------------------------------------------------------------------------------
// This file is part of the PSEv3 plugin, released under the BSD 3-Clause License
//
// Andrew
Fiore 4 | 5 | /*! \file Helper_Debug.cuh 6 | \brief Declares helper functions for error checking and debugging. 7 | */ 8 | #include "hoomd/ParticleData.cuh" 9 | #include "hoomd/HOOMDMath.h" 10 | 11 | #include 12 | 13 | #include "DataStruct.h" 14 | 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | //! Define the step_one kernel 21 | #ifndef __HELPER_DEBUG_CUH__ 22 | #define __HELPER_DEBUG_CUH__ 23 | 24 | // Error checking 25 | #ifndef __ERRCHK_CUH__ 26 | #define __ERRCHK_CUH__ 27 | 28 | #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } 29 | /*! 30 | \param code returned error code 31 | \param file which file the error occured in 32 | \param line which line error check was tripped 33 | \param abort whether to kill code upon error trigger 34 | */ 35 | inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) 36 | { 37 | if (code != cudaSuccess) 38 | { 39 | fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); 40 | if (abort) exit(code); 41 | } 42 | } 43 | #endif 44 | 45 | //! 
//! Definition for complex variable storage
// Both branches expand to cufftComplex, so CUFFTCOMPLEX is the same type
// whether or not SINGLE_PRECISION is defined.
// NOTE(review): a double-precision build would presumably need
// cufftDoubleComplex in the #else branch -- confirm before changing.
#ifdef SINGLE_PRECISION
#define CUFFTCOMPLEX cufftComplex
#else
#define CUFFTCOMPLEX cufftComplex
#endif

// Prototypes of the host-side debugging helpers.
// NOTE(review): the definitions are not visible from this header; the d_
// prefix on pointer arguments presumably marks device memory -- confirm
// against Helper_Debug.cu.

void Debug_HasNaN( float *d_vec, int N );

void Debug_HasZeroDiag( float *d_Diag, int N );

void Debug_CSRzeroDiag( int *d_RowPtr, int *d_ColInd, float *d_Val, int group_size, int nnz );

void Debug_StatusCheck_cuSparse( cusparseStatus_t spStatus, const char *name );

void Debug_StatusCheck_cuSolver( cusolverStatus_t soStatus );

void Debug_PrintVector_Int( int *d_vec, int N, const char *name );

void Debug_PrintVector_Float( float *d_vec, int N, const char *name );

void Debug_PrintVector_CSR( float *d_Val, int *d_RowPtr, int *d_ColInd, int nrows, int nnz, const char *name );

void Debug_PrintVector_CSR_forMatlab( int *d_RowPtr, int *d_ColInd, float *d_Val, int nrows, int nnz );

void Debug_PrintVector_COO( float *d_Val, int *d_RowInd, int *d_ColInd, int nnz, const char *name );

void Debug_PrintVector_SpIndexing( const unsigned int *d_n_neigh, const unsigned int *d_offset, const unsigned int *d_NEPP, int N );

void Debug_PrintPos( Scalar4 *d_pos, int N );

void Debug_Lattice_SpinViscosity(
    MobilityData *mob_data,
    ResistanceData *res_data,
    KernelData *ker_data,
    WorkData *work_data,
    Scalar4 *d_pos,
    unsigned int *d_group_members,
    int group_size,
    const BoxDim box
    );

void Debug_Lattice_ShearViscosity(
    MobilityData *mob_data,
    ResistanceData *res_data,
    KernelData *ker_data,
    WorkData *work_data,
    Scalar4 *d_pos,
    unsigned int *d_group_members,
    int group_size,
    const BoxDim box
    );

void Debug_Random_Dss1(
    ResistanceData *res_data,
    KernelData *ker_data,
    BrownianData *bro_data,
    MobilityData *mob_data,
    Scalar4 *d_pos,
    unsigned int *d_group_members,
    int group_size,
    int3 *d_image,
    const BoxDim box,
    float dt
    );

void Debug_Random_Dss2(
    ResistanceData *res_data,
    KernelData *ker_data,
    BrownianData *bro_data,
    MobilityData *mob_data,
    Scalar4 *d_pos,
    unsigned int *d_group_members,
    int group_size,
    int3 *d_image,
    const BoxDim box,
    float dt
    );



#endif

--------------------------------------------------------------------------------
/source/Helper_Integrator.cu:
--------------------------------------------------------------------------------
// This file is part of the PSEv3 plugin, released under the BSD 3-Clause License
//
// Andrew Fiore

#include "Helper_Integrator.cuh"

#include "hoomd/Saru.h"
#include "hoomd/TextureTools.h"
using namespace hoomd;

// NOTE(review): the bare #include lines below have no header name in this copy
// of the file (presumably angle-bracket names lost in extraction) -- restore
// them from the upstream source.
#include
#include

#include "lapacke.h"
#include "cblas.h"

#ifdef WIN32
#include
#else
#include
#endif


/*!
    Helper_Integrator.cu

    Helper functions for saddle point integration
*/

/*!
    Generate random numbers on particles.
32 | 33 | d_psi (output) random vector 34 | n (input) number of particles 35 | timestep (input) length of time step 36 | seed (input) seed for random number generation 37 | 38 | */ 39 | __global__ void Integrator_RFD_RandDisp_kernel( 40 | float *d_psi, 41 | unsigned int N, 42 | const unsigned int seed 43 | ){ 44 | 45 | int idx = blockDim.x * blockIdx.x + threadIdx.x; 46 | 47 | // Check if thread is in bounds 48 | if (idx < N) { 49 | 50 | // Initialize random seed 51 | detail::Saru s(idx, seed); 52 | 53 | // Square root of 3 54 | float sqrt3 = 1.732050807568877; 55 | 56 | // Call the random number generator 57 | float x1 = s.f( -sqrt3, sqrt3 ); 58 | float y1 = s.f( -sqrt3, sqrt3 ); 59 | float z1 = s.f( -sqrt3, sqrt3 ); 60 | float x2 = s.f( -sqrt3, sqrt3 ); 61 | float y2 = s.f( -sqrt3, sqrt3 ); 62 | float z2 = s.f( -sqrt3, sqrt3 ); 63 | 64 | // Write to output 65 | d_psi[ 6*idx + 0 ] = x1; 66 | d_psi[ 6*idx + 1 ] = y1; 67 | d_psi[ 6*idx + 2 ] = z1; 68 | d_psi[ 6*idx + 3 ] = x2; 69 | d_psi[ 6*idx + 4 ] = y2; 70 | d_psi[ 6*idx + 5 ] = z2; 71 | 72 | } 73 | 74 | } 75 | 76 | /*! 77 | The output velocity 78 | 79 | d_b (output) output vector 80 | N (input) number of particles 81 | 82 | */ 83 | __global__ void Integrator_ZeroVelocity_kernel( 84 | float *d_b, 85 | unsigned int N 86 | ){ 87 | 88 | // Thread index 89 | unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x; 90 | 91 | // Check if thread is inbounds 92 | if ( tid < N ) { 93 | 94 | d_b[ 6*tid + 0 ] = 0.0; 95 | d_b[ 6*tid + 1 ] = 0.0; 96 | d_b[ 6*tid + 2 ] = 0.0; 97 | d_b[ 6*tid + 3 ] = 0.0; 98 | d_b[ 6*tid + 4 ] = 0.0; 99 | d_b[ 6*tid + 5 ] = 0.0; 100 | 101 | } 102 | } 103 | 104 | /*! 
105 | Add rate of strain from shearing to the right-hand side of the saddle point solve 106 | 107 | d_b (input/output) right-hand side vector 108 | shear_rate (input) shear rate of applied deformation 109 | B2 (input) coefficient of B2 mode (spherical squirmers) 110 | d_ori (input) particle orientation (unit vector) 111 | N (input) number of particles 112 | 113 | */ 114 | __global__ void Integrator_AddStrainRate_kernel( 115 | float *d_b, 116 | float shear_rate, 117 | unsigned int *d_group_members, 118 | float B2, 119 | float *d_sqm_B2_mask, 120 | Scalar3 *d_ori, 121 | unsigned int N 122 | ){ 123 | 124 | // Thread index 125 | unsigned int tidx = blockDim.x * blockIdx.x + threadIdx.x; 126 | 127 | // Check if thread is inbounds 128 | if ( tidx < N ) { 129 | 130 | // Particle ID 131 | unsigned int tid = d_group_members[tidx]; 132 | 133 | // Index into array 134 | unsigned int ind = 6*N + 5*tid; 135 | 136 | // Add ambient strain rate Einf (E_xy = E_yx = shear_rate/2, all else 0) 137 | d_b[ ind + 0 ] += 0.0; // E_xx - E_zz 138 | d_b[ ind + 1 ] += shear_rate; // E_xy * 2 139 | d_b[ ind + 2 ] += 0.0; // E_xz * 2 140 | d_b[ ind + 3 ] += 0.0; // E_yz * 2 141 | d_b[ ind + 4 ] += 0.0; // E_yy - E_zz 142 | 143 | // Substract the particle strain rate from Einf 144 | Scalar3 pdir = d_ori[tid]; 145 | Scalar px = pdir.x; 146 | Scalar py = pdir.y; 147 | Scalar pz = pdir.z; 148 | 149 | Scalar b2 = -0.6*B2*d_sqm_B2_mask[tid]; //prefactor for the active strain rate (require radius a=1) 150 | d_b[ ind + 0 ] -= b2*(px*px - pz*pz); 151 | d_b[ ind + 1 ] -= b2*(2.*px*py); 152 | d_b[ ind + 2 ] -= b2*(2.*px*pz); 153 | d_b[ ind + 3 ] -= b2*(2.*py*pz); 154 | d_b[ ind + 4 ] -= b2*(py*py - pz*pz); 155 | 156 | 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /source/Helper_Integrator.cuh: -------------------------------------------------------------------------------- 1 | // This file is part of the PSEv3 plugin, released under the BSD 3-Clause 
// License
//
// Andrew Fiore

/*! \file Helper_Integrator.cuh
    \brief Declares helper functions for integration.
*/
#include "hoomd/ParticleData.cuh"
#include "hoomd/HOOMDMath.h"

// NOTE(review): the header name of the next #include is missing in this copy of
// the file (presumably an angle-bracket name lost in extraction) -- restore it
// from the upstream source.
#include

//! Define the step_one kernel
#ifndef __HELPER_INTEGRATOR_CUH__
#define __HELPER_INTEGRATOR_CUH__

//! Definition for complex variable storage
// Both branches expand to cufftComplex, so CUFFTCOMPLEX is the same type
// whether or not SINGLE_PRECISION is defined.
#ifdef SINGLE_PRECISION
#define CUFFTCOMPLEX cufftComplex
#else
#define CUFFTCOMPLEX cufftComplex
#endif

//! Fill d_psi (6 entries per particle) with random numbers
__global__ void Integrator_RFD_RandDisp_kernel(
    float *d_psi,
    unsigned int N,
    const unsigned int seed
    );
//! Zero the 6 entries per particle of d_b
__global__ void Integrator_ZeroVelocity_kernel(
    float *d_b,
    unsigned int N
    );
//! Add the ambient and subtract the active strain rate in the saddle point right-hand side
__global__ void Integrator_AddStrainRate_kernel(
    float *d_b,
    float shear_rate,
    unsigned int *d_group_members,
    float B2,
    float *d_sqm_B2_mask,
    Scalar3 *d_ori,
    unsigned int N
    );

#endif

--------------------------------------------------------------------------------
/source/Helper_Mobility.cu:
--------------------------------------------------------------------------------
/*
Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
(HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
the University of Michigan All rights reserved.

HOOMD-blue may contain modifications ("Contributions") provided, and to which
copyright is held, by various Contributors who have granted The Regents of the
University of Michigan the right to modify and/or distribute such Contributions.
9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 
40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Gang Wang 52 | // Modified by Andrew Fiore 53 | // Modified by Zhouyang Ge 54 | 55 | 56 | #include "Helper_Mobility.cuh" 57 | 58 | #include 59 | 60 | #ifdef WIN32 61 | #include 62 | #else 63 | #include 64 | #endif 65 | 66 | //! command to convert floats or doubles to integers 67 | #ifdef SINGLE_PRECISION 68 | #define __scalar2int_rd __float2int_rd 69 | #else 70 | #define __scalar2int_rd __double2int_rd 71 | #endif 72 | 73 | 74 | /*! \file Helper_Mobility.cu 75 | \brief Helper functions to perform additions etc., needed in 76 | the mobility calculations 77 | */ 78 | 79 | 80 | /*! 81 | 82 | Zero out the force grid 83 | 84 | grid (input/output) the grid going to be zero out 85 | NxNyNz (input) dimension of the grid 86 | 87 | */ 88 | __global__ void Mobility_ZeroGrid_kernel( 89 | CUFFTCOMPLEX *grid, 90 | unsigned int NxNyNz 91 | ){ 92 | 93 | // Thread index 94 | unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x; 95 | 96 | // Check if index is within bounds 97 | if ( tid < NxNyNz ) { 98 | 99 | grid[tid] = make_scalar2( 0.0, 0.0 ); 100 | 101 | } 102 | } 103 | 104 | /*! 105 | Linear combination helper function 106 | C = a*A + b*B 107 | C can be A or B, so that A or B will be overwritten 108 | The fourth element of Scalar4 is not changed! 
109 | 110 | d_a (input) input vector, A 111 | d_b (input) input vector, B 112 | d_c (output) output vector, C 113 | coeff_a (input) scaling factor for A, a 114 | coeff_b (input) scaling factor for B, b 115 | group_size (input) length of vectors 116 | d_group_members (input) index into vectors 117 | */ 118 | __global__ void Mobility_LinearCombination_kernel( 119 | Scalar4 *d_a, 120 | Scalar4 *d_b, 121 | Scalar4 *d_c, 122 | Scalar coeff_a, 123 | Scalar coeff_b, 124 | unsigned int group_size, 125 | unsigned int *d_group_members 126 | ){ 127 | 128 | // Thread index 129 | int group_idx = blockDim.x * blockIdx.x + threadIdx.x; 130 | 131 | // Check if thread is within bounds 132 | if (group_idx < group_size){ 133 | 134 | // Get current vector element, using the index (if needed) 135 | unsigned int idx = d_group_members[group_idx]; 136 | Scalar4 A4 = d_a[idx]; 137 | Scalar4 B4 = d_b[idx]; 138 | 139 | // Make scalar3 because we only want to sum the first 140 | // three components 141 | Scalar3 A = make_scalar3(A4.x, A4.y, A4.z); 142 | Scalar3 B = make_scalar3(B4.x, B4.y, B4.z); 143 | 144 | // Addition 145 | A = coeff_a * A + coeff_b * B; 146 | 147 | // Write out 148 | d_c[idx] = make_scalar4(A.x, A.y, A.z, d_c[idx].w); 149 | } 150 | } 151 | 152 | /*! 153 | Direct addition of two scalar4 arrays, where each thread does 154 | work on two adjacent scalar4 elements of the array 155 | 156 | C = a*A + b*B 157 | C can be A or B, so that A or B will be overwritten 158 | The fourth element of Scalar4 is changed! 
159 | 160 | d_a (input) input vector, A 161 | d_b (input) input vector, B 162 | d_c (output) output vector, C 163 | coeff_a (input) scaling factor for A, a 164 | coeff_b (input) scaling factor for B, b 165 | group_size (input) length of vectors 166 | 167 | */ 168 | __global__ void Mobility_Add4_kernel( 169 | Scalar4 *d_a, 170 | Scalar4 *d_b, 171 | Scalar4 *d_c, 172 | Scalar coeff_a, 173 | Scalar coeff_b, 174 | unsigned int group_size 175 | ){ 176 | 177 | // Thread index 178 | int idx = blockDim.x * blockIdx.x + threadIdx.x; 179 | 180 | // Check if thread is in bounds 181 | if (idx < group_size) { 182 | 183 | //brownian single particle//// Get first element 184 | //brownian single particle//Scalar4 A = d_a[2*idx]; 185 | //brownian single particle//Scalar4 B = d_b[2*idx]; 186 | //brownian single particle// 187 | //brownian single particle//// Addition for 4 components of the first element 188 | //brownian single particle//A.x = coeff_a * A.x + coeff_b * B.x; 189 | //brownian single particle//A.y = coeff_a * A.y + coeff_b * B.y; 190 | //brownian single particle//A.z = coeff_a * A.z + coeff_b * B.z; 191 | //brownian single particle//A.w = coeff_a * A.w + coeff_b * B.w; 192 | //brownian single particle// 193 | //brownian single particle//// Write out first element 194 | //brownian single particle//d_c[2*idx] = make_scalar4(A.x, A.y, A.z, A.w); 195 | //brownian single particle// 196 | //brownian single particle//// Get second element 197 | //brownian single particle//A = d_a[2*idx+1]; 198 | //brownian single particle//B = d_b[2*idx+1]; 199 | //brownian single particle// 200 | //brownian single particle//// Addition for 4 components of the second element 201 | //brownian single particle//A.x = coeff_a * A.x + coeff_b * B.x; 202 | //brownian single particle//A.y = coeff_a * A.y + coeff_b * B.y; 203 | //brownian single particle//A.z = coeff_a * A.z + coeff_b * B.z; 204 | //brownian single particle//A.w = coeff_a * A.w + coeff_b * B.w; 205 | //brownian single particle// 206 
| //brownian single particle//// Write out second element 207 | //brownian single particle//d_c[2*idx+1] = make_scalar4(A.x, A.y, A.z, A.w); 208 | 209 | 210 | //zhoge 211 | int ind = 2*idx; 212 | d_c[ind] = make_scalar4(coeff_a*d_a[ind].x + coeff_b*d_b[ind].x, 213 | coeff_a*d_a[ind].y + coeff_b*d_b[ind].y, 214 | coeff_a*d_a[ind].z + coeff_b*d_b[ind].z, 215 | coeff_a*d_a[ind].w + coeff_b*d_b[ind].w); 216 | 217 | ind += 1; 218 | d_c[ind] = make_scalar4(coeff_a*d_a[ind].x + coeff_b*d_b[ind].x, 219 | coeff_a*d_a[ind].y + coeff_b*d_b[ind].y, 220 | coeff_a*d_a[ind].z + coeff_b*d_b[ind].z, 221 | coeff_a*d_a[ind].w + coeff_b*d_b[ind].w); 222 | 223 | } 224 | } 225 | 226 | /*! 227 | 228 | Helper function to convert velocity gradient to angular velocity and rate of strain 229 | 230 | d_delu (input) velocity gradient 231 | d_omegaE (output) angular velocity and rate of strain 232 | group_size (input) number of particles 233 | 234 | */ 235 | __global__ void Mobility_D2WE_kernel( 236 | Scalar4 *d_delu, 237 | Scalar4 *d_omegaE, 238 | unsigned int group_size 239 | ){ 240 | 241 | // Thread index 242 | int idx = blockDim.x * blockIdx.x + threadIdx.x; 243 | 244 | // Check if thread is in bounds 245 | if (idx < group_size) { 246 | 247 | // Get the current velocity gradient 248 | Scalar4 D[2]; 249 | D[0] = make_scalar4( d_delu[2*idx].x, //E_xx 250 | d_delu[2*idx].y, //E_xy + Omega_z 251 | d_delu[2*idx].z, //E_xz - Omega_y 252 | d_delu[2*idx].w //E_yz + Omega_x 253 | ); 254 | D[1] = make_scalar4( d_delu[2*idx+1].x, //E_yy 255 | d_delu[2*idx+1].y, //E_xx - Omega_z 256 | d_delu[2*idx+1].z, //E_xz + Omega_y 257 | d_delu[2*idx+1].w //E_yz - Omega_x 258 | ); 259 | 260 | // Convert to angular velocity and rate of strain 261 | Scalar W[3]; 262 | Scalar E[5]; 263 | 264 | W[0] = 0.5 * ( D[0].w - D[1].w ); //Omega_x 265 | W[1] = 0.5 * ( D[1].z - D[0].z ); //Omega_y 266 | W[2] = 0.5 * ( D[0].y - D[1].y ); //Omega_z 267 | 268 | E[0] = D[0].x; //E_xx 269 | E[1] = 0.5 * ( D[0].y + D[1].y ); //E_xy 
270 | E[2] = 0.5 * ( D[0].z + D[1].z ); //E_xz 271 | E[3] = 0.5 * ( D[0].w + D[1].w ); //E_yz 272 | E[4] = D[1].x; //E_yy 273 | 274 | // Write output 275 | d_omegaE[2*idx] = make_scalar4( W[0], W[1], W[2], 276 | 2*E[0]+E[4] ); //E_xx-E_zz 277 | d_omegaE[2*idx+1] = make_scalar4( 2*E[1], //2*E_xy 278 | 2*E[2], //2*E_xz 279 | 2*E[3], //2*E_yz 280 | 2*E[4]+E[0] ); //E_yy-E_zz 281 | 282 | } 283 | } 284 | 285 | /*! 286 | 287 | Helper function to convert torque and stresslet to couplet 288 | 289 | d_couplet (output) particle couplet 290 | d_ts (input) torque and stresslet 291 | group_size (input) number of particles 292 | 293 | */ 294 | __global__ void Mobility_TS2C_kernel( 295 | Scalar4 *d_couplet, 296 | Scalar4 *d_ts, 297 | unsigned int group_size 298 | ){ 299 | 300 | // Thread index 301 | int idx = blockDim.x * blockIdx.x + threadIdx.x; 302 | 303 | // Check if thread is in bounds 304 | if (idx < group_size) { 305 | 306 | // Get torque and stresslet 307 | // 308 | // Torque is first 3 elements of the 2 scalar4s 309 | // Stresslet is last 5 elements of the 2 scalar4s 310 | Scalar4 TS[2]; 311 | TS[0] = make_scalar4( d_ts[2*idx].x, d_ts[2*idx].y, d_ts[2*idx].z, d_ts[2*idx].w ); 312 | TS[1] = make_scalar4( d_ts[2*idx+1].x, d_ts[2*idx+1].y, d_ts[2*idx+1].z, d_ts[2*idx+1].w ); 313 | 314 | // zhoge: times -1 to effectively transpose C (because the sign of S is corrected) 315 | Scalar Lx = -TS[0].x; 316 | Scalar Ly = -TS[0].y; 317 | Scalar Lz = -TS[0].z; 318 | 319 | 320 | Scalar Sxx = TS[0].w; 321 | Scalar Sxy = TS[1].x; 322 | Scalar Sxz = TS[1].y; 323 | Scalar Syz = TS[1].z; //zhoge: It is not Syy !!! 
(The html document is wrong) 324 | Scalar Syy = TS[1].w; 325 | 326 | // Compute the couplet from torque and stresslet 327 | Scalar C[8]; 328 | C[0] = Sxx; C[1] = Sxy + 0.5*Lz; C[2] = Sxz - 0.5*Ly; 329 | C[5] = Sxy - 0.5*Lz; C[4] = Syy; C[3] = Syz + 0.5*Lx; 330 | C[6] = Sxz + 0.5*Ly; C[7] = Syz - 0.5*Lx; 331 | 332 | // Write output 333 | d_couplet[2*idx] = make_scalar4( C[0], C[1], C[2], C[3] ); 334 | d_couplet[2*idx+1] = make_scalar4( C[4], C[5], C[6], C[7] ); 335 | } 336 | } 337 | 338 | /*! 339 | Kernel function to calculate position of each grid in reciprocal space 340 | 341 | gridk (output) Fourier space lattice vectors and Stokes flow scaling coefficient 342 | Nx (input) number of grid points in x-direction 343 | Ny (input) number of grid points in y-direction 344 | Nz (input) number of grid points in z-direction 345 | NxNyNz (input) total number of grid points (NxNyNz = Nx*Ny*Nz) 346 | box (input) periodic box information 347 | xi (input) Ewald parameter 348 | eta (input) NUFFT parameter 349 | 350 | */ 351 | __global__ void Mobility_SetGridk_kernel( 352 | Scalar4 *gridk, 353 | int Nx, 354 | int Ny, 355 | int Nz, 356 | unsigned int NxNyNz, 357 | BoxDim box, 358 | Scalar xi, 359 | Scalar eta 360 | ){ 361 | 362 | // Thread index 363 | int tid = blockDim.x * blockIdx.x + threadIdx.x; 364 | 365 | // Check if thread is in bounds 366 | if ( tid < NxNyNz ) { 367 | 368 | // x,y,z coordinates from modulo arithmetic 369 | int i = tid / (Ny*Nz); 370 | int j = (tid - i * Ny * Nz) / Nz; 371 | int k = tid % Nz; 372 | 373 | // Get box and tilt factor . 374 | // 375 | // NOTE: tilt factor assumes only shear in XY 376 | Scalar3 L = box.getL(); 377 | Scalar xy = box.getTiltFactorXY(); 378 | Scalar4 gridk_value; 379 | 380 | // Grid coordinates in x,y,z directions. 381 | // 382 | // NOTE: Assumes only shear in XY 383 | gridk_value.x = (i < (Nx+1) / 2) ? i : i - Nx; 384 | gridk_value.y = ( ((j < (Ny+1) / 2) ? 
j : j - Ny) - xy * gridk_value.x * L.y / L.x ) / L.y; // Fixed by Zsigi 2015 385 | gridk_value.x = gridk_value.x / L.x; 386 | gridk_value.z = ((k < (Nz+1) / 2) ? k : k - Nz) / L.z; 387 | 388 | // Scale by 2*pi 389 | gridk_value.x *= 2.0*3.1416926536; 390 | gridk_value.y *= 2.0*3.1416926536; 391 | gridk_value.z *= 2.0*3.1416926536; 392 | 393 | // Compute dot(k,k) and xisq once 394 | Scalar k2 = gridk_value.x*gridk_value.x + gridk_value.y*gridk_value.y + gridk_value.z*gridk_value.z; 395 | Scalar xisq = xi * xi; 396 | 397 | // Scaling factor used in wave space sum 398 | if (i == 0 && j == 0 && k == 0){ 399 | gridk_value.w = 0.0; 400 | } 401 | else{ 402 | // Have to divide by Nx*Ny*Nz to normalize the FFTs 403 | gridk_value.w = 6.0*3.1415926536 *(1.0 + k2/4.0/xisq) * expf( -(1-eta) * k2/4.0/xisq ) / ( k2 ) / Scalar( Nx*Ny*Nz ); 404 | } 405 | 406 | // Write output 407 | gridk[tid] = gridk_value; 408 | 409 | } 410 | } 411 | 412 | -------------------------------------------------------------------------------- /source/Helper_Mobility.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 
16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Gang Wang 52 | 53 | /*! 
    \file Helper_Mobility.cuh
    \brief Declares GPU kernel code for mobility helper functions.
*/
#include "hoomd/ParticleData.cuh"
#include "hoomd/HOOMDMath.h"

// NOTE(review): the header name of the next #include is missing in this copy of
// the file (presumably an angle-bracket name lost in extraction) -- restore it
// from the upstream source.
#include

//! Define the step_one kernel
#ifndef __HELPER_Mobility_CUH__
#define __HELPER_Mobility_CUH__

//! Definition for complex variable storage
// Both branches expand to cufftComplex, so CUFFTCOMPLEX is the same type
// whether or not SINGLE_PRECISION is defined.
#ifdef SINGLE_PRECISION
#define CUFFTCOMPLEX cufftComplex
#else
#define CUFFTCOMPLEX cufftComplex
#endif

//! Set every entry of the grid to (0, 0)
__global__ void Mobility_ZeroGrid_kernel(CUFFTCOMPLEX *grid, unsigned int NxNyNz);

//! C = a*A + b*B on the x,y,z components, indexed through d_group_members; w of C is kept
__global__ void Mobility_LinearCombination_kernel(Scalar4 *d_a, Scalar4 *d_b, Scalar4 *d_c, Scalar coeff_a, Scalar coeff_b, unsigned int group_size, unsigned int *d_group_members);

//! C = a*A + b*B on all four components, two Scalar4 elements per thread
__global__ void Mobility_Add4_kernel(Scalar4 *d_a, Scalar4 *d_b, Scalar4 *d_c, Scalar coeff_a, Scalar coeff_b, unsigned int group_size);

//! Convert torque and stresslet to couplet
__global__ void Mobility_TS2C_kernel(Scalar4 *d_couplet, Scalar4 *d_ts, unsigned int group_size);

//! Convert velocity gradient to angular velocity and rate of strain
__global__ void Mobility_D2WE_kernel(Scalar4 *d_delu, Scalar4 *d_omegaE, unsigned int group_size);

//! Compute the reciprocal-space wave vector and scaling factor of every grid point
__global__ void Mobility_SetGridk_kernel(Scalar4 *gridk, int Nx, int Ny, int Nz, unsigned int NxNyNz, BoxDim box, Scalar xi, Scalar eta);


#endif

--------------------------------------------------------------------------------
/source/Helper_Precondition.cuh:
--------------------------------------------------------------------------------
// This file is part of the PSEv3 plugin, released under the BSD 3-Clause License
//
// Andrew Fiore

/*! \file Helper_Precondition.cuh
    \brief Declares helper functions for error checking and sparse math.
*/
#include "hoomd/ParticleData.cuh"
#include "hoomd/HOOMDMath.h"

// NOTE(review): the bare #include lines below have no header name in this copy
// of the file (presumably angle-bracket names lost in extraction) -- restore
// them from the upstream source.
#include

#include

//! Define the step_one kernel
#ifndef __HELPER_PRECONDITION_CUH__
#define __HELPER_PRECONDITION_CUH__

//!
Definition for comxplex variable storage 20 | #ifdef SINGLE_PRECISION 21 | #define CUFFTCOMPLEX cufftComplex 22 | #else 23 | #define CUFFTCOMPLEX cufftComplex 24 | #endif 25 | 26 | __global__ void Precondition_ZeroVector_kernel( 27 | float *d_b, 28 | const unsigned int nnz, 29 | const unsigned int group_size 30 | ); 31 | 32 | __global__ void Precondition_ApplyRCM_Vector_kernel( 33 | float *d_Scratch_Vector, 34 | float *d_Vector, 35 | const int *d_prcm, 36 | const int length, 37 | const int direction 38 | ); 39 | 40 | __global__ void Precondition_AddInt_kernel( 41 | unsigned int *d_a, 42 | unsigned int *d_b, 43 | unsigned int *d_c, 44 | int coeff_a, 45 | int coeff_b, 46 | unsigned int group_size 47 | ); 48 | 49 | __global__ void Precondition_AddIdentity_kernel( 50 | float *d_L_Val, 51 | int *d_L_RowPtr, 52 | int *d_L_ColInd, 53 | int group_size, 54 | float ichol_relaxer 55 | ); 56 | 57 | __global__ void Precondition_Inn_kernel( 58 | Scalar *d_y, 59 | Scalar *d_x, 60 | int *d_HasNeigh, 61 | int group_size 62 | ); 63 | 64 | __global__ void Precondition_ImInn_kernel( 65 | Scalar *d_y, 66 | Scalar *d_x, 67 | int *d_HasNeigh, 68 | int group_size 69 | ); 70 | 71 | __global__ void Precondition_ExpandPRCM_kernel( 72 | int *d_prcm, 73 | int *d_scratch, 74 | int group_size 75 | ); 76 | 77 | __global__ void Precondition_InitializeMap_kernel( 78 | int *d_map, 79 | int nnz 80 | ); 81 | 82 | __global__ void Precondition_Map_kernel( 83 | float *d_Scratch, 84 | float *d_Val, 85 | int *d_map, 86 | int nnz 87 | ); 88 | 89 | __global__ void Precondition_GetDiags_kernel( 90 | int group_size, 91 | float *d_Diag, 92 | int *d_L_RowPtr, 93 | int *d_L_ColInd, 94 | float *d_L_Val 95 | ); 96 | 97 | __global__ void Precondition_DiagMult_kernel( 98 | float *d_y, // output 99 | float *d_x, // input 100 | int group_size, 101 | float *d_Diag, 102 | int direction 103 | ); 104 | 105 | __global__ void Precondition_ZeroUpperTriangle_kernel( 106 | int *d_RowPtr, 107 | int *d_ColInd, 108 | float *d_Val, 
109 | int group_size 110 | ); 111 | 112 | __global__ void Precondition_Lmult_kernel( 113 | float *d_y, 114 | float *d_x, 115 | int *d_RowPtr, 116 | int *d_ColInd, 117 | float *d_Val, 118 | int group_size 119 | ); 120 | 121 | #endif 122 | -------------------------------------------------------------------------------- /source/Helper_Saddle.cu: -------------------------------------------------------------------------------- 1 | // This file is part of the PSEv3 plugin, released under the BSD 3-Clause License 2 | // 3 | // Andrew Fiore 4 | 5 | 6 | #include "Helper_Saddle.cuh" 7 | 8 | #include 9 | 10 | #include 11 | 12 | #ifdef WIN32 13 | #include 14 | #else 15 | #include 16 | #endif 17 | 18 | //! command to convert floats or doubles to integers 19 | #ifdef SINGLE_PRECISION 20 | #define __scalar2int_rd __float2int_rd 21 | #else 22 | #define __scalar2int_rd __double2int_rd 23 | #endif 24 | 25 | 26 | /*! \file Helper_Saddle.cu 27 | Helper functions to perform the additions and operations required in the saddle point 28 | matrix calculations 29 | */ 30 |
/*!
    Clear the output vector of the saddle point multiplication.

    Each in-range thread handles one particle and writes zero to that
    particle's 17 consecutive entries, d_b[ 17*tid ] ... d_b[ 17*tid + 16 ].

    d_b     (input/output)  vector to clear; entries 0 ... 17*N-1 are written
    N       (input)         number of particles
*/
__global__ void Saddle_ZeroOutput_kernel(
                    float *d_b,
                    unsigned int N
                    ){

    // One thread per particle
    unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;

    // Threads beyond the last particle have nothing to do
    if ( tid >= N ) return;

    // First entry of this particle's 17-entry slot
    unsigned int base = 17*tid;

    for ( int ii = 0; ii < 17; ii++ ){
        d_b[ base + ii ] = 0.0;
    }
}


/*! 
Element-wise linear combination of two float arrays

    C = a*A + b*B

    Every element is read before the element at the same index is written,
    so C may be the same array as A or B (that input is then overwritten).

    d_a     (input)  input vector, A
    d_b     (input)  input vector, B
    d_c     (output) output vector, C
    coeff_a (input)  scaling factor for A, a
    coeff_b (input)  scaling factor for B, b
    N       (input)  number of threads doing work
    stride  (input)  number of consecutive entries handled by each thread,
                     so entries 0 ... stride*N-1 are processed in total
*/
__global__ void Saddle_AddFloat_kernel(     float *d_a,
                        float *d_b,
                        float *d_c,
                        float coeff_a,
                        float coeff_b,
                        unsigned int N,
                        int stride
                    ){

    // Thread index
    int idx = blockDim.x * blockIdx.x + threadIdx.x;

    // Out-of-range threads do nothing
    if (idx >= N) return;

    // First entry owned by this thread
    const int first = stride * idx;

    for ( int ii = 0; ii < stride; ++ii ){

        const int ind = first + ii;

        d_c[ ind ] = coeff_a * d_a[ ind ] + coeff_b * d_b[ ind ];

    }
}

/*!
    Unpack a generalized force vector into force, torque and stresslet

    The input stores 6 entries per particle (force, then torque) in its
    first 6N entries, followed by 5 stresslet entries per particle in the
    last 5N entries.

    d_generalF      (input)  11N vector of generalized force
    d_net_force     (output) linear force, one Scalar4 per particle (w set to zero)
    d_TorqueStress  (output) torque and stresslet, two Scalar4 per particle:
                             ( L1, L2, L3, S1 ) and ( S2, S3, S4, S5 )
    N               (input)  number of particles

    Per the original author's notes, S4 is Syz and S5 is Syy.
*/
__global__ void Saddle_SplitGeneralizedF_kernel(    float     *d_generalF,
                            Scalar4 *d_net_force,
                            Scalar4 *d_TorqueStress,
                            unsigned int N
                        ){
    // Thread index
    int idx = blockDim.x * blockIdx.x + threadIdx.x;

    // Out-of-range threads do nothing
    if (idx >= N) return;

    // Offsets of this particle's force/torque and stresslet entries
    int ft_off = 6*idx;
    int st_off = 6*N + 5*idx;

    // Load everything before writing any output
    float ft[6];
    float st[5];
    for ( int k = 0; k < 6; ++k ){
        ft[ k ] = d_generalF[ ft_off + k ];
    }
    for ( int k = 0; k < 5; ++k ){
        st[ k ] = d_generalF[ st_off + k ];
    }

    d_net_force[ idx ]          = make_scalar4( ft[0], ft[1], ft[2], 0.0 );
    d_TorqueStress[ 2*idx + 0 ] = make_scalar4( ft[3], ft[4], ft[5], st[0] );
    d_TorqueStress[ 2*idx + 1 ] = make_scalar4( st[1], st[2], st[3], st[4] );
}

/*!
    Pack velocity, angular velocity and rate of strain into a generalized velocity

    The output uses the same layout as the generalized force: 6 entries per
    particle (velocity, then angular velocity) in the first 6N entries and
    5 rate-of-strain entries per particle in the last 5N entries.

    d_generalU      (output) 11N vector of generalized velocity
    d_vel           (input)  linear velocity, one 4-vector per particle (w unused)
    d_AngvelStrain  (input)  angular velocity and rate of strain, two 4-vectors per particle
    N               (input)  number of particles

    Per the original author's notes the entries are, in order:
    U_x, U_y, U_z (relative to U^infty), Omega_x, Omega_y, Omega_z
    (relative to Omega^infty), then E_xx - E_zz, 2*E_xy, 2*E_xz, 2*E_yz,
    E_yy - E_zz.
*/
__global__ void Saddle_MakeGeneralizedU_kernel(     float     *d_generalU,
                            Scalar4 *d_vel,
                            Scalar4 *d_AngvelStrain,
                            unsigned int N
                        ){
    // Thread index
    int idx = blockDim.x * blockIdx.x + threadIdx.x;

    // Out-of-range threads do nothing
    if (idx >= N) return;

    // Load this particle's data
    float4 vel = d_vel[ idx ];
    float4 AS1 = d_AngvelStrain[ 2*idx + 0 ];
    float4 AS2 = d_AngvelStrain[ 2*idx + 1 ];

    // Values in output order
    const float uo[6] = { vel.x, vel.y, vel.z, AS1.x, AS1.y, AS1.z };
    const float ee[5] = { AS1.w, AS2.x, AS2.y, AS2.z, AS2.w };

    // Offsets of this particle's velocity and rate-of-strain entries
    int uo_off = 6*idx;
    int ee_off = 6*N + 5*idx;

    for ( int k = 0; k < 6; ++k ){
        d_generalU[ uo_off + k ] = uo[ k ];
    }
    for ( int k = 0; k < 5; ++k ){
        d_generalU[ ee_off + k ] = ee[ k ];
    }
}

/*! 
Subtract the particle force/torque from the right-hand side of the saddle point problem

    For every particle the 6 force/torque entries are subtracted from
    entries 11*N + 6*idx ... 11*N + 6*idx + 5 of the right-hand side,
    i.e. from the last 6N entries of the 17N vector.

    d_force (input)         6*N vector of particle force/torque
    d_rhs   (input/output)  17*N right-hand side vector
    N       (input)         number of particles

    Note from the original author (zhoge): this kernel indexes with the raw
    thread index instead of d_group_members[idx] as the other kernels do.
    That is fine while the group members are the global indices 0 ... N-1,
    but it would be inconsistent if particle tags were used for
    d_group_members.
*/
__global__ void Saddle_force2rhs_kernel(
                    float *d_force,
                    float *d_rhs,
                    unsigned int N
                    ){
    // Thread index
    int idx = blockDim.x * blockIdx.x + threadIdx.x;

    // Out-of-range threads do nothing
    if (idx >= N) return;

    // Offsets of this particle's entries in the two vectors
    const int f_off = 6*idx;
    const unsigned int rhs_off = 11*N + 6*idx;

    for ( int k = 0; k < 6; ++k ){
        d_rhs[ rhs_off + k ] -= d_force[ f_off + k ];
    }
}


/*! 
Extract the particle velocities from the saddle point solution vector

    For every particle the 6 entries 11*N + 6*idx ... 11*N + 6*idx + 5 of
    the solution (its last 6N entries) are copied to the velocity vector.

    d_U         (output) 6*N vector of particle linear/angular velocities
    d_solution  (input)  17*N saddle point solution vector
    N           (input)  number of particles
*/
__global__ void Saddle_solution2vel_kernel(
                    float *d_U,
                    float *d_solution,
                    unsigned int N
                    ){
    // Thread index
    int idx = blockDim.x * blockIdx.x + threadIdx.x;

    // Out-of-range threads do nothing
    if (idx >= N) return;

    // Offsets of this particle's entries in the two vectors
    const int u_off = 6*idx;
    const unsigned int sol_off = 11*N + 6*idx;

    for ( int k = 0; k < 6; ++k ){
        d_U[ u_off + k ] = d_solution[ sol_off + k ];
    }
}


-------------------------------------------------------------------------------- /source/Helper_Saddle.cuh: -------------------------------------------------------------------------------- 1 | // This file is part of the PSEv3 plugin, released under the BSD 3-Clause License 2 | // 3 | // Andrew Fiore 4 | 5 | /*! \file Helper_Saddle.cuh 6 | \brief Declared helper functions for saddle point calculations 7 | */ 8 | #include "hoomd/ParticleData.cuh" 9 | #include "hoomd/HOOMDMath.h" 10 | 11 | #include 12 | 13 | #include 14 | #include "cusparse.h" 15 | 16 | //! Define the step_one kernel 17 | #ifndef __HELPER_SADDLE_CUH__ 18 | #define __HELPER_SADDLE_CUH__ 19 | 20 | //! 
Definition for complex variable storage 21 | #ifdef SINGLE_PRECISION 22 | #define CUFFTCOMPLEX cufftComplex 23 | #else 24 | #define CUFFTCOMPLEX cufftComplex 25 | #endif 26 | 27 | __global__ void Saddle_ZeroOutput_kernel( 28 | float *d_b, 29 | unsigned int N 30 | ); 31 | 32 | __global__ void Saddle_AddFloat_kernel( float *d_a, 33 | float *d_b, 34 | float *d_c, 35 | float coeff_a, 36 | float coeff_b, 37 | unsigned int N, 38 | int stride 39 | ); 40 | 41 | __global__ void Saddle_SplitGeneralizedF_kernel( float *d_GeneralF, 42 | Scalar4 *d_net_force, 43 | Scalar4 *d_TorqueStress, 44 | unsigned int N 45 | ); 46 | 47 | __global__ void Saddle_MakeGeneralizedU_kernel( float *d_GeneralU, 48 | Scalar4 *d_vel, 49 | Scalar4 *d_AngvelStrain, 50 | unsigned int N 51 | ); 52 | 53 | 54 | __global__ void Saddle_force2rhs_kernel( 55 | float *d_force, 56 | float *d_rhs, 57 | unsigned int N 58 | ); 59 | 60 | __global__ void Saddle_solution2vel_kernel( 61 | float *d_U, 62 | float *d_solution, 63 | unsigned int N 64 | ); 65 | 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /source/Helper_Stokes.cu: -------------------------------------------------------------------------------- 1 | // This file is part of the PSEv3 plugin, released under the BSD 3-Clause License 2 | // 3 | // Andrew Fiore 4 | 5 | #include "Helper_Stokes.cuh" //zhoge: This includes HOOMDMath.h, which includes cmath 6 | 7 | #include 8 | 9 | #ifdef WIN32 10 | #include 11 | #else 12 | #include 13 | #endif 14 | 15 | //! command to convert floats or doubles to integers 16 | #ifdef SINGLE_PRECISION 17 | #define __scalar2int_rd __float2int_rd 18 | #else 19 | #define __scalar2int_rd __double2int_rd 20 | #endif 21 | 22 | 23 | /*! \file Helper_Stokes.cu 24 | \brief Helper functions required for data handling in Stokes.cu 25 | */ 26 | 27 | /*! 
Initialize the applied force and torque from the HOOMD net_force
    array, which holds the contributions from external and
    interparticle potentials

    For every group member the three force components are copied and the
    three torque components are set to zero.

    d_net_force     (input)  HOOMD force vector
    d_AppliedForce  (output) total force/torque on the particles, 6 entries per particle
    group_size      (input)  number of particles in the group
    d_group_members (input)  particle index for each group member
*/
__global__ void Stokes_SetForce_kernel(
                    Scalar4 *d_net_force,
                    float   *d_AppliedForce,
                    unsigned int group_size,
                    unsigned int *d_group_members
                    ){

    // Thread index
    int tidx = blockDim.x * blockIdx.x + threadIdx.x;

    // Out-of-range threads do nothing
    if (tidx >= group_size) return;

    // Particle handled by this thread
    const unsigned int idx = d_group_members[ tidx ];
    const unsigned int out = 6*idx;

    const Scalar4 net_force = d_net_force[ idx ];

    // Force from HOOMD
    d_AppliedForce[ out     ] = net_force.x;
    d_AppliedForce[ out + 1 ] = net_force.y;
    d_AppliedForce[ out + 2 ] = net_force.z;

    // No applied torque
    d_AppliedForce[ out + 3 ] = 0.0;
    d_AppliedForce[ out + 4 ] = 0.0;
    d_AppliedForce[ out + 5 ] = 0.0;
}

/*!
    Compute the applied force and torque directly from the particle
    configuration instead of taking them from HOOMD

    For every group member the pair forces from all particles in its
    neighbor list are summed along the (minimum image) center-to-neighbor
    direction; a positive magnitude is attractive, a negative one
    repulsive. With gap = dist - rcol and rcol = 2 + sqrt(epsq):

      - gap >= 0 and dist <= 2 + 10/kappa :
            F = Hamaker/(12*(gap^2 + epsq)) - F_0*exp(-kappa*gap)
        (van der Waals attraction and electrostatic repulsion)
      - gap < 0 :
            F = Hamaker/(12*epsq) - F_0 - k_n*|gap|
        (the two terms above at zero gap, plus a linear collision force)
      - otherwise F = 0

    where F_0 = 1/ndsr and Hamaker = F_0*beta. The applied torque is
    ( 0, 0, T_ext ).

    d_pos           (input)  particle positions
    d_ori           (input)  particle orientations (loaded, but not used by the
                             torque currently applied)
    d_AppliedForce  (output) total force/torque on the particles, 6 entries per particle
    group_size      (input)  number of particles in the group
    d_group_members (input)  particle index for each group member
    d_nneigh        (input)  number of neighbors of each particle
    d_nlist         (input)  neighbor list
    d_headlist      (input)  start of each particle's neighbors in d_nlist
    ndsr            (input)  inverse of the repulsive force scale (the original
                             author notes this assumes sr = 1.0)
    k_n             (input)  stiffness of the collision force
    kappa           (input)  decay rate of the electrostatic repulsion
    beta            (input)  ratio of the Hamaker constant to the repulsive force scale
    epsq            (input)  square of the roughness height
    T_ext           (input)  magnitude of the external torque
    box             (input)  simulation box, used for the minimum image convention
*/
__global__ void Stokes_SetForce_manually_kernel(
                        const Scalar4 *d_pos,
                        Scalar3 *d_ori,
                        float   *d_AppliedForce,
                        unsigned int group_size,
                        unsigned int *d_group_members,
                        const unsigned int *d_nneigh,
                        unsigned int *d_nlist,
                        const unsigned int *d_headlist,
                        const float ndsr,
                        const float k_n,
                        const float kappa,
                        const float beta,
                        const float epsq,
                        Scalar T_ext,
                        const BoxDim box
                        ){

    // Thread index
    int tidx = blockDim.x * blockIdx.x + threadIdx.x;

    // Out-of-range threads do nothing
    if (tidx >= group_size) return;

    // Particle handled by this thread
    const unsigned int idx = d_group_members[ tidx ];

    const Scalar4 pos_i = d_pos[idx];  // position
    const Scalar3 dir_i = d_ori[idx];  // orientation (only needed by the disabled
                                       // alignment torque mentioned below)

    // Interparticle force parameters
    const float h_rough = sqrt(epsq);        // roughness height
    const float rcol    = 2.0 + 1.0*h_rough; // collision cutoff
    const float F_0     = 1.0/ndsr;          // repulsive force scale (assuming sr = 1.0)
    const float Hamaker = F_0*beta;          // Hamaker constant for vdW

    // Accumulated interparticle force
    float F_x = 0.;
    float F_y = 0.;
    float F_z = 0.;

    // External torque along the z direction. (The original also carried a
    // disabled alternative, T = T_ext * ( p_y, -p_x, 0 ) with p the particle
    // orientation, meant to rotate particles into alignment with z.)
    const float T_x = T_ext * 0.;
    const float T_y = T_ext * 0.;
    const float T_z = T_ext * 1.;

    // Neighbor list entries of this particle
    const unsigned int first_neigh = d_headlist[ idx ]; // location of its first neighbor in d_nlist
    const unsigned int num_neigh   = d_nneigh[ idx ];   // number of neighbors of this particle

    for (unsigned int n = 0; n < num_neigh; n++) {

        // Current neighbor
        const unsigned int jdx = d_nlist[ first_neigh + n ];
        const Scalar4 pos_j = d_pos[jdx];

        // Minimum image separation vector, from this particle to the neighbor
        Scalar3 R = make_scalar3( pos_j.x - pos_i.x, pos_j.y - pos_i.y, pos_j.z - pos_i.z );
        R = box.minImage(R);

        const Scalar dist = sqrtf( dot(R,R) ); // center-to-center distance
        const Scalar gap1 = dist - rcol;       // surface gap for interparticle forces

        // Applied force magnitude: attraction is positive, repulsion is negative
        float F_app_mag = 0.;

        if (gap1 < 0.) {
            // Max vdW - max electrostatic repulsion - collision
            F_app_mag = Hamaker/(12.*epsq) - F_0 - k_n * abs(gap1);
        }
        else if (gap1 >= 0. && dist <= 2.0 + 10.0/kappa) {
            // vdW and electrostatic repulsion
            F_app_mag = Hamaker/(12.*(gap1*gap1 + epsq)) - F_0 * expf (-kappa * gap1);
        }

        // Unit vector from this particle to the neighbor
        const float normalx = R.x/dist;
        const float normaly = R.y/dist;
        const float normalz = R.z/dist;

        // Accumulate the pair force
        F_x += F_app_mag * normalx;
        F_y += F_app_mag * normaly;
        F_z += F_app_mag * normalz;

    }

    // Store force and torque
    const unsigned int out = 6*idx;
    d_AppliedForce[ out     ] = F_x;
    d_AppliedForce[ out + 1 ] = F_y;
    d_AppliedForce[ out + 2 ] = F_z;
    d_AppliedForce[ out + 3 ] = T_x;
    d_AppliedForce[ out + 4 ] = T_y;
    d_AppliedForce[ out + 5 ] = T_z;
}

/*! 
Copy the velocities obtained from the hydrodynamic solve into the
    HOOMD velocity arrays

    For every group member, entries 0-2 of its 6-entry slot in d_Velocity
    go to d_vel and entries 3-5 go to d_omg; the fourth component of both
    outputs is set to zero.

    d_vel           (output) HOOMD linear velocity vector
    d_omg           (output) angular velocity vector
    d_Velocity      (input)  velocity computed from hydrodynamics, 6 entries per particle
    group_size      (input)  number of particles in the group
    d_group_members (input)  particle index for each group member
*/
__global__ void Stokes_SetVelocity_kernel(
                        Scalar4 *d_vel,
                        Scalar4 *d_omg,
                        float   *d_Velocity,
                        unsigned int group_size,
                        unsigned int *d_group_members
                        ){

    // Thread index
    int tidx = blockDim.x * blockIdx.x + threadIdx.x;

    // Out-of-range threads do nothing
    if (tidx >= group_size) return;

    // Particle handled by this thread and the start of its 6-entry slot
    const unsigned int idx  = d_group_members[ tidx ];
    const unsigned int idx0 = 6*idx;

    // Load all six components before writing any output
    const Scalar ux = d_Velocity[ idx0     ];
    const Scalar uy = d_Velocity[ idx0 + 1 ];
    const Scalar uz = d_Velocity[ idx0 + 2 ];
    const Scalar wx = d_Velocity[ idx0 + 3 ];
    const Scalar wy = d_Velocity[ idx0 + 4 ];
    const Scalar wz = d_Velocity[ idx0 + 5 ];

    d_vel[ idx ] = make_scalar4( ux, uy, uz, 0. );
    d_omg[ idx ] = make_scalar4( wx, wy, wz, 0. );
}

-------------------------------------------------------------------------------- /source/Helper_Stokes.cuh: -------------------------------------------------------------------------------- 1 | // This file is part of the PSEv3 plugin, released under the BSD 3-Clause License 2 | // 3 | // Andrew Fiore 4 | 5 | /*! \file Helper_Stokes.cuh 6 | \brief Declares GPU kernel code for helper functions integration considering hydrodynamic interactions on the GPU. Used by Stokes. 7 | */ 8 | #include "hoomd/ParticleData.cuh" 9 | #include "hoomd/HOOMDMath.h" 10 | 11 | #include 12 | 13 | //! Define the step_one kernel 14 | #ifndef __HELPER_STOKES_CUH__ 15 | #define __HELPER_STOKES_CUH__ 16 | 17 | //! 
Definition for complex variable storage 18 | #ifdef SINGLE_PRECISION 19 | #define CUFFTCOMPLEX cufftComplex 20 | #else 21 | #define CUFFTCOMPLEX cufftComplex 22 | #endif 23 | 24 | 25 | __global__ void Stokes_SetForce_kernel( 26 | Scalar4 *d_net_force, 27 | float *d_AppliedForce, 28 | unsigned int group_size, 29 | unsigned int *d_group_members 30 | ); 31 | 32 | __global__ void Stokes_SetForce_manually_kernel( 33 | const Scalar4 *d_pos, 34 | Scalar3 *d_ori, 35 | float *d_AppliedForce, 36 | unsigned int group_size, 37 | unsigned int *d_group_members, 38 | const unsigned int *d_nneigh, 39 | unsigned int *d_nlist, 40 | const unsigned int *d_headlist, 41 | const float ndsr, 42 | const float k_n, 43 | const float kappa, 44 | const float beta, 45 | const float epsq, 46 | Scalar T_ext, 47 | const BoxDim box 48 | ); 49 | 50 | 51 | __global__ void Stokes_SetVelocity_kernel( 52 | Scalar4 *d_vel, 53 | Scalar4 *d_omg, 54 | float *d_Velocity, 55 | unsigned int group_size, 56 | unsigned int *d_group_members 57 | ); 58 | 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /source/Integrator.cuh: -------------------------------------------------------------------------------- 1 | // This file is part of the PSEv3 plugin, released under the BSD 3-Clause License 2 | // 3 | // Andrew Fiore 4 | // Zhouyang Ge 5 | 6 | /*! \file Integrator.cuh 7 | \brief Declares GPU kernel code for integration considering hydrodynamic interactions on the GPU. Used by Stokes. 8 | */ 9 | #include "hoomd/ParticleData.cuh" 10 | #include "hoomd/HOOMDMath.h" 11 | 12 | #include 13 | 14 | #include "DataStruct.h" 15 | 16 | #include 17 | #include 18 | 19 | //! Define the kernel 20 | #ifndef __INTEGRATOR_CUH__ 21 | #define __INTEGRATOR_CUH__ 22 | 23 | //! 
Definition for complex variable storage 24 | #ifdef SINGLE_PRECISION 25 | #define CUFFTCOMPLEX cufftComplex 26 | #else 27 | #define CUFFTCOMPLEX cufftComplex 28 | #endif 29 | 30 | 31 | extern "C" __global__ void Integrator_ExplicitEuler_kernel( 32 | Scalar4 *d_pos_in, 33 | Scalar4 *d_pos_out, 34 | float *d_Velocity, 35 | int3 *d_image, 36 | unsigned int *d_group_members, 37 | unsigned int group_size, 38 | BoxDim box, 39 | Scalar dt 40 | ); 41 | 42 | extern "C" __global__ void Integrator_ExplicitEuler_Shear_kernel( 43 | Scalar4 *d_pos_in, 44 | Scalar3 *d_ori_in, 45 | Scalar4 *d_pos_out, 46 | Scalar3 *d_ori_out, 47 | Scalar4 *d_pos_gb, 48 | float *d_Velocity, 49 | float B1, 50 | float *d_sqm_B1_mask, 51 | Scalar3 *d_noise_ang, 52 | int3 *d_image, 53 | unsigned int *d_group_members, 54 | unsigned int group_size, 55 | BoxDim box, 56 | Scalar dt, 57 | Scalar shear_rate 58 | ); 59 | 60 | extern "C" __global__ void Integrator_ExplicitEuler1_Shear_kernel( 61 | Scalar4 *d_pos_in, 62 | Scalar3 *d_ori_in, 63 | Scalar4 *d_pos_out, 64 | Scalar3 *d_ori_out, 65 | Scalar4 *d_pos_gb, 66 | float *d_Velocity, 67 | float B1, 68 | float *d_sqm_B1_mask, 69 | Scalar3 *d_noise_ang, 70 | int3 *d_image, 71 | unsigned int *d_group_members, 72 | unsigned int group_size, 73 | BoxDim box, 74 | Scalar dt, 75 | Scalar shear_rate 76 | ); 77 | 78 | extern "C" __global__ void Integrator_RK_Shear_kernel(Scalar coef_1, Scalar4 *d_pos_in_1, Scalar3 *d_ori_in_1, 79 | Scalar coef_2, Scalar4 *d_pos_in_2, Scalar3 *d_ori_in_2, 80 | Scalar4 *d_pos_out, Scalar3 *d_ori_out, 81 | Scalar4 *d_pos_gb, 82 | float *d_Velocity, 83 | float B1, 84 | float *d_sqm_B1_mask, 85 | Scalar3 *d_noise_ang, 86 | int3 *d_image, 87 | unsigned int *d_group_members, 88 | unsigned int group_size, 89 | BoxDim box, 90 | Scalar coef_3, Scalar dt, 91 | Scalar shear_rate 92 | ); 93 | 94 | 95 | void Integrator_RFD( 96 | float *d_Divergence, // 11*N (will have some zeros, but they will be ignored later) 97 | Scalar4 *d_pos, 98 | int3 
*d_image, 99 | unsigned int *d_group_members, 100 | unsigned int group_size, 101 | const BoxDim& box, 102 | KernelData *ker_data, 103 | BrownianData *bro_data, 104 | MobilityData *mob_data, 105 | ResistanceData *res_data, 106 | WorkData *work_data 107 | ); 108 | 109 | void Integrator_ComputeVelocity( unsigned int timestep, 110 | unsigned int output_period, 111 | float *d_AppliedForce, 112 | float *d_Velocity, 113 | float dt, 114 | float shear_rate, 115 | Scalar4 *d_pos, 116 | float sqm_B2, 117 | float *d_sqm_B2_mask, 118 | Scalar3 *d_ori, 119 | int3 *d_image, 120 | unsigned int *d_group_members, 121 | unsigned int group_size, 122 | const BoxDim& box, 123 | KernelData *ker_data, 124 | BrownianData *bro_data, 125 | MobilityData *mob_data, 126 | ResistanceData *res_data, 127 | WorkData *work_data 128 | ); 129 | 130 | 131 | #endif 132 | -------------------------------------------------------------------------------- /source/Lubrication.cuh: -------------------------------------------------------------------------------- 1 | // Maintainer: Andrew Fiore 2 | 3 | /*! \file Lubrication.cuh 4 | \brief Define the GPU kernels and driving functions to compute the Lubrication 5 | interactions. 6 | */ 7 | 8 | #include "hoomd/ParticleData.cuh" 9 | #include "hoomd/HOOMDMath.h" 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | //! Define the step_one kernel 25 | #ifndef __LUBRICATION_CUH__ 26 | #define __LUBRICATION_CUH__ 27 | 28 | //! 
Definition for comxplex variable storage 29 | #ifdef SINGLE_PRECISION 30 | #define CUFFTCOMPLEX cufftComplex 31 | #else 32 | #define CUFFTCOMPLEX cufftComplex 33 | #endif 34 | 35 | __global__ void Lubrication_RFU_kernel( 36 | Scalar *d_AppliedForce, // output 37 | const Scalar *d_Velocity, // input 38 | const Scalar4 *d_pos, 39 | unsigned int *d_group_members, 40 | const int group_size, 41 | const BoxDim box, 42 | const unsigned int *d_n_neigh, 43 | unsigned int *d_nlist, 44 | const unsigned int *d_headlist, 45 | const Scalar *d_ResTable_dist, 46 | const Scalar *d_ResTable_vals, 47 | const float ResTable_min, 48 | const float ResTable_dr, 49 | const Scalar rlub 50 | ); 51 | 52 | __global__ void Lubrication_RFE_kernel( 53 | Scalar *d_Force, 54 | Scalar shear_rate, 55 | Scalar4 *d_pos, 56 | float B2, 57 | float *d_sqm_B2_mask, 58 | Scalar3 *d_ori, 59 | unsigned int *d_group_members, 60 | int group_size, 61 | BoxDim box, 62 | const unsigned int *d_n_neigh, 63 | unsigned int *d_nlist, 64 | const unsigned int *d_headlist, 65 | const Scalar *d_ResTable_dist, 66 | const Scalar *d_ResTable_vals, 67 | const float ResTable_min, 68 | const float ResTable_dr, 69 | const Scalar rlub 70 | ); 71 | 72 | __global__ void Lubrication_RSU_kernel( 73 | Scalar *d_Stresslet, 74 | Scalar *d_Velocity, 75 | Scalar4 *d_pos, 76 | unsigned int *d_group_members, 77 | int group_size, 78 | BoxDim box, 79 | const unsigned int *d_n_neigh, 80 | unsigned int *d_nlist, 81 | const unsigned int *d_headlist, 82 | const Scalar *d_ResTable_dist, 83 | const Scalar *d_ResTable_vals, 84 | const float ResTable_min, 85 | const float ResTable_dr, 86 | const Scalar rlub 87 | ); 88 | 89 | __global__ void Lubrication_RSE_kernel( 90 | Scalar *d_Stresslet, 91 | Scalar strain_rate, 92 | float B2, 93 | float *d_sqm_B2_mask, 94 | Scalar3 *d_ori, 95 | int group_size, 96 | unsigned int *d_group_members, 97 | const unsigned int *d_n_neigh, 98 | unsigned int *d_nlist, 99 | const unsigned int *d_headlist, 100 | Scalar4 
*d_pos, 101 | BoxDim box, 102 | const Scalar *d_ResTable_dist, 103 | const Scalar *d_ResTable_vals, 104 | const float ResTable_min, 105 | const float ResTable_dr 106 | ); 107 | 108 | __global__ void Lubrication_RSEgeneral_kernel( 109 | Scalar *d_Stresslet, 110 | Scalar *d_Strain, 111 | int group_size, 112 | unsigned int *d_group_members, 113 | const unsigned int *d_n_neigh, 114 | unsigned int *d_nlist, 115 | const unsigned int *d_headlist, 116 | Scalar4 *d_pos, 117 | BoxDim box, 118 | const Scalar *d_ResTable_dist, 119 | const Scalar *d_ResTable_vals, 120 | const float ResTable_min, 121 | const float ResTable_dr 122 | ); 123 | 124 | 125 | #endif 126 | -------------------------------------------------------------------------------- /source/Mobility.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 
20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Andrew Fiore 52 | 53 | /*! \file Stokes.cuh 54 | \brief Declares GPU kernel code for integration considering hydrodynamic interactions on the GPU. Used by Stokes. 55 | */ 56 | #include "hoomd/ParticleData.cuh" 57 | #include "hoomd/HOOMDMath.h" 58 | 59 | #include 60 | 61 | #include "DataStruct.h" 62 | 63 | //! 
Define the step_one kernel 64 | #ifndef __MOBILITY_CUH__ 65 | #define __MOBILITY_CUH__ 66 | 67 | //! Definition for comxplex variable storage 68 | #ifdef SINGLE_PRECISION 69 | #define CUFFTCOMPLEX cufftComplex 70 | #else 71 | #define CUFFTCOMPLEX cufftComplex 72 | #endif 73 | 74 | 75 | void Mobility_MobilityUD( 76 | Scalar4 *d_pos, 77 | Scalar4 *d_vel, 78 | Scalar4 *d_AngvelStrain, 79 | Scalar4 *d_net_force, 80 | Scalar4 *d_TorqueStress, 81 | unsigned int *d_group_members, 82 | unsigned int group_size, 83 | const BoxDim& box, 84 | KernelData *ker_data, 85 | MobilityData *mob_data, 86 | WorkData *work_data 87 | ); 88 | 89 | __global__ void Mobility_RealSpace_kernel( 90 | Scalar4 *d_pos, 91 | Scalar4 *d_vel, 92 | Scalar4 *d_AngvelStrain, 93 | Scalar4 *d_net_force, 94 | Scalar4 *d_TorqueStress, 95 | int group_size, 96 | Scalar xi, 97 | Scalar4 *d_ewaldC1, 98 | Scalar2 self, 99 | Scalar ewald_cut, 100 | int ewald_n, 101 | Scalar ewald_dr, 102 | unsigned int *d_group_members, 103 | BoxDim box, 104 | const unsigned int *d_n_neigh, 105 | const unsigned int *d_nlist, 106 | const unsigned int *d_headlist 107 | ); 108 | 109 | void Mobility_RealSpaceFTS( 110 | Scalar4 *d_pos, 111 | Scalar4 *d_vel, 112 | Scalar4 *d_AngvelStrain, 113 | Scalar4 *d_net_force, 114 | Scalar4 *d_TorqueStress, 115 | Scalar4 *d_couplet, 116 | Scalar4 *d_delu, 117 | unsigned int *d_group_members, 118 | unsigned int group_size, 119 | const BoxDim& box, 120 | Scalar xi, 121 | Scalar ewald_cut, 122 | Scalar ewald_dr, 123 | int ewald_n, 124 | Scalar4 *d_ewaldC1, 125 | Scalar2 self, 126 | const unsigned int *d_n_neigh, 127 | const unsigned int *d_nlist, 128 | const unsigned int *d_headlist, 129 | dim3 grid, 130 | dim3 threads ); 131 | 132 | 133 | __global__ void Mobility_WaveSpace_Spread_kernel( 134 | Scalar4 *d_pos, 135 | Scalar4 *d_net_force, 136 | Scalar4 *d_TorqueStress, 137 | CUFFTCOMPLEX *gridX, 138 | CUFFTCOMPLEX *gridY, 139 | CUFFTCOMPLEX *gridZ, 140 | CUFFTCOMPLEX *gridXX, 141 | CUFFTCOMPLEX 
*gridXY, 142 | CUFFTCOMPLEX *gridXZ, 143 | CUFFTCOMPLEX *gridYX, 144 | CUFFTCOMPLEX *gridYY, 145 | CUFFTCOMPLEX *gridYZ, 146 | CUFFTCOMPLEX *gridZX, 147 | CUFFTCOMPLEX *gridZY, 148 | int group_size, 149 | int Nx, 150 | int Ny, 151 | int Nz, 152 | unsigned int *d_group_members, 153 | BoxDim box, 154 | const int P, 155 | Scalar3 gridh, 156 | Scalar xi, 157 | Scalar eta, 158 | Scalar prefac, 159 | Scalar expfac 160 | ); 161 | 162 | __global__ void Mobility_WaveSpace_Green_kernel( 163 | CUFFTCOMPLEX *gridX, 164 | CUFFTCOMPLEX *gridY, 165 | CUFFTCOMPLEX *gridZ, 166 | CUFFTCOMPLEX *gridXX, 167 | CUFFTCOMPLEX *gridXY, 168 | CUFFTCOMPLEX *gridXZ, 169 | CUFFTCOMPLEX *gridYX, 170 | CUFFTCOMPLEX *gridYY, 171 | CUFFTCOMPLEX *gridYZ, 172 | CUFFTCOMPLEX *gridZX, 173 | CUFFTCOMPLEX *gridZY, 174 | Scalar4 *gridk, 175 | unsigned int NxNyNz 176 | ); 177 | 178 | __global__ void Mobility_WaveSpace_ContractU( 179 | Scalar4 *d_pos, 180 | Scalar4 *d_vel, 181 | CUFFTCOMPLEX *gridX, 182 | CUFFTCOMPLEX *gridY, 183 | CUFFTCOMPLEX *gridZ, 184 | int group_size, 185 | int Nx, 186 | int Ny, 187 | int Nz, 188 | Scalar xi, 189 | Scalar eta, 190 | unsigned int *d_group_members, 191 | BoxDim box, 192 | const int P, 193 | Scalar3 gridh, 194 | Scalar prefac, 195 | Scalar expfac 196 | ); 197 | 198 | __global__ void Mobility_WaveSpace_ContractD( 199 | Scalar4 *d_pos, 200 | Scalar4 *d_delu, 201 | CUFFTCOMPLEX *gridXX, 202 | CUFFTCOMPLEX *gridXY, 203 | CUFFTCOMPLEX *gridXZ, 204 | CUFFTCOMPLEX *gridYX, 205 | CUFFTCOMPLEX *gridYY, 206 | CUFFTCOMPLEX *gridYZ, 207 | CUFFTCOMPLEX *gridZX, 208 | CUFFTCOMPLEX *gridZY, 209 | int group_size, 210 | int Nx, 211 | int Ny, 212 | int Nz, 213 | Scalar xi, 214 | Scalar eta, 215 | unsigned int *d_group_members, 216 | BoxDim box, 217 | const int P, 218 | Scalar3 gridh, 219 | Scalar prefac, 220 | Scalar expfac 221 | ); 222 | 223 | void Mobility_GeneralizedMobility( 224 | float *d_generalU, 225 | float *d_generalF, 226 | Scalar4 *d_pos, 227 | unsigned int *d_group_members, 
228 | unsigned int group_size, 229 | const BoxDim& box, 230 | KernelData *ker_data, 231 | MobilityData *mob_data, 232 | WorkData *work_data 233 | ); 234 | 235 | #endif 236 | -------------------------------------------------------------------------------- /source/Precondition.cuh: -------------------------------------------------------------------------------- 1 | // This file is part of the PSEv3 plugin, released under the BSD 3-Clause License 2 | // 3 | // Andrew Fiore 4 | 5 | /*! \file Preconditioner.cuh 6 | \brief Define the GPU kernels and driving functions to compute the preconditioner. 7 | */ 8 | 9 | #include "hoomd/ParticleData.cuh" 10 | #include "hoomd/HOOMDMath.h" 11 | 12 | #include 13 | 14 | #include "DataStruct.h" 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | 27 | //! Define the step_one kernel 28 | #ifndef __PRECONDITION_CUH__ 29 | #define __PRECONDITION_CUH__ 30 | 31 | //! Definition for comxplex variable storage 32 | #ifdef SINGLE_PRECISION 33 | #define CUFFTCOMPLEX cufftComplex 34 | #else 35 | #define CUFFTCOMPLEX cufftComplex 36 | #endif 37 | 38 | void Precondition_Brownian_RFUmultiply( 39 | float *d_y, // output 40 | float *d_x, // input 41 | const Scalar4 *d_pos, 42 | unsigned int *d_group_members, 43 | const int group_size, 44 | const BoxDim box, 45 | void *pBuffer, 46 | KernelData *ker_data, 47 | ResistanceData *res_data 48 | ); 49 | 50 | void Precondition_Brownian_Undo( 51 | float *d_x, // input/output 52 | int group_size, 53 | KernelData *ker_data, 54 | ResistanceData *res_data 55 | ); 56 | 57 | void Precondition_Saddle_RFUmultiply( 58 | float *d_y, // output 59 | float *d_x, // input 60 | float *d_Scratch, // intermediate storage 61 | const int *d_prcm, 62 | int group_size, 63 | unsigned int nnz, 64 | const int *d_L_RowPtr, 65 | const int *d_L_ColInd, 66 | const float *d_L_Val, 67 | cusparseHandle_t spHandle, 68 | cusparseStatus_t spStatus, 69 | 
cusparseMatDescr_t descr_L, 70 | csrsv2Info_t info_L, 71 | csrsv2Info_t info_Lt, 72 | const cusparseOperation_t trans_L, 73 | const cusparseOperation_t trans_Lt, 74 | const cusparseSolvePolicy_t policy_L, 75 | const cusparseSolvePolicy_t policy_Lt, 76 | void *pBuffer, 77 | dim3 grid, 78 | dim3 threads 79 | ); 80 | 81 | void Precondition_Wrap( 82 | Scalar4 *d_pos, 83 | unsigned int *d_group_members, 84 | unsigned int group_size, 85 | const BoxDim& box, 86 | KernelData *ker_data, 87 | ResistanceData *res_data, 88 | WorkData *work_data 89 | ); 90 | 91 | 92 | #endif 93 | -------------------------------------------------------------------------------- /source/Saddle.cu: -------------------------------------------------------------------------------- 1 | // This file is part of the PSEv3 plugin, released under the BSD 3-Clause License 2 | // 3 | // Andrew Fiore 4 | 5 | 6 | #include "Saddle.cuh" 7 | #include "Lubrication.cuh" 8 | #include "Precondition.cuh" 9 | #include "Mobility.cuh" 10 | #include "Solvers.cuh" 11 | #include "Wrappers.cuh" 12 | 13 | #include "Helper_Debug.cuh" 14 | #include "Helper_Mobility.cuh" 15 | #include "Helper_Precondition.cuh" 16 | #include "Helper_Saddle.cuh" 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | #include "hoomd/Saru.h" 24 | #include "hoomd/TextureTools.h" 25 | 26 | #include 27 | #include 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include 37 | 38 | #ifdef WIN32 39 | #include 40 | #else 41 | #include 42 | #endif 43 | 44 | //! command to convert floats or doubles to integers 45 | #ifdef SINGLE_PRECISION 46 | #define __scalar2int_rd __float2int_rd 47 | #else 48 | #define __scalar2int_rd __double2int_rd 49 | #endif 50 | 51 | 52 | /* 53 | Define the saddle point matrix describing Stokesian Dynamics, 54 | i.e. it describes the relationship Ax=b (rather than constructing the matrix A). 55 | */ 56 | 57 | 58 | //! 
//! Texture for reading table values
scalar4_tex_t tables1_tex;
//! Texture for reading particle positions
scalar4_tex_t pos_tex;

/*!
	Matrix-vector product with the saddle point matrix of Stokesian Dynamics.
	The matrix A is never assembled; only its action b = A*x is evaluated.

	With N = group_size, both x and b hold 17N floats, split into a leading
	block of 11N entries and a trailing block of 6N entries:

		b[0:11N]   = M^ff * x[0:11N],  followed by  b[0:6N] += x[11N:17N]
		b[11N:17N] = x[0:6N] - R_FU^nf * x[11N:17N]

	d_b			(output) result of the matrix-vector product (size 17N)
	d_x			(input)  vector being multiplied (size 17N)
	d_pos			(input)  particle positions
	d_group_members		(input)  index array to global HOOMD tag on each particle
	group_size		(input)  size of the group, i.e. number of particles
	box			(input)  box dimensions
	ker_data		(input)  structure containing information for kernel launches
	mob_data		(input)  structure containing information for mobility calculations
	res_data		(input)  structure containing information for resistance calculations
	work_data		(input)  work space forwarded to the mobility calculation

	Referenced by cuspSaddle in Wrappers.cuh.
*/
void Saddle_Multiply(
			float *d_b, // output
			float *d_x, // input
			Scalar4 *d_pos,
			unsigned int *d_group_members,
			unsigned int group_size,
			const BoxDim& box,
			KernelData *ker_data,
			MobilityData *mob_data,
			ResistanceData *res_data,
			WorkData *work_data
			){

	// Launch configuration shared by every per-particle kernel below
	dim3 grid = ker_data->particle_grid;
	dim3 threads = ker_data->particle_threads;

	// Set output to zero to start (size 17N)
	Saddle_ZeroOutput_kernel<<<grid, threads>>>( d_b, group_size );

	// Do the mobility multiplication, M^ff * F => d_b[0:11N]
	Mobility_GeneralizedMobility(
					d_b, // output (temporary, modified next)
					d_x, // input (generalized forces)
					d_pos,
					d_group_members,
					group_size,
					box,
					ker_data,
					mob_data,
					work_data);
	//// zhoge: Copy the first 11N entries from d_x to d_b to effectively turn off far-field mobility
	////        (need to comment the Mobility_GeneralizedMobility above)
	//cudaMemcpy( d_b, d_x, 11*group_size*sizeof(float), cudaMemcpyDeviceToDevice );

	// M^ff*F + B*U => RHS[0:11N]. Effectively, d_b[0:6N] += d_x[11N:17N]
	Saddle_AddFloat_kernel<<<grid, threads>>>(
						d_b,
						&d_x[11*group_size],
						d_b, // output (in place)
						1.0, 1.0,
						group_size, 6 );

	// Do the resistance multiplication, R_FU^nf * U => d_b[11N:17N]
	Lubrication_RFU_kernel<<<grid, threads>>>(
						&d_b[11*group_size], // output (temporary, modified next)
						&d_x[11*group_size], // input (relative velocity)
						d_pos,
						d_group_members,
						group_size,
						box,
						res_data->nneigh,
						res_data->nlist,
						res_data->headlist,
						res_data->table_dist,
						res_data->table_vals,
						res_data->table_min,
						res_data->table_dr,
						res_data->rlub);

	// B^T*F - R_FU*U => RHS[11N:17N]. Effectively, d_b[11N:17N] = d_x[0:6N] - d_b[11N:17N]
	Saddle_AddFloat_kernel<<<grid, threads>>>(
						d_x,
						&d_b[11*group_size],
						&d_b[11*group_size], // output (in place)
						1.0, -1.0,
						group_size, 6 );

}
/*!
	Apply the saddle point preconditioner, x = P \ b (i.e. x = P^(-1) * b).

	The far-field mobility M is approximated by the identity, so only the
	incomplete Cholesky solves wrapped by Precondition_Saddle_RFUmultiply are
	needed. According to the notes in this file that routine returns
	-S^(-1) * input, where S = -(RFU + I).

	CUSP requires that the operator can be applied in place (d_x may alias d_b,
	see gmres.inl in CUSP). The result is therefore accumulated in the scratch
	array res_data->Scratch2 and copied to d_x only at the very end. Note that
	d_b[0:6N] is used as temporary storage and is overwritten.

	NOTE(review): Saddle_AddFloat_kernel( a, b, out, ca, cb, N, m ) is read here
	as out = ca*a + cb*b over m*N entries, as inferred from its call sites in
	this file -- confirm against Helper_Saddle.cu.

	d_x		(output) solution of the preconditioner solve (size 17N)
	d_b		(input)  right-hand side of the preconditioner solve (size 17N)
	group_size	(input)  size of the group, i.e. number of particles (N)
	pBuffer		(input)  buffer forwarded to Precondition_Saddle_RFUmultiply
	ker_data	(input)  structure containing information for kernel launches
	res_data	(input)  structure containing information for resistance calculation
*/
void Saddle_Preconditioner(
				float *d_x, // output
				float *d_b, // input
				int group_size,
				void *pBuffer,
				KernelData *ker_data,
				ResistanceData *res_data
				){

	// Launch configuration shared by every per-particle kernel below
	dim3 grid = ker_data->particle_grid;
	dim3 threads = ker_data->particle_threads;

	// Scratch array in which the solution is assembled (size 17N)
	float *d_Scratch = res_data->Scratch2;

	// In the preconditioner, M is approximated as identity:
	// d_Scratch[0:11N] = M^(-1) * d_b[0:11N] = d_b[0:11N]
	cudaMemcpy( d_Scratch, d_b, 11*group_size*sizeof(float), cudaMemcpyDeviceToDevice );

	//
	// Incomplete Cholesky solves (done in place!)
	//
	// zhoge: output = -S^(-1) * input, where S = -(RFU + I).

	// d_Scratch[11N:17N] = -S^(-1) * d_Scratch[0:6N]   (B^T selects the leading 6N entries)
	Precondition_Saddle_RFUmultiply(
					&d_Scratch[11*group_size], // output
					d_Scratch,                 // input
					res_data->Scratch1,        // intermediate storage
					res_data->prcm,
					group_size,
					res_data->nnz,
					res_data->L_RowPtr,
					res_data->L_ColInd,
					res_data->L_Val,
					res_data->spHandle,
					res_data->spStatus,
					res_data->descr_L,
					res_data->info_L,
					res_data->info_Lt,
					res_data->trans_L,
					res_data->trans_Lt,
					res_data->policy_L,
					res_data->policy_Lt,
					pBuffer,
					ker_data->particle_grid,
					ker_data->particle_threads
					);

	// d_Scratch[0:6N] = d_Scratch[0:6N] - d_Scratch[11N:17N]
	Saddle_AddFloat_kernel<<<grid, threads>>>( d_Scratch, &d_Scratch[11*group_size], d_Scratch, 1.0, -1.0, group_size, 6 );

	// d_b[0:6N] = -S^(-1) * d_b[11N:17N]
	Precondition_Saddle_RFUmultiply(
					d_b,                  // output (overwrites d_b[0:6N]; already copied to scratch)
					&d_b[11*group_size],  // input
					res_data->Scratch1,   // intermediate storage
					res_data->prcm,
					group_size,
					res_data->nnz,
					res_data->L_RowPtr,
					res_data->L_ColInd,
					res_data->L_Val,
					res_data->spHandle,
					res_data->spStatus,
					res_data->descr_L,
					res_data->info_L,
					res_data->info_Lt,
					res_data->trans_L,
					res_data->trans_Lt,
					res_data->policy_L,
					res_data->policy_Lt,
					pBuffer,
					ker_data->particle_grid,
					ker_data->particle_threads
					);

	// d_Scratch[0:6N] = d_b[0:6N] + d_Scratch[0:6N]
	Saddle_AddFloat_kernel<<<grid, threads>>>( d_b, d_Scratch, d_Scratch, 1.0, 1.0, group_size, 6 );

	// d_Scratch[11N:17N] = d_Scratch[11N:17N] - d_b[0:6N]
	Saddle_AddFloat_kernel<<<grid, threads>>>( &d_Scratch[11*group_size], d_b, &d_Scratch[11*group_size], 1.0, -1.0, group_size, 6 );

	// Finish, d_x <-- d_Scratch
	cudaMemcpy( d_x, d_Scratch, 17*group_size*sizeof(float), cudaMemcpyDeviceToDevice );

	//// zhoge: uncomment below to effectively turn off the preconditioner (can comment everything above, too)
	//cudaMemcpy( d_x, d_b, 17*group_size*sizeof(float), cudaMemcpyDeviceToDevice );

}
\file Saddle.cuh 6 | \brief Declared functions for saddle point calculations 7 | */ 8 | #include "hoomd/ParticleData.cuh" 9 | #include "hoomd/HOOMDMath.h" 10 | 11 | #include 12 | 13 | #include "DataStruct.h" 14 | 15 | #include 16 | 17 | #include 18 | #include 19 | 20 | //! Define the step_one kernel 21 | #ifndef __SADDLE_CUH__ 22 | #define __SADDLE_CUH__ 23 | 24 | //! Definition for comxplex variable storage 25 | #ifdef SINGLE_PRECISION 26 | #define CUFFTCOMPLEX cufftComplex 27 | #else 28 | #define CUFFTCOMPLEX cufftComplex 29 | #endif 30 | 31 | 32 | void Saddle_Multiply( 33 | float *d_b, // output 34 | float *d_x, // input 35 | Scalar4 *d_pos, 36 | unsigned int *d_group_members, 37 | unsigned int group_size, 38 | const BoxDim& box, 39 | KernelData *ker_data, 40 | MobilityData *mob_data, 41 | ResistanceData *res_data, 42 | WorkData *work_data 43 | ); 44 | 45 | void Saddle_Preconditioner( 46 | float *d_x, // Solution 47 | float *d_b, // RHS 48 | int group_size, 49 | void *pBuffer, 50 | KernelData *ker_data, 51 | ResistanceData *res_data 52 | ); 53 | 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /source/ShearFunction.cc: -------------------------------------------------------------------------------- 1 | // Maintainer: Gang Wang 2 | // Updated to HOOMD2.x compatibility by Andrew M. Fiore 3 | 4 | /*! 
//! Base class describing the shear rate and shear strain as functions of the timestep
/*! ShearFunction exposes three public virtual functions:
	1) getShearRate; 2) getStrain; and 3) getOffset
	None of them is pure virtual: the base class is instantiable and describes
	"no shear" (every function returns zero). New shear protocols are added by
	subclassing this interface and overriding the three functions, without
	changing any existing code or creating a new plugin.
*/
class ShearFunction
{
public:

	//! Virtual destructor, so that derived objects are destroyed correctly through a base pointer
	virtual ~ShearFunction() = default;

	//! Get shear rate at certain timestep
	/*! \param timestep the timestep
	    \return 0.0 in the base class
	*/
	virtual double getShearRate(unsigned int timestep){ return 0.0; }

	//! Get strain at certain timestep (unwrapped)
	/*! \param timestep the timestep
	    \return 0.0 in the base class
	*/
	virtual double getStrain(unsigned int timestep){ return 0.0; }

	//! Get the offset of timestep (typically offset is the timestep when the shear starts)
	/*! \return 0 in the base class
	*/
	virtual unsigned int getOffset(){ return 0u; }

};
18 | Compared with previous approach, we can simply subclass this interface without 19 | changing any existing code or creating a new plugin. 20 | */ 21 | class ShearFunctionWrap : public ShearFunction 22 | { 23 | public: 24 | 25 | //! Get shear rate at certain timestep 26 | /*! \param timestep the timestep 27 | */ 28 | virtual double getShearRate(unsigned int timestep){ return double(0.0); } 29 | 30 | //! Get strain at certain timestep (unwrapped) 31 | /*! \param timestep the timestep 32 | */ 33 | virtual double getStrain(unsigned int timestep){ return double(0.0); } 34 | 35 | //! Get the offset of timestep (typically offset is the timestep when the shear starts) 36 | virtual unsigned int getOffset(){ return int(0); } 37 | 38 | }; 39 | 40 | 41 | void export_ShearFunctionWrap(pybind11::module& m); 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /source/Solvers.cu: -------------------------------------------------------------------------------- 1 | // This file is part of the PSEv3 plugin, released under the BSD 3-Clause License 2 | // 3 | // Andrew Fiore 4 | 5 | #include "Wrappers.cuh" 6 | #include "Solvers.cuh" 7 | #include "Lubrication.cuh" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | #include 25 | 26 | #ifdef WIN32 27 | #include 28 | #else 29 | #include 30 | #endif 31 | 32 | //! command to convert floats or doubles to integers 33 | #ifdef SINGLE_PRECISION 34 | #define __scalar2int_rd __float2int_rd 35 | #else 36 | #define __scalar2int_rd __double2int_rd 37 | #endif 38 | 39 | /* 40 | Construct the saddle point matrix, the preconditioner matrix, and do the 41 | preconditioned solve, all in one wrapper function. 
/*
	Construct the saddle point operator and its preconditioner, then solve
	A * x = b with preconditioned, restarted GMRES, all in one wrapper function.

	d_rhs			(input)  right-hand side for the saddle point solve (size 17N)
	d_solution		(in/out) initial guess on entry, solution on exit (size 17N)
	d_pos			(input)  particle positions
	d_group_members		(input)  indices of particles in the integration group
	group_size		(input)  number of particles (N)
	box			(input)  periodic box information
	tol			(input)  relative tolerance of the GMRES solve
	pBuffer			(input)  buffer space for cuSPARSE operations in preconditioner
	ker_data		(input)  structure containing information for kernel launches
	mob_data		(input)  structure containing information for mobility calculation
	res_data		(input)  structure containing information for resistance calculation
	work_data		(input)  work space forwarded to the saddle point operator

	A failed solve is reported on stdout only; the (unconverged) iterate is
	still left in d_solution.
*/
void Solvers_Saddle(
			float *d_rhs,
			float *d_solution,
			Scalar4 *d_pos,
			unsigned int *d_group_members,
			unsigned int group_size,
			const BoxDim& box,
			float tol,
			void *pBuffer,
			KernelData *ker_data,
			MobilityData *mob_data,
			ResistanceData *res_data,
			WorkData *work_data
			){

	// Set up CUSP saddle point matrix object
	cuspSaddle SADDLE(
				d_pos,
				d_group_members,
				group_size,
				box,
				ker_data,
				mob_data,
				res_data,
				work_data
				);

	// Set up CUSP preconditioner matrix object
	cuspSaddlePreconditioner PRECONDITIONER(
						group_size,
						pBuffer,
						ker_data,
						res_data
						);

	// Wrap raw pointers for solution (initial guess) and RHS with thrust::device_ptr
	thrust::device_ptr<float> d_x( d_solution );
	thrust::device_ptr<float> d_b( d_rhs );

	// Wrap thrust device pointers in cusp array1d_view (no copy, views of size 17N)
	typedef cusp::array1d_view< thrust::device_ptr<float> > DeviceArrayView;
	DeviceArrayView x( d_x, d_x + 17*group_size );
	DeviceArrayView b( d_b, d_b + 17*group_size );

	// CUSP solver monitor.
	//
	// Converge if residual norm || b - A*x || <= abs_tol + rel_tol * || b ||
	//
	int iter_limit = 1000;
	float rel_tol = tol;
	float abs_tol = 0.0;
	bool verbose_flag = false;
	cusp::monitor<float> monitor( b, iter_limit, rel_tol, abs_tol, verbose_flag );

	// Solve the linear system A * x = b using GMRES.
	//
	// Smaller values of the restart parameter reduce memory requirements but
	// also worsen the convergence.
	int restart = 50;
	cusp::krylov::gmres( SADDLE, x, b, restart, monitor, PRECONDITIONER );

	if ( !monitor.converged() )
	{
		// Explicit casts keep the arguments in line with the %lu / %e conversions
		// on every platform (size_t is not unsigned long on LLP64 targets).
		printf (" GMRES solver failed to converge. Iterations = %5lu, residual = %10.3e \n",
			static_cast<unsigned long>( monitor.iteration_count() ),
			static_cast<double>( monitor.residual_norm() ) );
	}

}
Definition for comxplex variable storage 25 | #ifdef SINGLE_PRECISION 26 | #define CUFFTCOMPLEX cufftComplex 27 | #else 28 | #define CUFFTCOMPLEX cufftComplex 29 | #endif 30 | 31 | void Solvers_Saddle( 32 | float *d_rhs, 33 | float *d_solution, 34 | Scalar4 *d_pos, 35 | unsigned int *d_group_members, 36 | unsigned int group_size, 37 | const BoxDim& box, 38 | float tol, 39 | void *pBuffer, 40 | KernelData *ker_data, 41 | MobilityData *mob_data, 42 | ResistanceData *res_data, 43 | WorkData *work_data 44 | ); 45 | 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /source/SpecificShearFunction.cc: -------------------------------------------------------------------------------- 1 | // Maintainer: Gang Wang 2 | // Updated to HOOMD2.x compatibility by Andrew M. Fiore 3 | 4 | /*! \file ShearFunction.cc 5 | \brief Defines ShearFunction class and relevant functions 6 | */ 7 | 8 | #ifdef WIN32 9 | #pragma warning( push ) 10 | #pragma warning( disable : 4103 4244 ) 11 | #endif 12 | 13 | #include "SpecificShearFunction.h" 14 | 15 | using namespace std; 16 | 17 | void export_SpecificShearFunction(pybind11::module& m) 18 | { 19 | 20 | pybind11::class_ >( m, "SinShearFunction", pybind11::base()) 21 | .def(pybind11::init< double, double, unsigned int, double >()); 22 | 23 | pybind11::class_ > (m, "SteadyShearFunction", pybind11::base()) 24 | .def(pybind11::init< double, unsigned int, double >()); 25 | 26 | pybind11::class_ >(m, "ChirpShearFunction", pybind11::base()) 27 | .def(pybind11::init< double, double, double, double, unsigned int, double >()); 28 | 29 | pybind11::class_ >( m, "TukeyWindowFunction", pybind11::base()) 30 | .def(pybind11::init< double, double, unsigned int, double >()); 31 | 32 | pybind11::class_ >(m, "WindowedFunction", pybind11::base()) 33 | .def(pybind11::init< std::shared_ptr, std::shared_ptr >()); 34 | } 35 | 36 | #ifdef WIN32 37 | #pragma warning( pop ) 38 | #endif 39 | 
-------------------------------------------------------------------------------- /source/SpecificShearFunction.h: -------------------------------------------------------------------------------- 1 | #ifdef NVCC 2 | #error This header cannot be compiled by nvcc 3 | #endif 4 | 5 | #include 6 | 7 | #include "ShearFunction.h" 8 | 9 | #ifndef __SPECIFIC_SHEAR_FUNCTION_H__ 10 | #define __SPECIFIC_SHEAR_FUNCTION_H__ 11 | 12 | #include 13 | 14 | 15 | //! Simple sinusoidal shear implementing the abstract class ShearFunction 16 | class SinShearFunction : public ShearFunction 17 | { 18 | public: 19 | //! Constructor of SinShearFunction class 20 | /*! \param max_shear_rate maximum shear rate 21 | \param frequency real (NOT angular) frequency of oscillatory shear 22 | \param offset the offset of oscillatory shear 23 | \param dt the time interval 24 | */ 25 | SinShearFunction(double max_shear_rate, double frequency, unsigned int offset, double dt) : 26 | ShearFunction(), 27 | m_max_shear_rate(max_shear_rate), 28 | m_frequency(frequency), 29 | m_offset(offset), 30 | m_dt(dt) { } 31 | double getShearRate(unsigned int timestep) { 32 | return m_max_shear_rate * cos( m_frequency * 2 * m_pi * ( (timestep - m_offset) * m_dt ) ); 33 | } 34 | double getStrain(unsigned int timestep) { 35 | return m_max_shear_rate * sin( m_frequency * 2 * m_pi * ( (timestep - m_offset) * m_dt ) ) / m_frequency / 2 / m_pi; 36 | } 37 | unsigned int getOffset() { 38 | return m_offset; 39 | } 40 | private: 41 | const double m_max_shear_rate; //!< maximum shear rate 42 | const double m_frequency; //!< Real frequency, not angular frequency 43 | const unsigned int m_offset; //!< offset of the sinusoidal oscillatory shear 44 | const double m_dt; //!< time step 45 | static constexpr double m_pi = 3.1415926536; 46 | }; 47 | 48 | //! Simple steady shear implementing the abstract class ShearFunction 49 | class SteadyShearFunction : public ShearFunction 50 | { 51 | public: 52 | //! 
Constructor of SteadyShearFunction 53 | /*! \param shear_rate the shear rate 54 | \param offset the offset of the steady shear 55 | \param the time interval between each timestep 56 | */ 57 | SteadyShearFunction(double shear_rate, unsigned int offset, double dt) : 58 | ShearFunction(), 59 | m_shear_rate(shear_rate), 60 | m_offset(offset), 61 | m_dt(dt) { } 62 | double getShearRate(unsigned int timestep) { 63 | return m_shear_rate; 64 | } 65 | double getStrain(unsigned int timestep) { 66 | return m_shear_rate * (timestep - m_offset) * m_dt; 67 | } 68 | unsigned int getOffset() { 69 | return m_offset; 70 | } 71 | private: 72 | const double m_shear_rate; //!< constant shear rate 73 | const unsigned int m_offset; //!< offset of the steady shear 74 | const double m_dt; //!< time step 75 | }; 76 | 77 | //! Chirp oscillatory shear implementing abstract class ShearFunction 78 | /*! Adjusted from code of Zsigmond Varga, plugin PSEv1a_chirpv2 79 | */ 80 | class ChirpShearFunction : public ShearFunction 81 | { 82 | public: 83 | //! Constructor of ChirpShearFunction class 84 | /*! 
\param amp the strain amplitude of the chirp shear 85 | \param omega_0 the starting ANGULAR frequency of the shear 86 | \param omega_f the ending ANGULAR frequency of the shear 87 | \param periodT the total time of the chirp run 88 | \param offset the offset of the chirp return 89 | \param dt the time interval between each timestep 90 | */ 91 | ChirpShearFunction(double amp, double omega_0, double omega_f, double periodT, unsigned int offset, double dt) : 92 | ShearFunction(), 93 | m_amp(amp), 94 | m_omega_0(omega_0), 95 | m_omega_f(omega_f), 96 | m_periodT(periodT), 97 | m_offset(offset), 98 | m_dt(dt) { } 99 | double getShearRate(unsigned int timestep) { 100 | double current_omega = getCurrentOmega(timestep); 101 | double current_phase = getCurrentPhase(timestep); 102 | return m_amp * current_omega * cos(current_phase); 103 | } 104 | double getStrain(unsigned int timestep) { 105 | double current_phase = getCurrentPhase(timestep); 106 | return m_amp * sin( current_phase ); 107 | } 108 | unsigned int getOffset() { 109 | return m_offset; 110 | } 111 | private: 112 | double getCurrentOmega(unsigned int timestep) { 113 | return m_omega_0 * exp( m_dt * (timestep - m_offset) * logf(m_omega_f / m_omega_0) / m_periodT ); 114 | } 115 | double getCurrentPhase(unsigned int timestep) { 116 | return m_periodT * m_omega_0 / logf( m_omega_f / m_omega_0 ) * ( exp( m_dt * (timestep - m_offset) * logf(m_omega_f / m_omega_0) / m_periodT ) - 1 ); 117 | } 118 | const double m_amp; //!< Amplitude 119 | const double m_omega_0; //!< Minimum angular frequency 120 | const double m_omega_f; //!< Maximum angular frequency 121 | const double m_periodT; //!< Final time of Chirp 122 | const unsigned int m_offset; //!< offset of the shear 123 | const double m_dt; //!< time step 124 | }; 125 | 126 | 127 | //! Tukey Window Function implementing abstract class ShearFunction 128 | /*! 
Strictly speaking, this function is not a ShearFunction since it will only be 129 | used as a window function and applied to other ShearFunctions. This class should 130 | never be used by itself. However, since ShearFunction provides all the abstract 131 | functions it needs. We will call this a ShearFunction to avoid duplicate base classes 132 | TODO: Change the names of ShearFunction/getShearRate/getStrain to more general descriptions. 133 | */ 134 | class TukeyWindowFunction : public ShearFunction 135 | { 136 | public: 137 | //! Constructor of TukeyWindowFunction class 138 | /*! \param periodT the total time of the window 139 | \param tukey_param the parameter of Tukey window function, must be within (0, 1] 140 | \param offset the offset of the window 141 | \param dt the time interval between each timestep 142 | */ 143 | TukeyWindowFunction(double periodT, double tukey_param, unsigned int offset, double dt) : 144 | ShearFunction(), 145 | m_periodT(periodT), 146 | m_tukey_param(tukey_param), 147 | m_offset(offset), 148 | m_dt(dt) { 149 | m_omega_value = 2 * m_pi / tukey_param; 150 | } 151 | double getShearRate(unsigned int timestep) { 152 | double rel_time = (timestep - m_offset) * m_dt / m_periodT; // supposed to be within [0,1] 153 | if (rel_time <= 0 || rel_time >= 1) { 154 | return 0; 155 | } 156 | else if (rel_time >= m_tukey_param / 2 && rel_time <= 1 - m_tukey_param / 2) { 157 | return 0; 158 | } 159 | else if (rel_time < 0.5) { 160 | return -( sin( m_omega_value * (rel_time - m_tukey_param / 2) ) ) / 2 * m_omega_value / m_periodT; 161 | } 162 | else { 163 | return -( sin( m_omega_value * (rel_time - 1 + m_tukey_param / 2) ) ) / 2 * m_omega_value / m_periodT; 164 | } 165 | } 166 | double getStrain(unsigned int timestep) { 167 | double rel_time = (timestep - m_offset) * m_dt / m_periodT; // supposed to be within [0,1] 168 | if (rel_time <= 0 || rel_time >= 1) { 169 | return 0; 170 | } 171 | else if (rel_time >= m_tukey_param / 2 && rel_time <= 1 - 
m_tukey_param / 2) { 172 | return 1; 173 | } 174 | else if (rel_time < 0.5) { 175 | return ( 1 + cos( m_omega_value * (rel_time - m_tukey_param / 2) ) ) / 2; 176 | } 177 | else { 178 | return ( 1 + cos( m_omega_value * (rel_time - 1 + m_tukey_param / 2) ) ) / 2; 179 | } 180 | } 181 | unsigned int getOffset() { 182 | return m_offset; 183 | } 184 | private: 185 | const double m_periodT; //!< The time period of the window 186 | const double m_tukey_param; //!< The parameter of Tukey window function (scales the cosine lobe) 187 | const unsigned int m_offset; //!< offset of the window function 188 | const double m_dt; //!< time step 189 | static constexpr double m_pi = 3.1415926536; 190 | double m_omega_value; //!< omega value of the cosine function 191 | }; 192 | 193 | 194 | //! Windowed ShearFunction: A ShearFunction windowed by a window function (which is also a ShearFunction subclass) 195 | /*! WindowedFunction represents a strain field whose strain is the product of original ShearFunction and the window 196 | function. Therefore, the shear rate satisfies the product rule of derivative. 197 | */ 198 | class WindowedFunction : public ShearFunction 199 | { 200 | public: 201 | //! Constructor of WindowedFunction class 202 | /*! 
It is recommended to use the same offset for base shear function and window function 203 | \param base_shear_func the base shear function 204 | \param window_func the window function 205 | */ 206 | WindowedFunction(std::shared_ptr base_shear_func, std::shared_ptr window_func) : 207 | ShearFunction(), 208 | m_base_shear_func(base_shear_func), 209 | m_window_func(window_func) { } 210 | double getShearRate(unsigned int timestep) { 211 | return ( m_base_shear_func -> getShearRate(timestep) ) * ( m_window_func -> getStrain(timestep) ) + 212 | ( m_base_shear_func -> getStrain(timestep) ) * ( m_window_func -> getShearRate(timestep) ); 213 | } 214 | double getStrain(unsigned int timestep) { 215 | return ( m_base_shear_func -> getStrain(timestep) ) * ( m_window_func -> getStrain(timestep) ); 216 | } 217 | unsigned int getOffset() { 218 | return m_base_shear_func -> getOffset(); 219 | } 220 | private: 221 | const std::shared_ptr m_base_shear_func; //!< Base shear function 222 | const std::shared_ptr m_window_func; //!< Window function 223 | }; 224 | 225 | 226 | void export_SpecificShearFunction(pybind11::module& m); 227 | 228 | #endif 229 | -------------------------------------------------------------------------------- /source/Stokes.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 
9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 
40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Gang Wang 52 | // Modified by Andrew Fiore 53 | // Modified by Zhouyang Ge 54 | 55 | #include "Stokes.cuh" 56 | 57 | #include "Integrator.cuh" 58 | #include "Lubrication.cuh" 59 | #include "Mobility.cuh" 60 | #include "Precondition.cuh" 61 | #include "Wrappers.cuh" 62 | #include "Saddle.cuh" 63 | 64 | #include "Helper_Debug.cuh" 65 | #include "Helper_Mobility.cuh" 66 | #include "Helper_Stokes.cuh" 67 | 68 | #include 69 | #include 70 | 71 | #include 72 | 73 | #include "hoomd/Saru.h" 74 | #include "hoomd/TextureTools.h" 75 | 76 | #include 77 | #include 78 | 79 | #ifdef WIN32 80 | #include 81 | #else 82 | #include 83 | #endif 84 | 85 | 86 | /*! \file Stokes.cu 87 | \brief Defines GPU kernel code for integration considering hydrodynamic interactions on the GPU. Used by Stokes.cc. 88 | */ 89 | 90 | // Texture for reading table values 91 | scalar4_tex_t tables1_tex; 92 | 93 | /*! 94 | Step one of two-step integrator (step 2 is null) for the overdamped particle dynamics. 95 | Explicit Euler integration of particle positions given a velocity. 
Parameters, in signature order:

    timestep        (input)         current timestep
    output_period   (input)         output per output_period steps
    d_pos           (input/output)  array of particle positions
    d_ori           (input/output)  array of particle orientations
    d_pos_gb        (input/output)  global particle positions, updated by the integration kernels
    d_AppliedForce  (input/output)  Array for force and torque applied on particles
    d_Velocity      (input/output)  Array for linear and angular velocity of particles and stresslets
    sqm_B1, sqm_B2  (input)         coefficients of the B1 and B2 squirmer modes
    d_sqm_B1_mask   (input)         mask array for B1 (forwarded to the integration kernels)
    d_sqm_B2_mask   (input)         mask array for B2 (forwarded to Integrator_ComputeVelocity)
    rot_diff        (input)         rotational diffusion coefficient; a positive value selects the Euler-Maruyama branch
    d_noise_ang     (input)         noise for the angular velocity (forwarded to the integration kernels)
    T_ext           (input)         external torque (forwarded to the force kernel)
    dt              (input)         integration time step
    m_error         (input)         calculation error tolerance (not referenced in this function body)
    shear_rate      (input)         shear rate in the suspension, if any
    block_size      (input)         number of threads per block for particle-based calculations
    d_image         (input)         array of particle images
    d_group_members (input)         index of particles within the integration group
    group_size      (input)         number of particles
    box             (input)         periodic box information
    bro_data        (input)         structure containing data for Brownian calculations
    mob_data        (input)         structure containing data for Mobility calculations
    res_data        (input)         structure containing data for lubrication resistance calculations
    work_data       (input)         structure containing data for scratch arrays and workspaces

Returns cudaSuccess; errors from the kernels are reported through gpuErrchk.
*/

cudaError_t Stokes_StepOne( unsigned int timestep,
                            unsigned int output_period,
                            Scalar4 *d_pos,
                            Scalar3 *d_ori,
                            Scalar4 *d_pos_gb,
                            //Scalar4 *d_vel,
                            //Scalar4 *d_omg,
                            float *d_AppliedForce,
                            float *d_Velocity,
                            Scalar sqm_B1, Scalar sqm_B2,
                            float *d_sqm_B1_mask,
                            float *d_sqm_B2_mask,
                            Scalar rot_diff,
                            Scalar3 *d_noise_ang,
                            Scalar T_ext,
                            Scalar dt,
                            const float m_error,
                            Scalar shear_rate,
                            unsigned int block_size,
                            int3 *d_image,
                            unsigned int *d_group_members,
                            unsigned int group_size,
                            const BoxDim& box,
                            BrownianData *bro_data,
                            MobilityData *mob_data,
                            ResistanceData *res_data,
                            WorkData *work_data
                            ){

    // *******************************************************
    // Pre-calculation setup
    // *******************************************************

    // Set up the blocks and threads to run the particle-based kernels.
    // The "+ 1" rounds the block count up; when group_size is an exact multiple
    // of block_size this yields one spare block.
    dim3 grid( (group_size/block_size) + 1, 1, 1 );
    dim3 threads(block_size, 1, 1);

    // Set up the blocks and threads to run the FFT-grid-based kernels:
    // block size is clamped to the number of grid points, block count is the
    // ceiling of NxNyNz / gridBlockSize.
    unsigned int NxNyNz = (mob_data->Nx) * (mob_data->Ny) * (mob_data->Nz);
    int gridBlockSize = ( NxNyNz > block_size ) ? block_size : NxNyNz;
    int gridNBlock = ( NxNyNz + gridBlockSize - 1 ) / gridBlockSize ;

    // Initialize values in the data structure for kernel information.
    // ker_data points at a stack object, so it is only valid during this call.
    KernelData ker_struct = {grid,
                             threads,
                             gridNBlock,
                             gridBlockSize,
                             NxNyNz};
    KernelData *ker_data = &ker_struct;

    // Bind the real-space Ewald sum table to textured memory
    // One dimension, Read mode: ElementType(Get what we write)
    tables1_tex.normalized = false; // Not normalized
    // NOTE(review): the original remark here read "floor of the index", but the
    // mode selected is cudaFilterModeLinear -- confirm which lookup is intended.
    tables1_tex.filterMode = cudaFilterModeLinear; // Filter mode: floor of the index
    // The table holds ewald_n + 1 Scalar4 entries; the return code of the bind is not checked.
    cudaBindTexture(0, tables1_tex, mob_data->ewald_table, sizeof(Scalar4) * ((mob_data->ewald_n)+1));

    // *******************************************************
    // Get sheared grid vectors
    // *******************************************************

    Mobility_SetGridk_kernel<<>>(mob_data->gridk,     //output
                                 mob_data->Nx,
                                 mob_data->Ny,
                                 mob_data->Nz,
                                 NxNyNz,
                                 box,
                                 mob_data->xi,
                                 mob_data->eta);

    // *******************************************************
    // Prepare the preconditioners
    // *******************************************************

    // Build preconditioner (only do once, because it should still be
    // sufficiently good for RFD with small displacements)
    // zhoge: It mainly does the incomplete Cholesky factorization of P * (\tilde{R}_FU^nf + relaxer*I) * P^T
    Precondition_Wrap(d_pos,
                      d_group_members,
                      group_size,
                      box,
                      ker_data,
                      res_data,
                      work_data);

    // Debug_Lattice_SpinViscosity(mob_data,res_data,ker_data,work_data,d_pos,d_group_members,group_size,box);
    // Debug_Lattice_ShearViscosity(mob_data,res_data,ker_data,work_data,d_pos,d_group_members,group_size,box);
    // cudaUnbindTexture(tables1_tex);
    // gpuErrchk(cudaPeekAtLastError());
    // return cudaSuccess;


    // *******************************************************
    // Solve the hydrodynamic problem and do the integration
    // *******************************************************

    // Fill d_AppliedForce from the current positions/orientations, the neighbor
    // list and the interaction parameters stored in res_data, plus T_ext.
    // NOTE(review): the original remark said "Set applied force equal to net_force
    // from HOOMD (pair potentials, external potentials, etc.)", but no HOOMD
    // net-force array is passed to this kernel -- confirm.
    Stokes_SetForce_manually_kernel<<>>(
                                        d_pos,   //input
                                        d_ori,   //input
                                        d_AppliedForce, //output
                                        group_size,
                                        d_group_members,
                                        res_data->nneigh,
                                        res_data->nlist,
                                        res_data->headlist,
                                        res_data->m_ndsr,
                                        res_data->m_k_n,
                                        res_data->m_kappa,
                                        res_data->m_beta,
                                        res_data->m_epsq,
                                        T_ext,
                                        box
                                        );

    // Compute particle velocities from central RFD + Saddle point solve (in Integrator.cu)
    Integrator_ComputeVelocity(timestep, output_period,
                               d_AppliedForce,
                               d_Velocity,  //output (FSD velocity and stresslet, 11N)
                               dt,
                               shear_rate,
                               d_pos,       //input position
                               sqm_B2,
                               d_sqm_B2_mask,
                               d_ori,       //input: orientation
                               d_image,
                               d_group_members,
                               group_size,
                               box,
                               ker_data,
                               bro_data,
                               mob_data,
                               res_data,
                               work_data);

    // Any thermal or rotational noise selects the single-stage stochastic update;
    // otherwise a two-stage deterministic update is used.
    if ( bro_data->T > 0.0 or rot_diff > 0.0 )  //Euler-Maruyama for stochastic simulations
      {
        // Make the displacement (positions and orientations are updated in place)
        Integrator_ExplicitEuler_Shear_kernel<<>>(d_pos,  //input
                                                  d_ori,  //input
                                                  d_pos,  //output (overwrite)
                                                  d_ori,  //output (overwrite)
                                                  d_pos_gb, //input/output global position (updated)
                                                  d_Velocity,
                                                  sqm_B1,
                                                  d_sqm_B1_mask,
                                                  d_noise_ang,
                                                  d_image,
                                                  d_group_members,
                                                  group_size,
                                                  box,
                                                  dt,
                                                  shear_rate
                                                  );
      }
    else  //Runge-Kutta for deterministic simulations
      {
        // RK position storage (scratch buffers owned by work_data)
        Scalar4 *pos_rk1 = work_data->pos_rk1;
        Scalar3 *ori_rk1 = work_data->ori_rk1;

        // Make an intermediate displacement; d_pos/d_ori are left untouched here
        // and the stage result is written to pos_rk1/ori_rk1.
        Integrator_ExplicitEuler1_Shear_kernel<<>>(d_pos,  //input
                                                   d_ori,  //input
                                                   pos_rk1, //output
                                                   ori_rk1, //output
                                                   d_pos_gb, //input/output global position (updated)
                                                   d_Velocity,
                                                   sqm_B1,
                                                   d_sqm_B1_mask,
                                                   d_noise_ang,
                                                   d_image,
                                                   d_group_members,
                                                   group_size,
                                                   box,
                                                   dt,
                                                   shear_rate
                                                   );


        // second RK step

        // zhoge: Probably no need to precondition again
        Precondition_Wrap(pos_rk1,
                          d_group_members,
                          group_size,
                          box,
                          ker_data,
                          res_data,  //input/output (pruned neighbor list)
                          work_data);

        // Get the midstep interparticle force
        Stokes_SetForce_manually_kernel<<>>(
                                            pos_rk1,   //input
                                            ori_rk1,   //input
                                            d_AppliedForce, //output
                                            group_size,
                                            d_group_members,
                                            res_data->nneigh,
                                            res_data->nlist,
                                            res_data->headlist,
                                            res_data->m_ndsr,
                                            res_data->m_k_n,
                                            res_data->m_kappa,
                                            res_data->m_beta,
                                            res_data->m_epsq,
                                            T_ext,
                                            box
                                            );

        // Compute particle velocities from central RFD + Saddle point solve (in Integrator.cu)
        // Note that this second evaluation is given dt/2 rather than dt.
        Integrator_ComputeVelocity(timestep, output_period,
                                   d_AppliedForce,
                                   d_Velocity, //output (FSD velocity and stresslet, 11N)
                                   dt/2.,
                                   shear_rate,
                                   pos_rk1,  //input position
                                   sqm_B2,
                                   d_sqm_B2_mask,
                                   ori_rk1,  //input orientation
                                   d_image,
                                   d_group_members,
                                   group_size,
                                   box,
                                   ker_data,
                                   bro_data,
                                   mob_data,
                                   res_data,
                                   work_data);

        // Make the final displacement.
        // Presumably the kernel combines coef_1 * (d_pos, d_ori), coef_2 * (pos_rk1, ori_rk1)
        // and coef_3 * dt * velocity -- TODO confirm against Integrator_RK_Shear_kernel.
        Scalar coef_1 = 0.5;
        Scalar coef_2 = 0.5;
        Scalar coef_3 = 0.5;

        Integrator_RK_Shear_kernel<<>>(coef_1, d_pos,d_ori,        //input position/orientation
                                       coef_2, pos_rk1,ori_rk1,    //input position/orientation
                                       d_pos,d_ori,                //output position/orientation (overwritten)
                                       d_pos_gb,                   //input/output global position (updated)
                                       d_Velocity,
                                       sqm_B1,
                                       d_sqm_B1_mask,
                                       d_noise_ang,
                                       d_image,
                                       d_group_members,
                                       group_size,
                                       box,
                                       coef_3, dt,
                                       shear_rate
                                       );

      }


    // Clean up: release the texture bound at the top of this function
    cudaUnbindTexture(tables1_tex);

    // Error checking
    gpuErrchk(cudaPeekAtLastError());

    return cudaSuccess;
}
--------------------------------------------------------------------------------
/source/Stokes.cuh:
--------------------------------------------------------------------------------
/*
Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
(HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
the University of Michigan All rights reserved.

HOOMD-blue may contain modifications ("Contributions") provided, and to which
copyright is held, by various Contributors who have granted The Regents of the
University of Michigan the right to modify and/or distribute such Contributions.

You may redistribute, use, and create derivate works of HOOMD-blue, in source
and binary forms, provided you abide by the following conditions:

* Redistributions of source code must retain the above copyright notice, this
list of conditions, and the following disclaimer both in the code and
prominently in any materials provided with the distribution.
16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Maintainer: joaander 51 | // Modified by Gang Wang 52 | // Modified by Andrew Fiore 53 | // Modified by Zhouyang Ge 54 | 55 | /*! 
\file Stokes.cuh
    \brief Declares GPU kernel code for integration considering hydrodynamic interactions on the GPU. Used by Stokes.
*/
#include "hoomd/ParticleData.cuh"
#include "hoomd/HOOMDMath.h"

#include

#include "DataStruct.h"

#include
#include

//! Define the step_one kernel
#ifndef __STOKES_CUH__
#define __STOKES_CUH__

//! Definition for complex variable storage
// NOTE(review): both branches expand to cufftComplex, so CUFFTCOMPLEX is the
// single-precision type whether or not SINGLE_PRECISION is defined -- confirm
// that this is intended.
#ifdef SINGLE_PRECISION
#define CUFFTCOMPLEX cufftComplex
#else
#define CUFFTCOMPLEX cufftComplex
#endif


//! Kernel driver for the first part (no second part) of the Stokes update called by Stokes.cc
cudaError_t Stokes_StepOne( unsigned int timestep,
                            unsigned int output_period,
                            Scalar4 *d_pos,
                            Scalar3 *d_ori,
                            Scalar4 *d_pos_gb,
                            //Scalar4 *d_vel,
                            //Scalar4 *d_omg,
                            float *d_AppliedForce,
                            float *d_Velocity,
                            Scalar sqm_B1, Scalar sqm_B2,
                            float *d_sqm_B1_mask,
                            float *d_sqm_B2_mask,
                            Scalar rot_diff,
                            Scalar3 *d_noise_ang,
                            Scalar T_ext,
                            Scalar dt,
                            const float m_error,
                            Scalar shear_rate,
                            unsigned int block_size,
                            int3 *d_image,
                            unsigned int *d_group_members,
                            unsigned int group_size,
                            const BoxDim& box,
                            BrownianData *bro_data,
                            MobilityData *mob_data,
                            ResistanceData *res_data,
                            WorkData *work_data
                            );




//! Driver declared for a second integration step
// NOTE(review): the remark on Stokes_StepOne says there is no second part --
// confirm whether this declaration is still defined and used anywhere.
cudaError_t Stokes_StepTwo( unsigned int timestep,
                            Scalar4 *d_pos,
                            Scalar4 *d_net_force,
                            Scalar4 *d_buf,  // input (buffered position) / output (HOOMD velocity)
                            float *d_AppliedForce,
                            float *d_Velocity,
                            Scalar dt,
                            const float m_error,
                            Scalar shear_rate,
                            unsigned int block_size,
                            int3 *d_image,
                            unsigned int *d_group_members,
                            unsigned int group_size,
                            const BoxDim& box,
                            BrownianData *bro_data,
                            MobilityData *mob_data,
                            ResistanceData *res_data,
                            WorkData *work_data
                            );




#endif
--------------------------------------------------------------------------------
/source/Stokes.h:
--------------------------------------------------------------------------------
// Modified by Andrew Fiore
// Modified by Zhouyang Ge

// NOTE(review): both branches expand to cufftComplex (and this test uses #ifndef
// where Stokes.cuh uses #ifdef); the macro is the same type either way -- confirm
// that this is intended.
#ifndef SINGLE_PRECISION
#define CUFFTCOMPLEX cufftComplex
#else
#define CUFFTCOMPLEX cufftComplex
#endif

#ifndef __STOKES_H__
#define __STOKES_H__

/*! \file Stokes.h
    \brief Declares the Stokes class
*/

#include
//#include //zhoge//RK2//////
#include
#include

#include
#include
#include
#include "cublas_v2.h"

#include "ShearFunction.h"

#include
#include

#ifdef NVCC
#error This header cannot be compiled by nvcc
#endif

#include
#include // lets us pass arrays from python to c++

//! Integrates the system forward considering hydrodynamic interactions by GPU
/*! Implements overdamped integration (one step) through IntegrationMethodTwoStep interface, runs on the GPU

    The class only declares state and entry points here: the integration steps,
    parameter/table setup, workspace management and output routines are defined
    elsewhere. Its members are grouped below into far-field (Ewald/FFT grid)
    data, physical quantities, lubrication/sparse-solver data and raw work
    buffers.
*/

class Stokes : public IntegrationMethodTwoStep
    {
    public:
        //! Constructs the integration method and associates it with the system
        // The trailing scalar arguments mirror the m_ndsr ... m_omega_ext members
        // documented further down.
        Stokes( std::shared_ptr sysdef,
                std::shared_ptr group,
                std::shared_ptr T,
                unsigned int seed,
                std::shared_ptr nlist_ewald,
                Scalar xi,
                Scalar error,
                std::string fileprefix,
                int period,
                Scalar ndsr, Scalar kappa, Scalar k_n, Scalar beta_AF, Scalar epsq, Scalar sqm_B1, Scalar sqm_B2,
                unsigned int N_mix, Scalar coef_B1_mask, Scalar coef_B2_mask,
                Scalar rot_diff, Scalar T_ext, Scalar omega_ext  //zhoge
                );

        virtual ~Stokes();

        //! Set a new temperature
        /*! \param T new temperature to set */
        void setT(std::shared_ptr T)
            {
            m_T = T;
            }

        //! Performs the first step of the integration
        virtual void integrateStepOne(unsigned int timestep);

        //! Performs the second step of the integration
        virtual void integrateStepTwo(unsigned int timestep);

        //! Set the table for resistance coefficients
        void setResistanceTable();

        //! Set up the sparse math functions
        void setSparseMath();

        //! Set the parameters for various parts of the calculation (Ewald Sum, Lubrication Tensor)
        void setParams();

        //! Write particle dipoles to file
        void OutputData(unsigned int timestep, BoxDim box, Scalar current_shear_rate);

        //! Allocate workspace variables
        void AllocateWorkSpaces();

        //! Free workspace variables
        void FreeWorkSpaces();

        //! Set the friction type
        void setFriction( std::string friction_type, float h0, std::vector &alpha );

        //! Set the shear rate and shear frequency
        /*! \param shear_func shear function object stored in m_shear_func
            \param max_strain maximum total strain stored in m_max_strain
        */
        void setShear(std::shared_ptr shear_func, Scalar max_strain){
            m_shear_func = shear_func;
            m_max_strain = max_strain;
        }


    protected:

        std::shared_ptr m_shear_func; //!< mutable shared pointer towards a ShearFunction object
        Scalar m_max_strain; //!< Maximum total strain before box resizing

        std::shared_ptr m_T; //!< The Temperature of the Stochastic Bath
        unsigned int m_seed; //!< The seed for the RNG of the Stochastic Bath
        unsigned int m_seed_ff_rs; //!< The seed for the RNG, far-field, real space
        unsigned int m_seed_ff_ws; //!< The seed for the RNG, far-field, wave space
        unsigned int m_seed_nf; //!< The seed for the RNG, near-field
        unsigned int m_seed_rfd; //!< The seed for the RNG, random finite displacement

        cufftHandle plan; //!< Used for the Fast Fourier Transformations performed on the GPU

        std::shared_ptr m_nlist_ewald; //!< The neighborlist to use for the mobility computation

        unsigned int m_shear_offset; //!< Offset time of the shear

        ////zhoge//RK2//////
        //std::shared_ptr m_force;      //!< mutable shared pointer


        // ************************************************************************
        // Declare all variables related to the far-field hydrodynamic calculations
        // ************************************************************************

        Scalar m_xi; //!< ewald splitting parameter xi
        Scalar m_ewald_cut; //!< Real space cutoff
        GPUArray m_ewaldC1; //!< Real space Ewald coefficients table
        int m_ewald_n; //!< Number of entries in table of Ewald coefficients
        Scalar m_ewald_dr; //!< Real space Ewald table spacing

        Scalar2 m_self; //!< self piece

        int m_Nx; //!< Number of grid points in x direction
        int m_Ny; //!< Number of grid points in y direction
        int m_Nz; //!< Number of grid points in z direction

        GPUArray m_gridk; //!< k-vectors for each grid point
        GPUArray m_gridX; //!< x component of the grid based force/velocity
        GPUArray m_gridY; //!< y component of the grid based force/velocity
        GPUArray m_gridZ; //!< z component of the grid based force/velocity

        // Eight couplet/velocity-gradient components are stored; no ZZ array is declared.
        GPUArray m_gridXX; //!< xx component of the grid based couplet/velocity gradient
        GPUArray m_gridXY; //!< xy component of the grid based couplet/velocity gradient
        GPUArray m_gridXZ; //!< xz component of the grid based couplet/velocity gradient
        GPUArray m_gridYX; //!< yx component of the grid based couplet/velocity gradient
        GPUArray m_gridYY; //!< yy component of the grid based couplet/velocity gradient
        GPUArray m_gridYZ; //!< yz component of the grid based couplet/velocity gradient
        GPUArray m_gridZX; //!< zx component of the grid based couplet/velocity gradient
        GPUArray m_gridZY; //!< zy component of the grid based couplet/velocity gradient

        Scalar m_gaussm; //!< Gaussian width in standard deviations for wave space spreading/contraction
        int m_gaussP; //!< Number of points in each dimension for Gaussian support
        Scalar m_eta; //!< Gaussian spreading parameter
        Scalar3 m_gridh; //!< Size of the grid box in 3 direction

        int m_m_Lanczos_ff; //!< Number of Lanczos Iterations to use for calculation of far-field Brownian slip
        int m_m_Lanczos_nf; //!< Number of Lanczos Iterations to use for calculation of near-field Brownian force

        float m_rfd_epsilon; //!< epsilon for RFD displacement

        Scalar m_error; //!< Error tolerance for all calculations

        Scalar m_ndsr;   //!< non-dim shear rate (zhoge)
        Scalar m_kappa;  //!< inverse Debye length (zhoge)
        Scalar m_k_n;    //!< collision spring constant (zhoge)
        Scalar m_beta;   //!< ratio of Hamaker constant and electrostatic force scale
        Scalar m_epsq;   //!< square root of the regularization term for vdW
        Scalar m_sqm_B1; //!< coef for the B1 mode of spherical squirmers
        Scalar m_sqm_B2; //!< coef for the B2 mode of spherical squirmers
        unsigned int m_N_mix;  //!< number of particles in the first group (when having a mixture)
        Scalar m_coef_B1_mask; //!< coef for the B1 mask of spherical squirmers
        Scalar m_coef_B2_mask; //!< coef for the B2 mask of spherical squirmers
        GPUArray m_sqm_B1_mask; //!< mask array for B1
        GPUArray m_sqm_B2_mask; //!< mask array for B2
        GPUArray m_noise_ang;   //!< Gaussian noise for the angular velocity
        Scalar m_rot_diff;      //!< rotational diffusion coef due to noise
        Scalar m_T_ext;         //!< external torque
        Scalar m_omega_ext;     //!< external torque angular frequency

        // ******************************************************************
        // Declare all variables for physical quantities (forces, velocities)
        // ******************************************************************

        GPUArray m_AppliedForce; //!< Force and torque applied to the particles
        GPUArray m_Velocity;     //!< Linear velocity, angular velocity, and stresslet of all particles

        // *********************************************************************************
        // Declare all variables related to the lubrication and required sparse calculations
        // *********************************************************************************

        float m_ResTable_min; //!< Minimum distance in the lubrication tabulation
        float m_ResTable_dr; //!< Discretization of the lubrication table (in log space)
        GPUArray m_ResTable_dist; //!< Distance values used in the lubrication function tabulation
        GPUArray m_ResTable_vals; //!< Lubrication function tabulation

        GPUArray m_nneigh_pruned; //!< Number of neighbors for pruned neighborlist
        GPUArray m_headlist_pruned; //!< Headlist for pruned neighborlist
        GPUArray m_nlist_pruned; //!< Pruned neighborlist

        GPUArray m_nneigh_less; //!< Number of neighbors with index less than particle ID
        GPUArray m_NEPP; //!< Number of non-zero entries per particle in sparse matrices
        GPUArray m_offset; //!< Particle offset into sparse matrix arrays

        int m_nnz; //!< Number of non-zero entries in RFU preconditioner

        GPUArray m_L_RowInd; //!< Rnf sparse storage ( COO Format - Row Indices )
        GPUArray m_L_RowPtr; //!< Rnf sparse storage ( CSR Format - Row Pointer )
        GPUArray m_L_ColInd; //!< Rnf sparse storage ( COO/CSR Format - Col Indices )
        GPUArray m_L_Val; //!< L sparse storage ( COO/CSR Format - Values )

        GPUArray m_Diag; //!< Diagonal entries for preconditioner
        GPUArray m_HasNeigh; //!< Whether a particle has neighbors or not

        float m_ichol_relaxer; //!< magnitude of term to add to diagonal before IChol to ensure convergence

        cusolverSpHandle_t soHandle; //!< opaque handle for cuSOLVER operations

        cusparseHandle_t spHandle; //!< Opaque handle for cuSPARSE operations
        cusparseStatus_t spStatus; //!< cuSPARSE function success/failure output
        cusparseMatDescr_t descr_R; //!< cuSPARSE matrix descriptor for resistance tensor
        cusparseMatDescr_t descr_L; //!< cuSPARSE matrix descriptor for lower cholesky of R
        cusparseOperation_t trans_L; //!< Transpose option for lower Cholesky factor, L
        cusparseOperation_t trans_Lt; //!< Transpose option for upper Cholesky factor, L^T
        csric02Info_t info_R; //!< Opaque solver information for cuSPARSE operations on R
        csrsv2Info_t info_L; //!< Opaque solver information for cuSPARSE operations on L
        csrsv2Info_t info_Lt; //!< Opaque solver information for cuSPARSE operations on L^T
        cusparseSolvePolicy_t policy_R; //!< Solve level output for R
        cusparseSolvePolicy_t policy_L; //!< Solve level output for L
        cusparseSolvePolicy_t policy_Lt; //!< Solve level output for L^T
        int m_pBufferSize; //!< Buffer size for cuSPARSE calculations

        GPUArray m_Scratch1; //!< 6*N, Scratch storage for re-ordered matrix-vector multiplication
        GPUArray m_Scratch2; //!< 17*N, Scratch storage for saddle point preconditioning
        GPUArray m_Scratch3; //!< nnz, Scratch Storage for Value reordering
        GPUArray m_prcm; //!< matrix re-ordering vector using Reverse-Cuthill-Mckee (RCM)

        std::string m_fileprefix; //!< output file prefix
        int m_period; //!< frequency with which to write output files

        // *********************************************************************************
        // Work space variables for all calculations
        // *********************************************************************************
        // The members below are raw pointers; presumably they are set up by
        // AllocateWorkSpaces() and released by FreeWorkSpaces() -- TODO confirm.

        cublasHandle_t blasHandle; //!< opaque handle for cuBLAS operations


        // Dot product partial sum
        Scalar *dot_sum;
        float *m_work_bro_gauss; //zhoge: Gaussian random variables (type float for cuRand)

        // Variables for far-field Lanczos iteration
        Scalar4 *m_work_bro_ff_psi;
        Scalar4 *m_work_bro_ff_UBreal;
        Scalar4 *m_work_bro_ff_Mpsi;
        //zhoge
        Scalar *m_work_bro_ff_V1;
        Scalar *m_work_bro_ff_UB_new1;
        Scalar *m_work_bro_ff_UB_old1;

        // Right-hand side and solution buffers for the RFD (random finite displacement) solve
        Scalar *m_work_rfd_rhs;
        Scalar *m_work_rfd_sol;

        // Intermediate positions/orientations of the Runge-Kutta stage
        Scalar4 *m_work_pos_rk1;
        Scalar3 *m_work_ori_rk1;


        // Variables for near-field Lanczos iteration
        Scalar *m_work_bro_nf_Tm;
        Scalar *m_work_bro_nf_V;
        Scalar *m_work_bro_nf_FB_old;
        Scalar *m_work_bro_nf_psi;

        // Work buffers for the saddle point solve
        Scalar *m_work_saddle_psi;
        Scalar4 *m_work_saddle_posPrime;
        Scalar *m_work_saddle_rhs;
        Scalar *m_work_saddle_solution;

        // Work buffers for the mobility calculation
        Scalar4 *m_work_mob_couplet;
        Scalar4 *m_work_mob_delu;
        Scalar4 *m_work_mob_vel1;
        Scalar4 *m_work_mob_vel2;
        Scalar4 *m_work_mob_delu1;
        Scalar4 *m_work_mob_delu2;
        Scalar4 *m_work_mob_vel;
        Scalar4 *m_work_mob_AngvelStrain;
        Scalar4 *m_work_mob_net_force;
        Scalar4 *m_work_mob_TorqueStress;

        // Work buffers for the preconditioner
        int *m_work_precond_scratch;
        int *m_work_precond_map;
        Scalar *m_work_precond_backup;


    };

//!
Exports the Stokes class to python
void export_Stokes(pybind11::module& m);

#endif

--------------------------------------------------------------------------------
/source/Stokes_SparseMath.cc:
--------------------------------------------------------------------------------
#include
#include "Stokes.h"

#include
#include

#include
#include

// Allocate the sparse-matrix storage and create the cuSOLVER / cuSPARSE /
// cuBLAS contexts used by the near-field (lubrication) preconditioner.
//
// The function (1) sizes and allocates every GPUArray listed in the table
// below, swapping each freshly built array into the matching m_* member,
// (2) creates the cuSOLVER and cuSPARSE handles, matrix descriptors and solver
// info objects, and (3) creates the cuBLAS handle.
//
// NOTE(review): none of the status codes returned by the cusolverSp*,
// cusparse* and cublas* calls below are inspected, so a failed handle or
// descriptor creation goes unnoticed here.
//
// NOTE(review): in this copy of the file the header names of the bare
// "#include" lines and the template arguments of GPUArray are missing;
// confirm against the repository before compiling.
//
// ***IMPORTANT: To get the cuSPARSE libraries to work, have to link to
//               libcusparse.so in the FindHoomd makefile, like so:
//	set(HOOMD_LIBRARIES ${HOOMD_LIB} ${HOOMD_COMMON_LIBS} /usr/local/cuda/lib64/libcublas.so /usr/local/cuda/lib64/libcusparse.so )
void Stokes::setSparseMath(){

    // Set up the arrays and memory required to store the matrix information
    //
    // Total Memory required for the arrays declared in this file:
    //
    //	sizeof(float) = sizeof(int) = 4
    //
    //	nnz = 468 * N
    //
    //	Variable		Length		Type
    //	--------		------		----
    //	nneigh_pruned		N		int
    //	headlist_pruned		N+1		int
    //	nlist_pruned		13*N		int
    //	L_RowInd		nnz		int
    //	L_RowPtr		6*N+1		int
    //	L_ColInd		nnz		int
    //	L_Val			nnz		float
    //	HasNeigh		N		int
    //	Diag			6*N		float
    //	nneigh_less		N		int
    //	NEPP			2*N		int
    //	offset			N+1		int
    //	Scratch1		6*N		float
    //	Scratch2		17*N		float
    //	Scratch3		nnz		float
    //	prcm			6*N		int
    //
    //	1960*N+3 \approx 1960*N
    //
    // Total size in bytes: 	7840 * N
    // Total size in KB:		7.840 * N
    // Total size in MB:		0.007840 * N
    //
    // Some examples for various numbers of particles
    //
    //	N	Size (MB)	Size (GB)
    //	---	---------	---------
    //	1E2	0.7840		0.0007840
    //	1E3	7.840		0.007840
    //	1E4	78.40		0.07840
    //	1E5	784.0		0.7840
    //	1E6	7840		7.840
    //
    // NOTE(review): the table above is stale. With max_neigh = 13 as set
    // below, the code allocates nnz = 36*(13+1)*N = 504*N entries (not 468*N)
    // and nlist_pruned holds (13+1)*N = 14*N entries (not 13*N), so the totals
    // quoted here underestimate the real footprint. Recompute before relying
    // on these numbers.

    // For particles of equal size, in a closest-packed configuration, each particle
    // can have at most 12 neighbors within a distance of 3 or less.
    // NOTE(review): max_neigh is set to 13, one more than that bound --
    // presumably a safety margin; confirm the intent.
    unsigned int N = m_group->getNumMembers();
    unsigned int max_neigh = 13;

    // Maximum number of non-zero entries in the RFU preconditioner:
    // one 6x6 block (36 entries) per particle for itself plus one per neighbor.
    // The product is computed in unsigned int and stored in the int member m_nnz.
    m_nnz = ( 36 * ( max_neigh + 1 ) ) * N;

    // Pruned Neighborlist Arrays (lists of particles within the (shorter) preconditioner cutoff)
    GPUArray n_nneigh_pruned( N, m_exec_conf );
    GPUArray n_headlist_pruned( N + 1, m_exec_conf );
    GPUArray n_nlist_pruned( (max_neigh+1)*N, m_exec_conf );

    // Each new array is swapped into its member; the temporary then owns
    // whatever the member held before and releases it at end of scope.
    m_nneigh_pruned.swap( n_nneigh_pruned );
    m_headlist_pruned.swap( n_headlist_pruned );
    m_nlist_pruned.swap( n_nlist_pruned );

    // Prepare GPUArrays for sparse matrix constructions
    GPUArray n_L_RowInd( m_nnz, m_exec_conf );	//!< Rnf preconditioner sparse storage ( COO Format - Row Indices )
    GPUArray n_L_RowPtr( 6*N+1, m_exec_conf );	//!< Rnf preconditioner sparse storage ( CSR Format - Row Pointers )
    GPUArray n_L_ColInd( m_nnz, m_exec_conf );	//!< Rnf preconditioner sparse storage ( COO/CSR Format - Col Indices )
    GPUArray n_L_Val( m_nnz, m_exec_conf );	//!< Values of incomplete lower Cholesky of RFU (also the matrix itself)

    m_L_RowInd.swap(n_L_RowInd);
    m_L_RowPtr.swap(n_L_RowPtr);
    m_L_ColInd.swap(n_L_ColInd);
    m_L_Val.swap( n_L_Val );

    // Things required for diagonal preconditioning
    GPUArray n_HasNeigh( N, m_exec_conf );	//!< Whether a particle has neighbors or not
    GPUArray n_Diag( 6*N, m_exec_conf );	//!< Diagonal preconditioner elements
    m_Diag.swap( n_Diag );
    m_HasNeigh.swap( n_HasNeigh );

    // Index arrays needed to construct sparse matrices
    GPUArray n_nneigh_less( N, m_exec_conf );	//!< Number of neighbors with index less than particle ID
    GPUArray n_NEPP( 2*N, m_exec_conf );	//!< Number of non-zero entries per particle in sparse matrices
    GPUArray n_offset( (N+1), m_exec_conf );	//!< Particle offset into sparse matrix arrays

    m_nneigh_less.swap( n_nneigh_less );
    m_NEPP.swap( n_NEPP );
    m_offset.swap( n_offset );

    //
    // Re-ordering vector and scratch space for sparse math operations
    //
    GPUArray n_Scratch1( 6*N, m_exec_conf );	//!< Scratch storage for re-ordered matrix-vector multiplication
    GPUArray n_Scratch2( 17*N, m_exec_conf );	//!< Scratch storage for saddle point preconditioning
    GPUArray n_Scratch3( m_nnz, m_exec_conf );	//!< Scratch Storage for Value reordering
    GPUArray n_prcm( 6*N, m_exec_conf );	//!< matrix re-ordering vector using Reverse-Cuthill-Mckee (RCM)

    m_Scratch1.swap( n_Scratch1 );
    m_Scratch2.swap( n_Scratch2 );
    m_Scratch3.swap( n_Scratch3 );
    m_prcm.swap( n_prcm );

    //
    // Set up context for cuSOLVER (used to perform reverse-Cuthill-Mckee reordering)
    //
    cusolverSpCreate(&soHandle);

    //
    // Set up matrices for cuSPARSE
    //

    // Initialize cuSPARSE
    cusparseCreate(&spHandle);

    // 1. Define the matrices for cuSPARSE, detailing the structure
    //    descr_R: general matrix, zero-based indices.
    descr_R = 0;
    cusparseCreateMatDescr( &descr_R );
    cusparseSetMatIndexBase( descr_R, CUSPARSE_INDEX_BASE_ZERO );
    cusparseSetMatType( descr_R, CUSPARSE_MATRIX_TYPE_GENERAL );

    //    descr_L: lower fill mode, non-unit diagonal, zero-based indices.
    descr_L = 0;
    cusparseCreateMatDescr( &descr_L );
    cusparseSetMatDiagType( descr_L, CUSPARSE_DIAG_TYPE_NON_UNIT );
    cusparseSetMatType( descr_L, CUSPARSE_MATRIX_TYPE_GENERAL );
    cusparseSetMatFillMode( descr_L, CUSPARSE_FILL_MODE_LOWER );
    cusparseSetMatIndexBase( descr_L, CUSPARSE_INDEX_BASE_ZERO );

    // 2. Operations for the triangular matrix solves
    //    (L is applied as stored, L^T by transposing the same storage)
    trans_L = CUSPARSE_OPERATION_NON_TRANSPOSE;
    trans_Lt = CUSPARSE_OPERATION_TRANSPOSE;

    // 3. Create info structures for cuSPARSE solves
    //    We need one info for csric02 (incomplete Cholesky)
    //    and two info's for csrsv2 (Lower and upper triangular solves)
    info_R = 0; // Info structures required for setting buffer size
    info_L = 0;
    info_Lt = 0;
    cusparseCreateCsric02Info(&info_R);
    cusparseCreateCsrsv2Info(&info_L);
    cusparseCreateCsrsv2Info(&info_Lt);

    // 4. Level output information for cuSPARSE solves
    //    Only the L^T solve requests level information; R and L do not.
    policy_R = CUSPARSE_SOLVE_POLICY_NO_LEVEL;
    policy_L = CUSPARSE_SOLVE_POLICY_NO_LEVEL;
    policy_Lt = CUSPARSE_SOLVE_POLICY_USE_LEVEL;

    // Initialize cuBLAS
    cublasCreate( &blasHandle ); //zhoge: Was here probably because initially only used for lubrication

}

--------------------------------------------------------------------------------
/source/VariantShearFunction.cc:
--------------------------------------------------------------------------------
// Maintainer: Gang Wang

/*! \file VariantShearFunction.cc
    \brief Defines VariantShearFunction class
*/

#ifdef WIN32
#pragma warning( push )
#pragma warning( disable : 4103 4244 )
#endif

#include "VariantShearFunction.h"

using namespace std;


// Build the variant from a shear function and cache the wrapped strain at the
// end of the active window. m_value_range is assigned before wrapValue() is
// called because wrapValue() reads it.
// NOTE(review): m_offset + m_total_timestep is evaluated in unsigned
// arithmetic and can wrap for very large total_timestep values.
VariantShearFunction::VariantShearFunction(std::shared_ptr shear_func,
                                           unsigned int total_timestep,
                                           double min_value,
                                           double max_value) :
    m_shear_func(shear_func),
    m_total_timestep(total_timestep),
    m_min_value(min_value),
    m_max_value(max_value)
    {
    setOffset( m_shear_func -> getOffset() ); // This line ensures the offsets of ShearFunction and Variant class are equal
    m_value_range = m_max_value - m_min_value;
    m_end_value = wrapValue( m_shear_func -> getStrain( m_offset + m_total_timestep ) );
    }

/*!
\param timestep Timestep to get the value at 32 | \return value by the user-specified function 33 | */ 34 | double VariantShearFunction::getValue(unsigned int timestep) 35 | { 36 | if (timestep < m_offset) { 37 | return 0; 38 | } 39 | else if (timestep >= m_offset + m_total_timestep) { 40 | return m_end_value; 41 | } 42 | return wrapValue( m_shear_func -> getStrain(timestep) ); 43 | } 44 | 45 | void export_VariantShearFunction(pybind11::module& m) 46 | { 47 | pybind11::class_ >(m, "VariantShearFunction", pybind11::base()) 48 | .def(pybind11::init< std::shared_ptr, unsigned int, double, double >()); 49 | } 50 | 51 | #ifdef WIN32 52 | #pragma warning( pop ) 53 | #endif 54 | -------------------------------------------------------------------------------- /source/VariantShearFunction.h: -------------------------------------------------------------------------------- 1 | // Maintainer: Gang Wang 2 | 3 | /*! \file VariantShearFunction.h 4 | \brief Declares the VariantShearFunction class 5 | */ 6 | 7 | #ifdef NVCC 8 | #error This header cannot be compiled by nvcc 9 | #endif 10 | 11 | #include 12 | 13 | #ifndef __VARIANT_SHEAR_FUNCTION_H__ 14 | #define __VARIANT_SHEAR_FUNCTION_H__ 15 | 16 | #include 17 | #include 18 | #include "ShearFunction.h" 19 | 20 | //! Variant class for shear flowfield described by a function 21 | /*! This variant gives the strain value based on a function (which is ShearFunction type) 22 | The strain is wrapped based on the min_value and max_value since HOOMD cannot deal with 23 | very thin box. In most cases, max_value - min_value is an integer (and the recommended value 24 | is [-0.5, 0.5]). If the timestep is smaller than offset, 0 is returned when calling 25 | getValue; if the timestep is larger than offset + total_timestep, the strain of the last 26 | time point is returned. 27 | */ 28 | class VariantShearFunction : public Variant 29 | { 30 | public: 31 | //! 
Constructs a VariantShearFunction type with a shared_ptr to ShearFunction and total timestep 32 | /*! \param shear_func the shared pointer to the ShearFunction object 33 | \param total_timestep total time step this Variant is going to be effective 34 | \param min_value the minimal value of this Variant 35 | \param max_value the maximal value of this Variant 36 | */ 37 | VariantShearFunction(std::shared_ptr shear_func, 38 | unsigned int total_timestep, 39 | double min_value, 40 | double max_value); 41 | 42 | //! Gets the value at a given time step 43 | virtual double getValue(unsigned int timestep); 44 | 45 | //! Wrap the value between m_min_value and m_max_value 46 | double wrapValue(double functionValue) { 47 | return functionValue - m_value_range * floor( (functionValue - m_min_value) / m_value_range ); 48 | } 49 | 50 | private: 51 | const std::shared_ptr m_shear_func; 52 | const unsigned int m_total_timestep; //!< the total timestep for the Variant class 53 | const double m_min_value; //!< minimum value of the output of the Variant class 54 | const double m_max_value; //!< maximum value of the output of the Variant class 55 | double m_end_value; //!< the last value of output after time > m_offset + m_total_timestep 56 | double m_value_range; //!< max_value - min_value 57 | }; 58 | 59 | //! 
Exports VariantShearFunction class to python 60 | void export_VariantShearFunction(pybind11::module& m); 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /source/Wrappers.cuh: -------------------------------------------------------------------------------- 1 | // This file is part of the PSEv3 plugin, released under the BSD 3-Clause License 2 | // 3 | // Andrew Fiore 4 | 5 | #include "Mobility.cuh" 6 | #include "Helper_Mobility.cuh" 7 | #include "Helper_Brownian.cuh" 8 | #include "Helper_Saddle.cuh" 9 | #include "Saddle.cuh" 10 | 11 | #include "DataStruct.h" 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | #include 28 | 29 | #ifdef WIN32 30 | #include 31 | #else 32 | #include 33 | #endif 34 | 35 | //! command to convert floats or doubles to integers 36 | #ifdef SINGLE_PRECISION 37 | #define __scalar2int_rd __float2int_rd 38 | #else 39 | #define __scalar2int_rd __double2int_rd 40 | #endif 41 | 42 | 43 | /*! \file Wrappers.cuh 44 | \brief Defines functions wrappers used by CUSP to solve the linear equations where required. 45 | */ 46 | 47 | 48 | //! Shared memory array for partial sum of dot product kernel 49 | extern __shared__ Scalar partial_sum[]; 50 | 51 | //! Construct class wrapper to use the Saddle point matrix as a matrix-free method in CUSP. 52 | /*! 
53 | CUSP shell to apply matrix-free multiplication of the saddle point matrix 54 | */ 55 | class cuspSaddle : public cusp::linear_operator 56 | { 57 | public: 58 | 59 | typedef cusp::linear_operator super; //!< Defines size of linear operator 60 | 61 | // No need to specify their values because it's the relationship that matters 62 | Scalar *d_x; //!< input vector (unspecified) 63 | Scalar *d_y; //!< output vector (unspecified) 64 | 65 | Scalar4 *d_pos; //!< Particle positions 66 | unsigned int *d_group_members; //!< list of particles in group to integrate 67 | unsigned int group_size; //!< Number of particles 68 | const BoxDim& box; //!< Box dimensions 69 | 70 | KernelData *ker_data; //!< Pointer to data structure for CUDA kernels 71 | MobilityData *mob_data; //!< Pointer to data structure for mobility calculations 72 | ResistanceData *res_data; //!< Pointer to data structure for resistance calculations 73 | WorkData *work_data; //!< Pointer to data structure for workspaces 74 | 75 | // constructor 76 | cuspSaddle( 77 | Scalar4 *d_pos, 78 | unsigned int *d_group_members, 79 | unsigned int group_size, 80 | const BoxDim& box, 81 | KernelData *ker_data, 82 | MobilityData *mob_data, 83 | ResistanceData *res_data, 84 | WorkData *work_data 85 | ) 86 | : super(17*group_size,17*group_size), 87 | d_pos(d_pos), 88 | d_group_members(d_group_members), 89 | group_size(group_size), 90 | box(box), 91 | ker_data(ker_data), 92 | mob_data(mob_data), 93 | res_data(res_data), 94 | work_data(work_data) 95 | {} 96 | 97 | // linear operator y = A*x 98 | //! 
Matrix multiplication part of CUSP wrapper 99 | template 101 | void operator()( VectorType1& x, VectorType2& y ) 102 | { 103 | 104 | // Raw pointer to device memory for input and output arrays 105 | d_x = (float*)thrust::raw_pointer_cast(&x[0]); 106 | d_y = (float*)thrust::raw_pointer_cast(&y[0]); 107 | 108 | // Call the kernel 109 | Saddle_Multiply( 110 | d_y, // output 111 | d_x, // input 112 | d_pos, 113 | d_group_members, 114 | group_size, 115 | box, 116 | ker_data, 117 | mob_data, 118 | res_data, 119 | work_data 120 | ); 121 | 122 | } 123 | }; 124 | 125 | 126 | //! Construct class wrapper for the preconditioner to the saddle point matrix in CUSP. 127 | /*! 128 | CUSP shell to apply the action of the preconditioner to a vector. P^(-1) * vec 129 | */ 130 | class cuspSaddlePreconditioner : public cusp::linear_operator 131 | { 132 | public: 133 | 134 | typedef cusp::linear_operator super; //!< Defines size of linear operator 135 | 136 | Scalar *d_x; //!< input vector 137 | Scalar *d_y; //!< output vector 138 | 139 | unsigned int group_size; //!< number of particles 140 | 141 | void *pBuffer; //!< Buffer space for cuSPARSE calculations 142 | 143 | KernelData *ker_data; //!< Pointer to data structure for CUDA kernels 144 | ResistanceData *res_data; //!< Pointer to data structure for mobility calculations 145 | 146 | // constructor 147 | cuspSaddlePreconditioner( 148 | unsigned int group_size, 149 | void *pBuffer, 150 | KernelData *ker_data, 151 | ResistanceData *res_data 152 | ) 153 | : super(17*group_size,17*group_size), 154 | group_size(group_size), 155 | pBuffer(pBuffer), 156 | ker_data(ker_data), 157 | res_data(res_data) 158 | {} 159 | 160 | // Linear operator y = A*x, here A = P^(-1), where P is the preconditioner 161 | // 162 | //! 
Matrix multiplication part of CUSP wrapper 163 | template 165 | void operator()( VectorType1& x, VectorType2& y ) 166 | { 167 | 168 | // Raw pointer to device memory for input and output arrays 169 | d_x = (float*)thrust::raw_pointer_cast(&x[0]); 170 | d_y = (float*)thrust::raw_pointer_cast(&y[0]); 171 | 172 | // Call the kernel 173 | Saddle_Preconditioner( 174 | d_y, // output 175 | d_x, // input 176 | group_size, 177 | pBuffer, 178 | ker_data, 179 | res_data 180 | ); 181 | 182 | } 183 | }; 184 | -------------------------------------------------------------------------------- /source/__init__.py: -------------------------------------------------------------------------------- 1 | # this file exists to mark this directory as a python module 2 | 3 | # need to import all submodules defined in this directory 4 | 5 | # NOTE: adjust the import statement to match the name of the template 6 | # (here: plugin_template) 7 | from hoomd.PSEv3 import integrate 8 | from hoomd.PSEv3 import shear_function 9 | from hoomd.PSEv3 import variant 10 | -------------------------------------------------------------------------------- /source/integrate.py: -------------------------------------------------------------------------------- 1 | # First, we need to import the C++ module. It has the same name as this module (plugin_template) but with an underscore 2 | # in front 3 | from hoomd.PSEv3 import _PSEv3 4 | from hoomd.PSEv3 import shear_function 5 | 6 | # Next, since we are extending an updater, we need to bring in the base class updater and some other parts from 7 | # hoomd_script 8 | import hoomd 9 | from hoomd import _hoomd 10 | from hoomd import compute 11 | from hoomd.md import _md 12 | 13 | import math 14 | 15 | ## One step overdamped integration with hydrodynamic interactions 16 | 17 | class PSEv3(hoomd.md.integrate._integration_method): 18 | ## Specifies the integrator for Fast Stokesian Dynamics (FSD) 19 | # 20 | # group Group of particles on which to apply this method. 
21 | # T Temperature of the simulation (in energy units) 22 | # seed Random seed to use for the run. Simulations that are identical, except for the seed, will follow different trajectories. 23 | # xi Ewald splitting parameter 24 | # error Error threshold to use for calculations (Spectral Ewald parameters are determined on the fly using this bound) 25 | # function_form Time dependent shear rate 26 | # max_strain Maximum strain of the box 27 | # fileprefix Prefix for stresslet output 28 | # period Frequency of stresslet output 29 | # nlist_type Type of neighbor list to use 30 | # friction_type Type of friction to add 31 | # h0 Maximum distance of frictional contact 32 | # alpha List of frictional magnitudes 33 | # ndsr Non-dimensional shear rate (ratio of Stokes drag and max electrostatic repulsion) 34 | # kappa inverse Debye length 35 | # k_n collision spring constant 36 | # 37 | # T can be a variant type, allowing for temperature ramps in simulation runs. 38 | # 39 | # Internally, a compute.thermo is automatically specified and associated with a group. 
40 | def __init__(self, 41 | group, 42 | T, 43 | seed = 0, 44 | xi = 0.5, 45 | error = 0.001, 46 | function_form = None, 47 | max_strain = 0.5, 48 | fileprefix="stresslet", 49 | period = 0, 50 | nlist_type = "cell", 51 | friction_type = "none", 52 | h0 = 0.0, 53 | alpha = [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], 54 | ndsr = 1e-1, kappa = 1/0.05, k_n = 1e4, beta_AF = 0., epsq = 1e-5, sqm_B1=0., sqm_B2=0., 55 | N_mix=1, coef_B1_mask=1.0, coef_B2_mask=1.0, rot_diff=0., T_ext=0., omega_ext=0.): 56 | 57 | hoomd.util.print_status_line(); 58 | 59 | # initialize base class 60 | hoomd.md.integrate._integration_method.__init__(self); 61 | 62 | # setup the variant inputs 63 | T = hoomd.variant._setup_variant_input(T); 64 | 65 | # Make sure the period is an integer 66 | period = int( period ); 67 | 68 | # create the compute thermo 69 | compute._get_unique_thermo(group=group); 70 | 71 | # Cutoff distance for real space Ewald Sums 72 | self.rcut = math.sqrt( - math.log( error ) ) / xi; 73 | 74 | # Initialize the reflected c++ class 75 | if not hoomd.context.exec_conf.isCUDAEnabled(): 76 | hoomd.contex.msg.error("Sorry, we have not written CPU code for Fast Stokesian Dynamics. \n"); 77 | raise RuntimeError('Error creating Stokes'); 78 | else: 79 | 80 | # Create a neighborlist exclusively for real space interactions. 
Use cell lists by 81 | # default, but also allow the user to specify 82 | if ( nlist_type.upper() == "CELL" ): 83 | 84 | cl_stokes = _hoomd.CellListGPU(hoomd.context.current.system_definition); 85 | hoomd.context.current.system.addCompute(cl_stokes, "stokes_cl") 86 | self.nlist_ewald = _md.NeighborListGPUBinned(hoomd.context.current.system_definition, self.rcut, 0.4, cl_stokes); 87 | 88 | elif ( nlist_type.upper() == "TREE" ): 89 | 90 | self.nlist_ewald = _md.NeighborListGPUTree(hoomd.context.current.system_definition, self.rcut, 0.4) 91 | 92 | elif ( nlist_type.upper() == "STENCIL" ): 93 | 94 | cl_stokes = _hoomd.CellListGPU(hoomd.context.current.system_definition) 95 | hoomd.context.current.system.addCompute(cl_stokes, "stokes_cl") 96 | cls_stokes = _hoomd.CellListStencil( hoomd.context.current.system_definition, cl_stokes ) 97 | hoomd.context.current.system.addCompute( cls_stokes, "stokes_cls") 98 | self.nlist_ewald = _md.NeighborListGPUStencil(hoomd.context.current.system_definition, self.rcut, 0.4, cl_stokes, cls_stokes) 99 | 100 | else: 101 | hoomd.context.msg.error("Invalid neighborlist method specified. Valid options are: cell, tree, stencil. 
\n"); 102 | raise RuntimeError('Error constructing neighborlist'); 103 | 104 | # Set the neighbor list properties 105 | self.nlist_ewald.setEvery(1, True); 106 | hoomd.context.current.system.addCompute(self.nlist_ewald, "stokes_nlist_ewald") 107 | self.nlist_ewald.countExclusions(); 108 | 109 | # Initialize Stokes Class 110 | self.cpp_method = _PSEv3.Stokes(hoomd.context.current.system_definition, 111 | group.cpp_group, 112 | T.cpp_variant, 113 | seed, 114 | self.nlist_ewald, 115 | xi, 116 | error, 117 | fileprefix, 118 | period, 119 | ndsr, kappa, k_n, beta_AF, epsq, sqm_B1, sqm_B2, 120 | N_mix, coef_B1_mask, coef_B2_mask, rot_diff, T_ext, omega_ext); ##zhoge 121 | 122 | self.cpp_method.validateGroup() 123 | 124 | # Set shear conditions if necessary 125 | if function_form is not None: 126 | self.cpp_method.setShear(function_form.cpp_function, max_strain) 127 | else: 128 | no_shear_function = shear_function.steady(dt = 0) 129 | self.cpp_method.setShear(no_shear_function.cpp_function, max_strain) 130 | 131 | # Set up the parameters and resistance functions before running the simulation 132 | self.cpp_method.setParams() 133 | self.cpp_method.setResistanceTable() 134 | self.cpp_method.setFriction(friction_type.upper(), h0, alpha) # must be called AFTER setResistanceTable() 135 | self.cpp_method.setSparseMath() 136 | self.cpp_method.AllocateWorkSpaces() 137 | 138 | ## Changes parameters of an existing integrator 139 | def set_params(self, T=None, function_form = None, max_strain=0.5): 140 | hoomd.util.print_status_line(); 141 | self.check_initialization(); 142 | 143 | if T is not None: 144 | # setup the variant inputs 145 | T = hoomd.variant._setup_variant_input(T); 146 | self.cpp_method.setT(T.cpp_variant); 147 | 148 | if function_form is not None: 149 | self.cpp_method.setShear(function_form.cpp_function, max_strain) 150 | 151 | ## Stop any shear 152 | def stop_shear(self, max_strain = 0.5): 153 | no_shear_function = shear_function.steady(dt = 0) 154 | 
self.cpp_method.setShear(no_shear_function.cpp_function, max_strain) 155 | -------------------------------------------------------------------------------- /source/module.cc: -------------------------------------------------------------------------------- 1 | /* 2 | Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition 3 | (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of 4 | the University of Michigan All rights reserved. 5 | 6 | HOOMD-blue may contain modifications ("Contributions") provided, and to which 7 | copyright is held, by various Contributors who have granted The Regents of the 8 | University of Michigan the right to modify and/or distribute such Contributions. 9 | 10 | You may redistribute, use, and create derivate works of HOOMD-blue, in source 11 | and binary forms, provided you abide by the following conditions: 12 | 13 | * Redistributions of source code must retain the above copyright notice, this 14 | list of conditions, and the following disclaimer both in the code and 15 | prominently in any materials provided with the distribution. 16 | 17 | * Redistributions in binary form must reproduce the above copyright notice, this 18 | list of conditions, and the following disclaimer in the documentation and/or 19 | other materials provided with the distribution. 
20 | 21 | * All publications and presentations based on HOOMD-blue, including any reports 22 | or published results obtained, in whole or in part, with HOOMD-blue, will 23 | acknowledge its use according to the terms posted at the time of submission on: 24 | http://codeblue.umich.edu/hoomd-blue/citations.html 25 | 26 | * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website: 27 | http://codeblue.umich.edu/hoomd-blue/ 28 | 29 | * Apart from the above required attributions, neither the name of the copyright 30 | holder nor the names of HOOMD-blue's contributors may be used to endorse or 31 | promote products derived from this software without specific prior written 32 | permission. 33 | 34 | Disclaimer 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND 37 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 38 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY 39 | WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED. 40 | 41 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 42 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 43 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 46 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | */ 49 | 50 | // Include the defined classes that are to be exported to python 51 | #include "Stokes.h" 52 | #include "VariantShearFunction.h" 53 | #include "ShearFunction.h" 54 | #include "ShearFunctionWrap.h" 55 | #include "SpecificShearFunction.h" 56 | 57 | // Include pybind11 58 | #include 59 | 60 | // specify the python module. 
Note that the name must explicitly match the PROJECT() name provided in CMakeLists
// (with an underscore in front)
//
// Registers the plugin's classes with python. The Stokes integrator is only
// exported when the plugin is compiled with ENABLE_CUDA; the shear-function
// classes are exported unconditionally.
PYBIND11_MODULE(_PSEv3, m)
    {
#ifdef ENABLE_CUDA
    // GPU-only integrator (integrate.py constructs _PSEv3.Stokes)
    export_Stokes(m);
#endif
    export_ShearFunction(m);
    export_ShearFunctionWrap(m);
    export_VariantShearFunction(m);
    export_SpecificShearFunction(m);
    }


--------------------------------------------------------------------------------
/source/rcm.hpp:
--------------------------------------------------------------------------------
int adj_bandwidth ( int node_num, int adj_num, int adj_row[], int adj[] );
bool adj_contains_ij ( int node_num, int adj_num, int adj_row[], int adj[],
  int i, int j );
void adj_insert_ij ( int node_num, int adj_max, int *adj_num, int adj_row[],
  int adj[], int i, int j );
int adj_perm_bandwidth ( int node_num, int adj_num, int adj_row[], int adj[],
  int perm[], int perm_inv[] );
void adj_perm_show ( int node_num, int adj_num, int adj_row[], int adj[],
  int perm[], int perm_inv[] );
void adj_print ( int node_num, int adj_num, int adj_row[], int adj[],
  std::string title );
void adj_print_some ( int node_num, int node_lo, int node_hi, int adj_num,
  int adj_row[], int adj[], std::string title );
void adj_set ( int node_num, int adj_max, int *adj_num, int adj_row[],
  int adj[], int irow, int jcol );
void adj_show ( int node_num, int adj_num, int adj_row[], int adj[] );
void degree ( int root, int adj_num, int adj_row[], int adj[], int mask[],
  int deg[], int *iccsze, int ls[], int node_num );
void genrcm ( int node_num, int adj_num, int adj_row[], int adj[], int perm[] );
void graph_01_adj ( int node_num, int adj_num, int adj_row[], int adj[] );
void graph_01_size ( int *node_num, int *adj_num );
int i4_max ( int i1, int i2 );
int i4_min ( int i1, int i2 );
int i4_sign ( int i );
void i4_swap ( int *i, int *j );
int
i4_uniform ( int a, int b, int *seed ); 27 | int i4col_compare ( int m, int n, int a[], int i, int j ); 28 | void i4col_sort_a ( int m, int n, int a[] ); 29 | void i4col_swap ( int m, int n, int a[], int irow1, int irow2 ); 30 | void i4mat_print_some ( int m, int n, int a[], int ilo, int jlo, int ihi, 31 | int jhi, std::string title ); 32 | void i4mat_transpose_print ( int m, int n, int a[], std::string title ); 33 | void i4mat_transpose_print_some ( int m, int n, int a[], int ilo, int jlo, 34 | int ihi, int jhi, std::string title ); 35 | void i4vec_heap_d ( int n, int a[] ); 36 | int *i4vec_indicator ( int n ); 37 | void i4vec_print ( int n, int a[], std::string title ); 38 | void i4vec_reverse ( int n, int a[] ); 39 | void i4vec_sort_heap_a ( int n, int a[] ); 40 | void level_set ( int root, int adj_num, int adj_row[], int adj[], int mask[], 41 | int *level_num, int level_row[], int level[], int node_num ); 42 | void level_set_print ( int node_num, int level_num, int level_row[], 43 | int level[] ); 44 | bool perm_check ( int n, int p[] ); 45 | void perm_inverse3 ( int n, int perm[], int perm_inv[] ); 46 | int *perm_uniform ( int n, int *seed ); 47 | float r4_abs ( float x ); 48 | int r4_nint ( float x ); 49 | void r82vec_permute ( int n, double a[], int p[] ); 50 | void r8mat_print_some ( int m, int n, double a[], int ilo, int jlo, int ihi, 51 | int jhi, std::string title ); 52 | void r8mat_transpose_print_some ( int m, int n, double a[], int ilo, int jlo, 53 | int ihi, int jhi, std::string title ); 54 | void rcm ( int root, int adj_num, int adj_row[], int adj[], int mask[], 55 | int perm[], int *iccsze, int node_num ); 56 | void root_find ( int *root, int adj_num, int adj_row[], int adj[], int mask[], 57 | int *level_num, int level_row[], int level[], int node_num ); 58 | void sort_heap_external ( int n, int *indx, int *i, int *j, int isgn ); 59 | void timestamp ( ); 60 | int *triangulation_neighbor_triangles ( int triangle_order, int triangle_num, 61 | int 
triangle_node[] ); 62 | int triangulation_order3_adj_count ( int node_num, int triangle_num, 63 | int triangle_node[], int triangle_neighbor[], int adj_col[] ); 64 | int *triangulation_order3_adj_set ( int node_num, int triangle_num, 65 | int triangle_node[], int triangle_neighbor[], int adj_num, int adj_col[] ); 66 | void triangulation_order3_example2 ( int node_num, int triangle_num, 67 | double node_xy[], int triangle_node[], int triangle_neighbor[] ); 68 | void triangulation_order3_example2_size ( int *node_num, int *triangle_num, 69 | int *hole_num ); 70 | int triangulation_order6_adj_count ( int node_num, int triangle_num, 71 | int triangle_node[], int triangle_neighbor[], int adj_col[] ); 72 | int *triangulation_order6_adj_set ( int node_num, int triangle_num, 73 | int triangle_node[], int triangle_neighbor[], int adj_num, int adj_col[] ); 74 | void triangulation_order6_example2 ( int node_num, int triangle_num, 75 | double node_xy[], int triangle_node[], int triangle_neighbor[] ); 76 | void triangulation_order6_example2_size ( int *node_num, int *triangle_num, 77 | int *hole_num ); 78 | -------------------------------------------------------------------------------- /source/shear_function.py: -------------------------------------------------------------------------------- 1 | ## \package PSEv3.shear_function 2 | # classes representing shear functions, which can be input of an integrator and variant 3 | # to shear the box of a simulation 4 | 5 | from hoomd.PSEv3 import _PSEv3 6 | 7 | import hoomd 8 | 9 | ## shear function interface representing shear flow field described by a function 10 | class _shear_function: 11 | ## Constructor and check the validity of zero param 12 | # \param zero Specify absolute time step number location for 0 in \a points. Use 'now' to indicate the current step. 
## shear function interface representing shear flow field described by a function
#
# Concrete subclasses create the C++ mirror object and store it in \a cpp_function;
# the accessors below simply forward to that object.
class _shear_function:
    ## Constructor and check the validity of zero param
    # \param zero Specify absolute time step number location for 0 in \a points. Use 'now' to indicate the current step.
    #
    # Raises RuntimeError when \a zero is negative or later than the current time step.
    def __init__(self, zero = 'now'):
        # filled in by the subclass constructor once the offset is known
        self.cpp_function = None

        if zero == 'now':
            self._offset = hoomd.context.current.system.getCurrentTimeStep()
        else:
            # validate zero
            if zero < 0:
                hoomd.context.msg.error("Cannot create a shear_function variant with a negative zero\n")
                raise RuntimeError('Error creating shear function')
            if zero > hoomd.context.current.system.getCurrentTimeStep():
                hoomd.context.msg.error("Cannot create a shear_function variant with a zero in the future\n")
                raise RuntimeError('Error creating shear function')
            self._offset = zero

    ## Get shear rate at a certain time step, might be useful when switching strain field
    # \param timestep the timestep
    def get_shear_rate(self, timestep):
        return self.cpp_function.getShearRate(timestep)

    ## Get the strain at a certain time step. The strain is not wrapped
    # \param timestep the timestep
    def get_strain(self, timestep):
        return self.cpp_function.getStrain(timestep)

    ## Get the offset of this shear function
    # The value is read back from the C++ object, not from the Python-side \a _offset
    def get_offset(self):
        return self.cpp_function.getOffset()


## concrete class representing steady shear, no shear by default if shear_rate is not provided
class steady(_shear_function):
    ## Constructor of steady shear function
    # \param dt the time interval between each timestep, must be the same with the global timestep
    # \param shear_rate the shear rate of the shear, default is zero, should be zero or positive
    # \param zero the time offset
    #
    # NOTE(review): the sign of \a shear_rate is documented but not checked here.
    def __init__(self, dt, shear_rate = 0, zero = 'now'):
        _shear_function.__init__(self, zero)
        self.cpp_function = _PSEv3.SteadyShearFunction(shear_rate, self._offset, dt)


## concrete class representing simple sinusoidal oscillatory shear
class sine(_shear_function):
    ## Constructor of simple sinusoidal oscillatory shear
    # \param dt the time interval between each timestep, must be the same with the global timestep
    # \param shear_rate the maximum shear rate of the oscillatory shear, must be positive
    # \param shear_freq the frequency (real frequency, not angular frequency) of the oscillatory shear, must be positive
    # \param zero the time offset
    #
    # Raises RuntimeError when \a shear_rate or \a shear_freq is not strictly positive.
    def __init__(self, dt, shear_rate, shear_freq, zero = 'now'):

        # reject bad parameters before the base class touches the simulation context
        if shear_rate <= 0:
            hoomd.context.msg.error("Shear rate must be positive (use steady class instead for zero shear)\n")
            raise RuntimeError("Error creating shear function")
        if shear_freq <= 0:
            hoomd.context.msg.error("Shear frequency must be positive (use steady class instead for steady shear)\n")
            raise RuntimeError("Error creating shear function")

        _shear_function.__init__(self, zero)
        self.cpp_function = _PSEv3.SinShearFunction(shear_rate, shear_freq, self._offset, dt)


## concrete class representing chirp oscillatory shear
class chirp(_shear_function):
    ## Constructor of chirp oscillatory shear
    # \param dt the time interval between each timestep, must be the same with the global timestep
    # \param amplitude the strain amplitude of Chirp oscillatory shear, must be positive
    # \param omega_0 minimum angular frequency, must be positive
    # \param omega_f maximum angular frequency, must be positive and larger than omega_0
    # \param periodT final time of chirp
    # \param zero the time offset
    #
    # NOTE(review): unlike sine, none of the documented constraints are enforced here;
    # the arguments are forwarded unchanged to the C++ class.
    def __init__(self, dt, amplitude, omega_0, omega_f, periodT, zero = 'now'):
        _shear_function.__init__(self, zero)
        self.cpp_function = _PSEv3.ChirpShearFunction(amplitude, omega_0, omega_f, periodT, self._offset, dt)


## concrete class representing Tukey window function
class tukey_window(_shear_function):
    ## Constructor of Tukey window function
    # \param dt the time interval between each timestep, must be the same with the global timestep
    # \param periodT time length of the Tukey window function
    # \param tukey_param Tukey window function parameter, must be within (0, 1]
    # \param zero the time offset
    #
    # Raises RuntimeError when \a tukey_param lies outside (0, 1].
    def __init__(self, dt, periodT, tukey_param, zero = 'now'):

        if tukey_param <= 0 or tukey_param > 1:
            # trailing newline added to match every other msg.error call in this file
            hoomd.context.msg.error("Tukey parameter must be within (0, 1]\n")
            raise RuntimeError("Error creating Tukey window function")

        _shear_function.__init__(self, zero)
        self.cpp_function = _PSEv3.TukeyWindowFunction(periodT, tukey_param, self._offset, dt)


## concrete class representing a windowed shear function
class windowed(_shear_function):
    ## Constructor of a windowed shear function
    # The strain of the resulting windowed shear function will be the product of the original shear function and
    # the provided window function
    # \param function_form the original shear function
    # \param window the window function. It is recommended to make sure the offset (zero) of the window function is the same with shear function
    def __init__(self, function_form, window):
        _shear_function.__init__(self, 'now') # zero parameter is not used in windowed class anyways
        # wraps the two existing C++ objects; no new offset or dt is passed
        self.cpp_function = _PSEv3.WindowedFunction(function_form.cpp_function, window.cpp_function)

# --------------------------------------------------------------------------------
# /source/variant.py:
# --------------------------------------------------------------------------------
## \package PSEv3.variant
# classes representing the variant class to facilitate box_resize

from hoomd.PSEv3 import _PSEv3
from hoomd.PSEv3 import shear_function

from hoomd import variant
from hoomd.variant import _variant

from hoomd import _hoomd
import hoomd
import sys

## Variant class holding a functional form of shear field
# Used as an argument for box_resize class to deform the box
class shear_variant(hoomd.variant._variant):
    ## Specify shear field represented by a function form with a limited timesteps
    #
    # \param function_form the functional form of the oscillatory shear
    # \param total_timestep the total timesteps of the shear, equal to shear_end_timestep - shear_start_timestep, must be positive
    # \param max_strain the maximum absolute value of the strain, use 0.5 in almost all the cases
    #
    # Raises RuntimeError when \a total_timestep, truncated to an integer, is not positive.
    def __init__(self, function_form, total_timestep, max_strain = 0.5):
        # initialize the base class
        _variant.__init__(self)

        # The C++ class receives int(total_timestep), so validate the truncated value:
        # checking first would let e.g. 0.5 through and create a variant with 0 timesteps.
        total_timestep = int(total_timestep)
        if total_timestep <= 0:
            hoomd.context.msg.error("Cannot create a shear_variant with 0 or negative points\n")
            raise RuntimeError('Error creating variant')

        # create the c++ mirror class
        # NOTE(review): -max_strain / max_strain are presumably the strain wrap limits; confirm against VariantShearFunction
        self.cpp_variant = _PSEv3.VariantShearFunction(function_form.cpp_function, total_timestep, -max_strain, max_strain)

# --------------------------------------------------------------------------------