├── 3d ├── librbf.a ├── batch2.sh ├── mpi_range.cxx ├── bluegene.sh ├── get_vorticity.h ├── Makefile ├── batch.sh ├── par.h ├── get_trunc.h ├── get_buffer.h ├── wrapper.cxx ├── matmult.cxx ├── main.cxx ├── gpumatmult.cu ├── matmultgpu.cxx ├── get_cluster.h ├── rbf_interpolation.cxx └── vorticity_evaluation.cxx ├── 2d ├── batch2.sh ├── mpi_range.cxx ├── bluegene.sh ├── get_vorticity.h ├── batch.sh ├── Makefile ├── par.h ├── get_trunc.h ├── get_buffer.h ├── matmult.cxx ├── cylinder.cxx ├── gpumatmult.cu ├── main.cxx ├── matmultgpu.cxx ├── main2.cxx ├── get_cluster.h ├── rbf_interpolation.cxx └── vorticity_evaluation.cxx ├── LICENSE └── README.md /3d/librbf.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/barbagroup/petrbf/master/3d/librbf.a -------------------------------------------------------------------------------- /2d/batch2.sh: -------------------------------------------------------------------------------- 1 | export NP=1 2 | while [ $NP -le 1024 ] 3 | do 4 | echo $NP 5 | llsubmit bluegene.sh 6 | export NP=`echo $NP \* 2 | bc` 7 | done 8 | -------------------------------------------------------------------------------- /3d/batch2.sh: -------------------------------------------------------------------------------- 1 | export NP=1 2 | while [ $NP -le 1024 ] 3 | do 4 | echo $NP 5 | llsubmit bluegene.sh 6 | export NP=`echo $NP \* 2 | bc` 7 | done 8 | -------------------------------------------------------------------------------- /2d/mpi_range.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include "par.h" 3 | 4 | void mpi_range(MPI2 *mpi) 5 | { 6 | int imin,iwork1,iwork2; 7 | iwork1 = (mpi->nend-mpi->nsta+1)/mpi->nprocs; 8 | iwork2 = (mpi->nend-mpi->nsta+1)%mpi->nprocs; 9 | if (mpi->myrank <= iwork2) { 10 | imin = mpi->myrank; 11 | } else { 12 | imin = iwork2; 13 | } 14 | mpi->ista = mpi->myrank*iwork1+mpi->nsta+imin; 15 | mpi->iend = 
mpi->ista+iwork1; 16 | if (mpi->myrank < iwork2) mpi->iend = mpi->iend+1; 17 | } 18 | -------------------------------------------------------------------------------- /3d/mpi_range.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include "par.h" 3 | 4 | void mpi_range(MPI2 *mpi) 5 | { 6 | int imin,iwork1,iwork2; 7 | iwork1 = (mpi->nend-mpi->nsta+1)/mpi->nprocs; 8 | iwork2 = (mpi->nend-mpi->nsta+1)%mpi->nprocs; 9 | if (mpi->myrank <= iwork2) { 10 | imin = mpi->myrank; 11 | } else { 12 | imin = iwork2; 13 | } 14 | mpi->ista = mpi->myrank*iwork1+mpi->nsta+imin; 15 | mpi->iend = mpi->ista+iwork1; 16 | if (mpi->myrank < iwork2) mpi->iend = mpi->iend+1; 17 | } 18 | -------------------------------------------------------------------------------- /2d/bluegene.sh: -------------------------------------------------------------------------------- 1 | # @ job_type = parallel 2 | # @ environment = COPY_ALL 3 | # @ executable = /bgl/BlueLight/ppcfloor/bglsys/bin/mpirun 4 | # @ arguments = -verbose 1 -np $NP -cwd $HOME/rbf -exe $HOME/rbf/main -args ".9 5 1.9 -pc_type asm -sub_pc_type lu -sub_mat_type dense -ksp_rtol 1e-13 -ksp_max_it 1000 -ksp_monitor -log_summary -vecscatter_alltoall" 5 | # @ wall_clock_limit = 30:00 6 | # @ input = /dev/null 7 | # @ output = $(jobid).out 8 | # @ error = $(jobid).err 9 | # @ notification = never 10 | # @ queue 11 | -------------------------------------------------------------------------------- /3d/bluegene.sh: -------------------------------------------------------------------------------- 1 | # @ job_type = parallel 2 | # @ environment = COPY_ALL 3 | # @ executable = /bgl/BlueLight/ppcfloor/bglsys/bin/mpirun 4 | # @ arguments = -verbose 1 -np $NP -cwd $HOME/rbf -exe $HOME/rbf/main -args ".9 5 1.9 -pc_type asm -sub_pc_type lu -sub_mat_type dense -ksp_rtol 1e-13 -ksp_max_it 1000 -ksp_monitor -log_summary -vecscatter_alltoall" 5 | # @ wall_clock_limit = 30:00 6 | # @ input = /dev/null 7 | # @ 
output = $(jobid).out 8 | # @ error = $(jobid).err 9 | # @ notification = never 10 | # @ queue 11 | -------------------------------------------------------------------------------- /2d/get_vorticity.h: -------------------------------------------------------------------------------- 1 | #ifndef get_vorticity_h 2 | #define get_vorticity_h 3 | 4 | class Get_vorticity 5 | { 6 | int i,j,ic,il,ista,iend; 7 | double w,dx,dy; 8 | public: 9 | void get_vorticity(PARTICLE *particle,CLUSTER *cluster) 10 | { 11 | for (ic=cluster->icsta; icicend; ic++) { 12 | Get_trunc trunc; 13 | trunc.get_trunc(particle,cluster,ic); 14 | ista = cluster->ista[ic]; 15 | iend = cluster->iend[ic]; 16 | for (i=ista; i<=iend; i++) { 17 | w = 0; 18 | for (j=0; jnptruncj; j++) { 19 | dx = particle->xil[i]-cluster->xjt[j]; 20 | dy = particle->yil[i]-cluster->yjt[j]; 21 | w += cluster->gjt[j]*exp(-(dx*dx+dy*dy)/(2*particle->sigma*particle->sigma))/ 22 | (2*M_PI*particle->sigma*particle->sigma); 23 | } 24 | particle->wil[i] = w; 25 | } 26 | } 27 | } 28 | }; 29 | 30 | #endif 31 | 32 | -------------------------------------------------------------------------------- /3d/get_vorticity.h: -------------------------------------------------------------------------------- 1 | #ifndef get_vorticity_h 2 | #define get_vorticity_h 3 | 4 | class Get_vorticity 5 | { 6 | int i,j,ic,il,ista,iend; 7 | double w,dx,dy,dz; 8 | public: 9 | void get_vorticity(PARTICLE *particle,CLUSTER *cluster) 10 | { 11 | for (ic=cluster->icsta; icicend; ic++) { 12 | Get_trunc trunc; 13 | trunc.get_trunc(particle,cluster,ic); 14 | ista = cluster->ista[ic]; 15 | iend = cluster->iend[ic]; 16 | for (i=ista; i<=iend; i++) { 17 | w = 0; 18 | for (j=0; jnptruncj; j++) { 19 | dx = particle->xil[i]-cluster->xjt[j]; 20 | dy = particle->yil[i]-cluster->yjt[j]; 21 | dz = particle->zil[i]-cluster->zjt[j]; 22 | w += cluster->gjt[j]*exp(-(dx*dx+dy*dy+dz*dz)/(2*particle->sigma*particle->sigma))/ 23 | (2*M_PI*particle->sigma*particle->sigma); 24 | } 25 | 
particle->wil[i] = w; 26 | } 27 | } 28 | } 29 | }; 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /3d/Makefile: -------------------------------------------------------------------------------- 1 | NP=1 2 | LOCDIR=3d 3 | LIB = -L$(CUDA_INSTALL_PATH)/lib64 -L$(SDK_INSTALL_PATH)/lib -lcudart -lGL -lGLU -lcutil_x86_64 -lstdc++ 4 | 5 | cpu: main.o rbf_interpolation.o vorticity_evaluation.o mpi_range.o matmult.o 6 | ${CLINKER} -o main $? ${PETSC_SNES_LIB} 7 | mpirun -n ${NP} ./main 1.0 3 1.5 -pc_type asm -sub_pc_type lu -sub_mat_type dense -ksp_monitor -ksp_rtol 1e-13 -ksp_max_it 1000 -vecscatter_alltoall 8 | gpu: main.o rbf_interpolation.o vorticity_evaluation.o mpi_range.o matmultgpu.o gpumatmult.o 9 | ${CLINKER} -o main $? ${PETSC_SNES_LIB} $(LIB) 10 | mpirun -n ${NP} ./main 1.0 3 1.5 -pc_type asm -sub_pc_type lu -sub_mat_type dense -ksp_monitor -ksp_rtol 1e-13 -ksp_max_it 1000 -vecscatter_alltoall 11 | 12 | static: wrapper.o rbf_interpolation.o vorticity_evaluation.o mpi_range.o matmultgpu.o gpumatmult.o 13 | ar ruv librbf.a $? 
14 | ranlib librbf.a 15 | 16 | purge: 17 | $(RM) *.dat *.o main 18 | save: 19 | make purge 20 | tar zcvf ../../rbf.tgz ../../rbf 21 | 22 | include ${PETSC_DIR}/conf/variables 23 | include ${PETSC_DIR}/conf/rules 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Barba group 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /3d/batch.sh: -------------------------------------------------------------------------------- 1 | export OVERLAP=10 2 | export B_DOMAIN=3 3 | export D_DOMAIN=15 4 | rm stop 5 | while [ $OVERLAP -ge 8 ] 6 | do 7 | a=0.1 8 | export O=`echo $OVERLAP \* $a | bc` 9 | export B=$B_DOMAIN 10 | export D=`echo $D_DOMAIN \* $a | bc` 11 | echo "||-----------------------------------------------------------------" 12 | if [ $OVERLAP -eq 10 ]; then 13 | echo "|| overlap : $O B domain : $B D/B ratio : $D compile" 14 | else 15 | echo "|| overlap : $O B domain : $B D/B ratio : $D compile" 16 | fi 17 | echo "||-----------------------------------------------------------------" 18 | make 19 | # llsubmit ../bluegene.sh 20 | # qsub -V ../start.sh 21 | $PETSC_DIR/$PETSC_ARCH/bin/mpiexec -np 7 ./main $O $B $D -pc_type asm -sub_pc_type lu -sub_mat_type dense -ksp_monitor -ksp_rtol 1e-13 -ksp_max_it 1000 -vecscatter_alltoall 22 | export D_DOMAIN=`expr $D_DOMAIN + 2` 23 | if [ $D_DOMAIN -gt 23 ]; then 24 | export D_DOMAIN=15 25 | export B_DOMAIN=`expr $B_DOMAIN + 1` 26 | fi 27 | if [ $B_DOMAIN -gt 7 ]; then 28 | export B_DOMAIN=3 29 | export OVERLAP=`expr $OVERLAP - 1` 30 | fi 31 | if [ -e "stop" ]; then 32 | echo "||-----------------------------------------------------------------" 33 | echo "|| canceled by user " 34 | echo "||-----------------------------------------------------------------" 35 | export OVERLAP=7 36 | fi 37 | done 38 | -------------------------------------------------------------------------------- /2d/batch.sh: -------------------------------------------------------------------------------- 1 | export OVERLAP=8 2 | export B_DOMAIN=3 3 | export D_DOMAIN=15 4 | rm stop 5 | while [ $OVERLAP -ge 8 ] 6 | do 7 | a=0.1 8 | export O=`echo $OVERLAP \* $a | bc` 9 | export B=$B_DOMAIN 10 | export D=`echo $D_DOMAIN \* $a | bc` 11 | echo 
"||-----------------------------------------------------------------" 12 | if [ $OVERLAP -eq 10 ]; then 13 | echo "|| overlap : $O B domain : $B D/B ratio : $D compile" 14 | else 15 | echo "|| overlap : $O B domain : $B D/B ratio : $D compile" 16 | fi 17 | echo "||-----------------------------------------------------------------" 18 | make gpuc 19 | # llsubmit ../bluegene.sh 20 | # qsub -V ../start.sh 21 | $PETSC_DIR/$PETSC_ARCH/bin/mpiexec -np 1 ./main $O $B $D -pc_type asm -sub_pc_type lu -sub_mat_type dense -ksp_monitor -ksp_rtol 1e-5 -ksp_max_it 100 -vecscatter_alltoall 22 | export D_DOMAIN=`expr $D_DOMAIN + 2` 23 | if [ $D_DOMAIN -gt 23 ]; then 24 | export D_DOMAIN=15 25 | export B_DOMAIN=`expr $B_DOMAIN + 1` 26 | fi 27 | if [ $B_DOMAIN -gt 7 ]; then 28 | export B_DOMAIN=3 29 | export OVERLAP=`expr $OVERLAP - 1` 30 | fi 31 | if [ -e "stop" ]; then 32 | echo "||-----------------------------------------------------------------" 33 | echo "|| canceled by user " 34 | echo "||-----------------------------------------------------------------" 35 | export OVERLAP=7 36 | fi 37 | done 38 | -------------------------------------------------------------------------------- /2d/Makefile: -------------------------------------------------------------------------------- 1 | NP=1 2 | LOCDIR=2d 3 | LIB = -L$(CUDA_INSTALL_PATH)/lib64 -L$(SDK_INSTALL_PATH)/lib -lcudart -lGL -lGLU -lcutil_x86_64 -lstdc++ -ldl -lm 4 | 5 | cpu: main.o rbf_interpolation.o vorticity_evaluation.o mpi_range.o matmult.o 6 | ${CLINKER} -o main $? ${PETSC_SNES_LIB} 7 | mpirun -n $(NP) ./main .9 5 1.9 -pc_type asm -sub_pc_type lu -sub_mat_type dense -ksp_monitor -ksp_rtol 1e-13 -ksp_max_it 1000 -vecscatter_alltoall -log_summary 8 | gpu: main.o rbf_interpolation.o vorticity_evaluation.o mpi_range.o matmultgpu.o gpumatmult.o 9 | ${CLINKER} -o main $? 
${PETSC_SNES_LIB} $(LIB) 10 | mpirun -n $(NP) ./main .8 7 1.9 -pc_type asm -sub_pc_type lu -sub_mat_type dense -ksp_monitor -ksp_rtol 1e-13 -ksp_max_it 1000 -vecscatter_alltoall -log_summary 11 | 12 | main2: main2.o 13 | ${CLINKER} -o $@ $< ${PETSC_SNES_LIB} 14 | mpirun -n $(NP) ./main2 .9 5 1.9 -pc_type asm -sub_pc_type lu -sub_mat_type dense -ksp_monitor -ksp_rtol 1e-13 -ksp_max_it 1000 -vecscatter_alltoall 15 | $(RM) *.o 16 | cylinder: cylinder.o 17 | ${CLINKER} -o $@ $< ${PETSC_SNES_LIB} 18 | mpirun -n $(NP) ./cylinder .9 5 1.9 -pc_type asm -sub_pc_type lu -sub_mat_type dense -ksp_monitor -ksp_rtol 1e-13 -ksp_max_it 1000 -vecscatter_alltoall 19 | purge: 20 | $(RM) *.dat *.o main main2 cylinder 21 | save: 22 | make purge 23 | tar zcvf ../../rbf.tgz ../../rbf 24 | 25 | include ${PETSC_DIR}/conf/variables 26 | include ${PETSC_DIR}/conf/rules 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PetRBF 2 | Many applications in computational science need to approximate a function based on finite data. When the data are in 3 | a certain sense “scattered” in their domain, one very powerful technique is radial basis function (RBF) interpolation. 4 | For many years, the wide applicability of RBF interpolation was hindered by its numerical difficulty and expense. 5 | Indeed, in their mathematical expression, RBF methods produce an ill-conditioned linear system, for which a direct 6 | solution becomes prohibitive for more than a few thousand data points. 7 | 8 | We have developed a parallel algorithm for RBF interpolation that exhibits O(N) complexity, requires O(N) storage, 9 | and scales excellently up to a thousand processes. The algorithm uses the GMRES iterative solver with a restricted 10 | additive Schwarz method (RASM) as a preconditioner and a fast matrix-vector algorithm. 
Previous fast RBF methods 11 | — achieving at most O(N log N) complexity — were developed using multiquadric and polyharmonic basis functions. In 12 | contrast, the present method uses Gaussians with a small variance. The fast decay of the Gaussian basis function 13 | allows rapid convergence of the iterative solver even when the subdomains in the RASM are very small. The method was 14 | implemented in parallel using the PETSc library (developer version). Numerical experiments demonstrate its capability 15 | in problems of RBF interpolation with more than 50 million data points, timing at 106 seconds (19 iterations for an 16 | error tolerance of 10e−15) on 1024 processors of a Blue Gene/L (700 MHz PowerPC processors). 17 | 18 | See the paper [PetRBF--A parallel O(N) algorithm for radial basis function interpolation](http://arxiv.org/abs/0909.5413) by Rio Yokota, L A Barba, Matthew G Knepley, and visit [The Barba Group page](http://lorenabarba.com/) for more information. A [summary of this project](http://www.bu.edu/tech/support/research/visualization/gallery/petrbf/) is also available among the Boston University research computing briefs. 19 | 20 | > We distribute this code under the MIT License, giving potential users the greatest freedom possible. We do, however, request fellow scientists that if they use our codes in research, they kindly include us in the acknowledgement of their papers. We do not request gratuitous citations; only cite our articles if you deem it warranted. 
21 | -------------------------------------------------------------------------------- /2d/par.h: -------------------------------------------------------------------------------- 1 | #ifndef par_h 2 | #define par_h 3 | 4 | #include 5 | 6 | struct PARAMETER{ 7 | int nt; 8 | int memory; 9 | double vis; 10 | double t; 11 | double dt; 12 | double u_inf; 13 | }; 14 | 15 | struct PARTICLE{ 16 | int n; 17 | int no; 18 | int ni; 19 | int nj; 20 | int nilocal; 21 | int njlocal; 22 | int ista; 23 | int iend; 24 | int jsta; 25 | int jend; 26 | double sigma; 27 | double sigma0; 28 | double overlap; 29 | double h; 30 | PetscReal xmin; 31 | PetscReal xmax; 32 | PetscReal ymin; 33 | PetscReal ymax; 34 | double r_grid; 35 | Vec i; 36 | Vec j; 37 | Vec ii; 38 | Vec jj; 39 | Vec xi; 40 | Vec yi; 41 | Vec gi; 42 | Vec wi; 43 | Vec xj; 44 | Vec yj; 45 | Vec gj; 46 | PetscScalar *il; 47 | PetscScalar *jl; 48 | PetscScalar *xil; 49 | PetscScalar *yil; 50 | PetscScalar *gil; 51 | PetscScalar *wil; 52 | PetscScalar *xjl; 53 | PetscScalar *yjl; 54 | PetscScalar *gjl; 55 | }; 56 | 57 | struct BOUNDARY{ 58 | int n; 59 | double r; 60 | double *x; 61 | double *y; 62 | double *g; 63 | double *ut; 64 | double *vt; 65 | double *vnx; 66 | double *vny; 67 | double *vtx; 68 | double *vty; 69 | }; 70 | 71 | struct CLUSTER{ 72 | int nsigma_box; 73 | int sigma_buffer; 74 | int sigma_trunc; 75 | int n; 76 | int nx; 77 | int ny; 78 | int neighbor_buffer; 79 | int neighbor_trunc; 80 | int neighbor_ghost; 81 | int niperbox; 82 | int njperbox; 83 | int nclocal; 84 | int ncghost; 85 | int nighost; 86 | int njghost; 87 | int npbufferi; 88 | int nptruncj; 89 | int maxbuffer; 90 | int maxtrunc; 91 | int maxghost; 92 | int maxlocal; 93 | int file; 94 | int icsta; 95 | int icend; 96 | int *ista; 97 | int *iend; 98 | int *jsta; 99 | int *jend; 100 | int *ix; 101 | int *iy; 102 | int *ilocal; 103 | int *jlocal; 104 | int *ighost; 105 | int *jghost; 106 | int *idx; 107 | int *jdx; 108 | double xmin; 109 | double 
xmax; 110 | double ymin; 111 | double ymax; 112 | double box_length; 113 | double buffer_length; 114 | double trunc_length; 115 | double *xc; 116 | double *yc; 117 | double *xib; 118 | double *yib; 119 | double *gib; 120 | double *wib; 121 | double *xjt; 122 | double *yjt; 123 | double *gjt; 124 | }; 125 | 126 | struct GRID{ 127 | int nx; 128 | int ny; 129 | }; 130 | 131 | struct HIERARCHICAL{ 132 | int mp; 133 | }; 134 | 135 | struct MPI2{ 136 | int nprocs; 137 | int myrank; 138 | int nsta; 139 | int nend; 140 | int ista; 141 | int iend; 142 | double *sendi; 143 | double *recvi; 144 | double *sendj; 145 | double *recvj; 146 | }; 147 | 148 | struct BOTH{ 149 | PARTICLE *p; 150 | CLUSTER *c; 151 | }; 152 | 153 | const double epsf=1e-6; 154 | 155 | #endif 156 | 157 | -------------------------------------------------------------------------------- /3d/par.h: -------------------------------------------------------------------------------- 1 | #ifndef par_h 2 | #define par_h 3 | 4 | #include 5 | 6 | struct PARAMETER{ 7 | int nt; 8 | int memory; 9 | double vis; 10 | double t; 11 | double dt; 12 | double u_inf; 13 | }; 14 | 15 | struct PARTICLE{ 16 | int n; 17 | int no; 18 | int ni; 19 | int nj; 20 | int nilocal; 21 | int njlocal; 22 | int ista; 23 | int iend; 24 | int jsta; 25 | int jend; 26 | double sigma; 27 | double sigma0; 28 | double overlap; 29 | double h; 30 | double xmin; 31 | double xmax; 32 | double ymin; 33 | double ymax; 34 | double zmin; 35 | double zmax; 36 | double r_grid; 37 | Vec i; 38 | Vec j; 39 | Vec ii; 40 | Vec jj; 41 | Vec xi; 42 | Vec yi; 43 | Vec zi; 44 | Vec gi; 45 | Vec wi; 46 | Vec xj; 47 | Vec yj; 48 | Vec zj; 49 | Vec gj; 50 | PetscScalar *il; 51 | PetscScalar *jl; 52 | PetscScalar *xil; 53 | PetscScalar *yil; 54 | PetscScalar *zil; 55 | PetscScalar *gil; 56 | PetscScalar *wil; 57 | PetscScalar *xjl; 58 | PetscScalar *yjl; 59 | PetscScalar *zjl; 60 | PetscScalar *gjl; 61 | }; 62 | 63 | struct BOUNDARY{ 64 | int n; 65 | double r; 66 | double 
*x; 67 | double *y; 68 | double *z; 69 | double *g; 70 | double *ut; 71 | double *vt; 72 | double *vnx; 73 | double *vny; 74 | double *vtx; 75 | double *vty; 76 | }; 77 | 78 | struct CLUSTER{ 79 | int nsigma_box; 80 | int sigma_buffer; 81 | int sigma_trunc; 82 | int n; 83 | int nx; 84 | int ny; 85 | int nz; 86 | int neighbor_buffer; 87 | int neighbor_trunc; 88 | int neighbor_ghost; 89 | int niperbox; 90 | int njperbox; 91 | int nclocal; 92 | int ncghost; 93 | int nighost; 94 | int njghost; 95 | int npbufferi; 96 | int nptruncj; 97 | int maxbuffer; 98 | int maxtrunc; 99 | int maxghost; 100 | int maxlocal; 101 | int file; 102 | int icsta; 103 | int icend; 104 | int *ista; 105 | int *iend; 106 | int *jsta; 107 | int *jend; 108 | int *ix; 109 | int *iy; 110 | int *iz; 111 | int *ilocal; 112 | int *jlocal; 113 | int *ighost; 114 | int *jghost; 115 | int *idx; 116 | int *jdx; 117 | double xmin; 118 | double xmax; 119 | double ymin; 120 | double ymax; 121 | double zmin; 122 | double zmax; 123 | double box_length; 124 | double buffer_length; 125 | double trunc_length; 126 | double *xc; 127 | double *yc; 128 | double *zc; 129 | double *xib; 130 | double *yib; 131 | double *zib; 132 | double *gib; 133 | double *wib; 134 | double *xjt; 135 | double *yjt; 136 | double *zjt; 137 | double *gjt; 138 | }; 139 | 140 | struct GRID{ 141 | int nx; 142 | int ny; 143 | int nz; 144 | }; 145 | 146 | struct HIERARCHICAL{ 147 | int mp; 148 | }; 149 | 150 | struct MPI2{ 151 | int nprocs; 152 | int myrank; 153 | int nsta; 154 | int nend; 155 | int ista; 156 | int iend; 157 | double *sendi; 158 | double *recvi; 159 | double *sendj; 160 | double *recvj; 161 | }; 162 | 163 | struct BOTH{ 164 | PARTICLE *p; 165 | CLUSTER *c; 166 | }; 167 | 168 | const double epsf=1e-6; 169 | 170 | #endif 171 | -------------------------------------------------------------------------------- /2d/get_trunc.h: -------------------------------------------------------------------------------- 1 | #ifndef get_trunc_h 2 | 
#define get_trunc_h 3 | 4 | class Get_trunc 5 | { 6 | int i,il,ista,iend,ix,iy,j,jc,jsta,jend,jx,jy,jx_min,jx_max,jy_min,jy_max; 7 | double xc,yc,xi,yi,wi,xj,yj,gj; 8 | public: 9 | void get_trunc(PARTICLE *particle, CLUSTER *cluster, int ic) 10 | { 11 | MPI2 mpi; 12 | MPI_Comm_rank(PETSC_COMM_WORLD,&mpi.myrank); 13 | 14 | cluster->trunc_length = cluster->sigma_trunc*particle->sigma/2+epsf; 15 | 16 | ista = cluster->ista[ic]; 17 | iend = cluster->iend[ic]; 18 | if (ista <= iend) { 19 | xc = cluster->xc[ic]; 20 | yc = cluster->yc[ic]; 21 | ix = cluster->ix[ic]; 22 | iy = cluster->iy[ic]; 23 | jx_min = std::max(0,ix-cluster->neighbor_trunc); 24 | jx_max = std::min(cluster->nx-1,ix+cluster->neighbor_trunc); 25 | jy_min = std::max(0,iy-cluster->neighbor_trunc); 26 | jy_max = std::min(cluster->ny-1,iy+cluster->neighbor_trunc); 27 | 28 | /* 29 | put all particles in the center box into the corresponding cell structure 30 | */ 31 | jsta = cluster->jsta[ic]; 32 | jend = cluster->jend[ic]; 33 | i = -1; 34 | for (j=jsta; j<=jend; j++) { 35 | i++; 36 | cluster->xjt[i] = particle->xjl[j]; 37 | cluster->yjt[i] = particle->yjl[j]; 38 | cluster->gjt[i] = particle->gjl[j]; 39 | } 40 | 41 | /* 42 | loop through all neighbors 43 | */ 44 | for (jx=jx_min; jx<=jx_max; jx++) { 45 | for (jy=jy_min; jy<=jy_max; jy++) { 46 | if (ix != jx || iy != jy) { 47 | jc = jx*cluster->ny+jy; 48 | jsta = cluster->jsta[jc]; 49 | jend = cluster->jend[jc]; 50 | 51 | /* 52 | select from the particles in the neighbor boxes, the ones that belong in the truncated zone 53 | */ 54 | if (jsta <= jend) { 55 | for (j=jsta; j<=jend; j++) { 56 | xj = particle->xjl[j]; 57 | yj = particle->yjl[j]; 58 | gj = particle->gjl[j]; 59 | 60 | /* 61 | add all particles in the neighbor boxes into the corresponding cell structure 62 | */ 63 | if (fabs(xj-xc) < cluster->trunc_length && fabs(yj-yc) < cluster->trunc_length) { 64 | i++; 65 | cluster->xjt[i] = xj; 66 | cluster->yjt[i] = yj; 67 | cluster->gjt[i] = gj; 68 | } 69 | } 
70 | } 71 | } 72 | } 73 | } 74 | cluster->nptruncj = i+1; 75 | } 76 | else { 77 | cluster->nptruncj = 0; 78 | } 79 | if (cluster->file == 1) { 80 | std::ofstream fid; 81 | fid.open("trunc.dat", std::ios::app); 82 | fid << cluster->nptruncj << " "; 83 | for (i=0; inptruncj; i++) fid << cluster->xjt[i] << " "; 84 | for (i=0; inptruncj; i++) fid << cluster->yjt[i] << " "; 85 | fid.close(); 86 | } 87 | } 88 | }; 89 | 90 | #endif 91 | -------------------------------------------------------------------------------- /2d/get_buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef get_buffer_h 2 | #define get_buffer_h 3 | 4 | class Get_buffer 5 | { 6 | int i,ista,iend,ix,iy,il,j,jc,jsta,jend,jx,jy,jx_min,jx_max,jy_min,jy_max; 7 | double xc,yc,xi,yi,gi,wi,xj,yj,gj; 8 | public: 9 | void get_buffer(PARTICLE *particle, CLUSTER *cluster, int ic) 10 | { 11 | cluster->buffer_length = cluster->sigma_buffer*particle->sigma/2+epsf; 12 | 13 | /* 14 | loop through all clusters 15 | */ 16 | ista = cluster->ista[ic]; 17 | iend = cluster->iend[ic]; 18 | if (ista <= iend) { 19 | xc = cluster->xc[ic]; 20 | yc = cluster->yc[ic]; 21 | ix = cluster->ix[ic]; 22 | iy = cluster->iy[ic]; 23 | jx_min = std::max(0,ix-cluster->neighbor_buffer); 24 | jx_max = std::min(cluster->nx-1,ix+cluster->neighbor_buffer); 25 | jy_min = std::max(0,iy-cluster->neighbor_buffer); 26 | jy_max = std::min(cluster->ny-1,iy+cluster->neighbor_buffer); 27 | 28 | /* 29 | put all particles in the center box into the corresponding cell structure 30 | */ 31 | i = -1; 32 | for (j=ista; j<=iend; j++) { 33 | i++; 34 | cluster->xib[i] = particle->xil[j]; 35 | cluster->yib[i] = particle->yil[j]; 36 | cluster->gib[i] = particle->gil[j]; 37 | cluster->wib[i] = particle->wil[j]; 38 | cluster->idx[i] = cluster->ilocal[j]; 39 | } 40 | 41 | /* 42 | loop through all neighbors 43 | */ 44 | for (jx=jx_min; jx<=jx_max; jx++) { 45 | for (jy=jy_min; jy<=jy_max; jy++) { 46 | if (ix != jx || iy != 
jy) { 47 | jc = jx*cluster->ny+jy; 48 | jsta = cluster->ista[jc]; 49 | jend = cluster->iend[jc]; 50 | 51 | /* 52 | select from the particles in the neighbor boxes, the ones that belong in the buffer zone 53 | */ 54 | if (jsta <= jend) { 55 | for (j=jsta; j<=jend; j++) { 56 | xi = particle->xil[j]; 57 | yi = particle->yil[j]; 58 | gi = particle->gil[j]; 59 | wi = particle->wil[j]; 60 | 61 | /* 62 | add all particles in the neighbor boxes into the corresponding cell structure 63 | */ 64 | if (fabs(xi-xc) < cluster->buffer_length && fabs(yi-yc) < cluster->buffer_length) { 65 | i++; 66 | cluster->xib[i] = xi; 67 | cluster->yib[i] = yi; 68 | cluster->gib[i] = gi; 69 | cluster->wib[i] = wi; 70 | cluster->idx[i] = cluster->ilocal[j]; 71 | } 72 | } 73 | } 74 | } 75 | } 76 | } 77 | cluster->npbufferi = i+1; 78 | } 79 | else { 80 | cluster->npbufferi = 0; 81 | } 82 | if (cluster->file == 1) { 83 | std::ofstream fid; 84 | fid.open("buffer.dat", std::ios::app); 85 | fid << cluster->npbufferi << " "; 86 | for (i=0; inpbufferi; i++) fid << cluster->xib[i] << " "; 87 | for (i=0; inpbufferi; i++) fid << cluster->yib[i] << " "; 88 | fid.close(); 89 | } 90 | } 91 | }; 92 | 93 | #endif 94 | 95 | -------------------------------------------------------------------------------- /3d/get_trunc.h: -------------------------------------------------------------------------------- 1 | #ifndef get_trunc_h 2 | #define get_trunc_h 3 | 4 | class Get_trunc 5 | { 6 | int i,il,ista,iend,ix,iy,iz,j,jc,jsta,jend,jx,jy,jz,jx_min,jx_max,jy_min,jy_max,jz_min,jz_max; 7 | double xc,yc,zc,xi,yi,zi,wi,xj,yj,zj,gj; 8 | public: 9 | void get_trunc(PARTICLE *particle, CLUSTER *cluster, int ic) 10 | { 11 | MPI2 mpi; 12 | MPI_Comm_rank(PETSC_COMM_WORLD,&mpi.myrank); 13 | 14 | cluster->trunc_length = cluster->sigma_trunc*particle->sigma/2+epsf; 15 | 16 | ista = cluster->ista[ic]; 17 | iend = cluster->iend[ic]; 18 | if (ista <= iend) { 19 | xc = cluster->xc[ic]; 20 | yc = cluster->yc[ic]; 21 | zc = cluster->zc[ic]; 
22 | ix = cluster->ix[ic]; 23 | iy = cluster->iy[ic]; 24 | iz = cluster->iz[ic]; 25 | jx_min = std::max(0,ix-cluster->neighbor_trunc); 26 | jx_max = std::min(cluster->nx-1,ix+cluster->neighbor_trunc); 27 | jy_min = std::max(0,iy-cluster->neighbor_trunc); 28 | jy_max = std::min(cluster->ny-1,iy+cluster->neighbor_trunc); 29 | jz_min = std::max(0,iz-cluster->neighbor_trunc); 30 | jz_max = std::min(cluster->nz-1,iz+cluster->neighbor_trunc); 31 | 32 | /* 33 | put all particles in the center box into the corresponding cell structure 34 | */ 35 | jsta = cluster->jsta[ic]; 36 | jend = cluster->jend[ic]; 37 | i = -1; 38 | for (j=jsta; j<=jend; j++) { 39 | i++; 40 | cluster->xjt[i] = particle->xjl[j]; 41 | cluster->yjt[i] = particle->yjl[j]; 42 | cluster->zjt[i] = particle->zjl[j]; 43 | cluster->gjt[i] = particle->gjl[j]; 44 | } 45 | 46 | /* 47 | loop through all neighbors 48 | */ 49 | for (jx=jx_min; jx<=jx_max; jx++) { 50 | for (jy=jy_min; jy<=jy_max; jy++) { 51 | for (jz=jz_min; jz<=jz_max; jz++) { 52 | if (ix != jx || iy != jy || iz != jz) { 53 | jc = jz*cluster->nx*cluster->ny+jx*cluster->ny+jy; 54 | jsta = cluster->jsta[jc]; 55 | jend = cluster->jend[jc]; 56 | 57 | /* 58 | select from the particles in the neighbor boxes, the ones that belong in the truncated zone 59 | */ 60 | if (jsta <= jend) { 61 | for (j=jsta; j<=jend; j++) { 62 | xj = particle->xjl[j]; 63 | yj = particle->yjl[j]; 64 | zj = particle->zjl[j]; 65 | gj = particle->gjl[j]; 66 | 67 | /* 68 | add all particles in the neighbor boxes into the corresponding cell structure 69 | */ 70 | if (fabs(xj-xc) < cluster->trunc_length && 71 | fabs(yj-yc) < cluster->trunc_length && 72 | fabs(zj-zc) < cluster->trunc_length) { 73 | i++; 74 | cluster->xjt[i] = xj; 75 | cluster->yjt[i] = yj; 76 | cluster->zjt[i] = zj; 77 | cluster->gjt[i] = gj; 78 | } 79 | } 80 | } 81 | } 82 | } 83 | } 84 | } 85 | cluster->nptruncj = i+1; 86 | } 87 | else { 88 | cluster->nptruncj = 0; 89 | } 90 | if (cluster->file == 1) { 91 | std::ofstream 
fid; 92 | fid.open("trunc.dat", std::ios::app); 93 | fid << cluster->nptruncj << " "; 94 | for (i=0; inptruncj; i++) fid << cluster->xjt[i] << " "; 95 | for (i=0; inptruncj; i++) fid << cluster->yjt[i] << " "; 96 | for (i=0; inptruncj; i++) fid << cluster->zjt[i] << " "; 97 | fid.close(); 98 | } 99 | } 100 | }; 101 | 102 | #endif 103 | -------------------------------------------------------------------------------- /3d/get_buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef get_buffer_h 2 | #define get_buffer_h 3 | 4 | class Get_buffer 5 | { 6 | int i,ista,iend,ix,iy,iz,il,j,jc,jsta,jend,jx,jy,jz,jx_min,jx_max,jy_min,jy_max,jz_min,jz_max; 7 | double xc,yc,zc,xi,yi,zi,gi,wi,xj,yj,zj,gj; 8 | public: 9 | void get_buffer(PARTICLE *particle, CLUSTER *cluster, int ic) 10 | { 11 | cluster->buffer_length = cluster->sigma_buffer*particle->sigma/2+epsf; 12 | 13 | /* 14 | loop through all clusters 15 | */ 16 | ista = cluster->ista[ic]; 17 | iend = cluster->iend[ic]; 18 | if (ista <= iend) { 19 | xc = cluster->xc[ic]; 20 | yc = cluster->yc[ic]; 21 | zc = cluster->zc[ic]; 22 | ix = cluster->ix[ic]; 23 | iy = cluster->iy[ic]; 24 | iz = cluster->iz[ic]; 25 | jx_min = std::max(0,ix-cluster->neighbor_buffer); 26 | jx_max = std::min(cluster->nx-1,ix+cluster->neighbor_buffer); 27 | jy_min = std::max(0,iy-cluster->neighbor_buffer); 28 | jy_max = std::min(cluster->ny-1,iy+cluster->neighbor_buffer); 29 | jz_min = std::max(0,iz-cluster->neighbor_buffer); 30 | jz_max = std::min(cluster->nz-1,iz+cluster->neighbor_buffer); 31 | 32 | /* 33 | put all particles in the center box into the corresponding cell structure 34 | */ 35 | i = -1; 36 | for (j=ista; j<=iend; j++) { 37 | i++; 38 | cluster->xib[i] = particle->xil[j]; 39 | cluster->yib[i] = particle->yil[j]; 40 | cluster->zib[i] = particle->zil[j]; 41 | cluster->gib[i] = particle->gil[j]; 42 | cluster->wib[i] = particle->wil[j]; 43 | cluster->idx[i] = cluster->ilocal[j]; 44 | } 45 | 46 | /* 
47 | loop through all neighbors 48 | */ 49 | for (jx=jx_min; jx<=jx_max; jx++) { 50 | for (jy=jy_min; jy<=jy_max; jy++) { 51 | for (jz=jz_min; jz<=jz_max; jz++) { 52 | if (ix != jx || iy != jy || iz != jz) { 53 | jc = jz*cluster->nx*cluster->ny+jx*cluster->ny+jy; 54 | jsta = cluster->ista[jc]; 55 | jend = cluster->iend[jc]; 56 | 57 | /* 58 | select from the particles in the neighbor boxes, the ones that belong in the buffer zone 59 | */ 60 | if (jsta <= jend) { 61 | for (j=jsta; j<=jend; j++) { 62 | xi = particle->xil[j]; 63 | yi = particle->yil[j]; 64 | zi = particle->zil[j]; 65 | gi = particle->gil[j]; 66 | wi = particle->wil[j]; 67 | 68 | /* 69 | add all particles in the neighbor boxes into the corresponding cell structure 70 | */ 71 | if (fabs(xi-xc) < cluster->buffer_length && 72 | fabs(yi-yc) < cluster->buffer_length && 73 | fabs(zi-zc) < cluster->buffer_length) { 74 | i++; 75 | cluster->xib[i] = xi; 76 | cluster->yib[i] = yi; 77 | cluster->zib[i] = zi; 78 | cluster->gib[i] = gi; 79 | cluster->wib[i] = wi; 80 | cluster->idx[i] = cluster->ilocal[j]; 81 | } 82 | } 83 | } 84 | } 85 | } 86 | } 87 | } 88 | cluster->npbufferi = i+1; 89 | } 90 | else { 91 | cluster->npbufferi = 0; 92 | } 93 | if (cluster->file == 1) { 94 | std::ofstream fid; 95 | fid.open("buffer.dat", std::ios::app); 96 | fid << cluster->npbufferi << " "; 97 | for (i=0; inpbufferi; i++) fid << cluster->xib[i] << " "; 98 | for (i=0; inpbufferi; i++) fid << cluster->yib[i] << " "; 99 | for (i=0; inpbufferi; i++) fid << cluster->zib[i] << " "; 100 | fid.close(); 101 | } 102 | } 103 | }; 104 | 105 | #endif 106 | -------------------------------------------------------------------------------- /3d/wrapper.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "par.h" 12 | #include "get_cluster.h" 13 | #include "get_buffer.h" 14 | #include 
"get_trunc.h" 15 | #include "get_vorticity.h" 16 | 17 | extern PetscErrorCode vorticity_evaluation(Vec,Vec,Vec,Vec,Vec,Vec,Vec,Vec,double,int,int,int); 18 | extern PetscErrorCode rbf_interpolation(Vec,Vec,Vec,Vec,Vec,double,int,int,int,int*); 19 | 20 | int RBFInterpolation(int argc, char **argv, 21 | int ng, float *xg, float *yg, float *zg, float *wg, 22 | int np, float *xp, float *yp, float *zp, float *gp, 23 | float sigma, int nsigma_box, int sigma_buffer, int sigma_trunc) 24 | { 25 | int i,its,ni,nj,ista,iend,jsta,jend; 26 | 27 | MPI2 mpi; 28 | 29 | PetscErrorCode ierr; 30 | PetscScalar *xid,*yid,*zid,*wid,*gid,*xjd,*yjd,*zjd,*gjd; 31 | Vec xi,yi,zi,wi,gi,xj,yj,zj,gj; 32 | 33 | PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL); 34 | MPI_Comm_size(PETSC_COMM_WORLD,&mpi.nprocs); 35 | MPI_Comm_rank(PETSC_COMM_WORLD,&mpi.myrank); 36 | 37 | /* 38 | calculate problem size 39 | */ 40 | ni = ng; 41 | nj = np; 42 | 43 | /* 44 | generate particles 45 | */ 46 | ierr = VecCreate(PETSC_COMM_WORLD,&xi);CHKERRQ(ierr); 47 | ierr = VecSetSizes(xi,PETSC_DECIDE,ni);CHKERRQ(ierr); 48 | ierr = VecSetFromOptions(xi);CHKERRQ(ierr); 49 | ierr = VecDuplicate(xi,&yi);CHKERRQ(ierr); 50 | ierr = VecDuplicate(xi,&zi);CHKERRQ(ierr); 51 | ierr = VecDuplicate(xi,&gi);CHKERRQ(ierr); 52 | ierr = VecDuplicate(xi,&wi);CHKERRQ(ierr); 53 | ierr = VecGetOwnershipRange(xi,&ista,&iend);CHKERRQ(ierr); 54 | ierr = VecCreate(PETSC_COMM_WORLD,&xj);CHKERRQ(ierr); 55 | ierr = VecSetSizes(xj,PETSC_DECIDE,nj);CHKERRQ(ierr); 56 | ierr = VecSetFromOptions(xj);CHKERRQ(ierr); 57 | ierr = VecDuplicate(xj,&yj);CHKERRQ(ierr); 58 | ierr = VecDuplicate(xj,&zj);CHKERRQ(ierr); 59 | ierr = VecDuplicate(xj,&gj);CHKERRQ(ierr); 60 | ierr = VecGetOwnershipRange(xj,&jsta,&jend);CHKERRQ(ierr); 61 | ierr = VecGetArray(xi,&xid);CHKERRQ(ierr); 62 | ierr = VecGetArray(yi,&yid);CHKERRQ(ierr); 63 | ierr = VecGetArray(zi,&zid);CHKERRQ(ierr); 64 | ierr = VecGetArray(wi,&wid);CHKERRQ(ierr); 65 | ierr = 
VecGetArray(gi,&gid);CHKERRQ(ierr); 66 | ierr = VecGetArray(xj,&xjd);CHKERRQ(ierr); 67 | ierr = VecGetArray(yj,&yjd);CHKERRQ(ierr); 68 | ierr = VecGetArray(zj,&zjd);CHKERRQ(ierr); 69 | ierr = VecGetArray(gj,&gjd);CHKERRQ(ierr); 70 | for(i=ista; i epsf ) { 94 | vorticity_evaluation(xi,yi,zi,wi,xj,yj,zj,gj,sigma,nsigma_box,sigma_buffer,sigma_trunc); 95 | } 96 | rbf_interpolation(xi,yi,zi,gi,wi,sigma,nsigma_box,sigma_buffer,sigma_trunc,&its); 97 | 98 | ierr = VecGetArray(gi,&gid);CHKERRQ(ierr); 99 | for(i=ista; i 2 | #include "par.h" 3 | #include "get_buffer.h" 4 | #include "get_trunc.h" 5 | 6 | PetscErrorCode mymatmult(Mat A,Vec x,Vec y) 7 | { 8 | int i,j,ic,il,ista,iend; 9 | double dx,dy,w; 10 | PetscScalar *ax,*ay; 11 | PetscErrorCode ierr; 12 | BOTH *both; 13 | ierr = MatShellGetContext(A, (void **) &both);CHKERRQ(ierr); 14 | PARTICLE *particle = both->p; 15 | CLUSTER *cluster = both->c; 16 | 17 | PetscFunctionBegin; 18 | ierr = VecGetArray(x,&ax);CHKERRQ(ierr); 19 | ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 20 | for(i=particle->ista; iiend; i++) { 21 | ierr = VecSetValues(particle->gi,1,&i,&ax[i-particle->ista],INSERT_VALUES);CHKERRQ(ierr); 22 | } 23 | ierr = VecAssemblyBegin(particle->gi);CHKERRQ(ierr); 24 | ierr = VecAssemblyEnd(particle->gi);CHKERRQ(ierr); 25 | ierr = VecGhostUpdateBegin(particle->gi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 26 | ierr = VecGhostUpdateEnd(particle->gi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 27 | ierr = VecGetArray(particle->gi,&particle->gil);CHKERRQ(ierr); 28 | for (ic=cluster->icsta; icicend; ic++) { 29 | Get_trunc trunc; 30 | trunc.get_trunc(particle,cluster,ic); 31 | ista = cluster->ista[ic]; 32 | iend = cluster->iend[ic]; 33 | for (i=ista; i<=iend; i++) { 34 | il = cluster->ilocal[i]; 35 | w = 0; 36 | for (j=0; jnptruncj; j++) { 37 | dx = particle->xil[i]-cluster->xjt[j]; 38 | dy = particle->yil[i]-cluster->yjt[j]; 39 | w += cluster->gjt[j]*exp(-(dx*dx+dy*dy)/(2*particle->sigma*particle->sigma))/ 40 | 
(2*M_PI*particle->sigma*particle->sigma); 41 | } 42 | ay[il-particle->ista] = w; 43 | } 44 | /* Counted 1 for exp() */ 45 | ierr = PetscLogFlops((iend-ista)*cluster->nptruncj*15);CHKERRQ(ierr); 46 | } 47 | ierr = VecRestoreArray(particle->gi,&particle->gil);CHKERRQ(ierr); 48 | ierr = VecRestoreArray(x,&ax);CHKERRQ(ierr); 49 | ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 50 | PetscFunctionReturn(0); 51 | } 52 | 53 | PetscErrorCode mysubmat(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[]) 54 | { 55 | int i,ic,id,j,ista,iend; 56 | double dx,dy; 57 | PetscInt *idx; 58 | PetscScalar *A; 59 | PetscErrorCode ierr; 60 | BOTH *both; 61 | ierr = MatShellGetContext(mat, (void **) &both);CHKERRQ(ierr); 62 | PARTICLE *particle = both->p; 63 | CLUSTER *cluster = both->c; 64 | 65 | idx = new PetscInt [cluster->maxbuffer]; 66 | A = new PetscScalar [cluster->maxbuffer*cluster->maxbuffer]; 67 | 68 | PetscFunctionBegin; 69 | ierr = PetscMalloc(n * sizeof(Mat*), submat);CHKERRQ(ierr); 70 | ierr = VecGetArray(particle->gi,&particle->gil);CHKERRQ(ierr); 71 | for(ic = cluster->icsta; ic < cluster->icend; ic++) { 72 | id = ic-cluster->icsta; 73 | Get_buffer buffer; 74 | buffer.get_buffer(particle,cluster,ic); 75 | ierr = MatCreate(PETSC_COMM_SELF,&(*submat)[id]);CHKERRQ(ierr); 76 | ierr = MatSetOptionsPrefix((*submat)[id], "sub_");CHKERRQ(ierr); 77 | ierr = MatSetSizes((*submat)[id],cluster->npbufferi,cluster->npbufferi,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr 78 | ); 79 | ierr = MatSetFromOptions((*submat)[id]);CHKERRQ(ierr); 80 | ierr = MatSeqAIJSetPreallocation((*submat)[id],cluster->npbufferi,PETSC_NULL);CHKERRQ(ierr); 81 | ista = cluster->ista[ic]; 82 | iend = cluster->iend[ic]; 83 | if (ista <= iend) { 84 | for (i=0; inpbufferi; i++) { 85 | for (j=0; jnpbufferi; j++) { 86 | dx = cluster->xib[i]-cluster->xib[j]; 87 | dy = cluster->yib[i]-cluster->yib[j]; 88 | A[i*cluster->npbufferi+j] = exp(-(dx*dx+dy*dy)/(2*particle->sigma*particle->sigma))/ 89 | 
(2*M_PI*particle->sigma*particle->sigma); 90 | } 91 | idx[i] = i; 92 | } 93 | } 94 | ierr = MatSetUp((*submat)[id]);CHKERRQ(ierr); 95 | ierr = MatSetValues((*submat)[id],cluster->npbufferi,idx,cluster->npbufferi,idx,A,INSERT_VALUES);CHKERRQ(ierr); 96 | ierr = MatAssemblyBegin((*submat)[id],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 97 | ierr = MatAssemblyEnd((*submat)[id],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 98 | } 99 | ierr = VecRestoreArray(particle->gi,&particle->gil);CHKERRQ(ierr); 100 | delete[] A; 101 | delete[] idx; 102 | 103 | PetscFunctionReturn(0); 104 | } 105 | -------------------------------------------------------------------------------- /3d/matmult.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include "par.h" 3 | #include "get_buffer.h" 4 | #include "get_trunc.h" 5 | 6 | PetscErrorCode mymatmult(Mat A,Vec x,Vec y) 7 | { 8 | int i,j,ic,il,ista,iend; 9 | double dx,dy,dz,w; 10 | PetscScalar *ax,*ay; 11 | PetscErrorCode ierr; 12 | BOTH *both; 13 | ierr = MatShellGetContext(A, (void **) &both);CHKERRQ(ierr); 14 | PARTICLE *particle = both->p; 15 | CLUSTER *cluster = both->c; 16 | 17 | PetscFunctionBegin; 18 | ierr = VecGetArray(x,&ax);CHKERRQ(ierr); 19 | ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 20 | for(i=particle->ista; iiend; i++) { 21 | ierr = VecSetValues(particle->gi,1,&i,&ax[i-particle->ista],INSERT_VALUES);CHKERRQ(ierr); 22 | } 23 | ierr = VecAssemblyBegin(particle->gi);CHKERRQ(ierr); 24 | ierr = VecAssemblyEnd(particle->gi);CHKERRQ(ierr); 25 | ierr = VecGhostUpdateBegin(particle->gi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 26 | ierr = VecGhostUpdateEnd(particle->gi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 27 | ierr = VecGetArray(particle->gi,&particle->gil);CHKERRQ(ierr); 28 | for (ic=cluster->icsta; icicend; ic++) { 29 | Get_trunc trunc; 30 | trunc.get_trunc(particle,cluster,ic); 31 | ista = cluster->ista[ic]; 32 | iend = cluster->iend[ic]; 33 | for (i=ista; i<=iend; i++) { 34 | il = 
cluster->ilocal[i]; 35 | w = 0; 36 | for (j=0; jnptruncj; j++) { 37 | dx = particle->xil[i]-cluster->xjt[j]; 38 | dy = particle->yil[i]-cluster->yjt[j]; 39 | dz = particle->zil[i]-cluster->zjt[j]; 40 | w += cluster->gjt[j]*exp(-(dx*dx+dy*dy+dz*dz)/(2*particle->sigma*particle->sigma))/ 41 | (2*M_PI*particle->sigma*particle->sigma); 42 | } 43 | ay[il-particle->ista] = w; 44 | } 45 | /* Counted 1 for exp() */ 46 | ierr = PetscLogFlops((iend-ista)*cluster->nptruncj*15);CHKERRQ(ierr); 47 | } 48 | ierr = VecRestoreArray(particle->gi,&particle->gil);CHKERRQ(ierr); 49 | ierr = VecRestoreArray(x,&ax);CHKERRQ(ierr); 50 | ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 51 | PetscFunctionReturn(0); 52 | } 53 | 54 | PetscErrorCode mysubmat(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[]) 55 | { 56 | int i,ic,id,j,ista,iend; 57 | double dx,dy,dz; 58 | PetscInt *idx; 59 | PetscScalar *A; 60 | PetscErrorCode ierr; 61 | BOTH *both; 62 | ierr = MatShellGetContext(mat, (void **) &both);CHKERRQ(ierr); 63 | PARTICLE *particle = both->p; 64 | CLUSTER *cluster = both->c; 65 | 66 | idx = new PetscInt [cluster->maxbuffer]; 67 | A = new PetscScalar [cluster->maxbuffer*cluster->maxbuffer]; 68 | 69 | PetscFunctionBegin; 70 | ierr = PetscMalloc(n * sizeof(Mat*), submat);CHKERRQ(ierr); 71 | ierr = VecGetArray(particle->gi,&particle->gil);CHKERRQ(ierr); 72 | for(ic = cluster->icsta; ic < cluster->icend; ic++) { 73 | id = ic-cluster->icsta; 74 | Get_buffer buffer; 75 | buffer.get_buffer(particle,cluster,ic); 76 | ierr = MatCreate(PETSC_COMM_SELF,&(*submat)[id]);CHKERRQ(ierr); 77 | ierr = MatSetOptionsPrefix((*submat)[id], "sub_");CHKERRQ(ierr); 78 | ierr = MatSetSizes((*submat)[id],cluster->npbufferi,cluster->npbufferi,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr 79 | ); 80 | ierr = MatSetFromOptions((*submat)[id]);CHKERRQ(ierr); 81 | ierr = MatSeqAIJSetPreallocation((*submat)[id],cluster->npbufferi,PETSC_NULL);CHKERRQ(ierr); 82 | ista = cluster->ista[ic]; 83 | iend 
= cluster->iend[ic]; 84 | if (ista <= iend) { 85 | for (i=0; inpbufferi; i++) { 86 | for (j=0; jnpbufferi; j++) { 87 | dx = cluster->xib[i]-cluster->xib[j]; 88 | dy = cluster->yib[i]-cluster->yib[j]; 89 | dz = cluster->zib[i]-cluster->zib[j]; 90 | A[i*cluster->npbufferi+j] = exp(-(dx*dx+dy*dy+dz*dz)/(2*particle->sigma*particle->sigma))/ 91 | (2*M_PI*particle->sigma*particle->sigma); 92 | } 93 | idx[i] = i; 94 | } 95 | } 96 | ierr = MatSetUp((*submat)[id]);CHKERRQ(ierr); 97 | ierr = MatSetValues((*submat)[id],cluster->npbufferi,idx,cluster->npbufferi,idx,A,INSERT_VALUES);CHKERRQ(ierr); 98 | ierr = MatAssemblyBegin((*submat)[id],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 99 | ierr = MatAssemblyEnd((*submat)[id],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 100 | } 101 | ierr = VecRestoreArray(particle->gi,&particle->gil);CHKERRQ(ierr); 102 | delete[] A; 103 | delete[] idx; 104 | 105 | PetscFunctionReturn(0); 106 | } 107 | -------------------------------------------------------------------------------- /2d/cylinder.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "par.h" 12 | #include "mpi_range.h" 13 | #include "get_cluster.h" 14 | #include "get_buffer.h" 15 | #include "get_trunc.h" 16 | #include "get_vorticity.h" 17 | #include "matmult.h" 18 | 19 | #include "vorticity_evaluation.cxx" 20 | #include "rbf_interpolation.cxx" 21 | 22 | int main(int argc,char **argv) 23 | { 24 | int i,its,nsigma_box,sigma_buffer,sigma_trunc,ni,nj,ista,iend,nlocal; 25 | double sigma,overlap,h,*xd,*yd,*gd,*ed,*wd,t,err,errd; 26 | clock_t tic,toc; 27 | tic = std::clock(); 28 | 29 | std::ifstream fid; 30 | std::ofstream fid0,fid1; 31 | MPI2 mpi; 32 | 33 | PetscErrorCode ierr; 34 | Vec x,y,g,e,w; 35 | 36 | PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL); 37 | MPI_Comm_size(PETSC_COMM_WORLD,&mpi.nprocs); 38 | 
MPI_Comm_rank(PETSC_COMM_WORLD,&mpi.myrank); 39 | 40 | /* 41 | particle parameters 42 | */ 43 | sigma = 0.007; 44 | overlap = atof(argv[1]); 45 | h = overlap*sigma; 46 | 47 | /* 48 | cluster parameters 49 | */ 50 | nsigma_box = atoi(argv[2]); 51 | sigma_buffer = (int)nsigma_box*atof(argv[3]); 52 | if (overlap < 0.8+epsf) { 53 | sigma_trunc = nsigma_box+6; 54 | } else { 55 | sigma_trunc = nsigma_box+4; 56 | } 57 | 58 | /* 59 | calculate problem size 60 | */ 61 | ni = 5346; 62 | if(mpi.myrank==0) { 63 | printf("||---------------------------------------\n"); 64 | printf("|| number of particles : %d \n",ni); 65 | printf("|| std of Gaussian (sigma) : %f \n",sigma); 66 | printf("|| overlap ratio (h/sigma) : %f \n",overlap); 67 | printf("|| non-overlapping subdomain : %d sigma\n",nsigma_box); 68 | printf("|| overlapping subdomain : %d sigma\n",(int)fmin(sigma_buffer,floor(2/sigma))); 69 | printf("|| entire domain : %d sigma\n",(int)floor(2/sigma)); 70 | printf("||---------------------------------------\n"); 71 | } 72 | nj = ni; 73 | 74 | /* 75 | generate particles 76 | */ 77 | xd = new double [ni]; 78 | yd = new double [ni]; 79 | gd = new double [ni]; 80 | ed = new double [ni]; 81 | wd = new double [ni]; 82 | fid.open("cdata"); 83 | for (i=0; i> xd[i]; 85 | fid >> yd[i]; 86 | fid >> gd[i]; 87 | fid >> ed[i]; 88 | wd[i] = ed[i]; 89 | } 90 | fid.close(); 91 | ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); 92 | ierr = VecSetSizes(x,PETSC_DECIDE,ni);CHKERRQ(ierr); 93 | ierr = VecSetFromOptions(x);CHKERRQ(ierr); 94 | ierr = VecDuplicate(x,&y);CHKERRQ(ierr); 95 | ierr = VecDuplicate(x,&g);CHKERRQ(ierr); 96 | ierr = VecDuplicate(x,&e);CHKERRQ(ierr); 97 | ierr = VecDuplicate(x,&w);CHKERRQ(ierr); 98 | ierr = VecGetOwnershipRange(x,&ista,&iend);CHKERRQ(ierr); 99 | nlocal = iend-ista; 100 | for(i=ista; ideviceOffsetSize) { 78 | if(deviceOffsetSize!=0) cudaFree(deviceOffset); 79 | cudaMalloc((void**)&deviceOffset,hostOffsetSize); 80 | deviceOffsetSize=hostOffsetSize; 81 | } 82 
| if (hostTargetSize>deviceTargetSize) { 83 | if(deviceTargetSize!=0) { 84 | cudaFree(deviceTargetX); 85 | cudaFree(deviceTargetY); 86 | cudaFree(deviceTargetW); 87 | } 88 | cudaMalloc((void**)&deviceTargetX,hostTargetSize); 89 | cudaMalloc((void**)&deviceTargetY,hostTargetSize); 90 | cudaMalloc((void**)&deviceTargetW,hostTargetSize); 91 | deviceTargetSize=hostTargetSize; 92 | } 93 | if (hostSourceSize>deviceSourceSize) { 94 | if(deviceSourceSize!=0) { 95 | cudaFree(deviceSourceX); 96 | cudaFree(deviceSourceY); 97 | cudaFree(deviceSourceG); 98 | } 99 | cudaMalloc((void**)&deviceSourceX,hostSourceSize); 100 | cudaMalloc((void**)&deviceSourceY,hostSourceSize); 101 | cudaMalloc((void**)&deviceSourceG,hostSourceSize); 102 | deviceSourceSize=hostSourceSize; 103 | } 104 | 105 | cudaMemcpy(deviceOffset,hostOffset,hostOffsetSize,cudaMemcpyHostToDevice); 106 | cudaMemcpy(deviceTargetX,hostTargetX,hostTargetSize,cudaMemcpyHostToDevice); 107 | cudaMemcpy(deviceTargetY,hostTargetY,hostTargetSize,cudaMemcpyHostToDevice); 108 | cudaMemcpy(deviceSourceX,hostSourceX,hostSourceSize,cudaMemcpyHostToDevice); 109 | cudaMemcpy(deviceSourceY,hostSourceY,hostSourceSize,cudaMemcpyHostToDevice); 110 | cudaMemcpy(deviceSourceG,hostSourceG,hostSourceSize,cudaMemcpyHostToDevice); 111 | 112 | dim3 block(threadsPerBlock); 113 | dim3 grid(iblok); 114 | kernel<<< grid, block >>>(deviceOffset,deviceTargetX,deviceTargetY,deviceTargetW, 115 | sigma,deviceSourceX,deviceSourceY,deviceSourceG); 116 | 117 | cudaMemcpy(hostTargetW,deviceTargetW,hostTargetSize,cudaMemcpyDeviceToHost); 118 | 119 | } 120 | -------------------------------------------------------------------------------- /2d/main.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "par.h" 12 | #include "get_cluster.h" 13 | #include "get_buffer.h" 14 | #include "get_trunc.h" 
15 | #include "get_vorticity.h" 16 | 17 | extern PetscErrorCode vorticity_evaluation(Vec,Vec,Vec,Vec,Vec,Vec,double,int,int,int); 18 | extern PetscErrorCode rbf_interpolation(Vec,Vec,Vec,Vec,double,int,int,int,int*); 19 | 20 | int main(int argc,char **argv) 21 | { 22 | int i,its,nsigma_box,sigma_buffer,sigma_trunc,nx,ny,ni,nj,ista,iend,nlocal; 23 | PetscReal sigma,overlap,h,xmin,xmax,ymin,ymax,xd,yd,gd,ed,wd,t,err,errd; 24 | clock_t tic,toc; 25 | tic = std::clock(); 26 | 27 | std::ofstream fid0,fid1; 28 | PARAMETER parameter; 29 | MPI2 mpi; 30 | 31 | PetscErrorCode ierr; 32 | Vec x,y,g,e,w; 33 | 34 | PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL); 35 | MPI_Comm_size(PETSC_COMM_WORLD,&mpi.nprocs); 36 | MPI_Comm_rank(PETSC_COMM_WORLD,&mpi.myrank); 37 | 38 | /* 39 | physical parameters 40 | */ 41 | parameter.vis = 0.1; 42 | parameter.t = 1; 43 | 44 | /* 45 | particle parameters 46 | */ 47 | sigma = 0.005; 48 | overlap = atof(argv[1]); 49 | h = overlap*sigma; 50 | xmin = 0; 51 | xmax = 1; 52 | ymin = 0; 53 | ymax = 1; 54 | 55 | /* 56 | cluster parameters 57 | */ 58 | nsigma_box = atoi(argv[2]); 59 | sigma_buffer = (int)nsigma_box*atof(argv[3]); 60 | if (overlap < 0.8+epsf) { 61 | sigma_trunc = nsigma_box+6; 62 | } else { 63 | sigma_trunc = nsigma_box+4; 64 | } 65 | 66 | /* 67 | calculate problem size 68 | */ 69 | nx = (int)floor((xmax-xmin+epsf)/h)+1; 70 | ny = (int)floor((ymax-ymin+epsf)/h)+1; 71 | ni = nx*ny; 72 | if(mpi.myrank==0) { 73 | printf("||---------------------------------------\n"); 74 | printf("|| number of particles : %d \n",ni); 75 | printf("|| std of Gaussian (sigma) : %f \n",sigma); 76 | printf("|| overlap ratio (h/sigma) : %f \n",overlap); 77 | printf("|| non-overlapping subdomain : %d sigma\n",nsigma_box); 78 | printf("|| overlapping subdomain : %d sigma\n",(int)fmin(sigma_buffer,floor(2/sigma))); 79 | printf("|| entire domain : %d sigma\n",(int)floor(2/sigma)); 80 | printf("||---------------------------------------\n"); 81 | } 82 | nj = ni; 83 | 
84 | /* 85 | generate particles 86 | */ 87 | ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); 88 | ierr = VecSetSizes(x,PETSC_DECIDE,ni);CHKERRQ(ierr); 89 | ierr = VecSetFromOptions(x);CHKERRQ(ierr); 90 | ierr = VecDuplicate(x,&y);CHKERRQ(ierr); 91 | ierr = VecDuplicate(x,&g);CHKERRQ(ierr); 92 | ierr = VecDuplicate(x,&e);CHKERRQ(ierr); 93 | ierr = VecDuplicate(x,&w);CHKERRQ(ierr); 94 | ierr = VecGetOwnershipRange(x,&ista,&iend);CHKERRQ(ierr); 95 | nlocal = iend-ista; 96 | for(i=ista; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "par.h" 12 | #include "get_cluster.h" 13 | #include "get_buffer.h" 14 | #include "get_trunc.h" 15 | #include "get_vorticity.h" 16 | 17 | extern PetscErrorCode vorticity_evaluation(Vec,Vec,Vec,Vec,Vec,Vec,Vec,Vec,double,int,int,int); 18 | extern PetscErrorCode rbf_interpolation(Vec,Vec,Vec,Vec,Vec,double,int,int,int,int*); 19 | 20 | int main(int argc,char **argv) 21 | { 22 | int i,its,nsigma_box,sigma_buffer,sigma_trunc,nx,ny,nz,ni,nj,ista,iend,nlocal; 23 | double sigma,overlap,h,xmin,xmax,ymin,ymax,zmin,zmax,xd,yd,zd,gd,ed,wd,t,err,errd; 24 | clock_t tic,toc; 25 | tic = std::clock(); 26 | 27 | std::ofstream fid0,fid1; 28 | PARAMETER parameter; 29 | MPI2 mpi; 30 | 31 | PetscErrorCode ierr; 32 | Vec x,y,z,g,e,w; 33 | 34 | PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL); 35 | MPI_Comm_size(PETSC_COMM_WORLD,&mpi.nprocs); 36 | MPI_Comm_rank(PETSC_COMM_WORLD,&mpi.myrank); 37 | 38 | /* 39 | physical parameters 40 | */ 41 | parameter.vis = 0.1; 42 | parameter.t = 1; 43 | 44 | /* 45 | particle parameters 46 | */ 47 | sigma = 0.1; 48 | overlap = atof(argv[1]); 49 | h = overlap*sigma; 50 | xmin = -1; 51 | xmax = 1; 52 | ymin = -1; 53 | ymax = 1; 54 | zmin = -1; 55 | zmax = 1; 56 | 57 | /* 58 | cluster parameters 59 | */ 60 | nsigma_box = atoi(argv[2]); 61 | sigma_buffer = (int)nsigma_box*atof(argv[3]); 62 | if (overlap < 0.8+epsf) { 63 | sigma_trunc = nsigma_box+6; 
64 | } else { 65 | sigma_trunc = nsigma_box+4; 66 | } 67 | 68 | /* 69 | calculate problem size 70 | */ 71 | nx = (int)floor((xmax-xmin+epsf)/h)+1; 72 | ny = (int)floor((ymax-ymin+epsf)/h)+1; 73 | nz = (int)floor((zmax-zmin+epsf)/h)+1; 74 | ni = nx*ny*nz; 75 | if(mpi.myrank==0) { 76 | printf("||---------------------------------------\n"); 77 | printf("|| number of particles : %d \n",ni); 78 | printf("|| std of Gaussian (sigma) : %f \n",sigma); 79 | printf("|| overlap ratio (h/sigma) : %f \n",overlap); 80 | printf("|| non-overlapping subdomain : %d sigma\n",nsigma_box); 81 | printf("|| overlapping subdomain : %d sigma\n",(int)fmin(sigma_buffer,floor((xmax-xmin)/sigma))); 82 | printf("|| entire domain : %d sigma\n",(int)floor((xmax-xmin)/sigma)); 83 | printf("||---------------------------------------\n"); 84 | } 85 | nj = ni; 86 | 87 | /* 88 | generate particles 89 | */ 90 | ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); 91 | ierr = VecSetSizes(x,PETSC_DECIDE,ni);CHKERRQ(ierr); 92 | ierr = VecSetFromOptions(x);CHKERRQ(ierr); 93 | ierr = VecDuplicate(x,&y);CHKERRQ(ierr); 94 | ierr = VecDuplicate(x,&z);CHKERRQ(ierr); 95 | ierr = VecDuplicate(x,&g);CHKERRQ(ierr); 96 | ierr = VecDuplicate(x,&e);CHKERRQ(ierr); 97 | ierr = VecDuplicate(x,&w);CHKERRQ(ierr); 98 | ierr = VecGetOwnershipRange(x,&ista,&iend);CHKERRQ(ierr); 99 | nlocal = iend-ista; 100 | for(i=ista; ideviceOffsetSize) { 86 | if(deviceOffsetSize!=0) cudaFree(deviceOffset); 87 | cudaMalloc((void**)&deviceOffset,hostOffsetSize); 88 | deviceOffsetSize=hostOffsetSize; 89 | } 90 | if (hostTargetSize>deviceTargetSize) { 91 | if(deviceTargetSize!=0) { 92 | cudaFree(deviceTargetX); 93 | cudaFree(deviceTargetY); 94 | cudaFree(deviceTargetZ); 95 | cudaFree(deviceTargetW); 96 | } 97 | cudaMalloc((void**)&deviceTargetX,hostTargetSize); 98 | cudaMalloc((void**)&deviceTargetY,hostTargetSize); 99 | cudaMalloc((void**)&deviceTargetZ,hostTargetSize); 100 | cudaMalloc((void**)&deviceTargetW,hostTargetSize); 101 | 
deviceTargetSize=hostTargetSize; 102 | } 103 | if (hostSourceSize>deviceSourceSize) { 104 | if(deviceSourceSize!=0) { 105 | cudaFree(deviceSourceX); 106 | cudaFree(deviceSourceY); 107 | cudaFree(deviceSourceZ); 108 | cudaFree(deviceSourceG); 109 | } 110 | cudaMalloc((void**)&deviceSourceX,hostSourceSize); 111 | cudaMalloc((void**)&deviceSourceY,hostSourceSize); 112 | cudaMalloc((void**)&deviceSourceZ,hostSourceSize); 113 | cudaMalloc((void**)&deviceSourceG,hostSourceSize); 114 | deviceSourceSize=hostSourceSize; 115 | } 116 | 117 | cudaMemcpy(deviceOffset,hostOffset,hostOffsetSize,cudaMemcpyHostToDevice); 118 | cudaMemcpy(deviceTargetX,hostTargetX,hostTargetSize,cudaMemcpyHostToDevice); 119 | cudaMemcpy(deviceTargetY,hostTargetY,hostTargetSize,cudaMemcpyHostToDevice); 120 | cudaMemcpy(deviceTargetZ,hostTargetZ,hostTargetSize,cudaMemcpyHostToDevice); 121 | cudaMemcpy(deviceSourceX,hostSourceX,hostSourceSize,cudaMemcpyHostToDevice); 122 | cudaMemcpy(deviceSourceY,hostSourceY,hostSourceSize,cudaMemcpyHostToDevice); 123 | cudaMemcpy(deviceSourceZ,hostSourceZ,hostSourceSize,cudaMemcpyHostToDevice); 124 | cudaMemcpy(deviceSourceG,hostSourceG,hostSourceSize,cudaMemcpyHostToDevice); 125 | 126 | dim3 block(threadsPerBlock); 127 | dim3 grid(iblok); 128 | kernel<<< grid, block >>>(deviceOffset,deviceTargetX,deviceTargetY,deviceTargetZ,deviceTargetW, 129 | sigma,deviceSourceX,deviceSourceY,deviceSourceZ,deviceSourceG); 130 | 131 | cudaMemcpy(hostTargetW,deviceTargetW,hostTargetSize,cudaMemcpyDeviceToHost); 132 | 133 | } 134 | -------------------------------------------------------------------------------- /2d/matmultgpu.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include "par.h" 3 | #include "get_buffer.h" 4 | #include "get_trunc.h" 5 | 6 | const int threadsPerBlock=128; 7 | extern void gpumatmult(float*, float*, float*, float*, float*, float*, int*, int, float, int, int); 8 | 9 | PetscErrorCode mymatmult(Mat A,Vec 
x,Vec y) 10 | { 11 | int i,j,ic,il,ista,iend; 12 | int iblok,isize,im,jc,*offset; 13 | float sigma; 14 | float *targetX,*targetY,*targetW,*sourceX,*sourceY,*sourceG; 15 | PetscScalar *ax,*ay; 16 | PetscErrorCode ierr; 17 | BOTH *both; 18 | ierr = MatShellGetContext(A, (void **) &both);CHKERRQ(ierr); 19 | PARTICLE *particle = both->p; 20 | CLUSTER *cluster = both->c; 21 | 22 | offset = new int [cluster->n+1]; 23 | targetX = new float [cluster->n*threadsPerBlock]; 24 | targetY = new float [cluster->n*threadsPerBlock]; 25 | targetW = new float [cluster->n*threadsPerBlock]; 26 | sourceX = new float [cluster->n*cluster->maxtrunc]; 27 | sourceY = new float [cluster->n*cluster->maxtrunc]; 28 | sourceG = new float [cluster->n*cluster->maxtrunc]; 29 | PetscFunctionBegin; 30 | ierr = VecGetArray(x,&ax);CHKERRQ(ierr); 31 | ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 32 | for (i=particle->ista; iiend; i++) { 33 | ierr = VecSetValues(particle->gi,1,&i,&ax[i-particle->ista],INSERT_VALUES);CHKERRQ(ierr); 34 | } 35 | ierr = VecAssemblyBegin(particle->gi);CHKERRQ(ierr); 36 | ierr = VecAssemblyEnd(particle->gi);CHKERRQ(ierr); 37 | ierr = VecGhostUpdateBegin(particle->gi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 38 | ierr = VecGhostUpdateEnd(particle->gi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 39 | ierr = VecGetArray(particle->gi,&particle->gil);CHKERRQ(ierr); 40 | iblok = 0; 41 | jc = 0; 42 | for (ic=cluster->icsta; icicend; ic++) { 43 | Get_trunc trunc; 44 | trunc.get_trunc(particle,cluster,ic); 45 | ista = cluster->ista[ic]; 46 | iend = cluster->iend[ic]; 47 | isize = iend-ista+1; 48 | for (i=0; ixil[i+ista]); 51 | targetY[im] = float(particle->yil[i+ista]); 52 | } 53 | for (i=isize; inptruncj; j++) { 60 | sourceX[jc] = float(cluster->xjt[j]); 61 | sourceY[jc] = float(cluster->yjt[j]); 62 | sourceG[jc] = float(cluster->gjt[j]); 63 | jc++; 64 | } 65 | iblok++; 66 | } 67 | offset[iblok] = jc; 68 | 69 | sigma = float(particle->sigma); 70 | 
gpumatmult(targetX,targetY,targetW,sourceX,sourceY,sourceG,offset,iblok,sigma,cluster->n,cluster->maxtrunc); 71 | 72 | iblok = 0; 73 | for (ic=cluster->icsta; icicend; ic++) { 74 | Get_trunc trunc; 75 | trunc.get_trunc(particle,cluster,ic); 76 | ista = cluster->ista[ic]; 77 | iend = cluster->iend[ic]; 78 | isize = iend-ista+1; 79 | for (i=0; iilocal[i+ista]; 81 | im = iblok*threadsPerBlock+i; 82 | ay[il-particle->ista] = targetW[im]; 83 | } 84 | iblok++; 85 | } 86 | ierr = VecRestoreArray(particle->gi,&particle->gil);CHKERRQ(ierr); 87 | ierr = VecRestoreArray(x,&ax);CHKERRQ(ierr); 88 | ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 89 | delete[] offset; 90 | delete[] targetX; 91 | delete[] targetY; 92 | delete[] targetW; 93 | delete[] sourceX; 94 | delete[] sourceY; 95 | delete[] sourceG; 96 | PetscFunctionReturn(0); 97 | } 98 | 99 | PetscErrorCode mysubmat(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[]) 100 | { 101 | int i,ic,id,j,ista,iend; 102 | double dx,dy; 103 | PetscInt *idx; 104 | PetscScalar *A; 105 | PetscErrorCode ierr; 106 | BOTH *both; 107 | ierr = MatShellGetContext(mat, (void **) &both);CHKERRQ(ierr); 108 | PARTICLE *particle = both->p; 109 | CLUSTER *cluster = both->c; 110 | 111 | idx = new PetscInt [cluster->maxbuffer]; 112 | A = new PetscScalar [cluster->maxbuffer*cluster->maxbuffer]; 113 | 114 | PetscFunctionBegin; 115 | ierr = PetscMalloc(n * sizeof(Mat*), submat);CHKERRQ(ierr); 116 | ierr = VecGetArray(particle->gi,&particle->gil);CHKERRQ(ierr); 117 | for (ic = cluster->icsta; ic < cluster->icend; ic++) { 118 | id = ic-cluster->icsta; 119 | Get_buffer buffer; 120 | buffer.get_buffer(particle,cluster,ic); 121 | ierr = MatCreate(PETSC_COMM_SELF,&(*submat)[id]);CHKERRQ(ierr); 122 | ierr = MatSetOptionsPrefix((*submat)[id], "sub_");CHKERRQ(ierr); 123 | ierr = MatSetSizes((*submat)[id],cluster->npbufferi,cluster->npbufferi,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr 124 | ); 125 | ierr = 
MatSetFromOptions((*submat)[id]);CHKERRQ(ierr); 126 | ierr = MatSeqAIJSetPreallocation((*submat)[id],cluster->npbufferi,PETSC_NULL);CHKERRQ(ierr); 127 | ista = cluster->ista[ic]; 128 | iend = cluster->iend[ic]; 129 | if (ista <= iend) { 130 | for (i=0; inpbufferi; i++) { 131 | for (j=0; jnpbufferi; j++) { 132 | dx = cluster->xib[i]-cluster->xib[j]; 133 | dy = cluster->yib[i]-cluster->yib[j]; 134 | A[i*cluster->npbufferi+j] = exp(-(dx*dx+dy*dy)/(2*particle->sigma*particle->sigma))/ 135 | (2*M_PI*particle->sigma*particle->sigma); 136 | } 137 | idx[i] = i; 138 | } 139 | } 140 | ierr = MatSetUp((*submat)[id]);CHKERRQ(ierr); 141 | ierr = MatSetValues((*submat)[id],cluster->npbufferi,idx,cluster->npbufferi,idx,A,INSERT_VALUES);CHKERRQ(ierr); 142 | ierr = MatAssemblyBegin((*submat)[id],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 143 | ierr = MatAssemblyEnd((*submat)[id],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 144 | } 145 | ierr = VecRestoreArray(particle->gi,&particle->gil);CHKERRQ(ierr); 146 | delete[] A; 147 | delete[] idx; 148 | 149 | PetscFunctionReturn(0); 150 | } 151 | -------------------------------------------------------------------------------- /3d/matmultgpu.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include "par.h" 3 | #include "get_buffer.h" 4 | #include "get_trunc.h" 5 | 6 | const int threadsPerBlock=128; 7 | extern void gpumatmult(float*, float*, float *, float*, float*, float*, float*, float*, int*, int, float, int, int); 8 | 9 | PetscErrorCode mymatmult(Mat A,Vec x,Vec y) 10 | { 11 | int i,j,ic,il,ista,iend; 12 | int iblok,isize,im,jc,*offset; 13 | float sigma; 14 | float *targetX,*targetY,*targetZ,*targetW,*sourceX,*sourceY,*sourceZ,*sourceG; 15 | PetscScalar *ax,*ay; 16 | PetscErrorCode ierr; 17 | BOTH *both; 18 | ierr = MatShellGetContext(A, (void **) &both);CHKERRQ(ierr); 19 | PARTICLE *particle = both->p; 20 | CLUSTER *cluster = both->c; 21 | 22 | offset = new int [cluster->n+1]; 23 | targetX = new float 
[cluster->n*threadsPerBlock]; 24 | targetY = new float [cluster->n*threadsPerBlock]; 25 | targetZ = new float [cluster->n*threadsPerBlock]; 26 | targetW = new float [cluster->n*threadsPerBlock]; 27 | sourceX = new float [cluster->n*cluster->maxtrunc]; 28 | sourceY = new float [cluster->n*cluster->maxtrunc]; 29 | sourceZ = new float [cluster->n*cluster->maxtrunc]; 30 | sourceG = new float [cluster->n*cluster->maxtrunc]; 31 | PetscFunctionBegin; 32 | ierr = VecGetArray(x,&ax);CHKERRQ(ierr); 33 | ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 34 | for (i=particle->ista; iiend; i++) { 35 | ierr = VecSetValues(particle->gi,1,&i,&ax[i-particle->ista],INSERT_VALUES);CHKERRQ(ierr); 36 | } 37 | ierr = VecAssemblyBegin(particle->gi);CHKERRQ(ierr); 38 | ierr = VecAssemblyEnd(particle->gi);CHKERRQ(ierr); 39 | ierr = VecGhostUpdateBegin(particle->gi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 40 | ierr = VecGhostUpdateEnd(particle->gi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 41 | ierr = VecGetArray(particle->gi,&particle->gil);CHKERRQ(ierr); 42 | iblok = 0; 43 | jc = 0; 44 | for (ic=cluster->icsta; icicend; ic++) { 45 | Get_trunc trunc; 46 | trunc.get_trunc(particle,cluster,ic); 47 | ista = cluster->ista[ic]; 48 | iend = cluster->iend[ic]; 49 | isize = iend-ista+1; 50 | for (i=0; ixil[i+ista]); 53 | targetY[im] = float(particle->yil[i+ista]); 54 | targetZ[im] = float(particle->zil[i+ista]); 55 | } 56 | for (i=isize; inptruncj; j++) { 64 | sourceX[jc] = float(cluster->xjt[j]); 65 | sourceY[jc] = float(cluster->yjt[j]); 66 | sourceZ[jc] = float(cluster->zjt[j]); 67 | sourceG[jc] = float(cluster->gjt[j]); 68 | jc++; 69 | } 70 | iblok++; 71 | } 72 | offset[iblok] = jc; 73 | 74 | sigma = float(particle->sigma); 75 | gpumatmult(targetX,targetY,targetZ,targetW,sourceX,sourceY,sourceZ,sourceG, 76 | offset,iblok,sigma,cluster->n,cluster->maxtrunc); 77 | 78 | iblok = 0; 79 | for (ic=cluster->icsta; icicend; ic++) { 80 | Get_trunc trunc; 81 | trunc.get_trunc(particle,cluster,ic); 82 | 
ista = cluster->ista[ic]; 83 | iend = cluster->iend[ic]; 84 | isize = iend-ista+1; 85 | for (i=0; iilocal[i+ista]; 87 | im = iblok*threadsPerBlock+i; 88 | ay[il-particle->ista] = targetW[im]; 89 | } 90 | iblok++; 91 | } 92 | ierr = VecRestoreArray(particle->gi,&particle->gil);CHKERRQ(ierr); 93 | ierr = VecRestoreArray(x,&ax);CHKERRQ(ierr); 94 | ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 95 | delete[] offset; 96 | delete[] targetX; 97 | delete[] targetY; 98 | delete[] targetZ; 99 | delete[] targetW; 100 | delete[] sourceX; 101 | delete[] sourceY; 102 | delete[] sourceZ; 103 | delete[] sourceG; 104 | PetscFunctionReturn(0); 105 | } 106 | 107 | PetscErrorCode mysubmat(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[]) 108 | { 109 | int i,ic,id,j,ista,iend; 110 | double dx,dy,dz; 111 | PetscInt *idx; 112 | PetscScalar *A; 113 | PetscErrorCode ierr; 114 | BOTH *both; 115 | ierr = MatShellGetContext(mat, (void **) &both);CHKERRQ(ierr); 116 | PARTICLE *particle = both->p; 117 | CLUSTER *cluster = both->c; 118 | 119 | idx = new PetscInt [cluster->maxbuffer]; 120 | A = new PetscScalar [cluster->maxbuffer*cluster->maxbuffer]; 121 | 122 | PetscFunctionBegin; 123 | ierr = PetscMalloc(n * sizeof(Mat*), submat);CHKERRQ(ierr); 124 | ierr = VecGetArray(particle->gi,&particle->gil);CHKERRQ(ierr); 125 | for (ic = cluster->icsta; ic < cluster->icend; ic++) { 126 | id = ic-cluster->icsta; 127 | Get_buffer buffer; 128 | buffer.get_buffer(particle,cluster,ic); 129 | ierr = MatCreate(PETSC_COMM_SELF,&(*submat)[id]);CHKERRQ(ierr); 130 | ierr = MatSetOptionsPrefix((*submat)[id], "sub_");CHKERRQ(ierr); 131 | ierr = MatSetSizes((*submat)[id],cluster->npbufferi,cluster->npbufferi,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr 132 | ); 133 | ierr = MatSetFromOptions((*submat)[id]);CHKERRQ(ierr); 134 | ierr = MatSeqAIJSetPreallocation((*submat)[id],cluster->npbufferi,PETSC_NULL);CHKERRQ(ierr); 135 | ista = cluster->ista[ic]; 136 | iend = cluster->iend[ic]; 137 | 
if (ista <= iend) { 138 | for (i=0; inpbufferi; i++) { 139 | for (j=0; jnpbufferi; j++) { 140 | dx = cluster->xib[i]-cluster->xib[j]; 141 | dy = cluster->yib[i]-cluster->yib[j]; 142 | dz = cluster->zib[i]-cluster->zib[j]; 143 | A[i*cluster->npbufferi+j] = exp(-(dx*dx+dy*dy+dz*dz)/(2*particle->sigma*particle->sigma))/ 144 | (2*M_PI*particle->sigma*particle->sigma); 145 | } 146 | idx[i] = i; 147 | } 148 | } 149 | ierr = MatSetUp((*submat)[id]);CHKERRQ(ierr); 150 | ierr = MatSetValues((*submat)[id],cluster->npbufferi,idx,cluster->npbufferi,idx,A,INSERT_VALUES);CHKERRQ(ierr); 151 | ierr = MatAssemblyBegin((*submat)[id],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 152 | ierr = MatAssemblyEnd((*submat)[id],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 153 | } 154 | ierr = VecRestoreArray(particle->gi,&particle->gil);CHKERRQ(ierr); 155 | delete[] A; 156 | delete[] idx; 157 | 158 | PetscFunctionReturn(0); 159 | } 160 | -------------------------------------------------------------------------------- /2d/main2.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "par.h" 12 | #include "mpi_range.h" 13 | #include "get_cluster.h" 14 | #include "get_buffer.h" 15 | #include "get_trunc.h" 16 | #include "get_vorticity.h" 17 | #include "matmult.h" 18 | 19 | #include "vorticity_evaluation.cxx" 20 | #include "rbf_interpolation.cxx" 21 | 22 | int main(int argc,char **argv) 23 | { 24 | int i,its,nsigma_box,sigma_buffer,sigma_trunc,nx,ny,nxg,nyg,ni,nj,ng,ista,iend,istag,iendg,nlocal; 25 | double sigma,overlap,h,hg,xmin,xmax,ymin,ymax,xd,yd,gd,ed,wd,t,err,errd; 26 | clock_t tic,toc; 27 | tic = std::clock(); 28 | 29 | std::ofstream fid0,fid1; 30 | PARAMETER parameter; 31 | MPI2 mpi; 32 | 33 | PetscErrorCode ierr; 34 | PetscViewer viewer; 35 | Vec x,y,g,e,w,xg,yg,wg,we; 36 | 37 | PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL); 38 | 
MPI_Comm_size(PETSC_COMM_WORLD,&mpi.nprocs); 39 | MPI_Comm_rank(PETSC_COMM_WORLD,&mpi.myrank); 40 | 41 | /* 42 | physical parameters 43 | */ 44 | parameter.vis = 0.01; 45 | parameter.t = 1; 46 | 47 | /* 48 | particle parameters 49 | */ 50 | h = 0.02; 51 | overlap = atof(argv[1]); 52 | sigma = h/overlap; 53 | hg = h/2; 54 | xmin = -4; 55 | xmax = 4; 56 | ymin = -4; 57 | ymax = 4; 58 | 59 | /* 60 | cluster parameters 61 | */ 62 | nsigma_box = atoi(argv[2]); 63 | sigma_buffer = (int)nsigma_box*atof(argv[3]); 64 | if (overlap < 0.8+epsf) { 65 | sigma_trunc = nsigma_box+16; 66 | } else { 67 | sigma_trunc = nsigma_box+14; 68 | } 69 | 70 | /* 71 | calculate problem size 72 | */ 73 | nx = (int)floor((xmax-xmin+epsf)/h)+1; 74 | ny = (int)floor((ymax-ymin+epsf)/h)+1; 75 | nxg = (int)floor((xmax-xmin+epsf)/hg)+1; 76 | nyg = (int)floor((ymax-ymin+epsf)/hg)+1; 77 | ni = nx*ny; 78 | ng = nxg*nyg; 79 | if(mpi.myrank==0) { 80 | printf("||---------------------------------------\n"); 81 | printf("|| number of particles : %d \n",ni); 82 | printf("|| std of Gaussian (sigma) : %f \n",sigma); 83 | printf("|| overlap ratio (h/sigma) : %f \n",overlap); 84 | printf("|| non-overlapping subdomain : %d sigma\n",nsigma_box); 85 | printf("|| overlapping subdomain : %d sigma\n",(int)fmin(sigma_buffer,floor(2/sigma))); 86 | printf("|| entire domain : %d sigma\n",(int)floor(2/sigma)); 87 | printf("||---------------------------------------\n"); 88 | } 89 | nj = ni; 90 | 91 | /* 92 | generate particles 93 | */ 94 | ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); 95 | ierr = VecSetSizes(x,PETSC_DECIDE,ni);CHKERRQ(ierr); 96 | ierr = VecSetFromOptions(x);CHKERRQ(ierr); 97 | ierr = VecDuplicate(x,&y);CHKERRQ(ierr); 98 | ierr = VecDuplicate(x,&g);CHKERRQ(ierr); 99 | ierr = VecDuplicate(x,&e);CHKERRQ(ierr); 100 | ierr = VecDuplicate(x,&w);CHKERRQ(ierr); 101 | ierr = VecCreate(PETSC_COMM_WORLD,&xg);CHKERRQ(ierr); 102 | ierr = VecSetSizes(xg,PETSC_DECIDE,ng);CHKERRQ(ierr); 103 | ierr = 
VecSetFromOptions(xg);CHKERRQ(ierr); 104 | ierr = VecDuplicate(xg,&yg);CHKERRQ(ierr); 105 | ierr = VecDuplicate(xg,&wg);CHKERRQ(ierr); 106 | ierr = VecDuplicate(xg,&we);CHKERRQ(ierr); 107 | ierr = VecGetOwnershipRange(x,&ista,&iend);CHKERRQ(ierr); 108 | nlocal = iend-ista; 109 | for(i=ista; ineighbor_buffer = (int) ceil((cluster->sigma_buffer - cluster->nsigma_box + epsf) / 2 / cluster->nsigma_box); 21 | cluster->neighbor_trunc = (int) ceil((cluster->sigma_trunc - cluster->nsigma_box + epsf) / 2 / cluster->nsigma_box); 22 | cluster->neighbor_ghost = std::max(cluster->neighbor_buffer, cluster->neighbor_trunc); 23 | 24 | // Calculate cluster size 25 | cluster->xmin = particle->xmin-epsf; 26 | cluster->xmax = particle->xmax+epsf; 27 | cluster->ymin = particle->ymin-epsf; 28 | cluster->ymax = particle->ymax+epsf; 29 | cluster->box_length = cluster->nsigma_box*particle->sigma+epsf; 30 | 31 | // Calculate number of clusters in each direction 32 | cluster->nx = (int)ceil((cluster->xmax - cluster->xmin)/cluster->box_length); 33 | cluster->ny = (int)ceil((cluster->ymax - cluster->ymin)/cluster->box_length); 34 | cluster->n = cluster->nx*cluster->ny; 35 | 36 | // Allocate arrays 37 | cluster->ista = new int [cluster->n]; 38 | cluster->iend = new int [cluster->n]; 39 | cluster->jsta = new int [cluster->n]; 40 | cluster->jend = new int [cluster->n]; 41 | cluster->ix = new int [cluster->n]; 42 | cluster->iy = new int [cluster->n]; 43 | cluster->xc = new double [cluster->n]; 44 | cluster->yc = new double [cluster->n]; 45 | 46 | iplocal = new int [cluster->n]; 47 | jplocal = new int [cluster->n]; 48 | ipglobal = new int [cluster->n]; 49 | jpglobal = new int [cluster->n]; 50 | ipoffset = new int [cluster->n]; 51 | jpoffset = new int [cluster->n]; 52 | idghost = new int [cluster->n]; 53 | 54 | // Calculate the x, y index and coordinates of the center 55 | ic = -1; 56 | for (ix = 0; ix < cluster->nx; ix++) { 57 | for (iy = 0; iy < cluster->ny; iy++) { 58 | ic++; 59 | cluster->ix[ic] 
= ix; 60 | cluster->iy[ic] = iy; 61 | cluster->xc[ic] = cluster->xmin+(ix+0.5)*cluster->box_length; 62 | cluster->yc[ic] = cluster->ymin+(iy+0.5)*cluster->box_length; 63 | cluster->ista[ic] = 0; 64 | cluster->iend[ic] = -1; 65 | cluster->jsta[ic] = 0; 66 | cluster->jend[ic] = -1; 67 | iplocal[ic] = 0; 68 | jplocal[ic] = 0; 69 | ipoffset[ic] = 0; 70 | jpoffset[ic] = 0; 71 | } 72 | } 73 | 74 | /* 75 | assign cluster number to particles 76 | */ 77 | for (ip = 0; ip < particle->nilocal; ip++) { 78 | ix_cluster = (int)floor((particle->xil[ip] - cluster->xmin) / cluster->box_length); 79 | iy_cluster = (int)floor((particle->yil[ip] - cluster->ymin) / cluster->box_length); 80 | ic = ix_cluster * cluster->ny + iy_cluster; 81 | iplocal[ic]++; 82 | } 83 | for (ip = 0; ip < particle->njlocal; ip++) { 84 | ix_cluster = (int)floor((particle->xjl[ip] - cluster->xmin) / cluster->box_length); 85 | iy_cluster = (int)floor((particle->yjl[ip] - cluster->ymin) / cluster->box_length); 86 | ic = ix_cluster * cluster->ny + iy_cluster; 87 | jplocal[ic]++; 88 | } 89 | 90 | /* 91 | communicate and find global box offset (cluster->ista) and local box offset (ipoffset) 92 | */ 93 | MPI_Exscan(iplocal, ipoffset, cluster->n, MPI_INT, MPI_SUM, PETSC_COMM_WORLD); 94 | MPI_Exscan(jplocal, jpoffset, cluster->n, MPI_INT, MPI_SUM, PETSC_COMM_WORLD); 95 | MPI_Allreduce(iplocal, ipglobal, cluster->n, MPI_INT, MPI_SUM, PETSC_COMM_WORLD); 96 | MPI_Allreduce(jplocal, jpglobal, cluster->n, MPI_INT, MPI_SUM, PETSC_COMM_WORLD); 97 | id = 0; 98 | jd = 0; 99 | for (ic = 0; ic < cluster->n; ic++) { 100 | ipoffset[ic] += id; 101 | jpoffset[ic] += jd; 102 | cluster->ista[ic] = id; 103 | cluster->jsta[ic] = jd; 104 | id += ipglobal[ic]; 105 | jd += jpglobal[ic]; 106 | cluster->iend[ic] = id-1; 107 | cluster->jend[ic] = jd-1; 108 | } 109 | 110 | mpi.nsta = 0; 111 | mpi.nend = cluster->n-1; 112 | mpi_range(&mpi); 113 | cluster->icsta = mpi.ista; 114 | cluster->icend = mpi.iend; 115 | 116 | 
VecCreate(PETSC_COMM_WORLD,&particle->ii); 117 | VecCreate(PETSC_COMM_WORLD,&particle->jj); 118 | VecSetSizes(particle->ii,particle->nilocal,PETSC_DETERMINE); 119 | VecSetSizes(particle->jj,particle->njlocal,PETSC_DETERMINE); 120 | VecSetFromOptions(particle->ii); 121 | VecSetFromOptions(particle->jj); 122 | 123 | for (ip = 0; ip < particle->nilocal; ip++) { 124 | ix_cluster = (int)floor((particle->xil[ip] - cluster->xmin) / cluster->box_length); 125 | iy_cluster = (int)floor((particle->yil[ip] - cluster->ymin) / cluster->box_length); 126 | ic = ix_cluster * cluster->ny + iy_cluster; 127 | sort = ip + particle->ista; 128 | VecSetValues(particle->ii,1,&ipoffset[ic],&sort,INSERT_VALUES); 129 | ipoffset[ic]++; 130 | } 131 | for (ip = 0; ip < particle->njlocal; ip++) { 132 | ix_cluster = (int)floor((particle->xjl[ip] - cluster->xmin) / cluster->box_length); 133 | iy_cluster = (int)floor((particle->yjl[ip] - cluster->ymin) / cluster->box_length); 134 | ic = ix_cluster * cluster->ny + iy_cluster; 135 | sort = ip + particle->jsta; 136 | VecSetValues(particle->jj, 1, &jpoffset[ic], &sort, INSERT_VALUES); 137 | jpoffset[ic]++; 138 | } 139 | VecAssemblyBegin(particle->ii); 140 | VecAssemblyEnd(particle->ii); 141 | VecAssemblyBegin(particle->jj); 142 | VecAssemblyEnd(particle->jj); 143 | 144 | particle->ista = cluster->ista[cluster->icsta]; 145 | particle->jsta = cluster->jsta[cluster->icsta]; 146 | particle->iend = cluster->iend[cluster->icend-1]+1; 147 | particle->jend = cluster->jend[cluster->icend-1]+1; 148 | particle->nilocal = particle->iend-particle->ista; 149 | particle->njlocal = particle->jend-particle->jsta; 150 | 151 | /* 152 | determine size and create buffer & trunc temp arrays 153 | */ 154 | cluster->niperbox = 0; 155 | cluster->njperbox = 0; 156 | for (ic = 0; ic < cluster->n; ic++) { 157 | if(cluster->iend[ic] - cluster->ista[ic] + 1 > cluster->niperbox) { 158 | cluster->niperbox = cluster->iend[ic] - cluster->ista[ic] + 1; 159 | } 160 | if(cluster->jend[ic] 
- cluster->jsta[ic] + 1 > cluster->njperbox) { 161 | cluster->njperbox = cluster->jend[ic] - cluster->jsta[ic] + 1; 162 | } 163 | } 164 | 165 | cluster->maxbuffer = cluster->niperbox * (2 * cluster->neighbor_buffer + 1) * (2 * cluster->neighbor_buffer + 1); 166 | cluster->maxtrunc = cluster->njperbox * (2 * cluster->neighbor_trunc + 1) * (2 * cluster->neighbor_trunc + 1); 167 | 168 | for (ic = 0; ic < cluster->n; ic++) { 169 | idghost[ic] = 0; 170 | } 171 | for (ic = cluster->icsta; ic < cluster->icend; ic++) { 172 | idghost[ic] = 1; 173 | } 174 | cluster->ncghost = 0; 175 | for (ic = cluster->icsta; ic < cluster->icend; ic++) { 176 | ix = cluster->ix[ic]; 177 | iy = cluster->iy[ic]; 178 | jx_min = std::max(0, ix - cluster->neighbor_ghost); 179 | jx_max = std::min(cluster->nx - 1, ix + cluster->neighbor_ghost); 180 | jy_min = std::max(0, iy - cluster->neighbor_ghost); 181 | jy_max = std::min(cluster->ny - 1, iy + cluster->neighbor_ghost); 182 | for (jx = jx_min; jx <= jx_max; jx++) { 183 | for (jy = jy_min; jy <= jy_max; jy++) { 184 | jc = jx * cluster->ny + jy; 185 | if (idghost[jc] == 0) { 186 | idghost[jc] = 2; 187 | cluster->ncghost++; 188 | } 189 | } 190 | } 191 | } 192 | cluster->nclocal = cluster->icend - cluster->icsta; 193 | cluster->maxghost = std::max(cluster->niperbox, cluster->njperbox) * cluster->ncghost; 194 | cluster->maxlocal = std::max(cluster->niperbox, cluster->njperbox) * (cluster->nclocal + cluster->ncghost); 195 | cluster->ighost = new int [cluster->maxghost]; 196 | cluster->ilocal = new int [cluster->maxlocal]; 197 | cluster->jghost = new int [cluster->maxghost]; 198 | cluster->jlocal = new int [cluster->maxlocal]; 199 | 200 | /* 201 | local cluster indexing 202 | */ 203 | nilocal = 0; 204 | for (ic = 0; ic < cluster->n; ic++) { 205 | if (idghost[ic] == 1) { 206 | ista = cluster->ista[ic]; 207 | iend = cluster->iend[ic]; 208 | cluster->ista[ic] = nilocal; 209 | for (j = ista; j <= iend; j++) { 210 | cluster->ilocal[nilocal] = j; 211 | 
nilocal++; 212 | } 213 | cluster->iend[ic] = nilocal-1; 214 | } 215 | } 216 | cluster->nighost = 0; 217 | for (ic = 0; ic < cluster->n; ic++) { 218 | if (idghost[ic] == 2) { 219 | ista = cluster->ista[ic]; 220 | iend = cluster->iend[ic]; 221 | cluster->ista[ic] = nilocal; 222 | for (j = ista; j <= iend; j++) { 223 | cluster->ighost[cluster->nighost] = j; 224 | cluster->nighost++; 225 | cluster->ilocal[nilocal] = j; 226 | nilocal++; 227 | } 228 | cluster->iend[ic] = nilocal-1; 229 | } 230 | } 231 | njlocal = 0; 232 | for (ic = 0; ic < cluster->n; ic++) { 233 | if (idghost[ic] == 1) { 234 | jsta = cluster->jsta[ic]; 235 | jend = cluster->jend[ic]; 236 | cluster->jsta[ic] = njlocal; 237 | for (j = jsta; j <= jend; j++) { 238 | cluster->jlocal[njlocal] = j; 239 | njlocal++; 240 | } 241 | cluster->jend[ic] = njlocal-1; 242 | } 243 | } 244 | cluster->njghost = 0; 245 | for (ic=0; icn; ic++) { 246 | if (idghost[ic] == 2) { 247 | jsta = cluster->jsta[ic]; 248 | jend = cluster->jend[ic]; 249 | cluster->jsta[ic] = njlocal; 250 | for (j = jsta; j <= jend; j++) { 251 | cluster->jghost[cluster->njghost] = j; 252 | cluster->njghost++; 253 | cluster->jlocal[njlocal] = j; 254 | njlocal++; 255 | } 256 | cluster->jend[ic] = njlocal-1; 257 | } 258 | } 259 | 260 | delete[] iplocal; 261 | delete[] jplocal; 262 | delete[] ipglobal; 263 | delete[] jpglobal; 264 | delete[] ipoffset; 265 | delete[] jpoffset; 266 | delete[] idghost; 267 | 268 | cluster->idx = new int [cluster->maxbuffer]; 269 | cluster->xib = new double [cluster->maxbuffer]; 270 | cluster->yib = new double [cluster->maxbuffer]; 271 | cluster->gib = new double [cluster->maxbuffer]; 272 | cluster->wib = new double [cluster->maxbuffer]; 273 | cluster->xjt = new double [cluster->maxtrunc]; 274 | cluster->yjt = new double [cluster->maxtrunc]; 275 | cluster->gjt = new double [cluster->maxtrunc]; 276 | } 277 | }; 278 | 279 | #endif 280 | 281 | -------------------------------------------------------------------------------- 
/3d/get_cluster.h: -------------------------------------------------------------------------------- 1 | #ifndef get_cluster_h 2 | #define get_cluster_h 3 | 4 | #include "par.h" 5 | 6 | extern void mpi_range(MPI2*); 7 | 8 | class Get_cluster 9 | { 10 | int n,ic,id,io,ip,ix,iy,iz,ista,iend,ix_cluster,iy_cluster,iz_cluster; 11 | int j,jc,jd,jsta,jend,jx,jy,jz,jx_min,jx_max,jy_min,jy_max,jz_min,jz_max; 12 | int icall,ncall,nilocal,njlocal,*iplocal,*jplocal,*ipglobal,*jpglobal,*ipoffset,*jpoffset,*idghost; 13 | double sort; 14 | MPI2 mpi; 15 | public: 16 | void get_cluster(PARTICLE *particle,CLUSTER *cluster) 17 | { 18 | MPI_Comm_size(PETSC_COMM_WORLD,&mpi.nprocs); 19 | MPI_Comm_rank(PETSC_COMM_WORLD,&mpi.myrank); 20 | 21 | cluster->neighbor_buffer = (int)ceil((cluster->sigma_buffer-cluster->nsigma_box+epsf)/2/ 22 | cluster->nsigma_box); 23 | cluster->neighbor_trunc = (int)ceil((cluster->sigma_trunc-cluster->nsigma_box+epsf)/2/ 24 | cluster->nsigma_box); 25 | cluster->neighbor_ghost = std::max(cluster->neighbor_buffer,cluster->neighbor_trunc); 26 | 27 | /* 28 | calculate cluster size 29 | */ 30 | cluster->xmin = particle->xmin-epsf; 31 | cluster->xmax = particle->xmax+epsf; 32 | cluster->ymin = particle->ymin-epsf; 33 | cluster->ymax = particle->ymax+epsf; 34 | cluster->zmin = particle->zmin-epsf; 35 | cluster->zmax = particle->zmax+epsf; 36 | cluster->box_length = cluster->nsigma_box*particle->sigma+epsf; 37 | 38 | /* 39 | calculate number of clusters in each direction 40 | */ 41 | cluster->nx = (int)ceil((cluster->xmax-cluster->xmin)/cluster->box_length); 42 | cluster->ny = (int)ceil((cluster->ymax-cluster->ymin)/cluster->box_length); 43 | cluster->nz = (int)ceil((cluster->zmax-cluster->zmin)/cluster->box_length); 44 | cluster->n = cluster->nx*cluster->ny*cluster->nz; 45 | 46 | /* 47 | allocate arrays 48 | */ 49 | cluster->ista = new int [cluster->n]; 50 | cluster->iend = new int [cluster->n]; 51 | cluster->jsta = new int [cluster->n]; 52 | cluster->jend = new int 
[cluster->n]; 53 | cluster->ix = new int [cluster->n]; 54 | cluster->iy = new int [cluster->n]; 55 | cluster->iz = new int [cluster->n]; 56 | cluster->xc = new double [cluster->n]; 57 | cluster->yc = new double [cluster->n]; 58 | cluster->zc = new double [cluster->n]; 59 | 60 | iplocal = new int [cluster->n]; 61 | jplocal = new int [cluster->n]; 62 | ipglobal = new int [cluster->n]; 63 | jpglobal = new int [cluster->n]; 64 | ipoffset = new int [cluster->n]; 65 | jpoffset = new int [cluster->n]; 66 | idghost = new int [cluster->n]; 67 | 68 | /* 69 | calculate the x, y index and coordinates of the center 70 | */ 71 | ic = -1; 72 | for (iz=0; iznz; iz++) { 73 | for (ix=0; ixnx; ix++) { 74 | for (iy=0; iyny; iy++) { 75 | ic++; 76 | cluster->ix[ic] = ix; 77 | cluster->iy[ic] = iy; 78 | cluster->iz[ic] = iz; 79 | cluster->xc[ic] = cluster->xmin+(ix+0.5)*cluster->box_length; 80 | cluster->yc[ic] = cluster->ymin+(iy+0.5)*cluster->box_length; 81 | cluster->zc[ic] = cluster->zmin+(iz+0.5)*cluster->box_length; 82 | cluster->ista[ic] = 0; 83 | cluster->iend[ic] = -1; 84 | cluster->jsta[ic] = 0; 85 | cluster->jend[ic] = -1; 86 | iplocal[ic] = 0; 87 | jplocal[ic] = 0; 88 | ipoffset[ic] = 0; 89 | jpoffset[ic] = 0; 90 | } 91 | } 92 | } 93 | 94 | /* 95 | assign cluster number to particles 96 | */ 97 | for (ip=0; ipnilocal; ip++) { 98 | ix_cluster = (int)floor((particle->xil[ip]-cluster->xmin)/cluster->box_length); 99 | iy_cluster = (int)floor((particle->yil[ip]-cluster->ymin)/cluster->box_length); 100 | iz_cluster = (int)floor((particle->zil[ip]-cluster->zmin)/cluster->box_length); 101 | ic = iz_cluster*cluster->nx*cluster->ny+ix_cluster*cluster->ny+iy_cluster; 102 | iplocal[ic]++; 103 | } 104 | for (ip=0; ipnjlocal; ip++) { 105 | ix_cluster = (int)floor((particle->xjl[ip]-cluster->xmin)/cluster->box_length); 106 | iy_cluster = (int)floor((particle->yjl[ip]-cluster->ymin)/cluster->box_length); 107 | iz_cluster = (int)floor((particle->zjl[ip]-cluster->zmin)/cluster->box_length); 108 
| ic = iz_cluster*cluster->nx*cluster->ny+ix_cluster*cluster->ny+iy_cluster; 109 | jplocal[ic]++; 110 | } 111 | 112 | /* 113 | communicate and find global box offset (cluster->ista) and local box offset (ipoffset) 114 | */ 115 | MPI_Exscan(iplocal,ipoffset,cluster->n,MPI_INT,MPI_SUM,PETSC_COMM_WORLD); 116 | MPI_Exscan(jplocal,jpoffset,cluster->n,MPI_INT,MPI_SUM,PETSC_COMM_WORLD); 117 | MPI_Allreduce(iplocal,ipglobal,cluster->n,MPI_INT,MPI_SUM,PETSC_COMM_WORLD); 118 | MPI_Allreduce(jplocal,jpglobal,cluster->n,MPI_INT,MPI_SUM,PETSC_COMM_WORLD); 119 | id = 0; 120 | jd = 0; 121 | for (ic=0; icn; ic++) { 122 | ipoffset[ic] += id; 123 | jpoffset[ic] += jd; 124 | cluster->ista[ic] = id; 125 | cluster->jsta[ic] = jd; 126 | id += ipglobal[ic]; 127 | jd += jpglobal[ic]; 128 | cluster->iend[ic] = id-1; 129 | cluster->jend[ic] = jd-1; 130 | } 131 | 132 | mpi.nsta = 0; 133 | mpi.nend = cluster->n-1; 134 | mpi_range(&mpi); 135 | cluster->icsta = mpi.ista; 136 | cluster->icend = mpi.iend; 137 | 138 | VecCreate(PETSC_COMM_WORLD,&particle->ii); 139 | VecCreate(PETSC_COMM_WORLD,&particle->jj); 140 | VecSetSizes(particle->ii,particle->nilocal,PETSC_DETERMINE); 141 | VecSetSizes(particle->jj,particle->njlocal,PETSC_DETERMINE); 142 | VecSetFromOptions(particle->ii); 143 | VecSetFromOptions(particle->jj); 144 | for (ip=0; ipnilocal; ip++) { 145 | ix_cluster = (int)floor((particle->xil[ip]-cluster->xmin)/cluster->box_length); 146 | iy_cluster = (int)floor((particle->yil[ip]-cluster->ymin)/cluster->box_length); 147 | iz_cluster = (int)floor((particle->zil[ip]-cluster->zmin)/cluster->box_length); 148 | ic = iz_cluster*cluster->nx*cluster->ny+ix_cluster*cluster->ny+iy_cluster; 149 | sort = ip+particle->ista; 150 | VecSetValues(particle->ii,1,&ipoffset[ic],&sort,INSERT_VALUES); 151 | ipoffset[ic]++; 152 | } 153 | for (ip=0; ipnjlocal; ip++) { 154 | ix_cluster = (int)floor((particle->xjl[ip]-cluster->xmin)/cluster->box_length); 155 | iy_cluster = 
(int)floor((particle->yjl[ip]-cluster->ymin)/cluster->box_length); 156 | iz_cluster = (int)floor((particle->zjl[ip]-cluster->zmin)/cluster->box_length); 157 | ic = iz_cluster*cluster->nx*cluster->ny+ix_cluster*cluster->ny+iy_cluster; 158 | sort = ip+particle->jsta; 159 | VecSetValues(particle->jj,1,&jpoffset[ic],&sort,INSERT_VALUES); 160 | jpoffset[ic]++; 161 | } 162 | VecAssemblyBegin(particle->ii); 163 | VecAssemblyEnd(particle->ii); 164 | VecAssemblyBegin(particle->jj); 165 | VecAssemblyEnd(particle->jj); 166 | 167 | particle->ista = cluster->ista[cluster->icsta]; 168 | particle->jsta = cluster->jsta[cluster->icsta]; 169 | particle->iend = cluster->iend[cluster->icend-1]+1; 170 | particle->jend = cluster->jend[cluster->icend-1]+1; 171 | particle->nilocal = particle->iend-particle->ista; 172 | particle->njlocal = particle->jend-particle->jsta; 173 | 174 | /* 175 | determine size and create buffer & trunc temp arrays 176 | */ 177 | cluster->niperbox = 0; 178 | cluster->njperbox = 0; 179 | for (ic=0; icn; ic++) { 180 | if(cluster->iend[ic]-cluster->ista[ic]+1 > cluster->niperbox) { 181 | cluster->niperbox = cluster->iend[ic]-cluster->ista[ic]+1; 182 | } 183 | if(cluster->jend[ic]-cluster->jsta[ic]+1 > cluster->njperbox) { 184 | cluster->njperbox = cluster->jend[ic]-cluster->jsta[ic]+1; 185 | } 186 | } 187 | 188 | cluster->maxbuffer = cluster->niperbox*(2*cluster->neighbor_buffer+1)* 189 | (2*cluster->neighbor_buffer+1); 190 | cluster->maxtrunc = cluster->njperbox*(2*cluster->neighbor_trunc+1)* 191 | (2*cluster->neighbor_trunc+1); 192 | 193 | for (ic=0; icn; ic++) { 194 | idghost[ic] = 0; 195 | } 196 | for (ic=cluster->icsta; icicend; ic++) { 197 | idghost[ic] = 1; 198 | } 199 | cluster->ncghost=0; 200 | for (ic=cluster->icsta; icicend; ic++) { 201 | ix = cluster->ix[ic]; 202 | iy = cluster->iy[ic]; 203 | iz = cluster->iz[ic]; 204 | jx_min = std::max(0,ix-cluster->neighbor_ghost); 205 | jx_max = std::min(cluster->nx-1,ix+cluster->neighbor_ghost); 206 | jy_min = 
std::max(0,iy-cluster->neighbor_ghost); 207 | jy_max = std::min(cluster->ny-1,iy+cluster->neighbor_ghost); 208 | jz_min = std::max(0,iz-cluster->neighbor_ghost); 209 | jz_max = std::min(cluster->nz-1,iz+cluster->neighbor_ghost); 210 | for (jx=jx_min; jx<=jx_max; jx++) { 211 | for (jy=jy_min; jy<=jy_max; jy++) { 212 | for (jz=jz_min; jz<=jz_max; jz++) { 213 | jc = jz*cluster->nx*cluster->ny+jx*cluster->ny+jy; 214 | if (idghost[jc] == 0) { 215 | idghost[jc] = 2; 216 | cluster->ncghost++; 217 | } 218 | } 219 | } 220 | } 221 | } 222 | cluster->nclocal = cluster->icend-cluster->icsta; 223 | cluster->maxghost = std::max(cluster->niperbox,cluster->njperbox)*cluster->ncghost; 224 | cluster->maxlocal = std::max(cluster->niperbox,cluster->njperbox)*(cluster->nclocal+cluster->ncghost); 225 | cluster->ighost = new int [cluster->maxghost]; 226 | cluster->ilocal = new int [cluster->maxlocal]; 227 | cluster->jghost = new int [cluster->maxghost]; 228 | cluster->jlocal = new int [cluster->maxlocal]; 229 | 230 | /* 231 | local cluster indexing 232 | */ 233 | nilocal = 0; 234 | for (ic=0; icn; ic++) { 235 | if (idghost[ic] == 1) { 236 | ista = cluster->ista[ic]; 237 | iend = cluster->iend[ic]; 238 | cluster->ista[ic] = nilocal; 239 | for (j=ista; j<=iend; j++) { 240 | cluster->ilocal[nilocal] = j; 241 | nilocal++; 242 | } 243 | cluster->iend[ic] = nilocal-1; 244 | } 245 | } 246 | cluster->nighost = 0; 247 | for (ic=0; icn; ic++) { 248 | if (idghost[ic] == 2) { 249 | ista = cluster->ista[ic]; 250 | iend = cluster->iend[ic]; 251 | cluster->ista[ic] = nilocal; 252 | for (j=ista; j<=iend; j++) { 253 | cluster->ighost[cluster->nighost] = j; 254 | cluster->nighost++; 255 | cluster->ilocal[nilocal] = j; 256 | nilocal++; 257 | } 258 | cluster->iend[ic] = nilocal-1; 259 | } 260 | } 261 | njlocal = 0; 262 | for (ic=0; icn; ic++) { 263 | if (idghost[ic] == 1) { 264 | jsta = cluster->jsta[ic]; 265 | jend = cluster->jend[ic]; 266 | cluster->jsta[ic] = njlocal; 267 | for (j=jsta; j<=jend; j++) { 
268 | cluster->jlocal[njlocal] = j; 269 | njlocal++; 270 | } 271 | cluster->jend[ic] = njlocal-1; 272 | } 273 | } 274 | cluster->njghost = 0; 275 | for (ic=0; icn; ic++) { 276 | if (idghost[ic] == 2) { 277 | jsta = cluster->jsta[ic]; 278 | jend = cluster->jend[ic]; 279 | cluster->jsta[ic] = njlocal; 280 | for (j=jsta; j<=jend; j++) { 281 | cluster->jghost[cluster->njghost] = j; 282 | cluster->njghost++; 283 | cluster->jlocal[njlocal] = j; 284 | njlocal++; 285 | } 286 | cluster->jend[ic] = njlocal-1; 287 | } 288 | } 289 | 290 | delete[] iplocal; 291 | delete[] jplocal; 292 | delete[] ipglobal; 293 | delete[] jpglobal; 294 | delete[] ipoffset; 295 | delete[] jpoffset; 296 | delete[] idghost; 297 | 298 | cluster->idx = new int [cluster->maxbuffer]; 299 | cluster->xib = new double [cluster->maxbuffer]; 300 | cluster->yib = new double [cluster->maxbuffer]; 301 | cluster->zib = new double [cluster->maxbuffer]; 302 | cluster->gib = new double [cluster->maxbuffer]; 303 | cluster->wib = new double [cluster->maxbuffer]; 304 | cluster->xjt = new double [cluster->maxtrunc]; 305 | cluster->yjt = new double [cluster->maxtrunc]; 306 | cluster->zjt = new double [cluster->maxtrunc]; 307 | cluster->gjt = new double [cluster->maxtrunc]; 308 | } 309 | }; 310 | 311 | #endif 312 | -------------------------------------------------------------------------------- /2d/rbf_interpolation.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "par.h" 5 | #include "get_cluster.h" 6 | #include "get_buffer.h" 7 | 8 | extern PetscErrorCode mymatmult(Mat,Vec,Vec); 9 | extern PetscErrorCode mysubmat(Mat,PetscInt,const IS*,const IS*,MatReuse,Mat**); 10 | 11 | /** RBF solver. 12 | * 13 | * Using collocation, it finds the weights (gi) for a set of RBF gaussian bases (xi, yi). 14 | * 15 | * Parameters: 16 | * xi, yi: Coordinates of the gaussian bases. 17 | * gi: Returns the solved weights for the gaussian bases. 
18 | * wi: Solution of the field at the bases locations. 19 | * sigma: Sigma parameter of the gaussian. 20 | * nsigma_box: Size of the inner box or 'local box'. 21 | * sigma buffer: Size of the buffer area. 22 | * sigma_trunc: Truncation point for sigma. 23 | * its: Returns solver teration data. 24 | */ 25 | PetscErrorCode rbf_interpolation(Vec xi, Vec yi, Vec gi, Vec wi, 26 | double sigma, int nsigma_box, int sigma_buffer, int sigma_trunc, int *its) 27 | { 28 | int i,ic,id,ista,iend,*isort,ievent[10]; 29 | std::ofstream fid0,fid1; 30 | PARTICLE particle; 31 | CLUSTER cluster; 32 | MPI2 mpi; 33 | BOTH both; 34 | both.p = &particle; 35 | both.c = &cluster; 36 | 37 | PetscErrorCode ierr; 38 | KSP ksp; 39 | PC pc; 40 | IS isx,isy,*is,*is_local; 41 | Mat M,P; 42 | Vec xx; 43 | PetscInt *idx; 44 | PetscScalar *xxx; 45 | VecScatter ctx; 46 | 47 | ierr = PetscLogEventRegister("InitVec",0,&ievent[0]);CHKERRQ(ierr); 48 | ierr = PetscLogEventRegister("InitCluster",0,&ievent[1]);CHKERRQ(ierr); 49 | ierr = PetscLogEventRegister("InitIS",0,&ievent[2]);CHKERRQ(ierr); 50 | ierr = PetscLogEventRegister("InitGhost",0,&ievent[3]);CHKERRQ(ierr); 51 | ierr = PetscLogEventRegister("InitMat",0,&ievent[4]);CHKERRQ(ierr); 52 | ierr = PetscLogEventRegister("Post Processing",0,&ievent[5]);CHKERRQ(ierr); 53 | 54 | ierr = PetscLogEventBegin(ievent[0],0,0,0,0);CHKERRQ(ierr); 55 | ierr = MPI_Comm_size(PETSC_COMM_WORLD,&mpi.nprocs);CHKERRQ(ierr); 56 | ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&mpi.myrank);CHKERRQ(ierr); 57 | cluster.file = 0; 58 | 59 | /* 60 | particle parameters 61 | */ 62 | particle.sigma = sigma; 63 | ierr = VecMin(xi,PETSC_NULL,&particle.xmin);CHKERRQ(ierr); 64 | ierr = VecMax(xi,PETSC_NULL,&particle.xmax);CHKERRQ(ierr); 65 | ierr = VecMin(yi,PETSC_NULL,&particle.ymin);CHKERRQ(ierr); 66 | ierr = VecMax(yi,PETSC_NULL,&particle.ymax);CHKERRQ(ierr); 67 | 68 | /* 69 | cluster parameters 70 | */ 71 | cluster.nsigma_box = nsigma_box; 72 | cluster.sigma_buffer = sigma_buffer; 73 | 
cluster.sigma_trunc = sigma_trunc; 74 | 75 | /* 76 | calculate problem size 77 | */ 78 | ierr = VecGetSize(xi,&particle.ni);CHKERRQ(ierr); 79 | ierr = VecGetSize(xi,&particle.nj);CHKERRQ(ierr); 80 | ierr = VecGetOwnershipRange(xi,&particle.ista,&particle.iend);CHKERRQ(ierr); 81 | ierr = VecGetOwnershipRange(xi,&particle.jsta,&particle.jend);CHKERRQ(ierr); 82 | particle.nilocal = particle.iend-particle.ista; 83 | particle.njlocal = particle.jend-particle.jsta; 84 | 85 | ierr = PetscLogEventEnd(ievent[0],0,0,0,0);CHKERRQ(ierr); 86 | ierr = PetscLogEventBegin(ievent[1],0,0,0,0);CHKERRQ(ierr); 87 | 88 | /* 89 | generate clusters 90 | */ 91 | ierr = VecGetArray(xi,&particle.xil);CHKERRQ(ierr); 92 | ierr = VecGetArray(yi,&particle.yil);CHKERRQ(ierr); 93 | ierr = VecGetArray(xi,&particle.xjl);CHKERRQ(ierr); 94 | ierr = VecGetArray(yi,&particle.yjl);CHKERRQ(ierr); 95 | 96 | Get_cluster clusters; 97 | clusters.get_cluster(&particle,&cluster); 98 | 99 | ierr = VecRestoreArray(xi,&particle.xil);CHKERRQ(ierr); 100 | ierr = VecRestoreArray(yi,&particle.yil);CHKERRQ(ierr); 101 | ierr = VecRestoreArray(xi,&particle.xjl);CHKERRQ(ierr); 102 | ierr = VecRestoreArray(yi,&particle.yjl);CHKERRQ(ierr); 103 | isort = new int [particle.nilocal]; 104 | 105 | ierr = PetscLogEventEnd(ievent[1],0,0,0,0);CHKERRQ(ierr); 106 | ierr = PetscLogEventBegin(ievent[2],0,0,0,0);CHKERRQ(ierr); 107 | 108 | /* 109 | generate IS 110 | */ 111 | ierr = ISCreateStride(PETSC_COMM_WORLD,particle.nilocal,particle.ista,1,&isx);CHKERRQ(ierr); 112 | ierr = ISDuplicate(isx,&isy);CHKERRQ(ierr); 113 | ierr = VecCreate(PETSC_COMM_WORLD,&particle.i);CHKERRQ(ierr); 114 | ierr = VecSetSizes(particle.i,particle.nilocal,PETSC_DETERMINE);CHKERRQ(ierr); 115 | ierr = VecSetFromOptions(particle.i);CHKERRQ(ierr); 116 | ierr = VecScatterCreate(particle.ii,isx,particle.i,isy,&ctx);CHKERRQ(ierr); 117 | ierr = VecScatterBegin(ctx,particle.ii,particle.i,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 118 | ierr = 
VecScatterEnd(ctx,particle.ii,particle.i,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 119 | ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr); 120 | ierr = ISDestroy(&isx);CHKERRQ(ierr); 121 | ierr = ISDestroy(&isy);CHKERRQ(ierr); 122 | ierr = VecAssemblyBegin(particle.i);CHKERRQ(ierr); 123 | ierr = VecAssemblyEnd(particle.i);CHKERRQ(ierr); 124 | ierr = VecGetArray(particle.i,&particle.il);CHKERRQ(ierr); 125 | for(i=0; i 2 | #include 3 | #include 4 | 5 | #include "par.h" 6 | #include "get_cluster.h" 7 | #include "get_buffer.h" 8 | #include "get_trunc.h" 9 | #include "get_vorticity.h" 10 | 11 | /** RBF gaussian interpolation. 12 | * 13 | * Interpolation from source points (xi,yi,wi) for the values (gj) at the evaluation 14 | * points (xj,yj). 15 | * 16 | * Parameters 17 | * xi, yi: Coordinates of the evaluation points. 18 | * wi: Variable for storing the evaluation. 19 | * xj, yj: Coordinates of the source points. 20 | * gj: Weight for the source points. 21 | * sigma: Parameter of the gaussian. 22 | * nsigma_box: Size of inner box, measured in sigma. 23 | * sigma_buffer: Size of the buffer, measured in sigma. 24 | * sigma_trunc: Truncation distance for the gaussians, meassured in sigma. 
25 | */ 26 | PetscErrorCode vorticity_evaluation(Vec xi, Vec yi, Vec wi, Vec xj, Vec yj, Vec gj, 27 | double sigma, int nsigma_box, int sigma_buffer, int sigma_trunc) 28 | { 29 | int i,*isort,*jsort,ievent[10]; 30 | PetscReal ximin,ximax,yimin,yimax,xjmin,xjmax,yjmin,yjmax; 31 | std::ofstream fid0,fid1; 32 | PARTICLE particle; 33 | CLUSTER cluster; 34 | MPI2 mpi; 35 | BOTH both; 36 | both.p = &particle; 37 | both.c = &cluster; 38 | 39 | PetscErrorCode ierr; 40 | IS isx,isy,jsx,jsy; 41 | VecScatter ctx; 42 | 43 | ierr = PetscLogEventRegister("InitVec",0,&ievent[0]);CHKERRQ(ierr); 44 | ierr = PetscLogEventRegister("InitCluster",0,&ievent[1]);CHKERRQ(ierr); 45 | ierr = PetscLogEventRegister("InitIS",0,&ievent[2]);CHKERRQ(ierr); 46 | ierr = PetscLogEventRegister("InitGhost",0,&ievent[3]);CHKERRQ(ierr); 47 | ierr = PetscLogEventRegister("InitRHS",0,&ievent[4]);CHKERRQ(ierr); 48 | ierr = PetscLogEventRegister("Post Processing",0,&ievent[5]);CHKERRQ(ierr); 49 | 50 | ierr = PetscLogEventBegin(ievent[0],0,0,0,0);CHKERRQ(ierr); 51 | ierr = MPI_Comm_size(PETSC_COMM_WORLD,&mpi.nprocs);CHKERRQ(ierr); 52 | ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&mpi.myrank);CHKERRQ(ierr); 53 | cluster.file = 0; 54 | 55 | /* 56 | particle parameters 57 | */ 58 | particle.sigma = sigma; 59 | ierr = VecMin(xi,PETSC_NULL,&ximin);CHKERRQ(ierr); 60 | ierr = VecMax(xi,PETSC_NULL,&ximax);CHKERRQ(ierr); 61 | ierr = VecMin(yi,PETSC_NULL,&yimin);CHKERRQ(ierr); 62 | ierr = VecMax(yi,PETSC_NULL,&yimax);CHKERRQ(ierr); 63 | ierr = VecMin(xj,PETSC_NULL,&xjmin);CHKERRQ(ierr); 64 | ierr = VecMax(xj,PETSC_NULL,&xjmax);CHKERRQ(ierr); 65 | ierr = VecMin(yj,PETSC_NULL,&yjmin);CHKERRQ(ierr); 66 | ierr = VecMax(yj,PETSC_NULL,&yjmax);CHKERRQ(ierr); 67 | particle.xmin = std::min(ximin,xjmin); 68 | particle.xmax = std::max(ximax,xjmax); 69 | particle.ymin = std::min(yimin,yjmin); 70 | particle.ymax = std::max(yimax,yjmax); 71 | 72 | /* 73 | cluster parameters 74 | */ 75 | cluster.nsigma_box = nsigma_box; 76 | 
cluster.sigma_buffer = sigma_buffer; 77 | cluster.sigma_trunc = sigma_trunc; 78 | 79 | /* 80 | calculate problem size 81 | */ 82 | ierr = VecGetSize(xi,&particle.ni);CHKERRQ(ierr); 83 | ierr = VecGetSize(xj,&particle.nj);CHKERRQ(ierr); 84 | ierr = VecGetOwnershipRange(xi,&particle.ista,&particle.iend);CHKERRQ(ierr); 85 | ierr = VecGetOwnershipRange(xj,&particle.jsta,&particle.jend);CHKERRQ(ierr); 86 | particle.nilocal = particle.iend-particle.ista; 87 | particle.njlocal = particle.jend-particle.jsta; 88 | 89 | ierr = PetscLogEventEnd(ievent[0],0,0,0,0);CHKERRQ(ierr); 90 | ierr = PetscLogEventBegin(ievent[1],0,0,0,0);CHKERRQ(ierr); 91 | 92 | /* 93 | generate clusters 94 | */ 95 | ierr = VecGetArray(xi,&particle.xil);CHKERRQ(ierr); 96 | ierr = VecGetArray(yi,&particle.yil);CHKERRQ(ierr); 97 | ierr = VecGetArray(xj,&particle.xjl);CHKERRQ(ierr); 98 | ierr = VecGetArray(yj,&particle.yjl);CHKERRQ(ierr); 99 | 100 | Get_cluster clusters; 101 | clusters.get_cluster(&particle,&cluster); 102 | 103 | ierr = VecRestoreArray(xi,&particle.xil);CHKERRQ(ierr); 104 | ierr = VecRestoreArray(yi,&particle.yil);CHKERRQ(ierr); 105 | ierr = VecRestoreArray(xj,&particle.xjl);CHKERRQ(ierr); 106 | ierr = VecRestoreArray(yj,&particle.yjl);CHKERRQ(ierr); 107 | isort = new int [particle.nilocal]; 108 | jsort = new int [particle.njlocal]; 109 | 110 | ierr = PetscLogEventEnd(ievent[1],0,0,0,0);CHKERRQ(ierr); 111 | ierr = PetscLogEventBegin(ievent[2],0,0,0,0);CHKERRQ(ierr); 112 | 113 | /* 114 | generate IS 115 | */ 116 | ierr = ISCreateStride(PETSC_COMM_WORLD,particle.nilocal,particle.ista,1,&isx);CHKERRQ(ierr); 117 | ierr = ISDuplicate(isx,&isy);CHKERRQ(ierr); 118 | ierr = VecCreate(PETSC_COMM_WORLD,&particle.i);CHKERRQ(ierr); 119 | ierr = VecSetSizes(particle.i,particle.nilocal,PETSC_DETERMINE);CHKERRQ(ierr); 120 | ierr = VecSetFromOptions(particle.i);CHKERRQ(ierr); 121 | ierr = VecScatterCreate(particle.ii,isx,particle.i,isy,&ctx);CHKERRQ(ierr); 122 | ierr = 
VecScatterBegin(ctx,particle.ii,particle.i,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 123 | ierr = VecScatterEnd(ctx,particle.ii,particle.i,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 124 | ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr); 125 | ierr = ISDestroy(&isx);CHKERRQ(ierr); 126 | ierr = ISDestroy(&isy);CHKERRQ(ierr); 127 | ierr = VecAssemblyBegin(particle.i);CHKERRQ(ierr); 128 | ierr = VecAssemblyEnd(particle.i);CHKERRQ(ierr); 129 | ierr = VecGetArray(particle.i,&particle.il);CHKERRQ(ierr); 130 | for(i=0; i 2 | #include 3 | #include 4 | 5 | #include "par.h" 6 | #include "get_cluster.h" 7 | #include "get_buffer.h" 8 | 9 | extern PetscErrorCode mymatmult(Mat,Vec,Vec); 10 | extern PetscErrorCode mysubmat(Mat,PetscInt,const IS*,const IS*,MatReuse,Mat**); 11 | 12 | /** RBF solver. 13 | * 14 | * Using collocation, it finds the weights (gi) for a set of RBF gaussian bases (xi, yi, zi). 15 | * 16 | * Parameters: 17 | * xi, yi, zi: Coordinates of the gaussian bases. 18 | * gi: Returns the solved weights for the gaussian bases. 19 | * wi: Solution of the field at the bases locations. 20 | * sigma: Sigma parameter of the gaussian. 21 | * nsigma_box: Size of the inner box or 'local box'. 22 | * sigma_buffer: Size of the buffer area. 23 | * sigma_trunc: Truncation point for sigma. 24 | * its: Returns solver iteration data. 
25 | */ 26 | PetscErrorCode rbf_interpolation(Vec xi, Vec yi, Vec zi, Vec gi, Vec wi, 27 | double sigma, int nsigma_box, int sigma_buffer, int sigma_trunc, int *its) 28 | { 29 | int i,ic,id,ista,iend,*isort,ievent[10]; 30 | std::ofstream fid0,fid1; 31 | PARTICLE particle; 32 | CLUSTER cluster; 33 | MPI2 mpi; 34 | BOTH both; 35 | both.p = &particle; 36 | both.c = &cluster; 37 | 38 | PetscErrorCode ierr; 39 | KSP ksp; 40 | PC pc; 41 | IS isx,isy,*is,*is_local; 42 | Mat M,P; 43 | Vec xx; 44 | PetscInt *idx; 45 | PetscScalar *xxx; 46 | VecScatter ctx; 47 | 48 | ierr = PetscLogEventRegister("InitVec",0,&ievent[0]);CHKERRQ(ierr); 49 | ierr = PetscLogEventRegister("InitCluster",0,&ievent[1]);CHKERRQ(ierr); 50 | ierr = PetscLogEventRegister("InitIS",0,&ievent[2]);CHKERRQ(ierr); 51 | ierr = PetscLogEventRegister("InitGhost",0,&ievent[3]);CHKERRQ(ierr); 52 | ierr = PetscLogEventRegister("InitMat",0,&ievent[4]);CHKERRQ(ierr); 53 | ierr = PetscLogEventRegister("Post Processing",0,&ievent[5]);CHKERRQ(ierr); 54 | 55 | ierr = PetscLogEventBegin(ievent[0],0,0,0,0);CHKERRQ(ierr); 56 | ierr = MPI_Comm_size(PETSC_COMM_WORLD,&mpi.nprocs);CHKERRQ(ierr); 57 | ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&mpi.myrank);CHKERRQ(ierr); 58 | cluster.file = 0; 59 | 60 | /* 61 | particle parameters 62 | */ 63 | particle.sigma = sigma; 64 | ierr = VecMin(xi,PETSC_NULL,&particle.xmin);CHKERRQ(ierr); 65 | ierr = VecMax(xi,PETSC_NULL,&particle.xmax);CHKERRQ(ierr); 66 | ierr = VecMin(yi,PETSC_NULL,&particle.ymin);CHKERRQ(ierr); 67 | ierr = VecMax(yi,PETSC_NULL,&particle.ymax);CHKERRQ(ierr); 68 | ierr = VecMin(zi,PETSC_NULL,&particle.zmin);CHKERRQ(ierr); 69 | ierr = VecMax(zi,PETSC_NULL,&particle.zmax);CHKERRQ(ierr); 70 | 71 | /* 72 | cluster parameters 73 | */ 74 | cluster.nsigma_box = nsigma_box; 75 | cluster.sigma_buffer = sigma_buffer; 76 | cluster.sigma_trunc = sigma_trunc; 77 | 78 | /* 79 | calculate problem size 80 | */ 81 | ierr = VecGetSize(xi,&particle.ni);CHKERRQ(ierr); 82 | ierr = 
VecGetSize(xi,&particle.nj);CHKERRQ(ierr); 83 | ierr = VecGetOwnershipRange(xi,&particle.ista,&particle.iend);CHKERRQ(ierr); 84 | ierr = VecGetOwnershipRange(xi,&particle.jsta,&particle.jend);CHKERRQ(ierr); 85 | particle.nilocal = particle.iend-particle.ista; 86 | particle.njlocal = particle.jend-particle.jsta; 87 | 88 | ierr = PetscLogEventEnd(ievent[0],0,0,0,0);CHKERRQ(ierr); 89 | ierr = PetscLogEventBegin(ievent[1],0,0,0,0);CHKERRQ(ierr); 90 | 91 | /* 92 | generate clusters 93 | */ 94 | ierr = VecGetArray(xi,&particle.xil);CHKERRQ(ierr); 95 | ierr = VecGetArray(yi,&particle.yil);CHKERRQ(ierr); 96 | ierr = VecGetArray(zi,&particle.zil);CHKERRQ(ierr); 97 | ierr = VecGetArray(xi,&particle.xjl);CHKERRQ(ierr); 98 | ierr = VecGetArray(yi,&particle.yjl);CHKERRQ(ierr); 99 | ierr = VecGetArray(zi,&particle.zjl);CHKERRQ(ierr); 100 | 101 | Get_cluster clusters; 102 | clusters.get_cluster(&particle,&cluster); 103 | 104 | ierr = VecRestoreArray(xi,&particle.xil);CHKERRQ(ierr); 105 | ierr = VecRestoreArray(yi,&particle.yil);CHKERRQ(ierr); 106 | ierr = VecRestoreArray(zi,&particle.zil);CHKERRQ(ierr); 107 | ierr = VecRestoreArray(xi,&particle.xjl);CHKERRQ(ierr); 108 | ierr = VecRestoreArray(yi,&particle.yjl);CHKERRQ(ierr); 109 | ierr = VecRestoreArray(zi,&particle.zjl);CHKERRQ(ierr); 110 | isort = new int [particle.nilocal]; 111 | 112 | ierr = PetscLogEventEnd(ievent[1],0,0,0,0);CHKERRQ(ierr); 113 | ierr = PetscLogEventBegin(ievent[2],0,0,0,0);CHKERRQ(ierr); 114 | 115 | /* 116 | generate IS 117 | */ 118 | ierr = ISCreateStride(PETSC_COMM_WORLD,particle.nilocal,particle.ista,1,&isx);CHKERRQ(ierr); 119 | ierr = ISDuplicate(isx,&isy);CHKERRQ(ierr); 120 | ierr = VecCreate(PETSC_COMM_WORLD,&particle.i);CHKERRQ(ierr); 121 | ierr = VecSetSizes(particle.i,particle.nilocal,PETSC_DETERMINE);CHKERRQ(ierr); 122 | ierr = VecSetFromOptions(particle.i);CHKERRQ(ierr); 123 | ierr = VecScatterCreate(particle.ii,isx,particle.i,isy,&ctx);CHKERRQ(ierr); 124 | ierr = 
VecScatterBegin(ctx,particle.ii,particle.i,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 125 | ierr = VecScatterEnd(ctx,particle.ii,particle.i,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 126 | ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr); 127 | ierr = ISDestroy(&isx);CHKERRQ(ierr); 128 | ierr = ISDestroy(&isy);CHKERRQ(ierr); 129 | ierr = VecAssemblyBegin(particle.i);CHKERRQ(ierr); 130 | ierr = VecAssemblyEnd(particle.i);CHKERRQ(ierr); 131 | ierr = VecGetArray(particle.i,&particle.il);CHKERRQ(ierr); 132 | for(i=0; i 2 | #include 3 | #include 4 | 5 | #include "par.h" 6 | #include "get_cluster.h" 7 | #include "get_buffer.h" 8 | #include "get_trunc.h" 9 | #include "get_vorticity.h" 10 | 11 | /** RBF gaussian interpolation. 12 | * 13 | * Interpolation from source points (xi,yi,zi,wi) for the values (gj) at the evaluation 14 | * points (xj,yj,zj). 15 | * 16 | * Parameters 17 | * xi, yi, zi: Coordinates of the evaluation points. 18 | * wi: Variable for storing the evaluation. 19 | * xj, yj, zj: Coordinates of the source points. 20 | * gj: Weight for the source points. 21 | * sigma: Parameter of the gaussian. 22 | * nsigma_box: Size of inner box, measured in sigma. 23 | * sigma_buffer: Size of the buffer, measured in sigma. 24 | * sigma_trunc: Truncation distance for the gaussians, measured in sigma. 
25 | */ 26 | PetscErrorCode vorticity_evaluation(Vec xi, Vec yi, Vec zi, Vec wi, Vec xj, Vec yj, Vec zj, Vec gj, 27 | double sigma, int nsigma_box, int sigma_buffer, int sigma_trunc) 28 | { 29 | int i,*isort,*jsort,ievent[10]; 30 | double ximin,ximax,yimin,yimax,zimin,zimax,xjmin,xjmax,yjmin,yjmax,zjmin,zjmax; 31 | std::ofstream fid0,fid1; 32 | PARTICLE particle; 33 | CLUSTER cluster; 34 | MPI2 mpi; 35 | BOTH both; 36 | both.p = &particle; 37 | both.c = &cluster; 38 | 39 | PetscErrorCode ierr; 40 | IS isx,isy,jsx,jsy; 41 | VecScatter ctx; 42 | 43 | ierr = PetscLogEventRegister("InitVec",0,&ievent[0]);CHKERRQ(ierr); 44 | ierr = PetscLogEventRegister("InitCluster",0,&ievent[1]);CHKERRQ(ierr); 45 | ierr = PetscLogEventRegister("InitIS",0,&ievent[2]);CHKERRQ(ierr); 46 | ierr = PetscLogEventRegister("InitGhost",0,&ievent[3]);CHKERRQ(ierr); 47 | ierr = PetscLogEventRegister("InitRHS",0,&ievent[4]);CHKERRQ(ierr); 48 | ierr = PetscLogEventRegister("Post Processing",0,&ievent[5]);CHKERRQ(ierr); 49 | 50 | ierr = PetscLogEventBegin(ievent[0],0,0,0,0);CHKERRQ(ierr); 51 | ierr = MPI_Comm_size(PETSC_COMM_WORLD,&mpi.nprocs);CHKERRQ(ierr); 52 | ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&mpi.myrank);CHKERRQ(ierr); 53 | cluster.file = 0; 54 | 55 | /* 56 | particle parameters 57 | */ 58 | particle.sigma = sigma; 59 | ierr = VecMin(xi,PETSC_NULL,&ximin);CHKERRQ(ierr); 60 | ierr = VecMax(xi,PETSC_NULL,&ximax);CHKERRQ(ierr); 61 | ierr = VecMin(yi,PETSC_NULL,&yimin);CHKERRQ(ierr); 62 | ierr = VecMax(yi,PETSC_NULL,&yimax);CHKERRQ(ierr); 63 | ierr = VecMin(zi,PETSC_NULL,&zimin);CHKERRQ(ierr); 64 | ierr = VecMax(zi,PETSC_NULL,&zimax);CHKERRQ(ierr); 65 | ierr = VecMin(xj,PETSC_NULL,&xjmin);CHKERRQ(ierr); 66 | ierr = VecMax(xj,PETSC_NULL,&xjmax);CHKERRQ(ierr); 67 | ierr = VecMin(yj,PETSC_NULL,&yjmin);CHKERRQ(ierr); 68 | ierr = VecMax(yj,PETSC_NULL,&yjmax);CHKERRQ(ierr); 69 | ierr = VecMin(zj,PETSC_NULL,&zjmin);CHKERRQ(ierr); 70 | ierr = VecMax(zj,PETSC_NULL,&zjmax);CHKERRQ(ierr); 71 | 
particle.xmin = std::min(ximin,xjmin); 72 | particle.xmax = std::max(ximax,xjmax); 73 | particle.ymin = std::min(yimin,yjmin); 74 | particle.ymax = std::max(yimax,yjmax); 75 | particle.zmin = std::min(zimin,zjmin); 76 | particle.zmax = std::max(zimax,zjmax); 77 | 78 | /* 79 | cluster parameters 80 | */ 81 | cluster.nsigma_box = nsigma_box; 82 | cluster.sigma_buffer = sigma_buffer; 83 | cluster.sigma_trunc = sigma_trunc; 84 | 85 | /* 86 | calculate problem size 87 | */ 88 | ierr = VecGetSize(xi,&particle.ni);CHKERRQ(ierr); 89 | ierr = VecGetSize(xj,&particle.nj);CHKERRQ(ierr); 90 | ierr = VecGetOwnershipRange(xi,&particle.ista,&particle.iend);CHKERRQ(ierr); 91 | ierr = VecGetOwnershipRange(xj,&particle.jsta,&particle.jend);CHKERRQ(ierr); 92 | particle.nilocal = particle.iend-particle.ista; 93 | particle.njlocal = particle.jend-particle.jsta; 94 | 95 | ierr = PetscLogEventEnd(ievent[0],0,0,0,0);CHKERRQ(ierr); 96 | ierr = PetscLogEventBegin(ievent[1],0,0,0,0);CHKERRQ(ierr); 97 | 98 | /* 99 | generate clusters 100 | */ 101 | ierr = VecGetArray(xi,&particle.xil);CHKERRQ(ierr); 102 | ierr = VecGetArray(yi,&particle.yil);CHKERRQ(ierr); 103 | ierr = VecGetArray(zi,&particle.zil);CHKERRQ(ierr); 104 | ierr = VecGetArray(xj,&particle.xjl);CHKERRQ(ierr); 105 | ierr = VecGetArray(yj,&particle.yjl);CHKERRQ(ierr); 106 | ierr = VecGetArray(zj,&particle.zjl);CHKERRQ(ierr); 107 | 108 | Get_cluster clusters; 109 | clusters.get_cluster(&particle,&cluster); 110 | 111 | ierr = VecRestoreArray(xi,&particle.xil);CHKERRQ(ierr); 112 | ierr = VecRestoreArray(yi,&particle.yil);CHKERRQ(ierr); 113 | ierr = VecRestoreArray(zi,&particle.zil);CHKERRQ(ierr); 114 | ierr = VecRestoreArray(xj,&particle.xjl);CHKERRQ(ierr); 115 | ierr = VecRestoreArray(yj,&particle.yjl);CHKERRQ(ierr); 116 | ierr = VecRestoreArray(zj,&particle.zjl);CHKERRQ(ierr); 117 | isort = new int [particle.nilocal]; 118 | jsort = new int [particle.njlocal]; 119 | 120 | ierr = PetscLogEventEnd(ievent[1],0,0,0,0);CHKERRQ(ierr); 121 | 
ierr = PetscLogEventBegin(ievent[2],0,0,0,0);CHKERRQ(ierr); 122 | 123 | /* 124 | generate IS 125 | */ 126 | ierr = ISCreateStride(PETSC_COMM_WORLD,particle.nilocal,particle.ista,1,&isx);CHKERRQ(ierr); 127 | ierr = ISDuplicate(isx,&isy);CHKERRQ(ierr); 128 | ierr = VecCreate(PETSC_COMM_WORLD,&particle.i);CHKERRQ(ierr); 129 | ierr = VecSetSizes(particle.i,particle.nilocal,PETSC_DETERMINE);CHKERRQ(ierr); 130 | ierr = VecSetFromOptions(particle.i);CHKERRQ(ierr); 131 | ierr = VecScatterCreate(particle.ii,isx,particle.i,isy,&ctx);CHKERRQ(ierr); 132 | ierr = VecScatterBegin(ctx,particle.ii,particle.i,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 133 | ierr = VecScatterEnd(ctx,particle.ii,particle.i,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 134 | ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr); 135 | ierr = ISDestroy(&isx);CHKERRQ(ierr); 136 | ierr = ISDestroy(&isy);CHKERRQ(ierr); 137 | ierr = VecAssemblyBegin(particle.i);CHKERRQ(ierr); 138 | ierr = VecAssemblyEnd(particle.i);CHKERRQ(ierr); 139 | ierr = VecGetArray(particle.i,&particle.il);CHKERRQ(ierr); 140 | for(i=0; i