├── IO.C
├── IO.h
├── Makefile
├── README.md
├── advection.C
├── advection.h
├── analysis.m
├── assemble.C
├── assemble.h
├── jacobi.C
├── jacobi.h
├── main.C
├── msort.C
├── msort.h
├── pressure.C
├── pressure.h
├── scaling.m
├── utils.C
├── utils.h
├── v_cycle.C
├── v_cycle.h
├── viscosity.C
└── viscosity.h


/IO.C:
--------------------------------------------------------------------------------
  1 | #include "IO.h"
  2 | 
  3 | // write out the sparse matrix
  4 | int write_matrix(cuint P,
  5 | 				 cuint Q,
  6 | 				 double** U,
  7 | 				 char* file_name)
  8 | {
  9 | 	ofstream file_out;
 10 | 	file_out.open (file_name);
 11 | 
 12 | 	if(!file_out.is_open()){
 13 | 		return 1;
 14 | 	}
 15 | 
 16 | 	for(int p=0; p<P; p++){
 17 | 		for(int q=0; q<Q; q++){
 18 | 			file_out<<U[p][q]<<" ";
 19 | 		}
 20 | 		file_out<<endl;
 21 | 	}
 22 | 		
 23 | 	file_out.close();
 24 | 	
 25 | 	return 0;
 26 | }
 27 | 
 28 | 
 29 | // write out the sparse matrix
 30 | int write_vector( cuint P,
 31 | 				  double* F,
 32 | 				  char* file_name)
 33 | {
 34 | 	ofstream file_out;
 35 | 	file_out.open (file_name);
 36 | 
 37 | 	if(!file_out.is_open()){
 38 | 		return 1;
 39 | 	}
 40 | 
 41 | 	for(int p=0; p<P; p++){
 42 | 		file_out<<F[p]<<endl;
 43 | 	}
 44 | 		
 45 | 	file_out.close();
 46 | 	
 47 | 	return 0;
 48 | }
 49 | 
 50 | // write out the results
 51 | int write_results(  double* U,
 52 | 					double* V,
 53 | 					double* W,
 54 | 					double* P,
 55 | 					cuint n_dof,
 56 | 					cuint nx,
 57 | 					cuint ny,
 58 | 					cuint nz,
 59 | 					cdouble xmin,
 60 | 					cdouble ymin,
 61 | 					cdouble zmin,
 62 | 					cdouble hx,
 63 | 					cdouble hy,
 64 | 					cdouble hz,
 65 | 					cuint ts,
 66 | 					cdouble bcs[][6]
 67 | 					)
 68 | {
 69 | 	
 70 | 	// build matrix with boundary conditions
 71 | 	double* Ue = new double[(nx+1)*(ny+2)*(nz+2)];
 72 | 	double* Ve = new double[(nx+2)*(ny+1)*(nz+2)];
 73 | 	double* We = new double[(nx+2)*(ny+2)*(nz+1)];
 74 | 	
 75 | 	grid_matrix(U, Ue, nx-1, ny, nz, nx+1, ny+2, nz+2, X_DIR, bcs[0]);
 76 | 	grid_matrix(V, Ve, nx, ny-1, nz, nx+2, ny+1, nz+2, Y_DIR, bcs[1]);
 77 | 	grid_matrix(W, We, nx, ny, nz-1, nx+2, ny+2, nz+1, Z_DIR, bcs[2]);
 78 | 
 79 | 	// get U, V, W defined at cell centers
 80 | 	double* Uc = new double[(nx)*(ny+2)*(nz+2)];
 81 | 	double* Vc = new double[(nx+2)*(ny)*(nz+2)];
 82 | 	double* Wc = new double[(nx+2)*(ny+2)*(nz)];
 83 | 	// average into cell centers
 84 | 	average(Ue, Uc, nx+1, ny+2, nz+2, nx, ny+2, nz+2, X_DIR);
 85 | 	average(Ve, Vc, nx+2, ny+1, nz+2, nx+2, ny, nz+2, Y_DIR);
 86 | 	average(We, Wc, nx+2, ny+2, nz+1, nx+2, ny+2, nz, Z_DIR);
 87 | 	
 88 | 	char file_name[100];
 89 | 	// write out the results in vtk format
 90 | 	ofstream file_out;
 91 | 	sprintf(file_name, "results_%i.vtk", ts);
 92 | 	file_out.open (file_name);
 93 | 	if(!file_out.is_open()){
 94 | 		return 1;
 95 | 	}
 96 | 	
 97 | 	// header
 98 | 	file_out<<"# vtk DataFile Version 3.0"<<endl;
 99 | 	file_out<<"3d incompressible NS problem"<<endl
100 | 			<<"ASCII"<<endl
101 | 			<<"DATASET STRUCTURED_GRID"<<endl
102 | 			<<"DIMENSIONS "<<nx<<" "<<ny<<" "<<nz<<endl
103 | 			<<"POINTS "<<n_dof<<" "<<"float"<<endl;
104 | 	
105 | 	// unsigned int i,j,k;
106 | 	// for(int n=0; n<n_dof; n++){
107 | 	// 	one_d_to_three_d( n, nx, ny, i, j, k);
108 | 	// 	file_out<<i*hx<<" "<<j*hy<<" "<<k*hz<<endl;
109 | 	// }
110 | 
111 | 	// base for the grid
112 | 	cdouble xbase=xmin+hx/2;
113 | 	cdouble ybase=ymin+hy/2;
114 | 	cdouble zbase=zmin+hz/2;
115 | 	
116 | 	for(int i=0; i<nx; i++){
117 | 		for(int j=0; j<ny; j++){
118 | 			for(int k=0; k<nz; k++){
119 | 				file_out<<xbase+i*hx<<" "<<ybase+j*hy<<" "<<zbase+k*hz<<endl;
120 | 			}
121 | 		}
122 | 	}
123 | 	
124 | 	file_out<<"POINT_DATA "<<n_dof<<endl;
125 | 	file_out<<"SCALARS P float 1"<<endl
126 | 			<<"LOOKUP_TABLE default"<<endl;
127 | 	// for(int n=0; n<n_dof; n++){
128 | 	// 	file_out<<P[n]<<endl;
129 | 	// }
130 | 
131 | 
132 | 	for(int i=0; i<nx; i++){
133 | 		for(int j=0; j<ny; j++){
134 | 			for(int k=0; k<nz; k++){
135 | 				uint t4;
136 | 				three_d_to_one_d(i,j,k, nx,ny, t4);
137 | 	
138 | 				file_out<<P[t4]<<endl;
139 | 			}
140 | 		}
141 | 	}
142 | 
143 | file_out<<"VECTORS velocity float"<<endl;
144 | 
145 |  for(int i=0; i<nx; i++){
146 | 	 for(int j=0; j<ny; j++){
147 | 		 for(int k=0; k<nz; k++){
148 | 			 uint t1, t2, t3;
149 | 			 three_d_to_one_d(i,j+1,k+1, nx, ny+2,  t1);
150 | 			 three_d_to_one_d(i+1,j,k+1, nx+2, ny,  t2);
151 | 			 three_d_to_one_d(i+1,j+1,k, nx+2, ny+2,t3);
152 | 
153 | 			 file_out<<Uc[t1]<<" "
154 | 					 <<Vc[t2]<<" "
155 | 					 <<Wc[t3]<<endl;
156 | 		 }
157 | 	 }
158 |  }
159 | 
160 | 
161 |  // for(int n=0; n<n_dof; n++){
162 |  // 		// uint i,j,k;
163 |  // 		// one_d_to_three_d(n, nx, ny, i,j,k);
164 |  // 		file_out<<Uc[n]<<" "
165 |  // 				<<Vc[n]<<" "
166 |  // 				<<Wc[n]<<endl;
167 |  // 	}
168 | 	
169 | 	file_out.close();
170 | 
171 | 	// output files for matlab
172 | 	// point coordinates and scalar result
173 | 	sprintf(file_name, "results_%i.dat", ts);
174 | 	file_out.open(file_name);
175 | 
176 | 	for(int i=0; i<nx; i++){
177 | 		for(int j=0; j<ny; j++){
178 | 			for(int k=0; k<nz; k++){
179 | 				uint t1, t2, t3, t4;
180 | 				three_d_to_one_d(i,j+1,k+1, nx, ny+2,  t1);
181 | 				three_d_to_one_d(i+1,j,k+1, nx+2, ny,  t2);
182 | 				three_d_to_one_d(i+1,j+1,k, nx+2, ny+2,t3);
183 | 				three_d_to_one_d(i,j,k, nx,ny, t4);
184 | 				
185 | 				file_out<<xbase+i*hx<<" "<<ybase+j*hy<<" "<<zbase+k*hz<<" "
186 | 						<<P[t4]<<" "
187 | 						<<Uc[t1]<<" "
188 | 						<<Vc[t2]<<" "
189 | 						<<Wc[t3]<<endl;
190 | 			}
191 | 		}
192 | 	}
193 | 	
194 | 	// for(int n=0; n<n_dof; n++){
195 | 	// 	one_d_to_three_d( n, nx, ny, i, j, k);
196 | 	// 	file_out<<i*hx<<" "<<j*hy<<" "<<k*hz<<" "
197 | 	// 			<<P[n]<<" "
198 | 	// 			<<Uc[n]<<" "
199 | 	// 			<<Vc[n]<<" "
200 | 	// 			<<Wc[n]<<endl;
201 | 	// }
202 | 	file_out.close();
203 | 
204 | 	return 0;
205 | }
206 | 
207 | 
208 | // write out 3d data for debuggin purpose
209 | int write_3d_data( double* U,
210 | 				   cuint nx, cuint ny, cuint nz,
211 | 				   char* file_name )
212 | {
213 | 	// output files for matlab
214 | 	ofstream file_out;
215 | 	file_out.open(file_name);
216 | 
217 | 	if(!file_out.is_open()){
218 | 		cout<<"failed to open file"<<endl;
219 | 		return 1;
220 | 	}
221 | 	
222 | 	// 1d index
223 | 	uint t;
224 | 
225 | 	// set up U
226 | 	for(int i=0; i<nx; i++){
227 | 		for(int j=0; j<ny; j++){
228 | 			for(int k=0; k<nz; k++){
229 | 				three_d_to_one_d(i,j,k, nx, ny, t);
230 | 				file_out<<i<<" "<<j<<" "<<k<<" "<<U[t]<<endl;
231 | 			}
232 | 		}
233 | 	}
234 | 
235 | 	file_out.close();
236 | 		
237 | 	return 0;
238 | }
239 | 


--------------------------------------------------------------------------------
/IO.h:
--------------------------------------------------------------------------------
 1 | // assemble matrix and vector
 2 | #ifndef IO_H
 3 | #define IO_H
 4 | 
 5 | #include "utils.h"
 6 | #include "advection.h"
 7 | 
 8 | using namespace std;
 9 | 
10 | 
11 | // write out the sparse matrix
12 | int write_matrix(cuint P,
13 | 				 cuint Q,
14 | 				 double** U,
15 | 				 char* file_name);
16 | 
17 | // write out the sparse matrix
18 | int write_vector( cuint P,
19 | 				  double* F,
20 | 				  char* file_name);
21 | 
22 | // write out the results
23 | int write_results(  double* U,
24 | 					double* V,
25 | 					double* W,
26 | 					double* P,
27 | 					cuint n_dof,
28 | 					cuint nx,
29 | 					cuint ny,
30 | 					cuint nz,
31 | 					cdouble xmin,
32 | 					cdouble ymin,
33 | 					cdouble zmin,
34 | 					cdouble hx,
35 | 					cdouble hy,
36 | 					cdouble hz,
37 | 					cuint ts,
38 | 					cdouble bcs[][6]
39 | 					);
40 | 
41 | // write out 3d data for debuggin purpose
42 | int write_3d_data( double* U,
43 | 				   cuint nx, cuint ny, cuint nz,
44 | 				   char* file_name );
45 | #endif //IO_H
46 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile: CSE391 Final Project
 2 | # Geometric Multigrid
 3 | 
 4 | CC=g++
 5 | CFLAGS=-fopenmp  -std=c++11
 6 | LDFLAGS=-fopenmp  -std=c++11
 7 | SOURCES=main.C assemble.C  IO.C  jacobi.C utils.C  v_cycle.C advection.C viscosity.C msort.C pressure.C
 8 | OBJ=$(SOURCES:.C=.o)
 9 | EXE=multigrid
10 | 
11 | all: $(SOURCES) $(EXE)
12 | 
13 | $(EXE): $(OBJ) 
14 | 	$(CC) $(LDFLAGS) $(OBJ) -o $@
15 | 
16 | .C.o:
17 | 	$(CC) $(CFLAGS) -c $< -o $@
18 | 
19 | jacobi.o:jacobi.h
20 | main.o:jacobi.h utils.h IO.h v_cycle.h advection.h viscosity.h
21 | viscosity.o: viscosity.h
22 | pressure.o: pressure.h
23 | IO.o: IO.h advection.h
24 | v_cycle.o: v_cycle.h pressure.h
25 | pressure.o: pressure.h v_cycle.h
26 | 
27 | run:
28 | 	@ export OMP_NUM_THREADS=1
29 | 	@./multigrid 1 1 12 12 12
30 | 
31 | clean:
32 | 	@ rm *.o multigrid
33 | 
34 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 3D Navier Stokes Equation Solver                                                                                                                                        
 2 | ================================
 3 | 
 4 | Solves the 3D incompressible Navier-Stokes equations in parallel using the finite difference method on a uniform grid. Incompressibility is enforced with a
 5 | pressure-correction scheme, and the linear (Poisson) solver is a multigrid V-cycle.
 6 | 
 7 | The code is largely based on this [MATLAB implementation](http://math.mit.edu/cse/codes/mit18086_navierstokes.m).
 8 | The documentation is [here](http://math.mit.edu/cse/codes/mit18086_navierstokes.pdf).
 9 | 
10 | Problem Statement
11 | -------------------------
12 | 
13 | Incompressible Navier Stokes Equation:
14 | 
15 | ![momentum](http://upload.wikimedia.org/math/4/8/c/48c88ec1a44dce97a23ceff09ee668b2.png "momentum")
16 | 
17 | Incompressibility condition:
18 | 
19 | ![mass](http://upload.wikimedia.org/math/1/6/9/169892c54316eb6d350f5118bff5c213.png "mass")
20 | 
21 | 3D cubic domain [0,1]^3. 
22 | 
23 | Dirichlet boundary conditions for the velocities and Neumann boundary conditions for the pressure are implemented.
24 | The velocity on the boundary can be specified in the `double bcs[3][6]` array (the first dimension selects the x-, y- or z-velocity,
25 | the second dimension selects the face of the cube).
26 | 
27 | Implementation
28 | -------------------------
29 | 
30 | 1. The cubic domain is discretized by a uniform staggered grid.
31 | 2. The partial differential equations are discretized using finite difference method.
32 | 3. Parallelization is implemented by OpenMP.
33 | 4. Pressure-correction scheme is used to enforce incompressibility.
34 | 
35 | Usage
36 | -------------------------
37 | 
38 | ./multigrid [# of threads] [max level of v-cycle] [x-grid size] [y-grid size] [z-grid size]
39 | 
40 | 
41 | 


--------------------------------------------------------------------------------
/advection.C:
--------------------------------------------------------------------------------
  1 | #include "advection.h"
  2 | #include "IO.h"
  3 | 
  4 | // treat nonlinear advection terms
  5 | void advection( double* U,
  6 | 				double* V,
  7 | 				double* W,
  8 | 				cuint nx, cuint ny, cuint nz,
  9 | 				cdouble hx, cdouble hy, cdouble hz,
 10 | 				cdouble dt,
 11 | 				cdouble bcs[][6]
 12 | 				)
 13 | {	
 14 | 	// upwinding parameter
 15 | 	// cdouble gamma =
 16 | 	// 	min( 1.2*dt*max(max(max_3d_array(U), max_3d_array(V)), max_3d_array(W)),
 17 | 	// 		 1.0);
 18 | 
 19 | 	// build matrix with boundary conditions
 20 | 	double* Ue = new double[(nx+1)*(ny+2)*(nz+2)];
 21 | 	double* Ve = new double[(nx+2)*(ny+1)*(nz+2)];
 22 | 	double* We = new double[(nx+2)*(ny+2)*(nz+1)];
 23 | 	grid_matrix(U, Ue, nx-1, ny, nz, nx+1, ny+2, nz+2, X_DIR, bcs[0]);
 24 | 	grid_matrix(V, Ve, nx, ny-1, nz, nx+2, ny+1, nz+2, Y_DIR, bcs[1]);
 25 | 	grid_matrix(W, We, nx, ny, nz-1, nx+2, ny+2, nz+1, Z_DIR, bcs[2]);
 26 | 
 27 | 	// char Ufile [] = "U.dat";
 28 | 	// write_3d_data(Ue, nx+1, ny+2, nz+2, Ufile);
 29 | 	// char Vfile [] = "V.dat";
 30 | 	// write_3d_data(Ve, nx+2, ny+1, nz+2, Vfile);
 31 | 	// char Wfile [] = "W.dat";
 32 | 	// write_3d_data(We, nx+2, ny+2, nz+1, Wfile);
 33 | 	
 34 | 	// calculate UV, UW, VW defined at mid-edges
 35 | 	double* UV = new double[(nx+1)*(ny+1)*(nz+2)];
 36 | 	double* UW = new double[(nx+1)*(ny+2)*(nz+1)];
 37 | 	double* VW = new double[(nx+2)*(ny+1)*(nz+1)];
 38 | 	calculate_edge_values(Ue, Ve, We, UV, UW, VW, nx, ny, nz);
 39 | 		
 40 | 	// calculate UV_y, UW_z, VU_x, VW_z, WU_x, WV_y
 41 | 	// defined at corresponding points
 42 | 	double*  UV_y = new double[(nx+1)*(ny)*(nz+2)];
 43 | 	double*  UW_z = new double[(nx+1)*(ny+2)*(nz)];
 44 | 	double*  VU_x = new double[(nx)*(ny+1)*(nz+2)];
 45 | 	double*  VW_z = new double[(nx+2)*(ny+1)*(nz)];
 46 | 	double*  WU_x = new double[(nx)*(ny+2)*(nz+1)];
 47 | 	double*  WV_y = new double[(nx+2)*(ny)*(nz+1)];
 48 | 	staggered_first_difference( UV, UV_y, nx+1, ny+1, nz+2,
 49 | 								nx+1, ny, nz+2, hy, Y_DIR );
 50 | 	staggered_first_difference( UW, UW_z, nx+1, ny+2, nz+1,
 51 | 								nx+1, ny+2, nz, hz, Z_DIR );
 52 | 	staggered_first_difference( UV, VU_x, nx+1, ny+1, nz+2,
 53 | 								nx, ny+1, nz+2, hx, X_DIR );
 54 | 	staggered_first_difference( VW, VW_z, nx+2, ny+1, nz+1,
 55 | 								nx+2, ny+1, nz, hz, Z_DIR );
 56 | 	staggered_first_difference( UW, WU_x, nx+1, ny+2, nz+1,
 57 | 								nx, ny+2, nz+1, hx, X_DIR );
 58 | 	staggered_first_difference( VW, WV_y, nx+2, ny+1, nz+1,
 59 | 								nx+2, ny, nz+1, hy, Y_DIR );
 60 | 
 61 | 	// get U, V, W defined at cell centers
 62 | 	double* U2c = new double[(nx)*(ny+2)*(nz+2)];
 63 | 	double* V2c = new double[(nx+2)*(ny)*(nz+2)];
 64 | 	double* W2c = new double[(nx+2)*(ny+2)*(nz)];
 65 | 	// average into cell centers
 66 | 	average(Ue, U2c, nx+1, ny+2, nz+2, nx, ny+2, nz+2, X2_DIR);
 67 | 	average(Ve, V2c, nx+2, ny+1, nz+2, nx+2, ny, nz+2, Y2_DIR);
 68 | 	average(We, W2c, nx+2, ny+2, nz+1, nx+2, ny+2, nz, Z2_DIR);
 69 | 
 70 | 	// get U2, V2, W2 defined at corresponding points
 71 | 	double* U2_x = new double[(nx-1)*(ny+2)*(nz+2)];
 72 | 	double* V2_y = new double[(nx+2)*(ny-1)*(nz+2)];
 73 | 	double* W2_z = new double[(nx+2)*(ny+2)*(nz-1)];
 74 | 	staggered_first_difference( U2c, U2_x, nx, ny+2, nz+2,
 75 | 								nx-1, ny+2, nz+2, hx, X_DIR );
 76 | 	staggered_first_difference( V2c, V2_y, nx+2, ny, nz+2,
 77 | 								nx+2, ny-1, nz+2, hy, Y_DIR );
 78 | 	staggered_first_difference( W2c, W2_z, nx+2, ny+2, nz,
 79 | 								nx+2, ny+2, nz-1, hz, Z_DIR );
 80 | 	
 81 | 	// consolidate advection terms
 82 | 	consolidate_advection( U, V, W,
 83 | 						   U2_x, V2_y, W2_z,
 84 | 						   UV_y, UW_z, VU_x, VW_z, WU_x, WV_y,
 85 | 						   nx, ny, nz,
 86 | 						   dt );
 87 | 
 88 | 	// cleanup
 89 | 	delete[] Ue, Ve, We;
 90 | 	delete[] UV, UW, VW;
 91 | 	delete[] UV_y, UW_z, VU_x, VW_z, WU_x, WV_y;
 92 | 	delete[] U2c, V2c, W2c;
 93 | 	delete[] U2_x, V2_y, W2_z;
 94 | 	
 95 | 	return;
 96 | }
 97 | 
 98 | // generate grid matrix
 99 | // dir: direction of velocity: 0:x 1:y 2:z
100 | void grid_matrix( double* U,
101 | 				  double* Ue,
102 | 				  cuint nx, cuint ny, cuint nz,
103 | 				  cuint nxe, cuint nye, cuint nze,
104 | 				  cuint dir,
105 | 				  cdouble* bc )
106 | {
107 | 	// 1d index
108 | 	uint t, t0, t1;
109 | 
110 | 	// insert values
111 | #pragma omp parallel for private(t, t0, t1) shared(Ue, U) num_threads(nt)
112 | 	for(int i=0; i<nxe; i++){
113 | 		for(int j=0; j<nye; j++){
114 | 			for(int k=0; k<nze; k++){
115 | 				three_d_to_one_d(i,j,k, nxe,nye, t);					
116 | 				if(i==0 || i==nxe-1 || j==0 || j==nye-1
117 | 				   || k==0 || k==nze-1)
118 | 					Ue[t] = 0.0;
119 | 				else{
120 | 					three_d_to_one_d(i-1,j-1,k-1, nx, ny, t1);
121 | 					Ue[t] = U[t1];
122 | 				}
123 | 			}
124 | 		}
125 | 	}
126 | 
127 | 	// cout<<"U"<<endl;
128 | 	// if(dir==Y_DIR)
129 | 	// for(int i=0; i<(nx)*(ny)*(nz); i++)
130 | 	// 	cout<<U[i]<<endl;
131 | 		
132 | 	// for cofficients u
133 | 	if( dir==X_DIR){
134 | 		
135 | #pragma omp parallel for private(t) shared(Ue) num_threads(nt)
136 | 		for(int j=0; j<nye; j++){
137 | 			for(int k=0; k<nze; k++){
138 | 				three_d_to_one_d(0,j,k, nxe,nye, t);
139 | 				Ue[t] = bc[0];
140 | 				three_d_to_one_d(nxe-1,j,k, nxe,nye, t);
141 | 				Ue[t] = bc[1];
142 | 			}
143 | 		}
144 | 		
145 | 		// y0, yl
146 | #pragma omp parallel for private(t, t0, t1) shared(Ue) num_threads(nt)
147 | 		for(int i=1; i<nxe-1; i++){
148 | 			for(int k=1; k<nze-1; k++){
149 | 				three_d_to_one_d(i,0,k, nxe,nye, t0);
150 | 				three_d_to_one_d(i,1,k, nxe,nye, t1);
151 | 				Ue[t0] = 2*bc[2]-Ue[t1];
152 | 				
153 | 				three_d_to_one_d(i,nye-1,k, nxe,nye, t0);
154 | 				three_d_to_one_d(i,nye-2,k, nxe,nye, t1);
155 | 				Ue[t0] = 2*bc[3]-Ue[t1];
156 | 			}
157 | 		}
158 | 
159 | 		// z0, zl
160 | #pragma omp parallel for private(t, t0, t1) shared(Ue) num_threads(nt)
161 | 		for(int i=0; i<nxe; i++){
162 | 			for(int j=0; j<nye; j++){
163 | 				three_d_to_one_d(i,j,0, nxe,nye, t0);
164 | 				three_d_to_one_d(i,j,1, nxe,nye, t1);
165 | 				Ue[t0] = 2*bc[4]-Ue[t1];
166 | 
167 | 				three_d_to_one_d(i,j,nze-1, nxe,nye, t0);
168 | 				three_d_to_one_d(i,j,nze-2, nxe,nye, t1);
169 | 				Ue[t0] = 2*bc[5]-Ue[t1];
170 | 			}
171 | 		}
172 | 		
173 | 	}
174 | 
175 | 	// for cofficients v
176 | 	else if( dir==Y_DIR ){
177 | 		
178 | 		// account for boundary conditions
179 | 		// x0, xl
180 | #pragma omp parallel for private(t, t0, t1) shared(Ue) num_threads(nt)
181 | 		for(int j=0; j<nye; j++){
182 | 			for(int k=0; k<nze; k++){
183 | 				three_d_to_one_d(0,j,k, nxe,nye, t0);
184 | 				three_d_to_one_d(1,j,k, nxe,nye, t1);
185 | 				Ue[t0] = 2*bc[0]-Ue[t1];
186 | 				
187 | 				three_d_to_one_d(nxe-1,j,k, nxe,nye, t0);
188 | 				three_d_to_one_d(nxe-2,j,k, nxe,nye, t1);
189 | 				Ue[t0] = 2*bc[0]-Ue[t1];
190 | 
191 | 			}
192 | 		}
193 | 
194 | 		// y0, yl
195 | #pragma omp parallel for private(t) shared(Ue) num_threads(nt)
196 | 		for(int i=0; i<nxe; i++){
197 | 			for(int k=0; k<nze; k++){
198 | 				three_d_to_one_d(i,0,k, nxe,nye, t);
199 | 				Ue[t] = bc[2];
200 | 				three_d_to_one_d(i,nye-1,k, nxe,nye, t);
201 | 				Ue[t] = bc[3];
202 | 
203 | 			}
204 | 		}
205 | 
206 | 		// z0, zl
207 | #pragma omp parallel for private(t, t0, t1) shared(Ue) num_threads(nt)
208 | 		for(int i=0; i<nxe; i++){
209 | 			for(int j=0; j<nye; j++){
210 | 				three_d_to_one_d(i,j,0, nxe,nye, t0);
211 | 				three_d_to_one_d(i,j,1, nxe,nye, t1);
212 | 				Ue[t0] = 2*bc[4]-Ue[t1];
213 | 		
214 | 				three_d_to_one_d(i,j,nze-1, nxe,nye, t0);
215 | 				three_d_to_one_d(i,j,nze-2, nxe,nye, t1);
216 | 				Ue[t0] = 2*bc[5]-Ue[t1];
217 | 		
218 | 			}
219 | 		}
220 | 		
221 | 	}
222 | 
223 | 	// for cofficients w
224 | 	else if( dir==Z_DIR ){
225 | 
226 | 		// account for boundary conditions
227 | 		// x0, xl
228 | #pragma omp parallel for private(t, t0, t1) shared(Ue) num_threads(nt)
229 | 		for(int j=0; j<nye; j++){
230 | 			for(int k=0; k<nze; k++){
231 | 				three_d_to_one_d(0,j,k, nxe,nye, t0);
232 | 				three_d_to_one_d(1,j,k, nxe,nye, t1);
233 | 				Ue[t0] = 2*bc[0]-Ue[t1];
234 | 
235 | 				three_d_to_one_d(nxe-1,j,k, nxe,nye, t0);
236 | 				three_d_to_one_d(nxe-2,j,k, nxe,nye, t1);
237 | 				Ue[t0] = 2*bc[1]-Ue[t1];
238 | 			}
239 | 		}
240 | 
241 | 		// y0, yl
242 | #pragma omp parallel for private(t, t0, t1) shared(Ue) num_threads(nt)
243 | 		for(int i=0; i<nxe; i++){
244 | 			for(int k=0; k<nze; k++){
245 | 				three_d_to_one_d(i,0,k, nxe,nye, t0);
246 | 				three_d_to_one_d(i,1,k, nxe,nye, t1);
247 | 				Ue[t0] = 2*bc[2]-Ue[t1];
248 | 				
249 | 				three_d_to_one_d(i,nye-1,k, nxe,nye, t0);
250 | 				three_d_to_one_d(i,nye-2,k, nxe,nye, t1);
251 | 				Ue[t0] = 2*bc[3]-Ue[t1];
252 | 			}
253 | 		}
254 | 
255 | 		// z0, zl
256 | #pragma omp parallel for private(t) shared(Ue) num_threads(nt)
257 | 		for(int i=0; i<nxe; i++){
258 | 			for(int j=0; j<nye; j++){
259 | 				three_d_to_one_d(i,j,0, nxe,nye, t);
260 | 				Ue[t] = bc[4];
261 | 				three_d_to_one_d(i,j,nze-1, nxe,nye, t);
262 | 				Ue[t] = bc[5];
263 | 			}
264 | 		}
265 | 	}
266 | 	
267 | 	return;	
268 | }
269 | 
270 | // average values in whatever way
271 | // dir: averaging direction:
272 | // 0: x-direction
273 | // 1: y-direction
274 | // 2: z-direction
275 | // 3: xy-direction
276 | // 4: xz-direction
277 | // 5: yz-direction
278 | // 6: x-direction and square
279 | // 7: y-direction and square
280 | // 8: z-direction and square
281 | void average( const double* Ue, // raw value
282 | 			  double* Ua, // averaged value
283 | 			  cuint nxe, cuint nye, cuint nze,
284 | 			  cuint nxa, cuint nya, cuint nza,
285 | 			  cuint dir )
286 | {
287 | 	uint ta, te1, te2;
288 | 
289 | 	// x-average
290 | 	if(dir==X_DIR){
291 | 		// interpolate values by averaging
292 | #pragma omp parallel for private(ta, te1, te2) shared(Ue, Ua) num_threads(nt)
293 | 		for(int i=0; i<(nxa); i++){
294 | 			for(int j=0; j<(nya); j++){
295 | 				for(int k=0; k<(nza); k++){
296 | 					three_d_to_one_d(i,j,k, nxa, nya, ta);
297 | 					three_d_to_one_d(i+1,j,k, nxe, nye, te1);
298 | 					three_d_to_one_d(i,j,k, nxe, nye, te2);
299 | 					Ua[ta] = (Ue[te1]+Ue[te2])/2;
300 | 				}
301 | 			}
302 | 		}
303 | 	}
304 | 
305 | 	// y-average
306 | 	else if(dir==Y_DIR){
307 | 		// interpolate values by averaging
308 | #pragma omp parallel for private(ta, te1, te2) shared(Ue, Ua) num_threads(nt)
309 | 		for(int i=0; i<(nxa); i++){
310 | 			for(int j=0; j<(nya); j++){
311 | 				for(int k=0; k<(nza); k++){
312 | 					three_d_to_one_d(i,j,k, nxa, nya, ta);
313 | 					three_d_to_one_d(i,j+1,k, nxe, nye, te1);
314 | 					three_d_to_one_d(i,j,k, nxe, nye, te2);
315 | 					Ua[ta] = (Ue[te1]+Ue[te2])/2;
316 | 				}
317 | 			}
318 | 		}
319 | 	}
320 | 
321 | 	// z-average
322 | 	else if(dir==Z_DIR){
323 | 		// interpolate values by averaging
324 | #pragma omp parallel for private(ta, te1, te2) shared(Ue, Ua) num_threads(nt)
325 | 		for(int i=0; i<(nxa); i++){
326 | 			for(int j=0; j<(nya); j++){
327 | 				for(int k=0; k<(nza); k++){
328 | 					three_d_to_one_d(i,j,k, nxa, nya, ta);
329 | 					three_d_to_one_d(i,j,k+1, nxe, nye, te1);
330 | 					three_d_to_one_d(i,j,k, nxe, nye, te2);
331 | 					Ua[ta] = (Ue[te1]+Ue[te2])/2;
332 | 				}
333 | 			}
334 | 		}
335 | 	}
336 | 
337 | 	// // xy-direction
338 | 	// else if(dir==3){
339 | 	// 	// interpolate values by averaging
340 | 	// 	for(int i=0; i<(nx-1); i++){
341 | 	// 		for(int j=0; j<(ny-1); j++){
342 | 	// 			for(int k=0; k<(nz); k++){
343 | 	// 				Ua[i][j][k] = (Ue[i][j][k]+Ue[i+1][j][k]
344 | 	// 							   + Ue[i][j+1][k]+Ue[i+1][j+1][k])/4;
345 | 	// 			}
346 | 	// 		}
347 | 	// 	}
348 | 	// }
349 | 	
350 | 	// // xz-direction
351 | 	// else if(dir==4){
352 | 	// 	// interpolate values by averaging
353 | 	// 	for(int i=0; i<(nx-1); i++){
354 | 	// 		for(int j=0; j<(ny); j++){
355 | 	// 			for(int k=0; k<(nz-1); k++){
356 | 	// 				Ua[i][j][k] = (Ue[i][j][k]+Ue[i+1][j][k]
357 | 	// 							   + Ue[i][j][k+1]+Ue[i+1][j][k+1])/4;
358 | 	// 			}
359 | 	// 		}
360 | 	// 	}
361 | 	// }
362 | 
363 | 	// // yz-direction
364 | 	// else if(dir==5){
365 | 	// 	// interpolate values by averaging
366 | 	// 	for(int i=0; i<(nx); i++){
367 | 	// 		for(int j=0; j<(ny-1); j++){
368 | 	// 			for(int k=0; k<(nz-1); k++){
369 | 	// 				Ua[i][j][k] = (Ue[i][j][k]+Ue[i][j+1][k]
370 | 	// 							   + Ue[i][j][k+1]+Ue[i][j+1][k+1])/4;
371 | 	// 			}
372 | 	// 		}
373 | 	// 	}
374 | 	// }
375 | 
376 | 	// x-direction and square
377 | 	else if(dir==X2_DIR){
378 | 		// interpolate values by averaging
379 | #pragma omp parallel for private(ta, te1, te2) shared(Ue, Ua) num_threads(nt)
380 | 		for(int i=0; i<(nxa); i++){
381 | 			for(int j=0; j<(nya); j++){
382 | 				for(int k=0; k<(nza); k++){
383 | 					three_d_to_one_d(i,j,k, nxa, nya, ta);
384 | 					three_d_to_one_d(i+1,j,k, nxe, nye, te1);
385 | 					three_d_to_one_d(i,j,k, nxe, nye, te2);
386 | 					
387 | 					Ua[ta] = pow((Ue[te1]+Ue[te2])/2,2);
388 | 				}
389 | 			}
390 | 		}
391 | 	}
392 | 
393 | 	// y-direction and square
394 | 	else if(dir==Y2_DIR){
395 | 		// interpolate values by averaging
396 | #pragma omp parallel for private(ta, te1, te2) shared(Ue, Ua) num_threads(nt)
397 | 		for(int i=0; i<(nxa); i++){
398 | 			for(int j=0; j<(nya); j++){
399 | 				for(int k=0; k<(nza); k++){
400 | 					three_d_to_one_d(i,j,k, nxa, nya, ta);
401 | 					three_d_to_one_d(i,j+1,k, nxe, nye, te1);
402 | 					three_d_to_one_d(i,j,k, nxe, nye, te2);
403 | 					
404 | 					Ua[ta] = pow((Ue[te1]+Ue[te2])/2,2);
405 | 				}
406 | 			}
407 | 		}
408 | 	}
409 | 
410 | 	// z-direction and square
411 | 	if(dir==Z2_DIR){
412 | 		// interpolate values by averaging
413 | #pragma omp parallel for private(ta, te1, te2) shared(Ue, Ua) num_threads(nt)
414 | 		for(int i=0; i<(nxa); i++){
415 | 			for(int j=0; j<(nya); j++){
416 | 				for(int k=0; k<(nza); k++){
417 | 					three_d_to_one_d(i,j,k, nxa, nya, ta);
418 | 					three_d_to_one_d(i,j,k+1, nxe, nye, te1);
419 | 					three_d_to_one_d(i,j,k, nxe, nye, te2);
420 | 					
421 | 					Ua[ta] = pow((Ue[te1]+Ue[te2])/2,2);
422 | 				}
423 | 			}
424 | 		}
425 | 	}
426 | 
427 | 	
428 | 	return;
429 | }
430 | 
431 | // get maximum of the abs values of 3d array
432 | double max_3d_array( const boost::multi_array<double, 3>& U )
433 | {
434 | 	boost::multi_array_types::size_type const* sizes = U.shape();
435 | 	cuint nx = sizes[0];
436 | 	cuint ny = sizes[1];
437 | 	cuint nz = sizes[2];
438 | 	
439 | 	double max_value = abs(U[0][0][0]);
440 | 
441 | 	for(int i=0; i<nx; i++){
442 | 		for(int j=0; j<ny; j++){
443 | 			for(int k=0; k<nz; k++){
444 | 				if(max_value < abs(U[i][j][k]))
445 | 					max_value = abs(U[i][j][k]);
446 | 			}
447 | 		}
448 | 	}
449 | 
450 | 	return max_value;
451 | }
452 | 
453 | // get 1d staggered difference
454 | // Ua is an averaged U value at the cell vertices
455 | // get the difference value at the center of cell
456 | void staggered_first_difference( const double* UV,
457 | 								 double* UV_x,
458 | 								 cuint nx, cuint ny, cuint nz,
459 | 								 cuint nx_x, cuint ny_x, cuint nz_x,
460 | 								 cdouble h,
461 | 								 cuint dir
462 | 								 )
463 | {
464 | 	uint t1, t2, t3;
465 | 	// difference in x-direction
466 | 	if( dir==X_DIR){
467 | #pragma omp parallel for private(t1, t2, t3) shared(UV_x, UV) num_threads(nt)
468 | 		for(int i=0; i<nx-1; i++){
469 | 			for(int j=0; j<ny; j++){
470 | 				for(int k=0; k<nz; k++){
471 | 					three_d_to_one_d(i,j,k, nx_x, ny_x, t1);
472 | 					three_d_to_one_d(i+1,j,k, nx, ny, t2);
473 | 					three_d_to_one_d(i,j,k, nx, ny, t3);
474 | 					
475 | 					UV_x[t1] = (UV[t2]-UV[t3])/h;
476 | 				}
477 | 			}
478 | 		}
479 | 	}
480 | 
481 | 	// difference in y-direction
482 | 	if( dir==Y_DIR ){
483 | #pragma omp parallel for private(t1, t2, t3) shared(UV_x, UV) num_threads(nt)
484 | 		for(int i=0; i<nx; i++){
485 | 			for(int j=0; j<ny-1; j++){
486 | 				for(int k=0; k<nz; k++){
487 | 					three_d_to_one_d(i,j,k, nx_x, ny_x, t1);
488 | 					three_d_to_one_d(i,j+1,k, nx, ny, t2);
489 | 					three_d_to_one_d(i,j,k, nx, ny, t3);
490 | 					
491 | 					UV_x[t1] = (UV[t2]-UV[t3])/h;
492 | 				}
493 | 			}
494 | 		}
495 | 	}
496 | 
497 | 	// difference in z-direction
498 | 	if( dir==Z_DIR ){
499 | #pragma omp parallel for private(t1, t2, t3) shared(UV_x, UV) num_threads(nt)
500 | 		for(int i=0; i<nx; i++){
501 | 			for(int j=0; j<ny; j++){
502 | 				for(int k=0; k<nz-1; k++){
503 | 					three_d_to_one_d(i,j,k, nx_x, ny_x, t1);
504 | 					three_d_to_one_d(i,j,k+1, nx, ny, t2);
505 | 					three_d_to_one_d(i,j,k, nx, ny, t3);
506 | 			
507 | 					UV_x[t1] = (UV[t2]-UV[t3])/h;
508 | 				}
509 | 			}
510 | 		}
511 | 	}
512 | }
513 | 	
514 | 
515 | // get upwinding differences
516 | void upwind_difference( const boost::multi_array<double, 3>& U,
517 | 						boost::multi_array<double, 3>& Ud,
518 | 						cuint dir )
519 | {
520 | 	boost::multi_array_types::size_type const* sizes = U.shape();
521 | 	cuint nx = sizes[0];
522 | 	cuint ny = sizes[1];
523 | 	cuint nz = sizes[2];
524 | 
525 | 	// differece in x-direction
526 | 	if(dir==X_DIR){
527 | 		for(int i=0; i<nx-1; i++){
528 | 			for(int j=0; j<ny; j++){
529 | 				for(int k=0; k<nz; k++){
530 | 					Ud[i][j][k] = (U[i+1][j][k]-U[i][j][k]) / 2;
531 | 				}
532 | 			}
533 | 		}
534 | 	}
535 | 
536 |    	// differece in y-direction
537 | 	else if(dir==Y_DIR){
538 | 		for(int i=0; i<nx; i++){
539 | 			for(int j=0; j<ny-1; j++){
540 | 				for(int k=0; k<nz; k++){
541 | 					Ud[i][j][k] = (U[i][j+1][k]-U[i][j][k]) / 2;
542 | 				}
543 | 			}
544 | 		}
545 | 	}
546 | 	// differece in z-direction
547 | 	else if (dir==Z_DIR){
548 | 		for(int i=0; i<nx; i++){
549 | 			for(int j=0; j<ny; j++){
550 | 				for(int k=0; k<nz-1; k++){
551 | 					Ud[i][j][k] = (U[i][j][k+1]-U[i][j][k]) / 2;
552 | 				}
553 | 			}
554 | 		}
555 | 
556 | 	}
557 | 	
558 | 	return;
559 | }
560 | 
561 | // get central first difference at center of element
562 | void central_first_difference( const boost::multi_array<double, 3>& U2,
563 | 							   boost::multi_array<double, 3>& U2_x,
564 | 							   cdouble h,
565 | 							   cuint dir )
566 | {
567 | 	boost::multi_array_types::size_type const* sizes = U2.shape();
568 | 	cuint nx = sizes[0];
569 | 	cuint ny = sizes[1];
570 | 	cuint nz = sizes[2];
571 | 
572 | 	// x-difference
573 | 	if( dir==X_DIR){
574 | 		for(int i=0; i<nx-2; i++){
575 | 			for(int j=0; j<ny; j++){
576 | 				for(int k=0; k<nz; k++){	
577 | 					U2_x[i][j][k] = (U2[i][j][k]+U2[i+2][j][k])/(2*h);
578 | 				}
579 | 			}
580 | 		}
581 | 	}
582 | 
583 | 	// y-difference
584 | 	if( dir==Y_DIR){
585 | 		for(int i=0; i<nx; i++){
586 | 			for(int j=0; j<ny-2; j++){
587 | 				for(int k=0; k<nz; k++){	
588 | 					U2_x[i][j][k] = (U2[i][j][k]+U2[i][j+2][k])/(2*h);
589 | 				}
590 | 			}
591 | 		}
592 | 	}
593 | 
594 | 	// z-difference
595 | 	if( dir==Z_DIR){
596 | 		for(int i=0; i<nx; i++){
597 | 			for(int j=0; j<ny; j++){
598 | 				for(int k=0; k<nz-2; k++){	
599 | 					U2_x[i][j][k] = (U2[i][j][k]+U2[i][j][k+2])/(2*h);
600 | 				}
601 | 			}
602 | 		}
603 | 	}
604 | 
605 | 	return;
606 | }
607 | 
608 | // get mixed edge values
609 | void calculate_edge_values( double* Ue,
610 | 							double* Ve,
611 | 							double* We,
612 | 							double* UV,
613 | 							double* UW,
614 | 							double* VW,
615 | 							cuint nx, cuint ny, cuint nz)
616 | {
617 | 	// average each value
618 | 	double* Uay = new double[(nx+1)*(ny+1)*(nz+2)];
619 | 	double* Vax = new double[(nx+1)*(ny+1)*(nz+2)];
620 | 	double* Uaz = new double[(nx+1)*(ny+2)*(nz+1)];
621 | 	double* Wax = new double[(nx+1)*(ny+2)*(nz+1)];
622 | 	double* Vaz = new double[(nx+2)*(ny+1)*(nz+1)];
623 | 	double* Way = new double[(nx+2)*(ny+1)*(nz+1)];
624 | 	
625 | 	average(Ue, Uay, nx+1, ny+2, nz+2, nx+1, ny+1, nz+2, Y_DIR);
626 | 	average(Ve, Vax, nx+2, ny+1, nz+2, nx+1, ny+1, nz+2, X_DIR);
627 | 	average(Ue, Uaz, nx+1, ny+2, nz+2, nx+1, ny+2, nz+1, Z_DIR);
628 | 	average(We, Wax, nx+2, ny+2, nz+1, nx+1, ny+2, nz+1, X_DIR);
629 | 	average(Ve, Vaz, nx+2, ny+1, nz+2, nx+2, ny+1, nz+1, Z_DIR);
630 | 	average(We, Way, nx+2, ny+2, nz+1, nx+2, ny+1, nz+1, Y_DIR);
631 | 
632 | 	uint t;
633 | 	
634 | #pragma omp parallel for shared(UV, Uay, Vax) num_threads(nt)
635 | 	for(int i=0; i<nx+1; i++){
636 | 		for(int j=0; j<ny+1; j++){
637 | 			for(int k=0; k<nz+2; k++){
638 | 				three_d_to_one_d(i,j,k, nx+1, ny+1, t);
639 | 				UV[t] = Uay[t]*Vax[t];
640 | 			}
641 | 		}
642 | 	}
643 | 
644 | #pragma omp parallel for shared(UW, Uaz, Wax) num_threads(nt)
645 | 	for(int i=0; i<nx+1; i++){
646 | 		for(int j=0; j<ny+2; j++){
647 | 			for(int k=0; k<nz+1; k++){
648 | 				three_d_to_one_d(i,j,k, nx+1, ny+2, t);
649 | 				UW[t] = Uaz[t]*Wax[t];
650 | 			}
651 | 		}
652 | 	}
653 | 
654 | #pragma omp parallel for shared(VW, Vaz, Wax) num_threads(nt)
655 | 	for(int i=0; i<nx+2; i++){
656 | 		for(int j=0; j<ny+1; j++){
657 | 			for(int k=0; k<nz+1; k++){
658 | 				three_d_to_one_d(i,j,k, nx+2, ny+1, t);
659 | 				VW[t] = Vaz[t]*Way[t];
660 | 			}
661 | 		}
662 | 	}
663 | 
664 | 	//cleanup
665 | 	delete[] Uay, Vax, Uaz, Wax, Vaz, Way;
666 | 	
667 | 	return;
668 | 
669 | }
670 | 
671 | // consolidate advection terms
672 | void consolidate_advection( double* U,
673 | 							double* V,
674 | 							double* W,
675 | 							double* U2_x,
676 | 							double* V2_y,
677 | 							double* W2_z,
678 | 							double* UV_y,
679 | 							double* UW_z,
680 | 							double* VU_x,
681 | 							double* VW_z,
682 | 							double* WU_x,
683 | 							double* WV_y,
684 | 							cuint nx, cuint ny, cuint nz,
685 | 							cdouble dt )
686 | {
687 | 	// need to truncate some terms
688 | 	uint t0, t1, t2, t3;
689 | 
690 | 	
691 | 	// x-direction
692 | #pragma omp parallel for private(t0, t1, t2, t3) shared(U, U2_x, UV_y, UW_z) num_threads(nt)
693 | 	for(int i=0; i<(nx-1); i++){
694 | 		for(int j=0; j<(ny); j++){
695 | 			for(int k=0; k<(nz); k++){
696 | 				three_d_to_one_d(i,j,k, nx-1,ny, t0);
697 | 				three_d_to_one_d(i,j+1,k+1, nx-1,ny+2, t1);
698 | 				three_d_to_one_d(i+1,j,k+1, nx+1,ny, t2);
699 | 				three_d_to_one_d(i+1,j+1,k, nx+1,ny+2, t3);
700 | 				
701 | 				U[t0] = U[t0] - dt * (U2_x[t1] + UV_y[t2] + UW_z[t3]);
702 | 			}
703 | 		}
704 | 	}
705 | 
706 | 	// y-direction
707 | #pragma omp parallel for private(t0, t1, t2, t3) shared(V, V2_y, VU_x, VW_z) num_threads(nt)
708 | 	for(int i=0; i<(nx); i++){
709 | 		for(int j=0; j<(ny-1); j++){
710 | 			for(int k=0; k<(nz); k++){
711 | 				three_d_to_one_d(i,j,k, nx,ny-1, t0);
712 | 				three_d_to_one_d(i+1,j,k+1, nx+2,ny-1, t1);
713 | 				three_d_to_one_d(i,j+1,k+1, nx,ny+1, t2);
714 | 				three_d_to_one_d(i+1,j+1,k, nx+2,ny+1, t3);
715 | 								
716 | 				V[t0] = V[t0] - dt * (V2_y[t1] + VU_x[t2] + VW_z[t3]);
717 | 			}
718 | 		}
719 | 	}
720 | 
721 | 	// z-direction
722 | #pragma omp parallel for private(t0, t1, t2, t3) shared(W, W2_z, WU_x, WV_y) num_threads(nt)
723 | 	for(int i=0; i<(nx); i++){
724 | 		for(int j=0; j<(ny); j++){
725 | 			for(int k=0; k<(nz-1); k++){
726 | 				three_d_to_one_d(i,j,k, nx,ny, t0);
727 | 				three_d_to_one_d(i+1,j+1,k, nx+2,ny+2, t1);
728 | 				three_d_to_one_d(i,j+1,k+1, nx, ny+2, t2);
729 | 				three_d_to_one_d(i+1,j,k+1, nx+2, ny, t3);
730 | 								
731 | 				W[t0] = W[t0] - dt *
732 | 					(W2_z[t1] + WU_x[t2] + WV_y[t3]);
733 | 			}
734 | 		}
735 | 	}
736 | 	
737 | 	return;
738 | 
739 | }
740 | 


--------------------------------------------------------------------------------
/advection.h:
--------------------------------------------------------------------------------
// advection contributions
// Declarations of the routines that treat the nonlinear advection terms.
#ifndef ADVECTION_H
#define ADVECTION_H

#include "utils.h"

using namespace std;

// treat nonlinear advection terms
// U, V, W    : velocity component arrays
// nx, ny, nz : grid counts per direction
// hx, hy, hz : mesh sizes
// dt         : time step
// bcs        : six boundary values (x0 xl y0 yl z0 zl) per row
// NOTE(review): presumably U, V, W are updated in place -- confirm in advection.C
void advection( double* U,
				double* V,
				double* W,
				cuint nx, cuint ny, cuint nz,
				cdouble hx, cdouble hy, cdouble hz,
				cdouble dt,
				cdouble bcs[][6]
				);
	
// generate grid matrix
// U is the source array (nx x ny x nz), Ue the extended output array
// (nxe x nye x nze) that also carries the boundary values taken from bc
// dir: direction of velocity: 0:x 1:y 2:z
void grid_matrix( double* U,
				  double* Ue,
				  cuint nx, cuint ny, cuint nz,
				  cuint nxe, cuint nye, cuint nze,
				  cuint dir,
				  cdouble* bc );

// average values in whatever direction
// Ue: input of size nxe x nye x nze, Ua: output of size nxa x nya x nza,
// dir: direction in which the averaging is done
void average( const double* Ue,
			  double* Ua,
			  cuint nxe, cuint nye, cuint nze,
			  cuint nxa, cuint nya, cuint nza,
			  cuint dir );

// get maximum value of the 3d array
double max_3d_array( const boost::multi_array<double, 3>& U );


// get upwinding differences
// U: input field, Ud: output differences, dir: difference direction
void upwind_difference( const boost::multi_array<double, 3>& U,
						boost::multi_array<double, 3>& Ud,
						cuint dir );

// get 1d staggered difference
// Ua is an averaged U value at the cell vertices
// get the difference value at the center of cell
// UV: input (nx x ny x nz), UV_x: output (nx_x x ny_x x nz_x),
// h: mesh size in the difference direction dir
void staggered_first_difference( const double* UV,
								 double* UV_x,
								 cuint nx, cuint ny, cuint nz,
								 cuint nx_x, cuint ny_x, cuint nz_x,
								 cdouble h,
								 cuint dir
								 );

// get central first difference at center of element
// U2: input field, U2_x: output difference, h: mesh size, dir: direction
void central_first_difference( const boost::multi_array<double, 3>& U2,
							   boost::multi_array<double, 3>& U2_x,
							   cdouble h,
							   cuint dir );


// get mixed edge values
// Ue, Ve, We: extended velocity arrays (inputs)
// UV, UW, VW: products of the cross-averaged velocities (outputs)
void calculate_edge_values( double* Ue,
							double* Ve,
							double* We,
							double* UV,
							double* UW,
							double* VW,
							cuint nx, cuint ny, cuint nz);

// consolidate advection terms
// subtracts dt times the summed derivative terms from U, V and W in place
void consolidate_advection( double* U,
							double* V,
							double* W,
							double* U2_x,
							double* V2_y,
							double* W2_z,
							double* UV_y,
							double* UW_z,
							double* VU_x,
							double* VW_z,
							double* WU_x,
							double* WV_y,
							cuint nx, cuint ny, cuint nz,
							cdouble dt );

#endif //ADVECTION_H
88 | 


--------------------------------------------------------------------------------
/analysis.m:
--------------------------------------------------------------------------------
% Post-processing script: loads the result files results_<i>.dat and shows
% one 3-D quiver plot per file.
% Columns 1-3 of a result file are used as arrow positions and columns 5-7
% as arrow components.
% NOTE(review): column 4 is presumably the pressure -- confirm against the
% writer of results_<i>.dat.
clear all
close all

% number of result files to plot (files are numbered from 0)
nt = 1;

for i=0:nt-1
    U =  load(sprintf('results_%i.dat',i));
    % figure handles start at 1, file numbers at 0
    figure(i+1);
    quiver3(U(:,1),U(:,2),U(:,3), U(:,5), U(:,6), U(:,7));
    title(sprintf('velocity at i=%i', i))
%     pause
    
end

% --- disabled experiments below (kept for reference) ---

% plot(U(:,1), U(:,4));

% 
% 
% for i=1:nt
%     U{i} = load(sprintf('results_%i.dat',i));
%     legend_names{i} = sprintf('level %i', i); 
% end
% U{max_level+1} = load('results_100.dat');
% legend_names{max_level+1} = 'final solution';
% legend_names{max_level+2} = 'exact solution';
% 
% hold on
% col=hsv(max_level+3);
% for i=1:max_level+1
%    h= plot( U{i}(1:end,1),U{i}(1:end,4), '-o');
%    set(h, 'Color',col(i+1,:));
% end
% 
% x=0:0.01:1;
% plot(x, -(1/(2*pi))^2*sin(x*2*pi));
% 
% legend(legend_names,0);
% 
% hold off
% 
% % v=zeros(9,9,9);
% 
% % for i=1:length(U)
% %     v(U(i,1)+1,U(i,2)+1,U(i,3)+1) = U(i,4);
% % end
% %     
% % x=U(:,1);
% % y=U(:,2);
% % z=U(:,3);
% % % v=U(:,4);
% % 
% % [x y z v] = flow;
% % h=contourslice(x,y,z,v,[1:9],[],[0], linspace(-8,2,10));
% % axis([0 10 -3 3 -3 3]); daspect([1 1 1])
% % camva(24); camproj perspective;
% % campos([-3 -15 5])
% % set(gcf, 'Color', [.3 .3 .3], 'renderer', 'zbuffer')
% % set(gca, 'Color', 'black' , 'XColor', 'white', ...
% %                'YColor', 'white' , 'ZColor', 'white')
% % box on


--------------------------------------------------------------------------------
/assemble.C:
--------------------------------------------------------------------------------
  1 | #include "assemble.h"
  2 | #include "utils.h"
  3 | #include "msort.h"
  4 | 
  5 | // 2nd order stencil
  6 | void fd_matrix( double** M,
  7 | 				cuint I, cuint J, cuint K,
  8 | 				const double dx2i,
  9 | 				const double dy2i,
 10 | 				const double dz2i,
 11 | 				cuint n_dof
 12 | 				 )
 13 | {
 14 | #pragma omp parallel for shared(M) num_threads(nt)
 15 | 	for(int i=0; i<I; i++){
 16 | 		for(int j=0; j<J; j++){
 17 | 			for(int k=0; k<K; k++){
 18 | 				unsigned int p,q;
 19 | 				unsigned int t_011,t_111,t_211,t_101,t_121,t_110,t_112;
 20 | 				three_d_to_one_d(i,j,k, I,J, t_111);
 21 | 				if(i==0)
 22 | 					three_d_to_one_d(I-1,j,k, I,J, t_011);
 23 | 				else
 24 | 					three_d_to_one_d(i-1,j,k, I,J, t_011);
 25 | 				if(i==(I-1))
 26 | 					three_d_to_one_d(0,j,k, I,J, t_211);
 27 | 				else
 28 | 					three_d_to_one_d(i+1,j,k, I,J, t_211);
 29 | 
 30 | 				if(j==0)
 31 | 					three_d_to_one_d(i,J-1,k, I,J, t_101);
 32 | 				else
 33 | 					three_d_to_one_d(i,j-1,k, I,J, t_101);
 34 | 				if(j==(J-1))
 35 | 					three_d_to_one_d(i,0,k, I,J, t_121);
 36 | 				else
 37 | 					three_d_to_one_d(i,j+1,k, I,J, t_121);
 38 | 								
 39 | 				if(k==0)
 40 | 					three_d_to_one_d(i,j,K-1, I,J, t_110);
 41 | 				else
 42 | 					three_d_to_one_d(i,j,k-1, I,J, t_110);
 43 | 				if(k==(K-1))
 44 | 					three_d_to_one_d(i,j,0, I,J, t_112);
 45 | 				else
 46 | 					three_d_to_one_d(i,j,k+1, I,J, t_112);
 47 | 				
 48 | 				// I
 49 | 				M[t_111][t_011] += dx2i;
 50 | 				M[t_111][t_111] += -2*dx2i;
 51 | 				M[t_111][t_211] += dx2i;
 52 | 				// J
 53 | 				M[t_111][t_101] += dy2i;
 54 | 				M[t_111][t_111] += -2*dy2i;
 55 | 				M[t_111][t_121] += dy2i;
 56 | 				// K
 57 | 				M[t_111][t_110] += dz2i;
 58 | 				M[t_111][t_111] += -2*dz2i;
 59 | 				M[t_111][t_112] += dz2i;
 60 | 
 61 | 			}
 62 | 		}
 63 | 	}
 64 | 
 65 | 	// cout<<"setting global constraint"<<endl;
 66 | 	// global constraint
 67 | 	for(int i=0; i<(n_dof); i++){
 68 | 		M[i][n_dof-1] = 1;
 69 | 		M[n_dof-1][i] = 1;
 70 | 	}
 71 | 	M[n_dof-1][n_dof-1] = n_dof;
 72 | 
 73 | }
 74 | 
// 2nd order stencil (sparse assembly)
//
// Builds the same periodic second-difference stencil as fd_matrix, but as a
// list of (row, col, value) triplets that is then merged, sorted,
// consolidated and converted to CSR arrays.
//   M_sp    : out: consolidated triplets (appended to)
//   val     : out: CSR values (resized to M_sp.size())
//   col_ind : out: CSR column indices (resized to M_sp.size())
//   row_ptr : out: CSR row boundaries (appended to)
//   I, J, K : grid counts per direction
//   dx2i, dy2i, dz2i : stencil weights per direction
//   n_dof   : number of unknowns; row/column n_dof-1 carries the constraint
// Side effect: the consolidated triplets are also written to
// "test_sp_matrix.dat" on every call.
// NOTE(review): no leading 0 is pushed into row_ptr here, while jacobi_sparse
// reads row_ptr[i]..row_ptr[i+1] starting at i=0 -- presumably the caller
// seeds row_ptr with 0; confirm.
// NOTE(review): unlike fd_matrix, the constraint entries are added on top of
// existing stencil entries (duplicates are summed during consolidation)
// instead of overwriting them -- confirm this is intended.
void fd_matrix_sparse( 	vector<tuple <uint, uint, double> >& M_sp,
						vector<double>& val,
						vector<uint>& col_ind,
						vector<uint>& row_ptr,
						cuint I, cuint J, cuint K,
						const double dx2i,
						const double dy2i,
						const double dz2i,
						cuint n_dof
						)
{

	// initialize sparse matrix (row#, col#, value)
	// one triplet list per thread, so threads never push into the same vector
	vector<vector<tuple <uint, uint, double> > > M;
	M.resize(nt);

	
#pragma omp parallel  shared(M) num_threads(nt)
	{
		// index of this thread's private triplet list
		cuint myrank = omp_get_thread_num();
		
#pragma omp for 
	for(int i=0; i<I; i++){
		for(int j=0; j<J; j++){
			for(int k=0; k<K; k++){
				unsigned int p,q;
				// flat indices: t_111 is the node itself, the others are its
				// -x/+x, -y/+y, -z/+z neighbours (wrapping at the ends)
				unsigned int t_011,t_111,t_211,t_101,t_121,t_110,t_112;
				three_d_to_one_d(i,j,k, I,J, t_111);
				if(i==0)
					three_d_to_one_d(I-1,j,k, I,J, t_011);
				else
					three_d_to_one_d(i-1,j,k, I,J, t_011);
				if(i==(I-1))
					three_d_to_one_d(0,j,k, I,J, t_211);
				else
					three_d_to_one_d(i+1,j,k, I,J, t_211);

				if(j==0)
					three_d_to_one_d(i,J-1,k, I,J, t_101);
				else
					three_d_to_one_d(i,j-1,k, I,J, t_101);
				if(j==(J-1))
					three_d_to_one_d(i,0,k, I,J, t_121);
				else
					three_d_to_one_d(i,j+1,k, I,J, t_121);
								
				if(k==0)
					three_d_to_one_d(i,j,K-1, I,J, t_110);
				else
					three_d_to_one_d(i,j,k-1, I,J, t_110);
				if(k==(K-1))
					three_d_to_one_d(i,j,0, I,J, t_112);
				else
					three_d_to_one_d(i,j,k+1, I,J, t_112);

				// the diagonal is pushed three times; the duplicates are
				// summed in the consolidation step further down
				// I
				sparse_add(M[myrank], t_111, t_011, dx2i);
				sparse_add(M[myrank], t_111, t_111 , -2*dx2i);
				sparse_add(M[myrank], t_111, t_211 , dx2i);
				// J
				sparse_add(M[myrank], t_111, t_101 , dy2i);
				sparse_add(M[myrank], t_111, t_111 , -2*dy2i);
				sparse_add(M[myrank], t_111, t_121 , dy2i);
				// K
				sparse_add(M[myrank], t_111, t_110 , dz2i);
				sparse_add(M[myrank], t_111, t_111 , -2*dz2i);
				sparse_add(M[myrank], t_111, t_112 , dz2i);

			}
		}
	} // end for


	// cout<<"setting global constraint"<<endl;
	// global constraint: ones in the last row and column
#pragma omp for
	for(int i=0; i<(n_dof-1); i++){
		sparse_add(M[myrank], i, n_dof-1, 1);
		sparse_add(M[myrank], n_dof-1, i, 1);
			// M[i][n_dof-1] = 1;
		// M[n_dof-1][i] = 1;
	}
	// the corner entry is added by one thread only
	if(myrank==0)
		sparse_add(M[myrank], n_dof-1, n_dof-1,
					  n_dof);
	// else
	// 	sparse_add(M[myrank], n_dof-1, n_dof-1,
	// 				  -get<2>(M[myrank][n_dof-1]));
	// M[n_dof-1][n_dof-1] = n_dof;
	
	// sort and consolidate sparse matrix (row#, col#, value)
	// cout<<"sorting..."<<endl;
	// sort(M[myrank].begin(), M[myrank].end(), comp_pairs);
	// cout<<"sorting done"<<endl;
	// vector<tuple <uint, uint, double> >M_sp;
	// M_sp[myrank].push_back(M[0]);
	// (unused inside the parallel region)
	uint ct=0;
			
// #pragma omp critical
// 	{
// 		cout<<"thread #: "<<omp_get_thread_num()<<endl;
// 		for(int i=0; i<M_sp[myrank].size(); i++){
// 			cout<<"i: "<<get<0>(M_sp[myrank][i])<<" j: "
// 				<<get<1>(M_sp[myrank][i])<<" v: "
// 				<<get<2>(M_sp[myrank][i])<<endl;
// 		}
// 	}
	
	} // end parallel region		

	// merge and sort
	// all per-thread lists are appended to M[0], which is then sorted
	cout<<"sorting..."<<endl;
	for(int i=1; i<nt; i++)
		M[0].insert( M[0].end(), M[i].begin(), M[i].end() );
	// sort(M[0].begin(), M[0].end(), comp_pairs);
	// scratch buffer of the same length for mergesort (declared in msort.h)
	vector<tuple <uint, uint, double> > tmp;
	tmp.resize(M[0].size());	
	mergesort(&M[0][0], nt, M[0].size(), &tmp[0] );


	cout<<"done"<<endl;
	
	// consolidate: consecutive triplets with identical (row, col) are summed
	// into one entry; ct is the index of the entry currently being built
	M_sp.push_back(M[0][0]);
	uint ct=0;
	for(int i =1; i<M[0].size(); i++){
		if( (get<0>(M_sp[ct])==get<0>(M[0][i]))
			&& (get<1>(M_sp[ct])==get<1>(M[0][i])) ){
			// get<0>(M_sp[ct]) += get<0>(M[0][i]);
			// get<1>(M_sp[ct]) += get<1>(M[0][i]);
			get<2>(M_sp[ct]) += get<2>(M[0][i]);
		}
		else{
			M_sp.push_back(M[0][i]);
			ct++;
		}

	}

   
	// convert to CSR format
	cout<<"converting to CSR format"<<endl;
	val.resize(M_sp.size(),0.0);
	col_ind.resize(M_sp.size(), 0);
	
	// values and column indices are copied one-to-one from the triplets
#pragma omp parallel for shared(val, col_ind, M_sp) num_threads(nt)
	for(int i=0; i<M_sp.size(); i++){
		val[i] = get<2>(M_sp[i]);
		col_ind[i] = get<1>(M_sp[i]);
	}
	// a row boundary is recorded wherever the row number changes, followed
	// by the total entry count as the closing boundary
	for(int i=1; i<M_sp.size(); i++){
		if(get<0>(M_sp[i])!=get<0>(M_sp[i-1]))
		   row_ptr.push_back(i);
	}
	row_ptr.push_back(M_sp.size());

	// for(int i=0; i<row_ptr.size(); i++)
	// 	cout<<row_ptr[i]<<endl;
	cout<<"done"<<endl;
	
	// output to file for testing purpose
	ofstream file_out("test_sp_matrix.dat");
	for(int i=0; i<M_sp.size(); i++){
		file_out<<get<0>(M_sp[i])<<" "<<get<1>(M_sp[i])
			<<" "<<get<2>(M_sp[i])<<endl;
	}
	file_out.close();
	
}
245 | 
246 | // assemble a load vector (only on level 0)
247 | void load_vector( double* F,
248 | 				  cuint n_dof,
249 | 				  cuint I,
250 | 				  cuint J,
251 | 				  cuint K
252 | 				  )
253 | {
254 | 	// construct load vector
255 | #pragma omp parallel for shared(F) num_threads(nt)
256 | 	for(int n=0; n<n_dof-1; n++){
257 | 		unsigned int i,j,k;
258 | 		one_d_to_three_d( n, I, J, i, j, k);
259 | 	    F[n] = sin(double(i)/double(I)*2*pi); // solution=(2pi)^2*sin(2pi*x);
260 | 
261 | 	    // F[n] = sin(double(i)/double(I)*2*pi) * sin(double(j)/double(J)*2*pi)
262 | 			// * sin(double(k)/double(K)*2*pi);
263 |      }
264 | 
265 | 	// global constraint
266 | 	F[n_dof-1] = 0;
267 | 
268 | }
269 | 
270 | // set dirichlet boudnary condition
271 | // for periodic domain, it is sufficient to set only one point
272 | int boundary_conditins( const unsigned int n_dof,
273 | 		const unsigned int I,
274 | 		const unsigned int J,
275 | 		const unsigned int K,
276 | 		double** M,
277 | 		double* F
278 | 		)
279 |  {
280 | 	int n_bd=0;
281 | 	// boundary conditions
282 | 	uint t;
283 | 	three_d_to_one_d(0,0,0, I,J, t);		
284 | 	for(int n=0; n<n_dof; n++){
285 | 	    M[n][t]=0;
286 | 		M[t][n]=0;
287 | 	}
288 | 	M[t][t] = 1;
289 | 	F[t] = 1;
290 | 	
291 | 	// #pragma omp parallel for shared(M)
292 | 	// for(int i=0; i<I; i++){
293 | 	// 	for(int j=0; j<J; j++){
294 | 	// 		for(int k=0; k<K; k++){
295 | 	// 			if(i==0 || j==0 || k==0
296 | 	// 				|| i==(I-1) || j==(J-1) || k==(K-1) ){
297 | 	// 				n_bd++;
298 | 	// 				unsigned int t;
299 | 	// 				three_d_to_one_d(i,j,k, I,J, t);
300 | 					
301 | 	// 				for(int n=0; n<n_dof; n++){
302 | 	// 					M[n][t]=0;
303 | 	// 					M[t][n]=0;
304 | 	// 				}
305 | 	// 				M[t][t] = 1;
306 | 	// 			}
307 | 	// 		}
308 | 	// 	}
309 | 	// }
310 | 
311 | 	return n_bd;
312 | }
313 |  
314 | // add index and value into a sparse matrix
315 | void sparse_add( vector<tuple<uint, uint, double > >& M,
316 | 				 cuint i, cuint j, cdouble v)
317 | {
318 | 	tuple<uint, uint, double > M_tmp(i, j, v);
319 | 	M.push_back(M_tmp);
320 | 
321 | 	// vector<int> idx_tmp(2, 0.0);
322 | 	// idx_tmp[0] = i; idx_tmp[1]=j;
323 | 	// idx.push_back(idx_tmp);
324 | 	// value.push_back(v);
325 | 					
326 | }
327 | 
328 | // insert index and value into a sparse matrix
329 | // note that M should be sorted before use
330 | void sparse_insert( vector<tuple<uint, uint, double > >& M,
331 | 				 cuint i, cuint j, cdouble v)
332 | {
333 | 	// replace the value
334 | 	for(int mn=0; mn<M.size(); mn++){
335 | 		if( get<0>(M[mn])==i && get<1>(M[mn])==j){
336 | 			get<2>(M[mn]) = v;
337 | 			return;
338 | 		}
339 | 	}
340 | 
341 | 	// if the value does not exist, add the value
342 | 	sparse_add(M, i,j, v);
343 | 	
344 | }
345 | 
346 | 
347 | // merge two sorted arrays 
348 |  void merge(vector<tuple <uint, uint, double> >& left,
349 | 		vector<tuple <uint, uint, double> >& right,
350 | 		cuint n_left, cuint n_right,
351 | 		vector<tuple <uint, uint, double> >& result,
352 | 		vector<tuple <uint, uint, double> >& tmp
353 | 		)
354 | {
355 | 	uint it = 0;
356 |     uint left_it = 0, right_it = 0;
357 | 	
358 |     while(left_it < n_left && right_it < n_right ) {
359 | 		it = left_it+right_it;
360 | 		// cout<<it<<endl;
361 | 		if(comp_pairs(left[left_it], right[right_it])) {
362 | 			tmp[it] = left[left_it];
363 | 			left_it++;
364 | 		}
365 | 		else{
366 | 			tmp[it] = right[right_it];
367 | 			right_it++;
368 | 		}
369 | 	}
370 | 
371 |     // Push the remaining data from both vectors onto the tmp
372 |     while(left_it < n_left) {
373 | 		it = left_it+right_it;
374 |         tmp[it] = left[left_it];
375 |         left_it++;
376 |     }
377 | 
378 |     while(right_it < n_right) {
379 | 		it = left_it+right_it;
380 |         tmp[it] = right[right_it];
381 |         right_it++;
382 |     }
383 | 
384 | 	// Finally put everyhing in result array
385 | 	for(int i=0; i<(n_right+n_left); i++)
386 | 		result[i] = tmp[i];
387 | 
388 | }
389 | 


--------------------------------------------------------------------------------
/assemble.h:
--------------------------------------------------------------------------------
// assemble matrix and vector
// Declarations of the dense and sparse assembly routines.
#ifndef ASSEMBLE_H
#define ASSEMBLE_H

#include "utils.h"

using namespace std;

// 2nd order stencil, dense version
// M: n_dof x n_dof matrix that is accumulated into; I, J, K: grid counts;
// dx2i, dy2i, dz2i: stencil weights per direction
void fd_matrix( double** M,
				cuint I, cuint J, cuint K,
				const double dx2i,
				const double dy2i,
				const double dz2i,
				cuint n_dof
				);

// 2nd order stencil
// sparse version: M_sp receives the (row, col, value) triplets and
// val / col_ind / row_ptr the CSR arrays
void fd_matrix_sparse( 	vector<tuple <uint, uint, double> >& M_sp,
						vector<double>& val,
						vector<uint>& col_ind,
						vector<uint>& row_ptr,
						cuint I, cuint J, cuint K,
						const double dx2i,
						const double dy2i,
						const double dz2i,
						cuint n_dof
						);

// assemble the load vector F (n_dof entries) for an I x J x K grid
void load_vector( double* F,
				  cuint n_dof,
				  cuint I,
				  cuint J,
				  cuint K
				  );

// set the dirichlet condition in M and F; returns the boundary node count
int boundary_conditins( cuint n_dof,
						cuint I,
						cuint J,
						cuint K,
						double** M,
						double* F
						);

// add index and value into a sparse matrix
void sparse_add( vector<tuple<uint, uint, double > >& M,
					cuint i, cuint j, cdouble v);

// merge two sorted arrays
// merges the first n_left / n_right entries of left / right into result,
// using tmp as scratch space
void merge(vector<tuple <uint, uint, double> >& left,
		   vector<tuple <uint, uint, double> >& right,
		   cuint n_left, cuint n_right,
		   vector<tuple <uint, uint, double> >& result,
		   vector<tuple <uint, uint, double> >& tmp
		   );


#endif //ASSEMBLE_H
58 | 


--------------------------------------------------------------------------------
/jacobi.C:
--------------------------------------------------------------------------------
  1 | #include "jacobi.h"
  2 | #include "assemble.h"
  3 | #include "IO.h"
  4 | 
  5 | // jacobi method
  6 | void jacobi( cdouble tol,
  7 | 			 cuint max_iteration,
  8 | 			 cuint n_dof,
  9 | 			 double* u_new,
 10 | 			 double* u_old,
 11 | 			 double** M,
 12 | 			 double* F,
 13 | 			 double& Er,
 14 | 			 double* R)
 15 | {
 16 | 	// iteration counter
 17 | 	int ct = 0;
 18 | 	cdouble tol2 = tol*tol;
 19 | 	
 20 | 	while(Er>tol2 && ct<max_iteration){
 21 | #pragma omp parallel for shared(u_old, u_new) num_threads(nt)
 22 | 		for(int i=0;i<n_dof;i++)
 23 | 			u_old[i]=u_new[i];
 24 | 
 25 | #pragma omp parallel for shared(M,F,u_old,u_new) num_threads(nt)
 26 | 		for(int i=0; i<n_dof; i++){
 27 | 			double S=0;
 28 | 			for(int j=0; j<n_dof; j++){
 29 | 				if(i!=j)
 30 | 					S += M[i][j]*u_old[j]; 
 31 | 			}
 32 | 			// if(M[i][i]==0) cout<<"zero "<<i<<endl;
 33 | 			// if(F[i] != F[i]) cout<<F[i]<<" "<<i<<endl;
 34 | 			u_new[i] = 1/M[i][i] * (F[i] - S);
 35 | 			// cout<<u_new[i]<<endl;
 36 | 		}
 37 | 
 38 | 		
 39 | 		Er = convergence_check(M, u_new, F, R, n_dof);
 40 | 		// cout<<"Er: "<<Er<<endl;
 41 | 		ct++;
 42 | 	}
 43 | 	
 44 | 	
 45 | 	if(max_iteration==0) 		
 46 | 		Er = convergence_check(M, u_new, F, R, n_dof);
 47 | 
 48 | 	if(Er<tol2)
 49 | 		cout<<"convergence reached after "<<ct<<"iterations"<<endl;
 50 | 	
 51 | 	return;
 52 | }
 53 | 
 54 | // sparse jacobi method
 55 | void jacobi_sparse( cdouble tol,
 56 | 					cuint max_iteration,
 57 | 					cuint n_dof,
 58 | 					double* U,
 59 | 					double* U_tmp,
 60 | 					const vector<double>& val,
 61 | 					const vector<uint>& col_ind,
 62 | 					const vector<uint>& row_ptr,
 63 | 					double* F,
 64 | 					double& Er,
 65 | 					double* R)
 66 | {
 67 | 	// iteration counter
 68 | 	int ct = 0;
 69 | 	cdouble tol2 = tol*tol;
 70 | 	double E=tol2*100;
 71 | 	
 72 | #pragma omp parallel shared(F, U_tmp, U, R, val,col_ind, row_ptr, ct) num_threads(nt)
 73 | 	{
 74 | 		while(E>tol2 && ct<max_iteration){
 75 | #pragma omp for
 76 | 			for(int i=0;i<n_dof;i++)
 77 | 				U_tmp[i]=U[i];
 78 | 
 79 | 			// double sta=omp_get_wtime();
 80 | #pragma omp for 
 81 | 			for(int i=0; i<row_ptr.size()-1; i++){
 82 | 				double S=0;
 83 | 				double T=0;
 84 | 				for(int j=row_ptr[i]; j<row_ptr[i+1]; j++){
 85 | 					// cout<<"U_tmp "<<U_tmp[col_ind[j]]<<endl;
 86 | 					if(i!=col_ind[j])
 87 | 						S += val[j]*U_tmp[col_ind[j]];
 88 | 					else{ // get diagonal element
 89 | 						T = val[j];					
 90 | 					}
 91 | 				}
 92 | 				U[i] = 1/T * (F[i]-S);
 93 | 			}
 94 | 
 95 | 			// cout<<"time: "<<omp_get_wtime()-sta<<endl;		
 96 | 			// Er = convergence_check_sparse(val, col_ind, row_ptr, U, F, R, n_dof);
 97 | 						E=0;
 98 | #pragma omp for reduction(+:E)
 99 | 			for(int i=0; i<row_ptr.size()-1; i++){
100 | 			    R[i] = 0.0;
101 | 			    for(int j=row_ptr[i]; j<row_ptr[i+1]; j++){
102 | 			        R[i] -= val[j]*U[col_ind[j]];
103 | 		        }
104 | 			    R[i] += F[i];
105 | 			    E += R[i]*R[i];
106 | 		}
107 | 			// cout<<"end "<<E<<endl;
108 | 		
109 | 			// if(int(max_iteration/1000!=0))
110 | 				// if(ct%int(max_iteration/1000)==0)
111 | 			if(!omp_get_thread_num())
112 | 				ct++;
113 | 
114 | #pragma omp barrier
115 | 			
116 | 		} // end while
117 | 			
118 | 
119 | 		} //end parallel
120 | 
121 | 			Er=E;
122 | 
123 | 			
124 | 			if(max_iteration==0) 		
125 | 				// Er = convergence_check(M, U, F, R, n_dof);
126 | 				Er = convergence_check_sparse(val, col_ind, row_ptr, U, F, R, n_dof);
127 | 
128 | 	
129 | 			cout<<"convergence reached after "<<ct<<" iterations"<<endl;
130 | 	
131 | 	
132 | 	return;
133 | }
134 | 
135 | double convergence_check ( double** M,
136 | 						   double* U,
137 | 						   double* F,
138 | 						   double* R,
139 | 						   cuint n_dof
140 | 						   )
141 | {
142 | 	double E=0;
143 | 	#pragma omp parallel for shared(R,M,U,F) num_threads(nt) reduction(+:E)
144 | 	for(int i=0; i<n_dof; i++){
145 | 		R[i] = 0.0;
146 | 		for(int j=0; j<n_dof; j++){
147 | 			R[i] -= M[i][j]*U[j];
148 | 		}
149 | 		R[i] += F[i];
150 | 		E += R[i]*R[i];
151 | 		
152 | 	}
153 | 	
154 | 	return E; 
155 | }
156 | 
157 | double convergence_check_sparse ( const vector<double>& val,
158 | 								  const vector<uint>& col_ind,
159 | 								  const vector<uint>& row_ptr,
160 | 								  double* U,
161 | 								  double* F,
162 | 								  double* R,
163 | 								  cuint n_dof)
164 | {
165 | 	double E=0;
166 | 	#pragma omp parallel for shared(R,val,col_ind,row_ptr,U,F) num_threads(nt) reduction(+:E)
167 | 	for(int i=0; i<row_ptr.size()-1; i++){
168 | 		R[i] = 0.0;
169 | 		for(int j=row_ptr[i]; j<row_ptr[i+1]; j++){
170 | 			R[i] -= val[j]*U[col_ind[j]];
171 | 		}
172 | 		R[i] += F[i];
173 | 		E += R[i]*R[i];
174 | 	}
175 | 	
176 | 	return E; 
177 | }
178 | 
179 | 
180 | // // sparse jacobi method
181 | // void gs_sparse( cdouble tol,
182 | // 				cuint max_iteration,
183 | // 				cuint n_dof,
184 | // 				double* U,
185 | // 				double* U_tmp,
186 | // 				const vector<double>& val,
187 | // 				const vector<uint>& col_ind,
188 | // 				const vector<uint>& row_ptr,
189 | // 				double* F,
190 | // 				double& Er,
191 | // 				double* R)
192 | // {
193 | // 	// iteration counter
194 | // 	int ct = 0;
195 | // 	cdouble tol2 = tol*tol;
196 | 		
197 | // 	while(Er>tol2 && ct<max_iteration){
198 | // 		for(int i=0;i<n_dof;i++)
199 | // 			U_tmp[i]=U[i];
200 | 
201 | 
202 | // 		for(int i=0; i<row_ptr.size()-1; i+=2){
203 | // 			double A=0;
204 | // 			double B=0;
205 | 
206 | // 			for(int j=row_ptr[i]; j<row_ptr[i+1]; j++){
207 | // 				if(j<i)
208 | // 					A += val[j]*U_
209 | 
210 | 
211 | // 			}
212 | 				
213 | 
214 | 
215 | // 		}
216 | 		
217 | // 		// double sta=omp_get_wtime();
218 | // #pragma omp parallel for num_threads(nt) shared(F,U_tmp,U) 
219 | //  		for(int i=0; i<row_ptr.size()-1; i++){
220 | // 			double S=0;
221 | // 			double T=0;
222 | // 			for(int j=row_ptr[i]; j<row_ptr[i+1]; j++){
223 | // 				// cout<<"U_tmp "<<U_tmp[col_ind[j]]<<endl;
224 | // 				if(i!=col_ind[j])
225 | // 					S += val[j]*U_tmp[col_ind[j]];
226 | // 				else{ // get diagonal element
227 | // 					T = val[j];					
228 | // 				}
229 | // 			}
230 | // 			U[i] = 1/T * (F[i]-S);
231 | // 		}
232 | 
233 | // 		// cout<<"time: "<<omp_get_wtime()-sta<<endl;		
234 | // 		Er = convergence_check_sparse(val, col_ind, row_ptr, U, F, R, n_dof);
235 | 
236 | // 		if(int(max_iteration/1000!=0))
237 | // 			if(ct%int(max_iteration/1000)==0)
238 | // 				cout<<"i: "<<ct<<" Er: "<<Er<<endl;
239 | // 		ct++;
240 | // 	}
241 | 
242 | // 	if(max_iteration==0) 		
243 | // 		// Er = convergence_check(M, U, F, R, n_dof);
244 | // 		Er = convergence_check_sparse(val, col_ind, row_ptr, U, F, R, n_dof);
245 | 
246 | 	
247 | // 	cout<<"convergence reached after "<<ct<<" iterations"<<endl;
248 | 	
249 | 	
250 | // 	return;
251 | // }
252 | 


--------------------------------------------------------------------------------
/jacobi.h:
--------------------------------------------------------------------------------
// jacobi method
// Declarations of the dense and sparse Jacobi solvers and residual checks.
#ifndef JACOBI_H
#define JACOBI_H

#include "utils.h"

using namespace std;

// dense jacobi method for M*u = F
// tol: tolerance, max_iteration: sweep limit, n_dof: number of unknowns,
// u_new: iterate (in/out), u_old: scratch for the previous iterate,
// E: squared residual norm (in/out), R: residual vector (out)
void jacobi( cdouble tol,
			 cuint max_iteration,
			 cuint n_dof,
			 double* u_new,
			 double* u_old,
			 double** M,
			 double* F,
			 double& E,
			 double* R);

// sparse jacobi method
// same iteration on a CSR matrix (val, col_ind, row_ptr); U is the iterate
// (in/out), U_tmp scratch, Er the squared residual norm (out), R the
// residual vector (out)
void jacobi_sparse( cdouble tol,
					cuint max_iteration,
					cuint n_dof,
					double* U,
					double* U_tmp,
					const vector<double>& val,
					const vector<uint>& col_ind,
					const vector<uint>& row_ptr,
					double* F,
					double& Er,
					double* R);

// stores R = F - M*U and returns the sum of squared residual entries
double convergence_check ( double** M,
						   double* U,
						   double* F,
						   double* R,
						   cuint n_dof
						   );

// CSR counterpart of convergence_check
double convergence_check_sparse ( const vector<double>& val,
								  const vector<uint>& col_ind,
								  const vector<uint>& row_ptr,
								  double* U,
								  double* F,
								  double* R,
								  cuint n_dof);

#endif //JACOBI_H
48 | 
49 | 


--------------------------------------------------------------------------------
/main.C:
--------------------------------------------------------------------------------
  1 | #include "jacobi.h"
  2 | #include "utils.h"
  3 | #include "IO.h"
  4 | #include "v_cycle.h"
  5 | #include "advection.h"
  6 | #include "viscosity.h"
  7 | #include "pressure.h"
  8 | 
// number of threads
// global read by the num_threads(nt) clauses of the OpenMP pragmas; main()
// sets it from the first command-line argument
uint nt;
 11 | 
 12 | // set up initial conditions
 13 | void initial_conditions( double* U,
 14 | 						 double* V,
 15 | 						 double* W,
 16 | 						 double* P,
 17 | 						 cuint nx, cuint ny, cuint nz )
 18 | {
 19 | 	for(int i=0; i<(nx-1)*(ny)*(nz); i++)
 20 | 		U[i]=0.0;
 21 | 
 22 | 	for(int i=0; i<(nx)*(ny-1)*(nz); i++)
 23 | 		V[i]=0.0;
 24 | 
 25 | 	for(int i=0; i<(nx)*(ny)*(nz-1); i++)
 26 | 		W[i]=0.0;
 27 | 
 28 | 	for(int i=0; i<(nx)*(ny)*(nz); i++)
 29 | 		P[i] = 0.0;
 30 | 	
 31 | 	return;
 32 | }
 33 | 
 34 | // main function!
 35 | int main( int argc, char** argv )
 36 | {
 37 | 	// initialize constants
 38 | 	cdouble nu = 100; // kinetic viscosity (mu/rho)
 39 | 	double dt = 0.1; //time step
 40 | 	cdouble tf = 0.1; // final time
 41 | 	
 42 | 	// domain size
 43 | 	cdouble lx = 1.0; 
 44 | 	cdouble ly = 1.0;
 45 | 	cdouble lz = 1.0;
 46 | 
 47 | 	// domain cornders
 48 | 	cdouble xmin = 0.0;
 49 | 	cdouble ymin = 0.0;
 50 | 	cdouble zmin = 0.0;
 51 | 	cdouble xmax = xmin+lx;
 52 | 	cdouble ymax = ymin+ly;
 53 | 	cdouble zmax = zmin+lz;
 54 | 	
 55 | 	// number of gridpointts in each dimension
 56 | 	nt=1;
 57 | 	uint nx=10;
 58 | 	uint ny=10;
 59 | 	uint nz=10; // problem size (n_dof=n_size^3)
 60 | 	uint max_level=0; // maximum v-cycle level
 61 | 	if(argc>5){
 62 | 		nt = atoi(argv[1]);
 63 | 		max_level = atoi(argv[2]);
 64 | 		nx = atoi(argv[3]);
 65 | 		ny = atoi(argv[4]);
 66 | 		nz = atoi(argv[5]);
 67 | 	}
 68 | 	else{
 69 | 		cout<<"multigrid [# of threads] [max level] [I_size] [J_size] [K_size]"<<endl;
 70 | 		return 0;
 71 | 	}
 72 | 	
 73 | 	// number of nodes in each dimension
 74 | 	// minimum size =3*3*3, should be 2^n+1: n=max_level-1
 75 | 	// should be 2^n due to periodic domain
 76 | 
 77 | 	cuint n_dof = nx*ny*nz;
 78 | 	cuint n_u_dof = (nx-1)*ny*nz;
 79 | 	cuint n_v_dof = (nx)*(ny-1)*nz;
 80 | 	cuint n_w_dof = (nx)*ny*(nz-1);
 81 | 	
 82 | 	// number of time steps
 83 | 	cuint nts = floor(tf/dt);
 84 | 	// corrected time step size
 85 | 	dt = tf/nts;
 86 | 
 87 | 	// mesh size
 88 | 	cdouble hx = lx/(nx);
 89 | 	cdouble hy = ly/(ny);
 90 | 	cdouble hz = lz/(nz);
 91 | 
 92 | 	// inverse of square of mesh sizes
 93 | 	cdouble hx2i = 1.0/(hx*hx);
 94 | 	cdouble hy2i = 1.0/(hy*hy);
 95 | 	cdouble hz2i = 1.0/(hz*hz);
 96 | 
 97 | 	// interior values
 98 | 	double* U = new double[n_u_dof];
 99 | 	double* V = new double[n_v_dof];
100 | 	double* W = new double[n_w_dof];
101 | 	double* P = new double[n_dof];
102 | 
103 | 	// set up initial conditions
104 | 	cout<<"setting up initial conditions..."<<endl;
105 | 	initial_conditions(U, V, W, P, nx, ny, nz);
106 | 	
107 | 	// for jacobi method
108 | 	cdouble tol = 0.01;
109 | 	cuint max_iteration = 1000000000;
110 | 	cuint pre_smooth_iteration = 10;
111 | 	
112 | 	// boundary conditions
113 | 	// x0 xl y0 yl z0 zl
114 | 	// cdouble bcs[3][6] = { {0,0,0,0,0,1}, {0,0,0,0,0,1}, {0,0,0,0,0,0}};
115 | 	cdouble bcs[3][6] = { {1,1,0,0,0,0}, {0,0,0,0,0,0}, {0,0,0,0,0,0}};
116 | 
117 | 	// measuring time
118 | 	double st_ad, ed_ad, st_vi, ed_vi, st_pr, ed_pr;
119 | 	
120 | 	double start=omp_get_wtime();
121 | 	for(int ts=0; ts<nts; ts++){
122 | 		cout<<"loop: "<<ts<<endl;
123 | 		
124 | 		// treat nonlinear (advection) terms
125 | 		cout<<"calculating advection terms..."<<endl;
126 | 		st_ad=omp_get_wtime();
127 | 		advection(U,V,W, nx,ny,nz, hx, hy, hz, dt, bcs);
128 | 		ed_ad=omp_get_wtime();
129 | 		
130 | 		// implicitly solve viscosity terms
131 | 		double* Uss = new double[n_u_dof];
132 | 		double* Vss = new double[n_v_dof];
133 | 		double* Wss = new double[n_w_dof];
134 | 		cout<<"solving for viscosity terms..."<<endl;
135 | 		st_vi = omp_get_wtime();
136 | 		viscosity( U, V, W, Uss, Vss, Wss, nx, ny, nz, hx, hy, hz,
137 | 				   hx2i, hy2i, hz2i,
138 | 				   dt, nu, bcs,
139 | 				   tol, max_iteration );
140 | 		ed_vi = omp_get_wtime();
141 | 
142 | 		// cout<<"Uss"<<endl;
143 | 		// for(int i=0; i<n_u_dof; i++)
144 | 		// 	cout<<Uss[i]<<endl;
145 | 		// cout<<endl;
146 | 
147 | 		// cout<<"Vss"<<endl;
148 | 		// for(int i=0; i<n_v_dof; i++)
149 | 		// 	cout<<Vss[i]<<endl;
150 | 		// cout<<endl;
151 | 		
152 | 		// cout<<"Wss"<<endl;
153 | 		// for(int i=0; i<n_w_dof; i++)
154 | 		// 	cout<<Wss[i]<<endl;
155 | 		// cout<<endl;
156 | 				
157 | 		// solve for pressure and update
158 | 		cout<<"solving for pressure..."<<endl;
159 | 		st_pr = omp_get_wtime();
160 | 		pressure( U,V,W, P, Uss, Vss, Wss, nx, ny, nz, bcs,
161 | 					  lx, ly, lz, hx, hy, hz,
162 | 					  hx2i, hy2i, hz2i, tol, max_iteration,
163 | 					  pre_smooth_iteration, max_level,
164 | 					  dt);
165 | 		ed_pr = omp_get_wtime();
166 | 
167 | 		// cout<<"U"<<endl;
168 | 		// for(int i=0; i<n_u_dof; i++){
169 | 		// 	cout<<U[i]<<endl;
170 | 		// }
171 | 
172 | 		// cout<<"V"<<endl;
173 | 		// for(int i=0; i<n_v_dof; i++){
174 | 		// 	cout<<V[i]<<endl;
175 | 		// }
176 | 		// cout<<"W"<<endl;
177 | 		// for(int i=0; i<n_w_dof; i++){
178 | 		// 	cout<<W[i]<<endl;
179 | 		// }
180 | 		
181 | 		// write out the results
182 | 		// cout<<"writing results..."<<endl;
183 | 		write_results( U, V, W, P, n_dof, nx, ny, nz,
184 | 					   xmin, ymin, zmin,
185 | 					   hx, hy, hz, ts, bcs);
186 | 
187 | 		delete[] Uss, Vss, Wss;
188 | 		
189 | 	}
190 | 	double end=omp_get_wtime();
191 | 
192 | 	cout<<endl<<endl;
193 | 	cout<<"advection time: "<<ed_ad-st_ad<<endl;
194 | 	cout<<"viscosity time: "<<ed_vi-st_vi<<endl;
195 | 	cout<<"pressure time: " <<ed_pr-st_pr<<endl;	
196 | 	cout<<"total time: "    <<end-start<<" with "<<nt<<" threads"<<endl;
197 | 
198 | 
199 | 	// cleanup
200 | 	delete[] U, V, W, P;
201 | 	
202 | 	return 0;
203 | }
204 | 
205 | 


--------------------------------------------------------------------------------
/msort.C:
--------------------------------------------------------------------------------
  1 | #include "msort.h"
  2 | 
  3 | // comparison function for sorting pairs
  4 | int comp_tuples( const tuple<uint, uint, double>& i,
  5 | 				 const tuple<uint, uint, double>& j ) {
  6 |     if( (get<0>(i)) < (get<0>(j)) ) return true;
  7 | 	else if( get<0>(i) == get<0>(j)) return (get<1>(i)) < (get<1>(j));
  8 | 	else return false;
  9 | }
 10 | 
 11 | // merge two sorted arrays
 12 | void merge(tuple <uint, uint, double>* left,
 13 | 		   tuple <uint, uint, double>* right,
 14 | 		   const int n_left, const int n_right,
 15 | 		   tuple <uint, uint, double>* result,
 16 | 		   tuple <uint, uint, double>* tmp )
 17 | {
 18 | 	unsigned int it = 0;
 19 |     unsigned int left_it = 0, right_it = 0;
 20 | 	// cout<<"n_left "<<n_left<<" n_right "<<n_right<<endl;
 21 | 	
 22 |     while(left_it < n_left && right_it < n_right ) {
 23 | 		it = left_it+right_it;
 24 | 		// cout<<it<<endl;
 25 | 		if(comp_tuples(left[left_it], right[right_it])) {
 26 | 			tmp[it] = left[left_it];
 27 | 			left_it++;
 28 | 		}
 29 | 		else{
 30 | 			tmp[it] = right[right_it];
 31 | 			right_it++;
 32 | 		}
 33 | 	}
 34 | 
 35 |     // Push the remaining data from both vectors onto the tmp
 36 |     while(left_it < n_left) {
 37 | 		it = left_it+right_it;
 38 |         tmp[it] = left[left_it];
 39 |         left_it++;
 40 |     }
 41 | 
 42 |     while(right_it < n_right) {
 43 | 		it = left_it+right_it;
 44 |         tmp[it] = right[right_it];
 45 |         right_it++;
 46 |     }
 47 | 
 48 | 	// Finally put everyhing in result array
 49 | 	for(int i=0; i<(n_right+n_left); i++)
 50 | 		result[i] = tmp[i];
 51 | 
 52 | }
 53 | 
 54 | // mergesort with OpenMP parallelism
 55 | void mergesort(tuple <uint, uint, double>* vec,
 56 | 			   const int threads,
 57 | 			   const int n,
 58 | 			   tuple <uint, uint, double>* tmp
 59 | 			   )
 60 | {
 61 |     // Termination condition: List is completely sorted if it
 62 |     // only contains a single element.
 63 |     if(n == 1){
 64 | 		return;
 65 | 	}
 66 | 
 67 |     // Determine the location of the middle element in the vector
 68 | 	tuple <uint, uint, double>* left = vec; // left array pointer
 69 | 	int n_left = n/2; // number of elements in left array
 70 | 	tuple <uint, uint, double>* tmp_left = tmp; // left tmp array pointer
 71 | 	
 72 | 	tuple <uint, uint, double>* right = left+n/2; // right array pointer
 73 | 	int n_right = n-n/2; // number of elements in right array
 74 | 	tuple <uint, uint, double>* tmp_right = tmp_left+n/2; // right tmp array pointer
 75 | 
 76 |     // Perform a merge sort on the two smaller vectors
 77 |     if (threads > 1) {
 78 | 		
 79 | 		#pragma omp parallel sections
 80 | 		{
 81 | 			#pragma omp section
 82 | 			{
 83 | 				mergesort(left, threads/2, n_left, tmp_left);
 84 | 			}
 85 | 			#pragma omp section
 86 | 			{
 87 | 				mergesort(right, threads - threads/2, n_right, tmp_right);
 88 | 			}
 89 | 		}
 90 | 	}
 91 |     else {
 92 | 		mergesort(left, 1, n_left, tmp_left);
 93 | 		mergesort(right, 1, n_right, tmp_right);
 94 | 	}
 95 | 
 96 |     merge(left, right, n_left, n_right, left, tmp );
 97 | 	
 98 | 	return;
 99 | }
100 | 


--------------------------------------------------------------------------------
/msort.h:
--------------------------------------------------------------------------------
 1 | // openmp merge sort
 2 | #ifndef MSORT_H
 3 | #define MSORT_H
 4 | 
 5 | #include "utils.h"
 6 | 
 7 | using namespace std;
 8 | 
// comparison function for sorting triplets
// Returns nonzero when i orders before j: the first tuple elements are
// compared, and the second elements break ties.  The double is ignored.
int comp_tuples( const tuple<uint, uint, double>& i,
				 const tuple<uint, uint, double>& j );

// merge two sorted arrays
// left/right hold n_left/n_right sorted elements.  The merged sequence is
// built in tmp (needs room for n_left+n_right elements) and then copied to
// result, so result may refer to the same storage as left.
void merge(tuple <uint, uint, double>* left,
		   tuple <uint, uint, double>* right,
		   const int n_left, const int n_right,
		   tuple <uint, uint, double>* result,
		   tuple <uint, uint, double>* tmp );

// mergesort with OpenMP parallelism
// Sorts the n elements of vec in place.  tmp is scratch space of at least
// n elements.  threads is the thread budget: when it is greater than 1 the
// two halves are sorted in separate OpenMP sections.
void mergesort(tuple <uint, uint, double>* vec,
			   const int threads,
			   const int n,
			   tuple <uint, uint, double>* tmp
			   );
26 | 
27 | 
28 | #endif // MSORT_H
29 | 


--------------------------------------------------------------------------------
/pressure.C:
--------------------------------------------------------------------------------
  1 | #include "pressure.h"
  2 | #include "v_cycle.h"
  3 | 
  4 | // compute pressure correction
  5 | void pressure( 	double* U, double* V, double* W, double* P,
  6 | 					double* Uss, double* Vss, double* Wss,
  7 | 					cuint nx, cuint ny, cuint nz,
  8 | 					cdouble bcs[][6],
  9 | 					cdouble lx, cdouble ly, cdouble lz,
 10 | 					cdouble hx, cdouble hy, cdouble hz,
 11 | 					cdouble hx2i, cdouble hy2i, cdouble hz2i,
 12 | 					cdouble tol, cuint max_iteration,
 13 | 					cuint pre_smooth_iteration, cuint max_level,
 14 | 					cdouble dt)
 15 | {
 16 | 	cuint n_dof = nx*ny*nz;
 17 | 
 18 | 	// residual and error
 19 | 	double* Rp = new double[n_dof];
 20 | 	double Er = tol*10;
 21 | 	
 22 | 	// 0-level v_cycle
 23 | 	if(max_level==0)
 24 | 		v_cycle_0( P, Rp,
 25 | 				   n_dof, nx, ny, nz,
 26 | 				   hx, hy, hz,
 27 | 				   hx2i, hy2i, hz2i,
 28 | 				   tol, max_iteration, pre_smooth_iteration,
 29 | 				   hx, hy, hz,
 30 | 				   0, max_level,
 31 | 				   Er,
 32 | 				   Uss, Vss, Wss,
 33 | 				   bcs,dt  );
 34 | 	else
 35 | 		// v-cycle
 36 | 		v_cycle( P, n_dof, nx, ny, nz,
 37 | 				 hx, hy, hz,
 38 | 				 hx2i, hy2i, hz2i,
 39 | 				 tol, max_iteration, pre_smooth_iteration,
 40 | 				 lx, ly, lz,
 41 | 				 0, max_level-1,
 42 | 				 Rp,
 43 | 				 Er,
 44 | 				 Uss, Vss, Wss,
 45 | 				 bcs, dt
 46 | 				 );
 47 | 
 48 | 	
 49 | 	// compute pressure corrections
 50 | 	double* Pr_x = new double[(nx-1)*(ny)*(nz)];
 51 | 	double* Pr_y = new double[(nx)*(ny-1)*(nz)];
 52 | 	double* Pr_z = new double[(nx)*(ny)*(nz-1)];
 53 | 
 54 | 	for(int i=0; i<(nx-1)*(ny)*(nz); i++)
 55 | 		Pr_x[i]=0.0;
 56 | 
 57 | 	compute_corrections(  P, Pr_x, Pr_y, Pr_z, nx, ny, nz, hy, hy, hz );
 58 | 	
 59 | 	// 1d index
 60 | 	uint t;
 61 | 
 62 | 	// correct velocities
 63 | 	// x-direction
 64 | #pragma omp parallel for private(t) shared(U, Uss, Pr_x) num_threads(nt)
 65 | 	for(int i=0; i<nx-1; i++){
 66 | 		for(int j=0; j<ny; j++){
 67 | 			for(int k=0; k<nz; k++){
 68 | 				three_d_to_one_d(i,j,k, nx-1, ny, t);
 69 | 				U[t] = Uss[t] - Pr_x[t]*dt;
 70 | 			}
 71 | 		}
 72 | 	}
 73 | 	
 74 | 	// y-direction	
 75 | #pragma omp parallel for private(t) shared(V, Vss, Pr_y) num_threads(nt)
 76 | 	for(int i=0; i<nx; i++){
 77 | 		for(int j=0; j<ny-1; j++){
 78 | 			for(int k=0; k<nz; k++){
 79 | 				three_d_to_one_d(i,j,k, nx, ny-1, t);
 80 | 				V[t] = Vss[t] - Pr_y[t]*dt;
 81 | 			}
 82 | 		}
 83 | 	}
 84 | 	
 85 | 	// z-direction
 86 | #pragma omp parallel for private(t) shared(W, Wss, Pr_z) num_threads(nt)
 87 | 	for(int i=0; i<nx; i++){
 88 | 		for(int j=0; j<ny; j++){
 89 | 			for(int k=0; k<nz-1; k++){
 90 | 				three_d_to_one_d(i,j,k, nx, ny, t);
 91 | 				W[t] = Wss[t] - Pr_z[t]*dt;
 92 | 			}
 93 | 		}
 94 | 	}
 95 | 
 96 | 	// cleanup
 97 | 	delete[] Pr_x, Pr_y, Pr_z;
 98 | 
 99 | }
100 | 
101 | // build right hand side of pressure poisson equation
102 | void pressure_rhs( double* F,
103 | 				   double* Uss, double* Vss, double* Wss,
104 | 				   cuint nx, cuint ny, cuint nz,
105 | 				   cdouble bcs[][6],
106 | 				   cdouble hx, cdouble hy, cdouble hz,
107 | 				   cdouble dt
108 | 				   )
109 | {
110 | 	// for(int i=0; i<(nx-1)*ny*nz; i++)
111 | 	// 	cout<<"Uss: "<<Uss[i]<<endl;
112 | 	// for(int i=0; i<(nx)*(ny-1)*nz; i++)
113 | 	// 	cout<<"Vss: "<<Vss[i]<<endl;
114 | 	// for(int i=0; i<(nx)*ny*(nz-1); i++)
115 | 	// 	cout<<"Wss: "<<Wss[i]<<endl;
116 | 	cuint n_dof = nx*ny*nz;
117 | 
118 | 	// initialize
119 | 	for(int i=0; i<n_dof; i++)
120 | 		F[i] =0;
121 | 	
122 | 	// Uss contribution
123 | #pragma omp parallel for shared(Uss, F) num_threads(nt)
124 | 	for(int i=0; i<nx; i++){
125 | 		for(int j=0; j<ny; j++){
126 | 			for(int k=0; k<nz; k++){
127 | 				uint t0, t1, t;
128 | 				three_d_to_one_d(i-1,j,k, nx-1,ny, t0);
129 | 				three_d_to_one_d(i,j,k, nx-1,ny, t1);
130 | 				three_d_to_one_d(i,j,k, nx,ny, t);
131 | 				if(i==0)
132 | 					F[t] += (Uss[t1]-bcs[0][0])/hx;
133 | 				else if(i==nx-1)
134 | 					F[t] += (bcs[0][1]-Uss[t0])/hx;
135 | 				else{
136 | 					F[t] += (Uss[t1]-Uss[t0]) / hx;
137 | 				}
138 | 			}
139 | 		}
140 | 	}
141 | 
142 | 	// Vss contribution
143 | #pragma omp parallel for shared(Vss, F) num_threads(nt)
144 | 	for(int i=0; i<nx; i++){
145 | 		for(int j=0; j<ny; j++){
146 | 			for(int k=0; k<nz; k++){
147 | 				uint t0, t1, t;
148 | 				three_d_to_one_d(i,j-1,k, nx,ny-1, t0);
149 | 				three_d_to_one_d(i,j,k, nx,ny-1, t1);
150 | 				three_d_to_one_d(i,j,k, nx,ny, t);
151 | 				if(j==0)
152 | 					F[t] += (Vss[t1]-bcs[1][2])/hy;
153 | 				else if(j==ny-1)
154 | 					F[t] += (bcs[1][3]-Vss[t0])/hy;
155 | 				else{
156 | 					F[t] += (Vss[t1]-Vss[t0]) / hy;
157 | 				}
158 | 			}
159 | 		}
160 | 	}
161 | 	
162 | 	// Wss contribution
163 | #pragma omp parallel for shared(Wss, F) num_threads(nt)
164 | 	for(int i=0; i<nx; i++){
165 | 		for(int j=0; j<ny; j++){
166 | 			for(int k=0; k<nz; k++){
167 | 				uint t0, t1, t;
168 | 				three_d_to_one_d(i,j,k-1, nx,ny, t0);
169 | 				three_d_to_one_d(i,j,k, nx,ny, t1);
170 | 				three_d_to_one_d(i,j,k, nx,ny, t);
171 | 				if(k==0)
172 | 					F[t] += (Wss[t1]-bcs[2][4])/hz;
173 | 				else if(k==nz-1){
174 | 					F[t] += (bcs[2][5]-Wss[t0])/hz;
175 | 				}
176 | 				else{
177 | 					F[t] += (Wss[t1]-Wss[t0]) / hz;
178 | 				}
179 | 			}
180 | 		}
181 | 	}
182 | 
183 | 	// divide everyhing by dt
184 | 	for(int i=0; i<(nx*ny*nz); i++){
185 | 		F[i] = F[i]/dt;
186 | 	}
187 | 									 
188 | 	
189 | 	// global constraint to close the system
190 | 	// F[n_dof] = 0;
191 | 
192 | 	// point boundary condition at the center of domain
193 | 	// uint t;
194 | 	// three_d_to_one_d(uint(nx/2),uint(ny/2),uint(nz/2), nx,ny, t);
195 | 	// F[t] = 0;
196 | 	
197 | 	// output to file for testing purpose
198 | 	// ofstream file_out("Fp_vector.dat");
199 | 	// for(int i=0; i<(n_dof); i++){
200 | 	// 	file_out<<F[i]<<endl;
201 | 	// }
202 | 	// file_out.close();
203 | 
204 | 	
205 | 	return;
206 | }
207 | 
208 | // 2nd order stencil
209 | void pressure_matrix( vector<tuple <uint, uint, double> >& Lp_sp,
210 | 					  vector<double>& val,
211 | 					  vector<uint>& col_ind,
212 | 					  vector<uint>& row_ptr,
213 | 					  cuint nx, cuint ny, cuint nz,
214 | 					  const double hx2i,
215 | 					  const double hy2i,
216 | 					  const double hz2i,
217 | 					  cuint n_dof
218 | 					  )
219 | {
220 | 	// initialize sparse matrix (row#, col#, value)
221 | 	vector<vector<tuple <uint, uint, double> > > M;
222 | 	M.resize(nt);
223 | 	
224 | #pragma omp parallel  shared(M) num_threads(nt)
225 | 	{
226 | 		cuint myrank = omp_get_thread_num();
227 | 
228 | 		// loop through inner nodes
229 | #pragma omp for 
230 | 		for(int i=0; i<nx; i++){
231 | 			for(int j=0; j<ny; j++){
232 | 				for(int k=0; k<nz; k++){
233 | 					// 2nd order stencil
234 | 					unsigned int p,q;
235 | 					unsigned int t_011,t_111,t_211,t_101,t_121,t_110,t_112;
236 | 					three_d_to_one_d(i,  j,  k,   nx,ny, t_111);
237 | 					three_d_to_one_d(i-1,j,  k,   nx,ny, t_011);
238 | 					three_d_to_one_d(i+1,j,  k,   nx,ny, t_211);
239 | 					three_d_to_one_d(i,  j-1,k,   nx,ny, t_101);
240 | 					three_d_to_one_d(i,  j+1,k,   nx,ny, t_121);
241 | 					three_d_to_one_d(i,  j,  k-1, nx,ny, t_110);
242 | 					three_d_to_one_d(i,  j,  k+1, nx,ny, t_112);
243 | 
244 | 					// p_xx contributions
245 | 					if(i-1>=0)
246 | 						sparse_add(M[myrank], t_111, t_011, hx2i);
247 | 					else // x0: i==0 (P[-1][j][k]==P[0][j][k])
248 | 						sparse_add(M[myrank], t_111, t_111, hx2i);
249 | 					
250 | 					sparse_add(M[myrank], t_111, t_111, -2*hx2i);
251 | 
252 | 					if(i+1<nx)
253 | 						sparse_add(M[myrank], t_111, t_211, hx2i);
254 | 					else // xl: i==nx-1
255 | 						sparse_add(M[myrank], t_111, t_111, hx2i);
256 | 
257 | 					// p_yy contributions
258 | 					if(j-1>=0)
259 | 						sparse_add(M[myrank], t_111, t_101, hy2i);
260 | 					else // y0: j==0 (P[i][-1][k]==P[i][0][k])
261 | 						sparse_add(M[myrank], t_111, t_111, hy2i);
262 | 					
263 | 					sparse_add(M[myrank], t_111, t_111, -2*hy2i);
264 | 
265 | 					if(j+1<ny)
266 | 						sparse_add(M[myrank], t_111, t_121, hy2i);
267 | 					else // yl: j==ny-1
268 | 						sparse_add(M[myrank], t_111, t_111, hy2i);
269 | 
270 | 					// p_zz contributions
271 | 					if(k-1>=0)
272 | 						sparse_add(M[myrank], t_111, t_110, hz2i);
273 | 					else // z0: k==0 (P[i][j][-1]==P[i][j][0])
274 | 						sparse_add(M[myrank], t_111, t_111, hz2i);
275 | 					
276 | 					sparse_add(M[myrank], t_111, t_111, -2*hz2i);
277 | 
278 | 					if(k+1<nz)
279 | 						sparse_add(M[myrank], t_111, t_112, hz2i);
280 | 					else // zl: k==nz-1
281 | 						sparse_add(M[myrank], t_111, t_111, hz2i);
282 | 
283 | 				}
284 | 			}
285 | 		} // end for
286 | 
287 | 
288 | 		// global constraint to close the system
289 | 		// #pragma omp for
290 | 		// 		for(int i=0; i<n_dof; i++){
291 | 		// 			sparse_add(M[myrank], i, n_dof, 1);
292 | 		// 			sparse_add(M[myrank], n_dof, i, 1);
293 | 		// 		}
294 | 		// 		if(myrank==0)
295 | 		// 			sparse_add(M[myrank], n_dof, n_dof,
296 | 		// 					   n_dof);
297 | 		
298 | 	} // end parallel region		
299 | 
300 | 	// merge and sort
301 | 	for(int i=1; i<nt; i++)
302 | 		M[0].insert( M[0].end(), M[i].begin(), M[i].end() );
303 | 	vector<tuple <uint, uint, double> > tmp;
304 | 	tmp.resize(M[0].size());	
305 | 	mergesort(&M[0][0], nt, M[0].size(), &tmp[0] );
306 | 	
307 | 	// consolidate
308 | 	Lp_sp.push_back(M[0][0]);
309 | 	uint ct=0;
310 | 	for(int i =1; i<M[0].size(); i++){
311 | 		if( (get<0>(Lp_sp[ct])==get<0>(M[0][i]))
312 | 			&& (get<1>(Lp_sp[ct])==get<1>(M[0][i])) ){
313 | 			get<2>(Lp_sp[ct]) += get<2>(M[0][i]);
314 | 		}
315 | 		else{
316 | 			Lp_sp.push_back(M[0][i]);
317 | 			ct++;
318 | 		}
319 | 	}
320 | 
321 | 	// point constraint to close the system
322 | 	get<2>(Lp_sp[0]) = 5*get<2>(Lp_sp[0]);
323 | 		
324 | 	// convert to CSR format
325 | 	val.resize(Lp_sp.size(),0.0);
326 | 	col_ind.resize(Lp_sp.size(), 0);
327 | 	
328 | #pragma omp parallel for shared(val, col_ind, Lp_sp) num_threads(nt)
329 | 	for(int i=0; i<Lp_sp.size(); i++){
330 | 		val[i] = get<2>(Lp_sp[i]);
331 | 		col_ind[i] = get<1>(Lp_sp[i]);
332 | 	}
333 | 	for(int i=1; i<Lp_sp.size(); i++){
334 | 		if(get<0>(Lp_sp[i])!=get<0>(Lp_sp[i-1]))
335 | 			row_ptr.push_back(i);
336 | 	}
337 | 	row_ptr.push_back(Lp_sp.size());
338 | 
339 | 	// output to file for testing purpose
340 | 	ofstream file_out("Lp_matrix.dat");
341 | 	for(int i=0; i<Lp_sp.size(); i++){
342 | 		file_out<<get<0>(Lp_sp[i])<<" "<<get<1>(Lp_sp[i])
343 | 				<<" "<<get<2>(Lp_sp[i])<<endl;
344 | 	}
345 | 	file_out.close();
346 | 	
347 | }
348 | 
349 | // compute corrections from pressure value
350 | void compute_corrections( double* Pr,
351 | 						  double* Pr_x,
352 | 						  double* Pr_y,
353 | 						  double* Pr_z,
354 | 						  cuint nx, cuint ny, cuint nz,
355 | 						  cdouble hx, cdouble hy, cdouble hz )
356 | {
357 | 	for(int i=0; i<(nx-1)*(ny)*(nz); i++)
358 | 		Pr_x[i]=0.0;
359 | 	for(int i=0; i<(nx)*(ny-1)*(nz); i++)
360 | 		Pr_y[i]=0.0;
361 | 	for(int i=0; i<(nx)*(ny)*(nz-1); i++)
362 | 		Pr_z[i]=0.0;
363 | 	
364 | 	staggered_first_difference( Pr, Pr_x, nx, ny, nz, nx-1, ny, nz, hx, X_DIR );
365 | 	staggered_first_difference( Pr, Pr_y, nx, ny, nz, nx, ny-1, nz, hy, Y_DIR );
366 | 	staggered_first_difference( Pr, Pr_z, nx, ny, nz, nx, ny, nz-1, hz, Z_DIR );
367 | 
368 | 	return;
369 | }
370 | 	
371 | 


--------------------------------------------------------------------------------
/pressure.h:
--------------------------------------------------------------------------------
 1 | // pressure solve and velocity correction
 2 | #ifndef PRESSURE_H
 3 | #define PRESSURE_H
 4 | 
 5 | #include "utils.h"
 6 | #include "assemble.h"
 7 | #include "msort.h"
 8 | #include "jacobi.h"
 9 | #include "advection.h"
10 | 
11 | using namespace std;
12 | 
// compute pressure correction
// Solves for P from the intermediate velocities Uss/Vss/Wss (v_cycle_0 when
// max_level==0, v_cycle otherwise) and writes the corrected velocities to
// U, V, W.  U/Uss hold (nx-1)*ny*nz entries, V/Vss nx*(ny-1)*nz,
// W/Wss nx*ny*(nz-1) and P nx*ny*nz.
void pressure( 	double* U, double* V, double* W, double* P,
				double* Uss, double* Vss, double* Wss,
				cuint nx, cuint ny, cuint nz,
				cdouble bcs[][6],
				cdouble lx, cdouble ly, cdouble lz,
				cdouble hx, cdouble hy, cdouble hz,
				cdouble hx2i, cdouble hy2i, cdouble hz2i,
				cdouble tol, cuint max_iteration,
					cuint pre_smooth_iteration, cuint max_level,
					cdouble dt);

// build the load vector of pressure equation
// F (nx*ny*nz entries) is overwritten with the sum of the face-to-face
// differences of Uss, Vss and Wss over hx, hy and hz, divided by dt.
// Boundary face values are read from bcs.
void pressure_rhs( double* F, double* Uss, double* Vss, double* Wss,
				   cuint nx, cuint ny, cuint nz,
				   cdouble bcs[][6],
				   cdouble hx, cdouble hy, cdouble hz,
				   cdouble dt);


// build a pressure matrix
// Assembles the 2nd order stencil as sorted (row, col, value) triplets in
// Lp_sp and fills val / col_ind / row_ptr from them.  row_ptr is appended
// to, so the caller supplies its leading 0.  Also writes the triplets to
// "Lp_matrix.dat".  n_dof is not used by the active code.
void pressure_matrix( vector<tuple <uint, uint, double> >& Lp_sp,
					  vector<double>& val,
					  vector<uint>& col_ind,
					  vector<uint>& row_ptr,
					  cuint nx, cuint ny, cuint nz,
					  const double hx2i,
					  const double hy2i,
					  const double hz2i,
					  cuint n_dof
					  );

// compute corrections from pressure value
// Zero-fills Pr_x, Pr_y, Pr_z and then fills them through
// staggered_first_difference along X_DIR, Y_DIR and Z_DIR with mesh sizes
// hx, hy and hz.
void compute_corrections( double* Pr,
						  double* Pr_x,
						  double* Pr_y,
						  double* Pr_z,
						  cuint nx, cuint ny, cuint nz,
						  cdouble hx, cdouble hy, cdouble hz );
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/scaling.m:
--------------------------------------------------------------------------------
 1 | clear all
 2 | close all
 3 | 
 4 | % strong scaling
 5 | % 32^3
 6 | Ta = [0.0112691 0.00559711 0.00452089 0.00302196 0.00250888];
 7 | Tv = [0.0432389 0.041455 0.0408649 0.0634542 0.116877];
 8 | Tp = [3.77661 4.3534 4.69743 7.65514 13.1779];
 9 | Tt = [4.1107 4.66746 5.0014 7.97422 13.5518];
10 | Nt = [16 8 4 2 1];
11 | 
12 | loglog(Nt, Ta, '-o', Nt, Tv, '-x', Nt, Tp, '-+', Nt, Tt, '-s');
13 | xlim([0 16]);
14 | set(gca,'XTick',[1 2 4 8 16]);
15 | title('Strong Scaling');
16 | legend('advection','viscosity','pressure','total');
17 | xlabel('# of nodes');
18 | ylabel('time [s]');
19 | 
20 | Ep = Tt(end)./(Tt.*Nt);
21 | semilogx(Nt, Ep, '-o');
22 | xlim([0 16]);
23 | set(gca,'XTick',[1 2 4 8 16]);
24 | title('Efficiency');
25 | xlabel('# of nodes');
26 | ylabel('efficiency');
27 | 
28 | % weak scaling
29 | sqrt((32^3)^2/16*8); % 24*24*40
30 | sqrt((32^3)^2/16*4); % 32*32*16
31 | sqrt((32^3)^2/16*2); % 24*24*20
32 | sqrt((32^3)^2/16*1); % 32*16*16
33 | 
34 | % Tw_weak = [96.2477 77.2512 70.8685 58.825 65.499];
35 | Tw_weak = [94.9259 47.8498 19.2619 7.54821 3.63176];
36 | 
37 | semilogx(Nt, Tw_weak, '-o');
38 | xlim([0 16]);
39 | xlabel('# of nodes');
40 | ylabel('total time [s]');
41 | set(gca,'XTick',[1 2 4 8 16]);
42 | title('Weak Scaling');
43 | 


--------------------------------------------------------------------------------
/utils.C:
--------------------------------------------------------------------------------
 1 | #include "utils.h"
 2 | 
 3 | // comparison function for sorting pairs
 4 | int comp_pairs( const tuple<uint, uint, double>& i,
 5 | 				 const tuple<uint, uint, double>& j ) {
 6 |     if( (get<0>(i)) < (get<0>(j)) ) return true;
 7 | 	else if( get<0>(i) == get<0>(j)) return (get<1>(i)) < (get<1>(j));
 8 | 	else return false;
 9 | }
10 | 
// Flatten the grid coordinate (i,j,k) into a 1d index.
// I and J are the grid extents along the first and second axis; i varies
// fastest: t = i + j*I + k*I*J.  All arithmetic is unsigned, so a negative
// argument passed by a caller wraps around instead of producing a negative
// index.
void three_d_to_one_d( const unsigned int i,
					  const unsigned int j,
					  const unsigned int k,
					  const unsigned int I,
					  const unsigned int J,
					  unsigned int& t )
{
	// nested form of i + j*I + k*I*J
	const unsigned int row = k*J + j;
	t = row*I + i;
}
20 | 
// Recover the grid coordinate (i,j,k) from a 1d index.
// Inverse of t = i + j*I + k*I*J, where I and J are the grid extents along
// the first and second axis.  I and J must be nonzero.
void one_d_to_three_d( const unsigned int t,
					   const unsigned int I,
					   const unsigned int J,
					   unsigned int& i,
					   unsigned int& j,
					   unsigned int& k)
{
	const unsigned int plane = I*J;          // entries per k-layer
	const unsigned int in_plane = t % plane; // offset inside that layer

	k = t / plane;
	j = in_plane / I;
	i = in_plane % I;
}
32 | 
33 | // get the neighboring node numbers (periodic domain)
34 | // watch out for the negative unsigned int!!
35 | void get_neighbor( uint t[][3][3],
36 | 				   cuint i, cuint j, cuint k,
37 | 				   cuint I, cuint J, cuint K )
38 | {
39 | 	for(int p=0; p<3; p++){
40 | 		for(int q=0; q<3; q++){
41 | 			for(int r=0; r<3; r++){
42 | 				int nei_i, nei_j, nei_k;
43 | 				if(i+p>=I+1) nei_i = i+p-1-I;
44 | 				else if(i+p<1) nei_i = i+p+I-1; 
45 | 				else nei_i = i+p-1;
46 | 
47 | 				if(j+q>=J+1) nei_j = j+q-1-J;
48 | 				else if(j+q<1) nei_j = j+q+J-1; 
49 | 				else nei_j = j+q-1;
50 | 				
51 | 				if(k+r>=K+1) nei_k = k+r-1-K;
52 | 				else if(k+r<1) nei_k = k+r+K-1; 
53 | 				else nei_k = k+r-1;
54 | 				// cout<<nei_i<<" "<<nei_j<<" "<<nei_k<<endl;
55 | 				
56 | 				// int nei_i = ((i+p)<1) ? (i+p+I-1) : (i+p-1);
57 | 				// int nei_j = ((j+p)<1) ? (j+p+J-1) : (j+p-1);
58 | 				// int nei_k = ((k+p)<1) ? (k+p+K-1) : (k+p-1);
59 | 				three_d_to_one_d(nei_i,nei_j,nei_k, I,J, t[p][q][r]);
60 | 			}
61 | 		}
62 | 	}
63 | }
64 | 
65 | // get the node numbers in a box (periodic domain)
66 | // watch out for the negative unsigned int!!
67 | void get_box( uint t[][2][2],
68 | 				   cuint i, cuint j, cuint k,
69 | 				   cuint I, cuint J, cuint K )
70 | {
71 | 	for(int p=0; p<2; p++){
72 | 		for(int q=0; q<2; q++){
73 | 			for(int r=0; r<2; r++){
74 | 				int nei_i, nei_j, nei_k;
75 | 				if(i+p>=I) nei_i = i+p-I;
76 | 				else nei_i = i+p;
77 | 				if(j+q>=J) nei_j = j+q-J;
78 | 				else nei_j = j+q;
79 | 				if(k+r>=K) nei_k = k+r-K;
80 | 				else nei_k = k+r;
81 | 				
82 | 				three_d_to_one_d(nei_i,nei_j,nei_k, I,J, t[p][q][r]);
83 | 			}
84 | 		}
85 | 	}
86 | }
87 | 


--------------------------------------------------------------------------------
/utils.h:
--------------------------------------------------------------------------------
 1 | // utility functions
 2 | #ifndef UTILS_H
 3 | #define UTILS_H
 4 | 
 5 | #include <iostream>
 6 | #include <cmath>
 7 | #include <fstream>
 8 | #include <omp.h>
 9 | #include <vector>
10 | #include <tuple>
11 | #include <algorithm>
12 | #include <omp.h>
13 | #include <cstdlib>
14 | 
15 | #include "boost/multi_array.hpp"
16 | 
17 | using namespace std;
18 | 
// short names for the const/unsigned types used in function signatures
typedef const unsigned int cuint;
typedef unsigned int uint;
typedef const double cdouble;
typedef const int cint;

const double pi=3.14159265359;
// number of OpenMP threads, used in the num_threads(nt) clauses; only
// declared here, the definition lives in another translation unit
extern unsigned int nt;

// direction selectors passed to the difference/grid helpers
// NOTE(review): X/Y/Z_DIR select an axis; XY/XZ/YZ and X2/Y2/Z2 presumably
// select mixed and squared terms - confirm against the code that uses them
cuint X_DIR=0;
cuint Y_DIR=1;
cuint Z_DIR=2;
cuint XY_DIR=3;
cuint XZ_DIR=4;
cuint YZ_DIR=5;
cuint X2_DIR=6;
cuint Y2_DIR=7;
cuint Z2_DIR=8;


// comparison function for sorting triplets
// nonzero when i orders before j by first element, then by second element
int comp_pairs( const tuple<uint, uint, double>& i,
				 const tuple<uint, uint, double>& j );

// flatten (i,j,k) on a grid with extents I, J along the first two axes:
// t = i + j*I + k*I*J
void three_d_to_one_d( const unsigned int i,
					   const unsigned int j,
					   const unsigned int k,
					   const unsigned int I,
					   const unsigned int J,
					   unsigned int& t );

// inverse of three_d_to_one_d: recover (i,j,k) from t
void one_d_to_three_d( const unsigned int t,
					   const unsigned int I,
					   const unsigned int J,
					   unsigned int& i,
					   unsigned int& j,
					   unsigned int& k);

// get the node numbers of the 3x3x3 neighbourhood of (i,j,k) on a periodic
// I x J x K grid; t[p][q][r] is the node at offset (p-1, q-1, r-1)
void get_neighbor( unsigned int t[][3][3],
				   cuint i, cuint j, cuint k,
				   cuint I, cuint J, cuint K );

// get node numbers in a box
// t[p][q][r] is the node at offset (p, q, r) from (i,j,k) on a periodic
// I x J x K grid
void get_box( uint t[][2][2],
				   cuint i, cuint j, cuint k,
			  cuint I, cuint J, cuint K );
64 | 
65 | 
66 | #endif //UTILS_H
67 | 


--------------------------------------------------------------------------------
/v_cycle.C:
--------------------------------------------------------------------------------
  1 | #include "v_cycle.h"
  2 | #include "pressure.h"
  3 | 
  4 | // multigrid v-cycle
  5 | void v_cycle( double* P, uint n_dof, cuint nx, cuint ny, cuint nz,
  6 | 			  cdouble hx, cdouble hy, cdouble hz,
  7 | 			  cdouble hx2i, cdouble hy2i, cdouble hz2i,
  8 | 			  cdouble tol, cuint max_iteration, cuint pre_smooth_iteration,
  9 | 			  cdouble lx, cdouble ly, cdouble lz,
 10 | 			  cuint level, cuint max_level,
 11 | 			  double* F,
 12 | 			  double& Er,
 13 | 			  double* Uss, double* Vss, double* Wss,
 14 | 			  cdouble bcs[][6],
 15 | 			  cdouble dt
 16 | 			  )
 17 | {
 18 | 	cout<<"level: "<<level<<" n_dof: "<<n_dof<<endl;
 19 | 
 20 | 	// initialize finite difference matrix (+1 for global constraint)
 21 | // 	double** M = new double*[n_dof];
 22 | // 	for(int n = 0; n < (n_dof); n++)
 23 | // 		M[n] = new double[n_dof];
 24 | // 	// initialize 
 25 | // #pragma omp parallel for shared(n_dof, M)
 26 | // 	for(int i=0; i<n_dof; i++)
 27 | // 		for(int j=0; j<n_dof; j++)
 28 | // 			M[i][j] = 0;
 29 | 
 30 | 	cout<<"fd_matrix_sparse"<<endl;
 31 | 	vector<tuple <uint, uint, double> > M_sp;
 32 | 	vector<double> val;
 33 | 	vector<uint> col_ind;
 34 | 	vector<uint> row_ptr(1,0);
 35 | 	
 36 | 	// create finite difference matrix
 37 | 	cout<<"create finite difference matrix"<<endl;
 38 | 	// build pressure matrix
 39 | 	pressure_matrix( M_sp,
 40 | 					 val, col_ind, row_ptr,
 41 | 					 nx, ny, nz,
 42 | 					 hx2i, hy2i, hz2i,
 43 | 					 n_dof
 44 | 					 );	
 45 | 	
 46 | 	// construct load vector
 47 | 	// load vector is created only at the level 0
 48 | 	if(level==0){
 49 | 		F = new double[n_dof];
 50 | 		cout<<"create load vector"<<endl;
 51 | 
 52 | 		pressure_rhs(F, Uss, Vss, Wss, nx, ny, nz, bcs, hx, hy, hz, dt);
 53 | 		// load_vector(F, n_dof, I,J,K );
 54 | 	}
 55 | 
 56 | 	// cout<<"save matrix and vector"<<endl;
 57 | 	// char matrix_file[100];
 58 | 	// char vector_file[100];
 59 | 	// sprintf(vector_file, "vector_%i.dat", level);
 60 | 	// if(write_vector(n_dof,F,vector_file)) cout<<"write_vector fail"<<endl;
 61 | 	
 62 | 	// construct solution vector
 63 | 	double* U;
 64 | 	if(level==0) U=P;
 65 | 	else U = new double[n_dof];
 66 | 	double* U_tmp = new double[n_dof];
 67 | 	// initial guess
 68 | #pragma omp parallel for shared(U, U_tmp) num_threads(nt)
 69 | 	for(int n=0; n<n_dof; n++){
 70 | 	    U[n] = 0.0;
 71 | 	    U_tmp[n] = 0.0;
 72 |     }
 73 | 
 74 | 	// residual and error
 75 | 	double* R = new double[n_dof];
 76 | 
 77 | 	// perform pre-smoothing and compute residual
 78 | 	cout<<"pre-smoothing "<<pre_smooth_iteration<<" times"<<endl;
 79 | 	Er = tol*10;
 80 | 	jacobi_sparse(tol, pre_smooth_iteration, n_dof, U, U_tmp,
 81 | 				  val, col_ind, row_ptr, F, Er, R);
 82 | 		
 83 | 	// restriction of residual on coarse grid
 84 | 	double* F_coar;
 85 | 		
 86 | 	// Restrict the residual
 87 | 	cuint nx_coar = (nx)/2;
 88 | 	cuint ny_coar = (ny)/2;
 89 | 	cuint nz_coar = (nz)/2;
 90 | 	uint n_dof_coar = nx_coar*ny_coar*nz_coar; 
 91 | 	F_coar = new double[n_dof_coar];
 92 | 
 93 | 	// mesh size 
 94 | 	cdouble hx_coar = lx/(nx_coar);
 95 | 	cdouble hy_coar = ly/(ny_coar);
 96 | 	cdouble hz_coar = lz/(nz_coar);
 97 | 	
 98 | 	// inverse of square of mesh sizes
 99 | 	cdouble hx2i_coar = 1.0/(hx_coar*hx_coar);
100 | 	cdouble hy2i_coar = 1.0/(hy_coar*hy_coar);
101 | 	cdouble hz2i_coar = 1.0/(hz_coar*hz_coar);
102 | 		
103 | 	// restric residual to the coarrse grid
104 | 	cout<<"restriction"<<endl;
105 | 	restriction( R, F_coar, nx, ny, nz, nx_coar, ny_coar, nz_coar);
106 | 	
107 | 	// construct solution vector on coarse grid
108 | 	double* U_coar = new double[n_dof_coar];
109 | 	double* U_coar_tmp = new double[n_dof_coar];
110 | 	
111 | 	// if the grid is coarsest
112 | 	if( level==max_level){
113 | 		cout<<"level: "<<level+1<<" n_dof: "<<n_dof_coar<<endl;
114 | 
115 | 		// initial guess
116 | #pragma omp parallel for shared(U_coar, U_coar_tmp) num_threads(nt)
117 | 		for(int n=0; n<n_dof_coar; n++){
118 | 			U_coar[n] = 0.0;
119 | 			U_coar_tmp[n] = 0.0;
120 | 		}
121 | 
122 | 		vector<tuple <uint, uint, double> > M_sp_coar;
123 | 		vector<double> val_coar;
124 | 		vector<uint> col_ind_coar;
125 | 		vector<uint> row_ptr_coar(1,0);
126 | 		
127 | 		// create finite difference matrix
128 | 		cout<<"create finite difference matrix"<<endl;
129 | 		// fd_matrix_sparse(M_sp_coar, val_coar, col_ind_coar, row_ptr_coar,
130 | 		// 				 nx_coar,ny_coar,nz_coar,
131 | 		// 				 hx2i_coar, hy2i_coar, hz2i_coar, n_dof_coar );
132 | 		
133 | 		pressure_matrix( M_sp_coar, val_coar, col_ind_coar, row_ptr_coar,
134 | 						 nx_coar, ny_coar, nz_coar,
135 | 						 hx2i_coar, hy2i_coar, hz2i_coar,
136 | 						 n_dof_coar
137 | 						 );
138 | 		
139 | 		// residual on coarse grid
140 | 		double* R_coar = new double[n_dof_coar];
141 | 		
142 | 		// exact Jacobi method
143 | 		Er = tol*10;
144 | 		jacobi_sparse(tol, max_iteration, n_dof_coar, U_coar, U_coar_tmp,
145 | 					  val_coar, col_ind_coar, row_ptr_coar, F_coar,
146 | 					  Er, R_coar);
147 | 		
148 | 		// write_results( U_coar,
149 | 		// 			   n_dof_coar,
150 | 		// 			   I_coar, J_coar, K_coar,
151 | 		// 			   dx_coar, dy_coar, dz_coar, level);
152 | 		
153 | 		delete[] R_coar;
154 | 		
155 | 		// cout<<"R"<<endl;
156 | 		// for(int i=0; i<n_dof; i++)
157 | 		// 	cout<<R[i]<<endl;
158 | 		 
159 | 	}
160 | 	else{
161 | 		// v_cycle on the coarse grid
162 | 		v_cycle( U_coar, n_dof_coar, nx_coar, ny_coar, nz_coar,
163 | 						  hx_coar, hy_coar, hz_coar,
164 | 						  hx2i_coar, hy2i_coar, hz2i_coar,
165 | 						  tol, max_iteration, pre_smooth_iteration,
166 | 						  lx, ly, lz,
167 | 						  level+1, max_level,
168 | 						  F_coar, Er,
169 | 						  Uss, Vss, Wss,
170 | 						  bcs, dt
171 | 						  );
172 | 		
173 | 		cdouble dx_coar = lx/(nx_coar);
174 | 		cdouble dy_coar = ly/(ny_coar);
175 | 		cdouble dz_coar = lz/(nz_coar);
176 | 
177 | 		// // write partial results for test purpose
178 | 		// write_results( U_coar,
179 | 		// 			   n_dof_coar,
180 | 		// 			   I_coar, J_coar, K_coar,
181 | 		// 			   dx_coar, dy_coar, dz_coar, level);
182 | 		 
183 | 	}
184 | 
185 | 	// interpolate to fine grid
186 | 	double* E = new double[n_dof];
187 | 	interpolation(U_coar, E, nx_coar,ny_coar,nz_coar, nx, ny, nz);
188 | 
189 | 	// correct the fine grid approximation
190 | #pragma omp parallel for shared(U,E) num_threads(nt)
191 | 	for(int i=0; i<n_dof; i++){
192 | 		// cout<<i<<" "<<U[i]<<" "<<E[i]<<" "<<E[i]/U[i]<<endl;
193 | 		U[i] += E[i];
194 | 	}
195 | 
196 | 	// perform post-smoothing and compute residual
197 | 	uint post_smooth_iteration;
198 | 	// if(level==0)
199 | 		post_smooth_iteration=max_iteration;
200 | 	// else
201 | 		// post_smooth_iteration=( pre_smooth_iteration+1)*1000;
202 | 
203 | 	cout<<"post-smoothing "<<post_smooth_iteration<<" times on level "
204 | 		<<level<<endl;
205 | 	// jacobi(tol, post_smooth_iteration, n_dof, U, U_tmp, M, F, Er, R);
206 | 	Er = tol*10;
207 | 	jacobi_sparse(tol, post_smooth_iteration, n_dof, U, U_tmp,
208 | 				  val, col_ind, row_ptr, F, Er, R);
209 | 
210 | 	
211 | 	// cleanup
212 | 	if (level==0)
213 | 		delete[] F;
214 | 
215 | 	delete[] U_tmp;
216 | 	delete[] R, F_coar;
217 | 	delete[] E;
218 | 	delete[] U_coar, U_coar_tmp;
219 | 	
220 | }
221 | 
222 | // 3D full weight restriction of the fine field R (I x J x K nodes) onto R_new
223 | void restriction( double* R, double* R_new, cuint I, cuint J, cuint K,
224 | 				  cuint I_new, cuint J_new, cuint K_new )
225 | {	
226 | 	unsigned int nei[3][3][3]; // stencil node numbers; private(nei) gives each thread its own copy
227 | #pragma omp parallel for shared(R, R_new) private(nei) num_threads(nt)
228 | 	for(int i=0; i<(I<2*I_new ? I : 2*I_new); i+=2){ // clamp: i/2 must stay below I_new
229 | 		for(int j=0; j<(J<2*J_new ? J : 2*J_new); j+=2){ // same clamp for j/2 < J_new
230 | 			for(int k=0; k<(K<2*K_new ? K : 2*K_new); k+=2){ // and for k/2 < K_new
231 | 				get_neighbor( nei, i,j,k, I,J,K);
232 | 				// coarse index of fine node (i,j,k); with an odd fine size the unclamped
233 | 				uint t_new; // sweep produced i/2 == I_new, i.e. a slot outside R_new
234 | 				three_d_to_one_d( i/2, j/2, k/2, I_new, J_new, t_new);
235 | 				coarse_map( R, R_new, nei, i,j,k, t_new);
236 | 			}
237 | 		}
238 | 	}
239 | }
240 | 
241 | // fine -> coarse: store in R_new[t_new] the fw_stencil-weighted sum of the 27 values R[nei[..]]
242 | void coarse_map( double* R, double* R_new,
243 | 				 cuint nei[][3][3], cuint i, cuint j, cuint k, cuint t_new )
244 | {
245 | 	// reset the target first: it is accumulated into and may hold stale data
246 | 	double& coarse_val = R_new[t_new];
247 | 	coarse_val = 0.0;
248 | 	for(int n=0; n<27; n++){
249 | 		// unpack the flat counter into the stencil position (a,b,c), c running fastest
250 | 		const int a = n/9, b = (n/3)%3, c = n%3;
251 | 		coarse_val += R[nei[a][b][c]]*fw_stencil[a][b][c];
252 | 	}
253 | 	// i, j and k belong to the interface but are not needed for the sum itself
254 | }
255 | 
256 | // 3d trilinear interpolation of the coarse field U (I x J x K) onto U_fine
257 | void interpolation( double* U, double* U_fine,
258 | 					cuint I, cuint J, cuint K,
259 | 					cuint I_fine, cuint J_fine, cuint K_fine)
260 | {
261 | 	// node-number tables for one coarse box and the matching fine box
262 | 	uint box_old[2][2][2];
263 | 	uint box_fine[2][2][2];
264 | 
265 | #pragma omp parallel for shared(U, U_fine) private(box_old, box_fine) num_threads(nt)
266 | 	for(int i=0; i<I; i++){
267 | 		for(int j=0; j<J; j++){
268 | 			for(int k=0; k<K; k++){
269 | 				// get the node numbers of the old (coarse) box
270 | 				get_box(box_old, i,j,k, I, J, K);
271 | 				// get the node numbers of the new (fine) box, anchored at (2i,2j,2k)
272 | 				get_box(box_fine, i*2,j*2,k*2, I_fine, J_fine, K_fine);
273 | 
274 | 				// map from coarse to fine grid
275 | 				fine_map(U, U_fine, box_old, box_fine);
276 | 			}
277 | 		}
278 | 	}
279 | 
280 | 	// The former "global constraint" statement read U[I*J*K] and wrote
281 | 	// U_fine[I_fine*J_fine*K_fine]. v_cycle passes a coarse array of exactly
282 | 	// I*J*K doubles, so that access ran past the buffer; it is left out on purpose.
283 | 
284 | 	
285 | 	return;
286 | }
287 | 
288 | void fine_map( double* U, double* U_new,
289 | 			   uint box_old[][2][2],
290 | 			   uint box_new[][2][2] )
291 | {
292 | 	// Fill the eight entries of U_new listed in box_new from the eight entries
293 | 	// of U listed in box_old. Each result is a plain average of 1, 2, 4 or 8
294 | 	// corner values. The corners are bound by reference (c<a><b><c> is the
295 | 	// value at box_old[a][b][c]), so every read still happens where it is used.
296 | 	const double& c000 = U[box_old[0][0][0]];
297 | 	const double& c100 = U[box_old[1][0][0]];
298 | 	const double& c010 = U[box_old[0][1][0]];
299 | 	const double& c001 = U[box_old[0][0][1]];
300 | 	const double& c110 = U[box_old[1][1][0]];
301 | 	const double& c101 = U[box_old[1][0][1]];
302 | 	const double& c011 = U[box_old[0][1][1]];
303 | 	const double& c111 = U[box_old[1][1][1]];
304 | 
305 | 	// offset (0,0,0): the corner value is copied unchanged
306 | 	U_new[box_new[0][0][0]] = c000;
307 | 
308 | 	// offset along a single index: mean of the two corners on that line
309 | 	U_new[box_new[1][0][0]] = (c000 + c100)/2;
310 | 	U_new[box_new[0][1][0]] = (c000 + c010)/2;
311 | 	U_new[box_new[0][0][1]] = (c000 + c001)/2;
312 | 
313 | 	// offset along two indices: mean of the four corners spanning that plane
314 | 	U_new[box_new[1][1][0]] = (c000 + c100 + c010 + c110)/4;
315 | 	U_new[box_new[1][0][1]] = (c000 + c100 + c001 + c101)/4;
316 | 	U_new[box_new[0][1][1]] = (c000 + c010 + c001 + c011)/4;
317 | 
318 | 	// offset along all three indices: mean of all eight corners
319 | 	U_new[box_new[1][1][1]] = (c000 + c100 + c010 + c001
320 | 							   + c011 + c101 + c110 + c111)/8;
321 | }
322 | 
323 | 
324 | // 0_level v-cycle for testing purpose: a single Jacobi solve on the given grid, no coarsening
325 | void v_cycle_0( double* P, double* Rp,
326 | 				   uint n_dof, cuint nx, cuint ny, cuint nz,
327 | 				   cdouble hx, cdouble hy, cdouble hz,
328 | 				   cdouble hx2i, cdouble hy2i, cdouble hz2i,
329 | 				   cdouble tol, cuint max_iteration, cuint pre_smooth_iteration,
330 | 				   cdouble width, cdouble length, cdouble height,
331 | 				   cuint level, cuint max_level,
332 | 				   double& Er,
333 | 				   double* Uss, double* Vss, double* Wss,
334 | 				cdouble bcs[][6],
335 | 				cdouble dt)
336 | {
337 | 	cout<<"level: "<<level<<" n_dof: "<<n_dof<<endl;
338 | 
339 | 	// load vector: exactly n_dof entries, owned by this call and freed below
340 | 	double* Fp = new double[n_dof];
341 | 
342 | 	// Lp: triplet list plus the value / column / row-pointer arrays handed to pressure_matrix
343 | 	vector<tuple <uint, uint, double> > Lp_sp;
344 | 	vector<double> Lp_val(Lp_sp.size(),0.0);
345 | 	vector<uint> Lp_col_ind(Lp_sp.size(), 0);
346 | 	vector<uint> Lp_row_ptr(1,0);		
347 | 	
348 | 	// build right hand side of pressure poisson equation
349 | 	pressure_rhs(Fp, Uss, Vss, Wss, nx, ny, nz, bcs, hx, hy, hz, dt);
350 | 
351 | 	// build pressure matrix
352 | 	pressure_matrix( Lp_sp,
353 | 					 Lp_val, Lp_col_ind, Lp_row_ptr,
354 | 					 nx, ny, nz,
355 | 					 hx2i, hy2i, hz2i,
356 | 					 n_dof
357 | 					 );	
358 | 
359 | 	// solve discrete poisson equation: Lp\Fp
360 | 	// construct the scratch iterate; the solution itself goes into the caller's P
361 | 	double* P_tmp = new double[n_dof];
362 | 	// initial guess: zero everywhere
363 | #pragma omp parallel for shared(P, P_tmp) num_threads(nt)
364 | 	for(int n=0; n<n_dof; n++){
365 | 	    P[n] = 0.0;
366 | 	    P_tmp[n] = 0.0;
367 |     }
368 | 	// NOTE(review): Er is forwarded as received; other call sites set Er = tol*10 first - confirm
369 | 	// jacobi iteration (residual is returned in Rp)
370 | 	jacobi_sparse(tol, max_iteration, n_dof, P, P_tmp,
371 | 				  Lp_val, Lp_col_ind, Lp_row_ptr, Fp, Er, Rp);
372 | 
373 | 	delete[] P_tmp;
374 | 	delete[] Fp; // was never released before: one n_dof-sized leak per call
375 | 	return ;
376 | }
377 | 


--------------------------------------------------------------------------------
/v_cycle.h:
--------------------------------------------------------------------------------
 1 | // v-cycle
 2 | #ifndef V_CYCLE_H
 3 | #define V_CYCLE_H
 4 | 
 5 | #include "utils.h"
 6 | #include "jacobi.h"
 7 | #include "assemble.h"
 8 | #include "IO.h"
 9 | 
10 | // 3d full weighting stencil, indexed [p][q][r]; numerators below, each over 64, summing to 1
11 | // [[1 2 1; 2 4 2; 1 2 1] [2 4 2; 4 8 4; 2 4 2]; [1 2 1; 2 4 2; 1 2 1]];
12 | cdouble fw_stencil[3][3][3] =
13 | 	{ {{1.0/64.0,2.0/64.0,1.0/64.0}, {2.0/64.0,4.0/64.0,2.0/64.0},
14 | 	   {1.0/64.0,2.0/64.0,1.0/64.0}},
15 | 		{{2.0/64.0,4.0/64.0,2.0/64.0}, {4.0/64.0,8.0/64.0,4.0/64.0},
16 | 		 {2.0/64.0,4.0/64.0,2.0/64.0}},
17 | 			{{1.0/64.0,2.0/64.0,1.0/64.0}, {2.0/64.0,4.0/64.0,2.0/64.0},
18 | 			 {1.0/64.0,2.0/64.0,1.0/64.0}}};
19 | 
20 | 
21 | // multigrid v-cycle: restrict the residual, solve or recurse on the coarse grid, interpolate back, post-smooth
22 | void v_cycle( double* P, uint n_dof, cuint nx, cuint ny, cuint nz,
23 | 				 cdouble hx, cdouble hy, cdouble hz,
24 | 				 cdouble hx2i, cdouble hy2i, cdouble hz2i,
25 | 				 cdouble tol, cuint max_iteration, cuint pre_smooth_iteration,
26 | 				 cdouble lx, cdouble ly, cdouble lz,
27 | 				 cuint level, cuint max_level,
28 | 				 double* F,
29 | 				 double& Er,
30 | 				 double* Uss, double* Vss, double* Wss,
31 | 				 cdouble bcs[][6],
32 | 				 cdouble dt
33 | 				 );
34 | 
35 | // 3D full weight restriction: visits every second fine node (I,J,K) and fills R_new (I_new,J_new,K_new)
36 | void restriction( double* R, double* R_new, cuint I, cuint J, cuint K,
37 | 				  cuint I_new, cuint J_new, cuint K_new );
38 | 
39 | // map from fine to coarse solution: R_new[t_new] = sum of R[nei[p][q][r]]*fw_stencil[p][q][r]
40 | void coarse_map( double* R, double* R_new,
41 | 				 cuint nei[][3][3],
42 | 				 cuint i, cuint j, cuint k, cuint t_new );
43 | 
44 | // 3D trilinear interpolation: calls fine_map once per coarse node (I,J,K) to fill U_fine
45 | void interpolation( double* U, double* U_fine,
46 | 					cuint I, cuint J, cuint K,
47 | 					cuint I_fine, cuint J_fine, cuint K_fine);
48 | 
49 | // map from coarse to fine solution: averages of the box_old values of U, written at the box_new nodes
50 | void fine_map( double* U, double* U_new,
51 | 			   uint box_old[][2][2],
52 | 			   uint box_new[][2][2] );
53 | 
54 | // 0 level v_cycle: builds pressure rhs and matrix, then one jacobi_sparse solve into P (residual in Rp)
55 | void v_cycle_0( double* P, double* Rp,
56 | 				   uint n_dof, cuint nx, cuint ny, cuint nz,
57 | 				   cdouble hx, cdouble hy, cdouble hz,
58 | 				   cdouble hx2i, cdouble hy2i, cdouble hz2i,
59 | 				   cdouble tol, cuint max_iteration, cuint pre_smooth_iteration,
60 | 				   cdouble width, cdouble length, cdouble height,
61 | 				   cuint level, cuint max_level,
62 | 				   double& Er,
63 | 				   double* Uss, double* Vss, double* Wss,
64 | 				cdouble bcs[][6],
65 | 				cdouble dt
66 | 				   );
67 | 
68 | #endif // V_CYCLE_H
69 | 
70 | 


--------------------------------------------------------------------------------
/viscosity.C:
--------------------------------------------------------------------------------
  1 | #include "viscosity.h"
  2 | 
  3 | 
  4 | // implicitly solve viscosity: one sparse Jacobi solve per velocity component (results in Uss, Vss, Wss)
  5 | void viscosity(  double* U,
  6 | 				 double* V,
  7 | 				 double* W,
  8 | 				 double* Uss, double* Vss, double* Wss,
  9 | 				 cuint nx, cuint ny, cuint nz,
 10 | 				 cdouble hx, cdouble hy, cdouble hz,
 11 | 				 cdouble hx2i, cdouble hy2i, cdouble hz2i,
 12 | 				 cdouble dt, cdouble nu,
 13 | 				 cdouble bcs[][6],
 14 | 				 cdouble tol, cuint max_iteration )
 15 | {
 16 | 	double Er;
 17 | 	
 18 | 	// Lu: (nx-1)*ny*nz unknowns for the x-component
 19 | 	cuint n_u_dof = (nx-1)*ny*nz;
 20 | 	vector<tuple <uint, uint, double> > Lu_sp;
 21 | 	vector<double> Lu_val(Lu_sp.size(),0.0);
 22 | 	vector<uint> Lu_col_ind(Lu_sp.size(), 0);
 23 | 	vector<uint> Lu_row_ptr(1,0);		
 24 | 	// U doubles as the load vector: viscosity_matrix_sparse subtracts the
 25 | 	// boundary terms from it in place and jacobi_sparse then takes it as the
 26 | 	// right hand side, so no separate Fu copy is made
 27 | 	// sparse viscosity matrix and bc modification
 28 | 	viscosity_matrix_sparse( Lu_sp, Lu_val, Lu_col_ind, Lu_row_ptr,
 29 | 							 U, nx-1, ny, nz, hx, hy, hz,
 30 | 							 hx2i, hy2i, hz2i, dt, nu, 
 31 | 							 bcs[0], X_DIR );
 32 | 	// now solve Lu\U
 33 | 	// scratch iterate; the solution vector Uss is owned by the caller
 34 | 	double* Uss_tmp = new double[n_u_dof];
 35 | 	// initial guess
 36 | #pragma omp parallel for shared(Uss, Uss_tmp) num_threads(nt)
 37 | 	for(int n=0; n<n_u_dof; n++){
 38 | 	    Uss[n] = 0.0;
 39 | 	    Uss_tmp[n] = 0.0;
 40 |     }
 41 | 	// residual and error
 42 | 	double* Ru = new double[n_u_dof];
 43 | 	Er = tol*10;
 44 | 	// jacobi iteration
 45 | 	jacobi_sparse(tol, max_iteration, n_u_dof, Uss, Uss_tmp,
 46 | 				  Lu_val, Lu_col_ind, Lu_row_ptr, U, Er, Ru);
 47 | 			
 48 | 	// Lv: nx*(ny-1)*nz unknowns for the y-component
 49 | 	cuint n_v_dof = (nx)*(ny-1)*nz;
 50 | 	vector<tuple <uint, uint, double> > Lv_sp;
 51 | 	vector<double> Lv_val(Lv_sp.size(),0.0);
 52 | 	vector<uint> Lv_col_ind(Lv_sp.size(), 0);
 53 | 	vector<uint> Lv_row_ptr(1,0);		
 54 | 	// as for U, V itself is the load vector and is modified in place
 55 | 	// (former separate copy, kept for reference:)
 56 | 	// viscosity_load_vector(Fv, V, nx, ny-1, nz);
 57 |    	// sparse viscosity matrix and bc modification
 58 | 	viscosity_matrix_sparse( Lv_sp, Lv_val, Lv_col_ind, Lv_row_ptr,
 59 | 							 V, nx, ny-1, nz, hx, hy, hz,
 60 | 							 hx2i, hy2i, hz2i, dt, nu, 
 61 | 							 bcs[1], Y_DIR );
 62 | 	// now solve Lv\V
 63 | 	// scratch iterate; the solution vector Vss is owned by the caller
 64 | 	// (it is zeroed below before the solve)
 65 | 	double* Vss_tmp = new double[n_v_dof];
 66 | 	// initial guess
 67 | #pragma omp parallel for shared(Vss, Vss_tmp) num_threads(nt)
 68 | 	for(int n=0; n<n_v_dof; n++){
 69 | 	    Vss[n] = 0.0;
 70 | 	    Vss_tmp[n] = 0.0;
 71 |     }
 72 | 	// residual and error
 73 | 	double* Rv = new double[n_v_dof];
 74 | 	Er = tol*10;
 75 | 	// jacobi iteration
 76 | 	jacobi_sparse(tol, max_iteration, n_v_dof, Vss, Vss_tmp,
 77 | 				  Lv_val, Lv_col_ind, Lv_row_ptr, V, Er, Rv);
 78 | 
 79 | 	// Lw: nx*ny*(nz-1) unknowns for the z-component
 80 | 	cuint n_w_dof = (nx)*(ny)*(nz-1);
 81 | 	vector<tuple <uint, uint, double> > Lw_sp;
 82 | 	vector<double> Lw_val(Lw_sp.size(),0.0);
 83 | 	vector<uint> Lw_col_ind(Lw_sp.size(), 0);
 84 | 	vector<uint> Lw_row_ptr(1,0);		
 85 | 	// as for U, W itself is the load vector and is modified in place
 86 |    	// (former separate copy, kept for reference:)
 87 | 	// viscosity_load_vector(Fw, W, nx, ny, nz-1);
 88 | 	// sparse viscosity matrix and bc modification
 89 | 	viscosity_matrix_sparse( Lw_sp, Lw_val, Lw_col_ind, Lw_row_ptr,
 90 | 							 W, nx, ny, nz-1, hx, hy, hz,
 91 | 							 hx2i, hy2i, hz2i, dt, nu, 
 92 | 							 bcs[2], Z_DIR );
 93 | 	// now solve Lw\W
 94 | 	// scratch iterate; the solution vector Wss is owned by the caller
 95 | 	// (it is zeroed below before the solve)
 96 | 	double* Wss_tmp = new double[n_w_dof];
 97 | 	// initial guess
 98 | #pragma omp parallel for shared(Wss, Wss_tmp) num_threads(nt)
 99 | 	for(int n=0; n<n_w_dof; n++){
100 | 	    Wss[n] = 0.0;
101 | 	    Wss_tmp[n] = 0.0;
102 |     }
103 | 	// residual and error
104 | 	double* Rw = new double[n_w_dof];
105 | 	Er = tol*10;
106 | 	// jacobi iteration
107 | 	jacobi_sparse(tol, max_iteration, n_w_dof, Wss, Wss_tmp,
108 | 				  Lw_val, Lw_col_ind, Lw_row_ptr, W, Er, Rw);
109 | 	
110 | 	// v_cycle( n_u_dof, nx-1, ny, nz, hx2i, hy2i, hz2i,
111 | 	// 		 tol, max_iteration, pre_smooth_iteration,
112 | 	// 		 lx, ly, lz, 0, max_level, Fu, Er);
113 | 
114 | 	// cleanup: one delete[] per buffer ("delete[] a, b, c;" frees only a, the rest leaked)
115 | 	delete[] Uss_tmp; delete[] Ru;
116 | 	delete[] Vss_tmp; delete[] Rv;
117 | 	delete[] Wss_tmp; delete[] Rw;
118 | }
119 | 
120 | // sparse viscosity matrix: one 7-point stencil row per node, returned as triplets and as CSR arrays
121 | void viscosity_matrix_sparse( vector<tuple <uint, uint, double> >& L_sp, // out: merged (row, col, value) entries
122 | 							  vector<double>& val, // out: values, one per L_sp entry
123 | 							  vector<uint>& col_ind, // out: column of each value
124 | 							  vector<uint>& row_ptr, // in/out: row starts are appended (callers pass {0})
125 | 							  double* F, // in/out: boundary terms are subtracted in place
126 | 							  cuint nx, cuint ny, cuint nz,
127 | 							  cdouble hx, cdouble hy, cdouble hz,
128 | 							  cdouble hx2i, cdouble hy2i, cdouble hz2i,
129 | 							  cdouble dt, cdouble nu,
130 | 							  cdouble* u_bc, // six boundary values, read in the order x0, xl, y0, yl, z0, zl
131 | 							  cuint dir // direction of flow: u, v, or w?
132 | 							  )
133 | {
134 | 	// one list of (row#, col#, value) entries per thread; thread r only touches M[r]
135 | 	vector<vector<tuple <uint, uint, double> > > M;
136 | 	M.resize(nt);
137 | 	
138 | #pragma omp parallel  shared(M) num_threads(nt)
139 | 	{
140 | 		cuint myrank = omp_get_thread_num();
141 | 		
142 | #pragma omp for 
143 | 	for(int i=0; i<nx; i++){
144 | 		for(int j=0; j<ny; j++){
145 | 			for(int k=0; k<nz; k++){
146 | 				// flat indices of the node (t_111) and of its six axis neighbours; p and q are unused
147 | 				unsigned int p,q;
148 | 				unsigned int t_011,t_111,t_211,t_101,t_121,t_110,t_112;
149 | 				three_d_to_one_d(i,  j,  k,   nx,ny, t_111);
150 | 				three_d_to_one_d(i-1,j,  k,   nx,ny, t_011);
151 | 				three_d_to_one_d(i+1,j,  k,   nx,ny, t_211);
152 | 				three_d_to_one_d(i,  j-1,k,   nx,ny, t_101);
153 | 				three_d_to_one_d(i,  j+1,k,   nx,ny, t_121);
154 | 				three_d_to_one_d(i,  j,  k-1, nx,ny, t_110);
155 | 				three_d_to_one_d(i,  j,  k+1, nx,ny, t_112);
156 | 				
157 | 				// assigning values
158 | 				// U** contribution from left hand side: -1 on the diagonal
159 | 				sparse_add(M[myrank], t_111, t_111, -1);
160 | 
161 | 				// a neighbour inside the grid gets an off-diagonal entry; a missing one becomes a boundary term:
162 | 				if(i-1>=0)
163 | 					sparse_add(M[myrank], t_111, t_011, hx2i);
164 | 				else{ // x0
165 | 					if(dir==X_DIR) // wall normal to this component: boundary value goes to F once
166 | 						F[t_111] -= dt*nu*u_bc[0]/(hx*hx);
167 | 					else{ // other components: twice the value to F, plus an extra -hx2i on the diagonal
168 | 						F[t_111] -= dt*nu/(hx*hx)*u_bc[0]*2;
169 | 						sparse_add(M[myrank], t_111, t_111, -1*hx2i);
170 | 					}
171 | 				}
172 | 					
173 | 				sparse_add(M[myrank], t_111, t_111, -2*hx2i);
174 | 
175 | 				if(i+1<nx)
176 | 					sparse_add(M[myrank], t_111, t_211, hx2i);
177 | 				else{ //xl
178 | 					if(dir==X_DIR)
179 | 						F[t_111] -= dt*nu/(hx*hx) * u_bc[1];
180 | 					else{
181 | 						F[t_111] -= dt*nu/(hx*hx)*u_bc[1]*2;
182 | 						sparse_add(M[myrank], t_111, t_111, -1*hx2i);
183 | 					}
184 | 				}
185 | 				
186 | 				if(j-1>=0)
187 | 					sparse_add(M[myrank], t_111, t_101, hy2i);
188 | 				else{ // y0
189 | 					if(dir==Y_DIR)
190 | 						F[t_111] -= dt*nu/(hy*hy) * u_bc[2];
191 | 					else{
192 | 						F[t_111] -= dt*nu/(hy*hy)*u_bc[2]*2;
193 | 						sparse_add(M[myrank], t_111, t_111, -1*hy2i);
194 | 					}
195 | 						
196 | 				}
197 | 
198 | 				sparse_add(M[myrank], t_111, t_111, -2*hy2i);
199 | 				
200 | 				if(j+1<ny)
201 | 					sparse_add(M[myrank], t_111, t_121, hy2i);
202 | 				else{ //yl
203 | 					if(dir==Y_DIR)
204 | 						F[t_111] -= dt*nu/(hy*hy) * u_bc[3];
205 | 					else{
206 | 						F[t_111] -= dt*nu/(hy*hy)*u_bc[3]*2;
207 | 						sparse_add(M[myrank], t_111, t_111, -1*hy2i);
208 | 					}
209 | 				}
210 | 					
211 | 				if(k-1>=0)
212 | 					sparse_add(M[myrank], t_111, t_110, hz2i);
213 | 				else{ // z0
214 | 					if(dir==Z_DIR)
215 | 						F[t_111] -= dt*nu/(hz*hz) * u_bc[4];
216 | 					else{
217 | 						F[t_111] -= dt*nu/(hz*hz)*u_bc[4]*2;
218 | 						sparse_add(M[myrank], t_111, t_111, -1*hz2i);
219 | 					}
220 | 				}
221 | 					
222 | 				sparse_add(M[myrank], t_111, t_111, -2*hz2i);
223 | 				
224 | 				if(k+1<nz)
225 | 					sparse_add(M[myrank], t_111, t_112, hz2i);
226 | 				else{ // zl
227 | 					if(dir==Z_DIR)
228 | 						F[t_111] -= dt*nu/(hz*hz) * u_bc[5];
229 | 					else{
230 | 						F[t_111] -= dt*nu/(hz*hz)*u_bc[5]*2;
231 | 						sparse_add(M[myrank], t_111, t_111, -1*hz2i);
232 | 					}
233 | 				}
234 | 			}
235 | 		}
236 | 	} // end for
237 | 
238 | 	} // end parallel region		
239 | 
240 | 	// merge the per-thread lists into M[0], then sort
241 | 	// cout<<"sorting..."<<endl;
242 | 	for(int i=1; i<nt; i++)
243 | 		M[0].insert( M[0].end(), M[i].begin(), M[i].end() );
244 | 	// sort(M[0].begin(), M[0].end(), comp_pairs);
245 | 	vector<tuple <uint, uint, double> > tmp;
246 | 	tmp.resize(M[0].size());	
247 | 	mergesort(&M[0][0], nt, M[0].size(), &tmp[0] );
248 | 
249 | 	// consolidate: consecutive entries sharing (row, col) are summed; reads M[0][0], so M[0] must be non-empty
250 | 	L_sp.push_back(M[0][0]);
251 | 	uint ct=0;
252 | 	for(int i =1; i<M[0].size(); i++){
253 | 		if( (get<0>(L_sp[ct])==get<0>(M[0][i]))
254 | 			&& (get<1>(L_sp[ct])==get<1>(M[0][i])) ){
255 | 			get<2>(L_sp[ct]) += get<2>(M[0][i]);
256 | 		}
257 | 		else{
258 | 			L_sp.push_back(M[0][i]);
259 | 			ct++;
260 | 		}
261 | 	}
262 |    
263 | 	// convert to CSR format: val/col_ind mirror L_sp, row_ptr gets the index where each new row starts
264 | 	// cout<<"converting to CSR format"<<endl;
265 | 	val.resize(L_sp.size(),0.0);
266 | 	col_ind.resize(L_sp.size(), 0);
267 | 	
268 | #pragma omp parallel for shared(val, col_ind, L_sp) num_threads(nt)
269 | 	for(int i=0; i<L_sp.size(); i++){
270 | 		val[i] = get<2>(L_sp[i]);
271 | 		col_ind[i] = get<1>(L_sp[i]);
272 | 	}
273 | 	for(int i=1; i<L_sp.size(); i++){
274 | 		if(get<0>(L_sp[i])!=get<0>(L_sp[i-1]))
275 | 		   row_ptr.push_back(i);
276 | 	}
277 | 	row_ptr.push_back(L_sp.size());
278 | 
279 | 	// cout<<"done"<<endl;
280 | 	
281 | 	// output to file for testing purpose
282 | 	// char file_name[100];
283 | 	// if(dir==X_DIR) sprintf(file_name, "L%s_matrix.dat", "u");
284 | 	// if(dir==Y_DIR) sprintf(file_name, "L%s_matrix.dat", "v");
285 | 	// if(dir==Z_DIR) sprintf(file_name, "L%s_matrix.dat", "w");
286 | 
287 | 	// ofstream file_out(file_name);
288 | 	// for(int i=0; i<L_sp.size(); i++){
289 | 	// 	file_out<<get<0>(L_sp[i])<<" "<<get<1>(L_sp[i])
290 | 	// 		<<" "<<get<2>(L_sp[i])<<endl;
291 | 	// }
292 | 	// file_out.close();
293 | 	
294 | }
295 | 
296 | // load vector for the implicit viscous solve: F takes the value of U at every visited node
297 | void viscosity_load_vector( double* F, double* U,
298 | 							cuint nx, cuint ny, cuint nz)
299 | {
300 | 	// the outermost loop is the one shared out among the nt OpenMP threads
301 | #pragma omp parallel for shared(F, U) num_threads(nt)
302 | 	for(int ix=0; ix<nx; ++ix){
303 | 		for(int iy=0; iy<ny; ++iy){
304 | 			for(int iz=0; iz<nz; ++iz){
305 | 				// flat position of node (ix,iy,iz); declared inside the loop so
306 | 				// that every thread automatically works on its own copy
307 | 				uint node;
308 | 				three_d_to_one_d(ix, iy, iz, nx, ny, node);
309 | 				F[node] = U[node];
310 | 			}
311 | 		}
312 | 	}
313 | }
314 | 


--------------------------------------------------------------------------------
/viscosity.h:
--------------------------------------------------------------------------------
 1 | // implicit viscosity contributions
 2 | #ifndef VISCOSITY_H
 3 | #define VISCOSITY_H
 4 | 
 5 | #include "utils.h"
 6 | #include "assemble.h"
 7 | #include "msort.h"
 8 | #include "jacobi.h"
 9 | 
10 | using namespace std;
11 | 
12 | 
13 | // implicitly solve viscosity: Jacobi solves for Uss, Vss, Wss; U, V, W serve as right hand sides and are modified
14 | void viscosity(  double* U,
15 | 				 double* V,
16 | 				 double* W,
17 | 				 double* Uss, double* Vss, double* Wss,
18 | 				 cuint nx, cuint ny, cuint nz,
19 | 				 cdouble hx, cdouble hy, cdouble hz,
20 | 				 cdouble hx2i, cdouble hy2i, cdouble hz2i,
21 | 				 cdouble dt, cdouble nu,
22 | 				 cdouble bcs[][6],
23 | 				 cdouble tol, cuint max_iteration );
24 | 
25 | // implicitly solve viscosity, multi_array overload. NOTE(review): viscosity.C defines no such overload - confirm
26 | void viscosity(  boost::multi_array<double, 3>& U,
27 | 				 boost::multi_array<double, 3>& V,
28 | 				 boost::multi_array<double, 3>& W,
29 | 				 double* Uss, double* Vss, double* Wss,
30 | 				 cuint nx, cuint ny, cuint nz,
31 | 				 cdouble hx, cdouble hy, cdouble hz,
32 | 				 cdouble hx2i, cdouble hy2i, cdouble hz2i,
33 | 				 cdouble dt, cdouble nu,
34 | 				 cdouble bcs[][6],
35 | 				 cdouble tol, cuint max_iteration );
36 | 
37 | 
38 | // sparse viscosity matrix: fills L_sp (triplets) and val/col_ind/row_ptr, and subtracts boundary terms from F
39 | void viscosity_matrix_sparse( vector<tuple <uint, uint, double> >& L_sp,
40 | 							  vector<double>& val,
41 | 							  vector<uint>& col_ind,
42 | 							  vector<uint>& row_ptr,
43 | 							  double* F,
44 | 							  cuint nx, cuint ny, cuint nz,
45 | 							  cdouble hx, cdouble hy, cdouble hz,
46 | 							  cdouble hx2i, cdouble hy2i, cdouble hz2i,
47 | 							  cdouble dt, cdouble nu,
48 | 							  cdouble* u_bc,
49 | 							  cuint dir // direction of flow: u, v, or w?
50 | 							  );
51 | 
52 | // set load vector for implicit viscous solve: copies U into F over an nx, ny, nz index range
53 | void viscosity_load_vector( double* F, double* U,
54 | 							cuint nx, cuint ny, cuint nz);
55 | 
56 | #endif //VISCOSITY_H
57 | 


--------------------------------------------------------------------------------