├── IO.C
├── IO.h
├── Makefile
├── README.md
├── advection.C
├── advection.h
├── analysis.m
├── assemble.C
├── assemble.h
├── jacobi.C
├── jacobi.h
├── main.C
├── msort.C
├── msort.h
├── pressure.C
├── pressure.h
├── scaling.m
├── utils.C
├── utils.h
├── v_cycle.C
├── v_cycle.h
├── viscosity.C
└── viscosity.h
/IO.C:
--------------------------------------------------------------------------------
1 | #include "IO.h"
2 |
3 | // write out the sparse matrix
4 | int write_matrix(cuint P,
5 | cuint Q,
6 | double** U,
7 | char* file_name)
8 | {
9 | ofstream file_out;
10 | file_out.open (file_name);
11 |
12 | if(!file_out.is_open()){
13 | return 1;
14 | }
15 |
16 | for(int p=0; p
& U )
433 | {
434 | boost::multi_array_types::size_type const* sizes = U.shape();
435 | cuint nx = sizes[0];
436 | cuint ny = sizes[1];
437 | cuint nz = sizes[2];
438 |
439 | double max_value = abs(U[0][0][0]);
440 |
441 | for(int i=0; i& U,
517 | boost::multi_array& Ud,
518 | cuint dir )
519 | {
520 | boost::multi_array_types::size_type const* sizes = U.shape();
521 | cuint nx = sizes[0];
522 | cuint ny = sizes[1];
523 | cuint nz = sizes[2];
524 |
525 | // difference in x-direction
526 | if(dir==X_DIR){
527 | for(int i=0; i& U2,
563 | boost::multi_array& U2_x,
564 | cdouble h,
565 | cuint dir )
566 | {
567 | boost::multi_array_types::size_type const* sizes = U2.shape();
568 | cuint nx = sizes[0];
569 | cuint ny = sizes[1];
570 | cuint nz = sizes[2];
571 |
572 | // x-difference
573 | if( dir==X_DIR){
574 | for(int i=0; i& U );
37 |
38 |
39 | // get upwinding differences
40 | void upwind_difference( const boost::multi_array& U,
41 | boost::multi_array& Ud,
42 | cuint dir );
43 |
44 | // get 1d staggered difference
45 | // Ua is an averaged U value at the cell vertices
46 | // get the difference value at the center of cell
47 | void staggered_first_difference( const double* UV,
48 | double* UV_x,
49 | cuint nx, cuint ny, cuint nz,
50 | cuint nx_x, cuint ny_x, cuint nz_x,
51 | cdouble h,
52 | cuint dir
53 | );
54 |
55 | // get central first difference at center of element
56 | void central_first_difference( const boost::multi_array& U2,
57 | boost::multi_array& U2_x,
58 | cdouble h,
59 | cuint dir );
60 |
61 |
62 | // get mixed edge values
63 | void calculate_edge_values( double* Ue,
64 | double* Ve,
65 | double* We,
66 | double* UV,
67 | double* UW,
68 | double* VW,
69 | cuint nx, cuint ny, cuint nz);
70 |
71 | // consolidate advection terms
72 | void consolidate_advection( double* U,
73 | double* V,
74 | double* W,
75 | double* U2_x,
76 | double* V2_y,
77 | double* W2_z,
78 | double* UV_y,
79 | double* UW_z,
80 | double* VU_x,
81 | double* VW_z,
82 | double* WU_x,
83 | double* WV_y,
84 | cuint nx, cuint ny, cuint nz,
85 | cdouble dt );
86 |
87 | #endif //ASSEMBLE_H
88 |
--------------------------------------------------------------------------------
/analysis.m:
--------------------------------------------------------------------------------
1 | clear all
2 | close all
3 | % Plot the data of each results_<i>.dat file as a 3-D quiver plot, one figure per file.
4 | nt = 1; % number of result files to load (file indices 0..nt-1)
5 |
6 | for i=0:nt-1
7 | U = load(sprintf('results_%i.dat',i)); % numeric table read from the i-th result file
8 | figure(i+1); % file index starts at 0, figure numbers start at 1
9 | quiver3(U(:,1),U(:,2),U(:,3), U(:,5), U(:,6), U(:,7)); % columns 1-3: arrow positions, columns 5-7: arrow components
10 | title(sprintf('velocity at i=%i', i))
11 | % pause
12 |
13 | end
14 |
15 | % plot(U(:,1), U(:,4));
16 |
17 | %
18 | %
19 | % for i=1:nt
20 | % U{i} = load(sprintf('results_%i.dat',i));
21 | % legend_names{i} = sprintf('level %i', i);
22 | % end
23 | % U{max_level+1} = load('results_100.dat');
24 | % legend_names{max_level+1} = 'final solution';
25 | % legend_names{max_level+2} = 'exact solution';
26 | %
27 | % hold on
28 | % col=hsv(max_level+3);
29 | % for i=1:max_level+1
30 | % h= plot( U{i}(1:end,1),U{i}(1:end,4), '-o');
31 | % set(h, 'Color',col(i+1,:));
32 | % end
33 | %
34 | % x=0:0.01:1;
35 | % plot(x, -(1/(2*pi))^2*sin(x*2*pi));
36 | %
37 | % legend(legend_names,0);
38 | %
39 | % hold off
40 | %
41 | % % v=zeros(9,9,9);
42 | %
43 | % % for i=1:length(U)
44 | % % v(U(i,1)+1,U(i,2)+1,U(i,3)+1) = U(i,4);
45 | % % end
46 | % %
47 | % % x=U(:,1);
48 | % % y=U(:,2);
49 | % % z=U(:,3);
50 | % % % v=U(:,4);
51 | % %
52 | % % [x y z v] = flow;
53 | % % h=contourslice(x,y,z,v,[1:9],[],[0], linspace(-8,2,10));
54 | % % axis([0 10 -3 3 -3 3]); daspect([1 1 1])
55 | % % camva(24); camproj perspective;
56 | % % campos([-3 -15 5])
57 | % % set(gcf, 'Color', [.3 .3 .3], 'renderer', 'zbuffer')
58 | % % set(gca, 'Color', 'black' , 'XColor', 'white', ...
59 | % % 'YColor', 'white' , 'ZColor', 'white')
60 | % % box on
--------------------------------------------------------------------------------
/assemble.C:
--------------------------------------------------------------------------------
1 | #include "assemble.h"
2 | #include "utils.h"
3 | #include "msort.h"
4 |
5 | // 2nd order stencil
6 | void fd_matrix( double** M,
7 | cuint I, cuint J, cuint K,
8 | const double dx2i,
9 | const double dy2i,
10 | const double dz2i,
11 | cuint n_dof
12 | )
13 | {
14 | #pragma omp parallel for shared(M) num_threads(nt)
15 | for(int i=0; i >& M_sp,
77 | vector& val,
78 | vector& col_ind,
79 | vector& row_ptr,
80 | cuint I, cuint J, cuint K,
81 | const double dx2i,
82 | const double dy2i,
83 | const double dz2i,
84 | cuint n_dof
85 | )
86 | {
87 |
88 | // initialize sparse matrix (row#, col#, value)
89 | vector > > M;
90 | M.resize(nt);
91 |
92 |
93 | #pragma omp parallel shared(M) num_threads(nt)
94 | {
95 | cuint myrank = omp_get_thread_num();
96 |
97 | #pragma omp for
98 | for(int i=0; i(M[myrank][n_dof-1]));
164 | // M[n_dof-1][n_dof-1] = n_dof;
165 |
166 | // sort and consolidate sparse matrix (row#, col#, value)
167 | // cout<<"sorting..."< >M_sp;
171 | // M_sp[myrank].push_back(M[0]);
172 | uint ct=0;
173 |
174 | // #pragma omp critical
175 | // {
176 | // cout<<"thread #: "<(M_sp[myrank][i])<<" j: "
179 | // <(M_sp[myrank][i])<<" v: "
180 | // <(M_sp[myrank][i])< > tmp;
192 | tmp.resize(M[0].size());
193 | mergesort(&M[0][0], nt, M[0].size(), &tmp[0] );
194 |
195 |
196 | cout<<"done"<(M_sp[ct])==get<0>(M[0][i]))
203 | && (get<1>(M_sp[ct])==get<1>(M[0][i])) ){
204 | // get<0>(M_sp[ct]) += get<0>(M[0][i]);
205 | // get<1>(M_sp[ct]) += get<1>(M[0][i]);
206 | get<2>(M_sp[ct]) += get<2>(M[0][i]);
207 | }
208 | else{
209 | M_sp.push_back(M[0][i]);
210 | ct++;
211 | }
212 |
213 | }
214 |
215 |
216 | // convert to CSR format
217 | cout<<"converting to CSR format"<(M_sp[i]);
224 | col_ind[i] = get<1>(M_sp[i]);
225 | }
226 | for(int i=1; i(M_sp[i])!=get<0>(M_sp[i-1]))
228 | row_ptr.push_back(i);
229 | }
230 | row_ptr.push_back(M_sp.size());
231 |
232 | // for(int i=0; i(M_sp[i])<<" "<(M_sp[i])
240 | <<" "<(M_sp[i])< >& M,
316 | cuint i, cuint j, cdouble v)
317 | {
318 | tuple M_tmp(i, j, v);
319 | M.push_back(M_tmp);
320 |
321 | // vector idx_tmp(2, 0.0);
322 | // idx_tmp[0] = i; idx_tmp[1]=j;
323 | // idx.push_back(idx_tmp);
324 | // value.push_back(v);
325 |
326 | }
327 |
328 | // insert index and value into a sparse matrix
329 | // note that M should be sorted before use
330 | void sparse_insert( vector >& M,
331 | cuint i, cuint j, cdouble v)
332 | {
333 | // replace the value
334 | for(int mn=0; mn(M[mn])==i && get<1>(M[mn])==j){
336 | get<2>(M[mn]) = v;
337 | return;
338 | }
339 | }
340 |
341 | // if the value does not exist, add the value
342 | sparse_add(M, i,j, v);
343 |
344 | }
345 |
346 |
347 | // merge two sorted arrays
348 | void merge(vector >& left,
349 | vector >& right,
350 | cuint n_left, cuint n_right,
351 | vector >& result,
352 | vector >& tmp
353 | )
354 | {
355 | uint it = 0;
356 | uint left_it = 0, right_it = 0;
357 |
358 | while(left_it < n_left && right_it < n_right ) {
359 | it = left_it+right_it;
360 | // cout< >& M_sp,
19 | vector& val,
20 | vector& col_ind,
21 | vector& row_ptr,
22 | cuint I, cuint J, cuint K,
23 | const double dx2i,
24 | const double dy2i,
25 | const double dz2i,
26 | cuint n_dof
27 | );
28 |
29 | void load_vector( double* F,
30 | cuint n_dof,
31 | cuint I,
32 | cuint J,
33 | cuint K
34 | );
35 |
36 | int boundary_conditins( cuint n_dof,
37 | cuint I,
38 | cuint J,
39 | cuint K,
40 | double** M,
41 | double* F
42 | );
43 |
44 | // add index and value into a sparse matrix
45 | void sparse_add( vector >& M,
46 | cuint i, cuint j, cdouble v);
47 |
48 | // merge two sorted arrays
49 | void merge(vector >& left,
50 | vector >& right,
51 | cuint n_left, cuint n_right,
52 | vector >& result,
53 | vector >& tmp
54 | );
55 |
56 |
57 | #endif //ASSEMBLE_H
58 |
--------------------------------------------------------------------------------
/jacobi.C:
--------------------------------------------------------------------------------
1 | #include "jacobi.h"
2 | #include "assemble.h"
3 | #include "IO.h"
4 |
5 | // jacobi method
6 | void jacobi( cdouble tol,
7 | cuint max_iteration,
8 | cuint n_dof,
9 | double* u_new,
10 | double* u_old,
11 | double** M,
12 | double* F,
13 | double& Er,
14 | double* R)
15 | {
16 | // iteration counter
17 | int ct = 0;
18 | cdouble tol2 = tol*tol;
19 |
20 | while(Er>tol2 && ct& val,
61 | const vector& col_ind,
62 | const vector& row_ptr,
63 | double* F,
64 | double& Er,
65 | double* R)
66 | {
67 | // iteration counter
68 | int ct = 0;
69 | cdouble tol2 = tol*tol;
70 | double E=tol2*100;
71 |
72 | #pragma omp parallel shared(F, U_tmp, U, R, val,col_ind, row_ptr, ct) num_threads(nt)
73 | {
74 | while(E>tol2 && ct& val,
158 | const vector& col_ind,
159 | const vector& row_ptr,
160 | double* U,
161 | double* F,
162 | double* R,
163 | cuint n_dof)
164 | {
165 | double E=0;
166 | #pragma omp parallel for shared(R,val,col_ind,row_ptr,U,F) num_threads(nt) reduction(+:E)
167 | for(int i=0; i& val,
187 | // const vector& col_ind,
188 | // const vector& row_ptr,
189 | // double* F,
190 | // double& Er,
191 | // double* R)
192 | // {
193 | // // iteration counter
194 | // int ct = 0;
195 | // cdouble tol2 = tol*tol;
196 |
197 | // while(Er>tol2 && ct& val,
26 | const vector& col_ind,
27 | const vector& row_ptr,
28 | double* F,
29 | double& Er,
30 | double* R);
31 |
32 | double convergence_check ( double** M,
33 | double* U,
34 | double* F,
35 | double* R,
36 | cuint n_dof
37 | );
38 |
39 | double convergence_check_sparse ( const vector& val,
40 | const vector& col_ind,
41 | const vector& row_ptr,
42 | double* U,
43 | double* F,
44 | double* R,
45 | cuint n_dof);
46 |
47 | #endif //JACOBI_H
48 |
49 |
--------------------------------------------------------------------------------
/main.C:
--------------------------------------------------------------------------------
1 | #include "jacobi.h"
2 | #include "utils.h"
3 | #include "IO.h"
4 | #include "v_cycle.h"
5 | #include "advection.h"
6 | #include "viscosity.h"
7 | #include "pressure.h"
8 |
9 | // number of threads
10 | uint nt;
11 |
// Set up initial conditions: zero the three velocity arrays and the
// pressure array.
//
// Number of entries written to each array:
//   U: (nx-1)*ny*nz    V: nx*(ny-1)*nz    W: nx*ny*(nz-1)    P: nx*ny*nz
//
// The entry counts are evaluated in 64-bit arithmetic and a zero dimension
// yields an empty array.  Forming (nx-1)*ny*nz directly in 32-bit unsigned
// arithmetic wraps around when nx==0 (and overflows on very large grids),
// which would turn the loops below into out-of-bounds writes.
//
// U, V, W, P : arrays to be cleared; each must hold at least the number of
//              entries listed above
// nx, ny, nz : grid dimensions
void initial_conditions( double* U,
                         double* V,
                         double* W,
                         double* P,
                         const unsigned int nx,
                         const unsigned int ny,
                         const unsigned int nz )
{
    typedef unsigned long long count_t;

    // "dimension minus one", clamped at zero so an empty grid stays empty
    const count_t nx_m1 = (nx > 0) ? nx - 1 : 0;
    const count_t ny_m1 = (ny > 0) ? ny - 1 : 0;
    const count_t nz_m1 = (nz > 0) ? nz - 1 : 0;

    const count_t cx = nx;
    const count_t cy = ny;
    const count_t cz = nz;

    const count_t n_u = nx_m1 * cy * cz;
    const count_t n_v = cx * ny_m1 * cz;
    const count_t n_w = cx * cy * nz_m1;
    const count_t n_p = cx * cy * cz;

    for(count_t n=0; n<n_u; n++)
        U[n] = 0.0;

    for(count_t n=0; n<n_v; n++)
        V[n] = 0.0;

    for(count_t n=0; n<n_w; n++)
        W[n] = 0.0;

    for(count_t n=0; n<n_p; n++)
        P[n] = 0.0;

    return;
}
33 |
34 | // main function!
35 | int main( int argc, char** argv )
36 | {
37 | // initialize constants
38 | cdouble nu = 100; // kinetic viscosity (mu/rho)
39 | double dt = 0.1; //time step
40 | cdouble tf = 0.1; // final time
41 |
42 | // domain size
43 | cdouble lx = 1.0;
44 | cdouble ly = 1.0;
45 | cdouble lz = 1.0;
46 |
47 | // domain corners
48 | cdouble xmin = 0.0;
49 | cdouble ymin = 0.0;
50 | cdouble zmin = 0.0;
51 | cdouble xmax = xmin+lx;
52 | cdouble ymax = ymin+ly;
53 | cdouble zmax = zmin+lz;
54 |
55 | // number of gridpoints in each dimension
56 | nt=1;
57 | uint nx=10;
58 | uint ny=10;
59 | uint nz=10; // problem size (n_dof=n_size^3)
60 | uint max_level=0; // maximum v-cycle level
61 | if(argc>5){
62 | nt = atoi(argv[1]);
63 | max_level = atoi(argv[2]);
64 | nx = atoi(argv[3]);
65 | ny = atoi(argv[4]);
66 | nz = atoi(argv[5]);
67 | }
68 | else{
69 | cout<<"multigrid [# of threads] [max level] [I_size] [J_size] [K_size]"<& i,
5 | const tuple& j ) {
6 | if( (get<0>(i)) < (get<0>(j)) ) return true;
7 | else if( get<0>(i) == get<0>(j)) return (get<1>(i)) < (get<1>(j));
8 | else return false;
9 | }
10 |
11 | // merge two sorted arrays
12 | void merge(tuple * left,
13 | tuple * right,
14 | const int n_left, const int n_right,
15 | tuple * result,
16 | tuple * tmp )
17 | {
18 | unsigned int it = 0;
19 | unsigned int left_it = 0, right_it = 0;
20 | // cout<<"n_left "<* vec,
56 | const int threads,
57 | const int n,
58 | tuple * tmp
59 | )
60 | {
61 | // Termination condition: List is completely sorted if it
62 | // only contains a single element.
63 | if(n == 1){
64 | return;
65 | }
66 |
67 | // Determine the location of the middle element in the vector
68 | tuple * left = vec; // left array pointer
69 | int n_left = n/2; // number of elements in left array
70 | tuple * tmp_left = tmp; // left tmp array pointer
71 |
72 | tuple * right = left+n/2; // right array pointer
73 | int n_right = n-n/2; // number of elements in right array
74 | tuple * tmp_right = tmp_left+n/2; // right tmp array pointer
75 |
76 | // Perform a merge sort on the two smaller vectors
77 | if (threads > 1) {
78 |
79 | #pragma omp parallel sections
80 | {
81 | #pragma omp section
82 | {
83 | mergesort(left, threads/2, n_left, tmp_left);
84 | }
85 | #pragma omp section
86 | {
87 | mergesort(right, threads - threads/2, n_right, tmp_right);
88 | }
89 | }
90 | }
91 | else {
92 | mergesort(left, 1, n_left, tmp_left);
93 | mergesort(right, 1, n_right, tmp_right);
94 | }
95 |
96 | merge(left, right, n_left, n_right, left, tmp );
97 |
98 | return;
99 | }
100 |
--------------------------------------------------------------------------------
/msort.h:
--------------------------------------------------------------------------------
1 | // openmp merge sort
2 | #ifndef MSORT_H
3 | #define MSORT_H
4 |
5 | #include "utils.h"
6 |
7 | using namespace std;
8 |
9 | // comparison function for sorting pairs
10 | int comp_tuples( const tuple& i,
11 | const tuple& j );
12 |
13 | // merge two sorted arrays
14 | void merge(tuple * left,
15 | tuple * right,
16 | const int n_left, const int n_right,
17 | tuple * result,
18 | tuple * tmp );
19 |
20 | // mergesort with OpenMP parallelism
21 | void mergesort(tuple * vec,
22 | const int threads,
23 | const int n,
24 | tuple * tmp
25 | );
26 |
27 |
28 | #endif // MSORT_H
29 |
--------------------------------------------------------------------------------
/pressure.C:
--------------------------------------------------------------------------------
1 | #include "pressure.h"
2 | #include "v_cycle.h"
3 |
4 | // compute pressure correction
5 | void pressure( double* U, double* V, double* W, double* P,
6 | double* Uss, double* Vss, double* Wss,
7 | cuint nx, cuint ny, cuint nz,
8 | cdouble bcs[][6],
9 | cdouble lx, cdouble ly, cdouble lz,
10 | cdouble hx, cdouble hy, cdouble hz,
11 | cdouble hx2i, cdouble hy2i, cdouble hz2i,
12 | cdouble tol, cuint max_iteration,
13 | cuint pre_smooth_iteration, cuint max_level,
14 | cdouble dt)
15 | {
16 | cuint n_dof = nx*ny*nz;
17 |
18 | // residual and error
19 | double* Rp = new double[n_dof];
20 | double Er = tol*10;
21 |
22 | // 0-level v_cycle
23 | if(max_level==0)
24 | v_cycle_0( P, Rp,
25 | n_dof, nx, ny, nz,
26 | hx, hy, hz,
27 | hx2i, hy2i, hz2i,
28 | tol, max_iteration, pre_smooth_iteration,
29 | hx, hy, hz,
30 | 0, max_level,
31 | Er,
32 | Uss, Vss, Wss,
33 | bcs,dt );
34 | else
35 | // v-cycle
36 | v_cycle( P, n_dof, nx, ny, nz,
37 | hx, hy, hz,
38 | hx2i, hy2i, hz2i,
39 | tol, max_iteration, pre_smooth_iteration,
40 | lx, ly, lz,
41 | 0, max_level-1,
42 | Rp,
43 | Er,
44 | Uss, Vss, Wss,
45 | bcs, dt
46 | );
47 |
48 |
49 | // compute pressure corrections
50 | double* Pr_x = new double[(nx-1)*(ny)*(nz)];
51 | double* Pr_y = new double[(nx)*(ny-1)*(nz)];
52 | double* Pr_z = new double[(nx)*(ny)*(nz-1)];
53 |
54 | for(int i=0; i<(nx-1)*(ny)*(nz); i++)
55 | Pr_x[i]=0.0;
56 |
57 | compute_corrections( P, Pr_x, Pr_y, Pr_z, nx, ny, nz, hy, hy, hz );
58 |
59 | // 1d index
60 | uint t;
61 |
62 | // correct velocities
63 | // x-direction
64 | #pragma omp parallel for private(t) shared(U, Uss, Pr_x) num_threads(nt)
65 | for(int i=0; i >& Lp_sp,
210 | vector& val,
211 | vector& col_ind,
212 | vector& row_ptr,
213 | cuint nx, cuint ny, cuint nz,
214 | const double hx2i,
215 | const double hy2i,
216 | const double hz2i,
217 | cuint n_dof
218 | )
219 | {
220 | // initialize sparse matrix (row#, col#, value)
221 | vector > > M;
222 | M.resize(nt);
223 |
224 | #pragma omp parallel shared(M) num_threads(nt)
225 | {
226 | cuint myrank = omp_get_thread_num();
227 |
228 | // loop through inner nodes
229 | #pragma omp for
230 | for(int i=0; i=0)
246 | sparse_add(M[myrank], t_111, t_011, hx2i);
247 | else // x0: i==0 (P[-1][j][k]==P[0][j][k])
248 | sparse_add(M[myrank], t_111, t_111, hx2i);
249 |
250 | sparse_add(M[myrank], t_111, t_111, -2*hx2i);
251 |
252 | if(i+1=0)
259 | sparse_add(M[myrank], t_111, t_101, hy2i);
260 | else // y0: j==0 (P[i][-1][k]==P[i][0][k])
261 | sparse_add(M[myrank], t_111, t_111, hy2i);
262 |
263 | sparse_add(M[myrank], t_111, t_111, -2*hy2i);
264 |
265 | if(j+1=0)
272 | sparse_add(M[myrank], t_111, t_110, hz2i);
273 | else // z0: k==0 (P[i][j][-1]==P[i][j][0])
274 | sparse_add(M[myrank], t_111, t_111, hz2i);
275 |
276 | sparse_add(M[myrank], t_111, t_111, -2*hz2i);
277 |
278 | if(k+1 > tmp;
304 | tmp.resize(M[0].size());
305 | mergesort(&M[0][0], nt, M[0].size(), &tmp[0] );
306 |
307 | // consolidate
308 | Lp_sp.push_back(M[0][0]);
309 | uint ct=0;
310 | for(int i =1; i(Lp_sp[ct])==get<0>(M[0][i]))
312 | && (get<1>(Lp_sp[ct])==get<1>(M[0][i])) ){
313 | get<2>(Lp_sp[ct]) += get<2>(M[0][i]);
314 | }
315 | else{
316 | Lp_sp.push_back(M[0][i]);
317 | ct++;
318 | }
319 | }
320 |
321 | // point constraint to close the system
322 | get<2>(Lp_sp[0]) = 5*get<2>(Lp_sp[0]);
323 |
324 | // convert to CSR format
325 | val.resize(Lp_sp.size(),0.0);
326 | col_ind.resize(Lp_sp.size(), 0);
327 |
328 | #pragma omp parallel for shared(val, col_ind, Lp_sp) num_threads(nt)
329 | for(int i=0; i(Lp_sp[i]);
331 | col_ind[i] = get<1>(Lp_sp[i]);
332 | }
333 | for(int i=1; i(Lp_sp[i])!=get<0>(Lp_sp[i-1]))
335 | row_ptr.push_back(i);
336 | }
337 | row_ptr.push_back(Lp_sp.size());
338 |
339 | // output to file for testing purpose
340 | ofstream file_out("Lp_matrix.dat");
341 | for(int i=0; i(Lp_sp[i])<<" "<(Lp_sp[i])
343 | <<" "<(Lp_sp[i])< >& Lp_sp,
35 | vector& val,
36 | vector& col_ind,
37 | vector& row_ptr,
38 | cuint nx, cuint ny, cuint nz,
39 | const double hx2i,
40 | const double hy2i,
41 | const double hz2i,
42 | cuint n_dof
43 | );
44 |
45 | // compute corrections from pressure value
46 | void compute_corrections( double* Pr,
47 | double* Pr_x,
48 | double* Pr_y,
49 | double* Pr_z,
50 | cuint nx, cuint ny, cuint nz,
51 | cdouble hx, cdouble hy, cdouble hz );
52 |
53 | #endif
54 |
--------------------------------------------------------------------------------
/scaling.m:
--------------------------------------------------------------------------------
% Plot strong-scaling timings, parallel efficiency and weak-scaling timings.
% Every plot is drawn in its own figure window; without the explicit
% "figure" calls each plot would replace the previous one and only the
% last would remain visible.
clear all
close all

% strong scaling
% 32^3
% timings in seconds, listed in the same order as Nt
Ta = [0.0112691 0.00559711 0.00452089 0.00302196 0.00250888];   % advection
Tv = [0.0432389 0.041455 0.0408649 0.0634542 0.116877];         % viscosity
Tp = [3.77661 4.3534 4.69743 7.65514 13.1779];                  % pressure
Tt = [4.1107 4.66746 5.0014 7.97422 13.5518];                   % total
Nt = [16 8 4 2 1];

figure;
loglog(Nt, Ta, '-o', Nt, Tv, '-x', Nt, Tp, '-+', Nt, Tt, '-s');
xlim([1 16]);   % the lower limit must be positive on a logarithmic axis
set(gca,'XTick',[1 2 4 8 16]);
title('Strong Scaling');
legend('advection','viscosity','pressure','total');
xlabel('# of nodes');
ylabel('time [s]');

% efficiency relative to the Nt=1 run (last entry of Tt)
Ep = Tt(end)./(Tt.*Nt);
figure;
semilogx(Nt, Ep, '-o');
xlim([1 16]);
set(gca,'XTick',[1 2 4 8 16]);
title('Efficiency');
xlabel('# of nodes');
ylabel('efficiency');

% weak scaling
% scratch calculations; the results are suppressed and not used below
sqrt((32^3)^2/16*8); % 24*24*40
sqrt((32^3)^2/16*4); % 32*32*16
sqrt((32^3)^2/16*2); % 24*24*20
sqrt((32^3)^2/16*1); % 32*16*16

% Tw_weak = [96.2477 77.2512 70.8685 58.825 65.499];
Tw_weak = [94.9259 47.8498 19.2619 7.54821 3.63176];

figure;
semilogx(Nt, Tw_weak, '-o');
xlim([1 16]);
xlabel('# of nodes');
ylabel('total time [s]');
set(gca,'XTick',[1 2 4 8 16]);
title('Weak Scaling');
43 |
--------------------------------------------------------------------------------
/utils.C:
--------------------------------------------------------------------------------
1 | #include "utils.h"
2 |
3 | // comparison function for sorting pairs
4 | int comp_pairs( const tuple& i,
5 | const tuple& j ) {
6 | if( (get<0>(i)) < (get<0>(j)) ) return true;
7 | else if( get<0>(i) == get<0>(j)) return (get<1>(i)) < (get<1>(j));
8 | else return false;
9 | }
10 |
// Convert a 3-D node index (i, j, k) into the flattened 1-D index t.
// i varies fastest, then j (stride I), then k (stride I*J).
void three_d_to_one_d( const unsigned int i,
                       const unsigned int j,
                       const unsigned int k,
                       const unsigned int I,
                       const unsigned int J,
                       unsigned int& t )
{
    // nested form of i + j*I + k*I*J
    const unsigned int row_in_volume = j + J*k;
    t = i + I*row_in_volume;
}
20 |
// Split a flattened 1-D index t back into its 3-D node index (i, j, k),
// the inverse of t = i + j*I + k*I*J.
void one_d_to_three_d( const unsigned int t,
                       const unsigned int I,
                       const unsigned int J,
                       unsigned int& i,
                       unsigned int& j,
                       unsigned int& k)
{
    const unsigned int plane = I*J;          // entries per k-layer

    k = t/plane;
    const unsigned int in_plane = t - k*plane;   // offset inside that layer
    j = in_plane/I;
    i = in_plane - j*I;
}
32 |
33 | // get the neighboring node numbers (periodic domain)
34 | // watch out for the negative unsigned int!!
35 | void get_neighbor( uint t[][3][3],
36 | cuint i, cuint j, cuint k,
37 | cuint I, cuint J, cuint K )
38 | {
39 | for(int p=0; p<3; p++){
40 | for(int q=0; q<3; q++){
41 | for(int r=0; r<3; r++){
42 | int nei_i, nei_j, nei_k;
43 | if(i+p>=I+1) nei_i = i+p-1-I;
44 | else if(i+p<1) nei_i = i+p+I-1;
45 | else nei_i = i+p-1;
46 |
47 | if(j+q>=J+1) nei_j = j+q-1-J;
48 | else if(j+q<1) nei_j = j+q+J-1;
49 | else nei_j = j+q-1;
50 |
51 | if(k+r>=K+1) nei_k = k+r-1-K;
52 | else if(k+r<1) nei_k = k+r+K-1;
53 | else nei_k = k+r-1;
54 | // cout<=I) nei_i = i+p-I;
76 | else nei_i = i+p;
77 | if(j+q>=J) nei_j = j+q-J;
78 | else nei_j = j+q;
79 | if(k+r>=K) nei_k = k+r-K;
80 | else nei_k = k+r;
81 |
82 | three_d_to_one_d(nei_i,nei_j,nei_k, I,J, t[p][q][r]);
83 | }
84 | }
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/utils.h:
--------------------------------------------------------------------------------
1 | // utility functions
2 | #ifndef UTILS_H
3 | #define UTILS_H
4 |
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 |
15 | #include "boost/multi_array.hpp"
16 |
17 | using namespace std;
18 |
19 | typedef const unsigned int cuint;
20 | typedef unsigned int uint;
21 | typedef const double cdouble;
22 | typedef const int cint;
23 |
24 | const double pi=3.14159265359;
25 | extern unsigned int nt;
26 |
27 | cuint X_DIR=0;
28 | cuint Y_DIR=1;
29 | cuint Z_DIR=2;
30 | cuint XY_DIR=3;
31 | cuint XZ_DIR=4;
32 | cuint YZ_DIR=5;
33 | cuint X2_DIR=6;
34 | cuint Y2_DIR=7;
35 | cuint Z2_DIR=8;
36 |
37 |
38 | // comparison function for sorting pairs
39 | int comp_pairs( const tuple& i,
40 | const tuple& j );
41 |
42 | void three_d_to_one_d( const unsigned int i,
43 | const unsigned int j,
44 | const unsigned int k,
45 | const unsigned int I,
46 | const unsigned int J,
47 | unsigned int& t );
48 |
49 | void one_d_to_three_d( const unsigned int t,
50 | const unsigned int I,
51 | const unsigned int J,
52 | unsigned int& i,
53 | unsigned int& j,
54 | unsigned int& k);
55 |
56 | void get_neighbor( unsigned int t[][3][3],
57 | cuint i, cuint j, cuint k,
58 | cuint I, cuint J, cuint K );
59 |
60 | // get node numbers in a box
61 | void get_box( uint t[][2][2],
62 | cuint i, cuint j, cuint k,
63 | cuint I, cuint J, cuint K );
64 |
65 |
66 | #endif //UTILS_H
67 |
--------------------------------------------------------------------------------
/v_cycle.C:
--------------------------------------------------------------------------------
1 | #include "v_cycle.h"
2 | #include "pressure.h"
3 |
4 | // multigrid v-cycle
5 | void v_cycle( double* P, uint n_dof, cuint nx, cuint ny, cuint nz,
6 | cdouble hx, cdouble hy, cdouble hz,
7 | cdouble hx2i, cdouble hy2i, cdouble hz2i,
8 | cdouble tol, cuint max_iteration, cuint pre_smooth_iteration,
9 | cdouble lx, cdouble ly, cdouble lz,
10 | cuint level, cuint max_level,
11 | double* F,
12 | double& Er,
13 | double* Uss, double* Vss, double* Wss,
14 | cdouble bcs[][6],
15 | cdouble dt
16 | )
17 | {
18 | cout<<"level: "< > M_sp;
32 | vector val;
33 | vector col_ind;
34 | vector row_ptr(1,0);
35 |
36 | // create finite difference matrix
37 | cout<<"create finite difference matrix"< > M_sp_coar;
123 | vector val_coar;
124 | vector col_ind_coar;
125 | vector row_ptr_coar(1,0);
126 |
127 | // create finite difference matrix
128 | cout<<"create finite difference matrix"< > Lp_sp;
344 | vector Lp_val(Lp_sp.size(),0.0);
345 | vector Lp_col_ind(Lp_sp.size(), 0);
346 | vector Lp_row_ptr(1,0);
347 |
348 | // build right hand side of pressure poisson equation
349 | pressure_rhs(Fp, Uss, Vss, Wss, nx, ny, nz, bcs, hx, hy, hz, dt);
350 |
351 | // build pressure matrix
352 | pressure_matrix( Lp_sp,
353 | Lp_val, Lp_col_ind, Lp_row_ptr,
354 | nx, ny, nz,
355 | hx2i, hy2i, hz2i,
356 | n_dof
357 | );
358 |
359 | // solve discrete Poisson equation: Lp\Fp
360 | // construct solution vector
361 | double* P_tmp = new double[n_dof];
362 | // initial guess
363 | #pragma omp parallel for shared(P, P_tmp) num_threads(nt)
364 | for(int n=0; n > Lu_sp;
21 | vector Lu_val(Lu_sp.size(),0.0);
22 | vector Lu_col_ind(Lu_sp.size(), 0);
23 | vector Lu_row_ptr(1,0);
24 | // double* Fu = new double[n_u_dof];
25 | // set load vector
26 | // viscosity_load_vector(Fu, U, nx-1, ny, nz);
27 | // sparse viscosity matrix and bc modification
28 | viscosity_matrix_sparse( Lu_sp, Lu_val, Lu_col_ind, Lu_row_ptr,
29 | U, nx-1, ny, nz, hx, hy, hz,
30 | hx2i, hy2i, hz2i, dt, nu,
31 | bcs[0], X_DIR );
32 | // now solve Lu\Fu
33 | // construct solution vector
34 | double* Uss_tmp = new double[n_u_dof];
35 | // initial guess
36 | #pragma omp parallel for shared(Uss, Uss_tmp) num_threads(nt)
37 | for(int n=0; n > Lv_sp;
51 | vector Lv_val(Lv_sp.size(),0.0);
52 | vector Lv_col_ind(Lv_sp.size(), 0);
53 | vector Lv_row_ptr(1,0);
54 | // double* Fv = new double[n_v_dof];
55 | // set load vector
56 | // viscosity_load_vector(Fv, V, nx, ny-1, nz);
57 | // sparse viscosity matrix and bc modification
58 | viscosity_matrix_sparse( Lv_sp, Lv_val, Lv_col_ind, Lv_row_ptr,
59 | V, nx, ny-1, nz, hx, hy, hz,
60 | hx2i, hy2i, hz2i, dt, nu,
61 | bcs[1], Y_DIR );
62 | // now solve Lv\Fv
63 | // construct solution vector
64 | // double* Vss = new double[n_v_dof];
65 | double* Vss_tmp = new double[n_v_dof];
66 | // initial guess
67 | #pragma omp parallel for shared(Vss, Vss_tmp) num_threads(nt)
68 | for(int n=0; n > Lw_sp;
82 | vector Lw_val(Lw_sp.size(),0.0);
83 | vector Lw_col_ind(Lw_sp.size(), 0);
84 | vector Lw_row_ptr(1,0);
85 | // double* Fw = new double[n_w_dof];
86 | // set load vector
87 | // viscosity_load_vector(Fw, W, nx, ny, nz-1);
88 | // sparse viscosity matrix and bc modification
89 | viscosity_matrix_sparse( Lw_sp, Lw_val, Lw_col_ind, Lw_row_ptr,
90 | W, nx, ny, nz-1, hx, hy, hz,
91 | hx2i, hy2i, hz2i, dt, nu,
92 | bcs[2], Z_DIR );
93 | // now solve Lw\Fw
94 | // construct solution vector
95 | // double* Wss = new double[n_w_dof];
96 | double* Wss_tmp = new double[n_w_dof];
97 | // initial guess
98 | #pragma omp parallel for shared(Wss, Wss_tmp) num_threads(nt)
99 | for(int n=0; n >& L_sp,
122 | vector& val,
123 | vector& col_ind,
124 | vector& row_ptr,
125 | double* F,
126 | cuint nx, cuint ny, cuint nz,
127 | cdouble hx, cdouble hy, cdouble hz,
128 | cdouble hx2i, cdouble hy2i, cdouble hz2i,
129 | cdouble dt, cdouble nu,
130 | cdouble* u_bc,
131 | cuint dir // direction of flow: u, v, or w?
132 | )
133 | {
134 | // initialize sparse matrix (row#, col#, value)
135 | vector > > M;
136 | M.resize(nt);
137 |
138 | #pragma omp parallel shared(M) num_threads(nt)
139 | {
140 | cuint myrank = omp_get_thread_num();
141 |
142 | #pragma omp for
143 | for(int i=0; i=0)
163 | sparse_add(M[myrank], t_111, t_011, hx2i);
164 | else{ // x0
165 | if(dir==X_DIR)
166 | F[t_111] -= dt*nu*u_bc[0]/(hx*hx);
167 | else{
168 | F[t_111] -= dt*nu/(hx*hx)*u_bc[0]*2;
169 | sparse_add(M[myrank], t_111, t_111, -1*hx2i);
170 | }
171 | }
172 |
173 | sparse_add(M[myrank], t_111, t_111, -2*hx2i);
174 |
175 | if(i+1=0)
187 | sparse_add(M[myrank], t_111, t_101, hy2i);
188 | else{ // y0
189 | if(dir==Y_DIR)
190 | F[t_111] -= dt*nu/(hy*hy) * u_bc[2];
191 | else{
192 | F[t_111] -= dt*nu/(hy*hy)*u_bc[2]*2;
193 | sparse_add(M[myrank], t_111, t_111, -1*hy2i);
194 | }
195 |
196 | }
197 |
198 | sparse_add(M[myrank], t_111, t_111, -2*hy2i);
199 |
200 | if(j+1=0)
212 | sparse_add(M[myrank], t_111, t_110, hz2i);
213 | else{ // z0
214 | if(dir==Z_DIR)
215 | F[t_111] -= dt*nu/(hz*hz) * u_bc[4];
216 | else{
217 | F[t_111] -= dt*nu/(hz*hz)*u_bc[4]*2;
218 | sparse_add(M[myrank], t_111, t_111, -1*hz2i);
219 | }
220 | }
221 |
222 | sparse_add(M[myrank], t_111, t_111, -2*hz2i);
223 |
224 | if(k+1 > tmp;
246 | tmp.resize(M[0].size());
247 | mergesort(&M[0][0], nt, M[0].size(), &tmp[0] );
248 |
249 | // consolidate
250 | L_sp.push_back(M[0][0]);
251 | uint ct=0;
252 | for(int i =1; i(L_sp[ct])==get<0>(M[0][i]))
254 | && (get<1>(L_sp[ct])==get<1>(M[0][i])) ){
255 | get<2>(L_sp[ct]) += get<2>(M[0][i]);
256 | }
257 | else{
258 | L_sp.push_back(M[0][i]);
259 | ct++;
260 | }
261 | }
262 |
263 | // convert to CSR format
264 | // cout<<"converting to CSR format"<(L_sp[i]);
271 | col_ind[i] = get<1>(L_sp[i]);
272 | }
273 | for(int i=1; i(L_sp[i])!=get<0>(L_sp[i-1]))
275 | row_ptr.push_back(i);
276 | }
277 | row_ptr.push_back(L_sp.size());
278 |
279 | // cout<<"done"<(L_sp[i])<<" "<(L_sp[i])
290 | // <<" "<(L_sp[i])<& U,
27 | boost::multi_array& V,
28 | boost::multi_array& W,
29 | double* Uss, double* Vss, double* Wss,
30 | cuint nx, cuint ny, cuint nz,
31 | cdouble hx, cdouble hy, cdouble hz,
32 | cdouble hx2i, cdouble hy2i, cdouble hz2i,
33 | cdouble dt, cdouble nu,
34 | cdouble bcs[][6],
35 | cdouble tol, cuint max_iteration );
36 |
37 |
38 | // sparse viscosity matrix
39 | void viscosity_matrix_sparse( vector >& L_sp,
40 | vector& val,
41 | vector& col_ind,
42 | vector& row_ptr,
43 | double* F,
44 | cuint nx, cuint ny, cuint nz,
45 | cdouble hx, cdouble hy, cdouble hz,
46 | cdouble hx2i, cdouble hy2i, cdouble hz2i,
47 | cdouble dt, cdouble nu,
48 | cdouble* u_bc,
49 | cuint dir // direction of flow: u, v, or w?
50 | );
51 |
52 | // set load vector for implicit viscous solve
53 | void viscosity_load_vector( double* F, double* U,
54 | cuint nx, cuint ny, cuint nz);
55 |
56 | #endif //VISCOSITY_H
57 |
--------------------------------------------------------------------------------