├── IO.C ├── IO.h ├── Makefile ├── README.md ├── advection.C ├── advection.h ├── analysis.m ├── assemble.C ├── assemble.h ├── jacobi.C ├── jacobi.h ├── main.C ├── msort.C ├── msort.h ├── pressure.C ├── pressure.h ├── scaling.m ├── utils.C ├── utils.h ├── v_cycle.C ├── v_cycle.h ├── viscosity.C └── viscosity.h /IO.C: -------------------------------------------------------------------------------- 1 | #include "IO.h" 2 | 3 | // write out the sparse matrix 4 | int write_matrix(cuint P, 5 | cuint Q, 6 | double** U, 7 | char* file_name) 8 | { 9 | ofstream file_out; 10 | file_out.open (file_name); 11 | 12 | if(!file_out.is_open()){ 13 | return 1; 14 | } 15 | 16 | for(int p=0; p& U ) 433 | { 434 | boost::multi_array_types::size_type const* sizes = U.shape(); 435 | cuint nx = sizes[0]; 436 | cuint ny = sizes[1]; 437 | cuint nz = sizes[2]; 438 | 439 | double max_value = abs(U[0][0][0]); 440 | 441 | for(int i=0; i& U, 517 | boost::multi_array& Ud, 518 | cuint dir ) 519 | { 520 | boost::multi_array_types::size_type const* sizes = U.shape(); 521 | cuint nx = sizes[0]; 522 | cuint ny = sizes[1]; 523 | cuint nz = sizes[2]; 524 | 525 | // differece in x-direction 526 | if(dir==X_DIR){ 527 | for(int i=0; i& U2, 563 | boost::multi_array& U2_x, 564 | cdouble h, 565 | cuint dir ) 566 | { 567 | boost::multi_array_types::size_type const* sizes = U2.shape(); 568 | cuint nx = sizes[0]; 569 | cuint ny = sizes[1]; 570 | cuint nz = sizes[2]; 571 | 572 | // x-difference 573 | if( dir==X_DIR){ 574 | for(int i=0; i& U ); 37 | 38 | 39 | // get upwinding differences 40 | void upwind_difference( const boost::multi_array& U, 41 | boost::multi_array& Ud, 42 | cuint dir ); 43 | 44 | // get 1d staggered difference 45 | // Ua is an averaged U value at the cell vertices 46 | // get the difference value at the center of cell 47 | void staggered_first_difference( const double* UV, 48 | double* UV_x, 49 | cuint nx, cuint ny, cuint nz, 50 | cuint nx_x, cuint ny_x, cuint nz_x, 51 | cdouble h, 52 | cuint dir 53 | ); 54 | 55 | // get central first difference at center of element 56 | void central_first_difference( const boost::multi_array& U2, 57 | boost::multi_array& U2_x, 58 | cdouble h, 59 | cuint dir ); 60 | 61 | 62 | // get mixed edge values 63 | void calculate_edge_values( double* Ue, 64 | double* Ve, 65 | double* We, 66 | double* UV, 67 | double* UW, 68 | double* VW, 69 | cuint nx, cuint ny, cuint nz); 70 | 71 | // consolidate advection terms 72 | void consolidate_advection( double* U, 73 | double* V, 74 | double* W, 75 | double* U2_x, 76 | double* V2_y, 77 | double* W2_z, 78 | double* UV_y, 79 | double* UW_z, 80 | double* VU_x, 81 | double* VW_z, 82 | double* WU_x, 83 | double* WV_y, 84 | cuint nx, cuint ny, cuint nz, 85 | cdouble dt ); 86 | 87 | #endif //ASSEMBLE_H 88 | -------------------------------------------------------------------------------- /analysis.m: -------------------------------------------------------------------------------- 1 | clear all 2 | close all 3 | 4 | nt = 1; 5 | 6 | for i=0:nt-1 7 | U = load(sprintf('results_%i.dat',i)); 8 | figure(i+1); 9 | quiver3(U(:,1),U(:,2),U(:,3), U(:,5), U(:,6), U(:,7)); 10 | title(sprintf('velocity at i=%i', i)) 11 | % pause 12 | 13 | end 14 | 15 | % plot(U(:,1), U(:,4)); 16 | 17 | % 18 | % 19 | % for i=1:nt 20 | % U{i} = load(sprintf('results_%i.dat',i)); 21 | % legend_names{i} = sprintf('level %i', i); 22 | % end 23 | % U{max_level+1} = load('results_100.dat'); 24 | % legend_names{max_level+1} = 'final solution'; 25 | % legend_names{max_level+2} = 'exact solution'; 26 | % 27 | % hold on 28 | % col=hsv(max_level+3); 29 | % for i=1:max_level+1 30 | % h= plot( U{i}(1:end,1),U{i}(1:end,4), '-o'); 31 | % set(h, 'Color',col(i+1,:)); 32 | % end 33 | % 34 | % x=0:0.01:1; 35 | % plot(x, -(1/(2*pi))^2*sin(x*2*pi)); 36 | % 37 | % legend(legend_names,0); 38 | % 39 | % hold off 40 | % 41 | % % v=zeros(9,9,9); 42 | % 43 | % % for i=1:length(U) 44 | % % v(U(i,1)+1,U(i,2)+1,U(i,3)+1) = U(i,4); 45 | % % end 46 | % % 47 | % % x=U(:,1); 48 | % % y=U(:,2); 49 | % % z=U(:,3); 50 | % % % v=U(:,4); 51 | % % 52 | % % [x y z v] = flow; 53 | % % h=contourslice(x,y,z,v,[1:9],[],[0], linspace(-8,2,10)); 54 | % % axis([0 10 -3 3 -3 3]); daspect([1 1 1]) 55 | % % camva(24); camproj perspective; 56 | % % campos([-3 -15 5]) 57 | % % set(gcf, 'Color', [.3 .3 .3], 'renderer', 'zbuffer') 58 | % % set(gca, 'Color', 'black' , 'XColor', 'white', ... 59 | % % 'YColor', 'white' , 'ZColor', 'white') 60 | % % box on -------------------------------------------------------------------------------- /assemble.C: -------------------------------------------------------------------------------- 1 | #include "assemble.h" 2 | #include "utils.h" 3 | #include "msort.h" 4 | 5 | // 2nd order stencil 6 | void fd_matrix( double** M, 7 | cuint I, cuint J, cuint K, 8 | const double dx2i, 9 | const double dy2i, 10 | const double dz2i, 11 | cuint n_dof 12 | ) 13 | { 14 | #pragma omp parallel for shared(M) num_threads(nt) 15 | for(int i=0; i >& M_sp, 77 | vector& val, 78 | vector& col_ind, 79 | vector& row_ptr, 80 | cuint I, cuint J, cuint K, 81 | const double dx2i, 82 | const double dy2i, 83 | const double dz2i, 84 | cuint n_dof 85 | ) 86 | { 87 | 88 | // initialize sparse matrix (row#, col#, value) 89 | vector > > M; 90 | M.resize(nt); 91 | 92 | 93 | #pragma omp parallel shared(M) num_threads(nt) 94 | { 95 | cuint myrank = omp_get_thread_num(); 96 | 97 | #pragma omp for 98 | for(int i=0; i(M[myrank][n_dof-1])); 164 | // M[n_dof-1][n_dof-1] = n_dof; 165 | 166 | // sort and consolidate sparse matrix (row#, col#, value) 167 | // cout<<"sorting..."< >M_sp; 171 | // M_sp[myrank].push_back(M[0]); 172 | uint ct=0; 173 | 174 | // #pragma omp critical 175 | // { 176 | // cout<<"thread #: "<(M_sp[myrank][i])<<" j: " 179 | // <(M_sp[myrank][i])<<" v: " 180 | // <(M_sp[myrank][i])< > tmp; 192 | tmp.resize(M[0].size()); 193 | mergesort(&M[0][0], nt, M[0].size(), &tmp[0] ); 194 | 195 | 196 | cout<<"done"<(M_sp[ct])==get<0>(M[0][i])) 203 | && (get<1>(M_sp[ct])==get<1>(M[0][i])) ){ 204 | // get<0>(M_sp[ct]) += get<0>(M[0][i]); 205 | // get<1>(M_sp[ct]) += get<1>(M[0][i]); 206 | get<2>(M_sp[ct]) += get<2>(M[0][i]); 207 | } 208 | else{ 209 | M_sp.push_back(M[0][i]); 210 | ct++; 211 | } 212 | 213 | } 214 | 215 | 216 | // convert to CSR format 217 | cout<<"converting to CSR format"<(M_sp[i]); 224 | col_ind[i] = get<1>(M_sp[i]); 225 | } 226 | for(int i=1; i(M_sp[i])!=get<0>(M_sp[i-1])) 228 | row_ptr.push_back(i); 229 | } 230 | row_ptr.push_back(M_sp.size()); 231 | 232 | // for(int i=0; i(M_sp[i])<<" "<(M_sp[i]) 240 | <<" "<(M_sp[i])< >& M, 316 | cuint i, cuint j, cdouble v) 317 | { 318 | tuple M_tmp(i, j, v); 319 | M.push_back(M_tmp); 320 | 321 | // vector idx_tmp(2, 0.0); 322 | // idx_tmp[0] = i; idx_tmp[1]=j; 323 | // idx.push_back(idx_tmp); 324 | // value.push_back(v); 325 | 326 | } 327 | 328 | // insert index and value into a sparse matrix 329 | // note that M should be sorted before use 330 | void sparse_insert( vector >& M, 331 | cuint i, cuint j, cdouble v) 332 | { 333 | // replace the value 334 | for(int mn=0; mn(M[mn])==i && get<1>(M[mn])==j){ 336 | get<2>(M[mn]) = v; 337 | return; 338 | } 339 | } 340 | 341 | // if the value does not exist, add the value 342 | sparse_add(M, i,j, v); 343 | 344 | } 345 | 346 | 347 | // merge two sorted arrays 348 | void merge(vector >& left, 349 | vector >& right, 350 | cuint n_left, cuint n_right, 351 | vector >& result, 352 | vector >& tmp 353 | ) 354 | { 355 | uint it = 0; 356 | uint left_it = 0, right_it = 0; 357 | 358 | while(left_it < n_left && right_it < n_right ) { 359 | it = left_it+right_it; 360 | // cout< >& M_sp, 19 | vector& val, 20 | vector& col_ind, 21 | vector& row_ptr, 22 | cuint I, cuint J, cuint K, 23 | const double dx2i, 24 | const double dy2i, 25 | const double dz2i, 26 | cuint n_dof 27 | ); 28 | 29 | void load_vector( double* F, 30 | cuint n_dof, 31 | cuint I, 32 | cuint J, 33 | cuint K 34 | ); 35 | 36 | int boundary_conditins( cuint n_dof, 37 | cuint I, 38 | cuint J, 39 | cuint K, 40 | double** M, 41 | double* F 42 | ); 43 | 44 | // add index and value into a sparse matrix 45 | void sparse_add( vector >& M, 46 | cuint i, cuint j, cdouble v); 47 | 48 | // merge two sorted arrays 49 | void merge(vector >& left, 50 | vector >& right, 51 | cuint n_left, cuint n_right, 52 | vector >& result, 53 | vector >& tmp 54 | ); 55 | 56 | 57 | #endif //ASSEMBLE_H 58 | -------------------------------------------------------------------------------- /jacobi.C: -------------------------------------------------------------------------------- 1 | #include "jacobi.h" 2 | #include "assemble.h" 3 | #include "IO.h" 4 | 5 | // jacobi method 6 | void jacobi( cdouble tol, 7 | cuint max_iteration, 8 | cuint n_dof, 9 | double* u_new, 10 | double* u_old, 11 | double** M, 12 | double* F, 13 | double& Er, 14 | double* R) 15 | { 16 | // iteration counter 17 | int ct = 0; 18 | cdouble tol2 = tol*tol; 19 | 20 | while(Er>tol2 && ct& val, 61 | const vector& col_ind, 62 | const vector& row_ptr, 63 | double* F, 64 | double& Er, 65 | double* R) 66 | { 67 | // iteration counter 68 | int ct = 0; 69 | cdouble tol2 = tol*tol; 70 | double E=tol2*100; 71 | 72 | #pragma omp parallel shared(F, U_tmp, U, R, val,col_ind, row_ptr, ct) num_threads(nt) 73 | { 74 | while(E>tol2 && ct& val, 158 | const vector& col_ind, 159 | const vector& row_ptr, 160 | double* U, 161 | double* F, 162 | double* R, 163 | cuint n_dof) 164 | { 165 | double E=0; 166 | #pragma omp parallel for shared(R,val,col_ind,row_ptr,U,F) num_threads(nt) reduction(+:E) 167 | for(int i=0; i& val, 187 | // const vector& col_ind, 188 | // const vector& row_ptr, 189 | // double* F, 190 | // double& Er, 191 | // double* R) 192 | // { 193 | // // iteration counter 194 | // int ct = 0; 195 | // cdouble tol2 = tol*tol; 196 | 197 | // while(Er>tol2 && ct& val, 26 | const vector& col_ind, 27 | const vector& row_ptr, 28 | double* F, 29 | double& Er, 30 | double* R); 31 | 32 | double convergence_check ( double** M, 33 | double* U, 34 | double* F, 35 | double* R, 36 | cuint n_dof 37 | ); 38 | 39 | double convergence_check_sparse ( const vector& val, 40 | const vector& col_ind, 41 | const vector& row_ptr, 42 | double* U, 43 | double* F, 44 | double* R, 45 | cuint n_dof); 46 | 47 | #endif //JACOBI_H 48 | 49 | -------------------------------------------------------------------------------- /main.C: -------------------------------------------------------------------------------- 1 | #include "jacobi.h" 2 | #include "utils.h" 3 | #include "IO.h" 4 | #include "v_cycle.h" 5 | #include "advection.h" 6 | #include "viscosity.h" 7 | #include "pressure.h" 8 | 9 | // number of threads 10 | uint nt; 11 | 12 | // set up initial conditions 13 | void initial_conditions( double* U, 14 | double* V, 15 | double* W, 16 | double* P, 17 | cuint nx, cuint ny, cuint nz ) 18 | { 19 | for(int i=0; i<(nx-1)*(ny)*(nz); i++) 20 | U[i]=0.0; 21 | 22 | for(int i=0; i<(nx)*(ny-1)*(nz); i++) 23 | V[i]=0.0; 24 | 25 | for(int i=0; i<(nx)*(ny)*(nz-1); i++) 26 | W[i]=0.0; 27 | 28 | for(int i=0; i<(nx)*(ny)*(nz); i++) 29 | P[i] = 0.0; 30 | 31 | return; 32 | } 33 | 34 | // main function! 35 | int main( int argc, char** argv ) 36 | { 37 | // initialize constants 38 | cdouble nu = 100; // kinetic viscosity (mu/rho) 39 | double dt = 0.1; //time step 40 | cdouble tf = 0.1; // final time 41 | 42 | // domain size 43 | cdouble lx = 1.0; 44 | cdouble ly = 1.0; 45 | cdouble lz = 1.0; 46 | 47 | // domain cornders 48 | cdouble xmin = 0.0; 49 | cdouble ymin = 0.0; 50 | cdouble zmin = 0.0; 51 | cdouble xmax = xmin+lx; 52 | cdouble ymax = ymin+ly; 53 | cdouble zmax = zmin+lz; 54 | 55 | // number of gridpointts in each dimension 56 | nt=1; 57 | uint nx=10; 58 | uint ny=10; 59 | uint nz=10; // problem size (n_dof=n_size^3) 60 | uint max_level=0; // maximum v-cycle level 61 | if(argc>5){ 62 | nt = atoi(argv[1]); 63 | max_level = atoi(argv[2]); 64 | nx = atoi(argv[3]); 65 | ny = atoi(argv[4]); 66 | nz = atoi(argv[5]); 67 | } 68 | else{ 69 | cout<<"multigrid [# of threads] [max level] [I_size] [J_size] [K_size]"<& i, 5 | const tuple& j ) { 6 | if( (get<0>(i)) < (get<0>(j)) ) return true; 7 | else if( get<0>(i) == get<0>(j)) return (get<1>(i)) < (get<1>(j)); 8 | else return false; 9 | } 10 | 11 | // merge two sorted arrays 12 | void merge(tuple * left, 13 | tuple * right, 14 | const int n_left, const int n_right, 15 | tuple * result, 16 | tuple * tmp ) 17 | { 18 | unsigned int it = 0; 19 | unsigned int left_it = 0, right_it = 0; 20 | // cout<<"n_left "<* vec, 56 | const int threads, 57 | const int n, 58 | tuple * tmp 59 | ) 60 | { 61 | // Termination condition: List is completely sorted if it 62 | // only contains a single element. 63 | if(n == 1){ 64 | return; 65 | } 66 | 67 | // Determine the location of the middle element in the vector 68 | tuple * left = vec; // left array pointer 69 | int n_left = n/2; // number of elements in left array 70 | tuple * tmp_left = tmp; // left tmp array pointer 71 | 72 | tuple * right = left+n/2; // right array pointer 73 | int n_right = n-n/2; // number of elements in right array 74 | tuple * tmp_right = tmp_left+n/2; // right tmp array pointer 75 | 76 | // Perform a merge sort on the two smaller vectors 77 | if (threads > 1) { 78 | 79 | #pragma omp parallel sections 80 | { 81 | #pragma omp section 82 | { 83 | mergesort(left, threads/2, n_left, tmp_left); 84 | } 85 | #pragma omp section 86 | { 87 | mergesort(right, threads - threads/2, n_right, tmp_right); 88 | } 89 | } 90 | } 91 | else { 92 | mergesort(left, 1, n_left, tmp_left); 93 | mergesort(right, 1, n_right, tmp_right); 94 | } 95 | 96 | merge(left, right, n_left, n_right, left, tmp ); 97 | 98 | return; 99 | } 100 | -------------------------------------------------------------------------------- /msort.h: -------------------------------------------------------------------------------- 1 | // openmp merge sort 2 | #ifndef MSORT_H 3 | #define MSORT_H 4 | 5 | #include "utils.h" 6 | 7 | using namespace std; 8 | 9 | // comparison function for sorting pairs 10 | int comp_tuples( const tuple& i, 11 | const tuple& j ); 12 | 13 | // merge two sorted arrays 14 | void merge(tuple * left, 15 | tuple * right, 16 | const int n_left, const int n_right, 17 | tuple * result, 18 | tuple * tmp ); 19 | 20 | // mergesort with OpenMP parallelism 21 | void mergesort(tuple * vec, 22 | const int threads, 23 | const int n, 24 | tuple * tmp 25 | ); 26 | 27 | 28 | #endif // MSORT_H 29 | -------------------------------------------------------------------------------- /pressure.C: -------------------------------------------------------------------------------- 1 | #include "pressure.h" 2 | #include "v_cycle.h" 3 | 4 | // compute pressure correction 5 | void pressure( double* U, double* V, double* W, double* P, 6 | double* Uss, double* Vss, double* Wss, 7 | cuint nx, cuint ny, cuint nz, 8 | cdouble bcs[][6], 9 | cdouble lx, cdouble ly, cdouble lz, 10 | cdouble hx, cdouble hy, cdouble hz, 11 | cdouble hx2i, cdouble hy2i, cdouble hz2i, 12 | cdouble tol, cuint max_iteration, 13 | cuint pre_smooth_iteration, cuint max_level, 14 | cdouble dt) 15 | { 16 | cuint n_dof = nx*ny*nz; 17 | 18 | // residual and error 19 | double* Rp = new double[n_dof]; 20 | double Er = tol*10; 21 | 22 | // 0-level v_cycle 23 | if(max_level==0) 24 | v_cycle_0( P, Rp, 25 | n_dof, nx, ny, nz, 26 | hx, hy, hz, 27 | hx2i, hy2i, hz2i, 28 | tol, max_iteration, pre_smooth_iteration, 29 | hx, hy, hz, 30 | 0, max_level, 31 | Er, 32 | Uss, Vss, Wss, 33 | bcs,dt ); 34 | else 35 | // v-cycle 36 | v_cycle( P, n_dof, nx, ny, nz, 37 | hx, hy, hz, 38 | hx2i, hy2i, hz2i, 39 | tol, max_iteration, pre_smooth_iteration, 40 | lx, ly, lz, 41 | 0, max_level-1, 42 | Rp, 43 | Er, 44 | Uss, Vss, Wss, 45 | bcs, dt 46 | ); 47 | 48 | 49 | // compute pressure corrections 50 | double* Pr_x = new double[(nx-1)*(ny)*(nz)]; 51 | double* Pr_y = new double[(nx)*(ny-1)*(nz)]; 52 | double* Pr_z = new double[(nx)*(ny)*(nz-1)]; 53 | 54 | for(int i=0; i<(nx-1)*(ny)*(nz); i++) 55 | Pr_x[i]=0.0; 56 | 57 | compute_corrections( P, Pr_x, Pr_y, Pr_z, nx, ny, nz, hy, hy, hz ); 58 | 59 | // 1d index 60 | uint t; 61 | 62 | // correct velocities 63 | // x-direction 64 | #pragma omp parallel for private(t) shared(U, Uss, Pr_x) num_threads(nt) 65 | for(int i=0; i >& Lp_sp, 210 | vector& val, 211 | vector& col_ind, 212 | vector& row_ptr, 213 | cuint nx, cuint ny, cuint nz, 214 | const double hx2i, 215 | const double hy2i, 216 | const double hz2i, 217 | cuint n_dof 218 | ) 219 | { 220 | // initialize sparse matrix (row#, col#, value) 221 | vector > > M; 222 | M.resize(nt); 223 | 224 | #pragma omp parallel shared(M) num_threads(nt) 225 | { 226 | cuint myrank = omp_get_thread_num(); 227 | 228 | // loop through inner nodes 229 | #pragma omp for 230 | for(int i=0; i=0) 246 | sparse_add(M[myrank], t_111, t_011, hx2i); 247 | else // x0: i==0 (P[-1][j][k]==P[0][j][k]) 248 | sparse_add(M[myrank], t_111, t_111, hx2i); 249 | 250 | sparse_add(M[myrank], t_111, t_111, -2*hx2i); 251 | 252 | if(i+1=0) 259 | sparse_add(M[myrank], t_111, t_101, hy2i); 260 | else // y0: j==0 (P[i][-1][k]==P[i][0][k]) 261 | sparse_add(M[myrank], t_111, t_111, hy2i); 262 | 263 | sparse_add(M[myrank], t_111, t_111, -2*hy2i); 264 | 265 | if(j+1=0) 272 | sparse_add(M[myrank], t_111, t_110, hz2i); 273 | else // z0: k==0 (P[i][j][-1]==P[i][j][0]) 274 | sparse_add(M[myrank], t_111, t_111, hz2i); 275 | 276 | sparse_add(M[myrank], t_111, t_111, -2*hz2i); 277 | 278 | if(k+1 > tmp; 304 | tmp.resize(M[0].size()); 305 | mergesort(&M[0][0], nt, M[0].size(), &tmp[0] ); 306 | 307 | // consolidate 308 | Lp_sp.push_back(M[0][0]); 309 | uint ct=0; 310 | for(int i =1; i(Lp_sp[ct])==get<0>(M[0][i])) 312 | && (get<1>(Lp_sp[ct])==get<1>(M[0][i])) ){ 313 | get<2>(Lp_sp[ct]) += get<2>(M[0][i]); 314 | } 315 | else{ 316 | Lp_sp.push_back(M[0][i]); 317 | ct++; 318 | } 319 | } 320 | 321 | // point constraint to close the system 322 | get<2>(Lp_sp[0]) = 5*get<2>(Lp_sp[0]); 323 | 324 | // convert to CSR format 325 | val.resize(Lp_sp.size(),0.0); 326 | col_ind.resize(Lp_sp.size(), 0); 327 | 328 | #pragma omp parallel for shared(val, col_ind, Lp_sp) num_threads(nt) 329 | for(int i=0; i(Lp_sp[i]); 331 | col_ind[i] = get<1>(Lp_sp[i]); 332 | } 333 | for(int i=1; i(Lp_sp[i])!=get<0>(Lp_sp[i-1])) 335 | row_ptr.push_back(i); 336 | } 337 | row_ptr.push_back(Lp_sp.size()); 338 | 339 | // output to file for testing purpose 340 | ofstream file_out("Lp_matrix.dat"); 341 | for(int i=0; i(Lp_sp[i])<<" "<(Lp_sp[i]) 343 | <<" "<(Lp_sp[i])< >& Lp_sp, 35 | vector& val, 36 | vector& col_ind, 37 | vector& row_ptr, 38 | cuint nx, cuint ny, cuint nz, 39 | const double hx2i, 40 | const double hy2i, 41 | const double hz2i, 42 | cuint n_dof 43 | ); 44 | 45 | // compute corrections from pressure value 46 | void compute_corrections( double* Pr, 47 | double* Pr_x, 48 | double* Pr_y, 49 | double* Pr_z, 50 | cuint nx, cuint ny, cuint nz, 51 | cdouble hx, cdouble hy, cdouble hz ); 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /scaling.m: -------------------------------------------------------------------------------- 1 | clear all 2 | close all 3 | 4 | % strong scaling 5 | % 32^3 6 | Ta = [0.0112691 0.00559711 0.00452089 0.00302196 0.00250888]; 7 | Tv = [0.0432389 0.041455 0.0408649 0.0634542 0.116877]; 8 | Tp = [3.77661 4.3534 4.69743 7.65514 13.1779]; 9 | Tt = [4.1107 4.66746 5.0014 7.97422 13.5518]; 10 | Nt = [16 8 4 2 1]; 11 | 12 | loglog(Nt, Ta, '-o', Nt, Tv, '-x', Nt, Tp, '-+', Nt, Tt, '-s'); 13 | xlim([0 16]); 14 | set(gca,'XTick',[1 2 4 8 16]); 15 | title('Strong Scaling'); 16 | legend('advection','viscosity','pressure','total'); 17 | xlabel('# of nodes'); 18 | ylabel('time [s]'); 19 | 20 | Ep = Tt(end)./(Tt.*Nt); 21 | semilogx(Nt, Ep, '-o'); 22 | xlim([0 16]); 23 | set(gca,'XTick',[1 2 4 8 16]); 24 | title('Efficiency'); 25 | xlabel('# of nodes'); 26 | ylabel('efficiency'); 27 | 28 | % weak scaling 29 | sqrt((32^3)^2/16*8); % 24*24*40 30 | sqrt((32^3)^2/16*4); % 32*32*16 31 | sqrt((32^3)^2/16*2); % 24*24*20 32 | sqrt((32^3)^2/16*1); % 32*16*16 33 | 34 | % Tw_weak = [96.2477 77.2512 70.8685 58.825 65.499]; 35 | Tw_weak = [94.9259 47.8498 19.2619 7.54821 3.63176]; 36 | 37 | semilogx(Nt, Tw_weak, '-o'); 38 | xlim([0 16]); 39 | xlabel('# of nodes'); 40 | ylabel('total time [s]'); 41 | set(gca,'XTick',[1 2 4 8 16]); 42 | title('Weak Scaling'); 43 | -------------------------------------------------------------------------------- /utils.C: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | 3 | // comparison function for sorting pairs 4 | int comp_pairs( const tuple& i, 5 | const tuple& j ) { 6 | if( (get<0>(i)) < (get<0>(j)) ) return true; 7 | else if( get<0>(i) == get<0>(j)) return (get<1>(i)) < (get<1>(j)); 8 | else return false; 9 | } 10 | 11 | void three_d_to_one_d( const unsigned int i, 12 | const unsigned int j, 13 | const unsigned int k, 14 | const unsigned int I, 15 | const unsigned int J, 16 | unsigned int& t ) 17 | { 18 | t=i + j*I + k*I*J; 19 | } 20 | 21 | void one_d_to_three_d( const unsigned int t, 22 | const unsigned int I, 23 | const unsigned int J, 24 | unsigned int& i, 25 | unsigned int& j, 26 | unsigned int& k) 27 | { 28 | k = t/(I*J); 29 | j = (t-k*I*J)/I; 30 | i = t-j*I - k*I*J; 31 | } 32 | 33 | // get the neighboring node numbers (periodic domain) 34 | // watch out for the negative unsigned int!! 35 | void get_neighbor( uint t[][3][3], 36 | cuint i, cuint j, cuint k, 37 | cuint I, cuint J, cuint K ) 38 | { 39 | for(int p=0; p<3; p++){ 40 | for(int q=0; q<3; q++){ 41 | for(int r=0; r<3; r++){ 42 | int nei_i, nei_j, nei_k; 43 | if(i+p>=I+1) nei_i = i+p-1-I; 44 | else if(i+p<1) nei_i = i+p+I-1; 45 | else nei_i = i+p-1; 46 | 47 | if(j+q>=J+1) nei_j = j+q-1-J; 48 | else if(j+q<1) nei_j = j+q+J-1; 49 | else nei_j = j+q-1; 50 | 51 | if(k+r>=K+1) nei_k = k+r-1-K; 52 | else if(k+r<1) nei_k = k+r+K-1; 53 | else nei_k = k+r-1; 54 | // cout<=I) nei_i = i+p-I; 76 | else nei_i = i+p; 77 | if(j+q>=J) nei_j = j+q-J; 78 | else nei_j = j+q; 79 | if(k+r>=K) nei_k = k+r-K; 80 | else nei_k = k+r; 81 | 82 | three_d_to_one_d(nei_i,nei_j,nei_k, I,J, t[p][q][r]); 83 | } 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /utils.h: -------------------------------------------------------------------------------- 1 | // utility functions 2 | #ifndef UTILS_H 3 | #define UTILS_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "boost/multi_array.hpp" 16 | 17 | using namespace std; 18 | 19 | typedef const unsigned int cuint; 20 | typedef unsigned int uint; 21 | typedef const double cdouble; 22 | typedef const int cint; 23 | 24 | const double pi=3.14159265359; 25 | extern unsigned int nt; 26 | 27 | cuint X_DIR=0; 28 | cuint Y_DIR=1; 29 | cuint Z_DIR=2; 30 | cuint XY_DIR=3; 31 | cuint XZ_DIR=4; 32 | cuint YZ_DIR=5; 33 | cuint X2_DIR=6; 34 | cuint Y2_DIR=7; 35 | cuint Z2_DIR=8; 36 | 37 | 38 | // comparison function for sorting pairs 39 | int comp_pairs( const tuple& i, 40 | const tuple& j ); 41 | 42 | void three_d_to_one_d( const unsigned int i, 43 | const unsigned int j, 44 | const unsigned int k, 45 | const unsigned int I, 46 | const unsigned int J, 47 | unsigned int& t ); 48 | 49 | void one_d_to_three_d( const unsigned int t, 50 | const unsigned int I, 51 | const unsigned int J, 52 | unsigned int& i, 53 | unsigned int& j, 54 | unsigned int& k); 55 | 56 | void get_neighbor( unsigned int t[][3][3], 57 | cuint i, cuint j, cuint k, 58 | cuint I, cuint J, cuint K ); 59 | 60 | // get node numbers in a box 61 | void get_box( uint t[][2][2], 62 | cuint i, cuint j, cuint k, 63 | cuint I, cuint J, cuint K ); 64 | 65 | 66 | #endif //UTILS_H 67 | -------------------------------------------------------------------------------- /v_cycle.C: -------------------------------------------------------------------------------- 1 | #include "v_cycle.h" 2 | #include "pressure.h" 3 | 4 | // multigrid v-cycle 5 | void v_cycle( double* P, uint n_dof, cuint nx, cuint ny, cuint nz, 6 | cdouble hx, cdouble hy, cdouble hz, 7 | cdouble hx2i, cdouble hy2i, cdouble hz2i, 8 | cdouble tol, cuint max_iteration, cuint pre_smooth_iteration, 9 | cdouble lx, cdouble ly, cdouble lz, 10 | cuint level, cuint max_level, 11 | double* F, 12 | double& Er, 13 | double* Uss, double* Vss, double* Wss, 14 | cdouble bcs[][6], 15 | cdouble dt 16 | ) 17 | { 18 | cout<<"level: "< > M_sp; 32 | vector val; 33 | vector col_ind; 34 | vector row_ptr(1,0); 35 | 36 | // create finite difference matrix 37 | cout<<"create finite difference matrix"< > M_sp_coar; 123 | vector val_coar; 124 | vector col_ind_coar; 125 | vector row_ptr_coar(1,0); 126 | 127 | // create finite difference matrix 128 | cout<<"create finite difference matrix"< > Lp_sp; 344 | vector Lp_val(Lp_sp.size(),0.0); 345 | vector Lp_col_ind(Lp_sp.size(), 0); 346 | vector Lp_row_ptr(1,0); 347 | 348 | // build right hand side of pressure poisson equation 349 | pressure_rhs(Fp, Uss, Vss, Wss, nx, ny, nz, bcs, hx, hy, hz, dt); 350 | 351 | // build pressure matrix 352 | pressure_matrix( Lp_sp, 353 | Lp_val, Lp_col_ind, Lp_row_ptr, 354 | nx, ny, nz, 355 | hx2i, hy2i, hz2i, 356 | n_dof 357 | ); 358 | 359 | // solve dicrete poisson equation: Lp\Fp 360 | // construct solution vector 361 | double* P_tmp = new double[n_dof]; 362 | // initial guess 363 | #pragma omp parallel for shared(P, P_tmp) num_threads(nt) 364 | for(int n=0; n > Lu_sp; 21 | vector Lu_val(Lu_sp.size(),0.0); 22 | vector Lu_col_ind(Lu_sp.size(), 0); 23 | vector Lu_row_ptr(1,0); 24 | // double* Fu = new double[n_u_dof]; 25 | // set load vector 26 | // viscosity_load_vector(Fu, U, nx-1, ny, nz); 27 | // sparse viscosity matrix and bc modification 28 | viscosity_matrix_sparse( Lu_sp, Lu_val, Lu_col_ind, Lu_row_ptr, 29 | U, nx-1, ny, nz, hx, hy, hz, 30 | hx2i, hy2i, hz2i, dt, nu, 31 | bcs[0], X_DIR ); 32 | // now solve Lu\Fu 33 | // construct solution vector 34 | double* Uss_tmp = new double[n_u_dof]; 35 | // initial guess 36 | #pragma omp parallel for shared(Uss, Uss_tmp) num_threads(nt) 37 | for(int n=0; n > Lv_sp; 51 | vector Lv_val(Lv_sp.size(),0.0); 52 | vector Lv_col_ind(Lv_sp.size(), 0); 53 | vector Lv_row_ptr(1,0); 54 | // double* Fv = new double[n_v_dof]; 55 | // set load vector 56 | // viscosity_load_vector(Fv, V, nx, ny-1, nz); 57 | // sparse viscosity matrix and bc modification 58 | viscosity_matrix_sparse( Lv_sp, Lv_val, Lv_col_ind, Lv_row_ptr, 59 | V, nx, ny-1, nz, hx, hy, hz, 60 | hx2i, hy2i, hz2i, dt, nu, 61 | bcs[1], Y_DIR ); 62 | // now solve Lv\Fv 63 | // construct solution vector 64 | // double* Vss = new double[n_v_dof]; 65 | double* Vss_tmp = new double[n_v_dof]; 66 | // initial guess 67 | #pragma omp parallel for shared(Vss, Vss_tmp) num_threads(nt) 68 | for(int n=0; n > Lw_sp; 82 | vector Lw_val(Lw_sp.size(),0.0); 83 | vector Lw_col_ind(Lw_sp.size(), 0); 84 | vector Lw_row_ptr(1,0); 85 | // double* Fw = new double[n_w_dof]; 86 | // set load vector 87 | // viscosity_load_vector(Fw, W, nx, ny, nz-1); 88 | // sparse viscosity matrix and bc modification 89 | viscosity_matrix_sparse( Lw_sp, Lw_val, Lw_col_ind, Lw_row_ptr, 90 | W, nx, ny, nz-1, hx, hy, hz, 91 | hx2i, hy2i, hz2i, dt, nu, 92 | bcs[2], Z_DIR ); 93 | // now solve Lw\Fw 94 | // construct solution vector 95 | // double* Wss = new double[n_w_dof]; 96 | double* Wss_tmp = new double[n_w_dof]; 97 | // initial guess 98 | #pragma omp parallel for shared(Wss, Wss_tmp) num_threads(nt) 99 | for(int n=0; n >& L_sp, 122 | vector& val, 123 | vector& col_ind, 124 | vector& row_ptr, 125 | double* F, 126 | cuint nx, cuint ny, cuint nz, 127 | cdouble hx, cdouble hy, cdouble hz, 128 | cdouble hx2i, cdouble hy2i, cdouble hz2i, 129 | cdouble dt, cdouble nu, 130 | cdouble* u_bc, 131 | cuint dir // direction of flow: u, v, or w? 132 | ) 133 | { 134 | // initialize sparse matrix (row#, col#, value) 135 | vector > > M; 136 | M.resize(nt); 137 | 138 | #pragma omp parallel shared(M) num_threads(nt) 139 | { 140 | cuint myrank = omp_get_thread_num(); 141 | 142 | #pragma omp for 143 | for(int i=0; i=0) 163 | sparse_add(M[myrank], t_111, t_011, hx2i); 164 | else{ // x0 165 | if(dir==X_DIR) 166 | F[t_111] -= dt*nu*u_bc[0]/(hx*hx); 167 | else{ 168 | F[t_111] -= dt*nu/(hx*hx)*u_bc[0]*2; 169 | sparse_add(M[myrank], t_111, t_111, -1*hx2i); 170 | } 171 | } 172 | 173 | sparse_add(M[myrank], t_111, t_111, -2*hx2i); 174 | 175 | if(i+1=0) 187 | sparse_add(M[myrank], t_111, t_101, hy2i); 188 | else{ // y0 189 | if(dir==Y_DIR) 190 | F[t_111] -= dt*nu/(hy*hy) * u_bc[2]; 191 | else{ 192 | F[t_111] -= dt*nu/(hy*hy)*u_bc[2]*2; 193 | sparse_add(M[myrank], t_111, t_111, -1*hy2i); 194 | } 195 | 196 | } 197 | 198 | sparse_add(M[myrank], t_111, t_111, -2*hy2i); 199 | 200 | if(j+1=0) 212 | sparse_add(M[myrank], t_111, t_110, hz2i); 213 | else{ // z0 214 | if(dir==Z_DIR) 215 | F[t_111] -= dt*nu/(hz*hz) * u_bc[4]; 216 | else{ 217 | F[t_111] -= dt*nu/(hz*hz)*u_bc[4]*2; 218 | sparse_add(M[myrank], t_111, t_111, -1*hz2i); 219 | } 220 | } 221 | 222 | sparse_add(M[myrank], t_111, t_111, -2*hz2i); 223 | 224 | if(k+1 > tmp; 246 | tmp.resize(M[0].size()); 247 | mergesort(&M[0][0], nt, M[0].size(), &tmp[0] ); 248 | 249 | // consolidate 250 | L_sp.push_back(M[0][0]); 251 | uint ct=0; 252 | for(int i =1; i(L_sp[ct])==get<0>(M[0][i])) 254 | && (get<1>(L_sp[ct])==get<1>(M[0][i])) ){ 255 | get<2>(L_sp[ct]) += get<2>(M[0][i]); 256 | } 257 | else{ 258 | L_sp.push_back(M[0][i]); 259 | ct++; 260 | } 261 | } 262 | 263 | // convert to CSR format 264 | // cout<<"converting to CSR format"<(L_sp[i]); 271 | col_ind[i] = get<1>(L_sp[i]); 272 | } 273 | for(int i=1; i(L_sp[i])!=get<0>(L_sp[i-1])) 275 | row_ptr.push_back(i); 276 | } 277 | row_ptr.push_back(L_sp.size()); 278 | 279 | // cout<<"done"<(L_sp[i])<<" "<(L_sp[i]) 290 | // <<" "<(L_sp[i])<& U, 27 | boost::multi_array& V, 28 | boost::multi_array& W, 29 | double* Uss, double* Vss, double* Wss, 30 | cuint nx, cuint ny, cuint nz, 31 | cdouble hx, cdouble hy, cdouble hz, 32 | cdouble hx2i, cdouble hy2i, cdouble hz2i, 33 | cdouble dt, cdouble nu, 34 | cdouble bcs[][6], 35 | cdouble tol, cuint max_iteration ); 36 | 37 | 38 | // sparse viscosity matrix 39 | void viscosity_matrix_sparse( vector >& L_sp, 40 | vector& val, 41 | vector& col_ind, 42 | vector& row_ptr, 43 | double* F, 44 | cuint nx, cuint ny, cuint nz, 45 | cdouble hx, cdouble hy, cdouble hz, 46 | cdouble hx2i, cdouble hy2i, cdouble hz2i, 47 | cdouble dt, cdouble nu, 48 | cdouble* u_bc, 49 | cuint dir // direction of flow: u, v, or w? 50 | ); 51 | 52 | // set load vector for implicit viscous solve 53 | void viscosity_load_vector( double* F, double* U, 54 | cuint nx, cuint ny, cuint nz); 55 | 56 | #endif //VISCOSITY_H 57 | --------------------------------------------------------------------------------