├── .gitignore ├── PP_01 ├── 1.c ├── 2.c ├── 3.c ├── 4.c ├── 5.c ├── Makefile ├── ex-1-2020.pdf ├── run.sh └── util.h ├── PP_02 ├── 1.c ├── 2.c ├── 3.c ├── 4_1.c ├── 4_2.c ├── 5.c ├── 6.c ├── 7.c ├── 7_2.c ├── Makefile ├── config ├── mpi-lab.pdf ├── run.sh └── test.sh ├── README.md ├── example ├── mini-omp-demo │ ├── badloop.c │ ├── cri.c │ ├── critical.c │ ├── dis-err.c │ ├── dis-ok.c │ ├── dis-ok1.c │ ├── loopA1.c │ ├── loopA2.c │ ├── master.c │ ├── pfor-no-schedule.c │ ├── pfor.c │ ├── pi01.c │ ├── pi02.c │ ├── pi03.c │ ├── pi04.c │ ├── pip.c │ ├── pipe.c │ ├── private.c │ ├── reduction.c │ ├── section.c │ ├── single.c │ └── threadprivate.c ├── mpi │ ├── bigdata_sort.c │ ├── matmul.c │ ├── matmul1.c │ ├── max.c │ ├── mul.c │ ├── pi.c │ ├── quicksort.c │ ├── rece.c │ ├── sort.c │ └── sort_.c ├── mpi_demo │ ├── allgather │ ├── allgather.c │ ├── allgatherv │ ├── allgatherv.c │ ├── allreduce-int-sum │ ├── allreduce-int-sum.c │ ├── allreduce-vector-sum.c │ ├── alltoall │ ├── alltoall.c │ ├── alltoallv.c │ ├── bcast │ ├── bcast.c │ ├── gather │ ├── gather.c │ ├── gatherv.c │ ├── isend_irecv │ ├── isend_irecv.c │ ├── mpi-1.c │ ├── mpi_config │ ├── overview │ ├── overview.c │ ├── pack │ ├── pack.c │ ├── pack1 │ ├── pack1.c │ ├── reduce-int-sum │ ├── reduce-int-sum.c │ ├── reduce-max.c │ ├── reduce-maxloc.c │ ├── reduce-minloc.c │ ├── reduce-user-complex │ ├── reduce-user-complex.c │ ├── reduce-user-matrix.c │ ├── reduce-vector-sum.c │ ├── ring │ ├── ring.c │ ├── scan.c │ ├── scanme.c │ ├── scatter.c │ ├── scatterv.c │ ├── type_struct.c │ ├── type_struct1 │ ├── type_struct1.c │ ├── type_vector-1.c │ └── type_vector.c └── openmp │ ├── Makefile │ ├── copyin.c │ ├── critical.c │ ├── dynamic.c │ ├── firstprivate.c │ ├── for.c │ ├── fork_join.c │ ├── get_num_procs.c │ ├── get_thread_num.c │ ├── lastprivate.c │ ├── lock.c │ ├── omp_in_parallel.c │ ├── ordered.c │ ├── parallel.c │ ├── private.c │ ├── reduction.c │ ├── schedule.c │ ├── section.c │ ├── set_dynamic.c │ ├── set_num_threads.c │ ├── shared.c │ ├── size.c │ ├── test_lock.c │ └── threadprivate.c ├── matrix ├── cannon.c ├── fox.c ├── matrix.h └── tranpose.c ├── parallel01 ├── PSRS.c ├── input.txt ├── merge ├── merge.c └── pi.c ├── parallel02 ├── PSRS.c ├── input.txt ├── pi.c └── test.c └── sort ├── enum_sort.c ├── psrs_sort.c ├── quick_sort.c └── sort.h /.gitignore: -------------------------------------------------------------------------------- 1 | a.out 2 | .vscode/ 3 | build/ 4 | -------------------------------------------------------------------------------- /PP_01/1.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | #include 3 | 4 | int _1() { 5 | int A[256]; 6 | int B[256]; 7 | random_array(A, 256); 8 | copy_array(B, A, 256); 9 | omp_set_num_threads(9); 10 | 11 | clock_t start = clock(); 12 | for(int i = 2; i <= 10; i++) { 13 | #pragma omp parallel for 14 | for (int j = 2; j <= 10; j++) { 15 | A[IN(i,j,16)] = 0.5*(A[IN(i-1,j-1,16)]+A[IN(i+1,j+1,16)]); 16 | } 17 | } 18 | clock_t end = clock(); 19 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 20 | 21 | start = clock(); 22 | for(int i = 2; i <= 10; i++) { 23 | for (int j = 2; j <= 10; j++) { 24 | B[IN(i,j,16)] = 0.5*(B[IN(i-1,j-1,16)]+B[IN(i+1,j+1,16)]); 25 | } 26 | } 27 | end = clock(); 28 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 29 | 30 | return check_ans(A, B, 256); 31 | } 32 | 33 | int _2() { 34 | int A[50]; 35 | int B[22]; 36 | int C[50]; 37 | random_array(A, 50); 38 | 
random_array(B, 50); 39 | copy_array(C, A, 50); 40 | omp_set_num_threads(8); 41 | 42 | clock_t start = clock(); 43 | for(int k = 2; k <= 20; k+=2) { 44 | #pragma omp parallel for 45 | for(int i = k; i <= min(20,k+1); i++) { 46 | A[2*i+2] = A[2*i-2]+B[i]; 47 | } 48 | } 49 | clock_t end = clock(); 50 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 51 | 52 | start = clock(); 53 | for(int i = 2; i <= 20; i++) { 54 | C[2*i+2] = C[2*i-2]+B[i]; 55 | } 56 | end = clock(); 57 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 58 | 59 | return check_ans(A, C, 50); 60 | 61 | } 62 | 63 | int _3() { 64 | int A[24]; 65 | int B[24]; 66 | int C[24]; 67 | 68 | int B2[24]; 69 | int C2[24]; 70 | 71 | random_array(A, 24); 72 | random_array(B, 24); 73 | random_array(C, 24); 74 | copy_array(B2, B, 24); 75 | copy_array(C2, C, 24); 76 | 77 | omp_set_num_threads(8); 78 | 79 | int D[24]; 80 | int share = 1; 81 | D[0] = 1; 82 | clock_t start = clock(); 83 | // find the parallel block border 84 | #pragma omp parallel for shared(share) 85 | for(int i = 2; i <= 20; i++) { 86 | if (A[i] < 0 && A[i+1] > 0) 87 | D[share++] = i; 88 | } 89 | qsort(D, share, sizeof(int), cmp); 90 | D[share++] = 20; 91 | 92 | // Parallel in the Block, Serial the Block 93 | for(int j = 0; j < share-1; j++) 94 | #pragma omp parallel for 95 | for(int i = D[j]+1; i <= D[j+1]; i++) { 96 | if (A[i] > 0) 97 | B[i] = C[i-1]+1; 98 | else 99 | C[i] = B[i]-1; 100 | } 101 | clock_t end = clock(); 102 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 103 | 104 | start = clock(); 105 | for(int i = 2; i <= 20; i++) { 106 | if (A[i] > 0) 107 | B2[i] = C2[i-1]+1; 108 | else 109 | C2[i] = B2[i]-1; 110 | } 111 | end = clock(); 112 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 113 | 114 | int ans = check_ans(B, B2, 24); 115 | ans &= check_ans(C, C2, 24); 116 | return ans; 117 | } 118 | 119 | int main() { 120 | if (!_1()) 121 | printf("Error! 1_1\n"); 122 | else 123 | printf("1_1 Done.\n"); 124 | 125 | if (!_2()) 126 | printf("Error! 1_2\n"); 127 | else 128 | printf("1_2 Done.\n"); 129 | 130 | if (!_3()) 131 | printf("Error! 
1_3\n"); 132 | else 133 | printf("1_3 Done.\n"); 134 | } -------------------------------------------------------------------------------- /PP_01/2.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | #include 3 | 4 | #ifndef N 5 | #define N 20 6 | #endif 7 | 8 | 9 | #ifndef M 10 | #define M 20 11 | #endif 12 | 13 | int _1() { 14 | int n = (M+2)*N; 15 | int A[n]; 16 | int B[n]; 17 | int C = 2; 18 | 19 | random_array(A, n); 20 | copy_array(B, A, n); 21 | omp_set_num_threads(8); 22 | 23 | clock_t start = clock(); 24 | for(int i = 1; i <= M; i++) { 25 | #pragma omp parallel for 26 | for (int j = 1; j <= N; j++) { 27 | A[IN(i+1,j+1,N)] = A[IN(i,j,N)]+C; 28 | } 29 | } 30 | clock_t end = clock(); 31 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 32 | 33 | start = clock(); 34 | for(int i = 1; i <= M; i++) { 35 | for (int j = 1; j <= N; j++) { 36 | B[IN(i+1,j+1,N)] = B[IN(i,j,N)]+C; 37 | } 38 | } 39 | end = clock(); 40 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 41 | 42 | return check_ans(A, B, n); 43 | } 44 | 45 | int _2() { 46 | int X[101]; 47 | int X2[101]; 48 | int Y[201]; 49 | int Y2[201]; 50 | int B[101]; 51 | int B2[101]; 52 | int *A, *C; 53 | int *A2; 54 | 55 | int n = 110*110; 56 | A = malloc(n*sizeof(int)); 57 | A2 = malloc(n*sizeof(int)); 58 | C = malloc(n*sizeof(int)); 59 | random_array(A, n); 60 | random_array(C, n); 61 | random_array(Y, 201); 62 | copy_array(A2, A, n); 63 | copy_array(Y2, Y, 201); 64 | 65 | omp_set_num_threads(10); 66 | clock_t start = clock(); 67 | 68 | for(int i = 1; i <= 100; i++) { 69 | for(int j = 1; j <= 100; j++) { 70 | B[j] = A[IN(i,N,110)]; 71 | #pragma omp parallel for 72 | for(int k = 1; k <= 100; k++) { 73 | A[IN(j+1, k, 110)] = B[j] + C[IN(j, k, 110)]; 74 | } 75 | Y[i+j] = A[IN(j+1, N, 110)]; 76 | } 77 | } 78 | #pragma omp parallel for 79 | for(int i = 1; i <= 100; i++) 80 | X[i] = Y[i] + 10; 81 | 82 | clock_t end = clock(); 83 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 84 | 85 | 86 | start = clock(); 87 | for(int i = 1; i <= 100; i++) { 88 | X2[i] = Y2[i] + 10; 89 | for(int j = 1; j <= 100; j++) { 90 | B2[j] = A2[IN(i,N,110)]; 91 | for(int k = 1; k <= 100; k++) { 92 | A2[IN(j+1, k, 110)] = B2[j] + C[IN(j, k, 110)]; 93 | } 94 | Y2[i+j] = A2[IN(j+1, N, 110)]; 95 | } 96 | } 97 | end = clock(); 98 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 99 | 100 | free(A); 101 | free(A2); 102 | free(C); 103 | return check_ans(X+1, X2+1, 100); 104 | } 105 | 106 | int main() 107 | { 108 | if (!_1()) 109 | printf("Error! 2_1\n"); 110 | else 111 | printf("2_1 Done.\n"); 112 | 113 | if (!_2()) 114 | printf("Error! 
2_2\n"); 115 | else 116 | printf("2_2 Done.\n"); 117 | } 118 | -------------------------------------------------------------------------------- /PP_01/3.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | int _1() { 4 | int *A; 5 | int *B; 6 | int n = 310*110; 7 | A = malloc(n*sizeof(int)); 8 | B = malloc(n*sizeof(int)); 9 | random_array(A, n); 10 | copy_array(B, A, n); 11 | omp_set_num_threads(10); 12 | 13 | clock_t start = clock(); 14 | for(int i = 1; i <= 100; i++) { 15 | for (int j = 1; j <= 50; j++) { 16 | A[IN(3*i+2, 2*j-1, 110)] = A[IN(5*j,i+3,110)]+2; 17 | } 18 | } 19 | clock_t end = clock(); 20 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 21 | 22 | start = clock(); 23 | for(int i = 1; i <= 100; i++) { 24 | for (int j = 1; j <= 50; j++) { 25 | B[IN(3*i+2, 2*j-1, 110)] = B[IN(5*j,i+3,110)]+2; 26 | } 27 | } 28 | end = clock(); 29 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 30 | 31 | int ans = check_ans(A, B, n); 32 | free(A); 33 | free(B); 34 | return ans; 35 | } 36 | 37 | int _2() { 38 | int x; 39 | int y = rand() % 100 -50; 40 | int z = rand() % 100 -50; 41 | int z2 = z; 42 | int B[101]; 43 | int A[101]; 44 | int A2[101]; 45 | int C[102]; 46 | int C2[102]; 47 | int *D; 48 | int *D2; 49 | 50 | int n = 101*51; 51 | D = malloc(n*sizeof(int)); 52 | D2 = malloc(n*sizeof(int)); 53 | random_array(A, 101); 54 | random_array(B, 101); 55 | random_array(C, 102); 56 | random_array(D, n); 57 | copy_array(A2, A, 101); 58 | copy_array(C2, C, 102); 59 | copy_array(D2, D, n); 60 | 61 | omp_set_num_threads(5); 62 | 63 | clock_t start = clock(); 64 | x = y * 2; 65 | #pragma omp parallel for 66 | for(int i = 1; i <= 100; i++) { 67 | C[i] = B[i] + x; 68 | } 69 | #pragma omp parallel for 70 | for(int i = 1; i <= 100; i++) { 71 | A[i] = C[i-1] + z; 72 | for(int j = 1; j <= 50; j++) { 73 | D[IN(i, j, 51)] = D[IN(i, j-1, 51)] + x; 74 | } 75 | } 76 | C[101] = A[100] * B[100]; 77 | z = y + 4; 78 | clock_t end = clock(); 79 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 80 | 81 | start = clock(); 82 | x = y * 2; 83 | for(int i = 1; i <= 100; i++) { 84 | C2[i] = B[i] + x; 85 | A2[i] = C2[i-1] + z2; 86 | C2[i+1] = A2[i] * B[i]; 87 | for(int j = 1; j <= 50; j++) { 88 | D2[IN(i, j, 51)] = D2[IN(i, j-1, 51)] + x; 89 | } 90 | } 91 | z2 = y + 4; 92 | end = clock(); 93 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 94 | 95 | 96 | int ans = check_ans(A2, A, 101); 97 | ans &= check_ans(C2, C, 102); 98 | ans &= check_ans(D2, D, n); 99 | free(D); 100 | free(D2); 101 | return ans; 102 | } 103 | 104 | int main() { 105 | if (!_1()) 106 | printf("Error! 3_1\n"); 107 | else 108 | printf("3_1 Done.\n"); 109 | 110 | if (!_2()) 111 | printf("Error! 
3_2\n"); 112 | else 113 | printf("3_2 Done.\n"); 114 | } -------------------------------------------------------------------------------- /PP_01/4.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | int _1() { 4 | int A[128]; 5 | int B[128]; 6 | 7 | random_array(A, 128); 8 | copy_array(B, A, 128); 9 | 10 | omp_set_num_threads(4); 11 | clock_t start = clock(); 12 | // diagonal parallel 13 | for(int i = 4; i <= 20; i++) { 14 | #pragma omp parallel for 15 | for(int j = max(2, i-10); j <= min(i/2, 10); j++) { 16 | A[IN(j, i-j, 11)] = (A[IN(j, i-j-1, 11)] + A[IN(j-1, i-j, 11)])*0.5; 17 | } 18 | } 19 | clock_t end = clock(); 20 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 21 | 22 | start = clock(); 23 | for(int i = 2; i <= 10; i++) { 24 | for(int j = i; j <= 10; j++) { 25 | B[IN(i, j, 11)] = (B[IN(i, j-1, 11)] + B[IN(i-1, j, 11)])*0.5; 26 | } 27 | } 28 | end = clock(); 29 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 30 | 31 | return check_ans(A, B, 128); 32 | } 33 | 34 | int _2() { 35 | int A[20]; 36 | int A2[20]; 37 | int B[20]; 38 | 39 | random_array(A, 20); 40 | random_array(B, 20); 41 | copy_array(A2, A, 20); 42 | 43 | omp_set_num_threads(3); 44 | clock_t start = clock(); 45 | for(int k = 1; k <= 16; k+=3) { 46 | #pragma omp parallel for 47 | for(int i = k; i <= min(16, k+2); i++) { 48 | A2[i+3] = A2[i] + B[i]; 49 | } 50 | } 51 | clock_t end = clock(); 52 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 53 | 54 | start = clock(); 55 | for(int i = 1; i <= 16; i++) { 56 | A[i+3] = A[i] + B[i]; 57 | } 58 | end = clock(); 59 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 60 | 61 | return check_ans(A, A2, 20); 62 | } 63 | 64 | 65 | int main() { 66 | if (!_1()) 67 | printf("Error! 4_1\n"); 68 | else 69 | printf("4_1 Done.\n"); 70 | 71 | if (!_2()) 72 | printf("Error! 
4_2\n"); 73 | else 74 | printf("4_2 Done.\n"); 75 | 76 | printf("4_3 is the same as 4_2\n"); 77 | } -------------------------------------------------------------------------------- /PP_01/5.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | int _1() { 4 | float A[101]; 5 | float B[101]; 6 | float C[101]; 7 | float D[101]; 8 | float A2[101]; 9 | float B2[101]; 10 | float C2[101]; 11 | float D2[101]; 12 | 13 | random_array_f(A, 101); 14 | random_array_f(B, 101); 15 | random_array_f(C, 101); 16 | random_array_f(D, 101); 17 | copy_array_f(A2, A, 101); 18 | copy_array_f(B2, B, 101); 19 | copy_array_f(C2, C, 101); 20 | copy_array_f(D2, D, 101); 21 | 22 | omp_set_num_threads(10); 23 | 24 | clock_t start = clock(); 25 | for(int i = 1; i <= 100; i++) { 26 | B[i] = C[i-1] * 2; 27 | C[i] = 1 / B[i]; 28 | } 29 | #pragma omp parallel for 30 | for(int i = 1; i <= 100; i++) { 31 | A[i] = A[i] + B[i-1]; 32 | D[i] = C[i] * C[i]; 33 | } 34 | clock_t end = clock(); 35 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 36 | 37 | start = clock(); 38 | for(int i = 1; i <= 100; i++) { 39 | A2[i] = A2[i] + B2[i-1]; 40 | B2[i] = C2[i-1] * 2; 41 | C2[i] = 1 / B2[i]; 42 | D2[i] = C2[i] * C2[i]; 43 | } 44 | end = clock(); 45 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 46 | 47 | return check_ans_f(D, D2, 101); 48 | } 49 | 50 | int _2() { 51 | int *A; 52 | int *A2; 53 | int *B; 54 | int *C; 55 | int *D; 56 | int *D2; 57 | 58 | int n = 1000; 59 | A = malloc(n*sizeof(int)); 60 | A2 = malloc(n*sizeof(int)); 61 | B = malloc(n*sizeof(int)); 62 | C = malloc(n*sizeof(int)); 63 | D = malloc(n*sizeof(int)); 64 | D2 = malloc(n*sizeof(int)); 65 | random_array(A, n); 66 | random_array(B, n); 67 | random_array(C, n); 68 | copy_array(A2, A, n); 69 | 70 | omp_set_num_threads(5); 71 | 72 | clock_t start = clock(); 73 | #pragma omp parallel for 74 | for(int i = 1; i <= 500; i++) { 75 | A[i] = B[i] + C[i]; 76 | D[i] = (A[i] + A[1000-i]) / 2; 77 | } 78 | #pragma omp parallel for 79 | for(int i = 501; i <= 999; i++) { 80 | A[i] = B[i] + C[i]; 81 | D[i] = (A[i] + A[1000-i]) / 2; 82 | } 83 | clock_t end = clock(); 84 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 85 | 86 | start = clock(); 87 | for(int i = 1; i <= 999; i++) { 88 | A2[i] = B[i] + C[i]; 89 | D2[i] = (A2[i] + A2[1000-i]) / 2; 90 | } 91 | end = clock(); 92 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 93 | 94 | int ans = check_ans(D, D2, n); 95 | free(A); 96 | free(A2); 97 | free(B); 98 | free(C); 99 | free(D); 100 | free(D2); 101 | return ans; 102 | } 103 | 104 | int _3() { 105 | int *A; 106 | int *A2; 107 | int *C; 108 | int *D; 109 | int *D2; 110 | 111 | int n = 601 * 201; 112 | A = malloc(n*sizeof(int)); 113 | A2 = malloc(n*sizeof(int)); 114 | C = malloc(101*101*sizeof(int)); 115 | D = malloc(101*101*sizeof(int)); 116 | D2 = malloc(101*101*sizeof(int)); 117 | 118 | random_array(C, 101*101); 119 | random_array(A, n); 120 | copy_array(A2, A, n); 121 | 122 | omp_set_num_threads(5); 123 | clock_t start = clock(); 124 | #pragma omp parallel for 125 | for(int i = 1; i <= 100; i++) { 126 | for(int j = 1; j <= 100; j++) { 127 | A[IN(3*i+2*j+99, 2*j, 201)] = C[IN(i, j, 101)] * 2; 128 | D[IN(i, j, 101)] = A[IN(i-j+99, i+j, 201)]; 129 | } 130 | } 131 | clock_t end = clock(); 132 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 133 | 134 | start = clock(); 135 | for(int i = 1; i <= 100; i++) { 136 
| for(int j = 1; j <= 100; j++) { 137 | A2[IN(3*i+2*j+99, 2*j, 201)] = C[IN(i, j, 101)] * 2; 138 | D2[IN(i, j, 101)] = A2[IN(i-j+99, i+j, 201)]; 139 | } 140 | } 141 | end = clock(); 142 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 143 | 144 | int ans = check_ans(D2, D, 101*101); 145 | free(A); 146 | free(A2); 147 | free(C); 148 | free(D); 149 | free(D2); 150 | return ans; 151 | } 152 | 153 | int main() { 154 | if (!_1()) 155 | printf("Error! 5_1\n"); 156 | else 157 | printf("5_1 Done.\n"); 158 | 159 | if (!_2()) 160 | printf("Error! 5_2\n"); 161 | else 162 | printf("5_2 Done.\n"); 163 | 164 | if (!_3()) 165 | printf("Error! 5_3\n"); 166 | else 167 | printf("5_3 Done.\n"); 168 | } -------------------------------------------------------------------------------- /PP_01/Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | OPENMP=-fopenmp 3 | SOURCES:=$(shell find $($(shell pwd)) -name '*.c') 4 | OBJS=$(SOURCES:%.c=%) 5 | 6 | 7 | all : $(OBJS) 8 | @echo "编译中..." 9 | @echo $(SOURCES) 10 | @echo "编译完成!" 11 | if [ ! -d "build" ]; then mkdir build; fi 12 | mv $(OBJS) build 13 | 14 | 15 | %: %.c 16 | $(CC) $(OPENMP) $< -o $@ 17 | 18 | .PHONY: clean 19 | clean: 20 | rm -rf build/ -------------------------------------------------------------------------------- /PP_01/ex-1-2020.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/PP_01/ex-1-2020.pdf -------------------------------------------------------------------------------- /PP_01/run.sh: -------------------------------------------------------------------------------- 1 | make 2 | 3 | for file in `ls build` 4 | do 5 | ./build/${file} 6 | done 7 | -------------------------------------------------------------------------------- /PP_01/util.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define IN(i, j, LINE) ((i)*(LINE)+(j)) 8 | 9 | void random_array(int *a, int num) { 10 | srand(time(NULL)); 11 | for(int i = 0; i < num; i++) { 12 | a[i] = rand() % 100 - 50; 13 | } 14 | } 15 | 16 | void random_array_f(float *a, int num) { 17 | srand(time(NULL)); 18 | for(int i = 0; i < num; i++) { 19 | a[i] = (float)rand() / (RAND_MAX); 20 | } 21 | } 22 | 23 | void copy_array(int *dst, int *src, int num) { 24 | for(int i = 0; i < num; i++) { 25 | dst[i] = src[i]; 26 | } 27 | } 28 | 29 | void copy_array_f(float *dst, float *src, int num) { 30 | for(int i = 0; i < num; i++) { 31 | dst[i] = src[i]; 32 | } 33 | } 34 | 35 | int check_ans(int *a, int *b, int num) { 36 | for(int i = 0; i < num; i++) { 37 | if (a[i] != b[i]) 38 | return 0; 39 | } 40 | return 1; 41 | } 42 | 43 | int check_ans_f(float *a, float *b, int num) { 44 | for(int i = 0; i < num; i++) { 45 | if (fabs(a[i]-b[i]) > 1e-2) 46 | return 0; 47 | } 48 | return 1; 49 | } 50 | 51 | void print(int *D, int num) { 52 | for(int i = 0; i < num; i++) 53 | printf("%d ", D[i]); 54 | printf("\n"); 55 | } 56 | 57 | int min(int a, int b) { 58 | return a > b ? b : a; 59 | } 60 | 61 | int max(int a, int b) { 62 | return a > b ? 
a : b; 63 | } 64 | 65 | int cmp(const void *a , const void *b) { 66 | return *(int *)a - *(int *)b; //升序排序 67 | } 68 | //return *(int *)b - *(int *)a; //降序排序 -------------------------------------------------------------------------------- /PP_02/1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int main(int argc, char *argv[]) 7 | { 8 | int id_procs, num_procs; 9 | int msg = 10; 10 | int tag = 5; 11 | char seq[16] = "Hello MPI!"; 12 | char seqin[16]; 13 | 14 | MPI_Init(&argc, &argv); 15 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 16 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 17 | 18 | 19 | int color = id_procs % 3; 20 | int key = id_procs / 3; 21 | MPI_Comm split_comm_world; 22 | MPI_Status status; 23 | int rank; 24 | int size; 25 | int msgin; 26 | 27 | // MPI split COMM_WORLD into 3 groups 28 | MPI_Comm_split(MPI_COMM_WORLD, color, key, &split_comm_world); 29 | MPI_Comm_rank(split_comm_world, &rank); 30 | MPI_Comm_size(split_comm_world, &size); 31 | 32 | // message to 0 proc of Each Comm 33 | if (id_procs == 0) { 34 | strcpy(seqin, seq); 35 | for (int i = 1; i < 3; i++) 36 | MPI_Send(&seq, 16, MPI_CHAR, i, tag, MPI_COMM_WORLD); 37 | } else if (id_procs > 0 && id_procs < 3) { 38 | MPI_Recv(&seqin, 16, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status); 39 | } 40 | 41 | // Broadcast within the group 42 | MPI_Bcast(&seqin, 16, MPI_CHAR, 0, split_comm_world); 43 | 44 | printf("MPI Comm rank %d, original id %d, size %d. the new msg is %s\n", rank, id_procs, size, seqin); 45 | MPI_Finalize(); 46 | return 0; 47 | } -------------------------------------------------------------------------------- /PP_02/2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print(int id, int id_procs, int num_procs, int *recvbuf) { 6 | if (id_procs == id) { 7 | printf("recv buf %d : ", id_procs); 8 | for(int i = 0; i < num_procs; i++) { 9 | printf("%d, ", recvbuf[i]); 10 | } 11 | printf("\n"); 12 | } 13 | } 14 | 15 | int main(int argc, char *argv[]) 16 | { 17 | int id_procs, num_procs; 18 | MPI_Status status; 19 | MPI_Request req; 20 | 21 | MPI_Init(&argc, &argv); 22 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 23 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 24 | 25 | int sendbuf[num_procs]; 26 | int recvbuf[num_procs]; 27 | for(int i = 0; i < num_procs; i++) 28 | sendbuf[i] = 10*(i+1)+id_procs; 29 | 30 | for(int i = 0; i < num_procs; i++) { 31 | if (i != id_procs) { 32 | MPI_Send(sendbuf+i, 1, MPI_INT, i, id_procs, MPI_COMM_WORLD); 33 | MPI_Recv(recvbuf+i, 1, MPI_INT, i, i, MPI_COMM_WORLD, &status); 34 | } 35 | else { 36 | recvbuf[i] = sendbuf[i]; 37 | } 38 | } 39 | 40 | MPI_Barrier(MPI_COMM_WORLD); 41 | 42 | // for(int i = 0; i < num_procs; i++) 43 | // print(i, id_procs, num_procs, recvbuf); 44 | 45 | MPI_Finalize(); 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /PP_02/3.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print(int id, int id_procs, int num_procs, int *recvbuf) { 6 | if (id_procs == id) { 7 | printf("recv buf %d : ", id_procs); 8 | for(int i = 0; i < num_procs; i++) { 9 | printf("%d, ", recvbuf[i]); 10 | } 11 | printf("\n"); 12 | } 13 | } 14 | 15 | int main(int argc, char *argv[]) 16 | { 17 | int id_procs, num_procs; 18 | 19 | MPI_Init(&argc, &argv); 20 | 
MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 21 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 22 | 23 | int sendbuf[num_procs]; 24 | int recvbuf[num_procs]; 25 | for(int i = 0; i < num_procs; i++) 26 | sendbuf[i] = 10*(i+1)+id_procs; 27 | 28 | MPI_Alltoall(sendbuf, 1, MPI_INT, recvbuf, 1, MPI_INT, MPI_COMM_WORLD); 29 | MPI_Barrier(MPI_COMM_WORLD); 30 | 31 | // for(int i = 0; i < num_procs; i++) 32 | // print(i, id_procs, num_procs, recvbuf); 33 | 34 | MPI_Finalize(); 35 | return 0; 36 | } -------------------------------------------------------------------------------- /PP_02/4_1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int id_procs, num_procs; 10 | 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 14 | 15 | srand(clock()); 16 | int data = rand() % 100; 17 | int recvdata; 18 | MPI_Status status; 19 | 20 | printf("data = %d\n", data); 21 | // Butterfly sum 22 | // each loop is a layer 23 | for(int i = 2; i <= num_procs; i <<= 1) { 24 | int tag = i >> 1; 25 | int dest = id_procs ^ tag; 26 | MPI_Send(&data, 1, MPI_INT, dest, tag, MPI_COMM_WORLD); 27 | MPI_Recv(&recvdata, 1, MPI_INT, dest, tag, MPI_COMM_WORLD, &status); 28 | data += recvdata; 29 | } 30 | 31 | printf("Sum is = %d\n", data); 32 | 33 | MPI_Finalize(); 34 | return 0; 35 | } 36 | -------------------------------------------------------------------------------- /PP_02/4_2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int id_procs, num_procs; 10 | 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 14 | 15 | srand(clock()); 16 | int data = rand() % 100; 17 | int recvdata; 18 | MPI_Status status; 19 | 20 | printf("data = %d\n", data); 21 | // Binary Tree sum 22 | // each loop is a layer 23 | for(int i = 2; i <= num_procs; i <<= 1) { 24 | int tag = i >> 1; 25 | int diff = id_procs & tag; 26 | if (diff) { 27 | MPI_Send(&data, 1, MPI_INT, id_procs-tag, tag, MPI_COMM_WORLD); 28 | } else { 29 | MPI_Recv(&recvdata, 1, MPI_INT, id_procs+tag, tag, MPI_COMM_WORLD, &status); 30 | } 31 | data += recvdata; 32 | } 33 | 34 | // now 0 process has the sum 35 | for(int i = num_procs; i >= 2; i >>= 1) { 36 | int tag = i; 37 | if (id_procs % i == 0) { 38 | MPI_Send(&data, 1, MPI_INT, id_procs+(i>>1), tag, MPI_COMM_WORLD); 39 | } else if(id_procs % (i >> 1) == 0) { 40 | MPI_Recv(&data, 1, MPI_INT, id_procs-(i>>1), tag, MPI_COMM_WORLD, &status); 41 | } 42 | } 43 | printf("%d Sum is = %d\n", id_procs, data); 44 | 45 | MPI_Finalize(); 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /PP_02/5.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #define INDEX(i, j, N) (((i)*(N))+(j)) 10 | 11 | void random_mat(int *a, int num) { 12 | for(int i = 0; i < num; i++) { 13 | srand(clock()); 14 | for(int j = 0; j < num; j++) { 15 | a[INDEX(i, j, num)] = rand() % 100; 16 | } 17 | } 18 | } 19 | 20 | void print_mat(int *a, int num, int id) { 21 | for(int i = 0; i < num; i++) { 22 | for(int j = 0; j < num; j++) { 23 | printf("|%d : %d ", id, a[INDEX(i, j, num)]); 24 | 
} 25 | printf("\n"); 26 | } 27 | } 28 | 29 | void comp(int *A, int *B, int *C, int num) { 30 | for(int i = 0; i < num; i++) { 31 | for(int j = 0; j < num; j++) { 32 | for(int k = 0; k < num; k++) 33 | C[INDEX(i, j, num)] += A[INDEX(i, k, num)] * B[INDEX(k, j, num)]; 34 | } 35 | } 36 | } 37 | 38 | 39 | int check(int *C, int *nC, int num) { 40 | for(int i = 0; i < num; i++) { 41 | for(int j = 0; j < num; j++) { 42 | if (C[INDEX(i, j, num)] != nC[INDEX(i, j, num)]) { 43 | printf("C[%d,%d] should be %d ,not %d\n", 44 | i,j,C[INDEX(i,j,num)],nC[INDEX(i,j,num)]); 45 | return 0; 46 | } 47 | } 48 | } 49 | return 1; 50 | } 51 | 52 | int main(int argc, char *argv[]) 53 | { 54 | int id_procs, num_procs; 55 | int blksize, sqrt_procs; 56 | 57 | MPI_Init(&argc, &argv); 58 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 59 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 60 | 61 | sqrt_procs = sqrt(num_procs); 62 | if (sqrt_procs * sqrt_procs != num_procs) { 63 | fprintf(stderr, "The Num of Proc must be Perfect square!\n"); 64 | return 1; 65 | } 66 | if (argc != 2) { 67 | fprintf(stderr, "Please add a Parameter about the block size!\n"); 68 | return 1; 69 | } 70 | blksize = atoi(argv[1]); 71 | 72 | // produce random data 73 | int *A, *B, *C, *ans; 74 | int *A_in, *B_in; 75 | int *sA, *sB, *sC; 76 | int N = blksize*sqrt_procs; 77 | 78 | if (id_procs == 0) { 79 | sA = (int*)malloc(N*N*sizeof(int)); 80 | sB = (int*)malloc(N*N*sizeof(int)); 81 | sC = (int*)malloc(N*N*sizeof(int)); 82 | 83 | memset(sC, 0, N*N*sizeof(int)); 84 | random_mat(sA, N); 85 | random_mat(sB, N); 86 | comp(sA, sB, sC, N); 87 | } 88 | A = (int*)malloc(blksize*blksize*sizeof(int)); 89 | B = (int*)malloc(blksize*blksize*sizeof(int)); 90 | C = (int*)malloc(blksize*blksize*sizeof(int)); 91 | ans = (int*)malloc(blksize*blksize*sizeof(int)); 92 | A_in = (int*)malloc(blksize*blksize*sizeof(int)); 93 | B_in = (int*)malloc(blksize*blksize*sizeof(int)); 94 | 95 | memset(C, 0, blksize*blksize*sizeof(int)); 96 | 97 | MPI_Datatype SubMat, Mat; 98 | MPI_Status status; 99 | MPI_Request request; 100 | MPI_Type_vector(blksize, blksize, N, MPI_INT, &SubMat); 101 | MPI_Type_commit(&SubMat); 102 | 103 | MPI_Type_vector(blksize, blksize, blksize, MPI_INT, &Mat); 104 | MPI_Type_commit(&Mat); 105 | 106 | if (id_procs == 0) { 107 | for(int i = 0; i < sqrt_procs; i++) { 108 | int lineoff = blksize * N * i; 109 | for(int j = 0; j < sqrt_procs; j++) { 110 | if (i == 0 && j == 0) { 111 | // 分发矩阵 A 112 | MPI_Isend(sA, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 113 | MPI_Irecv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 114 | MPI_Wait(&request, &status); 115 | // 分发矩阵 B 116 | MPI_Isend(sB, 1, SubMat, 0, 1, MPI_COMM_WORLD, &request); 117 | MPI_Irecv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &request); 118 | MPI_Wait(&request, &status); 119 | continue; 120 | } 121 | int offset = j * blksize + lineoff; 122 | MPI_Send(sA+offset, 1, SubMat, i*sqrt_procs+j, 0, MPI_COMM_WORLD); 123 | MPI_Send(sB+offset, 1, SubMat, i*sqrt_procs+j, 1, MPI_COMM_WORLD); 124 | } 125 | } 126 | } else { 127 | MPI_Recv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &status); 128 | // print_mat(A, blksize, id_procs); 129 | MPI_Recv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &status); 130 | // print_mat(B, blksize, id_procs); 131 | } 132 | 133 | MPI_Comm row_comm, col_comm; 134 | int rank_A, size_A; 135 | int color_A; 136 | int key_A; 137 | 138 | int rank_B, size_B; 139 | int color_B; 140 | int key_B; 141 | 142 | // Comm Group by row 143 | key_A = id_procs % sqrt_procs; 144 | color_A = id_procs / sqrt_procs; 145 | 
MPI_Comm_split(MPI_COMM_WORLD, color_A, key_A, &row_comm); 146 | MPI_Comm_rank(row_comm, &rank_A); 147 | MPI_Comm_size(row_comm, &size_A); 148 | 149 | // Comm Group by B 150 | key_B = id_procs / sqrt_procs; 151 | color_B = id_procs % sqrt_procs; 152 | MPI_Comm_split(MPI_COMM_WORLD, color_B, key_B, &col_comm); 153 | MPI_Comm_rank(col_comm, &rank_B); 154 | MPI_Comm_size(col_comm, &size_B); 155 | 156 | for(int k = 0; k < sqrt_procs; k++) { 157 | if (rank_A == (color_A+k)%size_A) { 158 | memcpy(A_in, A, blksize*blksize*sizeof(int)); 159 | } 160 | // broadcast Ai,j 161 | MPI_Bcast(A_in, 1, Mat, (color_A+k)%size_A, row_comm); 162 | 163 | // compute 164 | comp(A_in, B, C, blksize); 165 | 166 | int dest = (rank_B-1 + size_B)%size_B; 167 | MPI_Send(B, 1, Mat, dest, 0, col_comm); 168 | MPI_Recv(B_in, 1, Mat, (rank_B+1)%size_B, 0, col_comm, &status); 169 | memcpy(B, B_in, blksize*blksize*sizeof(int)); 170 | } 171 | 172 | 173 | // 分发结果 自行比较 174 | if (id_procs == 0) { 175 | for(int i = 0; i < sqrt_procs; i++) { 176 | int lineoff = blksize * N * i; 177 | for(int j = 0; j < sqrt_procs; j++) { 178 | if (i == 0 && j == 0) { 179 | // 分发矩阵 A 180 | MPI_Isend(sC, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 181 | MPI_Irecv(ans, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 182 | MPI_Wait(&request, &status); 183 | continue; 184 | } 185 | int offset = j * blksize + lineoff; 186 | MPI_Send(sC+offset, 1, SubMat, i*sqrt_procs+j, 100, MPI_COMM_WORLD); 187 | } 188 | } 189 | } else { 190 | MPI_Recv(ans, 1, Mat, 0, 100, MPI_COMM_WORLD, &status); 191 | } 192 | 193 | // print_mat(ans, blksize, id_procs); 194 | 195 | if (check(C, ans, blksize)) { 196 | printf("Proc#%d Done.\n", id_procs); 197 | } 198 | 199 | // print_mat(C, blksize, id_procs); 200 | 201 | free(A); 202 | free(B); 203 | free(C); 204 | free(ans); 205 | free(A_in); 206 | free(B_in); 207 | 208 | if (id_procs == 0) { 209 | free(sA); 210 | free(sB); 211 | free(sC); 212 | } 213 | MPI_Finalize(); 214 | return 0; 215 | } 216 | -------------------------------------------------------------------------------- /PP_02/6.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #ifndef PNum 7 | #define PNum 2 8 | #endif 9 | 10 | enum MsgTag { 11 | RANDATA, 12 | AVERAGE 13 | }; 14 | 15 | void work(int id) { 16 | int randata; 17 | int recvdata; 18 | MPI_Status status; 19 | 20 | while(1) { 21 | srand(time(NULL)+id); 22 | randata = rand() % 100; 23 | MPI_Send(&randata, 1, MPI_INT, id % PNum, RANDATA, MPI_COMM_WORLD); 24 | MPI_Recv(&recvdata, 1, MPI_INT, id % PNum, AVERAGE, MPI_COMM_WORLD, &status); 25 | printf("Proc#%d receive average data = %d\n", id, recvdata); 26 | } 27 | } 28 | 29 | void serve(int id, int num) { 30 | int input[num]; 31 | int average; 32 | int sum, ctn; 33 | MPI_Status status; 34 | 35 | while (1) { 36 | sum = 0; 37 | ctn = 0; 38 | for(int i = 1; i*PNum+id P-1) { 69 | work(id_procs); 70 | } 71 | else { 72 | serve(id_procs, num_procs); 73 | } 74 | 75 | MPI_Finalize(); 76 | return 0; 77 | } 78 | -------------------------------------------------------------------------------- /PP_02/7.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #ifndef N 8 | #define N 50 9 | #endif 10 | 11 | #define INDEX(i, j) (((i)*N)+(j)) 12 | 13 | 14 | void random_array(double *a, int num) { 15 | for(int i = 0; i < num; i++) { 16 | srand(time(NULL)); 17 | a[i] = rand() % 100; 18 | } 19 | } 20 | 21 | 
void comp(double *A, double *B, int num) { 22 | for(int i = 1; i < N-1; i++) { 23 | for(int j = 1; j < N-1; j++) { 24 | B[INDEX(i, j)] = (A[INDEX(i-1, j)]+A[INDEX(i, j+1)]+A[INDEX(i+1, j)]+A[INDEX(i, j-1)]) / 4.0; 25 | } 26 | } 27 | } 28 | 29 | int check(double *B, double *C) { 30 | for(int i = 1; i < N-1; i++) { 31 | for(int j = 1; j < N-1; j++) { 32 | if (fabs(B[INDEX(i, j)]-C[INDEX(i, j)]) >= 1e-2) { 33 | printf("B[%d,%d] = %lf not %lf!\n", i, j, B[INDEX(i, j)], C[INDEX(i, j)]); 34 | return 0; 35 | } 36 | } 37 | } 38 | return 1; 39 | } 40 | 41 | int main(int argc, char *argv[]) { 42 | double *A, *B, *B2; 43 | A = (double*)malloc(N*N*sizeof(double)); 44 | B = (double*)malloc(N*N*sizeof(double)); 45 | B2= (double*)malloc(N*N*sizeof(double)); 46 | 47 | int id_procs, num_procs, num_1; 48 | MPI_Status status; 49 | MPI_Init(&argc, &argv); 50 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 51 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 52 | 53 | num_1 = num_procs -1; 54 | // Proc#N-1 randomize the data 55 | if (id_procs == num_1) { 56 | random_array(A, N*N); 57 | comp(A, B2, N*N); 58 | } 59 | 60 | MPI_Barrier(MPI_COMM_WORLD); 61 | 62 | // Proc#N-1 broadcast 3 lines of A to each Proc 63 | int ctn = 0; 64 | for(int i = 0; i < N-2; i++) { 65 | if (id_procs == num_1) { 66 | int dest = i % num_1; 67 | int tag = i / num_1; 68 | MPI_Send(&A[INDEX(i, 0)], N*3, MPI_DOUBLE, dest, tag, MPI_COMM_WORLD); 69 | } 70 | } 71 | 72 | for(int i = 0; i < (N-2)/num_1; i++) { 73 | if (id_procs != num_1) { 74 | MPI_Recv(&A[INDEX(3*ctn, 0)], 3*N, MPI_DOUBLE, num_1, ctn, MPI_COMM_WORLD, &status); 75 | ctn++; 76 | } 77 | } 78 | if (id_procs < (N-2) % num_1) { 79 | MPI_Recv(&A[INDEX(ctn*3, 0)], 3*N, MPI_DOUBLE, num_1, ctn, MPI_COMM_WORLD, &status); 80 | ctn++; 81 | } 82 | 83 | // compute 84 | if (id_procs != num_1) { 85 | for(int i = 1; i <= ctn; i++) { 86 | for(int j = 1; j < N-1; j++) { 87 | B[INDEX(i, j)] = (A[INDEX(i-1, j)]+A[INDEX(i, j+1)]+A[INDEX(i+1, j)]+A[INDEX(i, j-1)]) / 4.0; 88 | } 89 | } 90 | } 91 | 92 | // Gather 93 | for(int i = 0; i < N-2; i++) { 94 | if (id_procs == num_1) { 95 | int src = i % num_1; 96 | MPI_Recv(&B[INDEX(i+1, 1)], N-2, MPI_DOUBLE, src, i/num_1+N, MPI_COMM_WORLD, &status); 97 | } 98 | else { 99 | for(int j = 0; j < ctn; j++) 100 | MPI_Send(&B[INDEX(j+1, 1)], N-2, MPI_DOUBLE, num_1, j+N, MPI_COMM_WORLD); 101 | } 102 | } 103 | 104 | 105 | if (id_procs == num_1) { 106 | if(check(B, B2)) { 107 | printf("Done.No Error\n"); 108 | } else { 109 | printf("Error Occured!\n"); 110 | } 111 | } 112 | free(A); 113 | free(B); 114 | free(B2); 115 | MPI_Finalize(); 116 | return 0; 117 | } 118 | -------------------------------------------------------------------------------- /PP_02/7_2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #ifndef N 8 | #define N 50 9 | #endif 10 | 11 | #define INDEX(i, j) (((i)*N)+(j)) 12 | 13 | void random_array(double *a, int num) { 14 | for(int i = 0; i < num; i++) { 15 | srand(time(NULL)); 16 | a[i] = rand() % 100; 17 | } 18 | } 19 | 20 | void comp(double *A, double *B, int a, int b) { 21 | for(int i = 1; i <= a; i++) { 22 | for(int j = 1; j <= b; j++) { 23 | B[INDEX(i, j)] = (A[INDEX(i-1, j)]+A[INDEX(i, j+1)]+A[INDEX(i+1, j)]+A[INDEX(i, j-1)]) / 4.0; 24 | } 25 | } 26 | } 27 | 28 | int check(double *B, double *C) { 29 | for(int i = 1; i < N-1; i++) { 30 | for(int j = 1; j < N-1; j++) { 31 | if (fabs(B[INDEX(i, j)]-C[INDEX(i, j)]) >= 1e-2) { 32 | printf("B[%d,%d] = %lf 
not %lf!\n", i, j, B[INDEX(i, j)], C[INDEX(i, j)]); 33 | return 0; 34 | } 35 | } 36 | } 37 | return 1; 38 | } 39 | 40 | 41 | int main(int argc, char *argv[]) 42 | { 43 | double *A, *B, *B2; 44 | 45 | int id_procs, num_procs; 46 | MPI_Status status; 47 | MPI_Init(&argc, &argv); 48 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 49 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 50 | 51 | MPI_Datatype SubMat; 52 | int rows = sqrt(num_procs); 53 | int cols = num_procs / rows; 54 | int a = (N-2 + rows-1) / rows; 55 | int b = (N-2 + cols-1) / cols; 56 | int alloc_num = (a+1)*(b+1)*num_procs; 57 | A = (double*)malloc(alloc_num*sizeof(double)); 58 | B = (double*)malloc(alloc_num*sizeof(double)); 59 | B2= (double*)malloc(alloc_num*sizeof(double)); 60 | 61 | // Proc#0 randomize the data 62 | if (id_procs == 0) { 63 | random_array(A, N*N); 64 | comp(A, B2, N-2, N-2); 65 | } 66 | 67 | MPI_Barrier(MPI_COMM_WORLD); 68 | 69 | // Proc#0 broadcast (a+2)x(b+2) mat 70 | MPI_Type_vector(a+2, b+2, N, MPI_DOUBLE, &SubMat); 71 | MPI_Type_commit(&SubMat); 72 | 73 | if (id_procs == 0) { 74 | for(int i = 0; i < rows; i++) { 75 | for(int j = 0; j < cols; j++) { 76 | if (i == 0 && j == 0) 77 | continue; 78 | MPI_Send(A+i*a*N+b*j, 1, SubMat, j+cols*i, 0, MPI_COMM_WORLD); 79 | } 80 | } 81 | } 82 | else { 83 | MPI_Recv(A, 1, SubMat, 0, 0, MPI_COMM_WORLD, &status); 84 | } 85 | 86 | // compute 87 | comp(A, B, a, b); 88 | 89 | // Gather result 90 | MPI_Datatype SubMat_B; 91 | MPI_Type_vector(a, b, N, MPI_DOUBLE, &SubMat_B); 92 | MPI_Type_commit(&SubMat_B); 93 | if (id_procs == 0) { 94 | for(int i = 0; i < rows; i++) { 95 | for(int j = 0; j < cols; j++) { 96 | if (i == 0 && j == 0) 97 | continue; 98 | MPI_Recv(&B[INDEX(a*i+1, b*j+1)], 1, SubMat_B, i*cols+j, 1, MPI_COMM_WORLD, &status); 99 | } 100 | } 101 | } else { 102 | int x = id_procs / cols; 103 | int y = id_procs % cols; 104 | MPI_Send(&B[INDEX(1, 1)], 1, SubMat_B, 0, 1, MPI_COMM_WORLD); 105 | } 106 | 107 | if (id_procs == 0) 108 | if (check(B, B2)) { 109 | printf("Done.No Error\n"); 110 | } else { 111 | printf("Error!\n"); 112 | } 113 | 114 | free(A); 115 | free(B); 116 | free(B2); 117 | MPI_Finalize(); 118 | return 0; 119 | return 0; 120 | } 121 | -------------------------------------------------------------------------------- /PP_02/Makefile: -------------------------------------------------------------------------------- 1 | CC=mpicc 2 | OPENMP= 3 | SOURCES:=$(shell find $(.) -name '*.c') 4 | LIB=-lm 5 | OBJS=$(SOURCES:%.c=%) 6 | 7 | 8 | all : $(OBJS) 9 | @echo $(SOURCES) 10 | @echo "编译完成" 11 | if [ ! 
-d "build" ]; then mkdir build; fi 12 | mv $(OBJS) build 13 | 14 | %: %.c 15 | $(CC) $(OPENMP) $< $(LIB) -o $@ 16 | 17 | .PHONY: clean 18 | clean: 19 | rm -rf build -------------------------------------------------------------------------------- /PP_02/config: -------------------------------------------------------------------------------- 1 | node1:4 2 | node2:4 3 | node3:4 -------------------------------------------------------------------------------- /PP_02/mpi-lab.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/PP_02/mpi-lab.pdf -------------------------------------------------------------------------------- /PP_02/run.sh: -------------------------------------------------------------------------------- 1 | make 2 | 3 | for file in `ls build` 4 | do 5 | echo "*********************" 6 | echo "Lab02 " ${file} " program :" 7 | time mpiexec -n 8 ./build/${file} 8 | done 9 | -------------------------------------------------------------------------------- /PP_02/test.sh: -------------------------------------------------------------------------------- 1 | for i in $(seq 2 16) 2 | do 3 | time -o report2_${i} mpiexec -n ${i} build/2 4 | time -o report3_${i} mpiexec -n ${i} build/3 5 | done -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 并行计算程序设计代码库 2 | 3 | - 包含有 OpenMP 和 MPI 等多线程的编程实例 4 | - 详见example 5 | - 相关课程上机实验 6 | - PP_01 第一次实验 OpenMp 7 | - PP_02 第二次实验 MPI 8 | - 《并行算法实践》中部分章节的代码实现 9 | - sort 第十三章 排序 10 | - matrix 第十八章 矩阵运算 11 | -------------------------------------------------------------------------------- /example/mini-omp-demo/badloop.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | DESCRIPTION: 4 | Parallelizing an inner loop with dependences 5 | Backward dependency 6 | 7 | 8 | for (i=0; i 19 | #include 20 | #include 21 | 22 | #define NUM_THREADS 4 23 | 24 | #define VSIZE 100 25 | 26 | void main() 27 | { 28 | int V[ VSIZE+1 ],i,U[VSIZE+1]; 29 | for (i=0; i 2 | main() 3 | { 4 | int x; 5 | int i,id; 6 | x = 0; 7 | #pragma omp parallel shared(x) private(i,id) 8 | { 9 | // #pragma omp critical 10 | { 11 | id = omp_get_thread_num(); 12 | printf("before thread %d : X = %d\n",id,x); 13 | for(i=0;i<3000000;i++) x = x + 1; 14 | printf("after thread %d : X = %d\n",id,x); 15 | 16 | } 17 | } /* end of parallel section */ 18 | 19 | printf("out of the parallel region : X = %d\n",x); 20 | } 21 | 22 | 23 | -------------------------------------------------------------------------------- /example/mini-omp-demo/critical.c: -------------------------------------------------------------------------------- 1 | #include 2 | main() 3 | { 4 | int x; 5 | x = 0; 6 | #pragma omp parallel shared(x) 7 | { 8 | #pragma omp critical 9 | x = x + 1; 10 | } /* end of parallel section */ 11 | 12 | printf("out of the parallel region : X = %d\n",x); 13 | } 14 | 15 | 16 | -------------------------------------------------------------------------------- /example/mini-omp-demo/dis-err.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* 5 | 6 | for(i=4;i<100;i++){ 7 | a[i] = b[i-2] + 1; 8 | c[i] = b[i-1] + f[i]; 9 | b[i] = a[i-1] + 2; 10 | d[i] = d[i+1] + b[i-1]; 11 | } 12 | 13 | */ 14 | 15 | #define Iter 100000 16 | 17 | main() 18 | { 19 | int i; 
20 | 21 | int a[Iter],b[Iter],c[Iter],d[Iter],f[Iter]; 22 | int a1[Iter],b1[Iter],c1[Iter],d1[Iter],f1[Iter]; 23 | 24 | 25 | for(i=0;i 2 | #include 3 | 4 | /* 5 | 6 | for(i=4;i<100;i++){ 7 | S1: a[i] = b[i-2] + 1; 8 | S2: c[i] = b[i-1] + f[i]; 9 | S3: b[i] = a[i-1] + 2; 10 | S4: d[i] = d[i+1] + b[i-1]; 11 | } 12 | 13 | */ 14 | 15 | #define Iter 10000 16 | 17 | main() 18 | { 19 | int i; 20 | int a[Iter],b[Iter],c[Iter],d[Iter],f[Iter]; 21 | int a1[Iter],b1[Iter],c1[Iter],d1[Iter],f1[Iter]; 22 | 23 | int old_d[Iter]; // duplicating array-d to avoid 24 | // anti-dependency 25 | 26 | for(i=0;i 2 | #include 3 | 4 | /* 5 | 6 | for(i=4;i<100;i++){ 7 | S1: a[i] = b[i-2] + 1; 8 | S2: c[i] = b[i-1] + f[i]; 9 | S3: b[i] = a[i-1] + 2; 10 | S4: d[i] = d[i+1] + b[i-1]; 11 | } 12 | 13 | */ 14 | 15 | #define Iter 100000 16 | 17 | main() 18 | { 19 | int i; 20 | int a[Iter],b[Iter],c[Iter],d[Iter],f[Iter]; 21 | int a1[Iter],b1[Iter],c1[Iter],d1[Iter],f1[Iter]; 22 | 23 | int old_d[Iter]; // duplicating array-d to avoid 24 | // anti-dependency 25 | 26 | for(i=0;i 20 | #include 21 | #include 22 | 23 | 24 | #define VSIZE 40000 25 | 26 | /* PROTOYPES */ 27 | 28 | /* MAIN: PROCESS PARAMETERS */ 29 | int main(int argc, char *argv[]) { 30 | 31 | 32 | int V[VSIZE],oldV[VSIZE],U[VSIZE]; 33 | 34 | 35 | 36 | int i; 37 | 38 | 39 | for (i=0; i 20 | #include 21 | #include 22 | 23 | 24 | #define VSIZE 40000 25 | 26 | #define THREADS_NUM 4 27 | 28 | /* PROTOYPES */ 29 | 30 | /* MAIN: PROCESS PARAMETERS */ 31 | void main() { 32 | 33 | 34 | int V[VSIZE],U[VSIZE]; 35 | 36 | int border,size; 37 | int LimitL, LimitR; 38 | int i,id; 39 | 40 | 41 | omp_set_num_threads(THREADS_NUM); 42 | 43 | size = VSIZE / THREADS_NUM ; 44 | 45 | for (i=0; i 2 | main() 3 | { 4 | int x; 5 | x = 0; 6 | #pragma omp parallel shared(x) 7 | { 8 | #pragma omp master 9 | x = x + 10; 10 | #pragma omp critical 11 | x = x + 1; 12 | } /* end of parallel section */ 13 | 14 | printf("out of the parallel region : X = %d\n",x); 15 | } 16 | 17 | 18 | -------------------------------------------------------------------------------- /example/mini-omp-demo/pfor-no-schedule.c: -------------------------------------------------------------------------------- 1 | #include 2 | #define N 100000 3 | int main () 4 | { 5 | int i, chunk,id; 6 | float a[N], b[N], c[N]; 7 | /* Some initializations */ 8 | for (i=0; i < N; i++) 9 | a[i] = b[i] = i * 1.0; 10 | 11 | #pragma omp parallel for shared(a,b,c) private(i,chunk,id) 12 | for (i=0; i < N; i++) 13 | { 14 | 15 | c[i] = a[i] + b[i]; 16 | 17 | chunk = N / omp_get_num_threads(); 18 | id = omp_get_thread_num(); 19 | 20 | if ( (i%chunk)==0 ) printf("Iteration #%d in thread #%d\n",i, id); 21 | 22 | } 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /example/mini-omp-demo/pfor.c: -------------------------------------------------------------------------------- 1 | #include 2 | #define N 100000 3 | #define CHUNKSIZE 1000 4 | int main () { 5 | int i, chunk; 6 | float a[N], b[N], c[N]; 7 | /* Some initializations */ 8 | for (i=0; i < N; i++) 9 | a[i] = b[i] = i * 1.0; 10 | chunk = CHUNKSIZE; 11 | #pragma omp parallel for \ 12 | shared(a,b,c,chunk) private(i) \ 13 | schedule(static,chunk) 14 | for (i=0; i < N; i++) 15 | { 16 | int id; 17 | 18 | c[i] = a[i] + b[i]; 19 | 20 | id = omp_get_thread_num(); 21 | if ( (i % chunk) == 0 ) printf("Iteration #%d in thread #%d\n",i, id); 22 | 23 | } 24 | } 25 | -------------------------------------------------------------------------------- 
/example/mini-omp-demo/pi01.c: -------------------------------------------------------------------------------- 1 | #include 2 | static long num_steps = 100000; 3 | double step; 4 | #define NUM_THREADS 2 5 | void main () 6 | { 7 | int i; 8 | double x, pi, sum[NUM_THREADS]; 9 | step = 1.0/(double) num_steps; 10 | omp_set_num_threads(NUM_THREADS); 11 | #pragma omp parallel private(i) 12 | { 13 | double x; 14 | int id; 15 | id = omp_get_thread_num(); 16 | sum[id] = 0; 17 | #pragma omp for 18 | for (i=0;i< num_steps; i++){ 19 | x = (i+0.5)*step; 20 | sum[id] += 4.0/(1.0+x*x); 21 | } 22 | } 23 | for(i=0, pi=0.0;i 2 | static long num_steps = 100000; 3 | double step; 4 | #define NUM_THREADS 4 5 | void main () 6 | { int i; 7 | double x, pi, sum[NUM_THREADS]; 8 | step = 1.0/(double) num_steps; 9 | omp_set_num_threads(NUM_THREADS); 10 | #pragma omp parallel 11 | { 12 | double x; 13 | int id; 14 | id = omp_get_thread_num(); 15 | for (i=id, sum[id]=0.0;i< num_steps; i=i+NUM_THREADS){ 16 | x = (i+0.5)*step; 17 | sum[id] += 4.0/(1.0+x*x); 18 | } 19 | } 20 | for(i=0, pi=0.0;i 2 | static long num_steps = 100000; 3 | double step; 4 | #define NUM_THREADS 8 5 | void main () 6 | { 7 | int i,id; 8 | double x, sum, pi=0.0; 9 | step = 1.0/(double) num_steps; 10 | omp_set_num_threads(NUM_THREADS); 11 | #pragma omp parallel private (x,i,sum) 12 | { 13 | id = omp_get_thread_num(); 14 | for (i=id,sum=0.0;i< num_steps;i=i+NUM_THREADS){ 15 | x = (i+0.5)*step; 16 | sum += 4.0/(1.0+x*x); 17 | } 18 | #pragma omp critical 19 | pi += sum*step; 20 | 21 | // #pragma omp barrier 22 | 23 | // #pragma omp master 24 | // printf("Pi = %lf\n",pi); 25 | } 26 | printf("Pi = %lf\n",pi); 27 | 28 | } 29 | -------------------------------------------------------------------------------- /example/mini-omp-demo/pi04.c: -------------------------------------------------------------------------------- 1 | #include 2 | static long num_steps = 100000; 3 | double step; 4 | #define NUM_THREADS 8 5 | void main () 6 | { int i; 7 | double x, pi, sum = 0.0; 8 | step = 1.0/(double) num_steps; 9 | omp_set_num_threads(NUM_THREADS); 10 | #pragma omp parallel for reduction(+:sum) private(x) 11 | for (i=0;i 19 | #include 20 | #include 21 | 22 | #include 23 | 24 | double omp_time() { 25 | static int sec = -1; 26 | struct timeval tv; 27 | gettimeofday(&tv, (void *)0); 28 | if (sec < 0) sec = tv.tv_sec; 29 | return (tv.tv_sec - sec) + 1.0e-6*tv.tv_usec; 30 | } 31 | 32 | 33 | void loop(int, int, int); 34 | 35 | 36 | 37 | int main(int argc, char *argv[]) { 38 | 39 | int nthreads, size, numiter; 40 | 41 | 42 | double start,finish; 43 | 44 | nthreads = omp_get_max_threads(); 45 | 46 | size = atoi(argv[1]); 47 | numiter = atoi(argv[2]); 48 | 49 | start = omp_time(); 50 | loop(nthreads, size, numiter); 51 | finish = omp_time(); 52 | 53 | printf(" %d Threads of %d iterations with %d elements = %f (sec)\n", 54 | nthreads, numiter, size, finish-start); 55 | 56 | return 0; 57 | } 58 | 59 | 60 | #define f(x,y) ((x+y)/2.0) 61 | 62 | /* 63 | * 64 | * PARALLEL LOOP 65 | * 66 | */ 67 | 68 | void loop(int nthreads, int size, int numiter) { 69 | /* VARIABLES */ 70 | int i,iter; 71 | 72 | int thread; 73 | int limitL, limitR; 74 | 75 | int *leftLimit; 76 | int *rightLimit; 77 | 78 | 79 | 80 | /* DECLARE VECTOR AND ANCILLARY DATA STRUCTURES */ 81 | double *V=NULL; 82 | double border; 83 | 84 | int blockSize = size/nthreads; 85 | 86 | V = (double *)malloc(size*sizeof(double)); 87 | 88 | leftLimit = (int*)malloc(nthreads*sizeof(int)); 89 | rightLimit = 
(int*)malloc(nthreads*sizeof(int)); 90 | 91 | 92 | for(i = 0; i< nthreads; i++){ 93 | leftLimit [i] = i * blockSize; 94 | rightLimit[i] = (i+1)*blockSize - 1; 95 | } 96 | 97 | if( size % nthreads ){ 98 | rightLimit[nthreads-1] = size - 1; 99 | } 100 | 101 | 102 | // Initialization of array V 103 | 104 | for (i=0; i(iter-numiter) ) { 129 | 130 | /* COMPUTE FIRST ELEMENT (EXCEPT THREAD 0) */ 131 | 132 | if (thread != 0) 133 | V[limitL] = f( V[limitL], border ); 134 | 135 | /* COMPUTE THE REST OF ELEMENTS */ 136 | 137 | for (i=limitL+1; i<=limitR; i++) { 138 | V[i] = f( V[i], V[i-1] ); 139 | } 140 | } 141 | 142 | /* SYNCHRONIZE BEFORE COPYING UPDATED BORDER ELEMENT */ 143 | #pragma omp barrier 144 | 145 | } // end-of-for 146 | 147 | } // end-of-parallel-region 148 | 149 | } // end-of-main 150 | -------------------------------------------------------------------------------- /example/mini-omp-demo/private.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int alpha[10],beta[10],i; 4 | #pragma omp threadprivate(alpha) 5 | 6 | main() 7 | { 8 | /* first parallel region */ 9 | #pragma omp parallel private(i,beta) 10 | { int id ; 11 | id = omp_get_thread_num(); 12 | 13 | for(i=0;i<10;i++) 14 | alpha[i] = beta[i] = id * i; 15 | } 16 | /* second parallel region */ 17 | #pragma omp parallel 18 | printf("I am thread %d :alpha[3] = %d and beta[3] = %d\n", omp_get_thread_num(),alpha[3],beta[3]); 19 | 20 | } 21 | -------------------------------------------------------------------------------- /example/mini-omp-demo/reduction.c: -------------------------------------------------------------------------------- 1 | #include 2 | int main () 3 | { 4 | int i, n, chunk; 5 | float a[100], b[100], result; 6 | /* Some initializations */ 7 | n = 100; 8 | chunk = 10; 9 | result = 0.0; 10 | for (i=0; i < n; i++) 11 | { 12 | a[i] = i * 1.0; 13 | b[i] = i * 2.0; 14 | } 15 | #pragma omp parallel for default(shared) private(i) 16 | for (i=0; i < n; i++) 17 | result = result + (a[i] * b[i]); 18 | printf("Final result= %f\n",result); 19 | } 20 | 21 | -------------------------------------------------------------------------------- /example/mini-omp-demo/section.c: -------------------------------------------------------------------------------- 1 | #include 2 | #define N 1000 3 | int main (){ 4 | int i,id; 5 | float a[N], b[N], c[N]; 6 | /* Some initializations */ 7 | for (i=0; i < N; i++) 8 | a[i] = b[i] = i * 1.0; 9 | #pragma omp parallel shared(a,b,c) private(i,id) 10 | { 11 | #pragma omp sections nowait 12 | { 13 | #pragma omp section 14 | id = omp_get_thread_num(); 15 | printf("working in Thread %d\n",id); 16 | for (i=0; i < N/2; i++) 17 | c[i] = a[i] + b[i]; 18 | #pragma omp section 19 | id = omp_get_thread_num(); 20 | printf("working in Thread %d\n",id); 21 | for (i=N/2; i < N; i++) 22 | c[i] = a[i] + b[i]; 23 | } /* end of sections */ 24 | } /* end of parallel section */ 25 | } 26 | -------------------------------------------------------------------------------- /example/mini-omp-demo/single.c: -------------------------------------------------------------------------------- 1 | #include 2 | main() 3 | { 4 | int x; 5 | x = 0; 6 | #pragma omp parallel shared(x) 7 | { 8 | #pragma omp single 9 | { 10 | int id = omp_get_thread_num(); 11 | printf("I am thread #%d\n",id); 12 | x = x + 1; 13 | } 14 | } /* end of parallel section */ 15 | 16 | printf("out of the parallel region : X = %d\n",x); 17 | } 18 | 19 | 20 | 
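/* Note on reduction.c above (editorial sketch, not a file from this repo):
 * its parallel for updates the shared variable `result` from every thread
 * without a reduction clause, so the accumulated sum races (possibly left
 * that way on purpose as a contrast with critical.c). A minimal corrected
 * version of that dot-product loop, assuming the same a[], b[] and result,
 * looks like this: */

#include <stdio.h>
#include <omp.h>

int main(void)
{
    int i, n = 100;
    float a[100], b[100], result = 0.0f;

    for (i = 0; i < n; i++) {
        a[i] = i * 1.0f;
        b[i] = i * 2.0f;
    }

    /* reduction(+:result) gives each thread a private partial sum and
       combines the partial sums at the end, removing the race on result. */
    #pragma omp parallel for default(shared) private(i) reduction(+:result)
    for (i = 0; i < n; i++)
        result = result + (a[i] * b[i]);

    printf("Final result= %f\n", result);
    return 0;
}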
-------------------------------------------------------------------------------- /example/mini-omp-demo/threadprivate.c: -------------------------------------------------------------------------------- 1 | #include 2 | int alpha[10], beta[10], i; 3 | #pragma omp threadprivate(alpha) 4 | int main () 5 | { 6 | /* First parallel region */ 7 | #pragma omp parallel private(i,beta) 8 | for (i=0; i < 10; i++) 9 | alpha[i] = beta[i] = i; 10 | /* Second parallel region */ 11 | #pragma omp parallel 12 | printf("alpha[3]= %d and beta[3]=%d\n",alpha[3],beta[3]); 13 | } 14 | -------------------------------------------------------------------------------- /example/mpi/bigdata_sort.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #define length 20 7 | 8 | void swap(int *data, int i, int j) { 9 | int temp = data[i]; 10 | data[i] = data[j]; 11 | data[j] = temp; 12 | } 13 | 14 | int partition(int *data, int start, int end) { 15 | if (start >= end) return 0; 16 | int pivotValue = data[start]; 17 | int low = start; 18 | int high = end - 1; 19 | while (low < high) { 20 | while (data[low] <= pivotValue && low < end) low++; 21 | while (data[high] > pivotValue && high > start) high--; 22 | if (low < high) swap(data, low, high); 23 | } 24 | swap(data, start, high); 25 | return high; 26 | } 27 | 28 | void quicksort(int *data, int start, int end) { 29 | // why not end-start < 1 30 | if (end-start+1 < 2) return; 31 | 32 | int pivot = partition(data, start, end); 33 | quicksort(data, start, pivot); 34 | quicksort(data, pivot+1, end); 35 | } 36 | 37 | int main(int argc, char *argv[]) { 38 | MPI_Init(&argc, &argv); 39 | int rank, size; 40 | MPI_Comm_rank (MPI_COMM_WORLD, &rank); 41 | MPI_Comm_size (MPI_COMM_WORLD, &size); 42 | 43 | // 随机生成data 数组 44 | srand(time(0)); 45 | int *data = (int*)malloc(sizeof(int)*length); 46 | printf("data = %p, rank = %d\n", data, rank); 47 | 48 | int i; 49 | for (i=0; i 1; s /= 2) { 81 | if (rank % s == 0) { 82 | pivot = partition(data, 0, localDataSize); 83 | MPI_Send(data+pivot, localDataSize - pivot,MPI_INT, rank + s/2, 0, MPI_COMM_WORLD); 84 | localDataSize = pivot; 85 | } 86 | else if (rank % s == s/2) { 87 | MPI_Recv(data, length, MPI_INT, rank - s/2, 88 | MPI_ANY_TAG, MPI_COMM_WORLD, &status); 89 | MPI_Get_count(&status, MPI_INT, 90 | &localDataSize); 91 | } 92 | } 93 | 94 | quicksort(data, 0, localDataSize); 95 | gettimeofday(&end, 0); 96 | 97 | MPI_Barrier(MPI_COMM_WORLD); 98 | if (rank == 0) 99 | { 100 | float time = (end.tv_sec - start.tv_sec) + 101 | 0.000001*(end.tv_usec - start.tv_usec); 102 | for(int i = 0;i < length;i++) 103 | { 104 | printf("%d, ", data[i]); 105 | } 106 | printf("Time: %f s\n", time); 107 | } 108 | MPI_Finalize(); 109 | return 0; 110 | } -------------------------------------------------------------------------------- /example/mpi/matmul.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int main(int argc, char *argv[]) 7 | { 8 | MPI_Init(&argc, &argv); 9 | 10 | int num_procs; 11 | int rank; 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | 15 | int i; 16 | int vec; 17 | int *mat = (int *)malloc(sizeof(int)*num_procs); 18 | int *ret = (int *)malloc(sizeof(int)*num_procs); 19 | int *ans = (int *)malloc(sizeof(int)*num_procs); 20 | srand(time(0)); 21 | for(i = 0;i < num_procs;i++) 22 | { 23 | mat[i] = rand() % 10; 24 | } 
25 | vec = rand() % 10; 26 | 27 | for(i = 0;i < num_procs;i++) 28 | { 29 | printf("%d: %d\n", rank, mat[i]); 30 | printf("%d: vec = %d\n", rank, vec); 31 | } 32 | for(i = 0;i < num_procs;i++) 33 | { 34 | ans[i] = vec * mat[i]; 35 | } 36 | 37 | MPI_Reduce(ans, ret, num_procs, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); 38 | 39 | if(rank == 0) 40 | { 41 | for(i = 0;i < num_procs;i++) 42 | printf("%d, ", ret[i]); 43 | } 44 | 45 | MPI_Finalize(); 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /example/mpi/matmul1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | const int rows = 40; //the rows of matrix 6 | const int cols = 100; //the cols of matrix 7 | 8 | int main(int argc, char* argv[]) 9 | { 10 | int i, j, k, myid, numprocs, anstag; 11 | int A[rows][cols], B[cols], C[rows]; 12 | int masterpro,buf[cols], ans,cnt; 13 | double starttime,endtime; 14 | double tmp,totaltime; 15 | 16 | MPI_Status status; 17 | masterpro = 0; 18 | MPI_Init(&argc, &argv); 19 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 20 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 21 | for(cnt = 0; cnt < 100000; cnt++){ 22 | if(numprocs < 2){ 23 | printf("Error:Too few processes!\n"); 24 | MPI_Abort(MPI_COMM_WORLD,99); 25 | } 26 | if(myid == masterpro){ 27 | starttime = MPI_Wtime(); 28 | for (i = 0; i < cols; i++) 29 | { 30 | B[i] = rand()%10; 31 | for (j = 0; j < rows; j++) 32 | { 33 | A[j][i] = rand()%10; 34 | } 35 | } 36 | //bcast the B vector to all slave processor 37 | MPI_Bcast(B, cols, MPI_INT, masterpro, MPI_COMM_WORLD); 38 | //partition the A matrix to all slave processor 39 | for (i = 1; i < numprocs; i++) 40 | { 41 | for (k = i - 1; k < rows; k += numprocs - 1) 42 | { 43 | for (j = 0; j < cols; j++) 44 | { 45 | buf[j] = A[k][j]; 46 | } 47 | MPI_Send(buf, cols, MPI_INT, i, k, MPI_COMM_WORLD); 48 | } 49 | } 50 | } 51 | else{ 52 | //starttime = MPI_Wtime(); 53 | MPI_Bcast(B, cols, MPI_INT, masterpro, MPI_COMM_WORLD); 54 | //every processor receive the part of A matrix,and make Mul operator with B vector 55 | for ( i = myid - 1; i < rows; i += numprocs - 1){ 56 | MPI_Recv(buf, cols, MPI_INT, masterpro, i, MPI_COMM_WORLD, &status); 57 | ans = 0; 58 | 59 | for ( j = 0; j < cols; j++) 60 | { 61 | ans += buf[j] * B[j]; 62 | } 63 | //send back the result 64 | MPI_Send(&ans, 1, MPI_INT, masterpro, i, MPI_COMM_WORLD); 65 | } 66 | //endtime = MPI_Wtime(); 67 | //tmp = endtime-starttime; 68 | } 69 | if(myid == masterpro){ 70 | //receive the result from all slave processor 71 | for ( i = 0; i < rows; i++) 72 | { 73 | MPI_Recv(&ans, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); 74 | //sender = status.MPI_SOURCE; 75 | anstag = status.MPI_TAG; 76 | C[anstag] = ans; 77 | } 78 | //print the result 79 | 80 | for (i = 0; i < rows; i++) 81 | { 82 | printf("%d ",C[i]); 83 | if((i+1)%20 == 0) 84 | printf("\n"); 85 | } 86 | 87 | } 88 | } 89 | endtime = MPI_Wtime(); 90 | totaltime = endtime-starttime; 91 | //printf("cost time:%f s.\n",tmp); 92 | //MPI_Reduce(&tmp,&totaltime,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); 93 | if(myid == masterpro) 94 | printf("total time:%f s.\n",totaltime); 95 | MPI_Finalize(); 96 | return 0; 97 | } -------------------------------------------------------------------------------- /example/mpi/max.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int compare(int m, int n) 5 | { 6 | if (m >= n) return m; 7 
| else return n; 8 | } 9 | 10 | int main(int argc, char *argv[]) 11 | { 12 | MPI_Init(&argc, &argv); 13 | int m = 2000; 14 | int n = 2400; 15 | printf("%d\n", compare(m, n)); 16 | MPI_Finalize(); 17 | } 18 | -------------------------------------------------------------------------------- /example/mpi/mul.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char *argv[]) 5 | { 6 | /* code */ 7 | MPI_Init(&argc, &argv); 8 | int m = 233; 9 | int n = 341; 10 | printf("%d\n", m*n); 11 | MPI_Finalize(); 12 | } 13 | -------------------------------------------------------------------------------- /example/mpi/pi.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char *argv[]){ 6 | int my_rank, num_procs; 7 | int i, n = 0; 8 | double sum, width, local, mypi, pi; 9 | double start = 0.0, stop = 0.0; 10 | int proc_len; 11 | char processor_name[MPI_MAX_PROCESSOR_NAME]; 12 | 13 | MPI_Init(&argc, &argv); 14 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 15 | MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 16 | MPI_Get_processor_name(processor_name, &proc_len); 17 | printf("Process %d of %d\n", my_rank, num_procs); 18 | if(my_rank == 0){ 19 | printf("please give step number n:"); 20 | n = 2000; 21 | printf("\n"); 22 | start = MPI_Wtime(); 23 | } 24 | // printf("Process %d of %d\n", my_rank, num_procs); 25 | 26 | MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); 27 | width = 1.0 / n; 28 | sum = 0.0; 29 | for(i = my_rank; i < n; i += num_procs){ 30 | local = width * ((double)i + 0.5); 31 | sum += 4.0 / (1.0 + local * local); 32 | } 33 | mypi = width * sum; 34 | MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, 35 | MPI_COMM_WORLD); 36 | if(my_rank == 0){ 37 | printf("PI is %.20f\n", pi); 38 | stop = MPI_Wtime(); 39 | printf("Time: %f on %s\n", stop-start, processor_name); 40 | fflush(stdout); 41 | } 42 | MPI_Finalize(); 43 | return 0; 44 | } -------------------------------------------------------------------------------- /example/mpi/quicksort.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int cmp(const void *a,const void *b) 7 | { 8 | return *(int*)a-*(int*)b; 9 | } 10 | 11 | 12 | void swap(int *a,int *b) 13 | { 14 | int tmp=*a; 15 | *a=*b; 16 | *b=tmp; 17 | } 18 | 19 | 20 | int partition(int *buf,int n) 21 | { 22 | if(n == 0) return 0; 23 | --n; 24 | int pri = buf[n]; // privot buf[n-1] 25 | int i = -1; 26 | int j = 0; 27 | while(j < n) 28 | { 29 | if(buf[j] >= pri) 30 | { 31 | ++j; 32 | continue; 33 | } 34 | ++i; 35 | swap(&buf[i],&buf[j]); 36 | ++j; 37 | } 38 | ++i; 39 | swap(&buf[i],&buf[n]); 40 | return i; 41 | } 42 | 43 | 44 | int main(int argc,char **argv) 45 | { 46 | MPI_Init(&argc,&argv); 47 | int rank,size; 48 | MPI_Comm_rank(MPI_COMM_WORLD,&rank); 49 | MPI_Comm_size(MPI_COMM_WORLD,&size); 50 | 51 | int n = 10; 52 | int i; 53 | 54 | int *buf = (int*)malloc(n*sizeof(int)); 55 | if(buf == NULL) 56 | { 57 | printf("%d malloc failed\n"); 58 | exit(1); 59 | } 60 | 61 | int *ofss=NULL; 62 | int *counts=NULL; 63 | int *retbuf=NULL; 64 | 65 | if(rank == 0) 66 | { 67 | // 打印未排序的数组 68 | int *var = (int*)malloc(n*sizeof(int)); 69 | srand((int)time(0)); 70 | for(i = 0;i < n; ++i) 71 | { 72 | buf[i] = (int)rand() % n; 73 | var[i] = buf[i]; 74 | printf("%d\t",buf[i]); 75 | } 76 | printf("\n"); 77 | 78 | qsort(var,n,sizeof(int),cmp); 79 | free(var); 
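        /* ofss, counts and retbuf are needed only on rank 0: after the parallel
         * phase, each rank reports the offset (pos) and length (block) of the
         * segment it sorted, and the final MPI_Gatherv reassembles the sorted
         * array into retbuf using exactly those counts and displacements. */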
80 | 81 | ofss = (int*)malloc(size*sizeof(int)); 82 | counts = (int*)malloc(size*sizeof(int)); 83 | retbuf = (int*)malloc(n*sizeof(int)); 84 | 85 | if(ofss==NULL || counts==NULL || retbuf==NULL) 86 | { 87 | printf("malloc failed\n"); 88 | exit(2); 89 | } 90 | } 91 | 92 | int mod=1; 93 | int sz=1; 94 | int block=n; 95 | int pos=0; 96 | MPI_Status st; 97 | 98 | while(1) 99 | { 100 | if(rank >= sz) 101 | { 102 | sz <<= 1; 103 | if(sz > size) sz = size; 104 | mod <<= 1; 105 | continue; 106 | } 107 | if(rank != 0 && rank-(mod>>1)>=0) 108 | { 109 | MPI_Recv(&pos, 1, MPI_INT, rank-(mod>>1), 1, MPI_COMM_WORLD, &st); 110 | MPI_Recv(&block, 1, MPI_INT, rank-(mod>>1), 2, MPI_COMM_WORLD, &st); 111 | MPI_Recv(buf+pos, block, MPI_INT, rank-(mod>>1), 3, MPI_COMM_WORLD, &st); 112 | } 113 | if(rank+mod >= size) 114 | { 115 | qsort(buf+pos,block,sizeof(int),cmp); 116 | break; 117 | } 118 | 119 | int p = partition(buf+pos,block)+pos; 120 | int tb=block-p+pos; 121 | 122 | MPI_Send(&p ,1, MPI_INT, rank+mod,1,MPI_COMM_WORLD); 123 | MPI_Send(&tb, 1, MPI_INT, rank+mod,2,MPI_COMM_WORLD); 124 | MPI_Send(buf+p, tb, MPI_INT, rank+mod,3,MPI_COMM_WORLD); 125 | 126 | block=p-pos; 127 | sz<<=1; 128 | if(sz>size) sz=size; 129 | mod<<=1; 130 | } 131 | MPI_Gather(&block, 1, MPI_INT, counts, 1, MPI_INT,0,MPI_COMM_WORLD); 132 | MPI_Gather(&pos, 1, MPI_INT, ofss,1,MPI_INT,0,MPI_COMM_WORLD); 133 | MPI_Gatherv(buf+pos,block,MPI_INT,retbuf,counts,ofss,MPI_INT,0,MPI_COMM_WORLD); 134 | 135 | if(rank==0) 136 | { 137 | for(i=0;i 2 | #include 3 | #include 4 | 5 | 6 | int main(int argc, char *argv[]) 7 | { 8 | MPI_Init(&argc, &argv); 9 | int size; 10 | int rank; 11 | MPI_Comm_size(MPI_COMM_WORLD, &size); 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 13 | if(size==1){ 14 | printf("Too less of process.\n"); 15 | MPI_Abort(MPI_COMM_WORLD, MPI_ERR_COUNT); 16 | } 17 | char buf[] = "hello world\n"; 18 | MPI_Status status; 19 | if(rank!=0){ 20 | MPI_Send(buf, 13, MPI_CHAR, 0, 0, MPI_COMM_WORLD); 21 | } 22 | else{ 23 | char *ans = (char *)malloc(13*sizeof(char)); 24 | MPI_Recv(ans, 13, MPI_CHAR, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); 25 | printf("ans is %s\n", ans); 26 | } 27 | 28 | MPI_Finalize(); 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /example/mpi/sort.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | int main(int argc, char **argv) 4 | { 5 | MPI_Init(&argc, &argv); 6 | 7 | int i,j,t,a[10]={56,98,76,69,88,43,29,74,58,66}; 8 | 9 | for(i=0;i<9;i++) 10 | for(j=0;j<9-i;j++) 11 | if(a[j]>a[j+1]) 12 | {t=a[j];a[j]=a[j+1];a[j+1]=t;} 13 | 14 | for(i=0;i<10;i++) 15 | printf("%d ",a[i]); 16 | printf("\n"); 17 | MPI_Finalize(); 18 | return 0; 19 | } -------------------------------------------------------------------------------- /example/mpi/sort_.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #define length 1000000 6 | 7 | 8 | void swap(int *data, int i, int j) { 9 | int temp = data[i]; 10 | data[i] = data[j]; 11 | data[j] = temp; 12 | } 13 | 14 | int partition(int *data, int start, int end) { 15 | if (start >= end) return 0; 16 | int pivotValue = data[start]; 17 | int low = start; 18 | int high = end - 1; 19 | while (low < high) { 20 | while (data[low] <= pivotValue && low < end) low++; 21 | while (data[high] > pivotValue && high > start) high--; 22 | if (low < high) swap(data, low, high); 23 | } 24 | swap(data, start, high); 
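    /* The swap above puts pivotValue (originally data[start]) at index high;
     * everything left of high is <= the pivot and everything right of it is
     * greater, so high is returned as the split point for the recursion. */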
25 | return high; 26 | } 27 | 28 | void quicksort(int *data, int start, int end) { 29 | // why not end-start < 1 30 | if (end-start+1 < 2) return; 31 | 32 | int pivot = partition(data, start, end); 33 | quicksort(data, start, pivot); 34 | quicksort(data, pivot+1, end); 35 | } 36 | 37 | 38 | int main(int argc, char const *argv[]) 39 | { 40 | srand(time(0)); 41 | int *data = (int*)malloc(sizeof(int)*length); 42 | int i; 43 | for (i=0; i 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int i, rank, size; 9 | 10 | double a[10],b[10]; 11 | 12 | double sum,c; 13 | 14 | 15 | MPI_Status status; 16 | 17 | MPI_Init(&argc, &argv); /*initializing */ 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 19 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 20 | 21 | 22 | //int MPI_Allreduce( 23 | // void* sendbuf, 24 | // void* recvbuf, 25 | // int count , 26 | // MPI_Datatype datatype, 27 | // MPI_Op op, 28 | // MPI_Comm comm 29 | // ) 30 | 31 | 32 | for(i=0;i<10;i++) 33 | a[i] = b[i] = (double)(rank*10 + i); 34 | 35 | sum = 0.0; 36 | 37 | for(i=0;i<10;i++) 38 | sum += a[i] * b[i]; 39 | 40 | MPI_Barrier(MPI_COMM_WORLD); 41 | printf("Proc# %d got sub-sum : %.1f\n",rank,sum); 42 | MPI_Barrier(MPI_COMM_WORLD); 43 | 44 | MPI_Allreduce( &sum, &c, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 45 | 46 | MPI_Barrier(MPI_COMM_WORLD); 47 | 48 | printf("Proc#%d got the final sum %.1f\n", rank,c); 49 | 50 | MPI_Barrier(MPI_COMM_WORLD); 51 | 52 | MPI_Finalize(); /*quit from MPI world*/ 53 | return (0); 54 | } 55 | -------------------------------------------------------------------------------- /example/mpi_demo/allreduce-vector-sum.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int i, rank, size; 9 | 10 | double a[10]; 11 | 12 | double sum[10]; 13 | 14 | 15 | MPI_Status status; 16 | 17 | MPI_Init(&argc, &argv); /*initializing */ 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 19 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 20 | 21 | //A routine that computes the dot product of two vectors that are distributed 22 | //across a group of processes and returns the answer at node zero. 
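    // (The comment above matches the dot-product demos; this program instead
    //  element-wise sums the ten-element arrays a[] of all ranks, and because it
    //  uses MPI_Allreduce every rank -- not only rank 0 -- receives sum[].)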
23 | 24 | //int MPI_Allreduce( 25 | // void* sendbuf, 26 | // void* recvbuf, 27 | // int count , 28 | // MPI_Datatype datatype, 29 | // MPI_Op op, 30 | // MPI_Comm comm 31 | // ) 32 | 33 | 34 | for(i=0;i<10;i++) 35 | a[i] = (double)(rank*10 + i); 36 | 37 | MPI_Barrier(MPI_COMM_WORLD); 38 | 39 | printf("Proc#%d :",rank); 40 | for(i=0;i<10;i++) printf("%5.1f ", a[i]); 41 | printf("\n"); 42 | 43 | MPI_Barrier(MPI_COMM_WORLD); 44 | 45 | for(i=0;i<10;i++) 46 | sum[i] = 0.0; 47 | 48 | MPI_Allreduce( a, sum, 10, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 49 | 50 | MPI_Barrier(MPI_COMM_WORLD); 51 | printf("Proc# %d: ",rank); 52 | for(i=0;i<10;i++) printf("%5.1f ", sum[i]); 53 | printf("\n"); 54 | MPI_Barrier(MPI_COMM_WORLD); 55 | 56 | MPI_Finalize(); /*quit from MPI world*/ 57 | return (0); 58 | } 59 | -------------------------------------------------------------------------------- /example/mpi_demo/alltoall: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/example/mpi_demo/alltoall -------------------------------------------------------------------------------- /example/mpi_demo/alltoall.c: -------------------------------------------------------------------------------- 1 | #include "mpi.h" 2 | 3 | int main( int argc, char* argv[] ){ 4 | int i; 5 | int rank, nproc; 6 | int isend[32], irecv[32]; 7 | 8 | MPI_Init( &argc, &argv ); 9 | MPI_Comm_size( MPI_COMM_WORLD, &nproc ); 10 | MPI_Comm_rank( MPI_COMM_WORLD, &rank ); 11 | 12 | printf("Before : My rank = %d <",rank ); 13 | for(i=0; i\n"); 17 | 18 | MPI_Alltoall(isend, 1, MPI_INT, irecv, 1, MPI_INT, 19 | MPI_COMM_WORLD); 20 | printf("After : My rank = %d <", rank); 21 | for(i=0; i\n"); 24 | 25 | MPI_Finalize(); 26 | 27 | } 28 | -------------------------------------------------------------------------------- /example/mpi_demo/alltoallv.c: -------------------------------------------------------------------------------- 1 | #include "mpi.h" 2 | int main( int argc, char* argv[] ){ 3 | int i; 4 | int rank, nproc; 5 | int isend[6] = {1,2,2,3,3,3}, irecv[9]; 6 | int iscnt[3] = {1,2,3}, isdsp[3] = {0,1,3}, ircnt[3], irdsp[3]; 7 | 8 | MPI_Init( &argc, &argv ); 9 | MPI_Comm_size( MPI_COMM_WORLD, &nproc ); 10 | MPI_Comm_rank( MPI_COMM_WORLD, &rank ); 11 | 12 | for(i=0; i<6; i++) 13 | isend[i] = isend[i] + nproc * rank; 14 | for(i=0; i 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int myrank,size; 9 | 10 | double f; 11 | int position, i; 12 | int a[2]; 13 | char buff[1000]; 14 | int j; 15 | 16 | MPI_Status status; 17 | MPI_Init(&argc, &argv); /*initializing */ 18 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /*Process#*/ 19 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 20 | 21 | if (myrank == 0) 22 | { 23 | /* SENDER CODE */ 24 | position = 0; 25 | i = 100; j = 200; f = 1.0; 26 | MPI_Pack(&i, 1, MPI_INT, buff, 1000, &position, MPI_COMM_WORLD); 27 | printf("pos = %d\n",position); 28 | MPI_Pack(&j, 1, MPI_INT, buff, 1000, &position, MPI_COMM_WORLD); 29 | printf("pos = %d\n",position); 30 | MPI_Pack(&f, 1, MPI_DOUBLE, buff, 1000, &position, MPI_COMM_WORLD); 31 | printf("pos = %d\n",position); 32 | MPI_Send( buff, position, MPI_PACKED, 1, 0, MPI_COMM_WORLD); 33 | } 34 | else /* RECEIVER CODE */ 35 | if(myrank == 1){ 36 | //MPI_Recv( a, 2, MPI_INT, 0, 0, MPI_COMM_WORLD,&status); 37 | printf(" before Proc#1 received 2 int(s) from Proc#0 : %d %d\n",a[0],a[1]); 38 | 39 | MPI_Recv(buff, 
1000,MPI_PACKED,0,0,MPI_COMM_WORLD,&status); 40 | position = 0; 41 | MPI_Unpack(buff,1000,&position,&a[0],1,MPI_INT,MPI_COMM_WORLD); 42 | MPI_Unpack(buff,1000,&position,&a[1],1,MPI_INT,MPI_COMM_WORLD); 43 | printf("Proc#1 received 2 int(s) from Proc#0 : %d %d\n",a[0],a[1]); 44 | } 45 | MPI_Finalize(); /*quit from MPI world*/ 46 | return (0); 47 | } 48 | -------------------------------------------------------------------------------- /example/mpi_demo/pack1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/example/mpi_demo/pack1 -------------------------------------------------------------------------------- /example/mpi_demo/pack1.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | #include 6 | 7 | int main( int argc, char** argv ) 8 | { 9 | 10 | int myrank,size; 11 | 12 | double A[50][50]; 13 | 14 | void *TempBuffer; 15 | 16 | int i, Position, BufferSize; 17 | 18 | MPI_Status status; 19 | 20 | MPI_Init(&argc, &argv); /*initializing */ 21 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /*Process#*/ 22 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 23 | 24 | if (myrank == 0) 25 | { 26 | for(i=0;i<50;i++) A[i][i] = (double)i; 27 | 28 | //Apply memeory space for 50 double data 29 | MPI_Pack_size(50, MPI_DOUBLE, MPI_COMM_WORLD, &BufferSize); 30 | TempBuffer = malloc(BufferSize); 31 | 32 | Position = 0; 33 | for (i=0;i<50;i++) 34 | MPI_Pack(&A[i][i], 1, MPI_DOUBLE, 35 | TempBuffer, BufferSize, &Position,MPI_COMM_WORLD); 36 | printf("buffersize is %d\n",BufferSize); 37 | MPI_Send(TempBuffer, Position, MPI_PACKED, 1, 0, MPI_COMM_WORLD); 38 | } 39 | else /* RECEIVER CODE */ 40 | { 41 | MPI_Pack_size(50, MPI_DOUBLE, MPI_COMM_WORLD, &BufferSize); 42 | TempBuffer = malloc(BufferSize); 43 | MPI_Recv(TempBuffer, BufferSize, MPI_PACKED,0, 0, MPI_COMM_WORLD,&status); 44 | Position = 0; 45 | for(i=0;i<50;i++) 46 | MPI_Unpack(TempBuffer,BufferSize,&Position, 47 | &A[i][i], 1, MPI_DOUBLE, MPI_COMM_WORLD); 48 | 49 | for(i=0;i<5;i++)printf("Proc#1 received %3.1lf from Proc#0\n",A[i][i]); 50 | } 51 | MPI_Finalize(); /*quit from MPI world*/ 52 | return (0); 53 | } 54 | -------------------------------------------------------------------------------- /example/mpi_demo/reduce-int-sum: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/example/mpi_demo/reduce-int-sum -------------------------------------------------------------------------------- /example/mpi_demo/reduce-int-sum.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int i, rank, size; 9 | 10 | double a[10],b[10]; 11 | 12 | double sum,c; 13 | 14 | 15 | MPI_Status status; 16 | 17 | MPI_Init(&argc, &argv); /*initializing */ 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 19 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 20 | 21 | 22 | //int MPI_Reduce( 23 | // void* sendbuf, 24 | // void* recvbuf, 25 | // int count , 26 | // MPI_Datatype datatype, 27 | // MPI_Op op, 28 | // int root, 29 | // MPI_Comm comm 30 | // ) 31 | 32 | 33 | for(i=0;i<10;i++) 34 | a[i] = b[i] = (double)(rank*10 + i); 35 | 36 | sum = 0.0; 37 | 38 | 
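    /* Each rank computes the dot product of its local a[] and b[] below; the
     * MPI_Reduce with MPI_SUM then adds the per-rank partial sums, so rank 0
     * ends up with the dot product of the full distributed vectors. */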
for(i=0;i<10;i++) 39 | sum += a[i] * b[i]; 40 | 41 | MPI_Barrier(MPI_COMM_WORLD); 42 | printf("Proc# %d got sub-sum : %.1f\n",rank,sum); 43 | MPI_Barrier(MPI_COMM_WORLD); 44 | 45 | MPI_Reduce( &sum, &c, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); 46 | 47 | if ( rank == 0 ) printf("Proc#0 got the final sum %.1f\n", c); 48 | 49 | MPI_Barrier(MPI_COMM_WORLD); 50 | 51 | MPI_Finalize(); /*quit from MPI world*/ 52 | return (0); 53 | } 54 | -------------------------------------------------------------------------------- /example/mpi_demo/reduce-max.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int i, rank, size; 9 | 10 | double a[10],b[10]; 11 | 12 | double sum,c; 13 | 14 | 15 | MPI_Status status; 16 | 17 | MPI_Init(&argc, &argv); /*initializing */ 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 19 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 20 | 21 | 22 | //int MPI_Reduce( 23 | // void* sendbuf, 24 | // void* recvbuf, 25 | // int count , 26 | // MPI_Datatype datatype, 27 | // MPI_Op op, 28 | // int root, 29 | // MPI_Comm comm 30 | // ) 31 | 32 | 33 | sum = 0.0; 34 | 35 | srandom((unsigned int)(&sum)%100000); 36 | 37 | for(i=0;i<10;i++) 38 | sum += (double)(random()%100); 39 | 40 | MPI_Barrier(MPI_COMM_WORLD); 41 | 42 | printf("Prco#%d has sum : %.1f\n",rank, sum); 43 | 44 | MPI_Barrier(MPI_COMM_WORLD); 45 | 46 | MPI_Reduce( &sum, &c, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); 47 | 48 | if ( rank == 0 ) printf("Proc#0 got the final MAX of sum %.1f\n", c); 49 | 50 | MPI_Barrier(MPI_COMM_WORLD); 51 | 52 | MPI_Finalize(); /*quit from MPI world*/ 53 | return (0); 54 | } 55 | -------------------------------------------------------------------------------- /example/mpi_demo/reduce-maxloc.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int i, rank, size; 9 | 10 | double ain[30], aout[30]; 11 | int ind[30]; 12 | struct { 13 | double val; 14 | int rank; 15 | } in[30], out[30]; 16 | 17 | 18 | MPI_Status status; 19 | 20 | MPI_Init(&argc, &argv); /*initializing */ 21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 22 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 23 | 24 | 25 | //int MPI_Reduce( 26 | // void* sendbuf, 27 | // void* recvbuf, 28 | // int count , 29 | // MPI_Datatype datatype, 30 | // MPI_Op op, 31 | // int root, 32 | // MPI_Comm comm 33 | // ) 34 | 35 | 36 | /* each process has an array of 30 double: ain[30] 37 | */ 38 | 39 | srandom((unsigned int)(&in[0])); 40 | 41 | for (i=0; i<30; ++i) { 42 | in[i].val = ain[i] = (double)(random()%1000); 43 | in[i].rank = rank; 44 | } 45 | MPI_Reduce( in, out, 30, MPI_DOUBLE_INT, MPI_MAXLOC, 0, MPI_COMM_WORLD); 46 | /* At this point, the answer resides on process root */ 47 | if (rank == 0 ) { 48 | /* read ranks out */ 49 | for (i=0; i<30; ++i) { 50 | aout[i] = out[i].val; 51 | ind[i] = out[i].rank; 52 | printf("aout[%d] = %f ind[%d] = %d\n",i,aout[i],i,ind[i]); 53 | } 54 | } 55 | 56 | MPI_Barrier(MPI_COMM_WORLD); 57 | 58 | MPI_Finalize(); /*quit from MPI world*/ 59 | return (0); 60 | } 61 | -------------------------------------------------------------------------------- /example/mpi_demo/reduce-minloc.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head 
file*/ 3 | 4 | #include 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | 9 | #define LEN 1000 10 | 11 | int i, rank, size; 12 | int j; 13 | double ain[LEN]; 14 | 15 | struct { 16 | double val; 17 | int index; 18 | } in, out; 19 | 20 | 21 | MPI_Status status; 22 | 23 | MPI_Init(&argc, &argv); /*initializing */ 24 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 25 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 26 | 27 | //A routine that computes the dot product of two vectors that are distributed 28 | //across a group of processes and returns the answer at node zero. 29 | 30 | //int MPI_Reduce( 31 | // void* sendbuf, 32 | // void* recvbuf, 33 | // int count , 34 | // MPI_Datatype datatype, 35 | // MPI_Op op, 36 | // int root, 37 | // MPI_Comm comm 38 | // ) 39 | 40 | 41 | srandom(((unsigned int)(&in.val))*(rank+10)); 42 | 43 | for(i=0;i ain[i] ) { 52 | in.val = ain[i]; 53 | in.index = i; 54 | } 55 | } 56 | in.index = rank * LEN + in.index; 57 | 58 | MPI_Reduce( &in, &out, 1, MPI_DOUBLE_INT, MPI_MINLOC, 0, MPI_COMM_WORLD); 59 | /* At this point, the answer resides on process root */ 60 | if (rank == 0 ) { 61 | int minrank, minindex; 62 | /* read ranks out */ 63 | minrank = out.index / LEN; 64 | minindex = out.index % LEN; 65 | printf("Proc#%d has the Minimum of ain[%d] = %f \n", 66 | minrank,minindex,out.val); 67 | } 68 | 69 | MPI_Barrier(MPI_COMM_WORLD); 70 | 71 | MPI_Finalize(); /*quit from MPI world*/ 72 | return (0); 73 | } 74 | -------------------------------------------------------------------------------- /example/mpi_demo/reduce-user-complex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/example/mpi_demo/reduce-user-complex -------------------------------------------------------------------------------- /example/mpi_demo/reduce-user-complex.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | typedef struct { 7 | double real,imag; 8 | } Complex; 9 | 10 | /* the user-defined function */ 11 | void myProd( Complex *in, Complex *inout, int *len, MPI_Datatype *dptr ) 12 | { 13 | int i; 14 | Complex c; 15 | 16 | for (i=0; i< *len; ++i) { 17 | c.real = inout->real*in->real - inout->imag*in->imag; 18 | c.imag = inout->real*in->imag + inout->imag*in->real; 19 | *inout = c; 20 | in++; inout++; 21 | } 22 | } 23 | 24 | 25 | int main( int argc, char** argv ) 26 | { 27 | #define LEN 5 28 | int i, rank, size; 29 | 30 | Complex a[LEN], answer[LEN]; 31 | MPI_Op myOp; 32 | MPI_Datatype ctype; 33 | 34 | 35 | MPI_Status status; 36 | 37 | MPI_Init(&argc, &argv); /*initializing */ 38 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 39 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 40 | 41 | 42 | //int MPI_Reduce( 43 | // void* sendbuf, 44 | // void* recvbuf, 45 | // int count , 46 | // MPI_Datatype datatype, 47 | // MPI_Op op, 48 | // int root, 49 | // MPI_Comm comm 50 | // ) 51 | 52 | 53 | // srandom(((unsigned int)(&in.val))*(rank+10)); 54 | 55 | /* explain to MPI how type Complex is defined */ 56 | MPI_Type_contiguous( 2, MPI_DOUBLE, &ctype ); 57 | MPI_Type_commit( &ctype ); 58 | 59 | for(i=0;i 5 | 6 | 7 | /* the user-defined function */ 8 | #define LEN 5 9 | 10 | void myProd( double *in, double *inout, int *len, MPI_Datatype *dptr ) 11 | { 12 | int i,j; 13 | for (i=0; i< *len; ++i) 14 | for(j=0;j 5 | 6 | int main( int argc, char** 
argv ) 7 | { 8 | int i, rank, size; 9 | 10 | double a[10]; 11 | 12 | double sum[10]; 13 | 14 | 15 | MPI_Status status; 16 | 17 | MPI_Init(&argc, &argv); /*initializing */ 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 19 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 20 | 21 | //A routine that computes the dot product of two vectors that are distributed 22 | //across a group of processes and returns the answer at node zero. 23 | 24 | //int MPI_Reduce( 25 | // void* sendbuf, 26 | // void* recvbuf, 27 | // int count , 28 | // MPI_Datatype datatype, 29 | // MPI_Op op, 30 | // int root, 31 | // MPI_Comm comm 32 | // ) 33 | 34 | 35 | for(i=0;i<10;i++) 36 | a[i] = (double)(rank*10 + i); 37 | 38 | MPI_Barrier(MPI_COMM_WORLD); 39 | 40 | printf("Proc#%d :",rank); 41 | for(i=0;i<10;i++) printf("%5.1f ", a[i]); 42 | printf("\n"); 43 | 44 | MPI_Barrier(MPI_COMM_WORLD); 45 | 46 | for(i=0;i<10;i++) 47 | sum[i] = 0.0; 48 | 49 | MPI_Reduce( a, sum, 10, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); 50 | 51 | if ( rank == 0 ) { 52 | printf("\nProc#0 :"); 53 | for(i=0;i<10;i++) printf("%5.1f ", sum[i]); 54 | printf("\n"); 55 | } 56 | 57 | 58 | MPI_Finalize(); /*quit from MPI world*/ 59 | return (0); 60 | } 61 | -------------------------------------------------------------------------------- /example/mpi_demo/ring: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/example/mpi_demo/ring -------------------------------------------------------------------------------- /example/mpi_demo/ring.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int i, rank, size, tag=1; 9 | 10 | int to, from; 11 | 12 | int senddata,recvdata; 13 | MPI_Status status; 14 | MPI_Init(&argc, &argv); /*initializing */ 15 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 16 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 17 | 18 | // cyclic shift send-recv with step 1 ~ Size-1 19 | 20 | for(i=1;i Proc#%d\n", recvdata, from, rank); 42 | 43 | MPI_Barrier(MPI_COMM_WORLD); 44 | 45 | } 46 | 47 | MPI_Finalize(); /*quit from MPI world*/ 48 | return (0); 49 | } 50 | -------------------------------------------------------------------------------- /example/mpi_demo/scan.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int i, rank, size; 9 | 10 | double a[10],b[10]; 11 | 12 | double sum,c; 13 | 14 | int senddata,recvdata; 15 | 16 | MPI_Status status; 17 | 18 | MPI_Init(&argc, &argv); /*initializing */ 19 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 20 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 21 | 22 | //A routine that computes the dot product of two vectors that are distributed 23 | //across a group of processes and returns the answer at node zero. 
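    // (Note: despite the file name this demo calls MPI_Reduce, so only rank 0
    //  gets the total.  An MPI_Scan with the same buffer/count/op arguments and
    //  no root argument would instead leave on each rank r the sum contributed
    //  by ranks 0..r, i.e. an inclusive prefix reduction.)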
24 | 25 | //int MPI_Reduce( 26 | // void* sendbuf, 27 | // void* recvbuf, 28 | // int count , 29 | // MPI_Datatype datatype, 30 | // MPI_Op op, 31 | // int root, 32 | // MPI_Comm comm 33 | // ) 34 | 35 | 36 | for(i=0;i<10;i++) 37 | a[i] = b[i] = (double)(rank + 1); 38 | 39 | sum = 0.0; 40 | 41 | for(i=0;i<10;i++) 42 | sum += a[i] * b[i]; 43 | 44 | MPI_Reduce( &sum, &c, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); 45 | 46 | if ( rank == 0 ) printf("Proc#0 got the final sum %lf\n", c); 47 | 48 | MPI_Barrier(MPI_COMM_WORLD); 49 | 50 | MPI_Finalize(); /*quit from MPI world*/ 51 | return (0); 52 | } 53 | -------------------------------------------------------------------------------- /example/mpi_demo/scanme.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | 7 | int main( int argc, char** argv ) 8 | { 9 | #define LEN 5 10 | int i, rank, size, true; 11 | 12 | double a[LEN], b[LEN]; 13 | 14 | 15 | MPI_Status status; 16 | 17 | MPI_Init(&argc, &argv); /*initializing */ 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 19 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 20 | 21 | //int MPI_Scan( 22 | // void* sendbuf, 23 | // void* recvbuf, 24 | // int count, 25 | // MPI_Datatype datatype, 26 | // MPI_Op op, 27 | // MPI_Comm comm ) 28 | 29 | 30 | 31 | // srandom(((unsigned int)(&in.val))*(rank+10)); 32 | 33 | for(i=0;i 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int myrank,size; 9 | 10 | int i; 11 | 12 | struct Partstruct 13 | { 14 | int class; /* particle class */ 15 | double d[6]; /* particle coordinates */ 16 | char b[7]; /* some additional information */ 17 | }Par,p[100]; 18 | 19 | 20 | 21 | MPI_Status status; 22 | MPI_Init(&argc, &argv); /*initializing */ 23 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /*Process#*/ 24 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 25 | 26 | if (myrank == 0) 27 | { 28 | /* SENDER CODE */ 29 | 30 | Par.class = 100; 31 | for(i=0;i<6;i++) Par.d[i] = (double)i; 32 | for(i=0;i<7;i++) Par.b[i] = 'A' + i; 33 | 34 | printf("Par = %d, P = %d\n",sizeof(Par), sizeof(p)); 35 | 36 | MPI_Send(&Par, sizeof(Par), MPI_BYTE, 1,0, MPI_COMM_WORLD); 37 | } 38 | else /* RECEIVER CODE */ 39 | { 40 | MPI_Recv(&Par, sizeof(Par), MPI_BYTE,0,0,MPI_COMM_WORLD,&status); 41 | 42 | printf("Proc#1 received structs from Proc#0 : \n"); 43 | 44 | printf("class = %d\n", Par.class); 45 | 46 | printf("array d is : "); 47 | for(i=0;i<6;i++) printf("%3.1lf ",Par.d[i]); 48 | 49 | printf("\narray b is : "); 50 | for(i=0;i<7;i++) printf("%c ",Par.b[i]); 51 | 52 | printf("\n"); 53 | } 54 | MPI_Finalize(); /*quit from MPI world*/ 55 | return (0); 56 | } 57 | -------------------------------------------------------------------------------- /example/mpi_demo/type_struct1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/example/mpi_demo/type_struct1 -------------------------------------------------------------------------------- /example/mpi_demo/type_struct1.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | #define RELA 7 | 8 | int main( int argc, char** argv ) 9 | { 10 | int myrank,size; 11 | 12 | int i; 13 | 14 | struct Partstruct 15 | { 16 | int class; 17 | double d[6]; 18 | char b[7]; 19 | }; 20 | 21 | struct Partstruct particle; 22 | 23 | 
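    /* The derived datatype built below describes one Partstruct: block[] gives
     * the element counts (1 int, 6 doubles, 7 chars), type[] the matching MPI
     * basic types, and disp[] the byte displacements of the members obtained
     * with MPI_Address -- made relative to the struct's base address when RELA
     * is defined -- before MPI_Type_struct/MPI_Type_commit register the type. */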
24 | MPI_Datatype Particletype; 25 | MPI_Datatype type[3] = {MPI_INT, MPI_DOUBLE, MPI_CHAR}; 26 | int block[3] = {1, 6, 7}; 27 | MPI_Aint disp[3]; 28 | int base; // using relative displacement 29 | 30 | MPI_Status status; 31 | MPI_Init(&argc, &argv); /*initializing */ 32 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /*Process#*/ 33 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 34 | 35 | 36 | /* Particletype : using absolute addresses */ 37 | 38 | MPI_Address( &particle, disp); 39 | MPI_Address( &particle.d, disp+1); 40 | MPI_Address( &particle.b, disp+2); 41 | 42 | 43 | 44 | #ifdef RELA 45 | // the following codes use relative displacement 46 | base = disp[0]; 47 | for (i=0; i <3; i++) disp[i] -= base; 48 | printf(" Using realtive displacement\n"); 49 | #else 50 | printf(" Using absolute address\n"); 51 | #endif 52 | 53 | 54 | MPI_Type_struct( 3, block, disp, type, &Particletype); 55 | 56 | MPI_Type_commit( &Particletype); 57 | 58 | 59 | if (myrank == 0) 60 | { 61 | /* SENDER CODE */ 62 | 63 | particle.class = 100; 64 | for(i=0;i<6;i++) particle.d[i] = (double)i; 65 | for(i=0;i<7;i++) particle.b[i] = 'A' + i; 66 | 67 | #ifdef RELA 68 | 69 | // Using relative displacement 70 | MPI_Send( &particle, 1, Particletype, 1, 0, MPI_COMM_WORLD); 71 | 72 | #else 73 | 74 | // Using absolute address 75 | MPI_Send( MPI_BOTTOM, 1, Particletype, 1, 0, MPI_COMM_WORLD); 76 | 77 | #endif 78 | } 79 | else /* RECEIVER CODE */ 80 | { 81 | #ifdef RELA 82 | // Using relative displacement 83 | 84 | MPI_Recv(&particle , 1, Particletype, 0, 0, MPI_COMM_WORLD, &status); 85 | 86 | #else 87 | // Using absolute address 88 | 89 | MPI_Recv(MPI_BOTTOM, 1, Particletype, 0, 0, MPI_COMM_WORLD,&status); 90 | 91 | #endif 92 | printf("Proc#1 received structs Particle from Proc#0 : \n"); 93 | 94 | printf("class = %d\n", particle.class); 95 | 96 | printf("array d is : "); 97 | for(i=0;i<6;i++) printf("%3.1lf ",particle.d[i]); 98 | 99 | printf("\narray b is : "); 100 | for(i=0;i<7;i++) printf("%c ", particle.b[i]); 101 | 102 | printf("\n"); 103 | } 104 | MPI_Finalize(); /*quit from MPI world*/ 105 | return (0); 106 | } 107 | -------------------------------------------------------------------------------- /example/mpi_demo/type_vector-1.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | #include 6 | 7 | int main( int argc, char** argv ) 8 | { 9 | 10 | int myrank,size; 11 | 12 | double A[20][20]; 13 | 14 | MPI_Datatype EvenLine; 15 | 16 | void *TempBuffer; 17 | 18 | int i, j; 19 | 20 | MPI_Status status; 21 | 22 | MPI_Init(&argc, &argv); /*initializing */ 23 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /*Process#*/ 24 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 25 | 26 | 27 | MPI_Type_vector(10, 20, 40, MPI_DOUBLE, &EvenLine); 28 | MPI_Type_commit(&EvenLine); 29 | 30 | if (myrank == 0) 31 | { /* Sender Code */ 32 | /* All Even# Lines */ 33 | for(i=0;i<20;i++) 34 | for(j=0;j<20;j++) 35 | A[i][j] = (double)i; 36 | 37 | MPI_Send(A, 1, EvenLine, 1, 0, MPI_COMM_WORLD); 38 | 39 | 40 | } 41 | else /* RECEIVER CODE */ 42 | { 43 | for(i=0;i<20;i++) 44 | for(j=0;j<20;j++) 45 | A[i][j] = 0.0; 46 | 47 | MPI_Recv(A, 1, EvenLine,0, 0, MPI_COMM_WORLD,&status); 48 | 49 | for(i=0;i<20;i++) 50 | { 51 | for(j=0;j<10;j++) 52 | printf("%3.1lf ",A[i][j]); 53 | printf("\n"); 54 | } 55 | } 56 | MPI_Finalize(); /*quit from MPI world*/ 57 | return (0); 58 | } 59 | 60 | 
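/* In the program above, MPI_Type_vector(10, 20, 40, MPI_DOUBLE, &EvenLine)
 * describes 10 blocks of 20 doubles spaced 40 doubles apart, i.e. every second
 * row of the 20x20 array A; only the even-numbered rows are sent, so the odd
 * rows on the receiver keep their 0.0 initialisation. */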
-------------------------------------------------------------------------------- /example/mpi_demo/type_vector.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | #include 6 | 7 | int main( int argc, char** argv ) 8 | { 9 | 10 | int myrank,size; 11 | 12 | double A[100]; 13 | 14 | MPI_Datatype EvenElements; 15 | 16 | void *TempBuffer; 17 | 18 | int i, Position, BufferSize; 19 | 20 | MPI_Status status; 21 | 22 | MPI_Init(&argc, &argv); /*initializing */ 23 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /*Process#*/ 24 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 25 | 26 | 27 | MPI_Type_vector(50, 1, 2, MPI_DOUBLE, &EvenElements); 28 | MPI_Type_commit(&EvenElements); 29 | 30 | if (myrank == 0) 31 | { /* Sender Code */ 32 | 33 | for(i=0;i<100;i++) A[i] = (double)i*(double)i; 34 | 35 | MPI_Send(A, 1, EvenElements, 1, 0, MPI_COMM_WORLD); 36 | 37 | 38 | } 39 | else /* RECEIVER CODE */ 40 | { 41 | for(i=0;i<100;i++) A[i] = 0.0; 42 | 43 | MPI_Recv(A, 1, EvenElements,0, 0, MPI_COMM_WORLD,&status); 44 | 45 | for(i=0;i<10;i++)printf("Proc#1: A[%d] = %3.2f\n",i,A[i]); 46 | } 47 | MPI_Finalize(); /*quit from MPI world*/ 48 | return (0); 49 | } 50 | -------------------------------------------------------------------------------- /example/openmp/Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | OPENMP=-fopenmp 3 | SOURCES:=$(shell find $(.) -name '*.c') 4 | OBJS=$(SOURCES:%.c=%) 5 | 6 | 7 | all : $(OBJS) 8 | @echo $(SOURCES) 9 | 10 | %: %.c 11 | $(CC) $(OPENMP) $< -o $@ 12 | 13 | .PHONY: clean 14 | clean: 15 | rm $(OBJS) -------------------------------------------------------------------------------- /example/openmp/copyin.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int g = 0; 5 | #pragma omp threadprivate(g) 6 | int main(int argc, char* argv[]) 7 | { 8 | int i; 9 | #pragma omp parallel for 10 | for (i = 0; i < 4; i++) 11 | { 12 | g = omp_get_thread_num(); 13 | printf("thread %d, g = %d\n", omp_get_thread_num(), g); 14 | } 15 | printf("global g: %d\n", g); 16 | // YOUR CODE HERE 17 | #pragma omp parallel for copyin(g) 18 | // END OF YOUR CODE 19 | for (i = 0; i < 4; i++) 20 | printf("thread %d, g = %d\n", omp_get_thread_num(), g); 21 | return 0; 22 | } 23 | 24 | -------------------------------------------------------------------------------- /example/openmp/critical.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | int main() 6 | { 7 | int x; 8 | x = 0; 9 | omp_set_num_threads(10); 10 | #pragma omp parallel shared(x) 11 | { 12 | #pragma omp critical 13 | x = x + 1; 14 | } /* end of parallel section */ 15 | 16 | printf("x = %d\n", x); 17 | } -------------------------------------------------------------------------------- /example/openmp/dynamic.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | int i; 7 | // YOUR CODE HERE 8 | #pragma omp parallel for schedule(dynamic) num_threads(10) 9 | // END OF YOUR CODE 10 | for (i = 0; i < 10; i++) 11 | { 12 | printf("i = %d, thread %d\n", i, omp_get_thread_num()); 13 | } 14 | return 0; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /example/openmp/firstprivate.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | int t = 20, i; 7 | // YOUR CODE HERE 8 | #pragma omp parallel for firstprivate(t) 9 | // END OF YOUR CODE 10 | for (i = 0; i < 5; i++) 11 | { 12 | t += i; 13 | printf("t = %d\n", t); 14 | } 15 | printf("outside t = %d\n", t); 16 | return 0; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /example/openmp/for.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | #pragma omp parallel 7 | { 8 | int i, j; 9 | // YOUR CODE HERE 10 | #pragma omp for 11 | // END OF YOUR CODE 12 | for (i = 0; i < 5; i++) 13 | printf("i = %d\n", i); 14 | // YOUR CODE HERE 15 | #pragma omp for 16 | // END OF YOUR CODE 17 | for (j = 0; j < 5; j++) 18 | printf("j = %d\n", j); 19 | } 20 | return 0; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /example/openmp/fork_join.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void foo() 5 | { 6 | int cnt = 0; 7 | clock_t t1 = clock(); 8 | int i; 9 | for (i = 0; i < 1e8; i++) { 10 | cnt++; 11 | } 12 | clock_t t2 = clock(); 13 | printf("Time = %d\n", t2 - t1); 14 | } 15 | 16 | int main(int argc, char* argv[]) 17 | { 18 | clock_t t1 = clock(); 19 | int i; 20 | // YOUR CODE HERE 21 | #pragma omp parallel for 22 | // END OF YOUR CODE 23 | for (i = 0; i < 2; i++) { 24 | foo(); 25 | } 26 | clock_t t2 = clock(); 27 | printf("Total time = %d\n", t2 - t1); 28 | return 0; 29 | } 30 | 31 | -------------------------------------------------------------------------------- /example/openmp/get_num_procs.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | printf("the num of procs = %d\n", omp_get_num_procs()); 7 | printf("the num of threads = %d\n", omp_get_num_threads()); 8 | #pragma omp parallel 9 | { 10 | // YOUR CODE HERE 11 | printf("%d\n", omp_get_num_procs()); 12 | // END OF YOUR CODE 13 | } 14 | return 0; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /example/openmp/get_thread_num.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | printf("%d\n", omp_get_thread_num()); 7 | #pragma omp parallel 8 | { 9 | // YOUR CODE HERE 10 | printf("%d\n", omp_get_thread_num()); 11 | // END OF YOUR CODE 12 | } 13 | return 0; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /example/openmp/lastprivate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | int t = 20, i; 7 | // YOUR CODE HERE 8 | #pragma omp parallel for firstprivate(t), lastprivate(t) 9 | // END OF YOUR CODE 10 | for (i = 0; i < 5; i++) 11 | { 12 | t += i; 13 | printf("t = %d\n", t); 14 | } 15 | printf("outside t = %d\n", t); 16 | return 0; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /example/openmp/lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static omp_lock_t 
lock; 5 | 6 | int main(int argc, char* argv[]) 7 | { 8 | int i; 9 | omp_init_lock(&lock); 10 | #pragma omp parallel for 11 | for (i = 0; i < 5; ++i) 12 | { 13 | // YOUR CODE HERE 14 | omp_set_lock(&lock); 15 | // END OF YOUR CODE 16 | printf("%d+\n", omp_get_thread_num()); 17 | printf("%d-\n", omp_get_thread_num()); 18 | // YOUR CODE HERE 19 | omp_unset_lock(&lock); 20 | // END OF YOUR CODE 21 | } 22 | omp_destroy_lock(&lock); 23 | return 0; 24 | } 25 | 26 | -------------------------------------------------------------------------------- /example/openmp/omp_in_parallel.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | printf("max threads = %d\n", omp_get_max_threads()); 7 | printf("%d\n", omp_in_parallel()); 8 | omp_set_num_threads(4); 9 | #pragma omp parallel 10 | { 11 | // YOUR CODE HERE 12 | printf("%d\n", omp_in_parallel()); 13 | // END OF YOUR CODE 14 | } 15 | return 0; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /example/openmp/ordered.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char* argv[]) 6 | { 7 | #pragma omp parallel num_threads(8) 8 | { 9 | #pragma omp for ordered 10 | for(int i=0; i<10; ++i){ 11 | #pragma omp critical 12 | printf("%d ", i); 13 | #pragma omp ordered 14 | { 15 | #pragma omp critical 16 | printf("-%d ", i); 17 | } 18 | } 19 | } 20 | printf("\n"); 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /example/openmp/parallel.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | // YOUR CODE HERE 7 | #pragma omp parallel num_threads(6) 8 | // END OF YOUR CODE 9 | { 10 | printf("Thread: %d\n", omp_get_thread_num()); 11 | } 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /example/openmp/private.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | int main(int argc, char* argv[]) 6 | { 7 | int i = 20; 8 | int x = 2; 9 | int y = 2; 10 | // YOUR CODE HERE 11 | #pragma omp parallel for private(i,y) 12 | // END OF YOUR CODE 13 | for (i = 0; i < 10; i++) 14 | { 15 | printf("i = %d\n", i); 16 | x = i; 17 | y = i; 18 | printf("x = %d\n", x); 19 | printf("y = %d\n", y); 20 | } 21 | printf("outside i = %d\n", i); 22 | printf("outside x = %d\n", x); 23 | printf("outside y = %d\n", y); 24 | return 0; 25 | } 26 | 27 | 28 | -------------------------------------------------------------------------------- /example/openmp/reduction.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | 7 | int i; 8 | long sum = 10; 9 | // YOUR CODE HERE 10 | #pragma omp parallel for reduction(+: sum) 11 | // END OF YOUR CODE 12 | for (i = 0; i < 10; i++) 13 | { 14 | sum += i; 15 | printf("%ld\n", sum); 16 | } 17 | printf("sum = %ld\n", sum); 18 | return 0; 19 | } 20 | 21 | -------------------------------------------------------------------------------- /example/openmp/schedule.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | int i; 7 | 
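    /* schedule(static) below hands each thread a contiguous chunk of the ten
     * iterations, fixed when the loop is entered; compare dynamic.c, where
     * schedule(dynamic) lets threads grab further chunks at run time as they
     * finish their current one. */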
// YOUR CODE HERE 8 | #pragma omp parallel for schedule(static) 9 | // END OF YOUR CODE 10 | for (i = 0; i < 10; i++) 11 | { 12 | printf("i = %d, thread %d\n", i, omp_get_thread_num()); 13 | } 14 | return 0; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /example/openmp/section.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | // YOUR CODE HERE 7 | #pragma omp parallel sections num_threads(4) 8 | // END OF YOUR CODE 9 | { 10 | #pragma omp section 11 | printf("Section 1 ThreadId = %d\n", omp_get_thread_num()); 12 | #pragma omp section 13 | printf("Section 2 ThreadId = %d\n", omp_get_thread_num()); 14 | #pragma omp section 15 | printf("Section 3 ThreadId = %d\n", omp_get_thread_num()); 16 | #pragma omp section 17 | printf("Section 4 ThreadId = %d\n", omp_get_thread_num()); 18 | } 19 | return 0; 20 | } 21 | 22 | -------------------------------------------------------------------------------- /example/openmp/set_dynamic.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | int i; 7 | // YOUR CODE HERE 8 | omp_set_dynamic(1); 9 | printf("%d\n", omp_get_dynamic()); 10 | // END OF YOUR CODE 11 | #pragma omp parallel for 12 | for (i = 0; i < 16; i++) 13 | { 14 | printf("%d\n", omp_get_thread_num()); 15 | } 16 | return 0; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /example/openmp/set_num_threads.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | // YOUR CODE HERE 7 | omp_set_num_threads(10); 8 | // END OF YOUR CODE 9 | #pragma omp parallel 10 | { 11 | printf("%d of %d threads\n", omp_get_thread_num(), omp_get_num_threads()); 12 | } 13 | return 0; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /example/openmp/shared.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | int t = 20, i; 7 | // YOUR CODE HERE 8 | #pragma omp parallel for shared(t) 9 | // END OF YOUR CODE 10 | for (i = 0; i < 10; i++) 11 | { 12 | if (i % 2 == 0) 13 | t++; 14 | printf("i = %d, t = %d\n", i, t); 15 | } 16 | return 0; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /example/openmp/size.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | int i; 7 | // YOUR CODE HERE 8 | #pragma omp parallel for schedule(static, 5) 9 | // END OF YOUR CODE 10 | for (i = 0; i < 10; i++) 11 | { 12 | printf("i = %d, thread %d\n", i, omp_get_thread_num()); 13 | } 14 | return 0; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /example/openmp/test_lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static omp_lock_t lock; 5 | 6 | int main(int argc, char* argv[]) 7 | { 8 | int i; 9 | omp_init_lock(&lock); 10 | #pragma omp parallel for 11 | for (i = 0; i < 5; ++i) 12 | { 13 | // YOUR CODE HERE 14 | if (omp_test_lock(&lock)) 15 | // END OF YOUR CODE 16 | { 17 | printf("%d+\n", 
omp_get_thread_num()); 18 | printf("%d-\n", omp_get_thread_num()); 19 | omp_unset_lock(&lock); 20 | } 21 | else 22 | { 23 | printf("fail to get lock\n"); 24 | } 25 | } 26 | omp_destroy_lock(&lock); 27 | return 0; 28 | } 29 | 30 | -------------------------------------------------------------------------------- /example/openmp/threadprivate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int g = 0; 5 | #pragma omp threadprivate(g) 6 | 7 | int main(int argc, char* argv[]) 8 | { 9 | int t = 20, i; 10 | // YOUR CODE HERE 11 | #pragma omp parallel num_threads(4) 12 | // END OF YOUR CODE 13 | { 14 | g = omp_get_thread_num(); 15 | } 16 | #pragma omp parallel num_threads(8) 17 | { 18 | printf("thread id: %d g: %d\n", omp_get_thread_num(), g); 19 | } 20 | return 0; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /matrix/cannon.c: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | #include "math.h" 3 | 4 | int main(int argc, char *argv[]) 5 | { 6 | int id_procs, num_procs; 7 | int blksize, sqrt_procs; 8 | MPI_Status status; 9 | MPI_Request request; 10 | 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 14 | 15 | sqrt_procs = sqrt(num_procs); 16 | if (sqrt_procs * sqrt_procs != num_procs) { 17 | fprintf(stderr, "The Num of Proc must be Perfect square!\n"); 18 | return 1; 19 | } 20 | if (argc != 2) { 21 | fprintf(stderr, "Please add a Parameter about the block size!\n"); 22 | return 1; 23 | } 24 | blksize = atoi(argv[1]); 25 | 26 | // Proc#0 产生随机矩阵 A B 27 | int *A, *B, *C, *ans; 28 | int *A_in, *B_in; 29 | int *sA, *sB, *sC; 30 | int N = blksize*sqrt_procs; 31 | 32 | if (id_procs == 0) { 33 | // 串行计算结果 34 | sA = (int*)malloc(N*N*sizeof(int)); 35 | sB = (int*)malloc(N*N*sizeof(int)); 36 | sC = (int*)malloc(N*N*sizeof(int)); 37 | 38 | memset(sC, 0, N*N*sizeof(int)); 39 | random_mat(sA, N); 40 | // print_mat(sA, N, 0); 41 | random_mat(sB, N); 42 | // print_mat(sB, N, 0); 43 | matrix_multi(sA, sB, sC, N); 44 | } 45 | A = (int*)malloc(blksize*blksize*sizeof(int)); 46 | B = (int*)malloc(blksize*blksize*sizeof(int)); 47 | C = (int*)malloc(blksize*blksize*sizeof(int)); 48 | ans = (int*)malloc(blksize*blksize*sizeof(int)); 49 | A_in = (int*)malloc(blksize*blksize*sizeof(int)); 50 | B_in = (int*)malloc(blksize*blksize*sizeof(int)); 51 | 52 | memset(C, 0, blksize*blksize*sizeof(int)); 53 | 54 | MPI_Datatype SubMat; 55 | MPI_Type_vector(blksize, blksize, N, MPI_INT, &SubMat); 56 | MPI_Type_commit(&SubMat); 57 | 58 | MPI_Datatype Mat; 59 | MPI_Type_vector(blksize, blksize, blksize, MPI_INT, &Mat); 60 | MPI_Type_commit(&Mat); 61 | 62 | // Proc#0 将相应的数据发送到各个 Proc 中 63 | if (id_procs == 0) { 64 | for(int i = 0; i < sqrt_procs; i++) { 65 | int lineoff = blksize * N * i; 66 | for(int j = 0; j < sqrt_procs; j++) { 67 | if (i == 0 && j == 0) { 68 | // 分发矩阵 A 69 | MPI_Isend(sA, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 70 | MPI_Irecv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 71 | MPI_Wait(&request, &status); 72 | // 分发矩阵 B 73 | MPI_Isend(sB, 1, SubMat, 0, 1, MPI_COMM_WORLD, &request); 74 | MPI_Irecv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &request); 75 | MPI_Wait(&request, &status); 76 | continue; 77 | } 78 | int offset = j * blksize + lineoff; 79 | MPI_Send(sA+offset, 1, SubMat, i*sqrt_procs+j, 0, MPI_COMM_WORLD); 80 | MPI_Send(sB+offset, 1, SubMat, i*sqrt_procs+j, 1, 
MPI_COMM_WORLD); 81 | } 82 | } 83 | } else { 84 | MPI_Recv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &status); 85 | // print_mat(A, blksize, id_procs); 86 | MPI_Recv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &status); 87 | // print_mat(B, blksize, id_procs); 88 | } 89 | 90 | // 算法正式开始 91 | // 初始对齐 Aij 向左移i格 Bij 向上移j格 92 | MPI_Comm row_comm; 93 | int rank_A, size_A; 94 | int color_A; 95 | int key_A; 96 | key_A = id_procs % sqrt_procs; 97 | color_A = id_procs / sqrt_procs; 98 | 99 | MPI_Comm_split(MPI_COMM_WORLD, color_A, key_A, &row_comm); 100 | MPI_Comm_rank(row_comm, &rank_A); 101 | MPI_Comm_size(row_comm, &size_A); 102 | 103 | if (color_A > 0) { 104 | MPI_Send(A, 1, Mat, (rank_A-color_A+size_A)%size_A, 2, row_comm); 105 | MPI_Recv(A_in, 1, Mat, (rank_A+color_A)%size_A, 2, row_comm, &status); 106 | memcpy(A, A_in, blksize*blksize*sizeof(int)); 107 | } 108 | 109 | MPI_Comm col_comm; 110 | int rank_B, size_B; 111 | int color_B; 112 | int key_B; 113 | key_B = id_procs / sqrt_procs; 114 | color_B = id_procs % sqrt_procs; 115 | 116 | MPI_Comm_split(MPI_COMM_WORLD, color_B, key_B, &col_comm); 117 | MPI_Comm_rank(col_comm, &rank_B); 118 | MPI_Comm_size(col_comm, &size_B); 119 | 120 | if (color_B > 0) { 121 | MPI_Send(B, 1, Mat, (rank_B-color_B+size_B)%size_B, 2, col_comm); 122 | MPI_Recv(B_in, 1, Mat, (rank_B+color_B)%size_B, 2, col_comm, &status); 123 | memcpy(B, B_in, blksize*blksize*sizeof(int)); 124 | } 125 | 126 | // 重复执行 sqrt(num_procs) 次 127 | for(int i = 0; i < sqrt_procs; i++) { 128 | matrix_multi(A, B, C, blksize); 129 | 130 | MPI_Send(A, 1, Mat, (rank_A-1+size_A)%size_A, i+3, row_comm); 131 | MPI_Recv(A_in, 1, Mat, (rank_A+1)%size_A, i+3, row_comm, &status); 132 | memcpy(A, A_in, blksize*blksize*sizeof(int)); 133 | 134 | MPI_Send(B, 1, Mat, (rank_B-1+size_B)%size_B, i+3, col_comm); 135 | MPI_Recv(B_in, 1, Mat, (rank_B+1)%size_B, i+3, col_comm, &status); 136 | memcpy(B, B_in, blksize*blksize*sizeof(int)); 137 | } 138 | 139 | // 分发结果 自行比较 140 | if (id_procs == 0) { 141 | for(int i = 0; i < sqrt_procs; i++) { 142 | int lineoff = blksize * N * i; 143 | for(int j = 0; j < sqrt_procs; j++) { 144 | if (i == 0 && j == 0) { 145 | // 分发矩阵 A 146 | MPI_Isend(sC, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 147 | MPI_Irecv(ans, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 148 | MPI_Wait(&request, &status); 149 | continue; 150 | } 151 | int offset = j * blksize + lineoff; 152 | MPI_Send(sC+offset, 1, SubMat, i*sqrt_procs+j, 100, MPI_COMM_WORLD); 153 | } 154 | } 155 | } else { 156 | MPI_Recv(ans, 1, Mat, 0, 100, MPI_COMM_WORLD, &status); 157 | } 158 | 159 | // print_mat(ans, blksize, id_procs); 160 | 161 | if (check_mat(C, ans, blksize)) { 162 | printf("Proc#%d Done.\n", id_procs); 163 | } 164 | 165 | // print_mat(C, blksize, id_procs); 166 | 167 | MPI_Finalize(); 168 | free(A); 169 | free(B); 170 | free(C); 171 | free(ans); 172 | free(A_in); 173 | free(B_in); 174 | 175 | if (id_procs == 0) { 176 | free(sA); 177 | free(sB); 178 | free(sC); 179 | } 180 | return 0; 181 | } 182 | -------------------------------------------------------------------------------- /matrix/fox.c: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | #include 3 | 4 | 5 | int main(int argc, char *argv[]) 6 | { 7 | int id_procs, num_procs; 8 | int blksize, sqrt_procs; 9 | 10 | MPI_Init(&argc, &argv); 11 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 12 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 13 | 14 | sqrt_procs = sqrt(num_procs); 15 | if (sqrt_procs * sqrt_procs != num_procs) { 16 | 
fprintf(stderr, "The Num of Proc must be Perfect square!\n"); 17 | return 1; 18 | } 19 | if (argc != 2) { 20 | fprintf(stderr, "Please add a Parameter about the block size!\n"); 21 | return 1; 22 | } 23 | blksize = atoi(argv[1]); 24 | 25 | // produce random data 26 | int *A, *B, *C, *ans; 27 | int *A_in, *B_in; 28 | int *sA, *sB, *sC; 29 | int N = blksize*sqrt_procs; 30 | 31 | if (id_procs == 0) { 32 | sA = (int*)malloc(N*N*sizeof(int)); 33 | sB = (int*)malloc(N*N*sizeof(int)); 34 | sC = (int*)malloc(N*N*sizeof(int)); 35 | 36 | memset(sC, 0, N*N*sizeof(int)); 37 | random_mat(sA, N); 38 | random_mat(sB, N); 39 | matrix_multi(sA, sB, sC, N); 40 | } 41 | A = (int*)malloc(blksize*blksize*sizeof(int)); 42 | B = (int*)malloc(blksize*blksize*sizeof(int)); 43 | C = (int*)malloc(blksize*blksize*sizeof(int)); 44 | ans = (int*)malloc(blksize*blksize*sizeof(int)); 45 | A_in = (int*)malloc(blksize*blksize*sizeof(int)); 46 | B_in = (int*)malloc(blksize*blksize*sizeof(int)); 47 | 48 | memset(C, 0, blksize*blksize*sizeof(int)); 49 | 50 | MPI_Datatype SubMat, Mat; 51 | MPI_Status status; 52 | MPI_Request request; 53 | MPI_Type_vector(blksize, blksize, N, MPI_INT, &SubMat); 54 | MPI_Type_commit(&SubMat); 55 | 56 | MPI_Type_vector(blksize, blksize, blksize, MPI_INT, &Mat); 57 | MPI_Type_commit(&Mat); 58 | 59 | if (id_procs == 0) { 60 | for(int i = 0; i < sqrt_procs; i++) { 61 | int lineoff = blksize * N * i; 62 | for(int j = 0; j < sqrt_procs; j++) { 63 | if (i == 0 && j == 0) { 64 | // 分发矩阵 A 65 | MPI_Isend(sA, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 66 | MPI_Irecv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 67 | MPI_Wait(&request, &status); 68 | // 分发矩阵 B 69 | MPI_Isend(sB, 1, SubMat, 0, 1, MPI_COMM_WORLD, &request); 70 | MPI_Irecv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &request); 71 | MPI_Wait(&request, &status); 72 | continue; 73 | } 74 | int offset = j * blksize + lineoff; 75 | MPI_Send(sA+offset, 1, SubMat, i*sqrt_procs+j, 0, MPI_COMM_WORLD); 76 | MPI_Send(sB+offset, 1, SubMat, i*sqrt_procs+j, 1, MPI_COMM_WORLD); 77 | } 78 | } 79 | } else { 80 | MPI_Recv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &status); 81 | // print_mat(A, blksize, id_procs); 82 | MPI_Recv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &status); 83 | // print_mat(B, blksize, id_procs); 84 | } 85 | 86 | MPI_Comm row_comm, col_comm; 87 | int rank_A, size_A; 88 | int color_A; 89 | int key_A; 90 | 91 | int rank_B, size_B; 92 | int color_B; 93 | int key_B; 94 | 95 | // Comm Group by row 96 | key_A = id_procs % sqrt_procs; 97 | color_A = id_procs / sqrt_procs; 98 | MPI_Comm_split(MPI_COMM_WORLD, color_A, key_A, &row_comm); 99 | MPI_Comm_rank(row_comm, &rank_A); 100 | MPI_Comm_size(row_comm, &size_A); 101 | 102 | // Comm Group by B 103 | key_B = id_procs / sqrt_procs; 104 | color_B = id_procs % sqrt_procs; 105 | MPI_Comm_split(MPI_COMM_WORLD, color_B, key_B, &col_comm); 106 | MPI_Comm_rank(col_comm, &rank_B); 107 | MPI_Comm_size(col_comm, &size_B); 108 | 109 | for(int k = 0; k < sqrt_procs; k++) { 110 | if (rank_A == (color_A+k)%size_A) { 111 | memcpy(A_in, A, blksize*blksize*sizeof(int)); 112 | } 113 | // broadcast Ai,j 114 | MPI_Bcast(A_in, 1, Mat, (color_A+k)%size_A, row_comm); 115 | 116 | // compute 117 | matrix_multi(A_in, B, C, blksize); 118 | 119 | int dest = (rank_B-1 + size_B)%size_B; 120 | MPI_Send(B, 1, Mat, dest, 0, col_comm); 121 | MPI_Recv(B_in, 1, Mat, (rank_B+1)%size_B, 0, col_comm, &status); 122 | memcpy(B, B_in, blksize*blksize*sizeof(int)); 123 | } 124 | 125 | 126 | // 分发结果 自行比较 127 | if (id_procs == 0) { 128 | for(int i = 0; i < sqrt_procs; 
i++) { 129 | int lineoff = blksize * N * i; 130 | for(int j = 0; j < sqrt_procs; j++) { 131 | if (i == 0 && j == 0) { 132 | // 分发矩阵 A 133 | MPI_Isend(sC, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 134 | MPI_Irecv(ans, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 135 | MPI_Wait(&request, &status); 136 | continue; 137 | } 138 | int offset = j * blksize + lineoff; 139 | MPI_Send(sC+offset, 1, SubMat, i*sqrt_procs+j, 100, MPI_COMM_WORLD); 140 | } 141 | } 142 | } else { 143 | MPI_Recv(ans, 1, Mat, 0, 100, MPI_COMM_WORLD, &status); 144 | } 145 | 146 | // print_mat(ans, blksize, id_procs); 147 | 148 | if (check_mat(C, ans, blksize)) { 149 | printf("Proc#%d Done.\n", id_procs); 150 | } 151 | 152 | // print_mat(C, blksize, id_procs); 153 | 154 | free(A); 155 | free(B); 156 | free(C); 157 | free(ans); 158 | free(A_in); 159 | free(B_in); 160 | 161 | if (id_procs == 0) { 162 | free(sA); 163 | free(sB); 164 | free(sC); 165 | } 166 | MPI_Finalize(); 167 | return 0; 168 | } 169 | -------------------------------------------------------------------------------- /matrix/matrix.h: -------------------------------------------------------------------------------- 1 | #ifndef _MPI_SORT_H 2 | #define _MPI_SORT_H 3 | 4 | #include "mpi.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | #define INDEX(i, j, n) (((i)*(n))+(j)) 12 | 13 | #define SURCLK(x) \ 14 | do { \ 15 | clock_t t1 = clock(); \ 16 | x \ 17 | clock_t t2 = clock(); \ 18 | printf("Time: %lf\n", (double)(t2-t1)/CLOCKS_PER_SEC); \ 19 | }while(0); 20 | 21 | 22 | void random_mat(int *a, int num) { 23 | for(int i = 0; i < num; i++) { 24 | srand(clock()); 25 | for(int j = 0; j < num; j++) { 26 | a[INDEX(i, j, num)] = rand() % 100; 27 | } 28 | } 29 | } 30 | 31 | void print_mat(int *a, int num, int id) { 32 | for(int i = 0; i < num; i++) { 33 | for(int j = 0; j < num; j++) { 34 | printf("|%d : %d ", id, a[INDEX(i, j, num)]); 35 | } 36 | printf("\n"); 37 | } 38 | } 39 | 40 | int check_mat(int *C, int *nC, int num) { 41 | for(int i = 0; i < num; i++) { 42 | for(int j = 0; j < num; j++) { 43 | if (C[INDEX(i, j, num)] != nC[INDEX(i, j, num)]) { 44 | printf("C[%d,%d] should be %d ,not %d\n", 45 | i,j,C[INDEX(i,j,num)],nC[INDEX(i,j,num)]); 46 | return 0; 47 | } 48 | } 49 | } 50 | return 1; 51 | } 52 | 53 | int compare(const void *arg1, const void *arg2) { 54 | return *(int*)arg1 >= *(int*)arg2; 55 | } 56 | 57 | void matrix_multi(int *A, int *B, int *C, int num) { 58 | for(int i = 0; i < num; i++) { 59 | for(int j = 0; j < num; j++) { 60 | for(int k = 0; k < num; k++) 61 | C[INDEX(i, j, num)] += A[INDEX(i, k, num)] * B[INDEX(k, j, num)]; 62 | } 63 | } 64 | } 65 | 66 | void matrix_transpose(int *a, int num) { 67 | int temp; 68 | for(int i = 1; i < num; i++) { 69 | for(int j = 0; j < i; j++) { 70 | temp = a[INDEX(i, j, num)]; 71 | a[INDEX(i, j, num)] = a[INDEX(j, i, num)]; 72 | a[INDEX(j, i, num)] = temp; 73 | } 74 | } 75 | } 76 | 77 | #endif // _MPI_SORT_H 78 | -------------------------------------------------------------------------------- /matrix/tranpose.c: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | #include "math.h" 3 | 4 | int main(int argc, char *argv[]) 5 | { 6 | int *sa; 7 | int *sb; 8 | int id_procs, num_procs; 9 | int blksize, sqrt_procs; 10 | 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 14 | 15 | sqrt_procs = sqrt(num_procs); 16 | if (sqrt_procs * sqrt_procs != num_procs) { 17 | fprintf(stderr, "The 
Num of Proc must be Perfect square!\n"); 18 | return 1; 19 | } 20 | if (argc != 2) { 21 | fprintf(stderr, "Please add a Parameter about the matrix size!\n"); 22 | return 1; 23 | } 24 | blksize = atoi(argv[1]); 25 | int n = blksize * sqrt_procs; 26 | int *a = (int*)malloc(blksize*blksize*sizeof(int)); 27 | int *b = (int*)malloc(blksize*blksize*sizeof(int)); 28 | 29 | if (id_procs == 0) { 30 | sb = (int *)malloc(n*n*sizeof(int)); 31 | sa = (int *)malloc(n*n*sizeof(int)); 32 | random_mat(sb, n); 33 | memcpy(sa, sb, n*n*sizeof(int)); 34 | matrix_transpose(sb, n); 35 | } 36 | 37 | // Proc#0 send the transpose submat 38 | // Put A_ij to #Proc_ji 39 | MPI_Datatype SubMat, Mat; 40 | MPI_Status status; 41 | MPI_Request request; 42 | MPI_Type_vector(blksize, blksize, n, MPI_INT, &SubMat); 43 | MPI_Type_commit(&SubMat); 44 | MPI_Type_vector(blksize, blksize, blksize, MPI_INT, &Mat); 45 | MPI_Type_commit(&Mat); 46 | 47 | if (id_procs == 0) { 48 | for(int i = 0; i < sqrt_procs; i++) { 49 | int lineoff = blksize * n * i; 50 | for(int j = 0; j < sqrt_procs; j++) { 51 | if (i == 0 && j == 0) { 52 | // 分发矩阵 A 53 | MPI_Isend(sa, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 54 | MPI_Irecv(a, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 55 | MPI_Wait(&request, &status); 56 | continue; 57 | } 58 | int offset = j * blksize + lineoff; 59 | MPI_Send(sa+offset, 1, SubMat, j*sqrt_procs+i, 0, MPI_COMM_WORLD); 60 | } 61 | } 62 | } else { 63 | MPI_Recv(a, 1, Mat, 0, 0, MPI_COMM_WORLD, &status); 64 | } 65 | 66 | matrix_transpose(a, blksize); 67 | 68 | if (id_procs == 0) { 69 | for(int i = 0; i < sqrt_procs; i++) { 70 | int lineoff = blksize * n * i; 71 | for(int j = 0; j < sqrt_procs; j++) { 72 | if (i == 0 && j == 0) { 73 | // 分发矩阵 A 74 | MPI_Isend(sb, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 75 | MPI_Irecv(b, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 76 | MPI_Wait(&request, &status); 77 | continue; 78 | } 79 | int offset = j * blksize + lineoff; 80 | MPI_Send(sb+offset, 1, SubMat, i*sqrt_procs+j, 100, MPI_COMM_WORLD); 81 | } 82 | } 83 | } else { 84 | MPI_Recv(b, 1, Mat, 0, 100, MPI_COMM_WORLD, &status); 85 | } 86 | 87 | if(check_mat(a,b, blksize)) { 88 | printf("Proc#%d Done.\n", id_procs); 89 | } 90 | 91 | 92 | if (id_procs == 0) { 93 | free(sa); 94 | free(sb); 95 | } 96 | free(a); 97 | free(b); 98 | MPI_Finalize(); 99 | return 0; 100 | } 101 | -------------------------------------------------------------------------------- /parallel01/PSRS.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int compare(const void *p1, const void *p2) 7 | { 8 | return (*(int *)p1 - *(int *)p2); 9 | } 10 | 11 | //Merge函数合并两个子数组形成单一的已排好序的字数组 12 | //并代替当前的子数组A[p..r] 13 | void Merge(int *a, int p, int q, int r) 14 | { 15 | int i, j, k; 16 | int n1 = q - p + 1; 17 | int n2 = r - q; 18 | int L[n1 + 1]; 19 | int R[n2 + 1]; 20 | for (i = 0; i < n1; i++) 21 | L[i] = a[p + i]; 22 | L[i] = 65536; 23 | for (j = 0; j < n2; j++) 24 | R[j] = a[q + j + 1]; 25 | R[j] = 65536; 26 | i = 0, j = 0; 27 | for (k = p; k <= r; k++) 28 | { 29 | if (L[i] <= R[j]) 30 | { 31 | a[k] = L[i]; 32 | i++; 33 | } 34 | else 35 | { 36 | a[k] = R[j]; 37 | j++; 38 | } 39 | } 40 | } 41 | //归并排序 42 | void MergeSort(int *a, int p, int r) 43 | { 44 | if (p < r) 45 | { 46 | int q = (p + r) / 2; 47 | MergeSort(a, p, q); 48 | MergeSort(a, q + 1, r); 49 | Merge(a, p, q, r); 50 | } 51 | } 52 | 53 | int main() 54 | { 55 | int a[1024]; // 输入的数组 56 | int result[1024]; // 排好顺序的数组 57 | int i = 0, 
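/*
 * PSRS (Parallel Sorting by Regular Sampling) with OpenMP, as implemented below:
 *   1. split the input evenly; each of the p threads qsorts its own segment;
 *   2. each thread takes p regular samples from its sorted segment;
 *   3. a single thread sorts the p*p samples and picks p-1 pivots;
 *   4. every thread partitions its segment by those pivots (pivot_a records the
 *      boundaries) and copies each piece to its destination inside result;
 *   5. each thread merge-sorts the slice of result assigned to it.
 */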
j, k = 0; 58 | int p = 3; 59 | FILE *fin; 60 | fin = fopen("input.txt", "r"); 61 | while (fscanf(fin, "%d", &a[i]) != EOF) 62 | { 63 | i++; 64 | } 65 | 66 | int len = i; 67 | int group = len / p; 68 | int mod = len % p; 69 | 70 | omp_set_num_threads(p); 71 | int b[p * p]; // 采样排序 72 | int pivot[p + 1]; // 主元数组 73 | int pivot_a[p][p + 1]; // 重排序用到的 记录位置的数组 74 | int newpivot[p + 1]; 75 | for (i = 0; i < p + 1; i++) 76 | newpivot[i] = 0; 77 | 78 | #pragma omp parallel private(i, j, k) shared(b, pivot, pivot_a, result, newpivot) 79 | { 80 | // 均匀划分 局部排序 81 | int id = omp_get_thread_num(); 82 | if(id!=p-1) 83 | qsort(a + id * group, group, sizeof(int), compare); 84 | else qsort(a + id * group, group + mod, sizeof(int), compare); 85 | 86 | // 正则采样 87 | for (j = 0; j < p; j++) 88 | b[j + id * group / p] = a[id * group + j * group / p]; 89 | 90 | // 采样排序 91 | #pragma omp barrier 92 | #pragma omp single 93 | { 94 | qsort(b, p * p, sizeof(int), compare); 95 | // 选择p-1个主元 96 | for (i = 1; i < p; i++) 97 | pivot[i] = b[i * p]; 98 | } 99 | // pivot_a 计算出小于主元的数的位置 100 | // 6 14 15 | 39 46 48 | 72 91 93 101 | // 12 21 | 36 40 54 61 69 | 89 97 102 | // 20 27 32 33 | 53 58 | 72 84 97 103 | // 3 6 104 | // 2 7 105 | // 4 6 106 | for (j = 0; j < group; j++) 107 | { 108 | for (i = 0; i <= p; i++) 109 | { 110 | if (i == p) 111 | { 112 | if(id != p-1) 113 | pivot_a[id][i] = group; 114 | else pivot_a[id][i] = group + mod; 115 | continue; 116 | } 117 | if (i == 0) 118 | { 119 | pivot_a[id][i] = 0; 120 | continue; 121 | } 122 | if (a[id * group + j] <= pivot[i]) 123 | pivot_a[id][i] = j + 1; 124 | } 125 | } 126 | #pragma omp critical 127 | for (j = 1; j <= p; j++) 128 | { 129 | int sumoffset = 0; 130 | for (i = 0; i < p; i++) 131 | if (i >= id) 132 | sumoffset += pivot_a[i][j - 1]; 133 | else 134 | sumoffset += pivot_a[i][j]; 135 | memcpy(result + sumoffset, &a[id * group + pivot_a[id][j - 1]], (pivot_a[id][j] - pivot_a[id][j - 1]) * sizeof(int)); 136 | } 137 | 138 | #pragma omp barrier 139 | #pragma omp single 140 | { 141 | for (i = 1; i <= p; i++) 142 | for (j = 0; j < p; j++) 143 | newpivot[i] += pivot_a[j][i]; 144 | } 145 | MergeSort(result, newpivot[id], newpivot[id + 1] - 1); 146 | } 147 | 148 | printf("原输入的数据:\n"); 149 | for (int i = 0; i < len; i++) 150 | printf("%d,", a[i]); 151 | 152 | printf("\n 打印主元:%d, %d\n", pivot[1], pivot[2]); 153 | for (i = 0; i < p; i++) 154 | { 155 | for (j = 0; j <= p; j++) 156 | printf("%d,", pivot_a[i][j]); 157 | printf("\n"); 158 | } 159 | 160 | printf("排序结果:\n"); 161 | for (i = 0; i < len; i++) 162 | printf("%d ", result[i]); 163 | } 164 | -------------------------------------------------------------------------------- /parallel01/input.txt: -------------------------------------------------------------------------------- 1 | 15 46 48 93 39 6 72 91 14 36 69 40 89 61 97 12 21 54 53 97 84 58 32 27 33 72 20 12 13 -------------------------------------------------------------------------------- /parallel01/merge: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/parallel01/merge -------------------------------------------------------------------------------- /parallel01/merge.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | //Merge函数合并两个子数组形成单一的已排好序的字数组 4 | //并代替当前的子数组A[p..r] 5 | void Merge(int *a, int p, int q, int r) 6 | { 7 | int i,j,k; 8 | int n1 = q - p + 1; 9 | int n2 = r - q; 10 
| int *L = (int*)malloc((n1+1)*sizeof(int)); 11 | int *R = (int*)malloc((n2+1)*sizeof(int)); 12 | for(i=0; i 2 | #include 3 | 4 | #define NUM_THREAD 4 5 | 6 | int main() 7 | { 8 | int i; 9 | double pi=0.0; 10 | 11 | long num_steps = 100000; 12 | double step = 1.0/num_steps; 13 | 14 | for(i=0;i 2 | #include 3 | #include 4 | 5 | int compare(const void *p1, const void *p2) 6 | { 7 | return (*(int *)p1 - *(int *)p2); 8 | } 9 | 10 | //Merge函数合并两个子数组形成单一的已排好序的字数组 11 | //并代替当前的子数组A[p..r] 12 | void Merge(int *a, int p, int q, int r) 13 | { 14 | int i, j, k; 15 | int n1 = q - p + 1; 16 | int n2 = r - q; 17 | int L[n1 + 1]; 18 | int R[n2 + 1]; 19 | for (i = 0; i < n1; i++) 20 | L[i] = a[p + i]; 21 | L[i] = 65536; 22 | for (j = 0; j < n2; j++) 23 | R[j] = a[q + j + 1]; 24 | R[j] = 65536; 25 | i = 0, j = 0; 26 | for (k = p; k <= r; k++) 27 | { 28 | if (L[i] <= R[j]) 29 | { 30 | a[k] = L[i]; 31 | i++; 32 | } 33 | else 34 | { 35 | a[k] = R[j]; 36 | j++; 37 | } 38 | } 39 | } 40 | //归并排序 41 | void MergeSort(int *a, int p, int r) 42 | { 43 | if (p < r) 44 | { 45 | int q = (p + r) / 2; 46 | MergeSort(a, p, q); 47 | MergeSort(a, q + 1, r); 48 | Merge(a, p, q, r); 49 | } 50 | } 51 | 52 | 53 | void debug(int *array, int len) 54 | { 55 | for (int i = 0; i < len; i++) 56 | printf("%d,", array[i]); 57 | printf("\n"); 58 | } 59 | 60 | int main(int argc, char *argv[]) 61 | { 62 | int array[1024]; 63 | int result[1024]; 64 | int len; 65 | int processors; 66 | int id; 67 | int group; 68 | int group_len; 69 | int mod; 70 | 71 | int i, index; 72 | // 读入数据部分 73 | FILE *fin; 74 | fin = fopen("input.txt", "r"); 75 | while (fscanf(fin, "%d", &array[i]) != EOF) 76 | i++; 77 | 78 | // 开始mpi 79 | MPI_Init(&argc, &argv); 80 | MPI_Comm_size(MPI_COMM_WORLD, &processors); 81 | MPI_Comm_rank(MPI_COMM_WORLD, &id); 82 | MPI_Status status; 83 | MPI_Request request; 84 | 85 | len = i; 86 | group = len / processors; 87 | mod = len % processors; 88 | 89 | int sample[processors * processors]; 90 | int pivot[processors]; 91 | 92 | // 均匀划分 局部排序 93 | if (id != processors - 1) { 94 | qsort(array + id * group, group, sizeof(int), compare); 95 | group_len = group; 96 | } 97 | else { 98 | qsort(array + id * group, group + mod, sizeof(int), compare); 99 | group_len = group+mod; 100 | } 101 | // 正则采样 102 | for (i = 0; i < processors; i++) 103 | sample[i + id * group / processors] = array[id * group + i * group / processors]; 104 | 105 | // 采样到的样本全部放到0号线程 106 | if (id == 0) 107 | for (i = 1; i < processors; i++) 108 | MPI_Recv(sample + processors * i, processors, MPI_INT, i, 100 + i, MPI_COMM_WORLD, &status); 109 | else 110 | MPI_Send(sample + id * group / processors, processors, MPI_INT, 0, 100 + id, MPI_COMM_WORLD); 111 | 112 | // 采样排序 113 | if (id == 0) 114 | { 115 | qsort(sample, processors * processors, sizeof(int), compare); 116 | for (i = 0; i < processors-1; i++) 117 | pivot[i] = sample[(i+1) * processors]; 118 | // 分发主元 119 | for(i = 1;i < processors; i++) 120 | MPI_Send(pivot, processors, MPI_INT, i, 110+i, MPI_COMM_WORLD); 121 | } 122 | 123 | MPI_Barrier(MPI_COMM_WORLD); 124 | 125 | // 接收主元 126 | if(id != 0) 127 | MPI_Recv(pivot, processors, MPI_INT, 0, 110+id, MPI_COMM_WORLD, &status); 128 | 129 | // 进行主元划分 130 | index = 0; 131 | int partionSize[processors]; 132 | for(i = 0;i < processors;i++) 133 | partionSize[i] = 0; 134 | 135 | for(i = 0; i < group_len;i++) { 136 | if(array[i+id*group] > pivot[index]) 137 | index++; 138 | if(index == processors) { 139 | partionSize[processors-1] = group_len -i+1; 140 | break; 141 | } 142 | 
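// One pass over the locally sorted segment assigns every element to a pivot
// bucket: index is the destination processor and moves forward whenever the
// current element exceeds pivot[index], so partionSize[k] ends up holding the
// number of local elements bound for processor k.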
partionSize[index]++; 143 | } 144 | 145 | // 广播分组 先发送各个段的长度 146 | int newSize[processors]; 147 | MPI_Alltoall(partionSize, 1, MPI_INT, newSize, 1, MPI_INT, MPI_COMM_WORLD); 148 | 149 | // 计算位移 150 | int sendIndex[processors]; 151 | int recvIndex[processors]; 152 | 153 | sendIndex[0] = 0; 154 | recvIndex[0] = 0; 155 | for(i = 1;i < processors;i++) { 156 | sendIndex[i] = sendIndex[i-1]+partionSize[i-1]; 157 | recvIndex[i] = recvIndex[i-1]+newSize[i-1]; 158 | } 159 | // 计算总长度 160 | int totalSize = 0; 161 | for(i = 0;i < processors;i++) 162 | totalSize += newSize[i]; 163 | 164 | 165 | // 每个处理器发送数据给其他所有处理器,且每个处理发送的数据长度都不同 166 | // 故有长度数组和位移数组 167 | MPI_Alltoallv(array+id*group, partionSize, sendIndex, MPI_INT, 168 | result, newSize, recvIndex, MPI_INT, MPI_COMM_WORLD); 169 | 170 | // 归并排序 171 | MergeSort(result, 0, totalSize-1); 172 | 173 | MPI_Gather(&totalSize, 1, MPI_INT, recvIndex, 1, MPI_INT, 0, MPI_COMM_WORLD); 174 | 175 | int recvDisp[processors]; 176 | recvDisp[0]; 177 | for(i = 1;i < processors;i++) 178 | recvDisp[i] = recvIndex[i-1] + recvDisp[i-1]; 179 | 180 | MPI_Gatherv(result, totalSize, MPI_INT, array, recvIndex, recvDisp, MPI_INT, 0, MPI_COMM_WORLD); 181 | 182 | if(id == 0) 183 | debug(array, len); 184 | 185 | MPI_Finalize(); 186 | return 0; 187 | } 188 | -------------------------------------------------------------------------------- /parallel02/input.txt: -------------------------------------------------------------------------------- 1 | 15 46 48 93 39 6 72 91 14 36 69 40 89 61 97 12 21 54 53 97 84 58 32 27 33 72 20 -------------------------------------------------------------------------------- /parallel02/pi.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char *argv[]){ 6 | int my_rank, num_procs; 7 | int i, n = 0; 8 | double sum, width, local, mypi, pi; 9 | double start = 0.0, stop = 0.0; 10 | int proc_len; 11 | char processor_name[MPI_MAX_PROCESSOR_NAME]; 12 | 13 | MPI_Init(&argc, &argv); 14 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 15 | MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 16 | MPI_Get_processor_name(processor_name, &proc_len); 17 | printf("Process %d of %d\n", my_rank, num_procs); 18 | if(my_rank == 0){ 19 | printf("please give step number n:"); 20 | scanf("%d", &n); 21 | printf("step number is : %d\n", n); 22 | start = MPI_Wtime(); 23 | } 24 | // printf("Process %d of %d\n", my_rank, num_procs); 25 | 26 | MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); 27 | width = 1.0 / n; 28 | sum = 0.0; 29 | for(i = my_rank; i < n; i += num_procs){ 30 | local = width * ((double)i + 0.5); 31 | sum += 4.0 / (1.0 + local * local); 32 | } 33 | mypi = width * sum; 34 | MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, 35 | MPI_COMM_WORLD); 36 | if(my_rank == 0){ 37 | printf("PI is %.20f\n", pi); 38 | stop = MPI_Wtime(); 39 | printf("Time: %f on %s\n", stop-start, processor_name); 40 | fflush(stdout); 41 | } 42 | MPI_Finalize(); 43 | return 0; 44 | } -------------------------------------------------------------------------------- /parallel02/test.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/parallel02/test.c -------------------------------------------------------------------------------- /sort/enum_sort.c: -------------------------------------------------------------------------------- 1 | #include "sort.h" 2 | 3 | /** 4 | * @input 输入数组 a 5 | * 
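 * Enumeration (rank) sort: every process owns one element a[id_procs] and
 * counts k = number of elements that are smaller (ties broken by lower rank);
 * k is that element's final position, so rank 0 collects the counts and
 * writes each element into b[k].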
@output 输出数组 b 6 | */ 7 | int main(int argc, char *argv[]) 8 | { 9 | int num_procs, id_procs; 10 | MPI_Status status; 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 14 | 15 | int *a, *b, *b2; 16 | a = (int*)malloc(num_procs*sizeof(int)); 17 | if (id_procs == 0) { 18 | b2 = (int*)malloc(num_procs*sizeof(int)); 19 | b = (int*)malloc(num_procs*sizeof(int)); 20 | random_array(b2, num_procs); 21 | memcpy(a, b2, num_procs*sizeof(int)); 22 | SURCLK(qsort(b2, num_procs, sizeof(int), compare);) 23 | } 24 | 25 | 26 | MPI_Bcast(a, num_procs, MPI_INT, 0, MPI_COMM_WORLD); 27 | int k = 0; 28 | for(int j = 0; j < num_procs; j++) { 29 | if (a[id_procs] > a[j] || (a[id_procs] == a[j] && id_procs > j)) 30 | k++; 31 | } 32 | 33 | 34 | if (id_procs == 0) { 35 | int recv; 36 | for(int i = 1; i < num_procs; i++) { 37 | MPI_Recv(&recv, 1, MPI_INT, i, 1, MPI_COMM_WORLD, &status); 38 | b[recv] = a[i]; 39 | } 40 | b[k] = a[0]; 41 | } else { 42 | MPI_Send(&k, 1, MPI_INT, 0, 1, MPI_COMM_WORLD); 43 | } 44 | 45 | if (id_procs == 0) { 46 | if (check_array(b, b2, num_procs)) 47 | printf("Done.\n"); 48 | else 49 | printf("Error Occured!\n"); 50 | } 51 | 52 | free(a); 53 | if (id_procs == 0) { 54 | free(b2); 55 | free(b); 56 | } 57 | MPI_Finalize(); 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /sort/psrs_sort.c: -------------------------------------------------------------------------------- 1 | #include "sort.h" 2 | 3 | //Merge函数合并两个子数组形成单一的已排好序的字数组 4 | //并代替当前的子数组A[p..r] 5 | void merge(int *a, int p, int q, int r) { 6 | int i, j, k; 7 | int n1 = q - p + 1; 8 | int n2 = r - q; 9 | int L[n1 + 1]; 10 | int R[n2 + 1]; 11 | for (i = 0; i < n1; i++) 12 | L[i] = a[p + i]; 13 | L[i] = 65536; 14 | for (j = 0; j < n2; j++) 15 | R[j] = a[q + j + 1]; 16 | R[j] = 65536; 17 | i = 0, j = 0; 18 | for (k = p; k <= r; k++) { 19 | if (L[i] <= R[j]) { 20 | a[k] = L[i]; 21 | i++; 22 | } 23 | else { 24 | a[k] = R[j]; 25 | j++; 26 | } 27 | } 28 | } 29 | 30 | 31 | //归并排序 32 | void merge_sort(int *a, int p, int r) { 33 | if (p < r) { 34 | int q = (p + r) / 2; 35 | merge_sort(a, p, q); 36 | merge_sort(a, q + 1, r); 37 | merge(a, p, q, r); 38 | } 39 | } 40 | 41 | 42 | int main(int argc, char *argv[]) 43 | { 44 | int *array, *a; 45 | int *result, *ans; 46 | int n; 47 | int num_procs, id_procs; 48 | int group, group_len; 49 | int mod; 50 | 51 | int i, index; 52 | 53 | // 开始mpi 54 | MPI_Init(&argc, &argv); 55 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 56 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 57 | MPI_Status status; 58 | MPI_Request request; 59 | 60 | if (argc != 2) { 61 | fprintf(stderr, "Please input the num of array!\n"); 62 | return 1; 63 | } 64 | n = atoi(argv[1]); 65 | if (n <= 0) { 66 | fprintf(stderr, "Please, the size must > 0.\n"); 67 | return 1; 68 | } 69 | 70 | group = n / num_procs; 71 | mod = n % num_procs; 72 | a = (int *)malloc((group+mod)*sizeof(int)); 73 | 74 | if (id_procs == 0) { 75 | // 产生随机数据 76 | // 串行运行 得出结果 77 | array = (int*)malloc((n+2)*sizeof(int)); 78 | result = (int*)malloc((n+2)*sizeof(int)); 79 | ans = (int*)malloc((n+2)*sizeof(int)); 80 | random_array(array, n); 81 | memcpy(ans, array, (n)*sizeof(int)); 82 | qsort(ans, n, sizeof(int), compare); 83 | } 84 | 85 | // 分段数据 移交各个处理器 86 | int sendcounts[num_procs]; 87 | int sdispls[num_procs]; 88 | for(int i = 0; i < num_procs; i++) { 89 | sendcounts[i] = group; 90 | sdispls[i] = i*group; 91 | } 92 | sendcounts[num_procs-1] = 
group+mod; 93 | 94 | MPI_Scatterv(array, sendcounts, sdispls, MPI_INT, a, group+mod, MPI_INT, 0, MPI_COMM_WORLD); 95 | 96 | group_len = sendcounts[id_procs]; 97 | // 均匀划分 局部排序 98 | qsort(a, group_len, sizeof(int), compare); 99 | 100 | // 正则采样 101 | int samples[num_procs * num_procs]; 102 | int s[num_procs]; 103 | for (i = 1; i < num_procs; i++) { 104 | s[i-1] = a[i * group / num_procs]; 105 | } 106 | // 采到样本 收集到Proc#0 107 | MPI_Gather(s, num_procs-1, MPI_INT, samples, num_procs-1, MPI_INT, 0, MPI_COMM_WORLD); 108 | 109 | // 采样排序 110 | int pivot[num_procs]; 111 | if (id_procs == 0) { 112 | qsort(samples, (num_procs-1)*num_procs, sizeof(int), compare); 113 | for (i = 1; i < num_procs; i++) 114 | pivot[i-1] = samples[i*(num_procs-1)]; 115 | } 116 | // 广播主元 117 | MPI_Bcast(pivot, num_procs-1, MPI_INT, 0, MPI_COMM_WORLD); 118 | 119 | // 进行主元划分 120 | index = 0; 121 | int pcounts[num_procs]; 122 | for(i = 0; i < num_procs; i++) 123 | pcounts[i] = 0; 124 | pivot[num_procs-1] = INT32_MAX; 125 | 126 | for(i = 0; i < group_len; i++) { 127 | if(a[i] <= pivot[index]) 128 | pcounts[index]++; 129 | else { 130 | i--; 131 | index++; 132 | } 133 | } 134 | 135 | // 发送各个段的长度 136 | int rcounts[num_procs]; 137 | MPI_Alltoall(pcounts, 1, MPI_INT, rcounts, 1, MPI_INT, MPI_COMM_WORLD); 138 | 139 | // 计算位移 140 | int rdispls[num_procs]; 141 | sdispls[0] = 0; 142 | rdispls[0] = 0; 143 | for(i = 1;i < num_procs;i++) { 144 | sdispls[i] = sdispls[i-1] + pcounts[i-1]; 145 | rdispls[i] = rdispls[i-1] + rcounts[i-1]; 146 | } 147 | // 计算总长度 148 | int totalcounts = 0; 149 | for(i = 0; i < num_procs; i++) 150 | totalcounts += rcounts[i]; 151 | 152 | int *b = (int *)malloc(totalcounts*sizeof(int)); 153 | 154 | // 每个处理器发送数据给其他所有处理器,且每个处理发送的数据长度都不同 155 | // 故有长度数组和位移数组 156 | MPI_Alltoallv(a, pcounts, sdispls, MPI_INT, 157 | b, rcounts, rdispls, MPI_INT, MPI_COMM_WORLD); 158 | 159 | // 归并排序 160 | merge_sort(b, 0, totalcounts-1); 161 | 162 | // Proc#0 收集有序数组 163 | MPI_Gather(&totalcounts, 1, MPI_INT, rcounts, 1, MPI_INT, 0, MPI_COMM_WORLD); 164 | 165 | rdispls[0] = 0; 166 | for(i = 1; i < num_procs; i++) 167 | rdispls[i] = rdispls[i-1] + rcounts[i-1]; 168 | 169 | MPI_Gatherv(b, totalcounts, MPI_INT, result, rcounts, rdispls, MPI_INT, 0, MPI_COMM_WORLD); 170 | 171 | if(id_procs == 0) { 172 | if (check_array(result, ans, n)) 173 | printf("Done.\n"); 174 | free(ans); 175 | free(result); 176 | free(array); 177 | } 178 | if (b != NULL) { 179 | free(b); 180 | b = NULL; 181 | } 182 | if (a != NULL) { 183 | free(a); 184 | a = NULL; 185 | } 186 | 187 | MPI_Finalize(); 188 | return 0; 189 | } 190 | -------------------------------------------------------------------------------- /sort/quick_sort.c: -------------------------------------------------------------------------------- 1 | #include "sort.h" 2 | 3 | enum TagType { 4 | ORDER = 1, 5 | UNORDER, 6 | MMSG, 7 | LEN 8 | }; 9 | 10 | 11 | int getpow2(int n) { 12 | int ans = 0; 13 | while((n = n >> 1)) 14 | ans++; 15 | return ans; 16 | } 17 | 18 | 19 | int partition(int *a, int k, int l) 20 | { 21 | int pivof = a[l]; 22 | int temp; 23 | int i = k-1; 24 | for(int j = k; j < l; j++) { 25 | if (a[j] <= pivof) { 26 | i++; 27 | temp = a[i]; 28 | a[i] = a[j]; 29 | a[j] = temp; 30 | } 31 | } 32 | temp = a[i+1]; 33 | a[i+1] = a[l]; 34 | a[l] = temp; 35 | return i+1; 36 | } 37 | 38 | 39 | void do_sort(int *a, int i, int j) 40 | { 41 | if (i < j) { 42 | int r = partition(a, i, j); 43 | do_sort(a, i, r-1); 44 | do_sort(a, r+1, j); 45 | } 46 | } 47 | 48 | 49 | void quicksort(int *a, int beg, int end, int 
m, int id_procs) 50 | { 51 | int pivot; 52 | int dest; 53 | int len; 54 | MPI_Status status; 55 | if (beg < end) { 56 | if (m > 0) { 57 | dest = id_procs + (1 << (m-1)); 58 | pivot = partition(a, beg, end); 59 | len = end-(pivot+1)+1; 60 | m = m-1 > 0 ? m-1:0; 61 | // send info to Proc#id+2^m-1, ask it run latter array 62 | MPI_Send(&len, 1, MPI_INT, dest, LEN, MPI_COMM_WORLD); 63 | MPI_Send(&m, 1, MPI_INT, dest, MMSG, MPI_COMM_WORLD); 64 | MPI_Send(&a[pivot+1], len, MPI_INT, dest, UNORDER, MPI_COMM_WORLD); 65 | printf("Proc#%d send %d data to Proc#%d\n", id_procs, len, dest); 66 | 67 | // run former array local 68 | quicksort(a, beg, pivot-1, m, id_procs); 69 | 70 | // Get the order array from Proc#id+2^m-1 71 | if (len > 1) { 72 | int *b = (int *)malloc(len*sizeof(int)); 73 | MPI_Recv(b, len, MPI_INT, dest, ORDER, MPI_COMM_WORLD, &status); 74 | printf("Proc#%d recv %d data from Proc#%d\n", id_procs, len, dest); 75 | // Merge 76 | memcpy(&a[pivot+1], b, len*sizeof(int)); 77 | free(b); 78 | } 79 | 80 | if ((id_procs >> (m+1)) % 2) { 81 | dest = id_procs - (1 << (m+1)); 82 | len = end -beg +1; 83 | MPI_Send(a, len, MPI_INT, dest, ORDER, MPI_COMM_WORLD); 84 | printf("Proc#%d send %d data to Proc#%d\n", id_procs, len, dest); 85 | } 86 | } 87 | else { 88 | pivot = partition(a, beg, end); 89 | do_sort(a, beg, pivot-1); 90 | do_sort(a, pivot+1, end); 91 | 92 | // return the order array 93 | if (id_procs % 2) { 94 | dest = id_procs -1; 95 | len = end - beg +1; 96 | MPI_Send(a, len, MPI_INT, dest, ORDER, MPI_COMM_WORLD); 97 | printf("Proc#%d send %d data to Proc#%d\n", id_procs, len, dest); 98 | } 99 | } 100 | } 101 | } 102 | 103 | int main(int argc, char *argv[]) 104 | { 105 | int num_procs, id_procs; 106 | MPI_Status status; 107 | MPI_Init(&argc, &argv); 108 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 109 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 110 | 111 | if (argc != 2) { 112 | fprintf(stderr, "Please input the size of data array.\n"); 113 | return 1; 114 | } 115 | int n = atoi(argv[1]); 116 | if (n <= 0) { 117 | fprintf(stderr, "Please, the size must > 0.\n"); 118 | return 1; 119 | } 120 | 121 | int *a, *b; 122 | int m = getpow2(num_procs); 123 | int len; 124 | a = (int*)malloc((n+2)*sizeof(int)); 125 | if (id_procs == 0) { 126 | b = (int*)malloc((n+2)*sizeof(int)); 127 | random_array(a, n); 128 | memcpy(b, a, n*sizeof(int)); 129 | qsort(b, n, sizeof(int), compare); 130 | } 131 | 132 | 133 | if (id_procs != 0) { 134 | // None-0 Proc Get UnOrder Data from other 135 | MPI_Recv(&len, 1, MPI_INT, MPI_ANY_SOURCE, LEN, MPI_COMM_WORLD, &status); 136 | printf("Proc#%d Run!\n", id_procs); 137 | MPI_Recv(&m, 1, MPI_INT, MPI_ANY_SOURCE, MMSG, MPI_COMM_WORLD, &status); 138 | MPI_Recv(a, len, MPI_INT, MPI_ANY_SOURCE, UNORDER, MPI_COMM_WORLD, &status); 139 | quicksort(a, 0, len-1, m, id_procs); 140 | } else { 141 | quicksort(a, 0, n-1, m, id_procs); 142 | } 143 | 144 | if (id_procs == 0) { 145 | print_array(a, n); 146 | if (check_array(a, b, n)) 147 | printf("Done.\n"); 148 | else 149 | printf("Error Occured!\n"); 150 | } 151 | 152 | free(a); 153 | if (id_procs == 0) { 154 | free(b); 155 | } 156 | MPI_Finalize(); 157 | return 0; 158 | } 159 | -------------------------------------------------------------------------------- /sort/sort.h: -------------------------------------------------------------------------------- 1 | #ifndef _MPI_SORT_H 2 | #define _MPI_SORT_H 3 | 4 | #include "mpi.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define INDEX(i, j, n) (((i)*(n))+(j)) 11 | 12 | #define 
SURCLK(x) \ 13 | do { \ 14 | clock_t t1 = clock(); \ 15 | x \ 16 | clock_t t2 = clock(); \ 17 | printf("Time: %lf\n", (double)(t2-t1)/CLOCKS_PER_SEC); \ 18 | }while(0); 19 | 20 | 21 | void random_array(int *a, int num) { 22 | srand((unsigned)clock()); // seed once; re-seeding inside the loop produces long runs of equal values 23 | for(int i = 0; i < num; i++) { 24 | a[i] = rand() % 400; 25 | } 26 | } 27 | 28 | void print_array(int *a, int num) { 29 | for(int i = 0; i < num; i++) { 30 | if (i % 20 == 0) 31 | printf("\n"); 32 | printf("%d ", a[i]); 33 | } 34 | printf("\n"); 35 | } 36 | 37 | int check_array(int *B, int *C, int num) { 38 | for(int i = 0; i < num; i++) { 39 | if (B[i] != C[i]) { 40 | printf("A[%d] = %d not %d!\n", i, B[i], C[i]); 41 | return 0; 42 | } 43 | } 44 | return 1; 45 | } 46 | 47 | // qsort comparator: must return a negative, zero or positive value (values here stay small, so subtraction cannot overflow) 48 | int compare(const void *arg1, const void *arg2) { 49 | return *(int*)arg1 - *(int*)arg2; 50 | } 51 | 52 | #endif // _MPI_SORT_H 53 | --------------------------------------------------------------------------------
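
A minimal stand-alone sketch (hypothetical driver, not one of the repository's programs) showing how the sort.h helpers are meant to be combined: random_array fills the data, compare feeds qsort, SURCLK times the call, and print_array dumps the result. Since sort.h also pulls in mpi.h, it would still be compiled with mpicc.

#include <stdio.h>
#include <stdlib.h>
#include "sort.h"

int main(void)
{
    int n = 64;                                  /* arbitrary demo size */
    int *a = (int*)malloc(n*sizeof(int));

    random_array(a, n);                          /* values in [0, 400) */
    SURCLK(qsort(a, n, sizeof(int), compare);)   /* prints the elapsed time */
    print_array(a, n);                           /* 20 numbers per row */

    free(a);
    return 0;
}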