├── .gitignore ├── PP_01 ├── 1.c ├── 2.c ├── 3.c ├── 4.c ├── 5.c ├── Makefile ├── ex-1-2020.pdf ├── run.sh └── util.h ├── PP_02 ├── 1.c ├── 2.c ├── 3.c ├── 4_1.c ├── 4_2.c ├── 5.c ├── 6.c ├── 7.c ├── 7_2.c ├── Makefile ├── config ├── mpi-lab.pdf ├── run.sh └── test.sh ├── README.md ├── example ├── mini-omp-demo │ ├── badloop.c │ ├── cri.c │ ├── critical.c │ ├── dis-err.c │ ├── dis-ok.c │ ├── dis-ok1.c │ ├── loopA1.c │ ├── loopA2.c │ ├── master.c │ ├── pfor-no-schedule.c │ ├── pfor.c │ ├── pi01.c │ ├── pi02.c │ ├── pi03.c │ ├── pi04.c │ ├── pip.c │ ├── pipe.c │ ├── private.c │ ├── reduction.c │ ├── section.c │ ├── single.c │ └── threadprivate.c ├── mpi │ ├── bigdata_sort.c │ ├── matmul.c │ ├── matmul1.c │ ├── max.c │ ├── mul.c │ ├── pi.c │ ├── quicksort.c │ ├── rece.c │ ├── sort.c │ └── sort_.c ├── mpi_demo │ ├── allgather │ ├── allgather.c │ ├── allgatherv │ ├── allgatherv.c │ ├── allreduce-int-sum │ ├── allreduce-int-sum.c │ ├── allreduce-vector-sum.c │ ├── alltoall │ ├── alltoall.c │ ├── alltoallv.c │ ├── bcast │ ├── bcast.c │ ├── gather │ ├── gather.c │ ├── gatherv.c │ ├── isend_irecv │ ├── isend_irecv.c │ ├── mpi-1.c │ ├── mpi_config │ ├── overview │ ├── overview.c │ ├── pack │ ├── pack.c │ ├── pack1 │ ├── pack1.c │ ├── reduce-int-sum │ ├── reduce-int-sum.c │ ├── reduce-max.c │ ├── reduce-maxloc.c │ ├── reduce-minloc.c │ ├── reduce-user-complex │ ├── reduce-user-complex.c │ ├── reduce-user-matrix.c │ ├── reduce-vector-sum.c │ ├── ring │ ├── ring.c │ ├── scan.c │ ├── scanme.c │ ├── scatter.c │ ├── scatterv.c │ ├── type_struct.c │ ├── type_struct1 │ ├── type_struct1.c │ ├── type_vector-1.c │ └── type_vector.c └── openmp │ ├── Makefile │ ├── copyin.c │ ├── critical.c │ ├── dynamic.c │ ├── firstprivate.c │ ├── for.c │ ├── fork_join.c │ ├── get_num_procs.c │ ├── get_thread_num.c │ ├── lastprivate.c │ ├── lock.c │ ├── omp_in_parallel.c │ ├── ordered.c │ ├── parallel.c │ ├── private.c │ ├── reduction.c │ ├── schedule.c │ ├── section.c │ ├── set_dynamic.c │ ├── set_num_threads.c │ ├── shared.c │ ├── size.c │ ├── test_lock.c │ └── threadprivate.c ├── matrix ├── cannon.c ├── fox.c ├── matrix.h └── tranpose.c ├── parallel01 ├── PSRS.c ├── input.txt ├── merge ├── merge.c └── pi.c ├── parallel02 ├── PSRS.c ├── input.txt ├── pi.c └── test.c └── sort ├── enum_sort.c ├── psrs_sort.c ├── quick_sort.c └── sort.h /.gitignore: -------------------------------------------------------------------------------- 1 | a.out 2 | .vscode/ 3 | build/ 4 | -------------------------------------------------------------------------------- /PP_01/1.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | #include 3 | 4 | int _1() { 5 | int A[256]; 6 | int B[256]; 7 | random_array(A, 256); 8 | copy_array(B, A, 256); 9 | omp_set_num_threads(9); 10 | 11 | clock_t start = clock(); 12 | for(int i = 2; i <= 10; i++) { 13 | #pragma omp parallel for 14 | for (int j = 2; j <= 10; j++) { 15 | A[IN(i,j,16)] = 0.5*(A[IN(i-1,j-1,16)]+A[IN(i+1,j+1,16)]); 16 | } 17 | } 18 | clock_t end = clock(); 19 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 20 | 21 | start = clock(); 22 | for(int i = 2; i <= 10; i++) { 23 | for (int j = 2; j <= 10; j++) { 24 | B[IN(i,j,16)] = 0.5*(B[IN(i-1,j-1,16)]+B[IN(i+1,j+1,16)]); 25 | } 26 | } 27 | end = clock(); 28 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 29 | 30 | return check_ans(A, B, 256); 31 | } 32 | 33 | int _2() { 34 | int A[50]; 35 | int B[22]; 36 | int C[50]; 37 | random_array(A, 50); 38 | 
random_array(B, 50); 39 | copy_array(C, A, 50); 40 | omp_set_num_threads(8); 41 | 42 | clock_t start = clock(); 43 | for(int k = 2; k <= 20; k+=2) { 44 | #pragma omp parallel for 45 | for(int i = k; i <= min(20,k+1); i++) { 46 | A[2*i+2] = A[2*i-2]+B[i]; 47 | } 48 | } 49 | clock_t end = clock(); 50 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 51 | 52 | start = clock(); 53 | for(int i = 2; i <= 20; i++) { 54 | C[2*i+2] = C[2*i-2]+B[i]; 55 | } 56 | end = clock(); 57 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 58 | 59 | return check_ans(A, C, 50); 60 | 61 | } 62 | 63 | int _3() { 64 | int A[24]; 65 | int B[24]; 66 | int C[24]; 67 | 68 | int B2[24]; 69 | int C2[24]; 70 | 71 | random_array(A, 24); 72 | random_array(B, 24); 73 | random_array(C, 24); 74 | copy_array(B2, B, 24); 75 | copy_array(C2, C, 24); 76 | 77 | omp_set_num_threads(8); 78 | 79 | int D[24]; 80 | int share = 1; 81 | D[0] = 1; 82 | clock_t start = clock(); 83 | // find the parallel block border 84 | #pragma omp parallel for shared(share) 85 | for(int i = 2; i <= 20; i++) { 86 | if (A[i] < 0 && A[i+1] > 0) 87 | D[share++] = i; 88 | } 89 | qsort(D, share, sizeof(int), cmp); 90 | D[share++] = 20; 91 | 92 | // Parallel in the Block, Serial the Block 93 | for(int j = 0; j < share-1; j++) 94 | #pragma omp parallel for 95 | for(int i = D[j]+1; i <= D[j+1]; i++) { 96 | if (A[i] > 0) 97 | B[i] = C[i-1]+1; 98 | else 99 | C[i] = B[i]-1; 100 | } 101 | clock_t end = clock(); 102 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 103 | 104 | start = clock(); 105 | for(int i = 2; i <= 20; i++) { 106 | if (A[i] > 0) 107 | B2[i] = C2[i-1]+1; 108 | else 109 | C2[i] = B2[i]-1; 110 | } 111 | end = clock(); 112 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 113 | 114 | int ans = check_ans(B, B2, 24); 115 | ans &= check_ans(C, C2, 24); 116 | return ans; 117 | } 118 | 119 | int main() { 120 | if (!_1()) 121 | printf("Error! 1_1\n"); 122 | else 123 | printf("1_1 Done.\n"); 124 | 125 | if (!_2()) 126 | printf("Error! 1_2\n"); 127 | else 128 | printf("1_2 Done.\n"); 129 | 130 | if (!_3()) 131 | printf("Error! 
1_3\n"); 132 | else 133 | printf("1_3 Done.\n"); 134 | } -------------------------------------------------------------------------------- /PP_01/2.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | #include 3 | 4 | #ifndef N 5 | #define N 20 6 | #endif 7 | 8 | 9 | #ifndef M 10 | #define M 20 11 | #endif 12 | 13 | int _1() { 14 | int n = (M+2)*N; 15 | int A[n]; 16 | int B[n]; 17 | int C = 2; 18 | 19 | random_array(A, n); 20 | copy_array(B, A, n); 21 | omp_set_num_threads(8); 22 | 23 | clock_t start = clock(); 24 | for(int i = 1; i <= M; i++) { 25 | #pragma omp parallel for 26 | for (int j = 1; j <= N; j++) { 27 | A[IN(i+1,j+1,N)] = A[IN(i,j,N)]+C; 28 | } 29 | } 30 | clock_t end = clock(); 31 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 32 | 33 | start = clock(); 34 | for(int i = 1; i <= M; i++) { 35 | for (int j = 1; j <= N; j++) { 36 | B[IN(i+1,j+1,N)] = B[IN(i,j,N)]+C; 37 | } 38 | } 39 | end = clock(); 40 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 41 | 42 | return check_ans(A, B, n); 43 | } 44 | 45 | int _2() { 46 | int X[101]; 47 | int X2[101]; 48 | int Y[201]; 49 | int Y2[201]; 50 | int B[101]; 51 | int B2[101]; 52 | int *A, *C; 53 | int *A2; 54 | 55 | int n = 110*110; 56 | A = malloc(n*sizeof(int)); 57 | A2 = malloc(n*sizeof(int)); 58 | C = malloc(n*sizeof(int)); 59 | random_array(A, n); 60 | random_array(C, n); 61 | random_array(Y, 201); 62 | copy_array(A2, A, n); 63 | copy_array(Y2, Y, 201); 64 | 65 | omp_set_num_threads(10); 66 | clock_t start = clock(); 67 | 68 | for(int i = 1; i <= 100; i++) { 69 | for(int j = 1; j <= 100; j++) { 70 | B[j] = A[IN(i,N,110)]; 71 | #pragma omp parallel for 72 | for(int k = 1; k <= 100; k++) { 73 | A[IN(j+1, k, 110)] = B[j] + C[IN(j, k, 110)]; 74 | } 75 | Y[i+j] = A[IN(j+1, N, 110)]; 76 | } 77 | } 78 | #pragma omp parallel for 79 | for(int i = 1; i <= 100; i++) 80 | X[i] = Y[i] + 10; 81 | 82 | clock_t end = clock(); 83 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 84 | 85 | 86 | start = clock(); 87 | for(int i = 1; i <= 100; i++) { 88 | X2[i] = Y2[i] + 10; 89 | for(int j = 1; j <= 100; j++) { 90 | B2[j] = A2[IN(i,N,110)]; 91 | for(int k = 1; k <= 100; k++) { 92 | A2[IN(j+1, k, 110)] = B2[j] + C[IN(j, k, 110)]; 93 | } 94 | Y2[i+j] = A2[IN(j+1, N, 110)]; 95 | } 96 | } 97 | end = clock(); 98 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 99 | 100 | free(A); 101 | free(A2); 102 | free(C); 103 | return check_ans(X+1, X2+1, 100); 104 | } 105 | 106 | int main() 107 | { 108 | if (!_1()) 109 | printf("Error! 2_1\n"); 110 | else 111 | printf("2_1 Done.\n"); 112 | 113 | if (!_2()) 114 | printf("Error! 
2_2\n"); 115 | else 116 | printf("2_2 Done.\n"); 117 | } 118 | -------------------------------------------------------------------------------- /PP_01/3.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | int _1() { 4 | int *A; 5 | int *B; 6 | int n = 310*110; 7 | A = malloc(n*sizeof(int)); 8 | B = malloc(n*sizeof(int)); 9 | random_array(A, n); 10 | copy_array(B, A, n); 11 | omp_set_num_threads(10); 12 | 13 | clock_t start = clock(); 14 | for(int i = 1; i <= 100; i++) { 15 | for (int j = 1; j <= 50; j++) { 16 | A[IN(3*i+2, 2*j-1, 110)] = A[IN(5*j,i+3,110)]+2; 17 | } 18 | } 19 | clock_t end = clock(); 20 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 21 | 22 | start = clock(); 23 | for(int i = 1; i <= 100; i++) { 24 | for (int j = 1; j <= 50; j++) { 25 | B[IN(3*i+2, 2*j-1, 110)] = B[IN(5*j,i+3,110)]+2; 26 | } 27 | } 28 | end = clock(); 29 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 30 | 31 | int ans = check_ans(A, B, n); 32 | free(A); 33 | free(B); 34 | return ans; 35 | } 36 | 37 | int _2() { 38 | int x; 39 | int y = rand() % 100 -50; 40 | int z = rand() % 100 -50; 41 | int z2 = z; 42 | int B[101]; 43 | int A[101]; 44 | int A2[101]; 45 | int C[102]; 46 | int C2[102]; 47 | int *D; 48 | int *D2; 49 | 50 | int n = 101*51; 51 | D = malloc(n*sizeof(int)); 52 | D2 = malloc(n*sizeof(int)); 53 | random_array(A, 101); 54 | random_array(B, 101); 55 | random_array(C, 102); 56 | random_array(D, n); 57 | copy_array(A2, A, 101); 58 | copy_array(C2, C, 102); 59 | copy_array(D2, D, n); 60 | 61 | omp_set_num_threads(5); 62 | 63 | clock_t start = clock(); 64 | x = y * 2; 65 | #pragma omp parallel for 66 | for(int i = 1; i <= 100; i++) { 67 | C[i] = B[i] + x; 68 | } 69 | #pragma omp parallel for 70 | for(int i = 1; i <= 100; i++) { 71 | A[i] = C[i-1] + z; 72 | for(int j = 1; j <= 50; j++) { 73 | D[IN(i, j, 51)] = D[IN(i, j-1, 51)] + x; 74 | } 75 | } 76 | C[101] = A[100] * B[100]; 77 | z = y + 4; 78 | clock_t end = clock(); 79 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 80 | 81 | start = clock(); 82 | x = y * 2; 83 | for(int i = 1; i <= 100; i++) { 84 | C2[i] = B[i] + x; 85 | A2[i] = C2[i-1] + z2; 86 | C2[i+1] = A2[i] * B[i]; 87 | for(int j = 1; j <= 50; j++) { 88 | D2[IN(i, j, 51)] = D2[IN(i, j-1, 51)] + x; 89 | } 90 | } 91 | z2 = y + 4; 92 | end = clock(); 93 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 94 | 95 | 96 | int ans = check_ans(A2, A, 101); 97 | ans &= check_ans(C2, C, 102); 98 | ans &= check_ans(D2, D, n); 99 | free(D); 100 | free(D2); 101 | return ans; 102 | } 103 | 104 | int main() { 105 | if (!_1()) 106 | printf("Error! 3_1\n"); 107 | else 108 | printf("3_1 Done.\n"); 109 | 110 | if (!_2()) 111 | printf("Error! 
3_2\n"); 112 | else 113 | printf("3_2 Done.\n"); 114 | } -------------------------------------------------------------------------------- /PP_01/4.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | int _1() { 4 | int A[128]; 5 | int B[128]; 6 | 7 | random_array(A, 128); 8 | copy_array(B, A, 128); 9 | 10 | omp_set_num_threads(4); 11 | clock_t start = clock(); 12 | // diagonal parallel 13 | for(int i = 4; i <= 20; i++) { 14 | #pragma omp parallel for 15 | for(int j = max(2, i-10); j <= min(i/2, 10); j++) { 16 | A[IN(j, i-j, 11)] = (A[IN(j, i-j-1, 11)] + A[IN(j-1, i-j, 11)])*0.5; 17 | } 18 | } 19 | clock_t end = clock(); 20 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 21 | 22 | start = clock(); 23 | for(int i = 2; i <= 10; i++) { 24 | for(int j = i; j <= 10; j++) { 25 | B[IN(i, j, 11)] = (B[IN(i, j-1, 11)] + B[IN(i-1, j, 11)])*0.5; 26 | } 27 | } 28 | end = clock(); 29 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 30 | 31 | return check_ans(A, B, 128); 32 | } 33 | 34 | int _2() { 35 | int A[20]; 36 | int A2[20]; 37 | int B[20]; 38 | 39 | random_array(A, 20); 40 | random_array(B, 20); 41 | copy_array(A2, A, 20); 42 | 43 | omp_set_num_threads(3); 44 | clock_t start = clock(); 45 | for(int k = 1; k <= 16; k+=3) { 46 | #pragma omp parallel for 47 | for(int i = k; i <= min(16, k+2); i++) { 48 | A2[i+3] = A2[i] + B[i]; 49 | } 50 | } 51 | clock_t end = clock(); 52 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 53 | 54 | start = clock(); 55 | for(int i = 1; i <= 16; i++) { 56 | A[i+3] = A[i] + B[i]; 57 | } 58 | end = clock(); 59 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 60 | 61 | return check_ans(A, A2, 20); 62 | } 63 | 64 | 65 | int main() { 66 | if (!_1()) 67 | printf("Error! 4_1\n"); 68 | else 69 | printf("4_1 Done.\n"); 70 | 71 | if (!_2()) 72 | printf("Error! 
4_2\n"); 73 | else 74 | printf("4_2 Done.\n"); 75 | 76 | printf("4_3 is the same as 4_2\n"); 77 | } -------------------------------------------------------------------------------- /PP_01/5.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | int _1() { 4 | float A[101]; 5 | float B[101]; 6 | float C[101]; 7 | float D[101]; 8 | float A2[101]; 9 | float B2[101]; 10 | float C2[101]; 11 | float D2[101]; 12 | 13 | random_array_f(A, 101); 14 | random_array_f(B, 101); 15 | random_array_f(C, 101); 16 | random_array_f(D, 101); 17 | copy_array_f(A2, A, 101); 18 | copy_array_f(B2, B, 101); 19 | copy_array_f(C2, C, 101); 20 | copy_array_f(D2, D, 101); 21 | 22 | omp_set_num_threads(10); 23 | 24 | clock_t start = clock(); 25 | for(int i = 1; i <= 100; i++) { 26 | B[i] = C[i-1] * 2; 27 | C[i] = 1 / B[i]; 28 | } 29 | #pragma omp parallel for 30 | for(int i = 1; i <= 100; i++) { 31 | A[i] = A[i] + B[i-1]; 32 | D[i] = C[i] * C[i]; 33 | } 34 | clock_t end = clock(); 35 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 36 | 37 | start = clock(); 38 | for(int i = 1; i <= 100; i++) { 39 | A2[i] = A2[i] + B2[i-1]; 40 | B2[i] = C2[i-1] * 2; 41 | C2[i] = 1 / B2[i]; 42 | D2[i] = C2[i] * C2[i]; 43 | } 44 | end = clock(); 45 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 46 | 47 | return check_ans_f(D, D2, 101); 48 | } 49 | 50 | int _2() { 51 | int *A; 52 | int *A2; 53 | int *B; 54 | int *C; 55 | int *D; 56 | int *D2; 57 | 58 | int n = 1000; 59 | A = malloc(n*sizeof(int)); 60 | A2 = malloc(n*sizeof(int)); 61 | B = malloc(n*sizeof(int)); 62 | C = malloc(n*sizeof(int)); 63 | D = malloc(n*sizeof(int)); 64 | D2 = malloc(n*sizeof(int)); 65 | random_array(A, n); 66 | random_array(B, n); 67 | random_array(C, n); 68 | copy_array(A2, A, n); 69 | 70 | omp_set_num_threads(5); 71 | 72 | clock_t start = clock(); 73 | #pragma omp parallel for 74 | for(int i = 1; i <= 500; i++) { 75 | A[i] = B[i] + C[i]; 76 | D[i] = (A[i] + A[1000-i]) / 2; 77 | } 78 | #pragma omp parallel for 79 | for(int i = 501; i <= 999; i++) { 80 | A[i] = B[i] + C[i]; 81 | D[i] = (A[i] + A[1000-i]) / 2; 82 | } 83 | clock_t end = clock(); 84 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 85 | 86 | start = clock(); 87 | for(int i = 1; i <= 999; i++) { 88 | A2[i] = B[i] + C[i]; 89 | D2[i] = (A2[i] + A2[1000-i]) / 2; 90 | } 91 | end = clock(); 92 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 93 | 94 | int ans = check_ans(D, D2, n); 95 | free(A); 96 | free(A2); 97 | free(B); 98 | free(C); 99 | free(D); 100 | free(D2); 101 | return ans; 102 | } 103 | 104 | int _3() { 105 | int *A; 106 | int *A2; 107 | int *C; 108 | int *D; 109 | int *D2; 110 | 111 | int n = 601 * 201; 112 | A = malloc(n*sizeof(int)); 113 | A2 = malloc(n*sizeof(int)); 114 | C = malloc(101*101*sizeof(int)); 115 | D = malloc(101*101*sizeof(int)); 116 | D2 = malloc(101*101*sizeof(int)); 117 | 118 | random_array(C, 101*101); 119 | random_array(A, n); 120 | copy_array(A2, A, n); 121 | 122 | omp_set_num_threads(5); 123 | clock_t start = clock(); 124 | #pragma omp parallel for 125 | for(int i = 1; i <= 100; i++) { 126 | for(int j = 1; j <= 100; j++) { 127 | A[IN(3*i+2*j+99, 2*j, 201)] = C[IN(i, j, 101)] * 2; 128 | D[IN(i, j, 101)] = A[IN(i-j+99, i+j, 201)]; 129 | } 130 | } 131 | clock_t end = clock(); 132 | printf("OpenMP costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 133 | 134 | start = clock(); 135 | for(int i = 1; i <= 100; i++) { 136 
| for(int j = 1; j <= 100; j++) { 137 | A2[IN(3*i+2*j+99, 2*j, 201)] = C[IN(i, j, 101)] * 2; 138 | D2[IN(i, j, 101)] = A2[IN(i-j+99, i+j, 201)]; 139 | } 140 | } 141 | end = clock(); 142 | printf("C costs : %Lf\n", (long double)(end-start)/CLOCKS_PER_SEC); 143 | 144 | int ans = check_ans(D2, D, 101*101); 145 | free(A); 146 | free(A2); 147 | free(C); 148 | free(D); 149 | free(D2); 150 | return ans; 151 | } 152 | 153 | int main() { 154 | if (!_1()) 155 | printf("Error! 5_1\n"); 156 | else 157 | printf("5_1 Done.\n"); 158 | 159 | if (!_2()) 160 | printf("Error! 5_2\n"); 161 | else 162 | printf("5_2 Done.\n"); 163 | 164 | if (!_3()) 165 | printf("Error! 5_3\n"); 166 | else 167 | printf("5_3 Done.\n"); 168 | } -------------------------------------------------------------------------------- /PP_01/Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | OPENMP=-fopenmp 3 | SOURCES:=$(shell find $($(shell pwd)) -name '*.c') 4 | OBJS=$(SOURCES:%.c=%) 5 | 6 | 7 | all : $(OBJS) 8 | @echo "编译中..." 9 | @echo $(SOURCES) 10 | @echo "编译完成!" 11 | if [ ! -d "build" ]; then mkdir build; fi 12 | mv $(OBJS) build 13 | 14 | 15 | %: %.c 16 | $(CC) $(OPENMP) $< -o $@ 17 | 18 | .PHONY: clean 19 | clean: 20 | rm -rf build/ -------------------------------------------------------------------------------- /PP_01/ex-1-2020.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/PP_01/ex-1-2020.pdf -------------------------------------------------------------------------------- /PP_01/run.sh: -------------------------------------------------------------------------------- 1 | make 2 | 3 | for file in `ls build` 4 | do 5 | ./build/${file} 6 | done 7 | -------------------------------------------------------------------------------- /PP_01/util.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define IN(i, j, LINE) ((i)*(LINE)+(j)) 8 | 9 | void random_array(int *a, int num) { 10 | srand(time(NULL)); 11 | for(int i = 0; i < num; i++) { 12 | a[i] = rand() % 100 - 50; 13 | } 14 | } 15 | 16 | void random_array_f(float *a, int num) { 17 | srand(time(NULL)); 18 | for(int i = 0; i < num; i++) { 19 | a[i] = (float)rand() / (RAND_MAX); 20 | } 21 | } 22 | 23 | void copy_array(int *dst, int *src, int num) { 24 | for(int i = 0; i < num; i++) { 25 | dst[i] = src[i]; 26 | } 27 | } 28 | 29 | void copy_array_f(float *dst, float *src, int num) { 30 | for(int i = 0; i < num; i++) { 31 | dst[i] = src[i]; 32 | } 33 | } 34 | 35 | int check_ans(int *a, int *b, int num) { 36 | for(int i = 0; i < num; i++) { 37 | if (a[i] != b[i]) 38 | return 0; 39 | } 40 | return 1; 41 | } 42 | 43 | int check_ans_f(float *a, float *b, int num) { 44 | for(int i = 0; i < num; i++) { 45 | if (fabs(a[i]-b[i]) > 1e-2) 46 | return 0; 47 | } 48 | return 1; 49 | } 50 | 51 | void print(int *D, int num) { 52 | for(int i = 0; i < num; i++) 53 | printf("%d ", D[i]); 54 | printf("\n"); 55 | } 56 | 57 | int min(int a, int b) { 58 | return a > b ? b : a; 59 | } 60 | 61 | int max(int a, int b) { 62 | return a > b ? 
a : b; 63 | } 64 | 65 | int cmp(const void *a , const void *b) { 66 | return *(int *)a - *(int *)b; //升序排序 67 | } 68 | //return *(int *)b - *(int *)a; //降序排序 -------------------------------------------------------------------------------- /PP_02/1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int main(int argc, char *argv[]) 7 | { 8 | int id_procs, num_procs; 9 | int msg = 10; 10 | int tag = 5; 11 | char seq[16] = "Hello MPI!"; 12 | char seqin[16]; 13 | 14 | MPI_Init(&argc, &argv); 15 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 16 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 17 | 18 | 19 | int color = id_procs % 3; 20 | int key = id_procs / 3; 21 | MPI_Comm split_comm_world; 22 | MPI_Status status; 23 | int rank; 24 | int size; 25 | int msgin; 26 | 27 | // MPI split COMM_WORLD into 3 groups 28 | MPI_Comm_split(MPI_COMM_WORLD, color, key, &split_comm_world); 29 | MPI_Comm_rank(split_comm_world, &rank); 30 | MPI_Comm_size(split_comm_world, &size); 31 | 32 | // message to 0 proc of Each Comm 33 | if (id_procs == 0) { 34 | strcpy(seqin, seq); 35 | for (int i = 1; i < 3; i++) 36 | MPI_Send(&seq, 16, MPI_CHAR, i, tag, MPI_COMM_WORLD); 37 | } else if (id_procs > 0 && id_procs < 3) { 38 | MPI_Recv(&seqin, 16, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status); 39 | } 40 | 41 | // Broadcast within the group 42 | MPI_Bcast(&seqin, 16, MPI_CHAR, 0, split_comm_world); 43 | 44 | printf("MPI Comm rank %d, original id %d, size %d. the new msg is %s\n", rank, id_procs, size, seqin); 45 | MPI_Finalize(); 46 | return 0; 47 | } -------------------------------------------------------------------------------- /PP_02/2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print(int id, int id_procs, int num_procs, int *recvbuf) { 6 | if (id_procs == id) { 7 | printf("recv buf %d : ", id_procs); 8 | for(int i = 0; i < num_procs; i++) { 9 | printf("%d, ", recvbuf[i]); 10 | } 11 | printf("\n"); 12 | } 13 | } 14 | 15 | int main(int argc, char *argv[]) 16 | { 17 | int id_procs, num_procs; 18 | MPI_Status status; 19 | MPI_Request req; 20 | 21 | MPI_Init(&argc, &argv); 22 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 23 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 24 | 25 | int sendbuf[num_procs]; 26 | int recvbuf[num_procs]; 27 | for(int i = 0; i < num_procs; i++) 28 | sendbuf[i] = 10*(i+1)+id_procs; 29 | 30 | for(int i = 0; i < num_procs; i++) { 31 | if (i != id_procs) { 32 | MPI_Send(sendbuf+i, 1, MPI_INT, i, id_procs, MPI_COMM_WORLD); 33 | MPI_Recv(recvbuf+i, 1, MPI_INT, i, i, MPI_COMM_WORLD, &status); 34 | } 35 | else { 36 | recvbuf[i] = sendbuf[i]; 37 | } 38 | } 39 | 40 | MPI_Barrier(MPI_COMM_WORLD); 41 | 42 | // for(int i = 0; i < num_procs; i++) 43 | // print(i, id_procs, num_procs, recvbuf); 44 | 45 | MPI_Finalize(); 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /PP_02/3.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print(int id, int id_procs, int num_procs, int *recvbuf) { 6 | if (id_procs == id) { 7 | printf("recv buf %d : ", id_procs); 8 | for(int i = 0; i < num_procs; i++) { 9 | printf("%d, ", recvbuf[i]); 10 | } 11 | printf("\n"); 12 | } 13 | } 14 | 15 | int main(int argc, char *argv[]) 16 | { 17 | int id_procs, num_procs; 18 | 19 | MPI_Init(&argc, &argv); 20 | 
MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 21 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 22 | 23 | int sendbuf[num_procs]; 24 | int recvbuf[num_procs]; 25 | for(int i = 0; i < num_procs; i++) 26 | sendbuf[i] = 10*(i+1)+id_procs; 27 | 28 | MPI_Alltoall(sendbuf, 1, MPI_INT, recvbuf, 1, MPI_INT, MPI_COMM_WORLD); 29 | MPI_Barrier(MPI_COMM_WORLD); 30 | 31 | // for(int i = 0; i < num_procs; i++) 32 | // print(i, id_procs, num_procs, recvbuf); 33 | 34 | MPI_Finalize(); 35 | return 0; 36 | } -------------------------------------------------------------------------------- /PP_02/4_1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int id_procs, num_procs; 10 | 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 14 | 15 | srand(clock()); 16 | int data = rand() % 100; 17 | int recvdata; 18 | MPI_Status status; 19 | 20 | printf("data = %d\n", data); 21 | // Butterfly sum 22 | // each loop is a layer 23 | for(int i = 2; i <= num_procs; i <<= 1) { 24 | int tag = i >> 1; 25 | int dest = id_procs ^ tag; 26 | MPI_Send(&data, 1, MPI_INT, dest, tag, MPI_COMM_WORLD); 27 | MPI_Recv(&recvdata, 1, MPI_INT, dest, tag, MPI_COMM_WORLD, &status); 28 | data += recvdata; 29 | } 30 | 31 | printf("Sum is = %d\n", data); 32 | 33 | MPI_Finalize(); 34 | return 0; 35 | } 36 | -------------------------------------------------------------------------------- /PP_02/4_2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int id_procs, num_procs; 10 | 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 14 | 15 | srand(clock()); 16 | int data = rand() % 100; 17 | int recvdata; 18 | MPI_Status status; 19 | 20 | printf("data = %d\n", data); 21 | // Binary Tree sum 22 | // each loop is a layer 23 | for(int i = 2; i <= num_procs; i <<= 1) { 24 | int tag = i >> 1; 25 | int diff = id_procs & tag; 26 | if (diff) { 27 | MPI_Send(&data, 1, MPI_INT, id_procs-tag, tag, MPI_COMM_WORLD); 28 | } else { 29 | MPI_Recv(&recvdata, 1, MPI_INT, id_procs+tag, tag, MPI_COMM_WORLD, &status); 30 | } 31 | data += recvdata; 32 | } 33 | 34 | // now 0 process has the sum 35 | for(int i = num_procs; i >= 2; i >>= 1) { 36 | int tag = i; 37 | if (id_procs % i == 0) { 38 | MPI_Send(&data, 1, MPI_INT, id_procs+(i>>1), tag, MPI_COMM_WORLD); 39 | } else if(id_procs % (i >> 1) == 0) { 40 | MPI_Recv(&data, 1, MPI_INT, id_procs-(i>>1), tag, MPI_COMM_WORLD, &status); 41 | } 42 | } 43 | printf("%d Sum is = %d\n", id_procs, data); 44 | 45 | MPI_Finalize(); 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /PP_02/5.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #define INDEX(i, j, N) (((i)*(N))+(j)) 10 | 11 | void random_mat(int *a, int num) { 12 | for(int i = 0; i < num; i++) { 13 | srand(clock()); 14 | for(int j = 0; j < num; j++) { 15 | a[INDEX(i, j, num)] = rand() % 100; 16 | } 17 | } 18 | } 19 | 20 | void print_mat(int *a, int num, int id) { 21 | for(int i = 0; i < num; i++) { 22 | for(int j = 0; j < num; j++) { 23 | printf("|%d : %d ", id, a[INDEX(i, j, num)]); 24 | 
} 25 | printf("\n"); 26 | } 27 | } 28 | 29 | void comp(int *A, int *B, int *C, int num) { 30 | for(int i = 0; i < num; i++) { 31 | for(int j = 0; j < num; j++) { 32 | for(int k = 0; k < num; k++) 33 | C[INDEX(i, j, num)] += A[INDEX(i, k, num)] * B[INDEX(k, j, num)]; 34 | } 35 | } 36 | } 37 | 38 | 39 | int check(int *C, int *nC, int num) { 40 | for(int i = 0; i < num; i++) { 41 | for(int j = 0; j < num; j++) { 42 | if (C[INDEX(i, j, num)] != nC[INDEX(i, j, num)]) { 43 | printf("C[%d,%d] should be %d ,not %d\n", 44 | i,j,C[INDEX(i,j,num)],nC[INDEX(i,j,num)]); 45 | return 0; 46 | } 47 | } 48 | } 49 | return 1; 50 | } 51 | 52 | int main(int argc, char *argv[]) 53 | { 54 | int id_procs, num_procs; 55 | int blksize, sqrt_procs; 56 | 57 | MPI_Init(&argc, &argv); 58 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 59 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 60 | 61 | sqrt_procs = sqrt(num_procs); 62 | if (sqrt_procs * sqrt_procs != num_procs) { 63 | fprintf(stderr, "The Num of Proc must be Perfect square!\n"); 64 | return 1; 65 | } 66 | if (argc != 2) { 67 | fprintf(stderr, "Please add a Parameter about the block size!\n"); 68 | return 1; 69 | } 70 | blksize = atoi(argv[1]); 71 | 72 | // produce random data 73 | int *A, *B, *C, *ans; 74 | int *A_in, *B_in; 75 | int *sA, *sB, *sC; 76 | int N = blksize*sqrt_procs; 77 | 78 | if (id_procs == 0) { 79 | sA = (int*)malloc(N*N*sizeof(int)); 80 | sB = (int*)malloc(N*N*sizeof(int)); 81 | sC = (int*)malloc(N*N*sizeof(int)); 82 | 83 | memset(sC, 0, N*N*sizeof(int)); 84 | random_mat(sA, N); 85 | random_mat(sB, N); 86 | comp(sA, sB, sC, N); 87 | } 88 | A = (int*)malloc(blksize*blksize*sizeof(int)); 89 | B = (int*)malloc(blksize*blksize*sizeof(int)); 90 | C = (int*)malloc(blksize*blksize*sizeof(int)); 91 | ans = (int*)malloc(blksize*blksize*sizeof(int)); 92 | A_in = (int*)malloc(blksize*blksize*sizeof(int)); 93 | B_in = (int*)malloc(blksize*blksize*sizeof(int)); 94 | 95 | memset(C, 0, blksize*blksize*sizeof(int)); 96 | 97 | MPI_Datatype SubMat, Mat; 98 | MPI_Status status; 99 | MPI_Request request; 100 | MPI_Type_vector(blksize, blksize, N, MPI_INT, &SubMat); 101 | MPI_Type_commit(&SubMat); 102 | 103 | MPI_Type_vector(blksize, blksize, blksize, MPI_INT, &Mat); 104 | MPI_Type_commit(&Mat); 105 | 106 | if (id_procs == 0) { 107 | for(int i = 0; i < sqrt_procs; i++) { 108 | int lineoff = blksize * N * i; 109 | for(int j = 0; j < sqrt_procs; j++) { 110 | if (i == 0 && j == 0) { 111 | // 分发矩阵 A 112 | MPI_Isend(sA, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 113 | MPI_Irecv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 114 | MPI_Wait(&request, &status); 115 | // 分发矩阵 B 116 | MPI_Isend(sB, 1, SubMat, 0, 1, MPI_COMM_WORLD, &request); 117 | MPI_Irecv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &request); 118 | MPI_Wait(&request, &status); 119 | continue; 120 | } 121 | int offset = j * blksize + lineoff; 122 | MPI_Send(sA+offset, 1, SubMat, i*sqrt_procs+j, 0, MPI_COMM_WORLD); 123 | MPI_Send(sB+offset, 1, SubMat, i*sqrt_procs+j, 1, MPI_COMM_WORLD); 124 | } 125 | } 126 | } else { 127 | MPI_Recv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &status); 128 | // print_mat(A, blksize, id_procs); 129 | MPI_Recv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &status); 130 | // print_mat(B, blksize, id_procs); 131 | } 132 | 133 | MPI_Comm row_comm, col_comm; 134 | int rank_A, size_A; 135 | int color_A; 136 | int key_A; 137 | 138 | int rank_B, size_B; 139 | int color_B; 140 | int key_B; 141 | 142 | // Comm Group by row 143 | key_A = id_procs % sqrt_procs; 144 | color_A = id_procs / sqrt_procs; 145 | 
MPI_Comm_split(MPI_COMM_WORLD, color_A, key_A, &row_comm); 146 | MPI_Comm_rank(row_comm, &rank_A); 147 | MPI_Comm_size(row_comm, &size_A); 148 | 149 | // Comm Group by B 150 | key_B = id_procs / sqrt_procs; 151 | color_B = id_procs % sqrt_procs; 152 | MPI_Comm_split(MPI_COMM_WORLD, color_B, key_B, &col_comm); 153 | MPI_Comm_rank(col_comm, &rank_B); 154 | MPI_Comm_size(col_comm, &size_B); 155 | 156 | for(int k = 0; k < sqrt_procs; k++) { 157 | if (rank_A == (color_A+k)%size_A) { 158 | memcpy(A_in, A, blksize*blksize*sizeof(int)); 159 | } 160 | // broadcast Ai,j 161 | MPI_Bcast(A_in, 1, Mat, (color_A+k)%size_A, row_comm); 162 | 163 | // compute 164 | comp(A_in, B, C, blksize); 165 | 166 | int dest = (rank_B-1 + size_B)%size_B; 167 | MPI_Send(B, 1, Mat, dest, 0, col_comm); 168 | MPI_Recv(B_in, 1, Mat, (rank_B+1)%size_B, 0, col_comm, &status); 169 | memcpy(B, B_in, blksize*blksize*sizeof(int)); 170 | } 171 | 172 | 173 | // 分发结果 自行比较 174 | if (id_procs == 0) { 175 | for(int i = 0; i < sqrt_procs; i++) { 176 | int lineoff = blksize * N * i; 177 | for(int j = 0; j < sqrt_procs; j++) { 178 | if (i == 0 && j == 0) { 179 | // 分发矩阵 A 180 | MPI_Isend(sC, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 181 | MPI_Irecv(ans, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 182 | MPI_Wait(&request, &status); 183 | continue; 184 | } 185 | int offset = j * blksize + lineoff; 186 | MPI_Send(sC+offset, 1, SubMat, i*sqrt_procs+j, 100, MPI_COMM_WORLD); 187 | } 188 | } 189 | } else { 190 | MPI_Recv(ans, 1, Mat, 0, 100, MPI_COMM_WORLD, &status); 191 | } 192 | 193 | // print_mat(ans, blksize, id_procs); 194 | 195 | if (check(C, ans, blksize)) { 196 | printf("Proc#%d Done.\n", id_procs); 197 | } 198 | 199 | // print_mat(C, blksize, id_procs); 200 | 201 | free(A); 202 | free(B); 203 | free(C); 204 | free(ans); 205 | free(A_in); 206 | free(B_in); 207 | 208 | if (id_procs == 0) { 209 | free(sA); 210 | free(sB); 211 | free(sC); 212 | } 213 | MPI_Finalize(); 214 | return 0; 215 | } 216 | -------------------------------------------------------------------------------- /PP_02/6.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #ifndef PNum 7 | #define PNum 2 8 | #endif 9 | 10 | enum MsgTag { 11 | RANDATA, 12 | AVERAGE 13 | }; 14 | 15 | void work(int id) { 16 | int randata; 17 | int recvdata; 18 | MPI_Status status; 19 | 20 | while(1) { 21 | srand(time(NULL)+id); 22 | randata = rand() % 100; 23 | MPI_Send(&randata, 1, MPI_INT, id % PNum, RANDATA, MPI_COMM_WORLD); 24 | MPI_Recv(&recvdata, 1, MPI_INT, id % PNum, AVERAGE, MPI_COMM_WORLD, &status); 25 | printf("Proc#%d receive average data = %d\n", id, recvdata); 26 | } 27 | } 28 | 29 | void serve(int id, int num) { 30 | int input[num]; 31 | int average; 32 | int sum, ctn; 33 | MPI_Status status; 34 | 35 | while (1) { 36 | sum = 0; 37 | ctn = 0; 38 | for(int i = 1; i*PNum+id P-1) { 69 | work(id_procs); 70 | } 71 | else { 72 | serve(id_procs, num_procs); 73 | } 74 | 75 | MPI_Finalize(); 76 | return 0; 77 | } 78 | -------------------------------------------------------------------------------- /PP_02/7.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #ifndef N 8 | #define N 50 9 | #endif 10 | 11 | #define INDEX(i, j) (((i)*N)+(j)) 12 | 13 | 14 | void random_array(double *a, int num) { 15 | for(int i = 0; i < num; i++) { 16 | srand(time(NULL)); 17 | a[i] = rand() % 100; 18 | } 19 | } 20 | 21 | 
void comp(double *A, double *B, int num) { 22 | for(int i = 1; i < N-1; i++) { 23 | for(int j = 1; j < N-1; j++) { 24 | B[INDEX(i, j)] = (A[INDEX(i-1, j)]+A[INDEX(i, j+1)]+A[INDEX(i+1, j)]+A[INDEX(i, j-1)]) / 4.0; 25 | } 26 | } 27 | } 28 | 29 | int check(double *B, double *C) { 30 | for(int i = 1; i < N-1; i++) { 31 | for(int j = 1; j < N-1; j++) { 32 | if (fabs(B[INDEX(i, j)]-C[INDEX(i, j)]) >= 1e-2) { 33 | printf("B[%d,%d] = %lf not %lf!\n", i, j, B[INDEX(i, j)], C[INDEX(i, j)]); 34 | return 0; 35 | } 36 | } 37 | } 38 | return 1; 39 | } 40 | 41 | int main(int argc, char *argv[]) { 42 | double *A, *B, *B2; 43 | A = (double*)malloc(N*N*sizeof(double)); 44 | B = (double*)malloc(N*N*sizeof(double)); 45 | B2= (double*)malloc(N*N*sizeof(double)); 46 | 47 | int id_procs, num_procs, num_1; 48 | MPI_Status status; 49 | MPI_Init(&argc, &argv); 50 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 51 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 52 | 53 | num_1 = num_procs -1; 54 | // Proc#N-1 randomize the data 55 | if (id_procs == num_1) { 56 | random_array(A, N*N); 57 | comp(A, B2, N*N); 58 | } 59 | 60 | MPI_Barrier(MPI_COMM_WORLD); 61 | 62 | // Proc#N-1 broadcast 3 lines of A to each Proc 63 | int ctn = 0; 64 | for(int i = 0; i < N-2; i++) { 65 | if (id_procs == num_1) { 66 | int dest = i % num_1; 67 | int tag = i / num_1; 68 | MPI_Send(&A[INDEX(i, 0)], N*3, MPI_DOUBLE, dest, tag, MPI_COMM_WORLD); 69 | } 70 | } 71 | 72 | for(int i = 0; i < (N-2)/num_1; i++) { 73 | if (id_procs != num_1) { 74 | MPI_Recv(&A[INDEX(3*ctn, 0)], 3*N, MPI_DOUBLE, num_1, ctn, MPI_COMM_WORLD, &status); 75 | ctn++; 76 | } 77 | } 78 | if (id_procs < (N-2) % num_1) { 79 | MPI_Recv(&A[INDEX(ctn*3, 0)], 3*N, MPI_DOUBLE, num_1, ctn, MPI_COMM_WORLD, &status); 80 | ctn++; 81 | } 82 | 83 | // compute 84 | if (id_procs != num_1) { 85 | for(int i = 1; i <= ctn; i++) { 86 | for(int j = 1; j < N-1; j++) { 87 | B[INDEX(i, j)] = (A[INDEX(i-1, j)]+A[INDEX(i, j+1)]+A[INDEX(i+1, j)]+A[INDEX(i, j-1)]) / 4.0; 88 | } 89 | } 90 | } 91 | 92 | // Gather 93 | for(int i = 0; i < N-2; i++) { 94 | if (id_procs == num_1) { 95 | int src = i % num_1; 96 | MPI_Recv(&B[INDEX(i+1, 1)], N-2, MPI_DOUBLE, src, i/num_1+N, MPI_COMM_WORLD, &status); 97 | } 98 | else { 99 | for(int j = 0; j < ctn; j++) 100 | MPI_Send(&B[INDEX(j+1, 1)], N-2, MPI_DOUBLE, num_1, j+N, MPI_COMM_WORLD); 101 | } 102 | } 103 | 104 | 105 | if (id_procs == num_1) { 106 | if(check(B, B2)) { 107 | printf("Done.No Error\n"); 108 | } else { 109 | printf("Error Occured!\n"); 110 | } 111 | } 112 | free(A); 113 | free(B); 114 | free(B2); 115 | MPI_Finalize(); 116 | return 0; 117 | } 118 | -------------------------------------------------------------------------------- /PP_02/7_2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #ifndef N 8 | #define N 50 9 | #endif 10 | 11 | #define INDEX(i, j) (((i)*N)+(j)) 12 | 13 | void random_array(double *a, int num) { 14 | for(int i = 0; i < num; i++) { 15 | srand(time(NULL)); 16 | a[i] = rand() % 100; 17 | } 18 | } 19 | 20 | void comp(double *A, double *B, int a, int b) { 21 | for(int i = 1; i <= a; i++) { 22 | for(int j = 1; j <= b; j++) { 23 | B[INDEX(i, j)] = (A[INDEX(i-1, j)]+A[INDEX(i, j+1)]+A[INDEX(i+1, j)]+A[INDEX(i, j-1)]) / 4.0; 24 | } 25 | } 26 | } 27 | 28 | int check(double *B, double *C) { 29 | for(int i = 1; i < N-1; i++) { 30 | for(int j = 1; j < N-1; j++) { 31 | if (fabs(B[INDEX(i, j)]-C[INDEX(i, j)]) >= 1e-2) { 32 | printf("B[%d,%d] = %lf 
not %lf!\n", i, j, B[INDEX(i, j)], C[INDEX(i, j)]); 33 | return 0; 34 | } 35 | } 36 | } 37 | return 1; 38 | } 39 | 40 | 41 | int main(int argc, char *argv[]) 42 | { 43 | double *A, *B, *B2; 44 | 45 | int id_procs, num_procs; 46 | MPI_Status status; 47 | MPI_Init(&argc, &argv); 48 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 49 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 50 | 51 | MPI_Datatype SubMat; 52 | int rows = sqrt(num_procs); 53 | int cols = num_procs / rows; 54 | int a = (N-2 + rows-1) / rows; 55 | int b = (N-2 + cols-1) / cols; 56 | int alloc_num = (a+1)*(b+1)*num_procs; 57 | A = (double*)malloc(alloc_num*sizeof(double)); 58 | B = (double*)malloc(alloc_num*sizeof(double)); 59 | B2= (double*)malloc(alloc_num*sizeof(double)); 60 | 61 | // Proc#0 randomize the data 62 | if (id_procs == 0) { 63 | random_array(A, N*N); 64 | comp(A, B2, N-2, N-2); 65 | } 66 | 67 | MPI_Barrier(MPI_COMM_WORLD); 68 | 69 | // Proc#0 broadcast (a+2)x(b+2) mat 70 | MPI_Type_vector(a+2, b+2, N, MPI_DOUBLE, &SubMat); 71 | MPI_Type_commit(&SubMat); 72 | 73 | if (id_procs == 0) { 74 | for(int i = 0; i < rows; i++) { 75 | for(int j = 0; j < cols; j++) { 76 | if (i == 0 && j == 0) 77 | continue; 78 | MPI_Send(A+i*a*N+b*j, 1, SubMat, j+cols*i, 0, MPI_COMM_WORLD); 79 | } 80 | } 81 | } 82 | else { 83 | MPI_Recv(A, 1, SubMat, 0, 0, MPI_COMM_WORLD, &status); 84 | } 85 | 86 | // compute 87 | comp(A, B, a, b); 88 | 89 | // Gather result 90 | MPI_Datatype SubMat_B; 91 | MPI_Type_vector(a, b, N, MPI_DOUBLE, &SubMat_B); 92 | MPI_Type_commit(&SubMat_B); 93 | if (id_procs == 0) { 94 | for(int i = 0; i < rows; i++) { 95 | for(int j = 0; j < cols; j++) { 96 | if (i == 0 && j == 0) 97 | continue; 98 | MPI_Recv(&B[INDEX(a*i+1, b*j+1)], 1, SubMat_B, i*cols+j, 1, MPI_COMM_WORLD, &status); 99 | } 100 | } 101 | } else { 102 | int x = id_procs / cols; 103 | int y = id_procs % cols; 104 | MPI_Send(&B[INDEX(1, 1)], 1, SubMat_B, 0, 1, MPI_COMM_WORLD); 105 | } 106 | 107 | if (id_procs == 0) 108 | if (check(B, B2)) { 109 | printf("Done.No Error\n"); 110 | } else { 111 | printf("Error!\n"); 112 | } 113 | 114 | free(A); 115 | free(B); 116 | free(B2); 117 | MPI_Finalize(); 118 | return 0; 119 | return 0; 120 | } 121 | -------------------------------------------------------------------------------- /PP_02/Makefile: -------------------------------------------------------------------------------- 1 | CC=mpicc 2 | OPENMP= 3 | SOURCES:=$(shell find $(.) -name '*.c') 4 | LIB=-lm 5 | OBJS=$(SOURCES:%.c=%) 6 | 7 | 8 | all : $(OBJS) 9 | @echo $(SOURCES) 10 | @echo "编译完成" 11 | if [ ! 
-d "build" ]; then mkdir build; fi 12 | mv $(OBJS) build 13 | 14 | %: %.c 15 | $(CC) $(OPENMP) $< $(LIB) -o $@ 16 | 17 | .PHONY: clean 18 | clean: 19 | rm -rf build -------------------------------------------------------------------------------- /PP_02/config: -------------------------------------------------------------------------------- 1 | node1:4 2 | node2:4 3 | node3:4 -------------------------------------------------------------------------------- /PP_02/mpi-lab.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/PP_02/mpi-lab.pdf -------------------------------------------------------------------------------- /PP_02/run.sh: -------------------------------------------------------------------------------- 1 | make 2 | 3 | for file in `ls build` 4 | do 5 | echo "*********************" 6 | echo "Lab02 " ${file} " program :" 7 | time mpiexec -n 8 ./build/${file} 8 | done 9 | -------------------------------------------------------------------------------- /PP_02/test.sh: -------------------------------------------------------------------------------- 1 | for i in $(seq 2 16) 2 | do 3 | time -o report2_${i} mpiexec -n ${i} build/2 4 | time -o report3_${i} mpiexec -n ${i} build/3 5 | done -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 并行计算程序设计代码库 2 | 3 | - 包含有 OpenMP 和 MPI 等多线程的编程实例 4 | - 详见example 5 | - 相关课程上机实验 6 | - PP_01 第一次实验 OpenMp 7 | - PP_02 第二次实验 MPI 8 | - 《并行算法实践》中部分章节的代码实现 9 | - sort 第十三章 排序 10 | - matrix 第十八章 矩阵运算 11 | -------------------------------------------------------------------------------- /example/mini-omp-demo/badloop.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | DESCRIPTION: 4 | Parallelizing an inner loop with dependences 5 | Backward dependency 6 | 7 | 8 | for (i=0; i 19 | #include 20 | #include 21 | 22 | #define NUM_THREADS 4 23 | 24 | #define VSIZE 100 25 | 26 | void main() 27 | { 28 | int V[ VSIZE+1 ],i,U[VSIZE+1]; 29 | for (i=0; i 2 | main() 3 | { 4 | int x; 5 | int i,id; 6 | x = 0; 7 | #pragma omp parallel shared(x) private(i,id) 8 | { 9 | // #pragma omp critical 10 | { 11 | id = omp_get_thread_num(); 12 | printf("before thread %d : X = %d\n",id,x); 13 | for(i=0;i<3000000;i++) x = x + 1; 14 | printf("after thread %d : X = %d\n",id,x); 15 | 16 | } 17 | } /* end of parallel section */ 18 | 19 | printf("out of the parallel region : X = %d\n",x); 20 | } 21 | 22 | 23 | -------------------------------------------------------------------------------- /example/mini-omp-demo/critical.c: -------------------------------------------------------------------------------- 1 | #include 2 | main() 3 | { 4 | int x; 5 | x = 0; 6 | #pragma omp parallel shared(x) 7 | { 8 | #pragma omp critical 9 | x = x + 1; 10 | } /* end of parallel section */ 11 | 12 | printf("out of the parallel region : X = %d\n",x); 13 | } 14 | 15 | 16 | -------------------------------------------------------------------------------- /example/mini-omp-demo/dis-err.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* 5 | 6 | for(i=4;i<100;i++){ 7 | a[i] = b[i-2] + 1; 8 | c[i] = b[i-1] + f[i]; 9 | b[i] = a[i-1] + 2; 10 | d[i] = d[i+1] + b[i-1]; 11 | } 12 | 13 | */ 14 | 15 | #define Iter 100000 16 | 17 | main() 18 | { 19 | int i; 
20 | 21 | int a[Iter],b[Iter],c[Iter],d[Iter],f[Iter]; 22 | int a1[Iter],b1[Iter],c1[Iter],d1[Iter],f1[Iter]; 23 | 24 | 25 | for(i=0;i 2 | #include 3 | 4 | /* 5 | 6 | for(i=4;i<100;i++){ 7 | S1: a[i] = b[i-2] + 1; 8 | S2: c[i] = b[i-1] + f[i]; 9 | S3: b[i] = a[i-1] + 2; 10 | S4: d[i] = d[i+1] + b[i-1]; 11 | } 12 | 13 | */ 14 | 15 | #define Iter 10000 16 | 17 | main() 18 | { 19 | int i; 20 | int a[Iter],b[Iter],c[Iter],d[Iter],f[Iter]; 21 | int a1[Iter],b1[Iter],c1[Iter],d1[Iter],f1[Iter]; 22 | 23 | int old_d[Iter]; // duplicating array-d to avoid 24 | // anti-dependency 25 | 26 | for(i=0;i 2 | #include 3 | 4 | /* 5 | 6 | for(i=4;i<100;i++){ 7 | S1: a[i] = b[i-2] + 1; 8 | S2: c[i] = b[i-1] + f[i]; 9 | S3: b[i] = a[i-1] + 2; 10 | S4: d[i] = d[i+1] + b[i-1]; 11 | } 12 | 13 | */ 14 | 15 | #define Iter 100000 16 | 17 | main() 18 | { 19 | int i; 20 | int a[Iter],b[Iter],c[Iter],d[Iter],f[Iter]; 21 | int a1[Iter],b1[Iter],c1[Iter],d1[Iter],f1[Iter]; 22 | 23 | int old_d[Iter]; // duplicating array-d to avoid 24 | // anti-dependency 25 | 26 | for(i=0;i 20 | #include 21 | #include 22 | 23 | 24 | #define VSIZE 40000 25 | 26 | /* PROTOYPES */ 27 | 28 | /* MAIN: PROCESS PARAMETERS */ 29 | int main(int argc, char *argv[]) { 30 | 31 | 32 | int V[VSIZE],oldV[VSIZE],U[VSIZE]; 33 | 34 | 35 | 36 | int i; 37 | 38 | 39 | for (i=0; i 20 | #include 21 | #include 22 | 23 | 24 | #define VSIZE 40000 25 | 26 | #define THREADS_NUM 4 27 | 28 | /* PROTOYPES */ 29 | 30 | /* MAIN: PROCESS PARAMETERS */ 31 | void main() { 32 | 33 | 34 | int V[VSIZE],U[VSIZE]; 35 | 36 | int border,size; 37 | int LimitL, LimitR; 38 | int i,id; 39 | 40 | 41 | omp_set_num_threads(THREADS_NUM); 42 | 43 | size = VSIZE / THREADS_NUM ; 44 | 45 | for (i=0; i 2 | main() 3 | { 4 | int x; 5 | x = 0; 6 | #pragma omp parallel shared(x) 7 | { 8 | #pragma omp master 9 | x = x + 10; 10 | #pragma omp critical 11 | x = x + 1; 12 | } /* end of parallel section */ 13 | 14 | printf("out of the parallel region : X = %d\n",x); 15 | } 16 | 17 | 18 | -------------------------------------------------------------------------------- /example/mini-omp-demo/pfor-no-schedule.c: -------------------------------------------------------------------------------- 1 | #include 2 | #define N 100000 3 | int main () 4 | { 5 | int i, chunk,id; 6 | float a[N], b[N], c[N]; 7 | /* Some initializations */ 8 | for (i=0; i < N; i++) 9 | a[i] = b[i] = i * 1.0; 10 | 11 | #pragma omp parallel for shared(a,b,c) private(i,chunk,id) 12 | for (i=0; i < N; i++) 13 | { 14 | 15 | c[i] = a[i] + b[i]; 16 | 17 | chunk = N / omp_get_num_threads(); 18 | id = omp_get_thread_num(); 19 | 20 | if ( (i%chunk)==0 ) printf("Iteration #%d in thread #%d\n",i, id); 21 | 22 | } 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /example/mini-omp-demo/pfor.c: -------------------------------------------------------------------------------- 1 | #include 2 | #define N 100000 3 | #define CHUNKSIZE 1000 4 | int main () { 5 | int i, chunk; 6 | float a[N], b[N], c[N]; 7 | /* Some initializations */ 8 | for (i=0; i < N; i++) 9 | a[i] = b[i] = i * 1.0; 10 | chunk = CHUNKSIZE; 11 | #pragma omp parallel for \ 12 | shared(a,b,c,chunk) private(i) \ 13 | schedule(static,chunk) 14 | for (i=0; i < N; i++) 15 | { 16 | int id; 17 | 18 | c[i] = a[i] + b[i]; 19 | 20 | id = omp_get_thread_num(); 21 | if ( (i % chunk) == 0 ) printf("Iteration #%d in thread #%d\n",i, id); 22 | 23 | } 24 | } 25 | -------------------------------------------------------------------------------- 
/example/mini-omp-demo/pi01.c: -------------------------------------------------------------------------------- 1 | #include 2 | static long num_steps = 100000; 3 | double step; 4 | #define NUM_THREADS 2 5 | void main () 6 | { 7 | int i; 8 | double x, pi, sum[NUM_THREADS]; 9 | step = 1.0/(double) num_steps; 10 | omp_set_num_threads(NUM_THREADS); 11 | #pragma omp parallel private(i) 12 | { 13 | double x; 14 | int id; 15 | id = omp_get_thread_num(); 16 | sum[id] = 0; 17 | #pragma omp for 18 | for (i=0;i< num_steps; i++){ 19 | x = (i+0.5)*step; 20 | sum[id] += 4.0/(1.0+x*x); 21 | } 22 | } 23 | for(i=0, pi=0.0;i 2 | static long num_steps = 100000; 3 | double step; 4 | #define NUM_THREADS 4 5 | void main () 6 | { int i; 7 | double x, pi, sum[NUM_THREADS]; 8 | step = 1.0/(double) num_steps; 9 | omp_set_num_threads(NUM_THREADS); 10 | #pragma omp parallel 11 | { 12 | double x; 13 | int id; 14 | id = omp_get_thread_num(); 15 | for (i=id, sum[id]=0.0;i< num_steps; i=i+NUM_THREADS){ 16 | x = (i+0.5)*step; 17 | sum[id] += 4.0/(1.0+x*x); 18 | } 19 | } 20 | for(i=0, pi=0.0;i 2 | static long num_steps = 100000; 3 | double step; 4 | #define NUM_THREADS 8 5 | void main () 6 | { 7 | int i,id; 8 | double x, sum, pi=0.0; 9 | step = 1.0/(double) num_steps; 10 | omp_set_num_threads(NUM_THREADS); 11 | #pragma omp parallel private (x,i,sum) 12 | { 13 | id = omp_get_thread_num(); 14 | for (i=id,sum=0.0;i< num_steps;i=i+NUM_THREADS){ 15 | x = (i+0.5)*step; 16 | sum += 4.0/(1.0+x*x); 17 | } 18 | #pragma omp critical 19 | pi += sum*step; 20 | 21 | // #pragma omp barrier 22 | 23 | // #pragma omp master 24 | // printf("Pi = %lf\n",pi); 25 | } 26 | printf("Pi = %lf\n",pi); 27 | 28 | } 29 | -------------------------------------------------------------------------------- /example/mini-omp-demo/pi04.c: -------------------------------------------------------------------------------- 1 | #include 2 | static long num_steps = 100000; 3 | double step; 4 | #define NUM_THREADS 8 5 | void main () 6 | { int i; 7 | double x, pi, sum = 0.0; 8 | step = 1.0/(double) num_steps; 9 | omp_set_num_threads(NUM_THREADS); 10 | #pragma omp parallel for reduction(+:sum) private(x) 11 | for (i=0;i 19 | #include 20 | #include 21 | 22 | #include 23 | 24 | double omp_time() { 25 | static int sec = -1; 26 | struct timeval tv; 27 | gettimeofday(&tv, (void *)0); 28 | if (sec < 0) sec = tv.tv_sec; 29 | return (tv.tv_sec - sec) + 1.0e-6*tv.tv_usec; 30 | } 31 | 32 | 33 | void loop(int, int, int); 34 | 35 | 36 | 37 | int main(int argc, char *argv[]) { 38 | 39 | int nthreads, size, numiter; 40 | 41 | 42 | double start,finish; 43 | 44 | nthreads = omp_get_max_threads(); 45 | 46 | size = atoi(argv[1]); 47 | numiter = atoi(argv[2]); 48 | 49 | start = omp_time(); 50 | loop(nthreads, size, numiter); 51 | finish = omp_time(); 52 | 53 | printf(" %d Threads of %d iterations with %d elements = %f (sec)\n", 54 | nthreads, numiter, size, finish-start); 55 | 56 | return 0; 57 | } 58 | 59 | 60 | #define f(x,y) ((x+y)/2.0) 61 | 62 | /* 63 | * 64 | * PARALLEL LOOP 65 | * 66 | */ 67 | 68 | void loop(int nthreads, int size, int numiter) { 69 | /* VARIABLES */ 70 | int i,iter; 71 | 72 | int thread; 73 | int limitL, limitR; 74 | 75 | int *leftLimit; 76 | int *rightLimit; 77 | 78 | 79 | 80 | /* DECLARE VECTOR AND ANCILLARY DATA STRUCTURES */ 81 | double *V=NULL; 82 | double border; 83 | 84 | int blockSize = size/nthreads; 85 | 86 | V = (double *)malloc(size*sizeof(double)); 87 | 88 | leftLimit = (int*)malloc(nthreads*sizeof(int)); 89 | rightLimit = 
(int*)malloc(nthreads*sizeof(int)); 90 | 91 | 92 | for(i = 0; i< nthreads; i++){ 93 | leftLimit [i] = i * blockSize; 94 | rightLimit[i] = (i+1)*blockSize - 1; 95 | } 96 | 97 | if( size % nthreads ){ 98 | rightLimit[nthreads-1] = size - 1; 99 | } 100 | 101 | 102 | // Initialization of array V 103 | 104 | for (i=0; i(iter-numiter) ) { 129 | 130 | /* COMPUTE FIRST ELEMENT (EXCEPT THREAD 0) */ 131 | 132 | if (thread != 0) 133 | V[limitL] = f( V[limitL], border ); 134 | 135 | /* COMPUTE THE REST OF ELEMENTS */ 136 | 137 | for (i=limitL+1; i<=limitR; i++) { 138 | V[i] = f( V[i], V[i-1] ); 139 | } 140 | } 141 | 142 | /* SYNCHRONIZE BEFORE COPYING UPDATED BORDER ELEMENT */ 143 | #pragma omp barrier 144 | 145 | } // end-of-for 146 | 147 | } // end-of-parallel-region 148 | 149 | } // end-of-main 150 | -------------------------------------------------------------------------------- /example/mini-omp-demo/private.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int alpha[10],beta[10],i; 4 | #pragma omp threadprivate(alpha) 5 | 6 | main() 7 | { 8 | /* first parallel region */ 9 | #pragma omp parallel private(i,beta) 10 | { int id ; 11 | id = omp_get_thread_num(); 12 | 13 | for(i=0;i<10;i++) 14 | alpha[i] = beta[i] = id * i; 15 | } 16 | /* second parallel region */ 17 | #pragma omp parallel 18 | printf("I am thread %d :alpha[3] = %d and beta[3] = %d\n", omp_get_thread_num(),alpha[3],beta[3]); 19 | 20 | } 21 | -------------------------------------------------------------------------------- /example/mini-omp-demo/reduction.c: -------------------------------------------------------------------------------- 1 | #include 2 | int main () 3 | { 4 | int i, n, chunk; 5 | float a[100], b[100], result; 6 | /* Some initializations */ 7 | n = 100; 8 | chunk = 10; 9 | result = 0.0; 10 | for (i=0; i < n; i++) 11 | { 12 | a[i] = i * 1.0; 13 | b[i] = i * 2.0; 14 | } 15 | #pragma omp parallel for default(shared) private(i) 16 | for (i=0; i < n; i++) 17 | result = result + (a[i] * b[i]); 18 | printf("Final result= %f\n",result); 19 | } 20 | 21 | -------------------------------------------------------------------------------- /example/mini-omp-demo/section.c: -------------------------------------------------------------------------------- 1 | #include 2 | #define N 1000 3 | int main (){ 4 | int i,id; 5 | float a[N], b[N], c[N]; 6 | /* Some initializations */ 7 | for (i=0; i < N; i++) 8 | a[i] = b[i] = i * 1.0; 9 | #pragma omp parallel shared(a,b,c) private(i,id) 10 | { 11 | #pragma omp sections nowait 12 | { 13 | #pragma omp section 14 | id = omp_get_thread_num(); 15 | printf("working in Thread %d\n",id); 16 | for (i=0; i < N/2; i++) 17 | c[i] = a[i] + b[i]; 18 | #pragma omp section 19 | id = omp_get_thread_num(); 20 | printf("working in Thread %d\n",id); 21 | for (i=N/2; i < N; i++) 22 | c[i] = a[i] + b[i]; 23 | } /* end of sections */ 24 | } /* end of parallel section */ 25 | } 26 | -------------------------------------------------------------------------------- /example/mini-omp-demo/single.c: -------------------------------------------------------------------------------- 1 | #include 2 | main() 3 | { 4 | int x; 5 | x = 0; 6 | #pragma omp parallel shared(x) 7 | { 8 | #pragma omp single 9 | { 10 | int id = omp_get_thread_num(); 11 | printf("I am thread #%d\n",id); 12 | x = x + 1; 13 | } 14 | } /* end of parallel section */ 15 | 16 | printf("out of the parallel region : X = %d\n",x); 17 | } 18 | 19 | 20 | 
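/* Note on reduction.c above (editorial sketch, not a file from this repo):
 * its parallel for updates the shared variable `result` from every thread
 * without a reduction clause, so the accumulated sum races (possibly left
 * that way on purpose as a contrast with critical.c). A minimal corrected
 * version of that dot-product loop, assuming the same a[], b[] and result,
 * looks like this: */

#include <stdio.h>
#include <omp.h>

int main(void)
{
    int i, n = 100;
    float a[100], b[100], result = 0.0f;

    for (i = 0; i < n; i++) {
        a[i] = i * 1.0f;
        b[i] = i * 2.0f;
    }

    /* reduction(+:result) gives each thread a private partial sum and
       combines the partial sums at the end, removing the race on result. */
    #pragma omp parallel for default(shared) private(i) reduction(+:result)
    for (i = 0; i < n; i++)
        result = result + (a[i] * b[i]);

    printf("Final result= %f\n", result);
    return 0;
}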
-------------------------------------------------------------------------------- /example/mini-omp-demo/threadprivate.c: -------------------------------------------------------------------------------- 1 | #include 2 | int alpha[10], beta[10], i; 3 | #pragma omp threadprivate(alpha) 4 | int main () 5 | { 6 | /* First parallel region */ 7 | #pragma omp parallel private(i,beta) 8 | for (i=0; i < 10; i++) 9 | alpha[i] = beta[i] = i; 10 | /* Second parallel region */ 11 | #pragma omp parallel 12 | printf("alpha[3]= %d and beta[3]=%d\n",alpha[3],beta[3]); 13 | } 14 | -------------------------------------------------------------------------------- /example/mpi/bigdata_sort.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #define length 20 7 | 8 | void swap(int *data, int i, int j) { 9 | int temp = data[i]; 10 | data[i] = data[j]; 11 | data[j] = temp; 12 | } 13 | 14 | int partition(int *data, int start, int end) { 15 | if (start >= end) return 0; 16 | int pivotValue = data[start]; 17 | int low = start; 18 | int high = end - 1; 19 | while (low < high) { 20 | while (data[low] <= pivotValue && low < end) low++; 21 | while (data[high] > pivotValue && high > start) high--; 22 | if (low < high) swap(data, low, high); 23 | } 24 | swap(data, start, high); 25 | return high; 26 | } 27 | 28 | void quicksort(int *data, int start, int end) { 29 | // why not end-start < 1 30 | if (end-start+1 < 2) return; 31 | 32 | int pivot = partition(data, start, end); 33 | quicksort(data, start, pivot); 34 | quicksort(data, pivot+1, end); 35 | } 36 | 37 | int main(int argc, char *argv[]) { 38 | MPI_Init(&argc, &argv); 39 | int rank, size; 40 | MPI_Comm_rank (MPI_COMM_WORLD, &rank); 41 | MPI_Comm_size (MPI_COMM_WORLD, &size); 42 | 43 | // 随机生成data 数组 44 | srand(time(0)); 45 | int *data = (int*)malloc(sizeof(int)*length); 46 | printf("data = %p, rank = %d\n", data, rank); 47 | 48 | int i; 49 | for (i=0; i 1; s /= 2) { 81 | if (rank % s == 0) { 82 | pivot = partition(data, 0, localDataSize); 83 | MPI_Send(data+pivot, localDataSize - pivot,MPI_INT, rank + s/2, 0, MPI_COMM_WORLD); 84 | localDataSize = pivot; 85 | } 86 | else if (rank % s == s/2) { 87 | MPI_Recv(data, length, MPI_INT, rank - s/2, 88 | MPI_ANY_TAG, MPI_COMM_WORLD, &status); 89 | MPI_Get_count(&status, MPI_INT, 90 | &localDataSize); 91 | } 92 | } 93 | 94 | quicksort(data, 0, localDataSize); 95 | gettimeofday(&end, 0); 96 | 97 | MPI_Barrier(MPI_COMM_WORLD); 98 | if (rank == 0) 99 | { 100 | float time = (end.tv_sec - start.tv_sec) + 101 | 0.000001*(end.tv_usec - start.tv_usec); 102 | for(int i = 0;i < length;i++) 103 | { 104 | printf("%d, ", data[i]); 105 | } 106 | printf("Time: %f s\n", time); 107 | } 108 | MPI_Finalize(); 109 | return 0; 110 | } -------------------------------------------------------------------------------- /example/mpi/matmul.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int main(int argc, char *argv[]) 7 | { 8 | MPI_Init(&argc, &argv); 9 | 10 | int num_procs; 11 | int rank; 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | 15 | int i; 16 | int vec; 17 | int *mat = (int *)malloc(sizeof(int)*num_procs); 18 | int *ret = (int *)malloc(sizeof(int)*num_procs); 19 | int *ans = (int *)malloc(sizeof(int)*num_procs); 20 | srand(time(0)); 21 | for(i = 0;i < num_procs;i++) 22 | { 23 | mat[i] = rand() % 10; 24 | } 
25 | vec = rand() % 10; 26 | 27 | for(i = 0;i < num_procs;i++) 28 | { 29 | printf("%d: %d\n", rank, mat[i]); 30 | printf("%d: vec = %d\n", rank, vec); 31 | } 32 | for(i = 0;i < num_procs;i++) 33 | { 34 | ans[i] = vec * mat[i]; 35 | } 36 | 37 | MPI_Reduce(ans, ret, num_procs, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); 38 | 39 | if(rank == 0) 40 | { 41 | for(i = 0;i < num_procs;i++) 42 | printf("%d, ", ret[i]); 43 | } 44 | 45 | MPI_Finalize(); 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /example/mpi/matmul1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | const int rows = 40; //the rows of matrix 6 | const int cols = 100; //the cols of matrix 7 | 8 | int main(int argc, char* argv[]) 9 | { 10 | int i, j, k, myid, numprocs, anstag; 11 | int A[rows][cols], B[cols], C[rows]; 12 | int masterpro,buf[cols], ans,cnt; 13 | double starttime,endtime; 14 | double tmp,totaltime; 15 | 16 | MPI_Status status; 17 | masterpro = 0; 18 | MPI_Init(&argc, &argv); 19 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 20 | MPI_Comm_size(MPI_COMM_WORLD, &numprocs); 21 | for(cnt = 0; cnt < 100000; cnt++){ 22 | if(numprocs < 2){ 23 | printf("Error:Too few processes!\n"); 24 | MPI_Abort(MPI_COMM_WORLD,99); 25 | } 26 | if(myid == masterpro){ 27 | starttime = MPI_Wtime(); 28 | for (i = 0; i < cols; i++) 29 | { 30 | B[i] = rand()%10; 31 | for (j = 0; j < rows; j++) 32 | { 33 | A[j][i] = rand()%10; 34 | } 35 | } 36 | //bcast the B vector to all slave processor 37 | MPI_Bcast(B, cols, MPI_INT, masterpro, MPI_COMM_WORLD); 38 | //partition the A matrix to all slave processor 39 | for (i = 1; i < numprocs; i++) 40 | { 41 | for (k = i - 1; k < rows; k += numprocs - 1) 42 | { 43 | for (j = 0; j < cols; j++) 44 | { 45 | buf[j] = A[k][j]; 46 | } 47 | MPI_Send(buf, cols, MPI_INT, i, k, MPI_COMM_WORLD); 48 | } 49 | } 50 | } 51 | else{ 52 | //starttime = MPI_Wtime(); 53 | MPI_Bcast(B, cols, MPI_INT, masterpro, MPI_COMM_WORLD); 54 | //every processor receive the part of A matrix,and make Mul operator with B vector 55 | for ( i = myid - 1; i < rows; i += numprocs - 1){ 56 | MPI_Recv(buf, cols, MPI_INT, masterpro, i, MPI_COMM_WORLD, &status); 57 | ans = 0; 58 | 59 | for ( j = 0; j < cols; j++) 60 | { 61 | ans += buf[j] * B[j]; 62 | } 63 | //send back the result 64 | MPI_Send(&ans, 1, MPI_INT, masterpro, i, MPI_COMM_WORLD); 65 | } 66 | //endtime = MPI_Wtime(); 67 | //tmp = endtime-starttime; 68 | } 69 | if(myid == masterpro){ 70 | //receive the result from all slave processor 71 | for ( i = 0; i < rows; i++) 72 | { 73 | MPI_Recv(&ans, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); 74 | //sender = status.MPI_SOURCE; 75 | anstag = status.MPI_TAG; 76 | C[anstag] = ans; 77 | } 78 | //print the result 79 | 80 | for (i = 0; i < rows; i++) 81 | { 82 | printf("%d ",C[i]); 83 | if((i+1)%20 == 0) 84 | printf("\n"); 85 | } 86 | 87 | } 88 | } 89 | endtime = MPI_Wtime(); 90 | totaltime = endtime-starttime; 91 | //printf("cost time:%f s.\n",tmp); 92 | //MPI_Reduce(&tmp,&totaltime,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); 93 | if(myid == masterpro) 94 | printf("total time:%f s.\n",totaltime); 95 | MPI_Finalize(); 96 | return 0; 97 | } -------------------------------------------------------------------------------- /example/mpi/max.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int compare(int m, int n) 5 | { 6 | if (m >= n) return m; 7 
| else return n; 8 | } 9 | 10 | int main(int argc, char *argv[]) 11 | { 12 | MPI_Init(&argc, &argv); 13 | int m = 2000; 14 | int n = 2400; 15 | printf("%d\n", compare(m, n)); 16 | MPI_Finalize(); 17 | } 18 | -------------------------------------------------------------------------------- /example/mpi/mul.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char *argv[]) 5 | { 6 | /* code */ 7 | MPI_Init(&argc, &argv); 8 | int m = 233; 9 | int n = 341; 10 | printf("%d\n", m*n); 11 | MPI_Finalize(); 12 | } 13 | -------------------------------------------------------------------------------- /example/mpi/pi.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char *argv[]){ 6 | int my_rank, num_procs; 7 | int i, n = 0; 8 | double sum, width, local, mypi, pi; 9 | double start = 0.0, stop = 0.0; 10 | int proc_len; 11 | char processor_name[MPI_MAX_PROCESSOR_NAME]; 12 | 13 | MPI_Init(&argc, &argv); 14 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 15 | MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 16 | MPI_Get_processor_name(processor_name, &proc_len); 17 | printf("Process %d of %d\n", my_rank, num_procs); 18 | if(my_rank == 0){ 19 | printf("please give step number n:"); 20 | n = 2000; 21 | printf("\n"); 22 | start = MPI_Wtime(); 23 | } 24 | // printf("Process %d of %d\n", my_rank, num_procs); 25 | 26 | MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); 27 | width = 1.0 / n; 28 | sum = 0.0; 29 | for(i = my_rank; i < n; i += num_procs){ 30 | local = width * ((double)i + 0.5); 31 | sum += 4.0 / (1.0 + local * local); 32 | } 33 | mypi = width * sum; 34 | MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, 35 | MPI_COMM_WORLD); 36 | if(my_rank == 0){ 37 | printf("PI is %.20f\n", pi); 38 | stop = MPI_Wtime(); 39 | printf("Time: %f on %s\n", stop-start, processor_name); 40 | fflush(stdout); 41 | } 42 | MPI_Finalize(); 43 | return 0; 44 | } -------------------------------------------------------------------------------- /example/mpi/quicksort.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int cmp(const void *a,const void *b) 7 | { 8 | return *(int*)a-*(int*)b; 9 | } 10 | 11 | 12 | void swap(int *a,int *b) 13 | { 14 | int tmp=*a; 15 | *a=*b; 16 | *b=tmp; 17 | } 18 | 19 | 20 | int partition(int *buf,int n) 21 | { 22 | if(n == 0) return 0; 23 | --n; 24 | int pri = buf[n]; // privot buf[n-1] 25 | int i = -1; 26 | int j = 0; 27 | while(j < n) 28 | { 29 | if(buf[j] >= pri) 30 | { 31 | ++j; 32 | continue; 33 | } 34 | ++i; 35 | swap(&buf[i],&buf[j]); 36 | ++j; 37 | } 38 | ++i; 39 | swap(&buf[i],&buf[n]); 40 | return i; 41 | } 42 | 43 | 44 | int main(int argc,char **argv) 45 | { 46 | MPI_Init(&argc,&argv); 47 | int rank,size; 48 | MPI_Comm_rank(MPI_COMM_WORLD,&rank); 49 | MPI_Comm_size(MPI_COMM_WORLD,&size); 50 | 51 | int n = 10; 52 | int i; 53 | 54 | int *buf = (int*)malloc(n*sizeof(int)); 55 | if(buf == NULL) 56 | { 57 | printf("%d malloc failed\n"); 58 | exit(1); 59 | } 60 | 61 | int *ofss=NULL; 62 | int *counts=NULL; 63 | int *retbuf=NULL; 64 | 65 | if(rank == 0) 66 | { 67 | // 打印未排序的数组 68 | int *var = (int*)malloc(n*sizeof(int)); 69 | srand((int)time(0)); 70 | for(i = 0;i < n; ++i) 71 | { 72 | buf[i] = (int)rand() % n; 73 | var[i] = buf[i]; 74 | printf("%d\t",buf[i]); 75 | } 76 | printf("\n"); 77 | 78 | qsort(var,n,sizeof(int),cmp); 79 | free(var); 
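        /* ofss, counts and retbuf are needed only on rank 0: after the parallel
         * phase, each rank reports the offset (pos) and length (block) of the
         * segment it sorted, and the final MPI_Gatherv reassembles the sorted
         * array into retbuf using exactly those counts and displacements. */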
80 | 81 | ofss = (int*)malloc(size*sizeof(int)); 82 | counts = (int*)malloc(size*sizeof(int)); 83 | retbuf = (int*)malloc(n*sizeof(int)); 84 | 85 | if(ofss==NULL || counts==NULL || retbuf==NULL) 86 | { 87 | printf("malloc failed\n"); 88 | exit(2); 89 | } 90 | } 91 | 92 | int mod=1; 93 | int sz=1; 94 | int block=n; 95 | int pos=0; 96 | MPI_Status st; 97 | 98 | while(1) 99 | { 100 | if(rank >= sz) 101 | { 102 | sz <<= 1; 103 | if(sz > size) sz = size; 104 | mod <<= 1; 105 | continue; 106 | } 107 | if(rank != 0 && rank-(mod>>1)>=0) 108 | { 109 | MPI_Recv(&pos, 1, MPI_INT, rank-(mod>>1), 1, MPI_COMM_WORLD, &st); 110 | MPI_Recv(&block, 1, MPI_INT, rank-(mod>>1), 2, MPI_COMM_WORLD, &st); 111 | MPI_Recv(buf+pos, block, MPI_INT, rank-(mod>>1), 3, MPI_COMM_WORLD, &st); 112 | } 113 | if(rank+mod >= size) 114 | { 115 | qsort(buf+pos,block,sizeof(int),cmp); 116 | break; 117 | } 118 | 119 | int p = partition(buf+pos,block)+pos; 120 | int tb=block-p+pos; 121 | 122 | MPI_Send(&p ,1, MPI_INT, rank+mod,1,MPI_COMM_WORLD); 123 | MPI_Send(&tb, 1, MPI_INT, rank+mod,2,MPI_COMM_WORLD); 124 | MPI_Send(buf+p, tb, MPI_INT, rank+mod,3,MPI_COMM_WORLD); 125 | 126 | block=p-pos; 127 | sz<<=1; 128 | if(sz>size) sz=size; 129 | mod<<=1; 130 | } 131 | MPI_Gather(&block, 1, MPI_INT, counts, 1, MPI_INT,0,MPI_COMM_WORLD); 132 | MPI_Gather(&pos, 1, MPI_INT, ofss,1,MPI_INT,0,MPI_COMM_WORLD); 133 | MPI_Gatherv(buf+pos,block,MPI_INT,retbuf,counts,ofss,MPI_INT,0,MPI_COMM_WORLD); 134 | 135 | if(rank==0) 136 | { 137 | for(i=0;i 2 | #include 3 | #include 4 | 5 | 6 | int main(int argc, char *argv[]) 7 | { 8 | MPI_Init(&argc, &argv); 9 | int size; 10 | int rank; 11 | MPI_Comm_size(MPI_COMM_WORLD, &size); 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 13 | if(size==1){ 14 | printf("Too less of process.\n"); 15 | MPI_Abort(MPI_COMM_WORLD, MPI_ERR_COUNT); 16 | } 17 | char buf[] = "hello world\n"; 18 | MPI_Status status; 19 | if(rank!=0){ 20 | MPI_Send(buf, 13, MPI_CHAR, 0, 0, MPI_COMM_WORLD); 21 | } 22 | else{ 23 | char *ans = (char *)malloc(13*sizeof(char)); 24 | MPI_Recv(ans, 13, MPI_CHAR, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); 25 | printf("ans is %s\n", ans); 26 | } 27 | 28 | MPI_Finalize(); 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /example/mpi/sort.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | int main(int argc, char **argv) 4 | { 5 | MPI_Init(&argc, &argv); 6 | 7 | int i,j,t,a[10]={56,98,76,69,88,43,29,74,58,66}; 8 | 9 | for(i=0;i<9;i++) 10 | for(j=0;j<9-i;j++) 11 | if(a[j]>a[j+1]) 12 | {t=a[j];a[j]=a[j+1];a[j+1]=t;} 13 | 14 | for(i=0;i<10;i++) 15 | printf("%d ",a[i]); 16 | printf("\n"); 17 | MPI_Finalize(); 18 | return 0; 19 | } -------------------------------------------------------------------------------- /example/mpi/sort_.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #define length 1000000 6 | 7 | 8 | void swap(int *data, int i, int j) { 9 | int temp = data[i]; 10 | data[i] = data[j]; 11 | data[j] = temp; 12 | } 13 | 14 | int partition(int *data, int start, int end) { 15 | if (start >= end) return 0; 16 | int pivotValue = data[start]; 17 | int low = start; 18 | int high = end - 1; 19 | while (low < high) { 20 | while (data[low] <= pivotValue && low < end) low++; 21 | while (data[high] > pivotValue && high > start) high--; 22 | if (low < high) swap(data, low, high); 23 | } 24 | swap(data, start, high); 
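    /* The swap above puts pivotValue (originally data[start]) at index high;
     * everything left of high is <= the pivot and everything right of it is
     * greater, so high is returned as the split point for the recursion. */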
25 | return high; 26 | } 27 | 28 | void quicksort(int *data, int start, int end) { 29 | // why not end-start < 1 30 | if (end-start+1 < 2) return; 31 | 32 | int pivot = partition(data, start, end); 33 | quicksort(data, start, pivot); 34 | quicksort(data, pivot+1, end); 35 | } 36 | 37 | 38 | int main(int argc, char const *argv[]) 39 | { 40 | srand(time(0)); 41 | int *data = (int*)malloc(sizeof(int)*length); 42 | int i; 43 | for (i=0; i 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int i, rank, size; 9 | 10 | double a[10],b[10]; 11 | 12 | double sum,c; 13 | 14 | 15 | MPI_Status status; 16 | 17 | MPI_Init(&argc, &argv); /*initializing */ 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 19 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 20 | 21 | 22 | //int MPI_Allreduce( 23 | // void* sendbuf, 24 | // void* recvbuf, 25 | // int count , 26 | // MPI_Datatype datatype, 27 | // MPI_Op op, 28 | // MPI_Comm comm 29 | // ) 30 | 31 | 32 | for(i=0;i<10;i++) 33 | a[i] = b[i] = (double)(rank*10 + i); 34 | 35 | sum = 0.0; 36 | 37 | for(i=0;i<10;i++) 38 | sum += a[i] * b[i]; 39 | 40 | MPI_Barrier(MPI_COMM_WORLD); 41 | printf("Proc# %d got sub-sum : %.1f\n",rank,sum); 42 | MPI_Barrier(MPI_COMM_WORLD); 43 | 44 | MPI_Allreduce( &sum, &c, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 45 | 46 | MPI_Barrier(MPI_COMM_WORLD); 47 | 48 | printf("Proc#%d got the final sum %.1f\n", rank,c); 49 | 50 | MPI_Barrier(MPI_COMM_WORLD); 51 | 52 | MPI_Finalize(); /*quit from MPI world*/ 53 | return (0); 54 | } 55 | -------------------------------------------------------------------------------- /example/mpi_demo/allreduce-vector-sum.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int i, rank, size; 9 | 10 | double a[10]; 11 | 12 | double sum[10]; 13 | 14 | 15 | MPI_Status status; 16 | 17 | MPI_Init(&argc, &argv); /*initializing */ 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 19 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 20 | 21 | //A routine that computes the dot product of two vectors that are distributed 22 | //across a group of processes and returns the answer at node zero. 
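    // (The comment above matches the dot-product demos; this program instead
    //  element-wise sums the ten-element arrays a[] of all ranks, and because it
    //  uses MPI_Allreduce every rank -- not only rank 0 -- receives sum[].)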
23 | 24 | //int MPI_Allreduce( 25 | // void* sendbuf, 26 | // void* recvbuf, 27 | // int count , 28 | // MPI_Datatype datatype, 29 | // MPI_Op op, 30 | // MPI_Comm comm 31 | // ) 32 | 33 | 34 | for(i=0;i<10;i++) 35 | a[i] = (double)(rank*10 + i); 36 | 37 | MPI_Barrier(MPI_COMM_WORLD); 38 | 39 | printf("Proc#%d :",rank); 40 | for(i=0;i<10;i++) printf("%5.1f ", a[i]); 41 | printf("\n"); 42 | 43 | MPI_Barrier(MPI_COMM_WORLD); 44 | 45 | for(i=0;i<10;i++) 46 | sum[i] = 0.0; 47 | 48 | MPI_Allreduce( a, sum, 10, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 49 | 50 | MPI_Barrier(MPI_COMM_WORLD); 51 | printf("Proc# %d: ",rank); 52 | for(i=0;i<10;i++) printf("%5.1f ", sum[i]); 53 | printf("\n"); 54 | MPI_Barrier(MPI_COMM_WORLD); 55 | 56 | MPI_Finalize(); /*quit from MPI world*/ 57 | return (0); 58 | } 59 | -------------------------------------------------------------------------------- /example/mpi_demo/alltoall: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/example/mpi_demo/alltoall -------------------------------------------------------------------------------- /example/mpi_demo/alltoall.c: -------------------------------------------------------------------------------- 1 | #include "mpi.h" 2 | 3 | int main( int argc, char* argv[] ){ 4 | int i; 5 | int rank, nproc; 6 | int isend[32], irecv[32]; 7 | 8 | MPI_Init( &argc, &argv ); 9 | MPI_Comm_size( MPI_COMM_WORLD, &nproc ); 10 | MPI_Comm_rank( MPI_COMM_WORLD, &rank ); 11 | 12 | printf("Before : My rank = %d <",rank ); 13 | for(i=0; i\n"); 17 | 18 | MPI_Alltoall(isend, 1, MPI_INT, irecv, 1, MPI_INT, 19 | MPI_COMM_WORLD); 20 | printf("After : My rank = %d <", rank); 21 | for(i=0; i\n"); 24 | 25 | MPI_Finalize(); 26 | 27 | } 28 | -------------------------------------------------------------------------------- /example/mpi_demo/alltoallv.c: -------------------------------------------------------------------------------- 1 | #include "mpi.h" 2 | int main( int argc, char* argv[] ){ 3 | int i; 4 | int rank, nproc; 5 | int isend[6] = {1,2,2,3,3,3}, irecv[9]; 6 | int iscnt[3] = {1,2,3}, isdsp[3] = {0,1,3}, ircnt[3], irdsp[3]; 7 | 8 | MPI_Init( &argc, &argv ); 9 | MPI_Comm_size( MPI_COMM_WORLD, &nproc ); 10 | MPI_Comm_rank( MPI_COMM_WORLD, &rank ); 11 | 12 | for(i=0; i<6; i++) 13 | isend[i] = isend[i] + nproc * rank; 14 | for(i=0; i 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int myrank,size; 9 | 10 | double f; 11 | int position, i; 12 | int a[2]; 13 | char buff[1000]; 14 | int j; 15 | 16 | MPI_Status status; 17 | MPI_Init(&argc, &argv); /*initializing */ 18 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /*Process#*/ 19 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 20 | 21 | if (myrank == 0) 22 | { 23 | /* SENDER CODE */ 24 | position = 0; 25 | i = 100; j = 200; f = 1.0; 26 | MPI_Pack(&i, 1, MPI_INT, buff, 1000, &position, MPI_COMM_WORLD); 27 | printf("pos = %d\n",position); 28 | MPI_Pack(&j, 1, MPI_INT, buff, 1000, &position, MPI_COMM_WORLD); 29 | printf("pos = %d\n",position); 30 | MPI_Pack(&f, 1, MPI_DOUBLE, buff, 1000, &position, MPI_COMM_WORLD); 31 | printf("pos = %d\n",position); 32 | MPI_Send( buff, position, MPI_PACKED, 1, 0, MPI_COMM_WORLD); 33 | } 34 | else /* RECEIVER CODE */ 35 | if(myrank == 1){ 36 | //MPI_Recv( a, 2, MPI_INT, 0, 0, MPI_COMM_WORLD,&status); 37 | printf(" before Proc#1 received 2 int(s) from Proc#0 : %d %d\n",a[0],a[1]); 38 | 39 | MPI_Recv(buff, 
1000,MPI_PACKED,0,0,MPI_COMM_WORLD,&status); 40 | position = 0; 41 | MPI_Unpack(buff,1000,&position,&a[0],1,MPI_INT,MPI_COMM_WORLD); 42 | MPI_Unpack(buff,1000,&position,&a[1],1,MPI_INT,MPI_COMM_WORLD); 43 | printf("Proc#1 received 2 int(s) from Proc#0 : %d %d\n",a[0],a[1]); 44 | } 45 | MPI_Finalize(); /*quit from MPI world*/ 46 | return (0); 47 | } 48 | -------------------------------------------------------------------------------- /example/mpi_demo/pack1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/example/mpi_demo/pack1 -------------------------------------------------------------------------------- /example/mpi_demo/pack1.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | #include 6 | 7 | int main( int argc, char** argv ) 8 | { 9 | 10 | int myrank,size; 11 | 12 | double A[50][50]; 13 | 14 | void *TempBuffer; 15 | 16 | int i, Position, BufferSize; 17 | 18 | MPI_Status status; 19 | 20 | MPI_Init(&argc, &argv); /*initializing */ 21 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /*Process#*/ 22 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 23 | 24 | if (myrank == 0) 25 | { 26 | for(i=0;i<50;i++) A[i][i] = (double)i; 27 | 28 | //Apply memeory space for 50 double data 29 | MPI_Pack_size(50, MPI_DOUBLE, MPI_COMM_WORLD, &BufferSize); 30 | TempBuffer = malloc(BufferSize); 31 | 32 | Position = 0; 33 | for (i=0;i<50;i++) 34 | MPI_Pack(&A[i][i], 1, MPI_DOUBLE, 35 | TempBuffer, BufferSize, &Position,MPI_COMM_WORLD); 36 | printf("buffersize is %d\n",BufferSize); 37 | MPI_Send(TempBuffer, Position, MPI_PACKED, 1, 0, MPI_COMM_WORLD); 38 | } 39 | else /* RECEIVER CODE */ 40 | { 41 | MPI_Pack_size(50, MPI_DOUBLE, MPI_COMM_WORLD, &BufferSize); 42 | TempBuffer = malloc(BufferSize); 43 | MPI_Recv(TempBuffer, BufferSize, MPI_PACKED,0, 0, MPI_COMM_WORLD,&status); 44 | Position = 0; 45 | for(i=0;i<50;i++) 46 | MPI_Unpack(TempBuffer,BufferSize,&Position, 47 | &A[i][i], 1, MPI_DOUBLE, MPI_COMM_WORLD); 48 | 49 | for(i=0;i<5;i++)printf("Proc#1 received %3.1lf from Proc#0\n",A[i][i]); 50 | } 51 | MPI_Finalize(); /*quit from MPI world*/ 52 | return (0); 53 | } 54 | -------------------------------------------------------------------------------- /example/mpi_demo/reduce-int-sum: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/example/mpi_demo/reduce-int-sum -------------------------------------------------------------------------------- /example/mpi_demo/reduce-int-sum.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int i, rank, size; 9 | 10 | double a[10],b[10]; 11 | 12 | double sum,c; 13 | 14 | 15 | MPI_Status status; 16 | 17 | MPI_Init(&argc, &argv); /*initializing */ 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 19 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 20 | 21 | 22 | //int MPI_Reduce( 23 | // void* sendbuf, 24 | // void* recvbuf, 25 | // int count , 26 | // MPI_Datatype datatype, 27 | // MPI_Op op, 28 | // int root, 29 | // MPI_Comm comm 30 | // ) 31 | 32 | 33 | for(i=0;i<10;i++) 34 | a[i] = b[i] = (double)(rank*10 + i); 35 | 36 | sum = 0.0; 37 | 38 | 
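    /* Each rank computes the dot product of its local a[] and b[] below; the
     * MPI_Reduce with MPI_SUM then adds the per-rank partial sums, so rank 0
     * ends up with the dot product of the full distributed vectors. */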
for(i=0;i<10;i++) 39 | sum += a[i] * b[i]; 40 | 41 | MPI_Barrier(MPI_COMM_WORLD); 42 | printf("Proc# %d got sub-sum : %.1f\n",rank,sum); 43 | MPI_Barrier(MPI_COMM_WORLD); 44 | 45 | MPI_Reduce( &sum, &c, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); 46 | 47 | if ( rank == 0 ) printf("Proc#0 got the final sum %.1f\n", c); 48 | 49 | MPI_Barrier(MPI_COMM_WORLD); 50 | 51 | MPI_Finalize(); /*quit from MPI world*/ 52 | return (0); 53 | } 54 | -------------------------------------------------------------------------------- /example/mpi_demo/reduce-max.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int i, rank, size; 9 | 10 | double a[10],b[10]; 11 | 12 | double sum,c; 13 | 14 | 15 | MPI_Status status; 16 | 17 | MPI_Init(&argc, &argv); /*initializing */ 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 19 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 20 | 21 | 22 | //int MPI_Reduce( 23 | // void* sendbuf, 24 | // void* recvbuf, 25 | // int count , 26 | // MPI_Datatype datatype, 27 | // MPI_Op op, 28 | // int root, 29 | // MPI_Comm comm 30 | // ) 31 | 32 | 33 | sum = 0.0; 34 | 35 | srandom((unsigned int)(&sum)%100000); 36 | 37 | for(i=0;i<10;i++) 38 | sum += (double)(random()%100); 39 | 40 | MPI_Barrier(MPI_COMM_WORLD); 41 | 42 | printf("Prco#%d has sum : %.1f\n",rank, sum); 43 | 44 | MPI_Barrier(MPI_COMM_WORLD); 45 | 46 | MPI_Reduce( &sum, &c, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); 47 | 48 | if ( rank == 0 ) printf("Proc#0 got the final MAX of sum %.1f\n", c); 49 | 50 | MPI_Barrier(MPI_COMM_WORLD); 51 | 52 | MPI_Finalize(); /*quit from MPI world*/ 53 | return (0); 54 | } 55 | -------------------------------------------------------------------------------- /example/mpi_demo/reduce-maxloc.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int i, rank, size; 9 | 10 | double ain[30], aout[30]; 11 | int ind[30]; 12 | struct { 13 | double val; 14 | int rank; 15 | } in[30], out[30]; 16 | 17 | 18 | MPI_Status status; 19 | 20 | MPI_Init(&argc, &argv); /*initializing */ 21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 22 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 23 | 24 | 25 | //int MPI_Reduce( 26 | // void* sendbuf, 27 | // void* recvbuf, 28 | // int count , 29 | // MPI_Datatype datatype, 30 | // MPI_Op op, 31 | // int root, 32 | // MPI_Comm comm 33 | // ) 34 | 35 | 36 | /* each process has an array of 30 double: ain[30] 37 | */ 38 | 39 | srandom((unsigned int)(&in[0])); 40 | 41 | for (i=0; i<30; ++i) { 42 | in[i].val = ain[i] = (double)(random()%1000); 43 | in[i].rank = rank; 44 | } 45 | MPI_Reduce( in, out, 30, MPI_DOUBLE_INT, MPI_MAXLOC, 0, MPI_COMM_WORLD); 46 | /* At this point, the answer resides on process root */ 47 | if (rank == 0 ) { 48 | /* read ranks out */ 49 | for (i=0; i<30; ++i) { 50 | aout[i] = out[i].val; 51 | ind[i] = out[i].rank; 52 | printf("aout[%d] = %f ind[%d] = %d\n",i,aout[i],i,ind[i]); 53 | } 54 | } 55 | 56 | MPI_Barrier(MPI_COMM_WORLD); 57 | 58 | MPI_Finalize(); /*quit from MPI world*/ 59 | return (0); 60 | } 61 | -------------------------------------------------------------------------------- /example/mpi_demo/reduce-minloc.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head 
file*/ 3 | 4 | #include 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | 9 | #define LEN 1000 10 | 11 | int i, rank, size; 12 | int j; 13 | double ain[LEN]; 14 | 15 | struct { 16 | double val; 17 | int index; 18 | } in, out; 19 | 20 | 21 | MPI_Status status; 22 | 23 | MPI_Init(&argc, &argv); /*initializing */ 24 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 25 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 26 | 27 | //A routine that computes the dot product of two vectors that are distributed 28 | //across a group of processes and returns the answer at node zero. 29 | 30 | //int MPI_Reduce( 31 | // void* sendbuf, 32 | // void* recvbuf, 33 | // int count , 34 | // MPI_Datatype datatype, 35 | // MPI_Op op, 36 | // int root, 37 | // MPI_Comm comm 38 | // ) 39 | 40 | 41 | srandom(((unsigned int)(&in.val))*(rank+10)); 42 | 43 | for(i=0;i ain[i] ) { 52 | in.val = ain[i]; 53 | in.index = i; 54 | } 55 | } 56 | in.index = rank * LEN + in.index; 57 | 58 | MPI_Reduce( &in, &out, 1, MPI_DOUBLE_INT, MPI_MINLOC, 0, MPI_COMM_WORLD); 59 | /* At this point, the answer resides on process root */ 60 | if (rank == 0 ) { 61 | int minrank, minindex; 62 | /* read ranks out */ 63 | minrank = out.index / LEN; 64 | minindex = out.index % LEN; 65 | printf("Proc#%d has the Minimum of ain[%d] = %f \n", 66 | minrank,minindex,out.val); 67 | } 68 | 69 | MPI_Barrier(MPI_COMM_WORLD); 70 | 71 | MPI_Finalize(); /*quit from MPI world*/ 72 | return (0); 73 | } 74 | -------------------------------------------------------------------------------- /example/mpi_demo/reduce-user-complex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/example/mpi_demo/reduce-user-complex -------------------------------------------------------------------------------- /example/mpi_demo/reduce-user-complex.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | typedef struct { 7 | double real,imag; 8 | } Complex; 9 | 10 | /* the user-defined function */ 11 | void myProd( Complex *in, Complex *inout, int *len, MPI_Datatype *dptr ) 12 | { 13 | int i; 14 | Complex c; 15 | 16 | for (i=0; i< *len; ++i) { 17 | c.real = inout->real*in->real - inout->imag*in->imag; 18 | c.imag = inout->real*in->imag + inout->imag*in->real; 19 | *inout = c; 20 | in++; inout++; 21 | } 22 | } 23 | 24 | 25 | int main( int argc, char** argv ) 26 | { 27 | #define LEN 5 28 | int i, rank, size; 29 | 30 | Complex a[LEN], answer[LEN]; 31 | MPI_Op myOp; 32 | MPI_Datatype ctype; 33 | 34 | 35 | MPI_Status status; 36 | 37 | MPI_Init(&argc, &argv); /*initializing */ 38 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 39 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 40 | 41 | 42 | //int MPI_Reduce( 43 | // void* sendbuf, 44 | // void* recvbuf, 45 | // int count , 46 | // MPI_Datatype datatype, 47 | // MPI_Op op, 48 | // int root, 49 | // MPI_Comm comm 50 | // ) 51 | 52 | 53 | // srandom(((unsigned int)(&in.val))*(rank+10)); 54 | 55 | /* explain to MPI how type Complex is defined */ 56 | MPI_Type_contiguous( 2, MPI_DOUBLE, &ctype ); 57 | MPI_Type_commit( &ctype ); 58 | 59 | for(i=0;i 5 | 6 | 7 | /* the user-defined function */ 8 | #define LEN 5 9 | 10 | void myProd( double *in, double *inout, int *len, MPI_Datatype *dptr ) 11 | { 12 | int i,j; 13 | for (i=0; i< *len; ++i) 14 | for(j=0;j 5 | 6 | int main( int argc, char** 
argv ) 7 | { 8 | int i, rank, size; 9 | 10 | double a[10]; 11 | 12 | double sum[10]; 13 | 14 | 15 | MPI_Status status; 16 | 17 | MPI_Init(&argc, &argv); /*initializing */ 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 19 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 20 | 21 | //A routine that computes the dot product of two vectors that are distributed 22 | //across a group of processes and returns the answer at node zero. 23 | 24 | //int MPI_Reduce( 25 | // void* sendbuf, 26 | // void* recvbuf, 27 | // int count , 28 | // MPI_Datatype datatype, 29 | // MPI_Op op, 30 | // int root, 31 | // MPI_Comm comm 32 | // ) 33 | 34 | 35 | for(i=0;i<10;i++) 36 | a[i] = (double)(rank*10 + i); 37 | 38 | MPI_Barrier(MPI_COMM_WORLD); 39 | 40 | printf("Proc#%d :",rank); 41 | for(i=0;i<10;i++) printf("%5.1f ", a[i]); 42 | printf("\n"); 43 | 44 | MPI_Barrier(MPI_COMM_WORLD); 45 | 46 | for(i=0;i<10;i++) 47 | sum[i] = 0.0; 48 | 49 | MPI_Reduce( a, sum, 10, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); 50 | 51 | if ( rank == 0 ) { 52 | printf("\nProc#0 :"); 53 | for(i=0;i<10;i++) printf("%5.1f ", sum[i]); 54 | printf("\n"); 55 | } 56 | 57 | 58 | MPI_Finalize(); /*quit from MPI world*/ 59 | return (0); 60 | } 61 | -------------------------------------------------------------------------------- /example/mpi_demo/ring: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/example/mpi_demo/ring -------------------------------------------------------------------------------- /example/mpi_demo/ring.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int i, rank, size, tag=1; 9 | 10 | int to, from; 11 | 12 | int senddata,recvdata; 13 | MPI_Status status; 14 | MPI_Init(&argc, &argv); /*initializing */ 15 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 16 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 17 | 18 | // cyclic shift send-recv with step 1 ~ Size-1 19 | 20 | for(i=1;i Proc#%d\n", recvdata, from, rank); 42 | 43 | MPI_Barrier(MPI_COMM_WORLD); 44 | 45 | } 46 | 47 | MPI_Finalize(); /*quit from MPI world*/ 48 | return (0); 49 | } 50 | -------------------------------------------------------------------------------- /example/mpi_demo/scan.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int i, rank, size; 9 | 10 | double a[10],b[10]; 11 | 12 | double sum,c; 13 | 14 | int senddata,recvdata; 15 | 16 | MPI_Status status; 17 | 18 | MPI_Init(&argc, &argv); /*initializing */ 19 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 20 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 21 | 22 | //A routine that computes the dot product of two vectors that are distributed 23 | //across a group of processes and returns the answer at node zero. 
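    // (Note: despite the file name this demo calls MPI_Reduce, so only rank 0
    //  gets the total.  An MPI_Scan with the same buffer/count/op arguments and
    //  no root argument would instead leave on each rank r the sum contributed
    //  by ranks 0..r, i.e. an inclusive prefix reduction.)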
24 | 25 | //int MPI_Reduce( 26 | // void* sendbuf, 27 | // void* recvbuf, 28 | // int count , 29 | // MPI_Datatype datatype, 30 | // MPI_Op op, 31 | // int root, 32 | // MPI_Comm comm 33 | // ) 34 | 35 | 36 | for(i=0;i<10;i++) 37 | a[i] = b[i] = (double)(rank + 1); 38 | 39 | sum = 0.0; 40 | 41 | for(i=0;i<10;i++) 42 | sum += a[i] * b[i]; 43 | 44 | MPI_Reduce( &sum, &c, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); 45 | 46 | if ( rank == 0 ) printf("Proc#0 got the final sum %lf\n", c); 47 | 48 | MPI_Barrier(MPI_COMM_WORLD); 49 | 50 | MPI_Finalize(); /*quit from MPI world*/ 51 | return (0); 52 | } 53 | -------------------------------------------------------------------------------- /example/mpi_demo/scanme.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | 7 | int main( int argc, char** argv ) 8 | { 9 | #define LEN 5 10 | int i, rank, size, true; 11 | 12 | double a[LEN], b[LEN]; 13 | 14 | 15 | MPI_Status status; 16 | 17 | MPI_Init(&argc, &argv); /*initializing */ 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); /*Process#*/ 19 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 20 | 21 | //int MPI_Scan( 22 | // void* sendbuf, 23 | // void* recvbuf, 24 | // int count, 25 | // MPI_Datatype datatype, 26 | // MPI_Op op, 27 | // MPI_Comm comm ) 28 | 29 | 30 | 31 | // srandom(((unsigned int)(&in.val))*(rank+10)); 32 | 33 | for(i=0;i 5 | 6 | int main( int argc, char** argv ) 7 | { 8 | int myrank,size; 9 | 10 | int i; 11 | 12 | struct Partstruct 13 | { 14 | int class; /* particle class */ 15 | double d[6]; /* particle coordinates */ 16 | char b[7]; /* some additional information */ 17 | }Par,p[100]; 18 | 19 | 20 | 21 | MPI_Status status; 22 | MPI_Init(&argc, &argv); /*initializing */ 23 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /*Process#*/ 24 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 25 | 26 | if (myrank == 0) 27 | { 28 | /* SENDER CODE */ 29 | 30 | Par.class = 100; 31 | for(i=0;i<6;i++) Par.d[i] = (double)i; 32 | for(i=0;i<7;i++) Par.b[i] = 'A' + i; 33 | 34 | printf("Par = %d, P = %d\n",sizeof(Par), sizeof(p)); 35 | 36 | MPI_Send(&Par, sizeof(Par), MPI_BYTE, 1,0, MPI_COMM_WORLD); 37 | } 38 | else /* RECEIVER CODE */ 39 | { 40 | MPI_Recv(&Par, sizeof(Par), MPI_BYTE,0,0,MPI_COMM_WORLD,&status); 41 | 42 | printf("Proc#1 received structs from Proc#0 : \n"); 43 | 44 | printf("class = %d\n", Par.class); 45 | 46 | printf("array d is : "); 47 | for(i=0;i<6;i++) printf("%3.1lf ",Par.d[i]); 48 | 49 | printf("\narray b is : "); 50 | for(i=0;i<7;i++) printf("%c ",Par.b[i]); 51 | 52 | printf("\n"); 53 | } 54 | MPI_Finalize(); /*quit from MPI world*/ 55 | return (0); 56 | } 57 | -------------------------------------------------------------------------------- /example/mpi_demo/type_struct1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/example/mpi_demo/type_struct1 -------------------------------------------------------------------------------- /example/mpi_demo/type_struct1.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | 6 | #define RELA 7 | 8 | int main( int argc, char** argv ) 9 | { 10 | int myrank,size; 11 | 12 | int i; 13 | 14 | struct Partstruct 15 | { 16 | int class; 17 | double d[6]; 18 | char b[7]; 19 | }; 20 | 21 | struct Partstruct particle; 22 | 23 | 
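    /* The derived datatype built below describes one Partstruct: block[] gives
     * the element counts (1 int, 6 doubles, 7 chars), type[] the matching MPI
     * basic types, and disp[] the byte displacements of the members obtained
     * with MPI_Address -- made relative to the struct's base address when RELA
     * is defined -- before MPI_Type_struct/MPI_Type_commit register the type. */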
24 | MPI_Datatype Particletype; 25 | MPI_Datatype type[3] = {MPI_INT, MPI_DOUBLE, MPI_CHAR}; 26 | int block[3] = {1, 6, 7}; 27 | MPI_Aint disp[3]; 28 | int base; // using relative displacement 29 | 30 | MPI_Status status; 31 | MPI_Init(&argc, &argv); /*initializing */ 32 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /*Process#*/ 33 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 34 | 35 | 36 | /* Particletype : using absolute addresses */ 37 | 38 | MPI_Address( &particle, disp); 39 | MPI_Address( &particle.d, disp+1); 40 | MPI_Address( &particle.b, disp+2); 41 | 42 | 43 | 44 | #ifdef RELA 45 | // the following codes use relative displacement 46 | base = disp[0]; 47 | for (i=0; i <3; i++) disp[i] -= base; 48 | printf(" Using realtive displacement\n"); 49 | #else 50 | printf(" Using absolute address\n"); 51 | #endif 52 | 53 | 54 | MPI_Type_struct( 3, block, disp, type, &Particletype); 55 | 56 | MPI_Type_commit( &Particletype); 57 | 58 | 59 | if (myrank == 0) 60 | { 61 | /* SENDER CODE */ 62 | 63 | particle.class = 100; 64 | for(i=0;i<6;i++) particle.d[i] = (double)i; 65 | for(i=0;i<7;i++) particle.b[i] = 'A' + i; 66 | 67 | #ifdef RELA 68 | 69 | // Using relative displacement 70 | MPI_Send( &particle, 1, Particletype, 1, 0, MPI_COMM_WORLD); 71 | 72 | #else 73 | 74 | // Using absolute address 75 | MPI_Send( MPI_BOTTOM, 1, Particletype, 1, 0, MPI_COMM_WORLD); 76 | 77 | #endif 78 | } 79 | else /* RECEIVER CODE */ 80 | { 81 | #ifdef RELA 82 | // Using relative displacement 83 | 84 | MPI_Recv(&particle , 1, Particletype, 0, 0, MPI_COMM_WORLD, &status); 85 | 86 | #else 87 | // Using absolute address 88 | 89 | MPI_Recv(MPI_BOTTOM, 1, Particletype, 0, 0, MPI_COMM_WORLD,&status); 90 | 91 | #endif 92 | printf("Proc#1 received structs Particle from Proc#0 : \n"); 93 | 94 | printf("class = %d\n", particle.class); 95 | 96 | printf("array d is : "); 97 | for(i=0;i<6;i++) printf("%3.1lf ",particle.d[i]); 98 | 99 | printf("\narray b is : "); 100 | for(i=0;i<7;i++) printf("%c ", particle.b[i]); 101 | 102 | printf("\n"); 103 | } 104 | MPI_Finalize(); /*quit from MPI world*/ 105 | return (0); 106 | } 107 | -------------------------------------------------------------------------------- /example/mpi_demo/type_vector-1.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | #include 6 | 7 | int main( int argc, char** argv ) 8 | { 9 | 10 | int myrank,size; 11 | 12 | double A[20][20]; 13 | 14 | MPI_Datatype EvenLine; 15 | 16 | void *TempBuffer; 17 | 18 | int i, j; 19 | 20 | MPI_Status status; 21 | 22 | MPI_Init(&argc, &argv); /*initializing */ 23 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /*Process#*/ 24 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 25 | 26 | 27 | MPI_Type_vector(10, 20, 40, MPI_DOUBLE, &EvenLine); 28 | MPI_Type_commit(&EvenLine); 29 | 30 | if (myrank == 0) 31 | { /* Sender Code */ 32 | /* All Even# Lines */ 33 | for(i=0;i<20;i++) 34 | for(j=0;j<20;j++) 35 | A[i][j] = (double)i; 36 | 37 | MPI_Send(A, 1, EvenLine, 1, 0, MPI_COMM_WORLD); 38 | 39 | 40 | } 41 | else /* RECEIVER CODE */ 42 | { 43 | for(i=0;i<20;i++) 44 | for(j=0;j<20;j++) 45 | A[i][j] = 0.0; 46 | 47 | MPI_Recv(A, 1, EvenLine,0, 0, MPI_COMM_WORLD,&status); 48 | 49 | for(i=0;i<20;i++) 50 | { 51 | for(j=0;j<10;j++) 52 | printf("%3.1lf ",A[i][j]); 53 | printf("\n"); 54 | } 55 | } 56 | MPI_Finalize(); /*quit from MPI world*/ 57 | return (0); 58 | } 59 | 60 | 
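/* In the program above, MPI_Type_vector(10, 20, 40, MPI_DOUBLE, &EvenLine)
 * describes 10 blocks of 20 doubles spaced 40 doubles apart, i.e. every second
 * row of the 20x20 array A; only the even-numbered rows are sent, so the odd
 * rows on the receiver keep their 0.0 initialisation. */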
-------------------------------------------------------------------------------- /example/mpi_demo/type_vector.c: -------------------------------------------------------------------------------- 1 | 2 | #include "mpi.h" /*MPI head file*/ 3 | 4 | #include 5 | #include 6 | 7 | int main( int argc, char** argv ) 8 | { 9 | 10 | int myrank,size; 11 | 12 | double A[100]; 13 | 14 | MPI_Datatype EvenElements; 15 | 16 | void *TempBuffer; 17 | 18 | int i, Position, BufferSize; 19 | 20 | MPI_Status status; 21 | 22 | MPI_Init(&argc, &argv); /*initializing */ 23 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /*Process#*/ 24 | MPI_Comm_size(MPI_COMM_WORLD, &size); /*Total processes#*/ 25 | 26 | 27 | MPI_Type_vector(50, 1, 2, MPI_DOUBLE, &EvenElements); 28 | MPI_Type_commit(&EvenElements); 29 | 30 | if (myrank == 0) 31 | { /* Sender Code */ 32 | 33 | for(i=0;i<100;i++) A[i] = (double)i*(double)i; 34 | 35 | MPI_Send(A, 1, EvenElements, 1, 0, MPI_COMM_WORLD); 36 | 37 | 38 | } 39 | else /* RECEIVER CODE */ 40 | { 41 | for(i=0;i<100;i++) A[i] = 0.0; 42 | 43 | MPI_Recv(A, 1, EvenElements,0, 0, MPI_COMM_WORLD,&status); 44 | 45 | for(i=0;i<10;i++)printf("Proc#1: A[%d] = %3.2f\n",i,A[i]); 46 | } 47 | MPI_Finalize(); /*quit from MPI world*/ 48 | return (0); 49 | } 50 | -------------------------------------------------------------------------------- /example/openmp/Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | OPENMP=-fopenmp 3 | SOURCES:=$(shell find $(.) -name '*.c') 4 | OBJS=$(SOURCES:%.c=%) 5 | 6 | 7 | all : $(OBJS) 8 | @echo $(SOURCES) 9 | 10 | %: %.c 11 | $(CC) $(OPENMP) $< -o $@ 12 | 13 | .PHONY: clean 14 | clean: 15 | rm $(OBJS) -------------------------------------------------------------------------------- /example/openmp/copyin.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int g = 0; 5 | #pragma omp threadprivate(g) 6 | int main(int argc, char* argv[]) 7 | { 8 | int i; 9 | #pragma omp parallel for 10 | for (i = 0; i < 4; i++) 11 | { 12 | g = omp_get_thread_num(); 13 | printf("thread %d, g = %d\n", omp_get_thread_num(), g); 14 | } 15 | printf("global g: %d\n", g); 16 | // YOUR CODE HERE 17 | #pragma omp parallel for copyin(g) 18 | // END OF YOUR CODE 19 | for (i = 0; i < 4; i++) 20 | printf("thread %d, g = %d\n", omp_get_thread_num(), g); 21 | return 0; 22 | } 23 | 24 | -------------------------------------------------------------------------------- /example/openmp/critical.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | int main() 6 | { 7 | int x; 8 | x = 0; 9 | omp_set_num_threads(10); 10 | #pragma omp parallel shared(x) 11 | { 12 | #pragma omp critical 13 | x = x + 1; 14 | } /* end of parallel section */ 15 | 16 | printf("x = %d\n", x); 17 | } -------------------------------------------------------------------------------- /example/openmp/dynamic.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | int i; 7 | // YOUR CODE HERE 8 | #pragma omp parallel for schedule(dynamic) num_threads(10) 9 | // END OF YOUR CODE 10 | for (i = 0; i < 10; i++) 11 | { 12 | printf("i = %d, thread %d\n", i, omp_get_thread_num()); 13 | } 14 | return 0; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /example/openmp/firstprivate.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | int t = 20, i; 7 | // YOUR CODE HERE 8 | #pragma omp parallel for firstprivate(t) 9 | // END OF YOUR CODE 10 | for (i = 0; i < 5; i++) 11 | { 12 | t += i; 13 | printf("t = %d\n", t); 14 | } 15 | printf("outside t = %d\n", t); 16 | return 0; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /example/openmp/for.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | #pragma omp parallel 7 | { 8 | int i, j; 9 | // YOUR CODE HERE 10 | #pragma omp for 11 | // END OF YOUR CODE 12 | for (i = 0; i < 5; i++) 13 | printf("i = %d\n", i); 14 | // YOUR CODE HERE 15 | #pragma omp for 16 | // END OF YOUR CODE 17 | for (j = 0; j < 5; j++) 18 | printf("j = %d\n", j); 19 | } 20 | return 0; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /example/openmp/fork_join.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void foo() 5 | { 6 | int cnt = 0; 7 | clock_t t1 = clock(); 8 | int i; 9 | for (i = 0; i < 1e8; i++) { 10 | cnt++; 11 | } 12 | clock_t t2 = clock(); 13 | printf("Time = %d\n", t2 - t1); 14 | } 15 | 16 | int main(int argc, char* argv[]) 17 | { 18 | clock_t t1 = clock(); 19 | int i; 20 | // YOUR CODE HERE 21 | #pragma omp parallel for 22 | // END OF YOUR CODE 23 | for (i = 0; i < 2; i++) { 24 | foo(); 25 | } 26 | clock_t t2 = clock(); 27 | printf("Total time = %d\n", t2 - t1); 28 | return 0; 29 | } 30 | 31 | -------------------------------------------------------------------------------- /example/openmp/get_num_procs.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | printf("the num of procs = %d\n", omp_get_num_procs()); 7 | printf("the num of threads = %d\n", omp_get_num_threads()); 8 | #pragma omp parallel 9 | { 10 | // YOUR CODE HERE 11 | printf("%d\n", omp_get_num_procs()); 12 | // END OF YOUR CODE 13 | } 14 | return 0; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /example/openmp/get_thread_num.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | printf("%d\n", omp_get_thread_num()); 7 | #pragma omp parallel 8 | { 9 | // YOUR CODE HERE 10 | printf("%d\n", omp_get_thread_num()); 11 | // END OF YOUR CODE 12 | } 13 | return 0; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /example/openmp/lastprivate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | int t = 20, i; 7 | // YOUR CODE HERE 8 | #pragma omp parallel for firstprivate(t), lastprivate(t) 9 | // END OF YOUR CODE 10 | for (i = 0; i < 5; i++) 11 | { 12 | t += i; 13 | printf("t = %d\n", t); 14 | } 15 | printf("outside t = %d\n", t); 16 | return 0; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /example/openmp/lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static omp_lock_t 
lock; 5 | 6 | int main(int argc, char* argv[]) 7 | { 8 | int i; 9 | omp_init_lock(&lock); 10 | #pragma omp parallel for 11 | for (i = 0; i < 5; ++i) 12 | { 13 | // YOUR CODE HERE 14 | omp_set_lock(&lock); 15 | // END OF YOUR CODE 16 | printf("%d+\n", omp_get_thread_num()); 17 | printf("%d-\n", omp_get_thread_num()); 18 | // YOUR CODE HERE 19 | omp_unset_lock(&lock); 20 | // END OF YOUR CODE 21 | } 22 | omp_destroy_lock(&lock); 23 | return 0; 24 | } 25 | 26 | -------------------------------------------------------------------------------- /example/openmp/omp_in_parallel.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | printf("max threads = %d\n", omp_get_max_threads()); 7 | printf("%d\n", omp_in_parallel()); 8 | omp_set_num_threads(4); 9 | #pragma omp parallel 10 | { 11 | // YOUR CODE HERE 12 | printf("%d\n", omp_in_parallel()); 13 | // END OF YOUR CODE 14 | } 15 | return 0; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /example/openmp/ordered.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char* argv[]) 6 | { 7 | #pragma omp parallel num_threads(8) 8 | { 9 | #pragma omp for ordered 10 | for(int i=0; i<10; ++i){ 11 | #pragma omp critical 12 | printf("%d ", i); 13 | #pragma omp ordered 14 | { 15 | #pragma omp critical 16 | printf("-%d ", i); 17 | } 18 | } 19 | } 20 | printf("\n"); 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /example/openmp/parallel.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | // YOUR CODE HERE 7 | #pragma omp parallel num_threads(6) 8 | // END OF YOUR CODE 9 | { 10 | printf("Thread: %d\n", omp_get_thread_num()); 11 | } 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /example/openmp/private.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | int main(int argc, char* argv[]) 6 | { 7 | int i = 20; 8 | int x = 2; 9 | int y = 2; 10 | // YOUR CODE HERE 11 | #pragma omp parallel for private(i,y) 12 | // END OF YOUR CODE 13 | for (i = 0; i < 10; i++) 14 | { 15 | printf("i = %d\n", i); 16 | x = i; 17 | y = i; 18 | printf("x = %d\n", x); 19 | printf("y = %d\n", y); 20 | } 21 | printf("outside i = %d\n", i); 22 | printf("outside x = %d\n", x); 23 | printf("outside y = %d\n", y); 24 | return 0; 25 | } 26 | 27 | 28 | -------------------------------------------------------------------------------- /example/openmp/reduction.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | 7 | int i; 8 | long sum = 10; 9 | // YOUR CODE HERE 10 | #pragma omp parallel for reduction(+: sum) 11 | // END OF YOUR CODE 12 | for (i = 0; i < 10; i++) 13 | { 14 | sum += i; 15 | printf("%ld\n", sum); 16 | } 17 | printf("sum = %ld\n", sum); 18 | return 0; 19 | } 20 | 21 | -------------------------------------------------------------------------------- /example/openmp/schedule.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | int i; 7 | 
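    /* schedule(static) below hands each thread a contiguous chunk of the ten
     * iterations, fixed when the loop is entered; compare dynamic.c, where
     * schedule(dynamic) lets threads grab further chunks at run time as they
     * finish their current one. */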
// YOUR CODE HERE 8 | #pragma omp parallel for schedule(static) 9 | // END OF YOUR CODE 10 | for (i = 0; i < 10; i++) 11 | { 12 | printf("i = %d, thread %d\n", i, omp_get_thread_num()); 13 | } 14 | return 0; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /example/openmp/section.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | // YOUR CODE HERE 7 | #pragma omp parallel sections num_threads(4) 8 | // END OF YOUR CODE 9 | { 10 | #pragma omp section 11 | printf("Section 1 ThreadId = %d\n", omp_get_thread_num()); 12 | #pragma omp section 13 | printf("Section 2 ThreadId = %d\n", omp_get_thread_num()); 14 | #pragma omp section 15 | printf("Section 3 ThreadId = %d\n", omp_get_thread_num()); 16 | #pragma omp section 17 | printf("Section 4 ThreadId = %d\n", omp_get_thread_num()); 18 | } 19 | return 0; 20 | } 21 | 22 | -------------------------------------------------------------------------------- /example/openmp/set_dynamic.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | int i; 7 | // YOUR CODE HERE 8 | omp_set_dynamic(1); 9 | printf("%d\n", omp_get_dynamic()); 10 | // END OF YOUR CODE 11 | #pragma omp parallel for 12 | for (i = 0; i < 16; i++) 13 | { 14 | printf("%d\n", omp_get_thread_num()); 15 | } 16 | return 0; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /example/openmp/set_num_threads.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | // YOUR CODE HERE 7 | omp_set_num_threads(10); 8 | // END OF YOUR CODE 9 | #pragma omp parallel 10 | { 11 | printf("%d of %d threads\n", omp_get_thread_num(), omp_get_num_threads()); 12 | } 13 | return 0; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /example/openmp/shared.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | int t = 20, i; 7 | // YOUR CODE HERE 8 | #pragma omp parallel for shared(t) 9 | // END OF YOUR CODE 10 | for (i = 0; i < 10; i++) 11 | { 12 | if (i % 2 == 0) 13 | t++; 14 | printf("i = %d, t = %d\n", i, t); 15 | } 16 | return 0; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /example/openmp/size.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | int i; 7 | // YOUR CODE HERE 8 | #pragma omp parallel for schedule(static, 5) 9 | // END OF YOUR CODE 10 | for (i = 0; i < 10; i++) 11 | { 12 | printf("i = %d, thread %d\n", i, omp_get_thread_num()); 13 | } 14 | return 0; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /example/openmp/test_lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static omp_lock_t lock; 5 | 6 | int main(int argc, char* argv[]) 7 | { 8 | int i; 9 | omp_init_lock(&lock); 10 | #pragma omp parallel for 11 | for (i = 0; i < 5; ++i) 12 | { 13 | // YOUR CODE HERE 14 | if (omp_test_lock(&lock)) 15 | // END OF YOUR CODE 16 | { 17 | printf("%d+\n", 
omp_get_thread_num()); 18 | printf("%d-\n", omp_get_thread_num()); 19 | omp_unset_lock(&lock); 20 | } 21 | else 22 | { 23 | printf("fail to get lock\n"); 24 | } 25 | } 26 | omp_destroy_lock(&lock); 27 | return 0; 28 | } 29 | 30 | -------------------------------------------------------------------------------- /example/openmp/threadprivate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int g = 0; 5 | #pragma omp threadprivate(g) 6 | 7 | int main(int argc, char* argv[]) 8 | { 9 | int t = 20, i; 10 | // YOUR CODE HERE 11 | #pragma omp parallel num_threads(4) 12 | // END OF YOUR CODE 13 | { 14 | g = omp_get_thread_num(); 15 | } 16 | #pragma omp parallel num_threads(8) 17 | { 18 | printf("thread id: %d g: %d\n", omp_get_thread_num(), g); 19 | } 20 | return 0; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /matrix/cannon.c: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | #include "math.h" 3 | 4 | int main(int argc, char *argv[]) 5 | { 6 | int id_procs, num_procs; 7 | int blksize, sqrt_procs; 8 | MPI_Status status; 9 | MPI_Request request; 10 | 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 14 | 15 | sqrt_procs = sqrt(num_procs); 16 | if (sqrt_procs * sqrt_procs != num_procs) { 17 | fprintf(stderr, "The Num of Proc must be Perfect square!\n"); 18 | return 1; 19 | } 20 | if (argc != 2) { 21 | fprintf(stderr, "Please add a Parameter about the block size!\n"); 22 | return 1; 23 | } 24 | blksize = atoi(argv[1]); 25 | 26 | // Proc#0 产生随机矩阵 A B 27 | int *A, *B, *C, *ans; 28 | int *A_in, *B_in; 29 | int *sA, *sB, *sC; 30 | int N = blksize*sqrt_procs; 31 | 32 | if (id_procs == 0) { 33 | // 串行计算结果 34 | sA = (int*)malloc(N*N*sizeof(int)); 35 | sB = (int*)malloc(N*N*sizeof(int)); 36 | sC = (int*)malloc(N*N*sizeof(int)); 37 | 38 | memset(sC, 0, N*N*sizeof(int)); 39 | random_mat(sA, N); 40 | // print_mat(sA, N, 0); 41 | random_mat(sB, N); 42 | // print_mat(sB, N, 0); 43 | matrix_multi(sA, sB, sC, N); 44 | } 45 | A = (int*)malloc(blksize*blksize*sizeof(int)); 46 | B = (int*)malloc(blksize*blksize*sizeof(int)); 47 | C = (int*)malloc(blksize*blksize*sizeof(int)); 48 | ans = (int*)malloc(blksize*blksize*sizeof(int)); 49 | A_in = (int*)malloc(blksize*blksize*sizeof(int)); 50 | B_in = (int*)malloc(blksize*blksize*sizeof(int)); 51 | 52 | memset(C, 0, blksize*blksize*sizeof(int)); 53 | 54 | MPI_Datatype SubMat; 55 | MPI_Type_vector(blksize, blksize, N, MPI_INT, &SubMat); 56 | MPI_Type_commit(&SubMat); 57 | 58 | MPI_Datatype Mat; 59 | MPI_Type_vector(blksize, blksize, blksize, MPI_INT, &Mat); 60 | MPI_Type_commit(&Mat); 61 | 62 | // Proc#0 将相应的数据发送到各个 Proc 中 63 | if (id_procs == 0) { 64 | for(int i = 0; i < sqrt_procs; i++) { 65 | int lineoff = blksize * N * i; 66 | for(int j = 0; j < sqrt_procs; j++) { 67 | if (i == 0 && j == 0) { 68 | // 分发矩阵 A 69 | MPI_Isend(sA, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 70 | MPI_Irecv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 71 | MPI_Wait(&request, &status); 72 | // 分发矩阵 B 73 | MPI_Isend(sB, 1, SubMat, 0, 1, MPI_COMM_WORLD, &request); 74 | MPI_Irecv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &request); 75 | MPI_Wait(&request, &status); 76 | continue; 77 | } 78 | int offset = j * blksize + lineoff; 79 | MPI_Send(sA+offset, 1, SubMat, i*sqrt_procs+j, 0, MPI_COMM_WORLD); 80 | MPI_Send(sB+offset, 1, SubMat, i*sqrt_procs+j, 1, 
MPI_COMM_WORLD); 81 | } 82 | } 83 | } else { 84 | MPI_Recv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &status); 85 | // print_mat(A, blksize, id_procs); 86 | MPI_Recv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &status); 87 | // print_mat(B, blksize, id_procs); 88 | } 89 | 90 | // 算法正式开始 91 | // 初始对齐 Aij 向左移i格 Bij 向上移j格 92 | MPI_Comm row_comm; 93 | int rank_A, size_A; 94 | int color_A; 95 | int key_A; 96 | key_A = id_procs % sqrt_procs; 97 | color_A = id_procs / sqrt_procs; 98 | 99 | MPI_Comm_split(MPI_COMM_WORLD, color_A, key_A, &row_comm); 100 | MPI_Comm_rank(row_comm, &rank_A); 101 | MPI_Comm_size(row_comm, &size_A); 102 | 103 | if (color_A > 0) { 104 | MPI_Send(A, 1, Mat, (rank_A-color_A+size_A)%size_A, 2, row_comm); 105 | MPI_Recv(A_in, 1, Mat, (rank_A+color_A)%size_A, 2, row_comm, &status); 106 | memcpy(A, A_in, blksize*blksize*sizeof(int)); 107 | } 108 | 109 | MPI_Comm col_comm; 110 | int rank_B, size_B; 111 | int color_B; 112 | int key_B; 113 | key_B = id_procs / sqrt_procs; 114 | color_B = id_procs % sqrt_procs; 115 | 116 | MPI_Comm_split(MPI_COMM_WORLD, color_B, key_B, &col_comm); 117 | MPI_Comm_rank(col_comm, &rank_B); 118 | MPI_Comm_size(col_comm, &size_B); 119 | 120 | if (color_B > 0) { 121 | MPI_Send(B, 1, Mat, (rank_B-color_B+size_B)%size_B, 2, col_comm); 122 | MPI_Recv(B_in, 1, Mat, (rank_B+color_B)%size_B, 2, col_comm, &status); 123 | memcpy(B, B_in, blksize*blksize*sizeof(int)); 124 | } 125 | 126 | // 重复执行 sqrt(num_procs) 次 127 | for(int i = 0; i < sqrt_procs; i++) { 128 | matrix_multi(A, B, C, blksize); 129 | 130 | MPI_Send(A, 1, Mat, (rank_A-1+size_A)%size_A, i+3, row_comm); 131 | MPI_Recv(A_in, 1, Mat, (rank_A+1)%size_A, i+3, row_comm, &status); 132 | memcpy(A, A_in, blksize*blksize*sizeof(int)); 133 | 134 | MPI_Send(B, 1, Mat, (rank_B-1+size_B)%size_B, i+3, col_comm); 135 | MPI_Recv(B_in, 1, Mat, (rank_B+1)%size_B, i+3, col_comm, &status); 136 | memcpy(B, B_in, blksize*blksize*sizeof(int)); 137 | } 138 | 139 | // 分发结果 自行比较 140 | if (id_procs == 0) { 141 | for(int i = 0; i < sqrt_procs; i++) { 142 | int lineoff = blksize * N * i; 143 | for(int j = 0; j < sqrt_procs; j++) { 144 | if (i == 0 && j == 0) { 145 | // 分发矩阵 A 146 | MPI_Isend(sC, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 147 | MPI_Irecv(ans, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 148 | MPI_Wait(&request, &status); 149 | continue; 150 | } 151 | int offset = j * blksize + lineoff; 152 | MPI_Send(sC+offset, 1, SubMat, i*sqrt_procs+j, 100, MPI_COMM_WORLD); 153 | } 154 | } 155 | } else { 156 | MPI_Recv(ans, 1, Mat, 0, 100, MPI_COMM_WORLD, &status); 157 | } 158 | 159 | // print_mat(ans, blksize, id_procs); 160 | 161 | if (check_mat(C, ans, blksize)) { 162 | printf("Proc#%d Done.\n", id_procs); 163 | } 164 | 165 | // print_mat(C, blksize, id_procs); 166 | 167 | MPI_Finalize(); 168 | free(A); 169 | free(B); 170 | free(C); 171 | free(ans); 172 | free(A_in); 173 | free(B_in); 174 | 175 | if (id_procs == 0) { 176 | free(sA); 177 | free(sB); 178 | free(sC); 179 | } 180 | return 0; 181 | } 182 | -------------------------------------------------------------------------------- /matrix/fox.c: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | #include 3 | 4 | 5 | int main(int argc, char *argv[]) 6 | { 7 | int id_procs, num_procs; 8 | int blksize, sqrt_procs; 9 | 10 | MPI_Init(&argc, &argv); 11 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 12 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 13 | 14 | sqrt_procs = sqrt(num_procs); 15 | if (sqrt_procs * sqrt_procs != num_procs) { 16 | 
fprintf(stderr, "The Num of Proc must be Perfect square!\n"); 17 | return 1; 18 | } 19 | if (argc != 2) { 20 | fprintf(stderr, "Please add a Parameter about the block size!\n"); 21 | return 1; 22 | } 23 | blksize = atoi(argv[1]); 24 | 25 | // produce random data 26 | int *A, *B, *C, *ans; 27 | int *A_in, *B_in; 28 | int *sA, *sB, *sC; 29 | int N = blksize*sqrt_procs; 30 | 31 | if (id_procs == 0) { 32 | sA = (int*)malloc(N*N*sizeof(int)); 33 | sB = (int*)malloc(N*N*sizeof(int)); 34 | sC = (int*)malloc(N*N*sizeof(int)); 35 | 36 | memset(sC, 0, N*N*sizeof(int)); 37 | random_mat(sA, N); 38 | random_mat(sB, N); 39 | matrix_multi(sA, sB, sC, N); 40 | } 41 | A = (int*)malloc(blksize*blksize*sizeof(int)); 42 | B = (int*)malloc(blksize*blksize*sizeof(int)); 43 | C = (int*)malloc(blksize*blksize*sizeof(int)); 44 | ans = (int*)malloc(blksize*blksize*sizeof(int)); 45 | A_in = (int*)malloc(blksize*blksize*sizeof(int)); 46 | B_in = (int*)malloc(blksize*blksize*sizeof(int)); 47 | 48 | memset(C, 0, blksize*blksize*sizeof(int)); 49 | 50 | MPI_Datatype SubMat, Mat; 51 | MPI_Status status; 52 | MPI_Request request; 53 | MPI_Type_vector(blksize, blksize, N, MPI_INT, &SubMat); 54 | MPI_Type_commit(&SubMat); 55 | 56 | MPI_Type_vector(blksize, blksize, blksize, MPI_INT, &Mat); 57 | MPI_Type_commit(&Mat); 58 | 59 | if (id_procs == 0) { 60 | for(int i = 0; i < sqrt_procs; i++) { 61 | int lineoff = blksize * N * i; 62 | for(int j = 0; j < sqrt_procs; j++) { 63 | if (i == 0 && j == 0) { 64 | // 分发矩阵 A 65 | MPI_Isend(sA, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 66 | MPI_Irecv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 67 | MPI_Wait(&request, &status); 68 | // 分发矩阵 B 69 | MPI_Isend(sB, 1, SubMat, 0, 1, MPI_COMM_WORLD, &request); 70 | MPI_Irecv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &request); 71 | MPI_Wait(&request, &status); 72 | continue; 73 | } 74 | int offset = j * blksize + lineoff; 75 | MPI_Send(sA+offset, 1, SubMat, i*sqrt_procs+j, 0, MPI_COMM_WORLD); 76 | MPI_Send(sB+offset, 1, SubMat, i*sqrt_procs+j, 1, MPI_COMM_WORLD); 77 | } 78 | } 79 | } else { 80 | MPI_Recv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &status); 81 | // print_mat(A, blksize, id_procs); 82 | MPI_Recv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &status); 83 | // print_mat(B, blksize, id_procs); 84 | } 85 | 86 | MPI_Comm row_comm, col_comm; 87 | int rank_A, size_A; 88 | int color_A; 89 | int key_A; 90 | 91 | int rank_B, size_B; 92 | int color_B; 93 | int key_B; 94 | 95 | // Comm Group by row 96 | key_A = id_procs % sqrt_procs; 97 | color_A = id_procs / sqrt_procs; 98 | MPI_Comm_split(MPI_COMM_WORLD, color_A, key_A, &row_comm); 99 | MPI_Comm_rank(row_comm, &rank_A); 100 | MPI_Comm_size(row_comm, &size_A); 101 | 102 | // Comm Group by B 103 | key_B = id_procs / sqrt_procs; 104 | color_B = id_procs % sqrt_procs; 105 | MPI_Comm_split(MPI_COMM_WORLD, color_B, key_B, &col_comm); 106 | MPI_Comm_rank(col_comm, &rank_B); 107 | MPI_Comm_size(col_comm, &size_B); 108 | 109 | for(int k = 0; k < sqrt_procs; k++) { 110 | if (rank_A == (color_A+k)%size_A) { 111 | memcpy(A_in, A, blksize*blksize*sizeof(int)); 112 | } 113 | // broadcast Ai,j 114 | MPI_Bcast(A_in, 1, Mat, (color_A+k)%size_A, row_comm); 115 | 116 | // compute 117 | matrix_multi(A_in, B, C, blksize); 118 | 119 | int dest = (rank_B-1 + size_B)%size_B; 120 | MPI_Send(B, 1, Mat, dest, 0, col_comm); 121 | MPI_Recv(B_in, 1, Mat, (rank_B+1)%size_B, 0, col_comm, &status); 122 | memcpy(B, B_in, blksize*blksize*sizeof(int)); 123 | } 124 | 125 | 126 | // 分发结果 自行比较 127 | if (id_procs == 0) { 128 | for(int i = 0; i < sqrt_procs; 
i++) { 129 | int lineoff = blksize * N * i; 130 | for(int j = 0; j < sqrt_procs; j++) { 131 | if (i == 0 && j == 0) { 132 | // 分发矩阵 A 133 | MPI_Isend(sC, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 134 | MPI_Irecv(ans, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 135 | MPI_Wait(&request, &status); 136 | continue; 137 | } 138 | int offset = j * blksize + lineoff; 139 | MPI_Send(sC+offset, 1, SubMat, i*sqrt_procs+j, 100, MPI_COMM_WORLD); 140 | } 141 | } 142 | } else { 143 | MPI_Recv(ans, 1, Mat, 0, 100, MPI_COMM_WORLD, &status); 144 | } 145 | 146 | // print_mat(ans, blksize, id_procs); 147 | 148 | if (check_mat(C, ans, blksize)) { 149 | printf("Proc#%d Done.\n", id_procs); 150 | } 151 | 152 | // print_mat(C, blksize, id_procs); 153 | 154 | free(A); 155 | free(B); 156 | free(C); 157 | free(ans); 158 | free(A_in); 159 | free(B_in); 160 | 161 | if (id_procs == 0) { 162 | free(sA); 163 | free(sB); 164 | free(sC); 165 | } 166 | MPI_Finalize(); 167 | return 0; 168 | } 169 | -------------------------------------------------------------------------------- /matrix/matrix.h: -------------------------------------------------------------------------------- 1 | #ifndef _MPI_SORT_H 2 | #define _MPI_SORT_H 3 | 4 | #include "mpi.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | #define INDEX(i, j, n) (((i)*(n))+(j)) 12 | 13 | #define SURCLK(x) \ 14 | do { \ 15 | clock_t t1 = clock(); \ 16 | x \ 17 | clock_t t2 = clock(); \ 18 | printf("Time: %lf\n", (double)(t2-t1)/CLOCKS_PER_SEC); \ 19 | }while(0); 20 | 21 | 22 | void random_mat(int *a, int num) { 23 | for(int i = 0; i < num; i++) { 24 | srand(clock()); 25 | for(int j = 0; j < num; j++) { 26 | a[INDEX(i, j, num)] = rand() % 100; 27 | } 28 | } 29 | } 30 | 31 | void print_mat(int *a, int num, int id) { 32 | for(int i = 0; i < num; i++) { 33 | for(int j = 0; j < num; j++) { 34 | printf("|%d : %d ", id, a[INDEX(i, j, num)]); 35 | } 36 | printf("\n"); 37 | } 38 | } 39 | 40 | int check_mat(int *C, int *nC, int num) { 41 | for(int i = 0; i < num; i++) { 42 | for(int j = 0; j < num; j++) { 43 | if (C[INDEX(i, j, num)] != nC[INDEX(i, j, num)]) { 44 | printf("C[%d,%d] should be %d ,not %d\n", 45 | i,j,C[INDEX(i,j,num)],nC[INDEX(i,j,num)]); 46 | return 0; 47 | } 48 | } 49 | } 50 | return 1; 51 | } 52 | 53 | int compare(const void *arg1, const void *arg2) { 54 | return *(int*)arg1 >= *(int*)arg2; 55 | } 56 | 57 | void matrix_multi(int *A, int *B, int *C, int num) { 58 | for(int i = 0; i < num; i++) { 59 | for(int j = 0; j < num; j++) { 60 | for(int k = 0; k < num; k++) 61 | C[INDEX(i, j, num)] += A[INDEX(i, k, num)] * B[INDEX(k, j, num)]; 62 | } 63 | } 64 | } 65 | 66 | void matrix_transpose(int *a, int num) { 67 | int temp; 68 | for(int i = 1; i < num; i++) { 69 | for(int j = 0; j < i; j++) { 70 | temp = a[INDEX(i, j, num)]; 71 | a[INDEX(i, j, num)] = a[INDEX(j, i, num)]; 72 | a[INDEX(j, i, num)] = temp; 73 | } 74 | } 75 | } 76 | 77 | #endif // _MPI_SORT_H 78 | -------------------------------------------------------------------------------- /matrix/tranpose.c: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | #include "math.h" 3 | 4 | int main(int argc, char *argv[]) 5 | { 6 | int *sa; 7 | int *sb; 8 | int id_procs, num_procs; 9 | int blksize, sqrt_procs; 10 | 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 14 | 15 | sqrt_procs = sqrt(num_procs); 16 | if (sqrt_procs * sqrt_procs != num_procs) { 17 | fprintf(stderr, "The 
Num of Proc must be Perfect square!\n"); 18 | return 1; 19 | } 20 | if (argc != 2) { 21 | fprintf(stderr, "Please add a Parameter about the matrix size!\n"); 22 | return 1; 23 | } 24 | blksize = atoi(argv[1]); 25 | int n = blksize * sqrt_procs; 26 | int *a = (int*)malloc(blksize*blksize*sizeof(int)); 27 | int *b = (int*)malloc(blksize*blksize*sizeof(int)); 28 | 29 | if (id_procs == 0) { 30 | sb = (int *)malloc(n*n*sizeof(int)); 31 | sa = (int *)malloc(n*n*sizeof(int)); 32 | random_mat(sb, n); 33 | memcpy(sa, sb, n*n*sizeof(int)); 34 | matrix_transpose(sb, n); 35 | } 36 | 37 | // Proc#0 send the transpose submat 38 | // Put A_ij to #Proc_ji 39 | MPI_Datatype SubMat, Mat; 40 | MPI_Status status; 41 | MPI_Request request; 42 | MPI_Type_vector(blksize, blksize, n, MPI_INT, &SubMat); 43 | MPI_Type_commit(&SubMat); 44 | MPI_Type_vector(blksize, blksize, blksize, MPI_INT, &Mat); 45 | MPI_Type_commit(&Mat); 46 | 47 | if (id_procs == 0) { 48 | for(int i = 0; i < sqrt_procs; i++) { 49 | int lineoff = blksize * n * i; 50 | for(int j = 0; j < sqrt_procs; j++) { 51 | if (i == 0 && j == 0) { 52 | // 分发矩阵 A 53 | MPI_Isend(sa, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 54 | MPI_Irecv(a, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 55 | MPI_Wait(&request, &status); 56 | continue; 57 | } 58 | int offset = j * blksize + lineoff; 59 | MPI_Send(sa+offset, 1, SubMat, j*sqrt_procs+i, 0, MPI_COMM_WORLD); 60 | } 61 | } 62 | } else { 63 | MPI_Recv(a, 1, Mat, 0, 0, MPI_COMM_WORLD, &status); 64 | } 65 | 66 | matrix_transpose(a, blksize); 67 | 68 | if (id_procs == 0) { 69 | for(int i = 0; i < sqrt_procs; i++) { 70 | int lineoff = blksize * n * i; 71 | for(int j = 0; j < sqrt_procs; j++) { 72 | if (i == 0 && j == 0) { 73 | // 分发矩阵 A 74 | MPI_Isend(sb, 1, SubMat, 0, 0, MPI_COMM_WORLD, &request); 75 | MPI_Irecv(b, 1, Mat, 0, 0, MPI_COMM_WORLD, &request); 76 | MPI_Wait(&request, &status); 77 | continue; 78 | } 79 | int offset = j * blksize + lineoff; 80 | MPI_Send(sb+offset, 1, SubMat, i*sqrt_procs+j, 100, MPI_COMM_WORLD); 81 | } 82 | } 83 | } else { 84 | MPI_Recv(b, 1, Mat, 0, 100, MPI_COMM_WORLD, &status); 85 | } 86 | 87 | if(check_mat(a,b, blksize)) { 88 | printf("Proc#%d Done.\n", id_procs); 89 | } 90 | 91 | 92 | if (id_procs == 0) { 93 | free(sa); 94 | free(sb); 95 | } 96 | free(a); 97 | free(b); 98 | MPI_Finalize(); 99 | return 0; 100 | } 101 | -------------------------------------------------------------------------------- /parallel01/PSRS.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int compare(const void *p1, const void *p2) 7 | { 8 | return (*(int *)p1 - *(int *)p2); 9 | } 10 | 11 | //Merge函数合并两个子数组形成单一的已排好序的字数组 12 | //并代替当前的子数组A[p..r] 13 | void Merge(int *a, int p, int q, int r) 14 | { 15 | int i, j, k; 16 | int n1 = q - p + 1; 17 | int n2 = r - q; 18 | int L[n1 + 1]; 19 | int R[n2 + 1]; 20 | for (i = 0; i < n1; i++) 21 | L[i] = a[p + i]; 22 | L[i] = 65536; 23 | for (j = 0; j < n2; j++) 24 | R[j] = a[q + j + 1]; 25 | R[j] = 65536; 26 | i = 0, j = 0; 27 | for (k = p; k <= r; k++) 28 | { 29 | if (L[i] <= R[j]) 30 | { 31 | a[k] = L[i]; 32 | i++; 33 | } 34 | else 35 | { 36 | a[k] = R[j]; 37 | j++; 38 | } 39 | } 40 | } 41 | //归并排序 42 | void MergeSort(int *a, int p, int r) 43 | { 44 | if (p < r) 45 | { 46 | int q = (p + r) / 2; 47 | MergeSort(a, p, q); 48 | MergeSort(a, q + 1, r); 49 | Merge(a, p, q, r); 50 | } 51 | } 52 | 53 | int main() 54 | { 55 | int a[1024]; // 输入的数组 56 | int result[1024]; // 排好顺序的数组 57 | int i = 0, 
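/*
 * PSRS (Parallel Sorting by Regular Sampling) with OpenMP, as implemented below:
 *   1. split the input evenly; each of the p threads qsorts its own segment;
 *   2. each thread takes p regular samples from its sorted segment;
 *   3. a single thread sorts the p*p samples and picks p-1 pivots;
 *   4. every thread partitions its segment by those pivots (pivot_a records the
 *      boundaries) and copies each piece to its destination inside result;
 *   5. each thread merge-sorts the slice of result assigned to it.
 */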
j, k = 0; 58 | int p = 3; 59 | FILE *fin; 60 | fin = fopen("input.txt", "r"); 61 | while (fscanf(fin, "%d", &a[i]) != EOF) 62 | { 63 | i++; 64 | } 65 | 66 | int len = i; 67 | int group = len / p; 68 | int mod = len % p; 69 | 70 | omp_set_num_threads(p); 71 | int b[p * p]; // 采样排序 72 | int pivot[p + 1]; // 主元数组 73 | int pivot_a[p][p + 1]; // 重排序用到的 记录位置的数组 74 | int newpivot[p + 1]; 75 | for (i = 0; i < p + 1; i++) 76 | newpivot[i] = 0; 77 | 78 | #pragma omp parallel private(i, j, k) shared(b, pivot, pivot_a, result, newpivot) 79 | { 80 | // 均匀划分 局部排序 81 | int id = omp_get_thread_num(); 82 | if(id!=p-1) 83 | qsort(a + id * group, group, sizeof(int), compare); 84 | else qsort(a + id * group, group + mod, sizeof(int), compare); 85 | 86 | // 正则采样 87 | for (j = 0; j < p; j++) 88 | b[j + id * group / p] = a[id * group + j * group / p]; 89 | 90 | // 采样排序 91 | #pragma omp barrier 92 | #pragma omp single 93 | { 94 | qsort(b, p * p, sizeof(int), compare); 95 | // 选择p-1个主元 96 | for (i = 1; i < p; i++) 97 | pivot[i] = b[i * p]; 98 | } 99 | // pivot_a 计算出小于主元的数的位置 100 | // 6 14 15 | 39 46 48 | 72 91 93 101 | // 12 21 | 36 40 54 61 69 | 89 97 102 | // 20 27 32 33 | 53 58 | 72 84 97 103 | // 3 6 104 | // 2 7 105 | // 4 6 106 | for (j = 0; j < group; j++) 107 | { 108 | for (i = 0; i <= p; i++) 109 | { 110 | if (i == p) 111 | { 112 | if(id != p-1) 113 | pivot_a[id][i] = group; 114 | else pivot_a[id][i] = group + mod; 115 | continue; 116 | } 117 | if (i == 0) 118 | { 119 | pivot_a[id][i] = 0; 120 | continue; 121 | } 122 | if (a[id * group + j] <= pivot[i]) 123 | pivot_a[id][i] = j + 1; 124 | } 125 | } 126 | #pragma omp critical 127 | for (j = 1; j <= p; j++) 128 | { 129 | int sumoffset = 0; 130 | for (i = 0; i < p; i++) 131 | if (i >= id) 132 | sumoffset += pivot_a[i][j - 1]; 133 | else 134 | sumoffset += pivot_a[i][j]; 135 | memcpy(result + sumoffset, &a[id * group + pivot_a[id][j - 1]], (pivot_a[id][j] - pivot_a[id][j - 1]) * sizeof(int)); 136 | } 137 | 138 | #pragma omp barrier 139 | #pragma omp single 140 | { 141 | for (i = 1; i <= p; i++) 142 | for (j = 0; j < p; j++) 143 | newpivot[i] += pivot_a[j][i]; 144 | } 145 | MergeSort(result, newpivot[id], newpivot[id + 1] - 1); 146 | } 147 | 148 | printf("原输入的数据:\n"); 149 | for (int i = 0; i < len; i++) 150 | printf("%d,", a[i]); 151 | 152 | printf("\n 打印主元:%d, %d\n", pivot[1], pivot[2]); 153 | for (i = 0; i < p; i++) 154 | { 155 | for (j = 0; j <= p; j++) 156 | printf("%d,", pivot_a[i][j]); 157 | printf("\n"); 158 | } 159 | 160 | printf("排序结果:\n"); 161 | for (i = 0; i < len; i++) 162 | printf("%d ", result[i]); 163 | } 164 | -------------------------------------------------------------------------------- /parallel01/input.txt: -------------------------------------------------------------------------------- 1 | 15 46 48 93 39 6 72 91 14 36 69 40 89 61 97 12 21 54 53 97 84 58 32 27 33 72 20 12 13 -------------------------------------------------------------------------------- /parallel01/merge: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/parallel01/merge -------------------------------------------------------------------------------- /parallel01/merge.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | //Merge函数合并两个子数组形成单一的已排好序的字数组 4 | //并代替当前的子数组A[p..r] 5 | void Merge(int *a, int p, int q, int r) 6 | { 7 | int i,j,k; 8 | int n1 = q - p + 1; 9 | int n2 = r - q; 10 
| int *L = (int*)malloc((n1+1)*sizeof(int)); 11 | int *R = (int*)malloc((n2+1)*sizeof(int)); 12 | for(i=0; i 2 | #include 3 | 4 | #define NUM_THREAD 4 5 | 6 | int main() 7 | { 8 | int i; 9 | double pi=0.0; 10 | 11 | long num_steps = 100000; 12 | double step = 1.0/num_steps; 13 | 14 | for(i=0;i 2 | #include 3 | #include 4 | 5 | int compare(const void *p1, const void *p2) 6 | { 7 | return (*(int *)p1 - *(int *)p2); 8 | } 9 | 10 | //Merge函数合并两个子数组形成单一的已排好序的字数组 11 | //并代替当前的子数组A[p..r] 12 | void Merge(int *a, int p, int q, int r) 13 | { 14 | int i, j, k; 15 | int n1 = q - p + 1; 16 | int n2 = r - q; 17 | int L[n1 + 1]; 18 | int R[n2 + 1]; 19 | for (i = 0; i < n1; i++) 20 | L[i] = a[p + i]; 21 | L[i] = 65536; 22 | for (j = 0; j < n2; j++) 23 | R[j] = a[q + j + 1]; 24 | R[j] = 65536; 25 | i = 0, j = 0; 26 | for (k = p; k <= r; k++) 27 | { 28 | if (L[i] <= R[j]) 29 | { 30 | a[k] = L[i]; 31 | i++; 32 | } 33 | else 34 | { 35 | a[k] = R[j]; 36 | j++; 37 | } 38 | } 39 | } 40 | //归并排序 41 | void MergeSort(int *a, int p, int r) 42 | { 43 | if (p < r) 44 | { 45 | int q = (p + r) / 2; 46 | MergeSort(a, p, q); 47 | MergeSort(a, q + 1, r); 48 | Merge(a, p, q, r); 49 | } 50 | } 51 | 52 | 53 | void debug(int *array, int len) 54 | { 55 | for (int i = 0; i < len; i++) 56 | printf("%d,", array[i]); 57 | printf("\n"); 58 | } 59 | 60 | int main(int argc, char *argv[]) 61 | { 62 | int array[1024]; 63 | int result[1024]; 64 | int len; 65 | int processors; 66 | int id; 67 | int group; 68 | int group_len; 69 | int mod; 70 | 71 | int i, index; 72 | // 读入数据部分 73 | FILE *fin; 74 | fin = fopen("input.txt", "r"); 75 | while (fscanf(fin, "%d", &array[i]) != EOF) 76 | i++; 77 | 78 | // 开始mpi 79 | MPI_Init(&argc, &argv); 80 | MPI_Comm_size(MPI_COMM_WORLD, &processors); 81 | MPI_Comm_rank(MPI_COMM_WORLD, &id); 82 | MPI_Status status; 83 | MPI_Request request; 84 | 85 | len = i; 86 | group = len / processors; 87 | mod = len % processors; 88 | 89 | int sample[processors * processors]; 90 | int pivot[processors]; 91 | 92 | // 均匀划分 局部排序 93 | if (id != processors - 1) { 94 | qsort(array + id * group, group, sizeof(int), compare); 95 | group_len = group; 96 | } 97 | else { 98 | qsort(array + id * group, group + mod, sizeof(int), compare); 99 | group_len = group+mod; 100 | } 101 | // 正则采样 102 | for (i = 0; i < processors; i++) 103 | sample[i + id * group / processors] = array[id * group + i * group / processors]; 104 | 105 | // 采样到的样本全部放到0号线程 106 | if (id == 0) 107 | for (i = 1; i < processors; i++) 108 | MPI_Recv(sample + processors * i, processors, MPI_INT, i, 100 + i, MPI_COMM_WORLD, &status); 109 | else 110 | MPI_Send(sample + id * group / processors, processors, MPI_INT, 0, 100 + id, MPI_COMM_WORLD); 111 | 112 | // 采样排序 113 | if (id == 0) 114 | { 115 | qsort(sample, processors * processors, sizeof(int), compare); 116 | for (i = 0; i < processors-1; i++) 117 | pivot[i] = sample[(i+1) * processors]; 118 | // 分发主元 119 | for(i = 1;i < processors; i++) 120 | MPI_Send(pivot, processors, MPI_INT, i, 110+i, MPI_COMM_WORLD); 121 | } 122 | 123 | MPI_Barrier(MPI_COMM_WORLD); 124 | 125 | // 接收主元 126 | if(id != 0) 127 | MPI_Recv(pivot, processors, MPI_INT, 0, 110+id, MPI_COMM_WORLD, &status); 128 | 129 | // 进行主元划分 130 | index = 0; 131 | int partionSize[processors]; 132 | for(i = 0;i < processors;i++) 133 | partionSize[i] = 0; 134 | 135 | for(i = 0; i < group_len;i++) { 136 | if(array[i+id*group] > pivot[index]) 137 | index++; 138 | if(index == processors) { 139 | partionSize[processors-1] = group_len -i+1; 140 | break; 141 | } 142 | 
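// One pass over the locally sorted segment assigns every element to a pivot
// bucket: index is the destination processor and moves forward whenever the
// current element exceeds pivot[index], so partionSize[k] ends up holding the
// number of local elements bound for processor k.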
partionSize[index]++; 143 | } 144 | 145 | // 广播分组 先发送各个段的长度 146 | int newSize[processors]; 147 | MPI_Alltoall(partionSize, 1, MPI_INT, newSize, 1, MPI_INT, MPI_COMM_WORLD); 148 | 149 | // 计算位移 150 | int sendIndex[processors]; 151 | int recvIndex[processors]; 152 | 153 | sendIndex[0] = 0; 154 | recvIndex[0] = 0; 155 | for(i = 1;i < processors;i++) { 156 | sendIndex[i] = sendIndex[i-1]+partionSize[i-1]; 157 | recvIndex[i] = recvIndex[i-1]+newSize[i-1]; 158 | } 159 | // 计算总长度 160 | int totalSize = 0; 161 | for(i = 0;i < processors;i++) 162 | totalSize += newSize[i]; 163 | 164 | 165 | // 每个处理器发送数据给其他所有处理器,且每个处理发送的数据长度都不同 166 | // 故有长度数组和位移数组 167 | MPI_Alltoallv(array+id*group, partionSize, sendIndex, MPI_INT, 168 | result, newSize, recvIndex, MPI_INT, MPI_COMM_WORLD); 169 | 170 | // 归并排序 171 | MergeSort(result, 0, totalSize-1); 172 | 173 | MPI_Gather(&totalSize, 1, MPI_INT, recvIndex, 1, MPI_INT, 0, MPI_COMM_WORLD); 174 | 175 | int recvDisp[processors]; 176 | recvDisp[0]; 177 | for(i = 1;i < processors;i++) 178 | recvDisp[i] = recvIndex[i-1] + recvDisp[i-1]; 179 | 180 | MPI_Gatherv(result, totalSize, MPI_INT, array, recvIndex, recvDisp, MPI_INT, 0, MPI_COMM_WORLD); 181 | 182 | if(id == 0) 183 | debug(array, len); 184 | 185 | MPI_Finalize(); 186 | return 0; 187 | } 188 | -------------------------------------------------------------------------------- /parallel02/input.txt: -------------------------------------------------------------------------------- 1 | 15 46 48 93 39 6 72 91 14 36 69 40 89 61 97 12 21 54 53 97 84 58 32 27 33 72 20 -------------------------------------------------------------------------------- /parallel02/pi.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char *argv[]){ 6 | int my_rank, num_procs; 7 | int i, n = 0; 8 | double sum, width, local, mypi, pi; 9 | double start = 0.0, stop = 0.0; 10 | int proc_len; 11 | char processor_name[MPI_MAX_PROCESSOR_NAME]; 12 | 13 | MPI_Init(&argc, &argv); 14 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 15 | MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 16 | MPI_Get_processor_name(processor_name, &proc_len); 17 | printf("Process %d of %d\n", my_rank, num_procs); 18 | if(my_rank == 0){ 19 | printf("please give step number n:"); 20 | scanf("%d", &n); 21 | printf("step number is : %d\n", n); 22 | start = MPI_Wtime(); 23 | } 24 | // printf("Process %d of %d\n", my_rank, num_procs); 25 | 26 | MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); 27 | width = 1.0 / n; 28 | sum = 0.0; 29 | for(i = my_rank; i < n; i += num_procs){ 30 | local = width * ((double)i + 0.5); 31 | sum += 4.0 / (1.0 + local * local); 32 | } 33 | mypi = width * sum; 34 | MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, 35 | MPI_COMM_WORLD); 36 | if(my_rank == 0){ 37 | printf("PI is %.20f\n", pi); 38 | stop = MPI_Wtime(); 39 | printf("Time: %f on %s\n", stop-start, processor_name); 40 | fflush(stdout); 41 | } 42 | MPI_Finalize(); 43 | return 0; 44 | } -------------------------------------------------------------------------------- /parallel02/test.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingfen/ParallelComputing/eff9b3a7aa62b0c9401dc3b1f962303d54437de4/parallel02/test.c -------------------------------------------------------------------------------- /sort/enum_sort.c: -------------------------------------------------------------------------------- 1 | #include "sort.h" 2 | 3 | /** 4 | * @input 输入数组 a 5 | * 
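 * Enumeration (rank) sort: every process owns one element a[id_procs] and
 * counts k = number of elements that are smaller (ties broken by lower rank);
 * k is that element's final position, so rank 0 collects the counts and
 * writes each element into b[k].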
@output 输出数组 b 6 | */ 7 | int main(int argc, char *argv[]) 8 | { 9 | int num_procs, id_procs; 10 | MPI_Status status; 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 14 | 15 | int *a, *b, *b2; 16 | a = (int*)malloc(num_procs*sizeof(int)); 17 | if (id_procs == 0) { 18 | b2 = (int*)malloc(num_procs*sizeof(int)); 19 | b = (int*)malloc(num_procs*sizeof(int)); 20 | random_array(b2, num_procs); 21 | memcpy(a, b2, num_procs*sizeof(int)); 22 | SURCLK(qsort(b2, num_procs, sizeof(int), compare);) 23 | } 24 | 25 | 26 | MPI_Bcast(a, num_procs, MPI_INT, 0, MPI_COMM_WORLD); 27 | int k = 0; 28 | for(int j = 0; j < num_procs; j++) { 29 | if (a[id_procs] > a[j] || (a[id_procs] == a[j] && id_procs > j)) 30 | k++; 31 | } 32 | 33 | 34 | if (id_procs == 0) { 35 | int recv; 36 | for(int i = 1; i < num_procs; i++) { 37 | MPI_Recv(&recv, 1, MPI_INT, i, 1, MPI_COMM_WORLD, &status); 38 | b[recv] = a[i]; 39 | } 40 | b[k] = a[0]; 41 | } else { 42 | MPI_Send(&k, 1, MPI_INT, 0, 1, MPI_COMM_WORLD); 43 | } 44 | 45 | if (id_procs == 0) { 46 | if (check_array(b, b2, num_procs)) 47 | printf("Done.\n"); 48 | else 49 | printf("Error Occured!\n"); 50 | } 51 | 52 | free(a); 53 | if (id_procs == 0) { 54 | free(b2); 55 | free(b); 56 | } 57 | MPI_Finalize(); 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /sort/psrs_sort.c: -------------------------------------------------------------------------------- 1 | #include "sort.h" 2 | 3 | //Merge函数合并两个子数组形成单一的已排好序的字数组 4 | //并代替当前的子数组A[p..r] 5 | void merge(int *a, int p, int q, int r) { 6 | int i, j, k; 7 | int n1 = q - p + 1; 8 | int n2 = r - q; 9 | int L[n1 + 1]; 10 | int R[n2 + 1]; 11 | for (i = 0; i < n1; i++) 12 | L[i] = a[p + i]; 13 | L[i] = 65536; 14 | for (j = 0; j < n2; j++) 15 | R[j] = a[q + j + 1]; 16 | R[j] = 65536; 17 | i = 0, j = 0; 18 | for (k = p; k <= r; k++) { 19 | if (L[i] <= R[j]) { 20 | a[k] = L[i]; 21 | i++; 22 | } 23 | else { 24 | a[k] = R[j]; 25 | j++; 26 | } 27 | } 28 | } 29 | 30 | 31 | //归并排序 32 | void merge_sort(int *a, int p, int r) { 33 | if (p < r) { 34 | int q = (p + r) / 2; 35 | merge_sort(a, p, q); 36 | merge_sort(a, q + 1, r); 37 | merge(a, p, q, r); 38 | } 39 | } 40 | 41 | 42 | int main(int argc, char *argv[]) 43 | { 44 | int *array, *a; 45 | int *result, *ans; 46 | int n; 47 | int num_procs, id_procs; 48 | int group, group_len; 49 | int mod; 50 | 51 | int i, index; 52 | 53 | // 开始mpi 54 | MPI_Init(&argc, &argv); 55 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 56 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 57 | MPI_Status status; 58 | MPI_Request request; 59 | 60 | if (argc != 2) { 61 | fprintf(stderr, "Please input the num of array!\n"); 62 | return 1; 63 | } 64 | n = atoi(argv[1]); 65 | if (n <= 0) { 66 | fprintf(stderr, "Please, the size must > 0.\n"); 67 | return 1; 68 | } 69 | 70 | group = n / num_procs; 71 | mod = n % num_procs; 72 | a = (int *)malloc((group+mod)*sizeof(int)); 73 | 74 | if (id_procs == 0) { 75 | // 产生随机数据 76 | // 串行运行 得出结果 77 | array = (int*)malloc((n+2)*sizeof(int)); 78 | result = (int*)malloc((n+2)*sizeof(int)); 79 | ans = (int*)malloc((n+2)*sizeof(int)); 80 | random_array(array, n); 81 | memcpy(ans, array, (n)*sizeof(int)); 82 | qsort(ans, n, sizeof(int), compare); 83 | } 84 | 85 | // 分段数据 移交各个处理器 86 | int sendcounts[num_procs]; 87 | int sdispls[num_procs]; 88 | for(int i = 0; i < num_procs; i++) { 89 | sendcounts[i] = group; 90 | sdispls[i] = i*group; 91 | } 92 | sendcounts[num_procs-1] = 
group+mod; 93 | 94 | MPI_Scatterv(array, sendcounts, sdispls, MPI_INT, a, group+mod, MPI_INT, 0, MPI_COMM_WORLD); 95 | 96 | group_len = sendcounts[id_procs]; 97 | // 均匀划分 局部排序 98 | qsort(a, group_len, sizeof(int), compare); 99 | 100 | // 正则采样 101 | int samples[num_procs * num_procs]; 102 | int s[num_procs]; 103 | for (i = 1; i < num_procs; i++) { 104 | s[i-1] = a[i * group / num_procs]; 105 | } 106 | // 采到样本 收集到Proc#0 107 | MPI_Gather(s, num_procs-1, MPI_INT, samples, num_procs-1, MPI_INT, 0, MPI_COMM_WORLD); 108 | 109 | // 采样排序 110 | int pivot[num_procs]; 111 | if (id_procs == 0) { 112 | qsort(samples, (num_procs-1)*num_procs, sizeof(int), compare); 113 | for (i = 1; i < num_procs; i++) 114 | pivot[i-1] = samples[i*(num_procs-1)]; 115 | } 116 | // 广播主元 117 | MPI_Bcast(pivot, num_procs-1, MPI_INT, 0, MPI_COMM_WORLD); 118 | 119 | // 进行主元划分 120 | index = 0; 121 | int pcounts[num_procs]; 122 | for(i = 0; i < num_procs; i++) 123 | pcounts[i] = 0; 124 | pivot[num_procs-1] = INT32_MAX; 125 | 126 | for(i = 0; i < group_len; i++) { 127 | if(a[i] <= pivot[index]) 128 | pcounts[index]++; 129 | else { 130 | i--; 131 | index++; 132 | } 133 | } 134 | 135 | // 发送各个段的长度 136 | int rcounts[num_procs]; 137 | MPI_Alltoall(pcounts, 1, MPI_INT, rcounts, 1, MPI_INT, MPI_COMM_WORLD); 138 | 139 | // 计算位移 140 | int rdispls[num_procs]; 141 | sdispls[0] = 0; 142 | rdispls[0] = 0; 143 | for(i = 1;i < num_procs;i++) { 144 | sdispls[i] = sdispls[i-1] + pcounts[i-1]; 145 | rdispls[i] = rdispls[i-1] + rcounts[i-1]; 146 | } 147 | // 计算总长度 148 | int totalcounts = 0; 149 | for(i = 0; i < num_procs; i++) 150 | totalcounts += rcounts[i]; 151 | 152 | int *b = (int *)malloc(totalcounts*sizeof(int)); 153 | 154 | // 每个处理器发送数据给其他所有处理器,且每个处理发送的数据长度都不同 155 | // 故有长度数组和位移数组 156 | MPI_Alltoallv(a, pcounts, sdispls, MPI_INT, 157 | b, rcounts, rdispls, MPI_INT, MPI_COMM_WORLD); 158 | 159 | // 归并排序 160 | merge_sort(b, 0, totalcounts-1); 161 | 162 | // Proc#0 收集有序数组 163 | MPI_Gather(&totalcounts, 1, MPI_INT, rcounts, 1, MPI_INT, 0, MPI_COMM_WORLD); 164 | 165 | rdispls[0] = 0; 166 | for(i = 1; i < num_procs; i++) 167 | rdispls[i] = rdispls[i-1] + rcounts[i-1]; 168 | 169 | MPI_Gatherv(b, totalcounts, MPI_INT, result, rcounts, rdispls, MPI_INT, 0, MPI_COMM_WORLD); 170 | 171 | if(id_procs == 0) { 172 | if (check_array(result, ans, n)) 173 | printf("Done.\n"); 174 | free(ans); 175 | free(result); 176 | free(array); 177 | } 178 | if (b != NULL) { 179 | free(b); 180 | b = NULL; 181 | } 182 | if (a != NULL) { 183 | free(a); 184 | a = NULL; 185 | } 186 | 187 | MPI_Finalize(); 188 | return 0; 189 | } 190 | -------------------------------------------------------------------------------- /sort/quick_sort.c: -------------------------------------------------------------------------------- 1 | #include "sort.h" 2 | 3 | enum TagType { 4 | ORDER = 1, 5 | UNORDER, 6 | MMSG, 7 | LEN 8 | }; 9 | 10 | 11 | int getpow2(int n) { 12 | int ans = 0; 13 | while((n = n >> 1)) 14 | ans++; 15 | return ans; 16 | } 17 | 18 | 19 | int partition(int *a, int k, int l) 20 | { 21 | int pivof = a[l]; 22 | int temp; 23 | int i = k-1; 24 | for(int j = k; j < l; j++) { 25 | if (a[j] <= pivof) { 26 | i++; 27 | temp = a[i]; 28 | a[i] = a[j]; 29 | a[j] = temp; 30 | } 31 | } 32 | temp = a[i+1]; 33 | a[i+1] = a[l]; 34 | a[l] = temp; 35 | return i+1; 36 | } 37 | 38 | 39 | void do_sort(int *a, int i, int j) 40 | { 41 | if (i < j) { 42 | int r = partition(a, i, j); 43 | do_sort(a, i, r-1); 44 | do_sort(a, r+1, j); 45 | } 46 | } 47 | 48 | 49 | void quicksort(int *a, int beg, int end, int 
m, int id_procs) 50 | { 51 | int pivot; 52 | int dest; 53 | int len; 54 | MPI_Status status; 55 | if (beg < end) { 56 | if (m > 0) { 57 | dest = id_procs + (1 << (m-1)); 58 | pivot = partition(a, beg, end); 59 | len = end-(pivot+1)+1; 60 | m = m-1 > 0 ? m-1:0; 61 | // send info to Proc#id+2^m-1, ask it run latter array 62 | MPI_Send(&len, 1, MPI_INT, dest, LEN, MPI_COMM_WORLD); 63 | MPI_Send(&m, 1, MPI_INT, dest, MMSG, MPI_COMM_WORLD); 64 | MPI_Send(&a[pivot+1], len, MPI_INT, dest, UNORDER, MPI_COMM_WORLD); 65 | printf("Proc#%d send %d data to Proc#%d\n", id_procs, len, dest); 66 | 67 | // run former array local 68 | quicksort(a, beg, pivot-1, m, id_procs); 69 | 70 | // Get the order array from Proc#id+2^m-1 71 | if (len > 1) { 72 | int *b = (int *)malloc(len*sizeof(int)); 73 | MPI_Recv(b, len, MPI_INT, dest, ORDER, MPI_COMM_WORLD, &status); 74 | printf("Proc#%d recv %d data from Proc#%d\n", id_procs, len, dest); 75 | // Merge 76 | memcpy(&a[pivot+1], b, len*sizeof(int)); 77 | free(b); 78 | } 79 | 80 | if ((id_procs >> (m+1)) % 2) { 81 | dest = id_procs - (1 << (m+1)); 82 | len = end -beg +1; 83 | MPI_Send(a, len, MPI_INT, dest, ORDER, MPI_COMM_WORLD); 84 | printf("Proc#%d send %d data to Proc#%d\n", id_procs, len, dest); 85 | } 86 | } 87 | else { 88 | pivot = partition(a, beg, end); 89 | do_sort(a, beg, pivot-1); 90 | do_sort(a, pivot+1, end); 91 | 92 | // return the order array 93 | if (id_procs % 2) { 94 | dest = id_procs -1; 95 | len = end - beg +1; 96 | MPI_Send(a, len, MPI_INT, dest, ORDER, MPI_COMM_WORLD); 97 | printf("Proc#%d send %d data to Proc#%d\n", id_procs, len, dest); 98 | } 99 | } 100 | } 101 | } 102 | 103 | int main(int argc, char *argv[]) 104 | { 105 | int num_procs, id_procs; 106 | MPI_Status status; 107 | MPI_Init(&argc, &argv); 108 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); 109 | MPI_Comm_rank(MPI_COMM_WORLD, &id_procs); 110 | 111 | if (argc != 2) { 112 | fprintf(stderr, "Please input the size of data array.\n"); 113 | return 1; 114 | } 115 | int n = atoi(argv[1]); 116 | if (n <= 0) { 117 | fprintf(stderr, "Please, the size must > 0.\n"); 118 | return 1; 119 | } 120 | 121 | int *a, *b; 122 | int m = getpow2(num_procs); 123 | int len; 124 | a = (int*)malloc((n+2)*sizeof(int)); 125 | if (id_procs == 0) { 126 | b = (int*)malloc((n+2)*sizeof(int)); 127 | random_array(a, n); 128 | memcpy(b, a, n*sizeof(int)); 129 | qsort(b, n, sizeof(int), compare); 130 | } 131 | 132 | 133 | if (id_procs != 0) { 134 | // None-0 Proc Get UnOrder Data from other 135 | MPI_Recv(&len, 1, MPI_INT, MPI_ANY_SOURCE, LEN, MPI_COMM_WORLD, &status); 136 | printf("Proc#%d Run!\n", id_procs); 137 | MPI_Recv(&m, 1, MPI_INT, MPI_ANY_SOURCE, MMSG, MPI_COMM_WORLD, &status); 138 | MPI_Recv(a, len, MPI_INT, MPI_ANY_SOURCE, UNORDER, MPI_COMM_WORLD, &status); 139 | quicksort(a, 0, len-1, m, id_procs); 140 | } else { 141 | quicksort(a, 0, n-1, m, id_procs); 142 | } 143 | 144 | if (id_procs == 0) { 145 | print_array(a, n); 146 | if (check_array(a, b, n)) 147 | printf("Done.\n"); 148 | else 149 | printf("Error Occured!\n"); 150 | } 151 | 152 | free(a); 153 | if (id_procs == 0) { 154 | free(b); 155 | } 156 | MPI_Finalize(); 157 | return 0; 158 | } 159 | -------------------------------------------------------------------------------- /sort/sort.h: -------------------------------------------------------------------------------- 1 | #ifndef _MPI_SORT_H 2 | #define _MPI_SORT_H 3 | 4 | #include "mpi.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define INDEX(i, j, n) (((i)*(n))+(j)) 11 | 12 | #define 
SURCLK(x) \ 13 | do { \ 14 | clock_t t1 = clock(); \ 15 | x \ 16 | clock_t t2 = clock(); \ 17 | printf("Time: %lf\n", (double)(t2-t1)/CLOCKS_PER_SEC); \ 18 | }while(0); 19 | 20 | 21 | void random_array(int *a, int num) { 22 | srand((unsigned)clock()); // seed once; re-seeding inside the loop produces long runs of equal values 23 | for(int i = 0; i < num; i++) { 24 | a[i] = rand() % 400; 25 | } 26 | } 27 | 28 | void print_array(int *a, int num) { 29 | for(int i = 0; i < num; i++) { 30 | if (i % 20 == 0) 31 | printf("\n"); 32 | printf("%d ", a[i]); 33 | } 34 | printf("\n"); 35 | } 36 | 37 | int check_array(int *B, int *C, int num) { 38 | for(int i = 0; i < num; i++) { 39 | if (B[i] != C[i]) { 40 | printf("A[%d] = %d not %d!\n", i, B[i], C[i]); 41 | return 0; 42 | } 43 | } 44 | return 1; 45 | } 46 | 47 | // qsort comparator: must return a negative, zero or positive value (values here stay small, so subtraction cannot overflow) 48 | int compare(const void *arg1, const void *arg2) { 49 | return *(int*)arg1 - *(int*)arg2; 50 | } 51 | 52 | #endif // _MPI_SORT_H 53 | --------------------------------------------------------------------------------
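
A minimal stand-alone sketch (hypothetical driver, not one of the repository's programs) showing how the sort.h helpers are meant to be combined: random_array fills the data, compare feeds qsort, SURCLK times the call, and print_array dumps the result. Since sort.h also pulls in mpi.h, it would still be compiled with mpicc.

#include <stdio.h>
#include <stdlib.h>
#include "sort.h"

int main(void)
{
    int n = 64;                                  /* arbitrary demo size */
    int *a = (int*)malloc(n*sizeof(int));

    random_array(a, n);                          /* values in [0, 400) */
    SURCLK(qsort(a, n, sizeof(int), compare);)   /* prints the elapsed time */
    print_array(a, n);                           /* 20 numbers per row */

    free(a);
    return 0;
}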