├── mpi
│   ├── 1
│   ├── 3
│   ├── 4
│   ├── config
│   ├── 2_1
│   ├── 2_2
│   ├── 5_1
│   ├── 5_2
│   ├── 2_1.c
│   ├── 2_2.c
│   ├── 1.c
│   ├── 4.c
│   ├── 5_2.c
│   ├── 5_1.c
│   └── 3.c
├── omp
│   ├── 1
│   ├── 2
│   ├── 3
│   ├── 4
│   ├── 5
│   ├── 1_1
│   ├── 2_1
│   ├── 3_1
│   ├── 4_1
│   ├── 5_1
│   ├── 2.c
│   ├── 2_1.c
│   ├── 4.c
│   ├── 4_1.c
│   ├── 1.c
│   ├── 1_1.c
│   ├── 3.c
│   ├── 3_1.c
│   ├── 5.c
│   └── 5_1.c
├── Kmeans
│   ├── serial
│   ├── Kmeans_mpi
│   ├── serial.c
│   └── Kmeans_mpi.c
├── project_report.pdf
├── parallel_computing_hw1.pdf
└── README.md

/mpi/config:
--------------------------------------------------------------------------------
202.38.79.8 slots=4
202.38.75.64 slots=4
--------------------------------------------------------------------------------
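This is an Open MPI-style hostfile listing two nodes with four slots each. It is typically passed to the launcher along the lines of `mpirun --hostfile config -np 8 ./a.out` (the binary name here is illustrative; substitute the compiled experiment).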
/mpi/1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/mpi/1
--------------------------------------------------------------------------------
/mpi/3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/mpi/3
--------------------------------------------------------------------------------
/mpi/4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/mpi/4
--------------------------------------------------------------------------------
/omp/1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/1
--------------------------------------------------------------------------------
/omp/2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/2
--------------------------------------------------------------------------------
/omp/3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/3
--------------------------------------------------------------------------------
/omp/4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/4
--------------------------------------------------------------------------------
/omp/5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/5
--------------------------------------------------------------------------------
/mpi/2_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/mpi/2_1
--------------------------------------------------------------------------------
/mpi/2_2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/mpi/2_2
--------------------------------------------------------------------------------
/mpi/5_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/mpi/5_1
--------------------------------------------------------------------------------
/mpi/5_2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/mpi/5_2
--------------------------------------------------------------------------------
/omp/1_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/1_1
--------------------------------------------------------------------------------
/omp/2_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/2_1
--------------------------------------------------------------------------------
/omp/3_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/3_1
--------------------------------------------------------------------------------
/omp/4_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/4_1
--------------------------------------------------------------------------------
/omp/5_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/5_1
--------------------------------------------------------------------------------
/Kmeans/serial:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/Kmeans/serial
--------------------------------------------------------------------------------
/Kmeans/Kmeans_mpi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/Kmeans/Kmeans_mpi
--------------------------------------------------------------------------------
/project_report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/project_report.pdf
--------------------------------------------------------------------------------
/parallel_computing_hw1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/parallel_computing_hw1.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Parallel_computing_Exp
Lab for Parallel computing (USTC COMP6201P), including hw1 and the final project.
If you find it helpful, give it a star! 😄💯
--------------------------------------------------------------------------------
/mpi/2_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
    int id_procs, num_procs;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &id_procs);

    int data = id_procs;
    int recvdata;
    MPI_Status status;

    /* Butterfly (recursive-doubling) all-reduce: in each round, exchange
       partial sums with the partner whose rank differs in one bit. */
    for (int i = 2; i <= num_procs; i <<= 1)
    {
        int tag = i >> 1;
        int dest = id_procs ^ tag;
        MPI_Send(&data, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
        MPI_Recv(&recvdata, 1, MPI_INT, dest, tag, MPI_COMM_WORLD, &status);
        data += recvdata;
    }

    printf("Proc:%d Sum is = %d\n", id_procs, data);
    MPI_Finalize();
    return 0;
}
--------------------------------------------------------------------------------
/mpi/2_2.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
    int id_procs, num_procs;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &id_procs);

    int data = id_procs;
    int recvdata;
    MPI_Status status;

    /* Up-sweep: binomial-tree reduction towards rank 0. */
    for (int i = 2; i <= num_procs; i <<= 1)
    {
        int tag = i >> 1;
        int diff = id_procs & tag;
        if (diff)
        {
            MPI_Send(&data, 1, MPI_INT, id_procs - tag, tag, MPI_COMM_WORLD);
        }
        else
        {
            MPI_Recv(&recvdata, 1, MPI_INT, id_procs + tag, tag, MPI_COMM_WORLD, &status);
            data += recvdata; /* only receivers have a valid recvdata */
        }
    }

    /* Down-sweep: broadcast the total back along the same tree. */
    for (int i = num_procs; i >= 2; i >>= 1)
    {
        int tag = i;
        if (id_procs % i == 0)
        {
            MPI_Send(&data, 1, MPI_INT, id_procs + (i >> 1), tag, MPI_COMM_WORLD);
        }
        else if (id_procs % (i >> 1) == 0)
        {
            MPI_Recv(&data, 1, MPI_INT, id_procs - (i >> 1), tag, MPI_COMM_WORLD, &status);
        }
    }

    printf("%d Sum is = %d\n", id_procs, data);

    MPI_Finalize();
    return 0;
}
--------------------------------------------------------------------------------
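Both programs compute the same global sum that a single collective would produce, so their output can be checked against the standard collective. A minimal reference sketch (same variables as above):

    int sum;
    /* library all-reduce equivalent of the hand-rolled butterfly/tree */
    MPI_Allreduce(&data, &sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    printf("Proc:%d Sum is = %d\n", id_procs, sum);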
/mpi/1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
    int id_procs, num_procs;
    int msg = 10;
    int tag = 5;
    char seq[16] = "hello mpi!";
    char seqin[16];
    char hostname[100];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &id_procs);

    int color = id_procs / 4;
    int key = id_procs % 4;
    gethostname(hostname, sizeof(hostname));
    MPI_Comm split_comm_world;
    MPI_Status status;
    int rank, size, msgin;

    MPI_Comm_split(MPI_COMM_WORLD, color, key, &split_comm_world);
    MPI_Comm_rank(split_comm_world, &rank);
    MPI_Comm_size(split_comm_world, &size);
    printf("id_procs: %d. process %d of %d. comm: %d. host: %s\n", id_procs, rank, size, color, hostname);
    MPI_Barrier(MPI_COMM_WORLD);
    if (id_procs == 0)
    {
        strcpy(seqin, seq);
        MPI_Send(&seq, 16, MPI_CHAR, 4, tag, MPI_COMM_WORLD);
    }
    else if (id_procs == 4)
    {
        MPI_Recv(&seqin, 16, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
    }

    /* Each sub-communicator's local rank 0 (global ranks 0 and 4) now holds
       the message; broadcast it within each sub-communicator. */
    MPI_Bcast(&seqin, 16, MPI_CHAR, 0, split_comm_world);
    printf("MPI comm rank %d, original id %d, size %d. The new msg is %s\n", rank, id_procs, size, seqin);
    MPI_Finalize();
    return 0;
}
--------------------------------------------------------------------------------
/mpi/4.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <mpi.h>
#define SERVER_NUM 2

void serve(MPI_Comm server_comm, int id, int num)
{
    int num_workers = num - SERVER_NUM;
    int recv_size = num_workers / SERVER_NUM + 1;
    int recv_data[recv_size];
    int gather_buff[recv_size * SERVER_NUM];
    int average;
    int sum, ctn;
    MPI_Status status;
    while (1)
    {
        memset(recv_data, 0, recv_size * sizeof(int));
        memset(gather_buff, 0, recv_size * SERVER_NUM * sizeof(int));
        sum = 0;
        ctn = 0;
        /* Collect one value from every worker assigned to this server. */
        for (int i = 1; i * SERVER_NUM + id < num; i++)
        {
            MPI_Recv(recv_data + i - 1, 1, MPI_INT, i * SERVER_NUM + id, 0, MPI_COMM_WORLD, &status);
            ctn++;
        }

        /* Share the partial data among the servers. */
        MPI_Allgather(recv_data, recv_size, MPI_INT, gather_buff, recv_size, MPI_INT, server_comm);

        for (int i = 0; i < recv_size * SERVER_NUM; i++)
        {
            sum += gather_buff[i];
        }
        average = sum / num_workers;
        printf("Proc#%d send average data = %d\n", id, average);
        MPI_Barrier(server_comm);
        for (int i = 1; i <= ctn; i++)
        {
            MPI_Send(&average, 1, MPI_INT, i * SERVER_NUM + id, 1, MPI_COMM_WORLD);
        }
    }
}

void work(int id)
{
    int randata;
    int recvdata;
    MPI_Status status;

    while (1)
    {
        srand(time(NULL) + id);
        randata = rand() % 100;
        printf("proc#%d send data = %d\n", id, randata);
        MPI_Send(&randata, 1, MPI_INT, id % SERVER_NUM, 0, MPI_COMM_WORLD);
        MPI_Recv(&recvdata, 1, MPI_INT, id % SERVER_NUM, 1, MPI_COMM_WORLD, &status);
        printf("Proc#%d receive average data = %d\n", id, recvdata);
        sleep(5);
    }
}

int main(int argc, char *argv[])
{
    int id_procs, num_procs;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &id_procs);

    int P, Q;
    P = SERVER_NUM;
    Q = num_procs - P;

    MPI_Comm server_comm;
    MPI_Comm_split(MPI_COMM_WORLD, id_procs / P, id_procs, &server_comm);

    if (id_procs > P - 1)
    {
        work(id_procs);
    }
    else
    {
        serve(server_comm, id_procs, num_procs);
    }

    MPI_Finalize();
    return 0;
}
--------------------------------------------------------------------------------
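With SERVER_NUM = 2, ranks 0 and 1 act as servers and every other rank is a worker reporting to rank id % SERVER_NUM; the MPI_Comm_split with color id_procs / P puts exactly ranks 0..P-1 into the servers' shared communicator. Launched with, e.g., mpirun -np 8 (launch line illustrative), this gives 2 servers and 6 workers; note the server loop never terminates by design, so the run is stopped externally.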
/mpi/5_2.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <mpi.h>
#define N 500
#define IDX(i, j) (((i)*N) + (j))

void gen_rand_array(double *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 100;
    }
}

void compute(double *A, double *B, int a, int b)
{
    for (int i = 1; i <= a; i++)
    {
        for (int j = 1; j <= b; j++)
        {
            B[IDX(i, j)] = (A[IDX(i - 1, j)] + A[IDX(i, j + 1)] + A[IDX(i + 1, j)] + A[IDX(i, j - 1)]) / 4.0;
        }
    }
}

int check_ans(double *B, double *A)
{
    for (int i = 1; i < N - 1; i++)
    {
        for (int j = 1; j < N - 1; j++)
        {
            if (fabs(B[IDX(i, j)] - A[IDX(i, j)]) >= 1e-2)
            {
                return 0;
            }
        }
    }
    return 1;
}

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int id_procs, num_procs;
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &id_procs);
    MPI_Status status;
    MPI_Datatype SubMat;
    int rows = sqrt(num_procs);
    int cols = num_procs / rows;
    int a = (N - 2 + rows - 1) / rows;
    int b = (N - 2 + cols - 1) / cols;
    int alloc_num = (a + 1) * (b + 1) * num_procs;
    double A[alloc_num];
    double B[alloc_num];
    double B2[alloc_num];

    // Proc#0 randomizes the data and computes the serial reference result
    if (id_procs == 0)
    {
        gen_rand_array(A, N * N);
        compute(A, B2, N - 2, N - 2);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    // Proc#0 sends each process its (a+2)x(b+2) block, including halo rows/columns
    MPI_Type_vector(a + 2, b + 2, N, MPI_DOUBLE, &SubMat);
    MPI_Type_commit(&SubMat);

    if (id_procs == 0)
    {
        for (int i = 0; i < rows; i++)
        {
            for (int j = 0; j < cols; j++)
            {
                if (i == 0 && j == 0)
                    continue;
                MPI_Send(A + i * a * N + b * j, 1, SubMat, j + cols * i, 0, MPI_COMM_WORLD);
            }
        }
    }
    else
    {
        MPI_Recv(A, 1, SubMat, 0, 0, MPI_COMM_WORLD, &status);
    }

    // compute
    compute(A, B, a, b);

    // Gather the result blocks (interior a x b part only)
    MPI_Datatype SubMat_B;
    MPI_Type_vector(a, b, N, MPI_DOUBLE, &SubMat_B);
    MPI_Type_commit(&SubMat_B);
    if (id_procs == 0)
    {
        for (int i = 0; i < rows; i++)
        {
            for (int j = 0; j < cols; j++)
            {
                if (i == 0 && j == 0)
                    continue;
                MPI_Recv(&B[IDX(a * i + 1, b * j + 1)], 1, SubMat_B, i * cols + j, 1, MPI_COMM_WORLD, &status);
            }
        }
    }
    else
    {
        MPI_Send(&B[IDX(1, 1)], 1, SubMat_B, 0, 1, MPI_COMM_WORLD);
    }

    if (id_procs == 0)
    {
        if (check_ans(B, B2))
        {
            printf("Done. No error\n");
        }
        else
        {
            printf("Error!\n");
        }
    }
    MPI_Finalize();
    return 0;
}
--------------------------------------------------------------------------------
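A note on the derived type: MPI_Type_vector(a + 2, b + 2, N, MPI_DOUBLE, &SubMat) describes a+2 runs of b+2 contiguous doubles, each run starting N elements after the previous one, i.e. an (a+2)-by-(b+2) sub-block of the row-major N-by-N array, halo included. A sketch of the manual packing it replaces (illustrative only, same variable names as above):

    /* send the block row by row instead of as one derived type */
    for (int r = 0; r < a + 2; r++)
        MPI_Send(A + (i * a + r) * N + b * j, b + 2, MPI_DOUBLE,
                 j + cols * i, 0, MPI_COMM_WORLD);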
/mpi/5_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <mpi.h>
#define N 500
#define IDX(i, j) (((i)*N) + (j))
void compute(double *A, double *B, int num)
{
    for (int i = 1; i < N - 1; i++)
    {
        for (int j = 1; j < N - 1; j++)
        {
            B[IDX(i, j)] = (A[IDX(i - 1, j)] + A[IDX(i, j + 1)] + A[IDX(i + 1, j)] + A[IDX(i, j - 1)]) / 4.0;
        }
    }
}
void gen_rand_array(double *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 100;
    }
}
int check_ans(double *B, double *C)
{
    for (int i = 1; i < N - 1; i++)
    {
        for (int j = 1; j < N - 1; j++)
        {
            if (fabs(B[IDX(i, j)] - C[IDX(i, j)]) >= 1e-4)
            {
                return 0;
            }
        }
    }
    return 1;
}

int main(int argc, char *argv[])
{
    int id_procs, num_procs, num_1;
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &id_procs);
    double *A, *B, *B2;
    A = (double *)malloc(N * N * sizeof(double));
    B = (double *)malloc(N * N * sizeof(double));
    B2 = (double *)malloc(N * N * sizeof(double));
    num_1 = num_procs - 1;
    // Proc#(num_procs-1) randomizes the data and computes the serial reference
    if (id_procs == num_1)
    {
        gen_rand_array(A, N * N);
        compute(A, B2, N * N);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    int ctn = 0;
    // Scatter: the last process deals out 3-row strips round-robin
    for (int i = 0; i < N - 2; i++)
    {
        if (id_procs == num_1)
        {
            int dest = i % num_1;
            int tag = i / num_1;
            MPI_Send(&A[IDX(i, 0)], N * 3, MPI_DOUBLE, dest, tag, MPI_COMM_WORLD);
        }
    }

    for (int i = 0; i < (N - 2) / num_1; i++)
    {
        if (id_procs != num_1)
        {
            MPI_Recv(&A[IDX(3 * ctn, 0)], 3 * N, MPI_DOUBLE, num_1, ctn, MPI_COMM_WORLD, &status);
            ctn++;
        }
    }
    if (id_procs < (N - 2) % num_1)
    {
        MPI_Recv(&A[IDX(ctn * 3, 0)], 3 * N, MPI_DOUBLE, num_1, ctn, MPI_COMM_WORLD, &status);
        ctn++;
    }

    // compute
    if (id_procs != num_1)
    {
        for (int i = 1; i <= 3 * ctn - 2; i += 3) // one update per 3-row strip
        {
            for (int j = 1; j < N - 1; j++)
            {
                B[IDX((i + 2) / 3, j)] = (A[IDX(i - 1, j)] + A[IDX(i, j + 1)] + A[IDX(i + 1, j)] + A[IDX(i, j - 1)]) / 4.0;
            }
        }
    }

    // Gather: each worker sends its ctn computed rows back exactly once
    if (id_procs == num_1)
    {
        for (int i = 0; i < N - 2; i++)
        {
            int src = i % num_1;
            MPI_Recv(&B[IDX(i + 1, 1)], N - 2, MPI_DOUBLE, src, i / num_1 + N, MPI_COMM_WORLD, &status);
        }
    }
    else
    {
        for (int j = 0; j < ctn; j++)
            MPI_Send(&B[IDX(j + 1, 1)], N - 2, MPI_DOUBLE, num_1, j + N, MPI_COMM_WORLD);
    }

    if (id_procs == num_1)
    {
        if (check_ans(B, B2))
        {
            printf("Done. No error\n");
        }
        else
        {
            printf("Error occurred!\n");
        }
    }
    free(A);
    free(B);
    free(B2);
    MPI_Finalize();
    return 0;
}
--------------------------------------------------------------------------------
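Each worker receives whole 3-row strips (global rows i, i+1, i+2) and computes only the middle row of each strip; the index (i + 2)/3 in the compute loop packs those middle rows into consecutive local rows 1, 2, ..., ctn, which is exactly the layout the gather phase sends back, matched by the j + N tags on the root side.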
/omp/2.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
#define M 20
#define N 20
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }
    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int n = (M + 2) * N;

    int A[n];
    int B[n];
    int C = 41734;

    omp_set_num_threads(4);

    random_init(A, n);
    copy_array(B, A, n);

    clock_t start = clock();
    for (int i = 1; i <= M; i++)
    {
        for (int j = 1; j < N; j++)
        {
            A[IN(i + 1, j + 1, N)] = A[IN(i, j, N)] + C;
        }
    }
    clock_t end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (int i = 1; i <= M; i++)
    {
#pragma omp parallel for
        for (int j = 1; j < N; j++)
        {
            B[IN(i + 1, j + 1, N)] = B[IN(i, j, N)] + C;
        }
    }
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, B, n);
}

int loop2()
{
    int X[101];
    int X2[101];
    int Y[201];
    int Y2[201];
    int B[101];
    int B2[101];
    int n = 110 * 110;
    int A[n], C[n], A2[n];
    random_init(A, n);
    random_init(C, n);
    random_init(Y, 201);
    // random_init(B, 101);
    copy_array(B2, B, 101);
    copy_array(X2, X, 101);
    copy_array(A2, A, n);
    copy_array(Y2, Y, 201);

    omp_set_num_threads(4);
    clock_t start = clock();

    for (int i = 1; i <= 100; i++)
    {
        X2[i] = Y2[i] + 10;
        for (int j = 1; j <= 100; j++)
        {
            B2[j] = A2[IN(j, N, 110)];
            for (int k = 1; k <= 100; k++)
            {
                A2[IN(j + 1, k, 110)] = B2[j] + C[IN(j, k, 110)];
            }
            Y2[i + j] = A2[IN(j + 1, N, 110)];
        }
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // #pragma omp parallel for
    for (int i = 1; i <= 100; i++)
    {
        // #pragma omp parallel for
        for (int j = 1; j <= 100; j++)
        {
            B[j] = A[IN(j, N, 110)];
#pragma omp parallel for
            for (int k = 1; k <= 100; k++)
            {
                A[IN(j + 1, k, 110)] = B[j] + C[IN(j, k, 110)];
            }
        }
    }

    for (int i = 1; i <= 100; i++)
    {
#pragma omp parallel for
        for (int j = 1; j <= 100; j++)
        {
            Y[i + j] = A[IN(j + 1, N, 110)];
        }
    }

#pragma omp parallel for
    for (int i = 1; i <= 100; i++)
    {
        X[i] = Y[i] + 10;
    }

    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, n) && check_ans(B, B2, 100) && check_ans(X, X2, 100);
}

int main()
{
    if (loop1())
        printf("loop1 done!\n");
    else
        printf("loop1 error!\n");
    if (loop2())
        printf("loop2 done!\n");
    else
        printf("loop2 error!\n");
}
--------------------------------------------------------------------------------
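In loop1 the only dependence runs from iteration (i, j) to (i + 1, j + 1); its distance vector is (1, 1), so for a fixed i the iterations of the inner j loop never depend on one another and the inner loop can run in parallel, while the outer i loop must stay sequential. In loop2 the statements are first distributed into separate loops (A, then Y, then X) so that the parallelized index of each resulting loop carries no dependence.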
/omp/2_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
#define M 20
#define N 20
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }
    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int n = (M + 2) * N;

    int A[n];
    int B[n];
    int C = 41734;

    omp_set_num_threads(4);

    random_init(A, n);
    copy_array(B, A, n);

    clock_t start = clock();
    for (int i = 1; i <= M; i++)
    {
        for (int j = 1; j < N; j++)
        {
            A[IN(i + 1, j + 1, N)] = A[IN(i, j, N)] + C;
        }
    }
    clock_t end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (int i = 1; i <= M; i++)
    {
#pragma omp parallel for
        for (int j = 1; j < N; j++)
        {
            B[IN(i + 1, j + 1, N)] = B[IN(i, j, N)] + C;
        }
    }
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, B, n);
}

int loop2()
{
    int X[101];
    int X2[101];
    int Y[201];
    int Y2[201];
    int B[101];
    int B2[101];
    int n = 110 * 110;
    int A[n], C[n], A2[n];
    random_init(A, n);
    random_init(C, n);
    random_init(Y, 201);
    // random_init(B, 101);
    copy_array(B2, B, 101);
    copy_array(X2, X, 101);
    copy_array(A2, A, n);
    copy_array(Y2, Y, 201);

    omp_set_num_threads(4);
    clock_t start = clock();

    for (int i = 1; i <= 100; i++)
    {
        X2[i] = Y2[i] + 10;
        for (int j = 1; j <= 100; j++)
        {
            B2[j] = A2[IN(j, N, 110)];
            for (int k = 1; k <= 100; k++)
            {
                A2[IN(j + 1, k, 110)] = B2[j] + C[IN(j, k, 110)];
            }
            Y2[i + j] = A2[IN(j + 1, N, 110)];
        }
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // #pragma omp parallel for
    for (int i = 1; i <= 100; i++)
    {
        // #pragma omp parallel for
        for (int j = 1; j <= 100; j++)
        {
            B[j] = A[IN(j, N, 110)];
#pragma omp parallel for
            for (int k = 1; k <= 100; k++)
            {
                A[IN(j + 1, k, 110)] = B[j] + C[IN(j, k, 110)];
            }
        }
    }

    for (int i = 1; i <= 100; i++)
    {
#pragma omp parallel for
        for (int j = 1; j <= 100; j++)
        {
            Y[i + j] = A[IN(j + 1, N, 110)];
        }
    }

#pragma omp parallel for
    for (int i = 1; i <= 100; i++)
    {
        X[i] = Y[i] + 10;
    }

    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, n) && check_ans(B, B2, 100) && check_ans(X, X2, 100);
}

int main()
{
    if (loop1())
        printf("loop1 done!\n");
    else
        printf("loop1 error!\n");
    if (loop2())
        printf("loop2 done!\n");
    else
        printf("loop2 error!\n");
}
--------------------------------------------------------------------------------
/omp/4.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
#define min(i, j) (((i) < (j)) ? (i) : (j))
#define max(i, j) (((i) > (j)) ? (i) : (j))
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int i, j, k;
    int n = 20;
    int B[n * n];
    int B2[n * n];
    int B3[n * n];

    omp_set_num_threads(4);
    random_init(B2, n * n);
    copy_array(B, B2, n * n);
    copy_array(B3, B2, n * n);
    clock_t start, end;
    start = clock();
    for (i = 2; i <= 10; i++)
    {
        for (j = i; j <= 10; j++)
        {
            B[IN(i, j, 20)] = (B[IN(i, j - 1, 20)] + B[IN(i - 1, j, 20)]) * 0.5;
        }
    }
    end = clock();

    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (i = 4; i <= 20; i++)
    {
#pragma omp parallel for
        for (j = max(2, i - 10); j <= min(i / 2, 10); j++)
        {
            B3[IN(j, i - j, 20)] = (B3[IN(j, i - j - 1, 20)] + B3[IN(j - 1, i - j, 20)]) * 0.5;
        }
    }
    end = clock();
    printf("openmp diagonal parallel loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(B, B3, n * n);
}

int loop2()
{
    int i;
    int A[20];
    int B[20];
    int A2[20];
    random_init(A, 20);
    copy_array(A2, A, 20);

    omp_set_num_threads(4);

    clock_t start, end;

    start = clock();

    for (int i = 1; i <= 16; i++)
    {
        A[i + 3] = A[i] + B[i];
    }
    end = clock();

    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

    for (int k = 1; k <= 16; k += 3)
    {
#pragma omp parallel for
        for (int i = k; i <= min(16, k + 2); i++)
        {
            A2[i + 3] = A2[i] + B[i];
        }
    }
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A2, A, 20);
}

int loop3()
{
    int i, j, k;
    int A[20];
    int B[20];
    int A2[20];
    random_init(A, 20);
    copy_array(A2, A, 20);

    omp_set_num_threads(4);

    clock_t start, end;

    start = clock();
    for (k = 1; k <= 16; k += 5)
    {
        for (i = k; i <= min(16, k + 4); i++)
        {
            A[i + 3] = A[i] + B[i];
        }
    }
    end = clock();

    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

    for (int k = 1; k <= 16; k += 3)
    {
#pragma omp parallel for
        for (int i = k; i <= min(16, k + 2); i++)
        {
            A2[i + 3] = A2[i] + B[i];
        }
    }
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A2, A, 20);
}

int main()
{
    if (loop1())
        printf("loop1 done!\n");
    else
        printf("loop1 error!\n");
    if (loop2())
        printf("loop2 done!\n");
    else
        printf("loop2 error!\n");
    if (loop3())
        printf("loop3 done\n");
}
--------------------------------------------------------------------------------
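loop1's update reads the left and upper neighbours, so no cell on an anti-diagonal i + j = const depends on another cell of the same diagonal; sweeping the diagonals in order (the transformed i = 4..20 loop) satisfies both dependences while exposing the per-diagonal parallelism. loop2 and loop3 strip-mine the recurrence A[i+3] = A[i] + B[i]: a block of 3 consecutive iterations reads indices k..k+2 and writes k+3..k+5, so the 3 iterations inside each block are independent and only the blocks must run in order.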
/omp/4_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
#define min(i, j) (((i) < (j)) ? (i) : (j))
#define max(i, j) (((i) > (j)) ? (i) : (j))
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int i, j, k;
    int n = 20;
    int B[n * n];
    int B2[n * n];
    int B3[n * n];

    omp_set_num_threads(4);
    random_init(B2, n * n);
    copy_array(B, B2, n * n);
    copy_array(B3, B2, n * n);
    clock_t start, end;
    start = clock();
    for (i = 2; i <= 10; i++)
    {
        for (j = i; j <= 10; j++)
        {
            B[IN(i, j, 20)] = (B[IN(i, j - 1, 20)] + B[IN(i - 1, j, 20)]) * 0.5;
        }
    }
    end = clock();

    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (i = 4; i <= 20; i++)
    {
#pragma omp parallel for
        for (j = max(2, i - 10); j <= min(i / 2, 10); j++)
        {
            B3[IN(j, i - j, 20)] = (B3[IN(j, i - j - 1, 20)] + B3[IN(j - 1, i - j, 20)]) * 0.5;
        }
    }
    end = clock();
    printf("openmp diagonal parallel loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(B, B3, n * n);
}

int loop2()
{
    int i;
    int A[20];
    int B[20];
    int A2[20];
    random_init(A, 20);
    copy_array(A2, A, 20);

    omp_set_num_threads(4);

    clock_t start, end;

    start = clock();

    for (int i = 1; i <= 16; i++)
    {
        A[i + 3] = A[i] + B[i];
    }
    end = clock();

    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

    for (int k = 1; k <= 16; k += 3)
    {
#pragma omp parallel for
        for (int i = k; i <= min(16, k + 2); i++)
        {
            A2[i + 3] = A2[i] + B[i];
        }
    }
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A2, A, 20);
}

int loop3()
{
    int i, j, k;
    int A[20];
    int B[20];
    int A2[20];
    random_init(A, 20);
    copy_array(A2, A, 20);

    omp_set_num_threads(4);

    clock_t start, end;

    start = clock();
    for (k = 1; k <= 16; k += 5)
    {
        for (i = k; i <= min(16, k + 4); i++)
        {
            A[i + 3] = A[i] + B[i];
        }
    }
    end = clock();

    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

    for (int k = 1; k <= 16; k += 3)
    {
#pragma omp parallel for
        for (int i = k; i <= min(16, k + 2); i++)
        {
            A2[i + 3] = A2[i] + B[i];
        }
    }
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A2, A, 20);
}

int main()
{
    if (loop1())
        printf("loop1 done!\n");
    else
        printf("loop1 error!\n");
    if (loop2())
        printf("loop2 done!\n");
    else
        printf("loop2 error!\n");
    if (loop3())
        printf("loop3 done\n");
}
--------------------------------------------------------------------------------
/omp/1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }
    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int A[256];
    int B[256]; // initialize B with the same values as A, then check that the two arrays still match
    omp_set_num_threads(4);
    random_init(A, 256);
    copy_array(B, A, 256);
    clock_t start = clock();
    for (int i = 2; i <= 10; i++)
    {
        for (int j = 2; j <= 10; j++)
        {
            A[IN(i, j, 16)] = 0.5 * (A[IN(i - 1, j - 1, 16)] + A[IN(i + 1, j + 1, 16)]);
        }
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (int i = 2; i <= 10; i++)
    {
#pragma omp parallel for
        for (int j = 2; j <= 10; j++)
        {
            B[IN(i, j, 16)] = 0.5 * (B[IN(i - 1, j - 1, 16)] + B[IN(i + 1, j + 1, 16)]);
        }
    }
    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, B, 256);
}

int loop2()
{
    int A[100];
    int B[100];
    int A2[100];
    int B2[100];
    random_init(A, 100);
    random_init(B, 100);
    copy_array(A2, A, 100);
    copy_array(B2, B, 100);

    int i;
    clock_t start = clock();
    for (i = 2; i <= 20; i++)
    {
        A[2 * i + 2] = A[2 * i - 2] + B[i];
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

#pragma omp parallel num_threads(2) private(i)
    {
        int tid = omp_get_thread_num();
        for (i = 2 + tid; i <= 20; i += 2)
        {
            A2[2 * i + 2] = A2[2 * i - 2] + B2[i];
        }
    }

    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 100);
}

int loop3()
{
    int A[100];
    int B[100];
    int C[100];
    int D[100];
    int B2[100];
    int C2[100];
    random_init(A, 100);
    random_init(B, 100);
    random_init(C, 100);
    copy_array(B2, B, 100);
    copy_array(C2, C, 100);
    int k;
    clock_t start = clock();
    for (int i = 2; i < 20; i++)
    {
        if (A[i] > 0)
            B[i] = C[i - 1] + 1;
        else
            C[i] = B[i] - 1;
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // split the range at sign changes of A; the cross-iteration dependence
    // (B[i] needs C[i-1]) only crosses those split points
    D[0] = 2;
    int m = 1;
    for (int i = 2; i < 20; i++)
    {
        if ((A[i - 1] < 0) && (A[i] > 0))
        {
            D[m] = i;
            m++;
        }
    }
    D[m] = 20;
    for (int i = 0; i < m; i++)
    {
#pragma omp parallel for
        for (k = D[i]; k < D[i + 1]; k++)
        {
            if (A[k] > 0)
            {
                B2[k] = C2[k - 1] + 1;
            }
            else
            {
                C2[k] = B2[k] - 1;
            }
        }
    }
    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(C, C2, 100) && check_ans(B, B2, 100);
}

int main()
{
    omp_set_num_threads(4);
    if (loop1()) printf("loop1 done!\n");
    if (loop2()) printf("loop2 done!\n");
    if (loop3()) printf("loop3 done!\n");
}
--------------------------------------------------------------------------------
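In loop2 the recurrence A[2i+2] = A[2i-2] + B[i] has distance 2 in i (iteration i reads what iteration i - 2 wrote, since 2(i-2)+2 = 2i-2), so the even-i and odd-i chains never touch each other; the two-thread version simply gives one chain to each thread, which is all the parallelism this loop has.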
/omp/1_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }
    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int A[256];
    int B[256]; // initialize B with the same values as A, then check that the two arrays still match
    omp_set_num_threads(4);
    random_init(A, 256);
    copy_array(B, A, 256);
    clock_t start = clock();
    for (int i = 2; i <= 10; i++)
    {
        for (int j = 2; j <= 10; j++)
        {
            A[IN(i, j, 16)] = 0.5 * (A[IN(i - 1, j - 1, 16)] + A[IN(i + 1, j + 1, 16)]);
        }
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (int i = 2; i <= 10; i++)
    {
#pragma omp parallel for
        for (int j = 2; j <= 10; j++)
        {
            B[IN(i, j, 16)] = 0.5 * (B[IN(i - 1, j - 1, 16)] + B[IN(i + 1, j + 1, 16)]);
        }
    }
    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, B, 256);
}

int loop2()
{
    int A[100];
    int B[100];
    int A2[100];
    int B2[100];
    random_init(A, 100);
    random_init(B, 100);
    copy_array(A2, A, 100);
    copy_array(B2, B, 100);

    int i;
    clock_t start = clock();
    for (i = 2; i <= 20; i++)
    {
        A[2 * i + 2] = A[2 * i - 2] + B[i];
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

#pragma omp parallel num_threads(2) private(i)
    {
        int tid = omp_get_thread_num();
        for (i = 2 + tid; i <= 20; i += 2)
        {
            A2[2 * i + 2] = A2[2 * i - 2] + B2[i];
        }
    }

    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 100);
}

int loop3()
{
    int A[100];
    int B[100];
    int C[100];
    int D[100];
    int B2[100];
    int C2[100];
    random_init(A, 100);
    random_init(B, 100);
    random_init(C, 100);
    copy_array(B2, B, 100);
    copy_array(C2, C, 100);
    int k;
    clock_t start = clock();
    for (int i = 2; i < 20; i++)
    {
        if (A[i] > 0)
            B[i] = C[i - 1] + 1;
        else
            C[i] = B[i] - 1;
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // split the range at sign changes of A; the cross-iteration dependence
    // (B[i] needs C[i-1]) only crosses those split points
    D[0] = 2;
    int m = 1;
    for (int i = 2; i < 20; i++)
    {
        if ((A[i - 1] < 0) && (A[i] > 0))
        {
            D[m] = i;
            m++;
        }
    }
    D[m] = 20;
    for (int i = 0; i < m; i++)
    {
#pragma omp parallel for
        for (k = D[i]; k < D[i + 1]; k++)
        {
            if (A[k] > 0)
            {
                B2[k] = C2[k - 1] + 1;
            }
            else
            {
                C2[k] = B2[k] - 1;
            }
        }
    }
    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(C, C2, 100) && check_ans(B, B2, 100);
}

int main()
{
    omp_set_num_threads(4);
    if (loop1()) printf("loop1 done!\n");
    if (loop2()) printf("loop2 done!\n");
    if (loop3()) printf("loop3 done!\n");
}
--------------------------------------------------------------------------------
/omp/3.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <omp.h>

#define IN(i, j, line) ((i)*line + j)
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

void random_init_f(float *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = (float)(rand() % 1000 - 500);
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

int check_ans_f(float *a, float *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (fabs(a[i] - b[i]) > 0.000001)
        {
            printf("%d\n", i);
            printf("%f %f \n", a[i], b[i]);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

void copy_array_f(float *dst, float *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1() // this loop cannot be parallelized (see homework 1); it is expected to print an error
{
    int i, j, k;
    int n = 500;
    int A[n * n];
    int B[n * n];
    int A2[n * n];
    int B2[n * n];
    clock_t start, end;
    random_init(A, n * n);
    random_init(B, n * n);
    copy_array(A2, A, n * n);
    copy_array(B2, B, n * n);

    omp_set_num_threads(4);

    start = clock();
    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 50; j++)
        {
            A[IN(3 * i + 2, 2 * j - 1, 500)] = A[IN(5 * j, i + 3, 500)] + 2;
        }
    }
    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (i = 1; i <= 100; i++)
    {
#pragma omp parallel for
        for (j = 1; j <= 50; j++)
        {
            A2[IN(3 * i + 2, 2 * j - 1, 500)] = A2[IN(5 * j, i + 3, 500)] + 2;
        }
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    return check_ans(A2, A, n * n);
}

int loop2()
{
    int i, j, k;
    float x, y, z;
    x = 5;
    y = 10;
    z = 20;
    float z2 = z;
    clock_t start, end;

    float A[200], B[200], C[200], D[10000];
    random_init_f(A, 200);
    random_init_f(B, 200);
    random_init_f(C, 200);
    random_init_f(D, 10000);

    float A2[200], B2[200], C2[200], D2[10000];
    copy_array_f(A2, A, 200);
    copy_array_f(B2, B, 200);
    copy_array_f(C2, C, 200);
    copy_array_f(D2, D, 10000);

    // copy_array(W, A, 100);
    omp_set_num_threads(4);
    start = clock();
    x = y * 2;
    for (i = 1; i <= 100; i++)
    {
        C[i] = B[i] + x;
        A[i] = C[i - 1] + z;
        C[i + 1] = A[i] * B[i];
        for (j = 1; j <= 50; j++)
        {
            D[IN(i, j, 60)] = D[IN(i, j - 1, 60)] + x;
        }
    }
    z = y + 4;
    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    x = y * 2;
#pragma omp parallel for
    for (i = 1; i <= 100; i++)
    {
        C2[i] = B2[i] + x;
    }
#pragma omp parallel for private(i, j)
    for (i = 1; i <= 100; i++)
    {
        A2[i] = C2[i - 1] + z2;
        for (j = 1; j <= 50; j++)
        {
            D2[IN(i, j, 60)] = D2[IN(i, j - 1, 60)] + x;
        }
    }
    C2[101] = A2[100] * B2[100];
    z2 = y + 4;
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans_f(D2, D, 200) && check_ans_f(C2, C, 200);
}

int main()
{
    if (loop1())
        printf("loop1 done\n");
    else
        printf("loop1 error\n");

    if (loop2()) printf("loop2 done\n");
}
--------------------------------------------------------------------------------
/omp/3_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <omp.h>

#define IN(i, j, line) ((i)*line + j)
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

void random_init_f(float *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = (float)(rand() % 1000 - 500);
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

int check_ans_f(float *a, float *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (fabs(a[i] - b[i]) > 0.000001)
        {
            printf("%d\n", i);
            printf("%f %f \n", a[i], b[i]);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

void copy_array_f(float *dst, float *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1() // this loop cannot be parallelized (see homework 1); it is expected to print an error
{
    int i, j, k;
    int n = 500;
    int A[n * n];
    int B[n * n];
    int A2[n * n];
    int B2[n * n];
    clock_t start, end;
    random_init(A, n * n);
    random_init(B, n * n);
    copy_array(A2, A, n * n);
    copy_array(B2, B, n * n);

    omp_set_num_threads(4);

    start = clock();
    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 50; j++)
        {
            A[IN(3 * i + 2, 2 * j - 1, 500)] = A[IN(5 * j, i + 3, 500)] + 2;
        }
    }
    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (i = 1; i <= 100; i++)
    {
#pragma omp parallel for
        for (j = 1; j <= 50; j++)
        {
            A2[IN(3 * i + 2, 2 * j - 1, 500)] = A2[IN(5 * j, i + 3, 500)] + 2;
        }
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    return check_ans(A2, A, n * n);
}

int loop2()
{
    int i, j, k;
    float x, y, z;
    x = 5;
    y = 10;
    z = 20;
    float z2 = z;
    clock_t start, end;

    float A[200], B[200], C[200], D[10000];
    random_init_f(A, 200);
    random_init_f(B, 200);
    random_init_f(C, 200);
    random_init_f(D, 10000);

    float A2[200], B2[200], C2[200], D2[10000];
    copy_array_f(A2, A, 200);
    copy_array_f(B2, B, 200);
    copy_array_f(C2, C, 200);
    copy_array_f(D2, D, 10000);

    // copy_array(W, A, 100);
    omp_set_num_threads(4);
    start = clock();
    x = y * 2;
    for (i = 1; i <= 100; i++)
    {
        C[i] = B[i] + x;
        A[i] = C[i - 1] + z;
        C[i + 1] = A[i] * B[i];
        for (j = 1; j <= 50; j++)
        {
            D[IN(i, j, 60)] = D[IN(i, j - 1, 60)] + x;
        }
    }
    z = y + 4;
    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    x = y * 2;
#pragma omp parallel for
    for (i = 1; i <= 100; i++)
    {
        C2[i] = B2[i] + x;
    }
#pragma omp parallel for private(i, j)
    for (i = 1; i <= 100; i++)
    {
        A2[i] = C2[i - 1] + z2;
        for (j = 1; j <= 50; j++)
        {
            D2[IN(i, j, 60)] = D2[IN(i, j - 1, 60)] + x;
        }
    }
    C2[101] = A2[100] * B2[100];
    z2 = y + 4;
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans_f(D2, D, 200) && check_ans_f(C2, C, 200);
}

int main()
{
    if (loop1())
        printf("loop1 done\n");
    else
        printf("loop1 error\n");

    if (loop2()) printf("loop2 done\n");
}
--------------------------------------------------------------------------------
/Kmeans/serial.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

// Creates an array of random floats. Each number has a value from 0 - 1
float *create_rand_nums(const int num_elements)
{
    float *rand_nums = (float *)malloc(sizeof(float) * num_elements);
    assert(rand_nums != NULL);
    for (int i = 0; i < num_elements; i++)
    {
        rand_nums[i] = (rand() / (float)RAND_MAX);
    }
    return rand_nums;
}

// Distance**2 between d-vectors pointed to by v1, v2.
float distance2(const float *v1, const float *v2, const int d)
{
    float dist = 0.0;
    for (int i = 0; i < d; i++)
    {
        float diff = v1[i] - v2[i];
        dist += diff * diff;
    }
    return dist;
}

// Assign a site to the correct cluster by computing its distances to
// each cluster centroid.
int assign_site(const float *site, float *centroids,
                const int k, const int d)
{
    int best_cluster = 0;
    float best_dist = distance2(site, centroids, d);
    float *centroid = centroids + d;
    for (int c = 1; c < k; c++, centroid += d)
    {
        float dist = distance2(site, centroid, d);
        if (dist < best_dist)
        {
            best_cluster = c;
            best_dist = dist;
        }
    }
    return best_cluster;
}

// Add a site (vector) into a sum of sites (vector).
void add_site(const float *site, float *sum, const int d)
{
    for (int i = 0; i < d; i++)
    {
        sum[i] += site[i];
    }
}

// Print the centroids one per line.
void print_centroids(float *centroids, const int k, const int d)
{
    float *p = centroids;
    printf("Centroids:\n");
    for (int i = 0; i < k; i++)
    {
        for (int j = 0; j < d; j++, p++)
        {
            printf("%f ", *p);
        }
        printf("\n");
    }
}

int main(int argc, char **argv)
{
    int sites_per_proc = atoi(argv[1]);
    int nprocs = 8;        // total sites = sites_per_proc * nprocs, matching an 8-process MPI run
    int k = atoi(argv[2]); // number of clusters.
    int d = atoi(argv[3]); // dimension of data.
    srand(31359);
    float *sums;
    assert(sums = malloc(k * d * sizeof(float)));
    // The number of sites assigned to each cluster by this process. k integers.
    int *counts;
    assert(counts = malloc(k * sizeof(int)));
    // The current centroids against which sites are being compared.
    // These are shipped to the process by the root process.
    float *centroids;
    assert(centroids = malloc(k * d * sizeof(float)));

    float *all_sites = NULL;
    int *labels;

    all_sites = create_rand_nums(d * sites_per_proc * nprocs);
    // Take the first k sites as the initial cluster centroids.
    for (int i = 0; i < k * d; i++)
    {
        centroids[i] = all_sites[i];
    }
    print_centroids(centroids, k, d);
    assert(labels = malloc(nprocs * sites_per_proc * sizeof(int)));

    float norm = 1.0;

    while (norm > 0.00001)
    { // While they've moved...
        for (int i = 0; i < k * d; i++)
            sums[i] = 0.0;
        for (int i = 0; i < k; i++)
            counts[i] = 0;

        // Find the closest centroid to each site and assign to cluster.
        float *site = all_sites;
        for (int i = 0; i < sites_per_proc * nprocs; i++, site += d)
        {
            int cluster = assign_site(site, centroids, k, d);
            // Record the assignment of the site to the cluster.
            counts[cluster]++;
            add_site(site, &sums[cluster * d], d);
        }

        for (int i = 0; i < k; i++)
        {
            for (int j = 0; j < d; j++)
            {
                int dij = d * i + j;
                sums[dij] /= counts[i];
            }
        }
        // Have the centroids changed much?
        norm = distance2(sums, centroids, d * k);
        printf("norm: %f\n", norm);
        // Copy new centroids from grand_sums into centroids.
        for (int i = 0; i < k * d; i++)
        {
            centroids[i] = sums[i];
        }
        print_centroids(centroids, k, d);
    }

    // Now centroids are fixed, so compute a final label for each site.
    float *site = all_sites;
    for (int i = 0; i < sites_per_proc * nprocs; i++, site += d)
    {
        labels[i] = assign_site(site, centroids, k, d);
    }
}
--------------------------------------------------------------------------------
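The serial baseline takes the same three arguments as the MPI version (sites per process, number of clusters k, data dimension d) and fixes nprocs = 8, so both versions cluster the same total number of sites from the same seed; invoked along the lines of ./serial 10000 4 2 (binary name illustrative).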
/Kmeans/Kmeans_mpi.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <mpi.h>

float *create_rand_nums(const int num_elements)
{
    float *rand_nums = (float *)malloc(sizeof(float) * num_elements);
    assert(rand_nums != NULL);
    for (int i = 0; i < num_elements; i++)
    {
        rand_nums[i] = (rand() / (float)RAND_MAX);
    }
    return rand_nums;
}

float distance2(const float *v1, const float *v2, const int d)
{
    float dist = 0.0;
    for (int i = 0; i < d; i++)
    {
        float diff = v1[i] - v2[i];
        dist += diff * diff;
    }
    return dist;
}

int assign_site(const float *site, float *centroids,
                const int k, const int d)
{
    int best_cluster = 0;
    float best_dist = distance2(site, centroids, d);
    float *centroid = centroids + d;
    for (int c = 1; c < k; c++, centroid += d)
    {
        float dist = distance2(site, centroid, d);
        if (dist < best_dist)
        {
            best_cluster = c;
            best_dist = dist;
        }
    }
    return best_cluster;
}

void add_site(const float *site, float *sum, const int d)
{
    for (int i = 0; i < d; i++)
    {
        sum[i] += site[i];
    }
}

void print_centroids(float *centroids, const int k, const int d)
{
    float *p = centroids;
    printf("Centroids:\n");
    for (int i = 0; i < k; i++)
    {
        for (int j = 0; j < d; j++, p++)
        {
            printf("%f ", *p);
        }
        printf("\n");
    }
}

int main(int argc, char **argv)
{
    if (argc != 4)
    {
        fprintf(stderr,
                "Usage: kmeans num_sites_per_proc num_means num_dimensions\n");
        exit(1);
    }

    int sites_per_proc = atoi(argv[1]);
    int k = atoi(argv[2]); // number of clusters.
    int d = atoi(argv[3]); // dimension of data.
    srand(31359);

    MPI_Init(NULL, NULL);
    int rank, nprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    float *sites;
    assert(sites = malloc(sites_per_proc * d * sizeof(float)));
    float *sums;
    assert(sums = malloc(k * d * sizeof(float)));
    int *counts;
    assert(counts = malloc(k * sizeof(int)));
    float *centroids;
    assert(centroids = malloc(k * d * sizeof(float)));
    // The cluster assignments for each site.
    int *labels;
    assert(labels = malloc(sites_per_proc * sizeof(int)));

    float *all_sites = NULL;
    float *grand_sums = NULL;
    int *grand_counts = NULL;
    int *all_labels = NULL; /* only allocated (and used) on rank 0 */
    if (rank == 0)
    {
        all_sites = create_rand_nums(d * sites_per_proc * nprocs);
        for (int i = 0; i < k * d; i++)
        {
            centroids[i] = all_sites[i];
        }
        print_centroids(centroids, k, d);
        assert(grand_sums = malloc(k * d * sizeof(float)));
        assert(grand_counts = malloc(k * sizeof(int)));
        assert(all_labels = malloc(nprocs * sites_per_proc * sizeof(int)));
    }

    MPI_Scatter(all_sites, d * sites_per_proc, MPI_FLOAT, sites,
                d * sites_per_proc, MPI_FLOAT, 0, MPI_COMM_WORLD);

    float norm = 1.0;

    while (norm > 0.00001)
    {
        MPI_Bcast(centroids, k * d, MPI_FLOAT, 0, MPI_COMM_WORLD);
        for (int i = 0; i < k * d; i++)
            sums[i] = 0.0;
        for (int i = 0; i < k; i++)
            counts[i] = 0;
        float *site = sites;
        for (int i = 0; i < sites_per_proc; i++, site += d)
        {
            int cluster = assign_site(site, centroids, k, d);
            counts[cluster]++;
            add_site(site, &sums[cluster * d], d);
        }

        MPI_Reduce(sums, grand_sums, k * d, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
        MPI_Reduce(counts, grand_counts, k, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

        if (rank == 0)
        {
            for (int i = 0; i < k; i++)
            {
                for (int j = 0; j < d; j++)
                {
                    int dij = d * i + j;
                    grand_sums[dij] /= grand_counts[i];
                }
            }
            norm = distance2(grand_sums, centroids, d * k);
            printf("norm: %f\n", norm);
            for (int i = 0; i < k * d; i++)
            {
                centroids[i] = grand_sums[i];
            }
            print_centroids(centroids, k, d);
        }
        MPI_Bcast(&norm, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
    }

    float *site = sites;
    for (int i = 0; i < sites_per_proc; i++, site += d)
    {
        labels[i] = assign_site(site, centroids, k, d);
    }

    // Gather all labels into root process.
    MPI_Gather(labels, sites_per_proc, MPI_INT,
               all_labels, sites_per_proc, MPI_INT, 0, MPI_COMM_WORLD);

    MPI_Finalize();
}
--------------------------------------------------------------------------------
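Per iteration the pattern is: broadcast the current centroids, locally accumulate per-cluster sums and counts, MPI_Reduce both onto rank 0, let rank 0 form the new centroids and the movement norm, then broadcast the norm so every rank leaves the loop together. Run against the hostfile from /mpi/config, e.g. mpirun --hostfile config -np 8 ./Kmeans_mpi 10000 4 2 (launch line illustrative).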
/omp/5.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
#define min(i, j) (((i) < (j)) ? (i) : (j))
#define max(i, j) (((i) > (j)) ? (i) : (j))
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int i, j, k;
    int A[200], B[200], C[200], D[200];
    int A2[200], B2[200], C2[200], D2[200];
    random_init(A, 200);
    random_init(B, 200);
    random_init(C, 200);
    random_init(D, 200);
    copy_array(A2, A, 200);
    copy_array(B2, B, 200);
    copy_array(C2, C, 200);
    copy_array(D2, D, 200);

    clock_t start, end;

    omp_set_num_threads(4);

    start = clock();

    for (i = 1; i <= 100; i++)
    {
        A[i] = A[i] + B[i - 1];
        B[i] = C[i - 1] * 2;
        C[i] = 1 + B[i]; // use addition instead of division to stay in integers
        D[i] = C[i] * C[i];
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // resolve the serial B/C recurrence first, then the rest is parallel
    for (i = 1; i <= 100; i++)
    {
        B2[i] = C2[i - 1] * 2;
        C2[i] = 1 + B2[i];
    }

#pragma omp parallel for
    for (i = 1; i <= 100; i++)
    {
        A2[i] = A2[i] + B2[i - 1];
        D2[i] = C2[i] * C2[i];
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 200);
}

int loop2()
{
    int i, j, k;
    int A[1001], B[1001], C[1001], D[1001];
    int A2[1001], B2[1001], C2[1001], D2[1001];
    random_init(A, 1001);
    random_init(B, 1001);
    random_init(C, 1001);
    random_init(D, 1001);
    copy_array(A2, A, 1001);
    copy_array(B2, B, 1001);
    copy_array(C2, C, 1001);
    copy_array(D2, D, 1001);

    clock_t start, end;

    start = clock();

    for (i = 1; i <= 1000; i++)
    {
        A[i] = B[i] + C[i];
        D[i] = (A[i] + A[999 - i + 1]) / 2;
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
#pragma omp parallel for
    for (i = 1; i <= 500; i++)
    {
        A2[i] = B2[i] + C2[i];
        D2[i] = (A2[i] + A2[1000 - i]) / 2;
    }

#pragma omp parallel for
    for (i = 501; i <= 999; i++)
    {
        A2[i] = B2[i] + C2[i];
        D2[i] = (A2[i] + A2[1000 - i]) / 2;
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 1000);
}

int loop3()
{
    int i, j, k;
    int n = 510 * 510;
    int B[n], D[n];
    int B2[n], D2[n];
    int A[510][510], A2[510][510], C[510][510], C2[510][510];

    omp_set_num_threads(4);
    for (i = 0; i < 510; i++)
        for (j = 0; j < 510; j++)
        {
            A[i][j] = rand() % 1000 - 500;
            A2[i][j] = A[i][j];
            C[i][j] = rand() % 1000 - 500;
            C2[i][j] = C[i][j];
        }

    // random_init(A, n);
    random_init(B, n);
    // random_init(C, n);
    random_init(D, n);
    // copy_array(A2, A, n);
    copy_array(B2, B, n);
    // copy_array(C2, C, n);
    copy_array(D2, D, n);

    clock_t start, end;

    omp_set_num_threads(4);

    start = clock();

    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 100; j++)
        {
            A[3 * i + 2 * j][2 * j] = C[i][j] * 2;
            D[IN(i, j, 510)] = A[i - j + 6][i + j];
        }
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

#pragma omp parallel for collapse(2)
    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 100; j++)
        {
            A2[3 * i + 2 * j][2 * j] = C2[i][j] * 2;
            D2[IN(i, j, 510)] = A2[i - j + 6][i + j];
        }
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    for (i = 0; i < 510; i++)
        for (j = 0; j < 510; j++)
        {
            if (A[i][j] != A2[i][j])
            {
                printf("%d %d\n", i, j);
                printf("%d %d\n", A[i][j], A2[i][j]);
                return 0;
            }
        }
    return 1;
}
int main()
{
    if (loop1())
        printf("loop1 done!\n");
    else
        printf("loop1 error!\n");
    if (loop2())
        printf("loop2 done!\n");
    else
        printf("loop2 error!\n");

    if (loop3())
        printf("loop3 done!\n");
    else
        printf("loop3 error!\n");
}
--------------------------------------------------------------------------------
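loop2's only cross-iteration dependence is D[i] reading A[1000-i]: for i <= 500 the serial loop reads the not-yet-rewritten value, and for i >= 501 it reads the value rewritten in the first half. Splitting at i = 500 reproduces exactly that: the first parallel loop writes only A2[1..500] while reading untouched A2[500..999], and the second loop reads values the first loop already produced. (Both versions stop short of index 1000, and check_ans compares indices 0..999 only.)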
/omp/5_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
#define min(i, j) (((i) < (j)) ? (i) : (j))
#define max(i, j) (((i) > (j)) ? (i) : (j))
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int i, j, k;
    int A[200], B[200], C[200], D[200];
    int A2[200], B2[200], C2[200], D2[200];
    random_init(A, 200);
    random_init(B, 200);
    random_init(C, 200);
    random_init(D, 200);
    copy_array(A2, A, 200);
    copy_array(B2, B, 200);
    copy_array(C2, C, 200);
    copy_array(D2, D, 200);

    clock_t start, end;

    omp_set_num_threads(4);

    start = clock();

    for (i = 1; i <= 100; i++)
    {
        A[i] = A[i] + B[i - 1];
        B[i] = C[i - 1] * 2;
        C[i] = 1 + B[i]; // use addition instead of division to stay in integers
        D[i] = C[i] * C[i];
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (i = 1; i <= 100; i++)
    {
        B2[i] = C2[i - 1] * 2;
        C2[i] = 1 + B2[i];
    }

#pragma omp parallel for
    for (i = 1; i <= 100; i++)
    {
        A2[i] = A2[i] + B2[i - 1];
        D2[i] = C2[i] * C2[i];
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 200);
}

int loop2()
{
    int i, j, k;
    int A[1001], B[1001], C[1001], D[1001];
    int A2[1001], B2[1001], C2[1001], D2[1001];
    random_init(A, 1001);
    random_init(B, 1001);
    random_init(C, 1001);
/omp/5.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i) * (line) + (j))
#define min(i, j) (((i) < (j)) ? (i) : (j))
#define max(i, j) (((i) > (j)) ? (i) : (j))

// Note: reseeding from time(NULL) on every call means arrays filled
// within the same second get identical contents.
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int i;
    int A[200], B[200], C[200], D[200];
    int A2[200], B2[200], C2[200], D2[200];
    random_init(A, 200);
    random_init(B, 200);
    random_init(C, 200);
    random_init(D, 200);
    copy_array(A2, A, 200);
    copy_array(B2, B, 200);
    copy_array(C2, C, 200);
    copy_array(D2, D, 200);

    clock_t start, end;

    omp_set_num_threads(4);

    start = clock();

    for (i = 1; i <= 100; i++)
    {
        A[i] = A[i] + B[i - 1];
        B[i] = C[i - 1] * 2;
        C[i] = 1 + B[i]; // division turned into addition, to stay in integers
        D[i] = C[i] * C[i];
    }

    end = clock();
    // clock() measures CPU time summed over all threads; omp_get_wtime()
    // would be the right choice for wall-clock timing.
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // The B/C recurrence carries a dependence across iterations, so it
    // stays serial ...
    for (i = 1; i <= 100; i++)
    {
        B2[i] = C2[i - 1] * 2;
        C2[i] = 1 + B2[i];
    }

    // ... while A and D only read values the serial loop has already
    // finalized, so their iterations are independent.
    #pragma omp parallel for
    for (i = 1; i <= 100; i++)
    {
        A2[i] = A2[i] + B2[i - 1];
        D2[i] = C2[i] * C2[i];
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 200);
}

int loop2()
{
    int i;
    int A[1001], B[1001], C[1001], D[1001];
    int A2[1001], B2[1001], C2[1001], D2[1001];
    random_init(A, 1001);
    random_init(B, 1001);
    random_init(C, 1001);
    random_init(D, 1001);
    copy_array(A2, A, 1001);
    copy_array(B2, B, 1001);
    copy_array(C2, C, 1001);
    copy_array(D2, D, 1001);

    clock_t start, end;

    start = clock();

    for (i = 1; i <= 1000; i++)
    {
        A[i] = B[i] + C[i];
        D[i] = (A[i] + A[999 - i + 1]) / 2; // i.e. A[1000 - i]
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // D[i] reads A[1000 - i]: for i <= 500 that is a not-yet-updated
    // value, for i > 500 a value the first half has already produced,
    // so splitting the range at 500 removes the cross-half dependence.
    #pragma omp parallel for
    for (i = 1; i <= 500; i++)
    {
        A2[i] = B2[i] + C2[i];
        D2[i] = (A2[i] + A2[1000 - i]) / 2;
    }

    #pragma omp parallel for
    for (i = 501; i <= 1000; i++) // was 999: iteration i = 1000 was dropped
    {
        A2[i] = B2[i] + C2[i];
        D2[i] = (A2[i] + A2[1000 - i]) / 2;
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 1001); // was 1000: index 1000 went unchecked
}

int loop3()
{
    int i, j;
    int n = 510 * 510;
    // ~1 MB per array: keep the big 1-D arrays on the heap and the
    // matrices static, so four of each never blow the stack limit.
    int *B = malloc(n * sizeof(int));
    int *D = malloc(n * sizeof(int));
    int *B2 = malloc(n * sizeof(int));
    int *D2 = malloc(n * sizeof(int));
    static int A[510][510], A2[510][510], C[510][510], C2[510][510];

    omp_set_num_threads(4);
    for (i = 0; i < 510; i++)
        for (j = 0; j < 510; j++)
        {
            A[i][j] = rand() % 1000 - 500;
            A2[i][j] = A[i][j];
            C[i][j] = rand() % 1000 - 500;
            C2[i][j] = C[i][j];
        }

    random_init(B, n);
    random_init(D, n);
    copy_array(B2, B, n);
    copy_array(D2, D, n);

    clock_t start, end;

    start = clock();

    // Note: i - j + 6 goes negative for i - j < -6, an out-of-bounds
    // read inherited from the loop as given; only A is compared below,
    // and its written cells never alias the cells read here.
    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 100; j++)
        {
            A[3 * i + 2 * j][2 * j] = C[i][j] * 2;
            D[IN(i, j, 510)] = A[i - j + 6][i + j];
        }
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

    // Within 1 <= i, j <= 100 the written cells A2[3i+2j][2j] never
    // coincide with the read cells A2[i-j+6][i+j], so the iteration
    // space is dependence-free and both loop levels can be collapsed.
    #pragma omp parallel for collapse(2)
    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 100; j++)
        {
            A2[3 * i + 2 * j][2 * j] = C2[i][j] * 2;
            D2[IN(i, j, 510)] = A2[i - j + 6][i + j];
        }
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    free(B);
    free(D);
    free(B2);
    free(D2);

    for (i = 0; i < 510; i++)
        for (j = 0; j < 510; j++)
        {
            if (A[i][j] != A2[i][j])
            {
                printf("%d %d\n", i, j);
                printf("%d %d\n", A[i][j], A2[i][j]);
                return 0;
            }
        }
    return 1;
}

int main()
{
    if (loop1())
        printf("loop1 done!\n");
    else
        printf("loop1 error!\n");
    if (loop2())
        printf("loop2 done!\n");
    else
        printf("loop2 error!\n");

    if (loop3())
        printf("loop3 done!\n");
    else
        printf("loop3 error!\n");
}
--------------------------------------------------------------------------------
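Both omp/5.c and omp/5_1.c time their loops with clock(), which under OpenMP accumulates CPU time across all threads, so a 4-thread region can report roughly four times its wall time and make the parallel version look slower than it is. A minimal self-contained sketch (hypothetical file wtime.c, not part of this repository) contrasting the two clocks:

/* wtime.c -- minimal sketch (hypothetical, not in this repo): clock()
 * sums CPU time over every OpenMP thread; omp_get_wtime() is the
 * wall clock and is what speedup should be measured against. */
#include <stdio.h>
#include <time.h>
#include <omp.h>

int main(void)
{
    clock_t c0 = clock();
    double w0 = omp_get_wtime();

    double sum = 0.0;
    #pragma omp parallel for reduction(+ : sum)
    for (int i = 0; i < 100000000; i++)
        sum += 1.0 / (i + 1.0);

    printf("clock():         %f s (CPU, all threads)\n",
           (double)(clock() - c0) / CLOCKS_PER_SEC);
    printf("omp_get_wtime(): %f s (wall)\n", omp_get_wtime() - w0);
    printf("sum=%f\n", sum);
    return 0;
}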
/omp/5_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i) * (line) + (j))
#define min(i, j) (((i) < (j)) ? (i) : (j))
#define max(i, j) (((i) > (j)) ? (i) : (j))

// Note: reseeding from time(NULL) on every call means arrays filled
// within the same second get identical contents.
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int i;
    int A[200], B[200], C[200], D[200];
    int A2[200], B2[200], C2[200], D2[200];
    random_init(A, 200);
    random_init(B, 200);
    random_init(C, 200);
    random_init(D, 200);
    copy_array(A2, A, 200);
    copy_array(B2, B, 200);
    copy_array(C2, C, 200);
    copy_array(D2, D, 200);

    clock_t start, end;

    omp_set_num_threads(4);

    start = clock();

    for (i = 1; i <= 100; i++)
    {
        A[i] = A[i] + B[i - 1];
        B[i] = C[i - 1] * 2;
        C[i] = 1 + B[i]; // division turned into addition, to stay in integers
        D[i] = C[i] * C[i];
    }

    end = clock();
    // clock() measures CPU time summed over all threads; omp_get_wtime()
    // would be the right choice for wall-clock timing.
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // The B/C recurrence carries a dependence across iterations, so it
    // stays serial ...
    for (i = 1; i <= 100; i++)
    {
        B2[i] = C2[i - 1] * 2;
        C2[i] = 1 + B2[i];
    }

    // ... while A and D only read values the serial loop has already
    // finalized, so their iterations are independent.
    #pragma omp parallel for
    for (i = 1; i <= 100; i++)
    {
        A2[i] = A2[i] + B2[i - 1];
        D2[i] = C2[i] * C2[i];
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 200);
}

int loop2()
{
    int i;
    int A[1001], B[1001], C[1001], D[1001];
    int A2[1001], B2[1001], C2[1001], D2[1001];
    random_init(A, 1001);
    random_init(B, 1001);
    random_init(C, 1001);
    random_init(D, 1001);
    copy_array(A2, A, 1001);
    copy_array(B2, B, 1001);
    copy_array(C2, C, 1001);
    copy_array(D2, D, 1001);

    clock_t start, end;

    start = clock();

    for (i = 1; i <= 1000; i++)
    {
        A[i] = B[i] + C[i];
        D[i] = (A[i] + A[999 - i + 1]) / 2; // i.e. A[1000 - i]
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // D[i] reads A[1000 - i]: for i <= 500 that is a not-yet-updated
    // value, for i > 500 a value the first half has already produced,
    // so splitting the range at 500 removes the cross-half dependence.
    #pragma omp parallel for
    for (i = 1; i <= 500; i++)
    {
        A2[i] = B2[i] + C2[i];
        D2[i] = (A2[i] + A2[1000 - i]) / 2;
    }

    #pragma omp parallel for
    for (i = 501; i <= 1000; i++) // was 999: iteration i = 1000 was dropped
    {
        A2[i] = B2[i] + C2[i];
        D2[i] = (A2[i] + A2[1000 - i]) / 2;
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 1001); // was 1000: index 1000 went unchecked
}

int loop3()
{
    int i, j;
    int n = 510 * 510;
    // ~1 MB per array: keep the big 1-D arrays on the heap and the
    // matrices static, so four of each never blow the stack limit.
    int *B = malloc(n * sizeof(int));
    int *D = malloc(n * sizeof(int));
    int *B2 = malloc(n * sizeof(int));
    int *D2 = malloc(n * sizeof(int));
    static int A[510][510], A2[510][510], C[510][510], C2[510][510];

    omp_set_num_threads(4);
    for (i = 0; i < 510; i++)
        for (j = 0; j < 510; j++)
        {
            A[i][j] = rand() % 1000 - 500;
            A2[i][j] = A[i][j];
            C[i][j] = rand() % 1000 - 500;
            C2[i][j] = C[i][j];
        }

    random_init(B, n);
    random_init(D, n);
    copy_array(B2, B, n);
    copy_array(D2, D, n);

    clock_t start, end;

    start = clock();

    // Note: i - j + 6 goes negative for i - j < -6, an out-of-bounds
    // read inherited from the loop as given; only A is compared below,
    // and its written cells never alias the cells read here.
    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 100; j++)
        {
            A[3 * i + 2 * j][2 * j] = C[i][j] * 2;
            D[IN(i, j, 510)] = A[i - j + 6][i + j];
        }
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

    // Within 1 <= i, j <= 100 the written cells A2[3i+2j][2j] never
    // coincide with the read cells A2[i-j+6][i+j], so the iteration
    // space is dependence-free and both loop levels can be collapsed.
    #pragma omp parallel for collapse(2)
    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 100; j++)
        {
            A2[3 * i + 2 * j][2 * j] = C2[i][j] * 2;
            D2[IN(i, j, 510)] = A2[i - j + 6][i + j];
        }
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    free(B);
    free(D);
    free(B2);
    free(D2);

    for (i = 0; i < 510; i++)
        for (j = 0; j < 510; j++)
        {
            if (A[i][j] != A2[i][j])
            {
                printf("%d %d\n", i, j);
                printf("%d %d\n", A[i][j], A2[i][j]);
                return 0;
            }
        }
    return 1;
}

int main()
{
    if (loop1())
        printf("loop1 done!\n");
    else
        printf("loop1 error!\n");
    if (loop2())
        printf("loop2 done!\n");
    else
        printf("loop2 error!\n");

    if (loop3())
        printf("loop3 done!\n");
    else
        printf("loop3 error!\n");
}
--------------------------------------------------------------------------------
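mpi/3.c below implements a Fox-style (broadcast-multiply-roll) block matrix multiply on a square process grid. Its block distribution rests on one datatype trick: MPI_Type_vector(b, b, N, MPI_INT, ...) describes b rows of b ints with a stride of N ints between rows, which is exactly one b-by-b block of a row-major N-by-N matrix. A minimal self-contained sketch (hypothetical file subblock.c, not part of this repository) showing just that type at work:

/* subblock.c -- minimal sketch (hypothetical, not in this repo) of the
 * strided-block datatype mpi/3.c uses to carve submatrices out of the
 * full matrix without copying them into a staging buffer first. */
#include <stdio.h>
#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    enum { N = 4, b = 2 };
    int mat[N * N], blk[b * b];
    for (int i = 0; i < N * N; i++)
        mat[i] = i;

    /* b rows of b ints, stepping N ints between rows: one b x b block. */
    MPI_Datatype SubMat;
    MPI_Type_vector(b, b, N, MPI_INT, &SubMat);
    MPI_Type_commit(&SubMat);

    if (rank == 0)
    {
        /* Send the top-right b x b block (start offset = column b) to
         * self; the receive side unpacks it contiguously. */
        MPI_Sendrecv(mat + b, 1, SubMat, 0, 0,
                     blk, b * b, MPI_INT, 0, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("block: %d %d %d %d\n", blk[0], blk[1], blk[2], blk[3]);
        /* prints: block: 2 3 6 7 */
    }

    MPI_Type_free(&SubMat);
    MPI_Finalize();
    return 0;
}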
/mpi/3.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <mpi.h>

#define IDX(i, j, N) (((i) * (N)) + (j))

// Note: reseeding from clock() once per row is questionable style; a
// single srand() call up front would be cleaner.
void gen_rand_mat(int *a, int num)
{
    for (int i = 0; i < num; i++)
    {
        srand(clock());
        for (int j = 0; j < num; j++)
        {
            a[IDX(i, j, num)] = rand() % 100;
        }
    }
}

void print_mat(int *a, int num, int id)
{
    for (int i = 0; i < num; i++)
    {
        for (int j = 0; j < num; j++)
        {
            printf("|%d : %d ", id, a[IDX(i, j, num)]);
        }
        printf("\n");
    }
}

// C += A * B for num x num row-major blocks.
void compute(int *A, int *B, int *C, int num)
{
    for (int i = 0; i < num; i++)
    {
        for (int j = 0; j < num; j++)
        {
            for (int k = 0; k < num; k++)
                C[IDX(i, j, num)] += A[IDX(i, k, num)] * B[IDX(k, j, num)];
        }
    }
}

int check(int *C, int *nC, int num)
{
    for (int i = 0; i < num; i++)
    {
        for (int j = 0; j < num; j++)
        {
            if (C[IDX(i, j, num)] != nC[IDX(i, j, num)])
            {
                return 0;
            }
        }
    }
    return 1;
}
int main(int argc, char *argv[])
{
    int id_procs, num_procs;
    int block_size, sqrt_procs;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &id_procs);

    sqrt_procs = (int)sqrt((double)num_procs);
    if (sqrt_procs * sqrt_procs != num_procs)
    {
        if (id_procs == 0)
            fprintf(stderr, "number of processes must be a perfect square\n");
        MPI_Finalize();
        return 1;
    }
    if (argc != 2)
    {
        if (id_procs == 0)
            fprintf(stderr, "you need to provide the block size\n");
        MPI_Finalize();
        return 1;
    }
    block_size = atoi(argv[1]);
    int *sA, *sB, *sC;
    int N = block_size * sqrt_procs;
    if (id_procs == 0)
    {
        sA = (int *)malloc(N * N * sizeof(int));
        sB = (int *)malloc(N * N * sizeof(int));
        sC = (int *)malloc(N * N * sizeof(int));

        memset(sC, 0, N * N * sizeof(int));
        gen_rand_mat(sA, N);
        gen_rand_mat(sB, N);
        // Serial reference result, used to verify the parallel answer.
        compute(sA, sB, sC, N);
    }
    // Per-process blocks (VLAs); very large block sizes would need heap
    // allocation instead.
    int A[block_size * block_size];
    int B[block_size * block_size];
    int C[block_size * block_size];
    int ans[block_size * block_size];
    int A_in[block_size * block_size];
    int B_in[block_size * block_size];
    memset(C, 0, block_size * block_size * sizeof(int));
    MPI_Datatype SubMat, Mat;
    MPI_Status status;
    // SubMat picks one block_size x block_size block out of the N x N
    // matrix (stride N between rows); Mat is the same block stored
    // contiguously on the receiving side.
    MPI_Type_vector(block_size, block_size, N, MPI_INT, &SubMat);
    MPI_Type_commit(&SubMat);
    MPI_Type_vector(block_size, block_size, block_size, MPI_INT, &Mat);
    MPI_Type_commit(&Mat);
    if (id_procs == 0)
    {
        for (int i = 0; i < sqrt_procs; i++)
        {
            int lineoff = block_size * N * i;
            for (int j = 0; j < sqrt_procs; j++)
            {
                if (i == 0 && j == 0)
                {
                    // Rank 0 keeps block (0,0): pair the nonblocking
                    // self send/recv and complete both requests, so the
                    // send request is not leaked.
                    MPI_Request reqs[2];
                    MPI_Isend(sA, 1, SubMat, 0, 0, MPI_COMM_WORLD, &reqs[0]);
                    MPI_Irecv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &reqs[1]);
                    MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
                    MPI_Isend(sB, 1, SubMat, 0, 1, MPI_COMM_WORLD, &reqs[0]);
                    MPI_Irecv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &reqs[1]);
                    MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
                    continue;
                }
                int offset = j * block_size + lineoff;
                MPI_Send(sA + offset, 1, SubMat, i * sqrt_procs + j, 0, MPI_COMM_WORLD);
                MPI_Send(sB + offset, 1, SubMat, i * sqrt_procs + j, 1, MPI_COMM_WORLD);
            }
        }
    }
    else
    {
        MPI_Recv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &status);
        MPI_Recv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &status);
    }

    MPI_Comm row_comm, col_comm;
    int rank_A, size_A;
    int color_A;
    int key_A;

    int rank_B, size_B;
    int color_B;
    int key_B;

    // Row communicator: color = row index, key (rank) = column index.
    key_A = id_procs % sqrt_procs;
    color_A = id_procs / sqrt_procs;
    MPI_Comm_split(MPI_COMM_WORLD, color_A, key_A, &row_comm);
    MPI_Comm_rank(row_comm, &rank_A);
    MPI_Comm_size(row_comm, &size_A);

    // Column communicator: color = column index, key (rank) = row index.
    key_B = id_procs / sqrt_procs;
    color_B = id_procs % sqrt_procs;
    MPI_Comm_split(MPI_COMM_WORLD, color_B, key_B, &col_comm);
    MPI_Comm_rank(col_comm, &rank_B);
    MPI_Comm_size(col_comm, &size_B);

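    /* Fox's algorithm (broadcast-multiply-roll), q = sqrt_procs: in
     * stage k, process row i broadcasts the A block originally at grid
     * column (i + k) mod q across its row communicator, every process
     * multiplies it into its running C block, and the B blocks roll one
     * step up their column communicator. After q stages each process
     * has accumulated its finished block of C. */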
    for (int k = 0; k < sqrt_procs; k++)
    {
        if (rank_A == (color_A + k) % size_A)
        {
            memcpy(A_in, A, block_size * block_size * sizeof(int));
        }
        MPI_Bcast(A_in, 1, Mat, (color_A + k) % size_A, row_comm);
        compute(A_in, B, C, block_size);
        // Roll B one step up the column; MPI_Sendrecv avoids the
        // deadlock a blocking Send/Recv pair can hit on large blocks.
        int dest = (rank_B - 1 + size_B) % size_B;
        int src = (rank_B + 1) % size_B;
        MPI_Sendrecv(B, 1, Mat, dest, 0, B_in, 1, Mat, src, 0, col_comm, &status);
        memcpy(B, B_in, block_size * block_size * sizeof(int));
    }

    // Distribute blocks of the serial reference result so every process
    // can check its own block.
    if (id_procs == 0)
    {
        for (int i = 0; i < sqrt_procs; i++)
        {
            for (int j = 0; j < sqrt_procs; j++)
            {
                if (i == 0 && j == 0)
                {
                    MPI_Request reqs[2];
                    MPI_Isend(sC, 1, SubMat, 0, 100, MPI_COMM_WORLD, &reqs[0]);
                    MPI_Irecv(ans, 1, Mat, 0, 100, MPI_COMM_WORLD, &reqs[1]);
                    MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
                    continue;
                }
                int offset = j * block_size + block_size * N * i;
                MPI_Send(sC + offset, 1, SubMat, i * sqrt_procs + j, 100, MPI_COMM_WORLD);
            }
        }
    }
    else
    {
        MPI_Recv(ans, 1, Mat, 0, 100, MPI_COMM_WORLD, &status);
    }

    // Debug output: the reference block, then the computed block.
    print_mat(ans, block_size, id_procs);
    print_mat(C, block_size, id_procs);
    if (check(C, ans, block_size))
    {
        printf("Proc#%d Done.\n", id_procs);
    }

    MPI_Type_free(&SubMat);
    MPI_Type_free(&Mat);
    if (id_procs == 0)
    {
        free(sA);
        free(sB);
        free(sC);
    }
    MPI_Finalize();
    return 0;
}
--------------------------------------------------------------------------------
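One plausible way to build and run the files above, assuming gcc and an MPI implementation such as Open MPI (process counts and problem sizes are illustrative, not prescribed by the repository):

    gcc -fopenmp -O2 omp/5.c -o omp/5
    ./omp/5
    mpicc -O2 Kmeans/Kmeans_mpi.c -o Kmeans/Kmeans_mpi
    mpirun -np 4 Kmeans/Kmeans_mpi 10000 8 2   # sites per proc, clusters, dimensions
    mpicc -O2 mpi/3.c -o mpi/3 -lm
    mpirun -np 4 mpi/3 64                      # 4 procs = 2x2 grid, 64x64 blocks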