├── mpi
│   ├── 1
│   ├── 3
│   ├── 4
│   ├── config
│   ├── 2_1
│   ├── 2_2
│   ├── 5_1
│   ├── 5_2
│   ├── 2_1.c
│   ├── 2_2.c
│   ├── 1.c
│   ├── 4.c
│   ├── 5_2.c
│   ├── 5_1.c
│   └── 3.c
├── omp
│   ├── 1
│   ├── 2
│   ├── 3
│   ├── 4
│   ├── 5
│   ├── 1_1
│   ├── 2_1
│   ├── 3_1
│   ├── 4_1
│   ├── 5_1
│   ├── 2.c
│   ├── 2_1.c
│   ├── 4.c
│   ├── 4_1.c
│   ├── 1.c
│   ├── 1_1.c
│   ├── 3.c
│   ├── 3_1.c
│   ├── 5.c
│   └── 5_1.c
├── Kmeans
│   ├── serial
│   ├── Kmeans_mpi
│   ├── serial.c
│   └── Kmeans_mpi.c
├── project_report.pdf
├── parallel_computing_hw1.pdf
└── README.md

/mpi/config:
--------------------------------------------------------------------------------
202.38.79.8 slots=4
202.38.75.64 slots=4
--------------------------------------------------------------------------------
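This is an Open MPI-style hostfile listing two nodes with four slots each. It is typically passed to the launcher along the lines of `mpirun --hostfile config -np 8 ./a.out` (the binary name here is illustrative; substitute the compiled experiment).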
/mpi/1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/mpi/1
--------------------------------------------------------------------------------
/mpi/3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/mpi/3
--------------------------------------------------------------------------------
/mpi/4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/mpi/4
--------------------------------------------------------------------------------
/omp/1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/1
--------------------------------------------------------------------------------
/omp/2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/2
--------------------------------------------------------------------------------
/omp/3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/3
--------------------------------------------------------------------------------
/omp/4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/4
--------------------------------------------------------------------------------
/omp/5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/5
--------------------------------------------------------------------------------
/mpi/2_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/mpi/2_1
--------------------------------------------------------------------------------
/mpi/2_2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/mpi/2_2
--------------------------------------------------------------------------------
/mpi/5_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/mpi/5_1
--------------------------------------------------------------------------------
/mpi/5_2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/mpi/5_2
--------------------------------------------------------------------------------
/omp/1_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/1_1
--------------------------------------------------------------------------------
/omp/2_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/2_1
--------------------------------------------------------------------------------
/omp/3_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/3_1
--------------------------------------------------------------------------------
/omp/4_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/4_1
--------------------------------------------------------------------------------
/omp/5_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/omp/5_1
--------------------------------------------------------------------------------
/Kmeans/serial:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/Kmeans/serial
--------------------------------------------------------------------------------
/Kmeans/Kmeans_mpi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/Kmeans/Kmeans_mpi
--------------------------------------------------------------------------------
/project_report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/project_report.pdf
--------------------------------------------------------------------------------
/parallel_computing_hw1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunxin000/Parallel_computing_Exp/HEAD/parallel_computing_hw1.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Parallel_computing_Exp
Lab for Parallel computing (USTC COMP6201P), including hw1 and the final project.
If you find it helpful, give it a star! 😄💯
--------------------------------------------------------------------------------
/mpi/2_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
    int id_procs, num_procs;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &id_procs);

    int data = id_procs;
    int recvdata;
    MPI_Status status;

    /* Butterfly (recursive-doubling) all-reduce: in each round, exchange
       partial sums with the partner whose rank differs in one bit. */
    for (int i = 2; i <= num_procs; i <<= 1)
    {
        int tag = i >> 1;
        int dest = id_procs ^ tag;
        MPI_Send(&data, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
        MPI_Recv(&recvdata, 1, MPI_INT, dest, tag, MPI_COMM_WORLD, &status);
        data += recvdata;
    }

    printf("Proc:%d Sum is = %d\n", id_procs, data);
    MPI_Finalize();
    return 0;
}
--------------------------------------------------------------------------------
/mpi/2_2.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
    int id_procs, num_procs;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &id_procs);

    int data = id_procs;
    int recvdata;
    MPI_Status status;

    /* Up-sweep: binomial-tree reduction towards rank 0. */
    for (int i = 2; i <= num_procs; i <<= 1)
    {
        int tag = i >> 1;
        int diff = id_procs & tag;
        if (diff)
        {
            MPI_Send(&data, 1, MPI_INT, id_procs - tag, tag, MPI_COMM_WORLD);
        }
        else
        {
            MPI_Recv(&recvdata, 1, MPI_INT, id_procs + tag, tag, MPI_COMM_WORLD, &status);
            data += recvdata; /* only receivers have a valid recvdata */
        }
    }

    /* Down-sweep: broadcast the total back along the same tree. */
    for (int i = num_procs; i >= 2; i >>= 1)
    {
        int tag = i;
        if (id_procs % i == 0)
        {
            MPI_Send(&data, 1, MPI_INT, id_procs + (i >> 1), tag, MPI_COMM_WORLD);
        }
        else if (id_procs % (i >> 1) == 0)
        {
            MPI_Recv(&data, 1, MPI_INT, id_procs - (i >> 1), tag, MPI_COMM_WORLD, &status);
        }
    }

    printf("%d Sum is = %d\n", id_procs, data);

    MPI_Finalize();
    return 0;
}
--------------------------------------------------------------------------------
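Both programs compute the same global sum that a single collective would produce, so their output can be checked against the standard collective. A minimal reference sketch (same variables as above):

    int sum;
    /* library all-reduce equivalent of the hand-rolled butterfly/tree */
    MPI_Allreduce(&data, &sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    printf("Proc:%d Sum is = %d\n", id_procs, sum);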
/mpi/1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
    int id_procs, num_procs;
    int msg = 10;
    int tag = 5;
    char seq[16] = "hello mpi!";
    char seqin[16];
    char hostname[100];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &id_procs);

    int color = id_procs / 4;
    int key = id_procs % 4;
    gethostname(hostname, sizeof(hostname));
    MPI_Comm split_comm_world;
    MPI_Status status;
    int rank, size, msgin;

    MPI_Comm_split(MPI_COMM_WORLD, color, key, &split_comm_world);
    MPI_Comm_rank(split_comm_world, &rank);
    MPI_Comm_size(split_comm_world, &size);
    printf("id_procs: %d. process %d of %d. comm: %d. host: %s\n", id_procs, rank, size, color, hostname);
    MPI_Barrier(MPI_COMM_WORLD);
    if (id_procs == 0)
    {
        strcpy(seqin, seq);
        MPI_Send(&seq, 16, MPI_CHAR, 4, tag, MPI_COMM_WORLD);
    }
    else if (id_procs == 4)
    {
        MPI_Recv(&seqin, 16, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
    }

    /* Each sub-communicator's local rank 0 (global ranks 0 and 4) now holds
       the message; broadcast it within each sub-communicator. */
    MPI_Bcast(&seqin, 16, MPI_CHAR, 0, split_comm_world);
    printf("MPI comm rank %d, original id %d, size %d. The new msg is %s\n", rank, id_procs, size, seqin);
    MPI_Finalize();
    return 0;
}
--------------------------------------------------------------------------------
/mpi/4.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <mpi.h>
#define SERVER_NUM 2

void serve(MPI_Comm server_comm, int id, int num)
{
    int num_workers = num - SERVER_NUM;
    int recv_size = num_workers / SERVER_NUM + 1;
    int recv_data[recv_size];
    int gather_buff[recv_size * SERVER_NUM];
    int average;
    int sum, ctn;
    MPI_Status status;
    while (1)
    {
        memset(recv_data, 0, recv_size * sizeof(int));
        memset(gather_buff, 0, recv_size * SERVER_NUM * sizeof(int));
        sum = 0;
        ctn = 0;
        /* Collect one value from every worker assigned to this server. */
        for (int i = 1; i * SERVER_NUM + id < num; i++)
        {
            MPI_Recv(recv_data + i - 1, 1, MPI_INT, i * SERVER_NUM + id, 0, MPI_COMM_WORLD, &status);
            ctn++;
        }

        /* Share the partial data among the servers. */
        MPI_Allgather(recv_data, recv_size, MPI_INT, gather_buff, recv_size, MPI_INT, server_comm);

        for (int i = 0; i < recv_size * SERVER_NUM; i++)
        {
            sum += gather_buff[i];
        }
        average = sum / num_workers;
        printf("Proc#%d send average data = %d\n", id, average);
        MPI_Barrier(server_comm);
        for (int i = 1; i <= ctn; i++)
        {
            MPI_Send(&average, 1, MPI_INT, i * SERVER_NUM + id, 1, MPI_COMM_WORLD);
        }
    }
}

void work(int id)
{
    int randata;
    int recvdata;
    MPI_Status status;

    while (1)
    {
        srand(time(NULL) + id);
        randata = rand() % 100;
        printf("proc#%d send data = %d\n", id, randata);
        MPI_Send(&randata, 1, MPI_INT, id % SERVER_NUM, 0, MPI_COMM_WORLD);
        MPI_Recv(&recvdata, 1, MPI_INT, id % SERVER_NUM, 1, MPI_COMM_WORLD, &status);
        printf("Proc#%d receive average data = %d\n", id, recvdata);
        sleep(5);
    }
}

int main(int argc, char *argv[])
{
    int id_procs, num_procs;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &id_procs);

    int P, Q;
    P = SERVER_NUM;
    Q = num_procs - P;

    MPI_Comm server_comm;
    MPI_Comm_split(MPI_COMM_WORLD, id_procs / P, id_procs, &server_comm);

    if (id_procs > P - 1)
    {
        work(id_procs);
    }
    else
    {
        serve(server_comm, id_procs, num_procs);
    }

    MPI_Finalize();
    return 0;
}
--------------------------------------------------------------------------------
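With SERVER_NUM = 2, ranks 0 and 1 act as servers and every other rank is a worker reporting to rank id % SERVER_NUM; the MPI_Comm_split with color id_procs / P puts exactly ranks 0..P-1 into the servers' shared communicator. Launched with, e.g., mpirun -np 8 (launch line illustrative), this gives 2 servers and 6 workers; note the server loop never terminates by design, so the run is stopped externally.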
/mpi/5_2.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <mpi.h>
#define N 500
#define IDX(i, j) (((i)*N) + (j))

void gen_rand_array(double *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 100;
    }
}

void compute(double *A, double *B, int a, int b)
{
    for (int i = 1; i <= a; i++)
    {
        for (int j = 1; j <= b; j++)
        {
            B[IDX(i, j)] = (A[IDX(i - 1, j)] + A[IDX(i, j + 1)] + A[IDX(i + 1, j)] + A[IDX(i, j - 1)]) / 4.0;
        }
    }
}

int check_ans(double *B, double *A)
{
    for (int i = 1; i < N - 1; i++)
    {
        for (int j = 1; j < N - 1; j++)
        {
            if (fabs(B[IDX(i, j)] - A[IDX(i, j)]) >= 1e-2)
            {
                return 0;
            }
        }
    }
    return 1;
}

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int id_procs, num_procs;
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &id_procs);
    MPI_Status status;
    MPI_Datatype SubMat;
    int rows = sqrt(num_procs);
    int cols = num_procs / rows;
    int a = (N - 2 + rows - 1) / rows;
    int b = (N - 2 + cols - 1) / cols;
    int alloc_num = (a + 1) * (b + 1) * num_procs;
    double A[alloc_num];
    double B[alloc_num];
    double B2[alloc_num];

    // Proc#0 randomizes the data and computes the serial reference result
    if (id_procs == 0)
    {
        gen_rand_array(A, N * N);
        compute(A, B2, N - 2, N - 2);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    // Proc#0 sends each process its (a+2)x(b+2) block, including halo rows/columns
    MPI_Type_vector(a + 2, b + 2, N, MPI_DOUBLE, &SubMat);
    MPI_Type_commit(&SubMat);

    if (id_procs == 0)
    {
        for (int i = 0; i < rows; i++)
        {
            for (int j = 0; j < cols; j++)
            {
                if (i == 0 && j == 0)
                    continue;
                MPI_Send(A + i * a * N + b * j, 1, SubMat, j + cols * i, 0, MPI_COMM_WORLD);
            }
        }
    }
    else
    {
        MPI_Recv(A, 1, SubMat, 0, 0, MPI_COMM_WORLD, &status);
    }

    // compute
    compute(A, B, a, b);

    // Gather the result blocks (interior a x b part only)
    MPI_Datatype SubMat_B;
    MPI_Type_vector(a, b, N, MPI_DOUBLE, &SubMat_B);
    MPI_Type_commit(&SubMat_B);
    if (id_procs == 0)
    {
        for (int i = 0; i < rows; i++)
        {
            for (int j = 0; j < cols; j++)
            {
                if (i == 0 && j == 0)
                    continue;
                MPI_Recv(&B[IDX(a * i + 1, b * j + 1)], 1, SubMat_B, i * cols + j, 1, MPI_COMM_WORLD, &status);
            }
        }
    }
    else
    {
        MPI_Send(&B[IDX(1, 1)], 1, SubMat_B, 0, 1, MPI_COMM_WORLD);
    }

    if (id_procs == 0)
    {
        if (check_ans(B, B2))
        {
            printf("Done. No error\n");
        }
        else
        {
            printf("Error!\n");
        }
    }
    MPI_Finalize();
    return 0;
}
--------------------------------------------------------------------------------
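A note on the derived type: MPI_Type_vector(a + 2, b + 2, N, MPI_DOUBLE, &SubMat) describes a+2 runs of b+2 contiguous doubles, each run starting N elements after the previous one, i.e. an (a+2)-by-(b+2) sub-block of the row-major N-by-N array, halo included. A sketch of the manual packing it replaces (illustrative only, same variable names as above):

    /* send the block row by row instead of as one derived type */
    for (int r = 0; r < a + 2; r++)
        MPI_Send(A + (i * a + r) * N + b * j, b + 2, MPI_DOUBLE,
                 j + cols * i, 0, MPI_COMM_WORLD);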
/mpi/5_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <mpi.h>
#define N 500
#define IDX(i, j) (((i)*N) + (j))
void compute(double *A, double *B, int num)
{
    for (int i = 1; i < N - 1; i++)
    {
        for (int j = 1; j < N - 1; j++)
        {
            B[IDX(i, j)] = (A[IDX(i - 1, j)] + A[IDX(i, j + 1)] + A[IDX(i + 1, j)] + A[IDX(i, j - 1)]) / 4.0;
        }
    }
}
void gen_rand_array(double *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 100;
    }
}
int check_ans(double *B, double *C)
{
    for (int i = 1; i < N - 1; i++)
    {
        for (int j = 1; j < N - 1; j++)
        {
            if (fabs(B[IDX(i, j)] - C[IDX(i, j)]) >= 1e-4)
            {
                return 0;
            }
        }
    }
    return 1;
}

int main(int argc, char *argv[])
{
    int id_procs, num_procs, num_1;
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &id_procs);
    double *A, *B, *B2;
    A = (double *)malloc(N * N * sizeof(double));
    B = (double *)malloc(N * N * sizeof(double));
    B2 = (double *)malloc(N * N * sizeof(double));
    num_1 = num_procs - 1;
    // Proc#(num_procs-1) randomizes the data and computes the serial reference
    if (id_procs == num_1)
    {
        gen_rand_array(A, N * N);
        compute(A, B2, N * N);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    int ctn = 0;
    // Scatter: the last process deals out 3-row strips round-robin
    for (int i = 0; i < N - 2; i++)
    {
        if (id_procs == num_1)
        {
            int dest = i % num_1;
            int tag = i / num_1;
            MPI_Send(&A[IDX(i, 0)], N * 3, MPI_DOUBLE, dest, tag, MPI_COMM_WORLD);
        }
    }

    for (int i = 0; i < (N - 2) / num_1; i++)
    {
        if (id_procs != num_1)
        {
            MPI_Recv(&A[IDX(3 * ctn, 0)], 3 * N, MPI_DOUBLE, num_1, ctn, MPI_COMM_WORLD, &status);
            ctn++;
        }
    }
    if (id_procs < (N - 2) % num_1)
    {
        MPI_Recv(&A[IDX(ctn * 3, 0)], 3 * N, MPI_DOUBLE, num_1, ctn, MPI_COMM_WORLD, &status);
        ctn++;
    }

    // compute
    if (id_procs != num_1)
    {
        for (int i = 1; i <= 3 * ctn - 2; i += 3) // one update per 3-row strip
        {
            for (int j = 1; j < N - 1; j++)
            {
                B[IDX((i + 2) / 3, j)] = (A[IDX(i - 1, j)] + A[IDX(i, j + 1)] + A[IDX(i + 1, j)] + A[IDX(i, j - 1)]) / 4.0;
            }
        }
    }

    // Gather: each worker sends its ctn computed rows back exactly once
    if (id_procs == num_1)
    {
        for (int i = 0; i < N - 2; i++)
        {
            int src = i % num_1;
            MPI_Recv(&B[IDX(i + 1, 1)], N - 2, MPI_DOUBLE, src, i / num_1 + N, MPI_COMM_WORLD, &status);
        }
    }
    else
    {
        for (int j = 0; j < ctn; j++)
            MPI_Send(&B[IDX(j + 1, 1)], N - 2, MPI_DOUBLE, num_1, j + N, MPI_COMM_WORLD);
    }

    if (id_procs == num_1)
    {
        if (check_ans(B, B2))
        {
            printf("Done. No error\n");
        }
        else
        {
            printf("Error occurred!\n");
        }
    }
    free(A);
    free(B);
    free(B2);
    MPI_Finalize();
    return 0;
}
--------------------------------------------------------------------------------
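Each worker receives whole 3-row strips (global rows i, i+1, i+2) and computes only the middle row of each strip; the index (i + 2)/3 in the compute loop packs those middle rows into consecutive local rows 1, 2, ..., ctn, which is exactly the layout the gather phase sends back, matched by the j + N tags on the root side.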
/omp/2.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
#define M 20
#define N 20
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }
    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int n = (M + 2) * N;

    int A[n];
    int B[n];
    int C = 41734;

    omp_set_num_threads(4);

    random_init(A, n);
    copy_array(B, A, n);

    clock_t start = clock();
    for (int i = 1; i <= M; i++)
    {
        for (int j = 1; j < N; j++)
        {
            A[IN(i + 1, j + 1, N)] = A[IN(i, j, N)] + C;
        }
    }
    clock_t end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (int i = 1; i <= M; i++)
    {
#pragma omp parallel for
        for (int j = 1; j < N; j++)
        {
            B[IN(i + 1, j + 1, N)] = B[IN(i, j, N)] + C;
        }
    }
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, B, n);
}

int loop2()
{
    int X[101];
    int X2[101];
    int Y[201];
    int Y2[201];
    int B[101];
    int B2[101];
    int n = 110 * 110;
    int A[n], C[n], A2[n];
    random_init(A, n);
    random_init(C, n);
    random_init(Y, 201);
    // random_init(B, 101);
    copy_array(B2, B, 101);
    copy_array(X2, X, 101);
    copy_array(A2, A, n);
    copy_array(Y2, Y, 201);

    omp_set_num_threads(4);
    clock_t start = clock();

    for (int i = 1; i <= 100; i++)
    {
        X2[i] = Y2[i] + 10;
        for (int j = 1; j <= 100; j++)
        {
            B2[j] = A2[IN(j, N, 110)];
            for (int k = 1; k <= 100; k++)
            {
                A2[IN(j + 1, k, 110)] = B2[j] + C[IN(j, k, 110)];
            }
            Y2[i + j] = A2[IN(j + 1, N, 110)];
        }
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // #pragma omp parallel for
    for (int i = 1; i <= 100; i++)
    {
        // #pragma omp parallel for
        for (int j = 1; j <= 100; j++)
        {
            B[j] = A[IN(j, N, 110)];
#pragma omp parallel for
            for (int k = 1; k <= 100; k++)
            {
                A[IN(j + 1, k, 110)] = B[j] + C[IN(j, k, 110)];
            }
        }
    }

    for (int i = 1; i <= 100; i++)
    {
#pragma omp parallel for
        for (int j = 1; j <= 100; j++)
        {
            Y[i + j] = A[IN(j + 1, N, 110)];
        }
    }

#pragma omp parallel for
    for (int i = 1; i <= 100; i++)
    {
        X[i] = Y[i] + 10;
    }

    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, n) && check_ans(B, B2, 100) && check_ans(X, X2, 100);
}

int main()
{
    if (loop1())
        printf("loop1 done!\n");
    else
        printf("loop1 error!\n");
    if (loop2())
        printf("loop2 done!\n");
    else
        printf("loop2 error!\n");
}
--------------------------------------------------------------------------------
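In loop1 the only dependence runs from iteration (i, j) to (i + 1, j + 1); its distance vector is (1, 1), so for a fixed i the iterations of the inner j loop never depend on one another and the inner loop can run in parallel, while the outer i loop must stay sequential. In loop2 the statements are first distributed into separate loops (A, then Y, then X) so that the parallelized index of each resulting loop carries no dependence.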
/omp/2_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
#define M 20
#define N 20
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }
    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int n = (M + 2) * N;

    int A[n];
    int B[n];
    int C = 41734;

    omp_set_num_threads(4);

    random_init(A, n);
    copy_array(B, A, n);

    clock_t start = clock();
    for (int i = 1; i <= M; i++)
    {
        for (int j = 1; j < N; j++)
        {
            A[IN(i + 1, j + 1, N)] = A[IN(i, j, N)] + C;
        }
    }
    clock_t end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (int i = 1; i <= M; i++)
    {
#pragma omp parallel for
        for (int j = 1; j < N; j++)
        {
            B[IN(i + 1, j + 1, N)] = B[IN(i, j, N)] + C;
        }
    }
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, B, n);
}

int loop2()
{
    int X[101];
    int X2[101];
    int Y[201];
    int Y2[201];
    int B[101];
    int B2[101];
    int n = 110 * 110;
    int A[n], C[n], A2[n];
    random_init(A, n);
    random_init(C, n);
    random_init(Y, 201);
    // random_init(B, 101);
    copy_array(B2, B, 101);
    copy_array(X2, X, 101);
    copy_array(A2, A, n);
    copy_array(Y2, Y, 201);

    omp_set_num_threads(4);
    clock_t start = clock();

    for (int i = 1; i <= 100; i++)
    {
        X2[i] = Y2[i] + 10;
        for (int j = 1; j <= 100; j++)
        {
            B2[j] = A2[IN(j, N, 110)];
            for (int k = 1; k <= 100; k++)
            {
                A2[IN(j + 1, k, 110)] = B2[j] + C[IN(j, k, 110)];
            }
            Y2[i + j] = A2[IN(j + 1, N, 110)];
        }
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // #pragma omp parallel for
    for (int i = 1; i <= 100; i++)
    {
        // #pragma omp parallel for
        for (int j = 1; j <= 100; j++)
        {
            B[j] = A[IN(j, N, 110)];
#pragma omp parallel for
            for (int k = 1; k <= 100; k++)
            {
                A[IN(j + 1, k, 110)] = B[j] + C[IN(j, k, 110)];
            }
        }
    }

    for (int i = 1; i <= 100; i++)
    {
#pragma omp parallel for
        for (int j = 1; j <= 100; j++)
        {
            Y[i + j] = A[IN(j + 1, N, 110)];
        }
    }

#pragma omp parallel for
    for (int i = 1; i <= 100; i++)
    {
        X[i] = Y[i] + 10;
    }

    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, n) && check_ans(B, B2, 100) && check_ans(X, X2, 100);
}

int main()
{
    if (loop1())
        printf("loop1 done!\n");
    else
        printf("loop1 error!\n");
    if (loop2())
        printf("loop2 done!\n");
    else
        printf("loop2 error!\n");
}
--------------------------------------------------------------------------------
/omp/4.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
#define min(i, j) (((i) < (j)) ? (i) : (j))
#define max(i, j) (((i) > (j)) ? (i) : (j))
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int i, j, k;
    int n = 20;
    int B[n * n];
    int B2[n * n];
    int B3[n * n];

    omp_set_num_threads(4);
    random_init(B2, n * n);
    copy_array(B, B2, n * n);
    copy_array(B3, B2, n * n);
    clock_t start, end;
    start = clock();
    for (i = 2; i <= 10; i++)
    {
        for (j = i; j <= 10; j++)
        {
            B[IN(i, j, 20)] = (B[IN(i, j - 1, 20)] + B[IN(i - 1, j, 20)]) * 0.5;
        }
    }
    end = clock();

    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (i = 4; i <= 20; i++)
    {
#pragma omp parallel for
        for (j = max(2, i - 10); j <= min(i / 2, 10); j++)
        {
            B3[IN(j, i - j, 20)] = (B3[IN(j, i - j - 1, 20)] + B3[IN(j - 1, i - j, 20)]) * 0.5;
        }
    }
    end = clock();
    printf("openmp diagonal parallel loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(B, B3, n * n);
}

int loop2()
{
    int i;
    int A[20];
    int B[20];
    int A2[20];
    random_init(A, 20);
    copy_array(A2, A, 20);

    omp_set_num_threads(4);

    clock_t start, end;

    start = clock();

    for (int i = 1; i <= 16; i++)
    {
        A[i + 3] = A[i] + B[i];
    }
    end = clock();

    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

    for (int k = 1; k <= 16; k += 3)
    {
#pragma omp parallel for
        for (int i = k; i <= min(16, k + 2); i++)
        {
            A2[i + 3] = A2[i] + B[i];
        }
    }
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A2, A, 20);
}

int loop3()
{
    int i, j, k;
    int A[20];
    int B[20];
    int A2[20];
    random_init(A, 20);
    copy_array(A2, A, 20);

    omp_set_num_threads(4);

    clock_t start, end;

    start = clock();
    for (k = 1; k <= 16; k += 5)
    {
        for (i = k; i <= min(16, k + 4); i++)
        {
            A[i + 3] = A[i] + B[i];
        }
    }
    end = clock();

    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

    for (int k = 1; k <= 16; k += 3)
    {
#pragma omp parallel for
        for (int i = k; i <= min(16, k + 2); i++)
        {
            A2[i + 3] = A2[i] + B[i];
        }
    }
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A2, A, 20);
}

int main()
{
    if (loop1())
        printf("loop1 done!\n");
    else
        printf("loop1 error!\n");
    if (loop2())
        printf("loop2 done!\n");
    else
        printf("loop2 error!\n");
    if (loop3())
        printf("loop3 done\n");
}
--------------------------------------------------------------------------------
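loop1's update reads the left and upper neighbours, so no cell on an anti-diagonal i + j = const depends on another cell of the same diagonal; sweeping the diagonals in order (the transformed i = 4..20 loop) satisfies both dependences while exposing the per-diagonal parallelism. loop2 and loop3 strip-mine the recurrence A[i+3] = A[i] + B[i]: a block of 3 consecutive iterations reads indices k..k+2 and writes k+3..k+5, so the 3 iterations inside each block are independent and only the blocks must run in order.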
/omp/4_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
#define min(i, j) (((i) < (j)) ? (i) : (j))
#define max(i, j) (((i) > (j)) ? (i) : (j))
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int i, j, k;
    int n = 20;
    int B[n * n];
    int B2[n * n];
    int B3[n * n];

    omp_set_num_threads(4);
    random_init(B2, n * n);
    copy_array(B, B2, n * n);
    copy_array(B3, B2, n * n);
    clock_t start, end;
    start = clock();
    for (i = 2; i <= 10; i++)
    {
        for (j = i; j <= 10; j++)
        {
            B[IN(i, j, 20)] = (B[IN(i, j - 1, 20)] + B[IN(i - 1, j, 20)]) * 0.5;
        }
    }
    end = clock();

    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (i = 4; i <= 20; i++)
    {
#pragma omp parallel for
        for (j = max(2, i - 10); j <= min(i / 2, 10); j++)
        {
            B3[IN(j, i - j, 20)] = (B3[IN(j, i - j - 1, 20)] + B3[IN(j - 1, i - j, 20)]) * 0.5;
        }
    }
    end = clock();
    printf("openmp diagonal parallel loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(B, B3, n * n);
}

int loop2()
{
    int i;
    int A[20];
    int B[20];
    int A2[20];
    random_init(A, 20);
    copy_array(A2, A, 20);

    omp_set_num_threads(4);

    clock_t start, end;

    start = clock();

    for (int i = 1; i <= 16; i++)
    {
        A[i + 3] = A[i] + B[i];
    }
    end = clock();

    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

    for (int k = 1; k <= 16; k += 3)
    {
#pragma omp parallel for
        for (int i = k; i <= min(16, k + 2); i++)
        {
            A2[i + 3] = A2[i] + B[i];
        }
    }
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A2, A, 20);
}

int loop3()
{
    int i, j, k;
    int A[20];
    int B[20];
    int A2[20];
    random_init(A, 20);
    copy_array(A2, A, 20);

    omp_set_num_threads(4);

    clock_t start, end;

    start = clock();
    for (k = 1; k <= 16; k += 5)
    {
        for (i = k; i <= min(16, k + 4); i++)
        {
            A[i + 3] = A[i] + B[i];
        }
    }
    end = clock();

    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

    for (int k = 1; k <= 16; k += 3)
    {
#pragma omp parallel for
        for (int i = k; i <= min(16, k + 2); i++)
        {
            A2[i + 3] = A2[i] + B[i];
        }
    }
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A2, A, 20);
}

int main()
{
    if (loop1())
        printf("loop1 done!\n");
    else
        printf("loop1 error!\n");
    if (loop2())
        printf("loop2 done!\n");
    else
        printf("loop2 error!\n");
    if (loop3())
        printf("loop3 done\n");
}
--------------------------------------------------------------------------------
/omp/1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }
    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int A[256];
    int B[256]; // initialize B with the same values as A, then check that the two arrays still match
    omp_set_num_threads(4);
    random_init(A, 256);
    copy_array(B, A, 256);
    clock_t start = clock();
    for (int i = 2; i <= 10; i++)
    {
        for (int j = 2; j <= 10; j++)
        {
            A[IN(i, j, 16)] = 0.5 * (A[IN(i - 1, j - 1, 16)] + A[IN(i + 1, j + 1, 16)]);
        }
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (int i = 2; i <= 10; i++)
    {
#pragma omp parallel for
        for (int j = 2; j <= 10; j++)
        {
            B[IN(i, j, 16)] = 0.5 * (B[IN(i - 1, j - 1, 16)] + B[IN(i + 1, j + 1, 16)]);
        }
    }
    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, B, 256);
}

int loop2()
{
    int A[100];
    int B[100];
    int A2[100];
    int B2[100];
    random_init(A, 100);
    random_init(B, 100);
    copy_array(A2, A, 100);
    copy_array(B2, B, 100);

    int i;
    clock_t start = clock();
    for (i = 2; i <= 20; i++)
    {
        A[2 * i + 2] = A[2 * i - 2] + B[i];
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

#pragma omp parallel num_threads(2) private(i)
    {
        int tid = omp_get_thread_num();
        for (i = 2 + tid; i <= 20; i += 2)
        {
            A2[2 * i + 2] = A2[2 * i - 2] + B2[i];
        }
    }

    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 100);
}

int loop3()
{
    int A[100];
    int B[100];
    int C[100];
    int D[100];
    int B2[100];
    int C2[100];
    random_init(A, 100);
    random_init(B, 100);
    random_init(C, 100);
    copy_array(B2, B, 100);
    copy_array(C2, C, 100);
    int k;
    clock_t start = clock();
    for (int i = 2; i < 20; i++)
    {
        if (A[i] > 0)
            B[i] = C[i - 1] + 1;
        else
            C[i] = B[i] - 1;
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // split the range at sign changes of A; the cross-iteration dependence
    // (B[i] needs C[i-1]) only crosses those split points
    D[0] = 2;
    int m = 1;
    for (int i = 2; i < 20; i++)
    {
        if ((A[i - 1] < 0) && (A[i] > 0))
        {
            D[m] = i;
            m++;
        }
    }
    D[m] = 20;
    for (int i = 0; i < m; i++)
    {
#pragma omp parallel for
        for (k = D[i]; k < D[i + 1]; k++)
        {
            if (A[k] > 0)
            {
                B2[k] = C2[k - 1] + 1;
            }
            else
            {
                C2[k] = B2[k] - 1;
            }
        }
    }
    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(C, C2, 100) && check_ans(B, B2, 100);
}

int main()
{
    omp_set_num_threads(4);
    if (loop1()) printf("loop1 done!\n");
    if (loop2()) printf("loop2 done!\n");
    if (loop3()) printf("loop3 done!\n");
}
--------------------------------------------------------------------------------
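In loop2 the recurrence A[2i+2] = A[2i-2] + B[i] has distance 2 in i (iteration i reads what iteration i - 2 wrote, since 2(i-2)+2 = 2i-2), so the even-i and odd-i chains never touch each other; the two-thread version simply gives one chain to each thread, which is all the parallelism this loop has.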
/omp/1_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }
    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int A[256];
    int B[256]; // initialize B with the same values as A, then check that the two arrays still match
    omp_set_num_threads(4);
    random_init(A, 256);
    copy_array(B, A, 256);
    clock_t start = clock();
    for (int i = 2; i <= 10; i++)
    {
        for (int j = 2; j <= 10; j++)
        {
            A[IN(i, j, 16)] = 0.5 * (A[IN(i - 1, j - 1, 16)] + A[IN(i + 1, j + 1, 16)]);
        }
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (int i = 2; i <= 10; i++)
    {
#pragma omp parallel for
        for (int j = 2; j <= 10; j++)
        {
            B[IN(i, j, 16)] = 0.5 * (B[IN(i - 1, j - 1, 16)] + B[IN(i + 1, j + 1, 16)]);
        }
    }
    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, B, 256);
}

int loop2()
{
    int A[100];
    int B[100];
    int A2[100];
    int B2[100];
    random_init(A, 100);
    random_init(B, 100);
    copy_array(A2, A, 100);
    copy_array(B2, B, 100);

    int i;
    clock_t start = clock();
    for (i = 2; i <= 20; i++)
    {
        A[2 * i + 2] = A[2 * i - 2] + B[i];
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

#pragma omp parallel num_threads(2) private(i)
    {
        int tid = omp_get_thread_num();
        for (i = 2 + tid; i <= 20; i += 2)
        {
            A2[2 * i + 2] = A2[2 * i - 2] + B2[i];
        }
    }

    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 100);
}

int loop3()
{
    int A[100];
    int B[100];
    int C[100];
    int D[100];
    int B2[100];
    int C2[100];
    random_init(A, 100);
    random_init(B, 100);
    random_init(C, 100);
    copy_array(B2, B, 100);
    copy_array(C2, C, 100);
    int k;
    clock_t start = clock();
    for (int i = 2; i < 20; i++)
    {
        if (A[i] > 0)
            B[i] = C[i - 1] + 1;
        else
            C[i] = B[i] - 1;
    }
    clock_t end = clock();
    printf("normal loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // split the range at sign changes of A; the cross-iteration dependence
    // (B[i] needs C[i-1]) only crosses those split points
    D[0] = 2;
    int m = 1;
    for (int i = 2; i < 20; i++)
    {
        if ((A[i - 1] < 0) && (A[i] > 0))
        {
            D[m] = i;
            m++;
        }
    }
    D[m] = 20;
    for (int i = 0; i < m; i++)
    {
#pragma omp parallel for
        for (k = D[i]; k < D[i + 1]; k++)
        {
            if (A[k] > 0)
            {
                B2[k] = C2[k - 1] + 1;
            }
            else
            {
                C2[k] = B2[k] - 1;
            }
        }
    }
    end = clock();
    printf("openmp loop costs: %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(C, C2, 100) && check_ans(B, B2, 100);
}

int main()
{
    omp_set_num_threads(4);
    if (loop1()) printf("loop1 done!\n");
    if (loop2()) printf("loop2 done!\n");
    if (loop3()) printf("loop3 done!\n");
}
--------------------------------------------------------------------------------
/omp/3.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <omp.h>

#define IN(i, j, line) ((i)*line + j)
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

void random_init_f(float *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = (float)(rand() % 1000 - 500);
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

int check_ans_f(float *a, float *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (fabs(a[i] - b[i]) > 0.000001)
        {
            printf("%d\n", i);
            printf("%f %f \n", a[i], b[i]);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

void copy_array_f(float *dst, float *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1() // this loop cannot be parallelized (see homework 1); it is expected to print an error
{
    int i, j, k;
    int n = 500;
    int A[n * n];
    int B[n * n];
    int A2[n * n];
    int B2[n * n];
    clock_t start, end;
    random_init(A, n * n);
    random_init(B, n * n);
    copy_array(A2, A, n * n);
    copy_array(B2, B, n * n);

    omp_set_num_threads(4);

    start = clock();
    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 50; j++)
        {
            A[IN(3 * i + 2, 2 * j - 1, 500)] = A[IN(5 * j, i + 3, 500)] + 2;
        }
    }
    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (i = 1; i <= 100; i++)
    {
#pragma omp parallel for
        for (j = 1; j <= 50; j++)
        {
            A2[IN(3 * i + 2, 2 * j - 1, 500)] = A2[IN(5 * j, i + 3, 500)] + 2;
        }
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    return check_ans(A2, A, n * n);
}

int loop2()
{
    int i, j, k;
    float x, y, z;
    x = 5;
    y = 10;
    z = 20;
    float z2 = z;
    clock_t start, end;

    float A[200], B[200], C[200], D[10000];
    random_init_f(A, 200);
    random_init_f(B, 200);
    random_init_f(C, 200);
    random_init_f(D, 10000);

    float A2[200], B2[200], C2[200], D2[10000];
    copy_array_f(A2, A, 200);
    copy_array_f(B2, B, 200);
    copy_array_f(C2, C, 200);
    copy_array_f(D2, D, 10000);

    // copy_array(W, A, 100);
    omp_set_num_threads(4);
    start = clock();
    x = y * 2;
    for (i = 1; i <= 100; i++)
    {
        C[i] = B[i] + x;
        A[i] = C[i - 1] + z;
        C[i + 1] = A[i] * B[i];
        for (j = 1; j <= 50; j++)
        {
            D[IN(i, j, 60)] = D[IN(i, j - 1, 60)] + x;
        }
    }
    z = y + 4;
    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    x = y * 2;
#pragma omp parallel for
    for (i = 1; i <= 100; i++)
    {
        C2[i] = B2[i] + x;
    }
#pragma omp parallel for private(i, j)
    for (i = 1; i <= 100; i++)
    {
        A2[i] = C2[i - 1] + z2;
        for (j = 1; j <= 50; j++)
        {
            D2[IN(i, j, 60)] = D2[IN(i, j - 1, 60)] + x;
        }
    }
    C2[101] = A2[100] * B2[100];
    z2 = y + 4;
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans_f(D2, D, 200) && check_ans_f(C2, C, 200);
}

int main()
{
    if (loop1())
        printf("loop1 done\n");
    else
        printf("loop1 error\n");

    if (loop2()) printf("loop2 done\n");
}
--------------------------------------------------------------------------------
/omp/3_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <omp.h>

#define IN(i, j, line) ((i)*line + j)
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

void random_init_f(float *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = (float)(rand() % 1000 - 500);
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

int check_ans_f(float *a, float *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (fabs(a[i] - b[i]) > 0.000001)
        {
            printf("%d\n", i);
            printf("%f %f \n", a[i], b[i]);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

void copy_array_f(float *dst, float *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1() // this loop cannot be parallelized (see homework 1); it is expected to print an error
{
    int i, j, k;
    int n = 500;
    int A[n * n];
    int B[n * n];
    int A2[n * n];
    int B2[n * n];
    clock_t start, end;
    random_init(A, n * n);
    random_init(B, n * n);
    copy_array(A2, A, n * n);
    copy_array(B2, B, n * n);

    omp_set_num_threads(4);

    start = clock();
    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 50; j++)
        {
            A[IN(3 * i + 2, 2 * j - 1, 500)] = A[IN(5 * j, i + 3, 500)] + 2;
        }
    }
    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (i = 1; i <= 100; i++)
    {
#pragma omp parallel for
        for (j = 1; j <= 50; j++)
        {
            A2[IN(3 * i + 2, 2 * j - 1, 500)] = A2[IN(5 * j, i + 3, 500)] + 2;
        }
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    return check_ans(A2, A, n * n);
}

int loop2()
{
    int i, j, k;
    float x, y, z;
    x = 5;
    y = 10;
    z = 20;
    float z2 = z;
    clock_t start, end;

    float A[200], B[200], C[200], D[10000];
    random_init_f(A, 200);
    random_init_f(B, 200);
    random_init_f(C, 200);
    random_init_f(D, 10000);

    float A2[200], B2[200], C2[200], D2[10000];
    copy_array_f(A2, A, 200);
    copy_array_f(B2, B, 200);
    copy_array_f(C2, C, 200);
    copy_array_f(D2, D, 10000);

    // copy_array(W, A, 100);
    omp_set_num_threads(4);
    start = clock();
    x = y * 2;
    for (i = 1; i <= 100; i++)
    {
        C[i] = B[i] + x;
        A[i] = C[i - 1] + z;
        C[i + 1] = A[i] * B[i];
        for (j = 1; j <= 50; j++)
        {
            D[IN(i, j, 60)] = D[IN(i, j - 1, 60)] + x;
        }
    }
    z = y + 4;
    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    x = y * 2;
#pragma omp parallel for
    for (i = 1; i <= 100; i++)
    {
        C2[i] = B2[i] + x;
    }
#pragma omp parallel for private(i, j)
    for (i = 1; i <= 100; i++)
    {
        A2[i] = C2[i - 1] + z2;
        for (j = 1; j <= 50; j++)
        {
            D2[IN(i, j, 60)] = D2[IN(i, j - 1, 60)] + x;
        }
    }
    C2[101] = A2[100] * B2[100];
    z2 = y + 4;
    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans_f(D2, D, 200) && check_ans_f(C2, C, 200);
}

int main()
{
    if (loop1())
        printf("loop1 done\n");
    else
        printf("loop1 error\n");

    if (loop2()) printf("loop2 done\n");
}
--------------------------------------------------------------------------------
/Kmeans/serial.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

// Creates an array of random floats. Each number has a value from 0 - 1
float *create_rand_nums(const int num_elements)
{
    float *rand_nums = (float *)malloc(sizeof(float) * num_elements);
    assert(rand_nums != NULL);
    for (int i = 0; i < num_elements; i++)
    {
        rand_nums[i] = (rand() / (float)RAND_MAX);
    }
    return rand_nums;
}

// Distance**2 between d-vectors pointed to by v1, v2.
float distance2(const float *v1, const float *v2, const int d)
{
    float dist = 0.0;
    for (int i = 0; i < d; i++)
    {
        float diff = v1[i] - v2[i];
        dist += diff * diff;
    }
    return dist;
}

// Assign a site to the correct cluster by computing its distances to
// each cluster centroid.
int assign_site(const float *site, float *centroids,
                const int k, const int d)
{
    int best_cluster = 0;
    float best_dist = distance2(site, centroids, d);
    float *centroid = centroids + d;
    for (int c = 1; c < k; c++, centroid += d)
    {
        float dist = distance2(site, centroid, d);
        if (dist < best_dist)
        {
            best_cluster = c;
            best_dist = dist;
        }
    }
    return best_cluster;
}

// Add a site (vector) into a sum of sites (vector).
void add_site(const float *site, float *sum, const int d)
{
    for (int i = 0; i < d; i++)
    {
        sum[i] += site[i];
    }
}

// Print the centroids one per line.
void print_centroids(float *centroids, const int k, const int d)
{
    float *p = centroids;
    printf("Centroids:\n");
    for (int i = 0; i < k; i++)
    {
        for (int j = 0; j < d; j++, p++)
        {
            printf("%f ", *p);
        }
        printf("\n");
    }
}

int main(int argc, char **argv)
{
    int sites_per_proc = atoi(argv[1]);
    int nprocs = 8;        // total sites = sites_per_proc * nprocs, matching an 8-process MPI run
    int k = atoi(argv[2]); // number of clusters.
    int d = atoi(argv[3]); // dimension of data.
    srand(31359);
    float *sums;
    assert(sums = malloc(k * d * sizeof(float)));
    // The number of sites assigned to each cluster by this process. k integers.
    int *counts;
    assert(counts = malloc(k * sizeof(int)));
    // The current centroids against which sites are being compared.
    // These are shipped to the process by the root process.
    float *centroids;
    assert(centroids = malloc(k * d * sizeof(float)));

    float *all_sites = NULL;
    int *labels;

    all_sites = create_rand_nums(d * sites_per_proc * nprocs);
    // Take the first k sites as the initial cluster centroids.
    for (int i = 0; i < k * d; i++)
    {
        centroids[i] = all_sites[i];
    }
    print_centroids(centroids, k, d);
    assert(labels = malloc(nprocs * sites_per_proc * sizeof(int)));

    float norm = 1.0;

    while (norm > 0.00001)
    { // While they've moved...
        for (int i = 0; i < k * d; i++)
            sums[i] = 0.0;
        for (int i = 0; i < k; i++)
            counts[i] = 0;

        // Find the closest centroid to each site and assign to cluster.
        float *site = all_sites;
        for (int i = 0; i < sites_per_proc * nprocs; i++, site += d)
        {
            int cluster = assign_site(site, centroids, k, d);
            // Record the assignment of the site to the cluster.
            counts[cluster]++;
            add_site(site, &sums[cluster * d], d);
        }

        for (int i = 0; i < k; i++)
        {
            for (int j = 0; j < d; j++)
            {
                int dij = d * i + j;
                sums[dij] /= counts[i];
            }
        }
        // Have the centroids changed much?
        norm = distance2(sums, centroids, d * k);
        printf("norm: %f\n", norm);
        // Copy new centroids from grand_sums into centroids.
        for (int i = 0; i < k * d; i++)
        {
            centroids[i] = sums[i];
        }
        print_centroids(centroids, k, d);
    }

    // Now centroids are fixed, so compute a final label for each site.
    float *site = all_sites;
    for (int i = 0; i < sites_per_proc * nprocs; i++, site += d)
    {
        labels[i] = assign_site(site, centroids, k, d);
    }
}
--------------------------------------------------------------------------------
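The serial baseline takes the same three arguments as the MPI version (sites per process, number of clusters k, data dimension d) and fixes nprocs = 8, so both versions cluster the same total number of sites from the same seed; invoked along the lines of ./serial 10000 4 2 (binary name illustrative).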
/Kmeans/Kmeans_mpi.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <mpi.h>

float *create_rand_nums(const int num_elements)
{
    float *rand_nums = (float *)malloc(sizeof(float) * num_elements);
    assert(rand_nums != NULL);
    for (int i = 0; i < num_elements; i++)
    {
        rand_nums[i] = (rand() / (float)RAND_MAX);
    }
    return rand_nums;
}

float distance2(const float *v1, const float *v2, const int d)
{
    float dist = 0.0;
    for (int i = 0; i < d; i++)
    {
        float diff = v1[i] - v2[i];
        dist += diff * diff;
    }
    return dist;
}

int assign_site(const float *site, float *centroids,
                const int k, const int d)
{
    int best_cluster = 0;
    float best_dist = distance2(site, centroids, d);
    float *centroid = centroids + d;
    for (int c = 1; c < k; c++, centroid += d)
    {
        float dist = distance2(site, centroid, d);
        if (dist < best_dist)
        {
            best_cluster = c;
            best_dist = dist;
        }
    }
    return best_cluster;
}

void add_site(const float *site, float *sum, const int d)
{
    for (int i = 0; i < d; i++)
    {
        sum[i] += site[i];
    }
}

void print_centroids(float *centroids, const int k, const int d)
{
    float *p = centroids;
    printf("Centroids:\n");
    for (int i = 0; i < k; i++)
    {
        for (int j = 0; j < d; j++, p++)
        {
            printf("%f ", *p);
        }
        printf("\n");
    }
}

int main(int argc, char **argv)
{
    if (argc != 4)
    {
        fprintf(stderr,
                "Usage: kmeans num_sites_per_proc num_means num_dimensions\n");
        exit(1);
    }

    int sites_per_proc = atoi(argv[1]);
    int k = atoi(argv[2]); // number of clusters.
    int d = atoi(argv[3]); // dimension of data.
    srand(31359);

    MPI_Init(NULL, NULL);
    int rank, nprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    float *sites;
    assert(sites = malloc(sites_per_proc * d * sizeof(float)));
    float *sums;
    assert(sums = malloc(k * d * sizeof(float)));
    int *counts;
    assert(counts = malloc(k * sizeof(int)));
    float *centroids;
    assert(centroids = malloc(k * d * sizeof(float)));
    // The cluster assignments for each site.
    int *labels;
    assert(labels = malloc(sites_per_proc * sizeof(int)));

    float *all_sites = NULL;
    float *grand_sums = NULL;
    int *grand_counts = NULL;
    int *all_labels = NULL; /* only allocated (and used) on rank 0 */
    if (rank == 0)
    {
        all_sites = create_rand_nums(d * sites_per_proc * nprocs);
        for (int i = 0; i < k * d; i++)
        {
            centroids[i] = all_sites[i];
        }
        print_centroids(centroids, k, d);
        assert(grand_sums = malloc(k * d * sizeof(float)));
        assert(grand_counts = malloc(k * sizeof(int)));
        assert(all_labels = malloc(nprocs * sites_per_proc * sizeof(int)));
    }

    MPI_Scatter(all_sites, d * sites_per_proc, MPI_FLOAT, sites,
                d * sites_per_proc, MPI_FLOAT, 0, MPI_COMM_WORLD);

    float norm = 1.0;

    while (norm > 0.00001)
    {
        MPI_Bcast(centroids, k * d, MPI_FLOAT, 0, MPI_COMM_WORLD);
        for (int i = 0; i < k * d; i++)
            sums[i] = 0.0;
        for (int i = 0; i < k; i++)
            counts[i] = 0;
        float *site = sites;
        for (int i = 0; i < sites_per_proc; i++, site += d)
        {
            int cluster = assign_site(site, centroids, k, d);
            counts[cluster]++;
            add_site(site, &sums[cluster * d], d);
        }

        MPI_Reduce(sums, grand_sums, k * d, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
        MPI_Reduce(counts, grand_counts, k, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

        if (rank == 0)
        {
            for (int i = 0; i < k; i++)
            {
                for (int j = 0; j < d; j++)
                {
                    int dij = d * i + j;
                    grand_sums[dij] /= grand_counts[i];
                }
            }
            norm = distance2(grand_sums, centroids, d * k);
            printf("norm: %f\n", norm);
            for (int i = 0; i < k * d; i++)
            {
                centroids[i] = grand_sums[i];
            }
            print_centroids(centroids, k, d);
        }
        MPI_Bcast(&norm, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
    }

    float *site = sites;
    for (int i = 0; i < sites_per_proc; i++, site += d)
    {
        labels[i] = assign_site(site, centroids, k, d);
    }

    // Gather all labels into root process.
    MPI_Gather(labels, sites_per_proc, MPI_INT,
               all_labels, sites_per_proc, MPI_INT, 0, MPI_COMM_WORLD);

    MPI_Finalize();
}
--------------------------------------------------------------------------------
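Per iteration the pattern is: broadcast the current centroids, locally accumulate per-cluster sums and counts, MPI_Reduce both onto rank 0, let rank 0 form the new centroids and the movement norm, then broadcast the norm so every rank leaves the loop together. Run against the hostfile from /mpi/config, e.g. mpirun --hostfile config -np 8 ./Kmeans_mpi 10000 4 2 (launch line illustrative).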
/omp/5.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
#define min(i, j) (((i) < (j)) ? (i) : (j))
#define max(i, j) (((i) > (j)) ? (i) : (j))
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int i, j, k;
    int A[200], B[200], C[200], D[200];
    int A2[200], B2[200], C2[200], D2[200];
    random_init(A, 200);
    random_init(B, 200);
    random_init(C, 200);
    random_init(D, 200);
    copy_array(A2, A, 200);
    copy_array(B2, B, 200);
    copy_array(C2, C, 200);
    copy_array(D2, D, 200);

    clock_t start, end;

    omp_set_num_threads(4);

    start = clock();

    for (i = 1; i <= 100; i++)
    {
        A[i] = A[i] + B[i - 1];
        B[i] = C[i - 1] * 2;
        C[i] = 1 + B[i]; // use addition instead of division to stay in integers
        D[i] = C[i] * C[i];
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // resolve the serial B/C recurrence first, then the rest is parallel
    for (i = 1; i <= 100; i++)
    {
        B2[i] = C2[i - 1] * 2;
        C2[i] = 1 + B2[i];
    }

#pragma omp parallel for
    for (i = 1; i <= 100; i++)
    {
        A2[i] = A2[i] + B2[i - 1];
        D2[i] = C2[i] * C2[i];
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 200);
}

int loop2()
{
    int i, j, k;
    int A[1001], B[1001], C[1001], D[1001];
    int A2[1001], B2[1001], C2[1001], D2[1001];
    random_init(A, 1001);
    random_init(B, 1001);
    random_init(C, 1001);
    random_init(D, 1001);
    copy_array(A2, A, 1001);
    copy_array(B2, B, 1001);
    copy_array(C2, C, 1001);
    copy_array(D2, D, 1001);

    clock_t start, end;

    start = clock();

    for (i = 1; i <= 1000; i++)
    {
        A[i] = B[i] + C[i];
        D[i] = (A[i] + A[999 - i + 1]) / 2;
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
#pragma omp parallel for
    for (i = 1; i <= 500; i++)
    {
        A2[i] = B2[i] + C2[i];
        D2[i] = (A2[i] + A2[1000 - i]) / 2;
    }

#pragma omp parallel for
    for (i = 501; i <= 999; i++)
    {
        A2[i] = B2[i] + C2[i];
        D2[i] = (A2[i] + A2[1000 - i]) / 2;
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 1000);
}

int loop3()
{
    int i, j, k;
    int n = 510 * 510;
    int B[n], D[n];
    int B2[n], D2[n];
    int A[510][510], A2[510][510], C[510][510], C2[510][510];

    omp_set_num_threads(4);
    for (i = 0; i < 510; i++)
        for (j = 0; j < 510; j++)
        {
            A[i][j] = rand() % 1000 - 500;
            A2[i][j] = A[i][j];
            C[i][j] = rand() % 1000 - 500;
            C2[i][j] = C[i][j];
        }

    // random_init(A, n);
    random_init(B, n);
    // random_init(C, n);
    random_init(D, n);
    // copy_array(A2, A, n);
    copy_array(B2, B, n);
    // copy_array(C2, C, n);
    copy_array(D2, D, n);

    clock_t start, end;

    omp_set_num_threads(4);

    start = clock();

    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 100; j++)
        {
            A[3 * i + 2 * j][2 * j] = C[i][j] * 2;
            D[IN(i, j, 510)] = A[i - j + 6][i + j];
        }
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

#pragma omp parallel for collapse(2)
    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 100; j++)
        {
            A2[3 * i + 2 * j][2 * j] = C2[i][j] * 2;
            D2[IN(i, j, 510)] = A2[i - j + 6][i + j];
        }
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    for (i = 0; i < 510; i++)
        for (j = 0; j < 510; j++)
        {
            if (A[i][j] != A2[i][j])
            {
                printf("%d %d\n", i, j);
                printf("%d %d\n", A[i][j], A2[i][j]);
                return 0;
            }
        }
    return 1;
}
int main()
{
    if (loop1())
        printf("loop1 done!\n");
    else
        printf("loop1 error!\n");
    if (loop2())
        printf("loop2 done!\n");
    else
        printf("loop2 error!\n");

    if (loop3())
        printf("loop3 done!\n");
    else
        printf("loop3 error!\n");
}
--------------------------------------------------------------------------------
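loop2's only cross-iteration dependence is D[i] reading A[1000-i]: for i <= 500 the serial loop reads the not-yet-rewritten value, and for i >= 501 it reads the value rewritten in the first half. Splitting at i = 500 reproduces exactly that: the first parallel loop writes only A2[1..500] while reading untouched A2[500..999], and the second loop reads values the first loop already produced. (Both versions stop short of index 1000, and check_ans compares indices 0..999 only.)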
/omp/5_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i)*line + j)
#define min(i, j) (((i) < (j)) ? (i) : (j))
#define max(i, j) (((i) > (j)) ? (i) : (j))
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int i, j, k;
    int A[200], B[200], C[200], D[200];
    int A2[200], B2[200], C2[200], D2[200];
    random_init(A, 200);
    random_init(B, 200);
    random_init(C, 200);
    random_init(D, 200);
    copy_array(A2, A, 200);
    copy_array(B2, B, 200);
    copy_array(C2, C, 200);
    copy_array(D2, D, 200);

    clock_t start, end;

    omp_set_num_threads(4);

    start = clock();

    for (i = 1; i <= 100; i++)
    {
        A[i] = A[i] + B[i - 1];
        B[i] = C[i - 1] * 2;
        C[i] = 1 + B[i]; // use addition instead of division to stay in integers
        D[i] = C[i] * C[i];
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    for (i = 1; i <= 100; i++)
    {
        B2[i] = C2[i - 1] * 2;
        C2[i] = 1 + B2[i];
    }

#pragma omp parallel for
    for (i = 1; i <= 100; i++)
    {
        A2[i] = A2[i] + B2[i - 1];
        D2[i] = C2[i] * C2[i];
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 200);
}

int loop2()
{
    int i, j, k;
    int A[1001], B[1001], C[1001], D[1001];
    int A2[1001], B2[1001], C2[1001], D2[1001];
    random_init(A, 1001);
    random_init(B, 1001);
    random_init(C, 1001);
/omp/5.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i) * (line) + (j))
#define min(i, j) (((i) < (j)) ? (i) : (j))
#define max(i, j) (((i) > (j)) ? (i) : (j))

// Note: reseeding from time(NULL) on every call means arrays filled
// within the same second get identical contents.
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int i;
    int A[200], B[200], C[200], D[200];
    int A2[200], B2[200], C2[200], D2[200];
    random_init(A, 200);
    random_init(B, 200);
    random_init(C, 200);
    random_init(D, 200);
    copy_array(A2, A, 200);
    copy_array(B2, B, 200);
    copy_array(C2, C, 200);
    copy_array(D2, D, 200);

    clock_t start, end;

    omp_set_num_threads(4);

    start = clock();

    for (i = 1; i <= 100; i++)
    {
        A[i] = A[i] + B[i - 1];
        B[i] = C[i - 1] * 2;
        C[i] = 1 + B[i]; // division turned into addition, to stay in integers
        D[i] = C[i] * C[i];
    }

    end = clock();
    // clock() measures CPU time summed over all threads; omp_get_wtime()
    // would be the right choice for wall-clock timing.
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // The B/C recurrence carries a dependence across iterations, so it
    // stays serial ...
    for (i = 1; i <= 100; i++)
    {
        B2[i] = C2[i - 1] * 2;
        C2[i] = 1 + B2[i];
    }

    // ... while A and D only read values the serial loop has already
    // finalized, so their iterations are independent.
    #pragma omp parallel for
    for (i = 1; i <= 100; i++)
    {
        A2[i] = A2[i] + B2[i - 1];
        D2[i] = C2[i] * C2[i];
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 200);
}

int loop2()
{
    int i;
    int A[1001], B[1001], C[1001], D[1001];
    int A2[1001], B2[1001], C2[1001], D2[1001];
    random_init(A, 1001);
    random_init(B, 1001);
    random_init(C, 1001);
    random_init(D, 1001);
    copy_array(A2, A, 1001);
    copy_array(B2, B, 1001);
    copy_array(C2, C, 1001);
    copy_array(D2, D, 1001);

    clock_t start, end;

    start = clock();

    for (i = 1; i <= 1000; i++)
    {
        A[i] = B[i] + C[i];
        D[i] = (A[i] + A[999 - i + 1]) / 2; // i.e. A[1000 - i]
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // D[i] reads A[1000 - i]: for i <= 500 that is a not-yet-updated
    // value, for i > 500 a value the first half has already produced,
    // so splitting the range at 500 removes the cross-half dependence.
    #pragma omp parallel for
    for (i = 1; i <= 500; i++)
    {
        A2[i] = B2[i] + C2[i];
        D2[i] = (A2[i] + A2[1000 - i]) / 2;
    }

    #pragma omp parallel for
    for (i = 501; i <= 1000; i++) // was 999: iteration i = 1000 was dropped
    {
        A2[i] = B2[i] + C2[i];
        D2[i] = (A2[i] + A2[1000 - i]) / 2;
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 1001); // was 1000: index 1000 went unchecked
}

int loop3()
{
    int i, j;
    int n = 510 * 510;
    // ~1 MB per array: keep the big 1-D arrays on the heap and the
    // matrices static, so four of each never blow the stack limit.
    int *B = malloc(n * sizeof(int));
    int *D = malloc(n * sizeof(int));
    int *B2 = malloc(n * sizeof(int));
    int *D2 = malloc(n * sizeof(int));
    static int A[510][510], A2[510][510], C[510][510], C2[510][510];

    omp_set_num_threads(4);
    for (i = 0; i < 510; i++)
        for (j = 0; j < 510; j++)
        {
            A[i][j] = rand() % 1000 - 500;
            A2[i][j] = A[i][j];
            C[i][j] = rand() % 1000 - 500;
            C2[i][j] = C[i][j];
        }

    random_init(B, n);
    random_init(D, n);
    copy_array(B2, B, n);
    copy_array(D2, D, n);

    clock_t start, end;

    start = clock();

    // Note: i - j + 6 goes negative for i - j < -6, an out-of-bounds
    // read inherited from the loop as given; only A is compared below,
    // and its written cells never alias the cells read here.
    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 100; j++)
        {
            A[3 * i + 2 * j][2 * j] = C[i][j] * 2;
            D[IN(i, j, 510)] = A[i - j + 6][i + j];
        }
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

    // Within 1 <= i, j <= 100 the written cells A2[3i+2j][2j] never
    // coincide with the read cells A2[i-j+6][i+j], so the iteration
    // space is dependence-free and both loop levels can be collapsed.
    #pragma omp parallel for collapse(2)
    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 100; j++)
        {
            A2[3 * i + 2 * j][2 * j] = C2[i][j] * 2;
            D2[IN(i, j, 510)] = A2[i - j + 6][i + j];
        }
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    free(B);
    free(D);
    free(B2);
    free(D2);

    for (i = 0; i < 510; i++)
        for (j = 0; j < 510; j++)
        {
            if (A[i][j] != A2[i][j])
            {
                printf("%d %d\n", i, j);
                printf("%d %d\n", A[i][j], A2[i][j]);
                return 0;
            }
        }
    return 1;
}

int main()
{
    if (loop1())
        printf("loop1 done!\n");
    else
        printf("loop1 error!\n");
    if (loop2())
        printf("loop2 done!\n");
    else
        printf("loop2 error!\n");

    if (loop3())
        printf("loop3 done!\n");
    else
        printf("loop3 error!\n");
}
--------------------------------------------------------------------------------
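Both omp/5.c and omp/5_1.c time their loops with clock(), which under OpenMP accumulates CPU time across all threads, so a 4-thread region can report roughly four times its wall time and make the parallel version look slower than it is. A minimal self-contained sketch (hypothetical file wtime.c, not part of this repository) contrasting the two clocks:

/* wtime.c -- minimal sketch (hypothetical, not in this repo): clock()
 * sums CPU time over every OpenMP thread; omp_get_wtime() is the
 * wall clock and is what speedup should be measured against. */
#include <stdio.h>
#include <time.h>
#include <omp.h>

int main(void)
{
    clock_t c0 = clock();
    double w0 = omp_get_wtime();

    double sum = 0.0;
    #pragma omp parallel for reduction(+ : sum)
    for (int i = 0; i < 100000000; i++)
        sum += 1.0 / (i + 1.0);

    printf("clock():         %f s (CPU, all threads)\n",
           (double)(clock() - c0) / CLOCKS_PER_SEC);
    printf("omp_get_wtime(): %f s (wall)\n", omp_get_wtime() - w0);
    printf("sum=%f\n", sum);
    return 0;
}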
/omp/5_1.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
#define IN(i, j, line) ((i) * (line) + (j))
#define min(i, j) (((i) < (j)) ? (i) : (j))
#define max(i, j) (((i) > (j)) ? (i) : (j))

// Note: reseeding from time(NULL) on every call means arrays filled
// within the same second get identical contents.
void random_init(int *a, int num)
{
    srand(time(NULL));
    for (int i = 0; i < num; i++)
    {
        a[i] = rand() % 1000 - 500;
    }
}

int check_ans(int *a, int *b, int num)
{
    for (int i = 0; i < num; i++)
    {
        if (a[i] != b[i])
        {
            printf("%d\n", i);
            return 0;
        }
    }

    return 1;
}

void copy_array(int *dst, int *src, int num)
{
    for (int i = 0; i < num; i++)
    {
        dst[i] = src[i];
    }
}

int loop1()
{
    int i;
    int A[200], B[200], C[200], D[200];
    int A2[200], B2[200], C2[200], D2[200];
    random_init(A, 200);
    random_init(B, 200);
    random_init(C, 200);
    random_init(D, 200);
    copy_array(A2, A, 200);
    copy_array(B2, B, 200);
    copy_array(C2, C, 200);
    copy_array(D2, D, 200);

    clock_t start, end;

    omp_set_num_threads(4);

    start = clock();

    for (i = 1; i <= 100; i++)
    {
        A[i] = A[i] + B[i - 1];
        B[i] = C[i - 1] * 2;
        C[i] = 1 + B[i]; // division turned into addition, to stay in integers
        D[i] = C[i] * C[i];
    }

    end = clock();
    // clock() measures CPU time summed over all threads; omp_get_wtime()
    // would be the right choice for wall-clock timing.
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // The B/C recurrence carries a dependence across iterations, so it
    // stays serial ...
    for (i = 1; i <= 100; i++)
    {
        B2[i] = C2[i - 1] * 2;
        C2[i] = 1 + B2[i];
    }

    // ... while A and D only read values the serial loop has already
    // finalized, so their iterations are independent.
    #pragma omp parallel for
    for (i = 1; i <= 100; i++)
    {
        A2[i] = A2[i] + B2[i - 1];
        D2[i] = C2[i] * C2[i];
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 200);
}

int loop2()
{
    int i;
    int A[1001], B[1001], C[1001], D[1001];
    int A2[1001], B2[1001], C2[1001], D2[1001];
    random_init(A, 1001);
    random_init(B, 1001);
    random_init(C, 1001);
    random_init(D, 1001);
    copy_array(A2, A, 1001);
    copy_array(B2, B, 1001);
    copy_array(C2, C, 1001);
    copy_array(D2, D, 1001);

    clock_t start, end;

    start = clock();

    for (i = 1; i <= 1000; i++)
    {
        A[i] = B[i] + C[i];
        D[i] = (A[i] + A[999 - i + 1]) / 2; // i.e. A[1000 - i]
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();
    // D[i] reads A[1000 - i]: for i <= 500 that is a not-yet-updated
    // value, for i > 500 a value the first half has already produced,
    // so splitting the range at 500 removes the cross-half dependence.
    #pragma omp parallel for
    for (i = 1; i <= 500; i++)
    {
        A2[i] = B2[i] + C2[i];
        D2[i] = (A2[i] + A2[1000 - i]) / 2;
    }

    #pragma omp parallel for
    for (i = 501; i <= 1000; i++) // was 999: iteration i = 1000 was dropped
    {
        A2[i] = B2[i] + C2[i];
        D2[i] = (A2[i] + A2[1000 - i]) / 2;
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);
    return check_ans(A, A2, 1001); // was 1000: index 1000 went unchecked
}

int loop3()
{
    int i, j;
    int n = 510 * 510;
    // ~1 MB per array: keep the big 1-D arrays on the heap and the
    // matrices static, so four of each never blow the stack limit.
    int *B = malloc(n * sizeof(int));
    int *D = malloc(n * sizeof(int));
    int *B2 = malloc(n * sizeof(int));
    int *D2 = malloc(n * sizeof(int));
    static int A[510][510], A2[510][510], C[510][510], C2[510][510];

    omp_set_num_threads(4);
    for (i = 0; i < 510; i++)
        for (j = 0; j < 510; j++)
        {
            A[i][j] = rand() % 1000 - 500;
            A2[i][j] = A[i][j];
            C[i][j] = rand() % 1000 - 500;
            C2[i][j] = C[i][j];
        }

    random_init(B, n);
    random_init(D, n);
    copy_array(B2, B, n);
    copy_array(D2, D, n);

    clock_t start, end;

    start = clock();

    // Note: i - j + 6 goes negative for i - j < -6, an out-of-bounds
    // read inherited from the loop as given; only A is compared below,
    // and its written cells never alias the cells read here.
    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 100; j++)
        {
            A[3 * i + 2 * j][2 * j] = C[i][j] * 2;
            D[IN(i, j, 510)] = A[i - j + 6][i + j];
        }
    }

    end = clock();
    printf("normal loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    start = clock();

    // Within 1 <= i, j <= 100 the written cells A2[3i+2j][2j] never
    // coincide with the read cells A2[i-j+6][i+j], so the iteration
    // space is dependence-free and both loop levels can be collapsed.
    #pragma omp parallel for collapse(2)
    for (i = 1; i <= 100; i++)
    {
        for (j = 1; j <= 100; j++)
        {
            A2[3 * i + 2 * j][2 * j] = C2[i][j] * 2;
            D2[IN(i, j, 510)] = A2[i - j + 6][i + j];
        }
    }

    end = clock();
    printf("openmp loop costs : %Lf\n", (long double)(end - start) / CLOCKS_PER_SEC);

    free(B);
    free(D);
    free(B2);
    free(D2);

    for (i = 0; i < 510; i++)
        for (j = 0; j < 510; j++)
        {
            if (A[i][j] != A2[i][j])
            {
                printf("%d %d\n", i, j);
                printf("%d %d\n", A[i][j], A2[i][j]);
                return 0;
            }
        }
    return 1;
}

int main()
{
    if (loop1())
        printf("loop1 done!\n");
    else
        printf("loop1 error!\n");
    if (loop2())
        printf("loop2 done!\n");
    else
        printf("loop2 error!\n");

    if (loop3())
        printf("loop3 done!\n");
    else
        printf("loop3 error!\n");
}
--------------------------------------------------------------------------------
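mpi/3.c below implements a Fox-style (broadcast-multiply-roll) block matrix multiply on a square process grid. Its block distribution rests on one datatype trick: MPI_Type_vector(b, b, N, MPI_INT, ...) describes b rows of b ints with a stride of N ints between rows, which is exactly one b-by-b block of a row-major N-by-N matrix. A minimal self-contained sketch (hypothetical file subblock.c, not part of this repository) showing just that type at work:

/* subblock.c -- minimal sketch (hypothetical, not in this repo) of the
 * strided-block datatype mpi/3.c uses to carve submatrices out of the
 * full matrix without copying them into a staging buffer first. */
#include <stdio.h>
#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    enum { N = 4, b = 2 };
    int mat[N * N], blk[b * b];
    for (int i = 0; i < N * N; i++)
        mat[i] = i;

    /* b rows of b ints, stepping N ints between rows: one b x b block. */
    MPI_Datatype SubMat;
    MPI_Type_vector(b, b, N, MPI_INT, &SubMat);
    MPI_Type_commit(&SubMat);

    if (rank == 0)
    {
        /* Send the top-right b x b block (start offset = column b) to
         * self; the receive side unpacks it contiguously. */
        MPI_Sendrecv(mat + b, 1, SubMat, 0, 0,
                     blk, b * b, MPI_INT, 0, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("block: %d %d %d %d\n", blk[0], blk[1], blk[2], blk[3]);
        /* prints: block: 2 3 6 7 */
    }

    MPI_Type_free(&SubMat);
    MPI_Finalize();
    return 0;
}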
/mpi/3.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <mpi.h>

#define IDX(i, j, N) (((i) * (N)) + (j))

// Note: reseeding from clock() once per row is questionable style; a
// single srand() call up front would be cleaner.
void gen_rand_mat(int *a, int num)
{
    for (int i = 0; i < num; i++)
    {
        srand(clock());
        for (int j = 0; j < num; j++)
        {
            a[IDX(i, j, num)] = rand() % 100;
        }
    }
}

void print_mat(int *a, int num, int id)
{
    for (int i = 0; i < num; i++)
    {
        for (int j = 0; j < num; j++)
        {
            printf("|%d : %d ", id, a[IDX(i, j, num)]);
        }
        printf("\n");
    }
}

// C += A * B for num x num row-major blocks.
void compute(int *A, int *B, int *C, int num)
{
    for (int i = 0; i < num; i++)
    {
        for (int j = 0; j < num; j++)
        {
            for (int k = 0; k < num; k++)
                C[IDX(i, j, num)] += A[IDX(i, k, num)] * B[IDX(k, j, num)];
        }
    }
}

int check(int *C, int *nC, int num)
{
    for (int i = 0; i < num; i++)
    {
        for (int j = 0; j < num; j++)
        {
            if (C[IDX(i, j, num)] != nC[IDX(i, j, num)])
            {
                return 0;
            }
        }
    }
    return 1;
}
int main(int argc, char *argv[])
{
    int id_procs, num_procs;
    int block_size, sqrt_procs;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &id_procs);

    sqrt_procs = (int)sqrt((double)num_procs);
    if (sqrt_procs * sqrt_procs != num_procs)
    {
        if (id_procs == 0)
            fprintf(stderr, "number of processes must be a perfect square\n");
        MPI_Finalize();
        return 1;
    }
    if (argc != 2)
    {
        if (id_procs == 0)
            fprintf(stderr, "you need to provide the block size\n");
        MPI_Finalize();
        return 1;
    }
    block_size = atoi(argv[1]);
    int *sA, *sB, *sC;
    int N = block_size * sqrt_procs;
    if (id_procs == 0)
    {
        sA = (int *)malloc(N * N * sizeof(int));
        sB = (int *)malloc(N * N * sizeof(int));
        sC = (int *)malloc(N * N * sizeof(int));

        memset(sC, 0, N * N * sizeof(int));
        gen_rand_mat(sA, N);
        gen_rand_mat(sB, N);
        // Serial reference result, used to verify the parallel answer.
        compute(sA, sB, sC, N);
    }
    // Per-process blocks (VLAs); very large block sizes would need heap
    // allocation instead.
    int A[block_size * block_size];
    int B[block_size * block_size];
    int C[block_size * block_size];
    int ans[block_size * block_size];
    int A_in[block_size * block_size];
    int B_in[block_size * block_size];
    memset(C, 0, block_size * block_size * sizeof(int));
    MPI_Datatype SubMat, Mat;
    MPI_Status status;
    // SubMat picks one block_size x block_size block out of the N x N
    // matrix (stride N between rows); Mat is the same block stored
    // contiguously on the receiving side.
    MPI_Type_vector(block_size, block_size, N, MPI_INT, &SubMat);
    MPI_Type_commit(&SubMat);
    MPI_Type_vector(block_size, block_size, block_size, MPI_INT, &Mat);
    MPI_Type_commit(&Mat);
    if (id_procs == 0)
    {
        for (int i = 0; i < sqrt_procs; i++)
        {
            int lineoff = block_size * N * i;
            for (int j = 0; j < sqrt_procs; j++)
            {
                if (i == 0 && j == 0)
                {
                    // Rank 0 keeps block (0,0): pair the nonblocking
                    // self send/recv and complete both requests, so the
                    // send request is not leaked.
                    MPI_Request reqs[2];
                    MPI_Isend(sA, 1, SubMat, 0, 0, MPI_COMM_WORLD, &reqs[0]);
                    MPI_Irecv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &reqs[1]);
                    MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
                    MPI_Isend(sB, 1, SubMat, 0, 1, MPI_COMM_WORLD, &reqs[0]);
                    MPI_Irecv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &reqs[1]);
                    MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
                    continue;
                }
                int offset = j * block_size + lineoff;
                MPI_Send(sA + offset, 1, SubMat, i * sqrt_procs + j, 0, MPI_COMM_WORLD);
                MPI_Send(sB + offset, 1, SubMat, i * sqrt_procs + j, 1, MPI_COMM_WORLD);
            }
        }
    }
    else
    {
        MPI_Recv(A, 1, Mat, 0, 0, MPI_COMM_WORLD, &status);
        MPI_Recv(B, 1, Mat, 0, 1, MPI_COMM_WORLD, &status);
    }

    MPI_Comm row_comm, col_comm;
    int rank_A, size_A;
    int color_A;
    int key_A;

    int rank_B, size_B;
    int color_B;
    int key_B;

    // Row communicator: color = row index, key (rank) = column index.
    key_A = id_procs % sqrt_procs;
    color_A = id_procs / sqrt_procs;
    MPI_Comm_split(MPI_COMM_WORLD, color_A, key_A, &row_comm);
    MPI_Comm_rank(row_comm, &rank_A);
    MPI_Comm_size(row_comm, &size_A);

    // Column communicator: color = column index, key (rank) = row index.
    key_B = id_procs / sqrt_procs;
    color_B = id_procs % sqrt_procs;
    MPI_Comm_split(MPI_COMM_WORLD, color_B, key_B, &col_comm);
    MPI_Comm_rank(col_comm, &rank_B);
    MPI_Comm_size(col_comm, &size_B);

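    /* Fox's algorithm (broadcast-multiply-roll), q = sqrt_procs: in
     * stage k, process row i broadcasts the A block originally at grid
     * column (i + k) mod q across its row communicator, every process
     * multiplies it into its running C block, and the B blocks roll one
     * step up their column communicator. After q stages each process
     * has accumulated its finished block of C. */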
    for (int k = 0; k < sqrt_procs; k++)
    {
        if (rank_A == (color_A + k) % size_A)
        {
            memcpy(A_in, A, block_size * block_size * sizeof(int));
        }
        MPI_Bcast(A_in, 1, Mat, (color_A + k) % size_A, row_comm);
        compute(A_in, B, C, block_size);
        // Roll B one step up the column; MPI_Sendrecv avoids the
        // deadlock a blocking Send/Recv pair can hit on large blocks.
        int dest = (rank_B - 1 + size_B) % size_B;
        int src = (rank_B + 1) % size_B;
        MPI_Sendrecv(B, 1, Mat, dest, 0, B_in, 1, Mat, src, 0, col_comm, &status);
        memcpy(B, B_in, block_size * block_size * sizeof(int));
    }

    // Distribute blocks of the serial reference result so every process
    // can check its own block.
    if (id_procs == 0)
    {
        for (int i = 0; i < sqrt_procs; i++)
        {
            for (int j = 0; j < sqrt_procs; j++)
            {
                if (i == 0 && j == 0)
                {
                    MPI_Request reqs[2];
                    MPI_Isend(sC, 1, SubMat, 0, 100, MPI_COMM_WORLD, &reqs[0]);
                    MPI_Irecv(ans, 1, Mat, 0, 100, MPI_COMM_WORLD, &reqs[1]);
                    MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
                    continue;
                }
                int offset = j * block_size + block_size * N * i;
                MPI_Send(sC + offset, 1, SubMat, i * sqrt_procs + j, 100, MPI_COMM_WORLD);
            }
        }
    }
    else
    {
        MPI_Recv(ans, 1, Mat, 0, 100, MPI_COMM_WORLD, &status);
    }

    // Debug output: the reference block, then the computed block.
    print_mat(ans, block_size, id_procs);
    print_mat(C, block_size, id_procs);
    if (check(C, ans, block_size))
    {
        printf("Proc#%d Done.\n", id_procs);
    }

    MPI_Type_free(&SubMat);
    MPI_Type_free(&Mat);
    if (id_procs == 0)
    {
        free(sA);
        free(sB);
        free(sC);
    }
    MPI_Finalize();
    return 0;
}
--------------------------------------------------------------------------------
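One plausible way to build and run the files above, assuming gcc and an MPI implementation such as Open MPI (process counts and problem sizes are illustrative, not prescribed by the repository):

    gcc -fopenmp -O2 omp/5.c -o omp/5
    ./omp/5
    mpicc -O2 Kmeans/Kmeans_mpi.c -o Kmeans/Kmeans_mpi
    mpirun -np 4 Kmeans/Kmeans_mpi 10000 8 2   # sites per proc, clusters, dimensions
    mpicc -O2 mpi/3.c -o mpi/3 -lm
    mpirun -np 4 mpi/3 64                      # 4 procs = 2x2 grid, 64x64 blocks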