├── AnIntroductiontoParallelProgrammingPeterPacheco └── ipp-source-use │ ├── INDEX │ ├── INDEX~ │ ├── README │ ├── ch2 │ └── histogram.c │ ├── ch3 │ ├── bubble.c │ ├── mat_vect_mult.c │ ├── mpi_hello │ ├── mpi_hello.c │ ├── mpi_hello.c~ │ ├── mpi_hello.dSYM │ │ └── Contents │ │ │ ├── Info.plist │ │ │ └── Resources │ │ │ └── DWARF │ │ │ └── mpi_hello │ ├── mpi_many_msgs.c │ ├── mpi_mat_vect_mult.c │ ├── mpi_mat_vect_time.c │ ├── mpi_odd_even.c │ ├── mpi_output.c │ ├── mpi_trap1.c │ ├── mpi_trap2.c │ ├── mpi_trap3.c │ ├── mpi_trap4.c │ ├── mpi_vector_add.c │ ├── odd_even.c │ ├── timer.h │ ├── trap.c │ └── vector_add.c │ ├── ch4 │ ├── linked_list.c │ ├── mat_vect_mult.c │ ├── my_rand.c │ ├── my_rand.h │ ├── pth_busy_bar.c │ ├── pth_cond_bar.c │ ├── pth_do_nothing.c │ ├── pth_hello.c │ ├── pth_ll_mult_mut.c │ ├── pth_ll_one_mut.c │ ├── pth_ll_rwl.c │ ├── pth_mat_vect.c │ ├── pth_mat_vect_rand_split.c │ ├── pth_mat_vect_rand_split.c~ │ ├── pth_msg.c │ ├── pth_msg_sem.c │ ├── pth_msg_sem_mac.c │ ├── pth_pi.c │ ├── pth_pi_busy1 │ ├── pth_pi_busy1.c │ ├── pth_pi_busy1.dSYM │ │ └── Contents │ │ │ ├── Info.plist │ │ │ └── Resources │ │ │ └── DWARF │ │ │ └── pth_pi_busy1 │ ├── pth_pi_busy2.c │ ├── pth_pi_mutex │ ├── pth_pi_mutex.c │ ├── pth_pi_mutex.dSYM │ │ └── Contents │ │ │ ├── Info.plist │ │ │ └── Resources │ │ │ └── DWARF │ │ │ └── pth_pi_mutex │ ├── pth_sem_bar.c │ ├── pth_tokenize.c │ ├── pth_tokenize_r.c │ └── timer.h │ ├── ch5 │ ├── bubble.c │ ├── mat_vect_mult.c │ ├── odd_even.c │ ├── omp_fibo.c │ ├── omp_hello.c │ ├── omp_hello_err_chk.c │ ├── omp_mat_vect.c │ ├── omp_mat_vect_rand_split.c │ ├── omp_mat_vect_rand_split.c~ │ ├── omp_msg │ │ ├── omp_msglk.c │ │ ├── omp_msgps.c │ │ ├── queue.c │ │ ├── queue.h │ │ ├── queue_lk.c │ │ └── queue_lk.h │ ├── omp_odd_even1.c │ ├── omp_odd_even2.c │ ├── omp_pi.c │ ├── omp_private.c │ ├── omp_sin_sum.c │ ├── omp_tokenize.c │ ├── omp_tokenize_r.c │ ├── omp_trap1.c │ ├── omp_trap2a.c │ ├── omp_trap2b.c │ ├── omp_trap3.c │ └── 
trap.c │ └── ch6 │ ├── cyclic_derived.c │ ├── frac.c │ ├── frac.h │ ├── mat_17e │ ├── mat_17e-mpi-dyn-spl20-cut5.out │ ├── mat_17e-mpi-dyn-spl8-cut8.out │ ├── mat_17e-mpi-stat.out │ ├── mat_17e-pth-dyn-8-spl.out │ ├── mat_17e-pth-stat.out │ ├── mpi_nbody_basic.c │ ├── mpi_nbody_red.c │ ├── mpi_tsp_dyn │ ├── mpi_tsp_dyn.c │ ├── mpi_tsp_stat │ ├── mpi_tsp_stat.c │ ├── mpi_tsp_stat.c~ │ ├── nbody_basic │ ├── nbody_basic.c │ ├── nbody_basic.c~ │ ├── nbody_red │ ├── nbody_red.c │ ├── nbody_red.dSYM │ └── Contents │ │ ├── Info.plist │ │ └── Resources │ │ └── DWARF │ │ └── nbody_red │ ├── omp_nbody_basic.c │ ├── omp_nbody_red.c │ ├── omp_tsp_dyn.c │ ├── omp_tsp_stat.c │ ├── pth_nbody_basic.c │ ├── pth_nbody_red.c │ ├── pth_tsp_dyn │ ├── pth_tsp_dyn.c │ ├── pth_tsp_stat │ ├── pth_tsp_stat.c │ ├── timer.h │ ├── tsp_iter1.c │ ├── tsp_iter2.c │ └── tsp_rec.c ├── Homeworks ├── CUDA │ └── ThreadBlocksAndGrids │ │ ├── cudaThreadBlock1.cu │ │ └── cudaThreadGrid1.cu ├── ExampleCodes │ ├── Code1 │ │ └── PI.cpp │ ├── Code2 │ │ ├── Prime │ │ │ ├── Solution │ │ │ │ ├── performance test │ │ │ │ │ ├── primeMD │ │ │ │ │ ├── primeMD.cpp │ │ │ │ │ └── result.txt │ │ │ │ ├── primeMD.cpp │ │ │ │ └── result verify │ │ │ │ │ ├── primeMD │ │ │ │ │ ├── primeMD.cpp │ │ │ │ │ └── result.txt │ │ │ └── source code │ │ │ │ ├── prime │ │ │ │ ├── prime.cpp │ │ │ │ └── testResult.txt │ │ ├── SplitInOneDimension │ │ │ └── pthread_heat2D.cpp │ │ ├── SplitInTwoDimension │ │ │ ├── pthreadBarried │ │ │ │ ├── pthread_Barried.txt │ │ │ │ └── pthread_heat2D.c │ │ │ └── pthread_heat2D.c │ │ └── threadsCommunicationMode │ │ │ └── pthread_heat2D.c │ ├── Code3 │ │ ├── multiBody.c │ │ └── pthread_MultiBody.cpp │ ├── Code4 │ │ ├── mpi_heat2D.c │ │ └── mpi_heat2D_Origin.c │ └── Code5 │ │ ├── mpiMC │ │ ├── mpiMC.c │ │ ├── mpiMCfort │ │ └── mpiMCfort.f ├── HeatTransferProblem │ └── heat2DCodes │ │ ├── mpi_heat2D.c │ │ ├── mpi_heat2D_Origin.c │ │ └── pthread_heat2D.c ├── Homework_1 │ ├── HomeworkDebug │ │ ├── PI │ │ ├── 
PI.cpp │ │ ├── PIintel │ │ ├── numstep 1000.dat │ │ ├── numstep 1000000.dat │ │ └── numstep 1000000000.dat │ ├── PI.cpp │ ├── numstep 1000 │ ├── numstep 1000000 │ ├── numstep 1000000000 │ ├── 第一次作业.pdf │ └── 第一次作业题目.pdf ├── Homework_2 │ ├── Homework_2 │ ├── heat2D │ │ ├── code_debug │ │ │ ├── Performance.m │ │ │ ├── SpeedUp.jpg │ │ │ ├── Time Cost.jpg │ │ │ ├── heat2DRun.txt │ │ │ └── pthread_heat2DRun.txt │ │ ├── original_code │ │ │ └── ser_heat2D.c │ │ └── solution │ │ │ ├── SplitInOneDimension │ │ │ └── pthread_heat2D.cpp │ │ │ ├── SplitInTwoDimension │ │ │ ├── pthreadBarried │ │ │ │ ├── pthread_Barried.txt │ │ │ │ └── pthread_heat2D.c │ │ │ └── pthread_heat2D.c │ │ │ └── threadsCommunicationMode │ │ │ └── pthread_heat2D.c │ ├── prime_number │ │ ├── code_debug │ │ │ ├── bucketSort.cpp │ │ │ ├── primeModified │ │ │ ├── primeModified.cpp │ │ │ ├── primeModified2.cpp │ │ │ ├── primeTemp │ │ │ ├── primeTemp.cpp │ │ │ ├── primeTest │ │ │ ├── primeTest.cpp │ │ │ ├── primeTest2 │ │ │ ├── primeTest2.cpp │ │ │ ├── primeTest2Result.txt │ │ │ └── sortBucket │ │ ├── original_codes │ │ │ ├── prime │ │ │ ├── prime.cpp │ │ │ └── testResult.txt │ │ └── solutions │ │ │ ├── performance_test │ │ │ ├── primeMD │ │ │ ├── primeMD.cpp │ │ │ └── result.txt │ │ │ ├── primeMD.cpp │ │ │ └── result_verify │ │ │ ├── primeMD │ │ │ ├── primeMD.cpp │ │ │ └── result.txt │ └── 第二次作业.pdf ├── Homework_3 │ ├── Homework 3 │ └── 第三次作业.pdf ├── Homework_4 │ ├── CodeTest │ │ └── TestResult.txt │ ├── Homework_4.txt │ ├── SourceCodes │ │ ├── multiBody.c │ │ └── pthread_MultiBody.cpp │ └── 第四次作业.pdf ├── Homework_5 │ ├── CodeTest │ │ ├── Performace.m │ │ ├── SpeedUp.jpg │ │ ├── Test1 │ │ │ ├── draw_heatf.c │ │ │ ├── heat2D.cpp │ │ │ ├── mpi_heat2D.c │ │ │ └── mpi_heat2D.f │ │ ├── TimeCost.jpg │ │ ├── mpi_heat2D.txt │ │ └── pthread_heat2D.txt │ ├── MyCodes │ │ ├── mpi_heat2D.c │ │ └── mpi_heat2D_Origin.c │ ├── RefrenceCode │ │ ├── mpi_heat2D.c │ │ └── mpi_heat2D.f │ ├── pthread_heat2D.cpp │ ├── 第五次作业.pdf │ 
└── 第五次作业.txt ├── Homework_6 │ ├── Code │ │ └── mpi_process.cpp │ └── homework 6.txt ├── Homework_7 │ ├── OriginalCodes │ │ ├── NBody(3).cpp │ │ └── mpi_MultiBody.cpp │ ├── 第七次作业.pdf │ └── 第七次作业.txt ├── Homework_8 │ ├── Code │ │ └── 2d5p-win.cpp │ ├── 第八次作业.pdf │ └── 第八次作业.txt ├── Homework_9 │ ├── Documents │ │ ├── 01531136.pdf │ │ ├── 24.pdf │ │ ├── Fox_example.pdf │ │ ├── Li-Fall-2012-CSE633.pdf │ │ ├── lawn129.pdf │ │ ├── matrixmult.pdf │ │ └── ppagerank_report.pdf │ └── 第九次作业.pdf ├── Shiyao_Li_s_MPI_homework │ └── 李师尧MPI作业参考 │ │ ├── 150*******李师尧-作业七.pdf │ │ ├── 2d5p-win.cpp │ │ ├── NBody-mpi.cpp │ │ ├── mpi_process.cpp │ │ ├── 作业九 FOX并行算法和PageRank算法.pdf │ │ ├── 作业八150*******李师尧.pdf │ │ └── 作业六150*******李师尧.pdf └── Yao_Hong_s_Homework │ ├── Homework_1 │ ├── README.md │ └── 并行程序设计作业-1.pdf │ ├── Homework_2 │ ├── README.md │ ├── saxpy.c │ └── 稠密向量的SAXPY_测评报告.pdf │ ├── Homework_3 │ ├── README.md │ ├── shuzu.cpp │ └── 数组拼接_测评报告.pdf │ ├── Homework_4 │ ├── 2D5P.cpp │ ├── 2D5P_测评报告.pdf │ └── README.md │ ├── Homework_5 │ ├── HEAT-TRANSFER_测评报告.pdf │ ├── README.md │ └── rechuandao.cpp │ ├── Homework_6 │ ├── N-body_测评报告.pdf │ ├── N_body.cpp │ └── README.md │ ├── Homework_7 │ └── README.md │ ├── Homework_8 │ └── README.md │ ├── Homework_9 │ └── README.md │ └── README.md ├── LICENSE ├── README.md └── ReferencePapers └── Superlinear Speedup in HPC Systems why and when.pdf /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/README: -------------------------------------------------------------------------------- 1 | The files in the various subdirectories are source code from the 2 | book "An Introduction to Parallel Programming" by Peter Pacheco, 3 | Morgan Kaufmann Publishers. 4 | 5 | COMPILING 6 | --------- 7 | Most of the programs are self-contained and should compile by 8 | themselves provided you make sure that the compiler can find the 9 | appropriate header files (e.g., mpi.h, pthread.h, semaphore.h, 10 | timer.h) and libraries. 
Documentation at the beginning of each 11 | program provides a sample command line for compilation on a typical 12 | Linux or MacOS X system. 13 | 14 | RUNNING 15 | ------- 16 | The exact syntax for starting the programs may be system dependent. 17 | See your local expert for details. Any input needed by the program 18 | can be determined from comments at the beginning of the source 19 | file. 20 | 21 | I/O 22 | --- 23 | All of the longer applications only use process/thread 0 for I/O. 24 | However, some short test programs (e.g., hello, world) print data 25 | from processes or threads other than 0. Also in many of the programs, 26 | if a DEBUG macro is defined, the program may print data from more 27 | than one process/thread. 28 | 29 | DISCLAIMER 30 | ---------- 31 | These programs are not production quality. Their main purpose is 32 | to help the reader explore parallel programming. They should be 33 | easy to modify for a specific application and you are welcome to 34 | do so. If you have suggestions or discover any bugs it would be 35 | greatly appreciated if you could let us know by sending email to 36 | peter at usfca dot edu 37 | 38 | COPYING 39 | ------- 40 | All of the programs in these directories are copyright Morgan 41 | Kaufmann Publishers and Peter Pacheco. However, they may be freely 42 | copied and used in non-commercial software, provided credit is given 43 | to both Morgan Kaufmann Publishers and Peter Pacheco. 44 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/bubble.c: -------------------------------------------------------------------------------- 1 | /* File: bubble.c 2 | * 3 | * Purpose: Use bubble sort to sort a list of ints. 
4 | * 5 | * Compile: gcc -g -Wall -o bubble bubble.c 6 | * Usage: bubble 7 | * n: number of elements in list 8 | * 'g': generate list using a random number generator 9 | * 'i': user input list 10 | * 11 | * Input: list (optional) 12 | * Output: sorted list 13 | * 14 | * IPP: Section 3.7.1 (pp. 127 and ff.) and Section 5.6.1 15 | * (pp. 232 and ff.) 16 | */ 17 | #include 18 | #include 19 | 20 | /* For random list, 0 <= keys < RMAX */ 21 | const int RMAX = 100; 22 | 23 | void Usage(char* prog_name); 24 | void Get_args(int argc, char* argv[], int* n_p, char* g_i_p); 25 | void Generate_list(int a[], int n); 26 | void Print_list(int a[], int n, char* title); 27 | void Read_list(int a[], int n); 28 | void Bubble_sort(int a[], int n); 29 | 30 | /*-----------------------------------------------------------------*/ 31 | int main(int argc, char* argv[]) { 32 | int n; 33 | char g_i; 34 | int* a; 35 | 36 | Get_args(argc, argv, &n, &g_i); 37 | a = (int*) malloc(n*sizeof(int)); 38 | if (g_i == 'g') { 39 | Generate_list(a, n); 40 | Print_list(a, n, "Before sort"); 41 | } else { 42 | Read_list(a, n); 43 | } 44 | 45 | Bubble_sort(a, n); 46 | 47 | Print_list(a, n, "After sort"); 48 | 49 | free(a); 50 | return 0; 51 | } /* main */ 52 | 53 | 54 | /*----------------------------------------------------------------- 55 | * Function: Usage 56 | * Purpose: Summary of how to run program 57 | */ 58 | void Usage(char* prog_name) { 59 | fprintf(stderr, "usage: %s \n", prog_name); 60 | fprintf(stderr, " n: number of elements in list\n"); 61 | fprintf(stderr, " 'g': generate list using a random number generator\n"); 62 | fprintf(stderr, " 'i': user input list\n"); 63 | } /* Usage */ 64 | 65 | 66 | /*----------------------------------------------------------------- 67 | * Function: Get_args 68 | * Purpose: Get and check command line arguments 69 | * In args: argc, argv 70 | * Out args: n_p, g_i_p 71 | */ 72 | void Get_args(int argc, char* argv[], int* n_p, char* g_i_p) { 73 | if (argc != 3 ) { 74 
| Usage(argv[0]); 75 | exit(0); 76 | } 77 | *n_p = atoi(argv[1]); 78 | *g_i_p = argv[2][0]; 79 | 80 | if (*n_p <= 0 || (*g_i_p != 'g' && *g_i_p != 'i') ) { 81 | Usage(argv[0]); 82 | exit(0); 83 | } 84 | } /* Get_args */ 85 | 86 | 87 | /*----------------------------------------------------------------- 88 | * Function: Generate_list 89 | * Purpose: Use random number generator to generate list elements 90 | * In args: n 91 | * Out args: a 92 | */ 93 | void Generate_list(int a[], int n) { 94 | int i; 95 | 96 | srandom(0); 97 | for (i = 0; i < n; i++) 98 | a[i] = random() % RMAX; 99 | } /* Generate_list */ 100 | 101 | 102 | /*----------------------------------------------------------------- 103 | * Function: Print_list 104 | * Purpose: Print the elements in the list 105 | * In args: a, n 106 | */ 107 | void Print_list(int a[], int n, char* title) { 108 | int i; 109 | 110 | printf("%s:\n", title); 111 | for (i = 0; i < n; i++) 112 | printf("%d ", a[i]); 113 | printf("\n\n"); 114 | } /* Print_list */ 115 | 116 | 117 | /*----------------------------------------------------------------- 118 | * Function: Read_list 119 | * Purpose: Read elements of list from stdin 120 | * In args: n 121 | * Out args: a 122 | */ 123 | void Read_list(int a[], int n) { 124 | int i; 125 | 126 | printf("Please enter the elements of the list\n"); 127 | for (i = 0; i < n; i++) 128 | scanf("%d", &a[i]); 129 | } /* Read_list */ 130 | 131 | 132 | /*----------------------------------------------------------------- 133 | * Function: Bubble_sort 134 | * Purpose: Sort list using bubble sort 135 | * In args: n 136 | * In/out args: a 137 | */ 138 | void Bubble_sort( 139 | int a[] /* in/out */, 140 | int n /* in */) { 141 | int list_length, i, temp; 142 | 143 | for (list_length = n; list_length >= 2; list_length--) 144 | for (i = 0; i < list_length-1; i++) 145 | if (a[i] > a[i+1]) { 146 | temp = a[i]; 147 | a[i] = a[i+1]; 148 | a[i+1] = temp; 149 | } 150 | 151 | } /* Bubble_sort */ 152 | 153 | 
-------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_hello: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_hello -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_hello.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * mpi_hello.c 3 | * 4 | * Purpose: 5 | * A "hello,world" program that uses MPI 6 | * 7 | * Compile: 8 | * mpicc -g -Wall -std=C99 -o mpi_hello mpi_hello.c 9 | * Usage: 10 | * mpiexec -n ./mpi_hello 11 | * 12 | * Input: 13 | * None 14 | * Output: 15 | * A greeting from each process 16 | * 17 | * Algorithm: 18 | * Each process sends a message to process 0, which prints 19 | * the messages it has received, as well as its own message. 20 | * 21 | * IPP: Section 3.1 (pp. 84 and ff.) 
22 | */ 23 | #include 24 | #include /* For strlen */ 25 | #include /* For MPI functions, etc */ 26 | 27 | const int MAX_STRING = 100; 28 | 29 | int main(void) { 30 | char greeting[MAX_STRING]; /* String storing message */ 31 | int comm_sz; /* Number of processes */ 32 | int my_rank; /* My process rank */ 33 | 34 | /* Start up MPI */ 35 | MPI_Init(NULL, NULL); 36 | 37 | /* Get the number of processes */ 38 | MPI_Comm_size(MPI_COMM_WORLD, &comm_sz); 39 | 40 | /* Get my rank among all the processes */ 41 | MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 42 | 43 | if (my_rank != 0) { 44 | /* Create message */ 45 | sprintf(greeting, "Greetings from process %d of %d!", 46 | my_rank, comm_sz); 47 | /* Send message to process 0 */ 48 | MPI_Send(greeting, strlen(greeting)+1, MPI_CHAR, 0, 0, 49 | MPI_COMM_WORLD); 50 | } else { 51 | /* Print my message */ 52 | printf("Greetings from process %d of %d!\n", my_rank, comm_sz); 53 | for (int q = 1; q < comm_sz; q++) { 54 | /* Receive message from process q */ 55 | MPI_Recv(greeting, MAX_STRING, MPI_CHAR, q, 56 | 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 57 | /* Print message from process q */ 58 | printf("%s\n", greeting); 59 | } 60 | } 61 | 62 | /* Shut down MPI */ 63 | MPI_Finalize(); 64 | 65 | return 0; 66 | } /* main */ 67 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_hello.c~: -------------------------------------------------------------------------------- 1 | /* File: 2 | * mpi_hello.c 3 | * 4 | * Purpose: 5 | * A "hello,world" program that uses MPI 6 | * 7 | * Compile: 8 | * mpicc -g -Wall -std=C99 -o mpi_hello mpi_hello.c 9 | * Usage: 10 | * mpiexec -n ./mpi_hello 11 | * 12 | * Input: 13 | * None 14 | * Output: 15 | * A greeting from each process 16 | * 17 | * Algorithm: 18 | * Each process sends a message to process 0, which prints 19 | * the messages it has received, as well as its own message. 
20 | * 21 | * IPP: Section 3.1 (pp. 84 and ff.) 22 | */ 23 | #include 24 | #include /* For strlen */ 25 | #include /* For MPI functions, etc */ 26 | 27 | const int MAX_STRING = 100; 28 | 29 | int main(void) { 30 | char greeting[MAX_STRING]; /* String storing message */ 31 | int comm_sz; /* Number of processes */ 32 | int my_rank; /* My process rank */ 33 | 34 | /* Start up MPI */ 35 | MPI_Init(NULL, NULL); 36 | 37 | /* Get the number of processes */ 38 | MPI_Comm_size(MPI_COMM_WORLD, &comm_sz); 39 | 40 | /* Get my rank among all the processes */ 41 | MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 42 | 43 | if (my_rank != 0) { 44 | /* Create message */ 45 | sprintf(greeting, "Greetings from process %d of %d!", 46 | my_rank, comm_sz); 47 | /* Send message to process 0 */ 48 | MPI_Send(greeting, strlen(greeting), MPI_CHAR, 0, 0, 49 | MPI_COMM_WORLD); 50 | } else { 51 | /* Print my message */ 52 | printf("Greetings from process %d of %d!\n", my_rank, comm_sz); 53 | for (int q = 1; q < comm_sz; q++) { 54 | /* Receive message from process q */ 55 | MPI_Recv(greeting, MAX_STRING, MPI_CHAR, q, 56 | 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 57 | /* Print message from process q */ 58 | printf("%s\n", greeting); 59 | } 60 | } 61 | 62 | /* Shut down MPI */ 63 | MPI_Finalize(); 64 | 65 | return 0; 66 | } /* main */ 67 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_hello.dSYM/Contents/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | English 7 | CFBundleIdentifier 8 | com.apple.xcode.dsym.mpi_hello 9 | CFBundleInfoDictionaryVersion 10 | 6.0 11 | CFBundlePackageType 12 | dSYM 13 | CFBundleSignature 14 | ???? 
15 | CFBundleShortVersionString 16 | 1.0 17 | CFBundleVersion 18 | 1 19 | 20 | 21 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_hello.dSYM/Contents/Resources/DWARF/mpi_hello: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_hello.dSYM/Contents/Resources/DWARF/mpi_hello -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_many_msgs.c: -------------------------------------------------------------------------------- 1 | /* File: mpi_many_msgs.c 2 | * Purpose: Compare the time needed to send n messages consisting of 3 | * a single double each with the time needed to send one message 4 | * consisting of n doubles. 5 | * 6 | * Compile: mpicc -g -Wall -O2 -o mpi_many_msgs mpi_many_msgs.c 7 | * Run: mpiexec -n 2 mpi_many_msgs 8 | * 9 | * Input: none 10 | * Output: Elapsed time for n messages of size 1 double and elapsed 11 | * time for 1 message of n doubles 12 | * 13 | * IPP: Section 3.5 (pp. 116 and ff.) 
14 | */ 15 | #include 16 | #include 17 | #include 18 | 19 | int my_rank; 20 | int comm_sz; 21 | MPI_Comm comm; 22 | 23 | void Get_arg(int argc, char* argv[], int* n_p); 24 | 25 | int main(int argc, char* argv[]) { 26 | double* x; 27 | int i, n; 28 | MPI_Status status; 29 | double start, finish; 30 | 31 | MPI_Init(&argc, &argv); 32 | comm = MPI_COMM_WORLD; 33 | MPI_Comm_size(comm, &comm_sz); 34 | MPI_Comm_rank(comm, &my_rank); 35 | 36 | Get_arg(argc, argv, &n); 37 | 38 | x = malloc(n*sizeof(double)); 39 | 40 | if (my_rank == 0) 41 | for (i = 0; i < n; i++) x[i] = i; 42 | else /* my_rank == 1 */ 43 | for (i = 0; i < n; i++) x[i] = -1; 44 | 45 | MPI_Barrier(comm); 46 | start = MPI_Wtime(); 47 | if (my_rank == 0) 48 | for (i = 0; i < n; i++) 49 | MPI_Send(&x[i], 1, MPI_DOUBLE, 1, 0, comm); 50 | else /* my_rank == 1 */ 51 | for (i = 0; i < n; i++) 52 | MPI_Recv(&x[i], 1, MPI_DOUBLE, 0, 0, comm, &status); 53 | finish = MPI_Wtime(); 54 | printf("Proc %d > First comm took %e seconds\n", my_rank, 55 | finish-start); 56 | fflush(stdout); 57 | 58 | MPI_Barrier(comm); 59 | start = MPI_Wtime(); 60 | if (my_rank == 0) 61 | MPI_Send(x, n, MPI_DOUBLE, 1, 0, comm); 62 | else /* my_rank == 1 */ 63 | MPI_Recv(x, n, MPI_DOUBLE, 0, 0, comm, &status); 64 | finish = MPI_Wtime(); 65 | printf("Proc %d > Second comm took %e seconds\n", my_rank, 66 | finish-start); 67 | fflush(stdout); 68 | 69 | free(x); 70 | MPI_Finalize(); 71 | return 0; 72 | } /* main */ 73 | 74 | /*-------------------------------------------------------------------*/ 75 | void Get_arg( 76 | int argc /* in */, 77 | char* argv[] /* out */, 78 | int* n_p /* out */) { 79 | 80 | if (my_rank == 0) { 81 | if (argc != 2 || comm_sz != 2) { 82 | fprintf(stderr, "usage: mpiexec -n 2 %s \n", 83 | argv[0]); 84 | *n_p = 0; 85 | } else { 86 | *n_p = strtol(argv[1], NULL, 10); 87 | } 88 | } 89 | MPI_Bcast(n_p, 1, MPI_INT, 0, comm); 90 | if (*n_p <= 0 || comm_sz != 2) { 91 | MPI_Finalize(); 92 | exit(0); 93 | } 94 | } /* Get_arg */ 95 | 
-------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_output.c: -------------------------------------------------------------------------------- 1 | /* File: mpi_output.c 2 | * 3 | * Purpose: A program in which multiple MPI processes try to print 4 | * a message. 5 | * 6 | * Compile: mpicc -g -Wall -o mpi_output mpi_output.c 7 | * Usage: mpiexec -n ./mpi_output 8 | * 9 | * Input: None 10 | * Output: A message from each process 11 | * 12 | * IPP: Section 3.3.1 (pp. 97 and ff.) 13 | */ 14 | #include 15 | #include 16 | 17 | int main(void) { 18 | int my_rank, comm_sz; 19 | 20 | MPI_Init(NULL, NULL); 21 | MPI_Comm_size(MPI_COMM_WORLD, &comm_sz); 22 | MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 23 | 24 | printf("Proc %d of %d > Does anyone have a toothpick?\n", 25 | my_rank, comm_sz); 26 | 27 | MPI_Finalize(); 28 | return 0; 29 | } /* main */ 30 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_trap1.c: -------------------------------------------------------------------------------- 1 | /* File: mpi_trap1.c 2 | * Purpose: Use MPI to implement a parallel version of the trapezoidal 3 | * rule. In this version the endpoints of the interval and 4 | * the number of trapezoids are hardwired. 5 | * 6 | * Input: None. 7 | * Output: Estimate of the integral from a to b of f(x) 8 | * using the trapezoidal rule and n trapezoids. 9 | * 10 | * Compile: mpicc -g -Wall -o mpi_trap1 mpi_trap1.c 11 | * Run: mpiexec -n ./mpi_trap1 12 | * 13 | * Algorithm: 14 | * 1. Each process calculates "its" interval of 15 | * integration. 16 | * 2. Each process estimates the integral of f(x) 17 | * over its interval using the trapezoidal rule. 18 | * 3a. Each process != 0 sends its integral to 0. 19 | * 3b. 
Process 0 sums the calculations received from 20 | * the individual processes and prints the result. 21 | * 22 | * Note: f(x), a, b, and n are all hardwired. 23 | * 24 | * IPP: Section 3.2.2 (pp. 96 and ff.) 25 | */ 26 | #include 27 | 28 | /* We'll be using MPI routines, definitions, etc. */ 29 | #include 30 | 31 | /* Calculate local integral */ 32 | double Trap(double left_endpt, double right_endpt, int trap_count, 33 | double base_len); 34 | 35 | /* Function we're integrating */ 36 | double f(double x); 37 | 38 | int main(void) { 39 | int my_rank, comm_sz, n = 1024, local_n; 40 | double a = 0.0, b = 3.0, h, local_a, local_b; 41 | double local_int, total_int; 42 | int source; 43 | 44 | /* Let the system do what it needs to start up MPI */ 45 | MPI_Init(NULL, NULL); 46 | 47 | /* Get my process rank */ 48 | MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 49 | 50 | /* Find out how many processes are being used */ 51 | MPI_Comm_size(MPI_COMM_WORLD, &comm_sz); 52 | 53 | h = (b-a)/n; /* h is the same for all processes */ 54 | local_n = n/comm_sz; /* So is the number of trapezoids */ 55 | 56 | /* Length of each process' interval of 57 | * integration = local_n*h. 
So my interval 58 | * starts at: */ 59 | local_a = a + my_rank*local_n*h; 60 | local_b = local_a + local_n*h; 61 | local_int = Trap(local_a, local_b, local_n, h); 62 | 63 | /* Add up the integrals calculated by each process */ 64 | if (my_rank != 0) { 65 | MPI_Send(&local_int, 1, MPI_DOUBLE, 0, 0, 66 | MPI_COMM_WORLD); 67 | } else { 68 | total_int = local_int; 69 | for (source = 1; source < comm_sz; source++) { 70 | MPI_Recv(&local_int, 1, MPI_DOUBLE, source, 0, 71 | MPI_COMM_WORLD, MPI_STATUS_IGNORE); 72 | total_int += local_int; 73 | } 74 | } 75 | 76 | /* Print the result */ 77 | if (my_rank == 0) { 78 | printf("With n = %d trapezoids, our estimate\n", n); 79 | printf("of the integral from %f to %f = %.15e\n", 80 | a, b, total_int); 81 | } 82 | 83 | /* Shut down MPI */ 84 | MPI_Finalize(); 85 | 86 | return 0; 87 | } /* main */ 88 | 89 | 90 | /*------------------------------------------------------------------ 91 | * Function: Trap 92 | * Purpose: Serial function for estimating a definite integral 93 | * using the trapezoidal rule 94 | * Input args: left_endpt 95 | * right_endpt 96 | * trap_count 97 | * base_len 98 | * Return val: Trapezoidal rule estimate of integral from 99 | * left_endpt to right_endpt using trap_count 100 | * trapezoids 101 | */ 102 | double Trap( 103 | double left_endpt /* in */, 104 | double right_endpt /* in */, 105 | int trap_count /* in */, 106 | double base_len /* in */) { 107 | double estimate, x; 108 | int i; 109 | 110 | estimate = (f(left_endpt) + f(right_endpt))/2.0; 111 | for (i = 1; i <= trap_count-1; i++) { 112 | x = left_endpt + i*base_len; 113 | estimate += f(x); 114 | } 115 | estimate = estimate*base_len; 116 | 117 | return estimate; 118 | } /* Trap */ 119 | 120 | 121 | /*------------------------------------------------------------------ 122 | * Function: f 123 | * Purpose: Compute value of function to be integrated 124 | * Input args: x 125 | */ 126 | double f(double x) { 127 | return x*x; 128 | } /* f */ 129 | 
-------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/timer.h: -------------------------------------------------------------------------------- 1 | /* File: timer.h 2 | * 3 | * Purpose: Define a macro that returns the number of seconds that 4 | * have elapsed since some point in the past. The timer 5 | * should return times with microsecond accuracy. 6 | * 7 | * Note: The argument passed to the GET_TIME macro should be 8 | * a double, *not* a pointer to a double. 9 | * 10 | * Example: 11 | * #include "timer.h" 12 | * . . . 13 | * double start, finish, elapsed; 14 | * . . . 15 | * GET_TIME(start); 16 | * . . . 17 | * Code to be timed 18 | * . . . 19 | * GET_TIME(finish); 20 | * elapsed = finish - start; 21 | * printf("The code to be timed took %e seconds\n", elapsed); 22 | * 23 | * IPP: Section 3.6.1 (p. 121) and Section 6.1.2 (pp. 273 and ff.) 24 | */ 25 | #ifndef _TIMER_H_ 26 | #define _TIMER_H_ 27 | 28 | #include 29 | 30 | /* The argument now should be a double (not a pointer to a double) */ 31 | #define GET_TIME(now) { \ 32 | struct timeval t; \ 33 | gettimeofday(&t, NULL); \ 34 | now = t.tv_sec + t.tv_usec/1000000.0; \ 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/trap.c: -------------------------------------------------------------------------------- 1 | /* File: trap.c 2 | * Purpose: Calculate definite integral using trapezoidal 3 | * rule. 4 | * 5 | * Input: a, b, n 6 | * Output: Estimate of integral from a to b of f(x) 7 | * using n trapezoids. 8 | * 9 | * Compile: gcc -g -Wall -o trap trap.c 10 | * Usage: ./trap 11 | * 12 | * Note: The function f(x) is hardwired. 13 | * 14 | * IPP: Section 3.2.1 (pp. 94 and ff.) and 5.2 (p. 
216) 15 | */ 16 | 17 | #include 18 | 19 | double f(double x); /* Function we're integrating */ 20 | double Trap(double a, double b, int n, double h); 21 | 22 | int main(void) { 23 | double integral; /* Store result in integral */ 24 | double a, b; /* Left and right endpoints */ 25 | int n; /* Number of trapezoids */ 26 | double h; /* Height of trapezoids */ 27 | 28 | printf("Enter a, b, and n\n"); 29 | scanf("%lf", &a); 30 | scanf("%lf", &b); 31 | scanf("%d", &n); 32 | 33 | h = (b-a)/n; 34 | integral = Trap(a, b, n, h); 35 | 36 | printf("With n = %d trapezoids, our estimate\n", n); 37 | printf("of the integral from %f to %f = %.15f\n", 38 | a, b, integral); 39 | 40 | return 0; 41 | } /* main */ 42 | 43 | /*------------------------------------------------------------------ 44 | * Function: Trap 45 | * Purpose: Estimate integral from a to b of f using trap rule and 46 | * n trapezoids 47 | * Input args: a, b, n, h 48 | * Return val: Estimate of the integral 49 | */ 50 | double Trap(double a, double b, int n, double h) { 51 | double integral; 52 | int k; 53 | 54 | integral = (f(a) + f(b))/2.0; 55 | for (k = 1; k <= n-1; k++) { 56 | integral += f(a+k*h); 57 | } 58 | integral = integral*h; 59 | 60 | return integral; 61 | } /* Trap */ 62 | 63 | /*------------------------------------------------------------------ 64 | * Function: f 65 | * Purpose: Compute value of function to be integrated 66 | * Input args: x 67 | */ 68 | double f(double x) { 69 | double return_val; 70 | 71 | return_val = x*x; 72 | return return_val; 73 | } /* f */ 74 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/vector_add.c: -------------------------------------------------------------------------------- 1 | /* File: vector_add.c 2 | * 3 | * Purpose: Implement vector addition 4 | * 5 | * Compile: gcc -g -Wall -o vector_add vector_add.c 6 | * Run: ./vector_add 7 | * 8 | * Input: The order of the 
vectors, n, and the vectors x and y 9 | * Output: The sum vector z = x+y 10 | * 11 | * Note: 12 | * If the program detects an error (order of vector <= 0 or malloc 13 | * failure), it prints a message and terminates 14 | * 15 | * IPP: Section 3.4.6 (p. 109) 16 | */ 17 | #include 18 | #include 19 | 20 | void Read_n(int* n_p); 21 | void Allocate_vectors(double** x_pp, double** y_pp, double** z_pp, int n); 22 | void Read_vector(double a[], int n, char vec_name[]); 23 | void Print_vector(double b[], int n, char title[]); 24 | void Vector_sum(double x[], double y[], double z[], int n); 25 | 26 | /*---------------------------------------------------------------------*/ 27 | int main(void) { 28 | int n; 29 | double *x, *y, *z; 30 | 31 | Read_n(&n); 32 | Allocate_vectors(&x, &y, &z, n); 33 | 34 | Read_vector(x, n, "x"); 35 | Read_vector(y, n, "y"); 36 | 37 | Vector_sum(x, y, z, n); 38 | 39 | Print_vector(z, n, "The sum is"); 40 | 41 | free(x); 42 | free(y); 43 | free(z); 44 | 45 | return 0; 46 | } /* main */ 47 | 48 | /*--------------------------------------------------------------------- 49 | * Function: Read_n 50 | * Purpose: Get the order of the vectors from stdin 51 | * Out arg: n_p: the order of the vectors 52 | * 53 | * Errors: If n <= 0, the program terminates 54 | */ 55 | void Read_n(int* n_p /* out */) { 56 | printf("What's the order of the vectors?\n"); 57 | scanf("%d", n_p); 58 | if (*n_p <= 0) { 59 | fprintf(stderr, "Order should be positive\n"); 60 | exit(-1); 61 | } 62 | } /* Read_n */ 63 | 64 | /*--------------------------------------------------------------------- 65 | * Function: Allocate_vectors 66 | * Purpose: Allocate storage for the vectors 67 | * In arg: n: the order of the vectors 68 | * Out args: x_pp, y_pp, z_pp: pointers to storage for the vectors 69 | * 70 | * Errors: If one of the mallocs fails, the program terminates 71 | */ 72 | void Allocate_vectors( 73 | double** x_pp /* out */, 74 | double** y_pp /* out */, 75 | double** z_pp /* out */, 76 
| int n /* in */) { 77 | *x_pp = malloc(n*sizeof(double)); 78 | *y_pp = malloc(n*sizeof(double)); 79 | *z_pp = malloc(n*sizeof(double)); 80 | if (*x_pp == NULL || *y_pp == NULL || *z_pp == NULL) { 81 | fprintf(stderr, "Can't allocate vectors\n"); 82 | exit(-1); 83 | } 84 | } /* Allocate_vectors */ 85 | 86 | /*--------------------------------------------------------------------- 87 | * Function: Read_vector 88 | * Purpose: Read a vector from stdin 89 | * In args: n: order of the vector 90 | * vec_name: name of vector (e.g., x) 91 | * Out arg: a: the vector to be read in 92 | */ 93 | void Read_vector( 94 | double a[] /* out */, 95 | int n /* in */, 96 | char vec_name[] /* in */) { 97 | int i; 98 | printf("Enter the vector %s\n", vec_name); 99 | for (i = 0; i < n; i++) 100 | scanf("%lf", &a[i]); 101 | } /* Read_vector */ 102 | 103 | /*--------------------------------------------------------------------- 104 | * Function: Print_vector 105 | * Purpose: Print the contents of a vector 106 | * In args: b: the vector to be printed 107 | * n: the order of the vector 108 | * title: title for print out 109 | */ 110 | void Print_vector( 111 | double b[] /* in */, 112 | int n /* in */, 113 | char title[] /* in */) { 114 | int i; 115 | printf("%s\n", title); 116 | for (i = 0; i < n; i++) 117 | printf("%f ", b[i]); 118 | printf("\n"); 119 | } /* Print_vector */ 120 | 121 | /*--------------------------------------------------------------------- 122 | * Function: Vector_sum 123 | * Purpose: Add two vectors 124 | * In args: x: the first vector to be added 125 | * y: the second vector to be added 126 | * n: the order of the vectors 127 | * Out arg: z: the sum vector 128 | */ 129 | void Vector_sum( 130 | double x[] /* in */, 131 | double y[] /* in */, 132 | double z[] /* out */, 133 | int n /* in */) { 134 | int i; 135 | 136 | for (i = 0; i < n; i++) 137 | z[i] = x[i] + y[i]; 138 | } /* Vector_sum */ 139 | -------------------------------------------------------------------------------- 
/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/my_rand.c: -------------------------------------------------------------------------------- 1 | /* File: my_rand.c 2 | * 3 | * Purpose: implement a linear congruential random number generator 4 | * 5 | * my_rand: generates a random unsigned int in the range 0 - MR_MODULUS 6 | * my_drand: generates a random double in the range 0 - 1 7 | * 8 | * Notes: 9 | * 1. The generator is taken from the Wikipedia article "Linear congruential 10 | * generator" 11 | * 2. This is *not* a very good random number generator. However, unlike 12 | * the C library function random(), it *is* threadsafe: the "state" of 13 | * the generator is returned in the seed_p argument to each function. 14 | * 3. The main function is just a simple driver. 15 | * 16 | * IPP: Not discussed, but needed by the multithreaded linked list programs 17 | * discussed in Section 4.9.2-4.9.4 (pp. 183-190). 18 | */ 19 | #include 20 | #include 21 | #include "my_rand.h" 22 | 23 | #define MR_MULTIPLIER 279470273 24 | #define MR_INCREMENT 0 25 | #define MR_MODULUS 4294967291U 26 | #define MR_DIVISOR ((double) 4294967291U) 27 | 28 | 29 | #ifdef _MAIN_ 30 | int main(void) { 31 | int n, i; 32 | unsigned seed = 1, x; 33 | double y; 34 | 35 | printf("How many random numbers?\n"); 36 | scanf("%d", &n); 37 | 38 | x = my_rand(&seed); 39 | for (i = 0; i < n; i++) { 40 | x = my_rand(&x); 41 | printf("%u\n", x); 42 | } 43 | for (i = 0; i < n; i++) { 44 | y = my_drand(&x); 45 | printf("%e\n", y); 46 | } 47 | return 0; 48 | } 49 | #endif 50 | 51 | /* Function: my_rand 52 | * In/out arg: seed_p 53 | * Return value: A new pseudo-random unsigned int in the range 54 | * 0 - MR_MODULUS 55 | * 56 | * Notes: 57 | * 1. This is a slightly modified version of the generator in the 58 | * Wikipedia article "Linear congruential generator" 59 | * 2. The seed_p argument stores the "state" for the next call to 60 | * the function. 
61 | */ 62 | unsigned my_rand(unsigned* seed_p) { 63 | long long z = *seed_p; 64 | z *= MR_MULTIPLIER; 65 | // z += MR_INCREMENT; 66 | z %= MR_MODULUS; 67 | *seed_p = z; 68 | return *seed_p; 69 | } 70 | 71 | /* Function: my_drand 72 | * In/out arg: seed_p 73 | * Return value: A new pseudo-random double in the range 0 - 1 74 | */ 75 | double my_drand(unsigned* seed_p) { 76 | unsigned x = my_rand(seed_p); 77 | double y = x/MR_DIVISOR; 78 | return y; 79 | } 80 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/my_rand.h: -------------------------------------------------------------------------------- 1 | /* File: my_rand.h 2 | * Purpose: Header file for my_rand.c, which implements a simple 3 | * pseudo-random number generator. 4 | * 5 | * IPP: Not discussed, but needed by the multithreaded linked list programs 6 | * discussed in Section 4.9.2-4.9.4 (pp. 183-190). 7 | */ 8 | #ifndef _MY_RAND_H_ 9 | #define _MY_RAND_H_ 10 | 11 | unsigned my_rand(unsigned* a_p); 12 | double my_drand(unsigned* a_p); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_busy_bar.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * pth_busy_bar.c 3 | * 4 | * Purpose: 5 | * Use busy wait barriers to synchronize threads. 6 | * 7 | * Input: 8 | * none 9 | * Output: 10 | * Time for BARRIER_COUNT barriers 11 | * 12 | * Compile: 13 | * gcc -g -Wall -o pth_busy_bar pth_busy_bar.c -lpthread 14 | * Usage: 15 | * ./pth_busy_bar 16 | * 17 | * Note: 18 | * Compile flag DEBUG will print a message after each barrier 19 | * 20 | * IPP: Section 4.8.1 (p. 
177) 21 | */ 22 | 23 | #include 24 | #include 25 | #include 26 | #include "timer.h" 27 | 28 | #define BARRIER_COUNT 100 29 | 30 | int thread_count; 31 | int barrier_thread_counts[BARRIER_COUNT]; 32 | pthread_mutex_t barrier_mutex; 33 | 34 | void Usage(char* prog_name); 35 | void *Thread_work(void* rank); 36 | 37 | /*--------------------------------------------------------------------*/ 38 | int main(int argc, char* argv[]) { 39 | long thread, i; 40 | pthread_t* thread_handles; 41 | double start, finish; 42 | 43 | if (argc != 2) 44 | Usage(argv[0]); 45 | thread_count = strtol(argv[1], NULL, 10); 46 | 47 | thread_handles = malloc (thread_count*sizeof(pthread_t)); 48 | for (i = 0; i < BARRIER_COUNT; i++) 49 | barrier_thread_counts[i] = 0; 50 | pthread_mutex_init(&barrier_mutex, NULL); 51 | 52 | GET_TIME(start); 53 | for (thread = 0; thread < thread_count; thread++) 54 | pthread_create(&thread_handles[thread], NULL, 55 | Thread_work, (void*) thread); 56 | 57 | for (thread = 0; thread < thread_count; thread++) { 58 | pthread_join(thread_handles[thread], NULL); 59 | } 60 | GET_TIME(finish); 61 | printf("Elapsed time = %e seconds\n", finish - start); 62 | 63 | pthread_mutex_destroy(&barrier_mutex); 64 | free(thread_handles); 65 | return 0; 66 | } /* main */ 67 | 68 | 69 | /*-------------------------------------------------------------------- 70 | * Function: Usage 71 | * Purpose: Print command line for function and terminate 72 | * In arg: prog_name 73 | */ 74 | void Usage(char* prog_name) { 75 | 76 | fprintf(stderr, "usage: %s \n", prog_name); 77 | exit(0); 78 | } /* Usage */ 79 | 80 | 81 | /*------------------------------------------------------------------- 82 | * Function: Thread_work 83 | * Purpose: Run BARRIER_COUNT barriers 84 | * In arg: rank 85 | * Global var: thread_count, barrier_thread_counts, barrier_mutex 86 | * Return val: Ignored 87 | */ 88 | void *Thread_work(void* rank) { 89 | # ifdef DEBUG 90 | long my_rank = (long) rank; 91 | # endif 92 | int i; 93 | 
94 | for (i = 0; i < BARRIER_COUNT; i++) { 95 | pthread_mutex_lock(&barrier_mutex); 96 | barrier_thread_counts[i]++; 97 | pthread_mutex_unlock(&barrier_mutex); 98 | while (barrier_thread_counts[i] < thread_count); 99 | # ifdef DEBUG 100 | if (my_rank == 0) { 101 | printf("All threads entered barrier %d\n", i); 102 | fflush(stdout); 103 | } 104 | # endif 105 | } 106 | 107 | return NULL; 108 | } /* Thread_work */ 109 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_cond_bar.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * pth_cond_bar.c 3 | * 4 | * Purpose: 5 | * Use condition wait barriers to synchronize threads. 6 | * 7 | * Compile: 8 | * gcc -g -Wall -o pth_cond_bar pth_cond_bar.c -lpthread 9 | * timer.h must be available 10 | * 11 | * Usage: 12 | * ./pth_cond_bar 13 | * 14 | * Input: 15 | * none 16 | * Output: 17 | * Time for BARRIER_COUNT barriers 18 | * 19 | * Note: 20 | * Verbose output can be enabled with the compile flag -DDEBUG 21 | * 22 | * IPP: Section 4.8.3 (pp. 179 and ff.) 
23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | #include "timer.h" 29 | 30 | #define BARRIER_COUNT 100 31 | 32 | int thread_count; 33 | int barrier_thread_count = 0; 34 | pthread_mutex_t barrier_mutex; 35 | pthread_cond_t ok_to_proceed; 36 | 37 | void Usage(char* prog_name); 38 | void *Thread_work(void* rank); 39 | 40 | /*--------------------------------------------------------------------*/ 41 | int main(int argc, char* argv[]) { 42 | long thread; 43 | pthread_t* thread_handles; 44 | double start, finish; 45 | 46 | if (argc != 2) 47 | Usage(argv[0]); 48 | thread_count = strtol(argv[1], NULL, 10); 49 | 50 | thread_handles = malloc (thread_count*sizeof(pthread_t)); 51 | pthread_mutex_init(&barrier_mutex, NULL); 52 | pthread_cond_init(&ok_to_proceed, NULL); 53 | 54 | GET_TIME(start); 55 | for (thread = 0; thread < thread_count; thread++) 56 | pthread_create(&thread_handles[thread], NULL, 57 | Thread_work, (void*) thread); 58 | 59 | for (thread = 0; thread < thread_count; thread++) { 60 | pthread_join(thread_handles[thread], NULL); 61 | } 62 | GET_TIME(finish); 63 | printf("Elapsed time = %e seconds\n", finish - start); 64 | 65 | pthread_mutex_destroy(&barrier_mutex); 66 | pthread_cond_destroy(&ok_to_proceed); 67 | free(thread_handles); 68 | return 0; 69 | } /* main */ 70 | 71 | 72 | /*-------------------------------------------------------------------- 73 | * Function: Usage 74 | * Purpose: Print command line for function and terminate 75 | * In arg: prog_name 76 | */ 77 | void Usage(char* prog_name) { 78 | 79 | fprintf(stderr, "usage: %s \n", prog_name); 80 | exit(0); 81 | } /* Usage */ 82 | 83 | 84 | /*------------------------------------------------------------------- 85 | * Function: Thread_work 86 | * Purpose: Run BARRIER_COUNT barriers 87 | * In arg: rank 88 | * Global var: thread_count, barrier_thread_count, barrier_mutex, 89 | * ok_to_proceed 90 | * Return val: Ignored 91 | */ 92 | void *Thread_work(void* rank) { 93 | # ifdef DEBUG 94 | long 
my_rank = (long) rank; 95 | # endif 96 | int i; 97 | 98 | for (i = 0; i < BARRIER_COUNT; i++) { 99 | pthread_mutex_lock(&barrier_mutex); 100 | barrier_thread_count++; 101 | if (barrier_thread_count == thread_count) { 102 | barrier_thread_count = 0; 103 | # ifdef DEBUG 104 | printf("Thread %ld > Signalling other threads in barrier %d\n", 105 | my_rank, i); 106 | fflush(stdout); 107 | # endif 108 | pthread_cond_broadcast(&ok_to_proceed); 109 | } else { 110 | // Wait unlocks mutex and puts thread to sleep. 111 | // Put wait in while loop in case some other 112 | // event awakens thread. 113 | while (pthread_cond_wait(&ok_to_proceed, 114 | &barrier_mutex) != 0); 115 | // Mutex is relocked at this point. 116 | # ifdef DEBUG 117 | printf("Thread %ld > Awakened in barrier %d\n", my_rank, i); 118 | fflush(stdout); 119 | # endif 120 | } 121 | pthread_mutex_unlock(&barrier_mutex); 122 | # ifdef DEBUG 123 | if (my_rank == 0) { 124 | printf("All threads completed barrier %d\n", i); 125 | fflush(stdout); 126 | } 127 | # endif 128 | } 129 | 130 | return NULL; 131 | } /* Thread_work */ 132 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_do_nothing.c: -------------------------------------------------------------------------------- 1 | /* File: pth_do_nothing.c 2 | * Purpose: Estimate the overhead associated with starting threads. 3 | * 4 | * Compile: gcc -g -Wall -o pth_do_nothing pth_do_nothing.c -lpthreads 5 | * timer.h needs to be available 6 | * Run: ./pth_do_nothing 7 | * 8 | * Input: None 9 | * Output: Time elapsed from starting first thread to joining 10 | * last. 11 | * 12 | * IPP: Section 4.5 (pp. 167 and ff.) 
13 | */ 14 | #include 15 | #include 16 | #include 17 | #include "timer.h" 18 | 19 | const int MAX_THREADS = 1024; 20 | 21 | 22 | /* Thread function */ 23 | void *Thread_function(void* ignore); 24 | 25 | /* No use of shared variables */ 26 | void Usage(char* prog_name); 27 | 28 | int main(int argc, char* argv[]) { 29 | int thread_count; 30 | long thread; /* Use long in case of a 64-bit system */ 31 | pthread_t* thread_handles; 32 | double start, finish, elapsed; 33 | 34 | /* Get number of threads from command line */ 35 | if (argc != 2) Usage(argv[0]); 36 | thread_count = strtol(argv[1], NULL, 10); 37 | if (thread_count <= 0 || thread_count > MAX_THREADS) Usage(argv[0]); 38 | 39 | thread_handles = (pthread_t*) malloc (thread_count*sizeof(pthread_t)); 40 | 41 | GET_TIME(start); 42 | for (thread = 0; thread < thread_count; thread++) 43 | pthread_create(&thread_handles[thread], NULL, 44 | Thread_function, NULL); 45 | 46 | for (thread = 0; thread < thread_count; thread++) 47 | pthread_join(thread_handles[thread], NULL); 48 | GET_TIME(finish); 49 | elapsed = finish - start; 50 | 51 | printf("The elapsed time is %e seconds\n", elapsed); 52 | 53 | free(thread_handles); 54 | return 0; 55 | } /* main */ 56 | 57 | /*-------------------------------------------------------------------*/ 58 | void Usage(char* prog_name) { 59 | fprintf(stderr, "usage: %s \n", prog_name); 60 | fprintf(stderr, "0 < number of threads <= %d\n", MAX_THREADS); 61 | exit(0); 62 | } /* Usage */ 63 | 64 | /*-------------------------------------------------------------------*/ 65 | void* Thread_function(void* ignore) { 66 | return NULL; 67 | } /* Thread_function */ 68 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_hello.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * pth_hello.c 3 | * 4 | * Purpose: 5 | * Illustrate basic use of pthreads: create 
some threads, 6 | * each of which prints a message. 7 | * 8 | * Input: 9 | * none 10 | * Output: 11 | * message from each thread 12 | * 13 | * Compile: gcc -g -Wall -o pth_hello pth_hello.c -lpthread 14 | * Usage: ./pth_hello 15 | * 16 | * IPP: Section 4.2 (p. 153 and ff.) 17 | */ 18 | #include 19 | #include 20 | #include 21 | 22 | const int MAX_THREADS = 64; 23 | 24 | /* Global variable: accessible to all threads */ 25 | int thread_count; 26 | 27 | void Usage(char* prog_name); 28 | void *Hello(void* rank); /* Thread function */ 29 | 30 | /*--------------------------------------------------------------------*/ 31 | int main(int argc, char* argv[]) { 32 | long thread; /* Use long in case of a 64-bit system */ 33 | pthread_t* thread_handles; 34 | 35 | /* Get number of threads from command line */ 36 | if (argc != 2) Usage(argv[0]); 37 | thread_count = strtol(argv[1], NULL, 10); 38 | if (thread_count <= 0 || thread_count > MAX_THREADS) Usage(argv[0]); 39 | 40 | thread_handles = malloc (thread_count*sizeof(pthread_t)); 41 | 42 | for (thread = 0; thread < thread_count; thread++) 43 | pthread_create(&thread_handles[thread], NULL, 44 | Hello, (void*) thread); 45 | 46 | printf("Hello from the main thread\n"); 47 | 48 | for (thread = 0; thread < thread_count; thread++) 49 | pthread_join(thread_handles[thread], NULL); 50 | 51 | free(thread_handles); 52 | return 0; 53 | } /* main */ 54 | 55 | /*-------------------------------------------------------------------*/ 56 | void *Hello(void* rank) { 57 | long my_rank = (long) rank; /* Use long in case of 64-bit system */ 58 | 59 | printf("Hello from thread %ld of %d\n", my_rank, thread_count); 60 | 61 | return NULL; 62 | } /* Hello */ 63 | 64 | /*-------------------------------------------------------------------*/ 65 | void Usage(char* prog_name) { 66 | fprintf(stderr, "usage: %s \n", prog_name); 67 | fprintf(stderr, "0 < number of threads <= %d\n", MAX_THREADS); 68 | exit(0); 69 | } /* Usage */ 70 | 
-------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_msg.c: -------------------------------------------------------------------------------- 1 | /* File: pth_msg.c 2 | * 3 | * Purpose: Illustrate a synchronization problem with pthreads: create 4 | * some threads, each of which creates and prints a message. 5 | * 6 | * Input: none 7 | * Output: message from each thread 8 | * 9 | * Compile: gcc -g -Wall -o pth_msg pth_msg.c -lpthread 10 | * Usage: pth_msg 11 | * 12 | * IPP: Section 4.7 (pp. 172 and ff.) 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | const int MAX_THREADS = 1024; 20 | const int MSG_MAX = 100; 21 | 22 | /* Global variables: accessible to all threads */ 23 | int thread_count; 24 | char** messages; 25 | 26 | void Usage(char* prog_name); 27 | void *Send_msg(void* rank); /* Thread function */ 28 | 29 | /*--------------------------------------------------------------------*/ 30 | int main(int argc, char* argv[]) { 31 | long thread; 32 | pthread_t* thread_handles; 33 | 34 | if (argc != 2) Usage(argv[0]); 35 | thread_count = strtol(argv[1], NULL, 10); 36 | if (thread_count <= 0 || thread_count > MAX_THREADS) Usage(argv[0]); 37 | 38 | thread_handles = (pthread_t*) malloc (thread_count*sizeof(pthread_t)); 39 | messages = (char**) malloc(thread_count*sizeof(char*)); 40 | for (thread = 0; thread < thread_count; thread++) 41 | messages[thread] = NULL; 42 | 43 | for (thread = 0; thread < thread_count; thread++) 44 | pthread_create(&thread_handles[thread], (pthread_attr_t*) NULL, 45 | Send_msg, (void*) thread); 46 | 47 | for (thread = 0; thread < thread_count; thread++) { 48 | pthread_join(thread_handles[thread], NULL); 49 | } 50 | 51 | for (thread = 0; thread < thread_count; thread++) 52 | free(messages[thread]); 53 | free(messages); 54 | 55 | free(thread_handles); 56 | return 0; 57 | } /* main */ 58 | 59 | 60 | 
/*-------------------------------------------------------------------- 61 | * Function: Usage 62 | * Purpose: Print command line for function and terminate 63 | * In arg: prog_name 64 | */ 65 | void Usage(char* prog_name) { 66 | 67 | fprintf(stderr, "usage: %s \n", prog_name); 68 | exit(0); 69 | } /* Usage */ 70 | 71 | 72 | /*------------------------------------------------------------------- 73 | * Function: Send_msg 74 | * Purpose: Create a message and ``send'' it by copying it 75 | * into the global messages array. Receive a message 76 | * and print it. 77 | * In arg: rank 78 | * Global in: thread_count 79 | * Global in/out: messages 80 | * Return val: Ignored 81 | * Note: The my_msg buffer is freed in main 82 | */ 83 | void *Send_msg(void* rank) { 84 | long my_rank = (long) rank; 85 | long dest = (my_rank + 1) % thread_count; 86 | long source = (my_rank + thread_count - 1) % thread_count; 87 | char* my_msg = (char*) malloc(MSG_MAX*sizeof(char)); 88 | 89 | sprintf(my_msg, "Hello to %ld from %ld", dest, my_rank); 90 | messages[dest] = my_msg; 91 | 92 | if (messages[my_rank] != NULL) 93 | printf("Thread %ld > %s\n", my_rank, messages[my_rank]); 94 | else 95 | printf("Thread %ld > No message from %ld\n", my_rank, source); 96 | 97 | return NULL; 98 | } /* Send_msg */ 99 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_msg_sem.c: -------------------------------------------------------------------------------- 1 | /* File: pth_msg_sem.c 2 | * 3 | * Purpose: Illustrate a synchronization problem with pthreads: create 4 | * some threads, each of which creates a message and "sends" it 5 | * to another thread, by copying it into that thread's buffer. 6 | * This version uses semaphores to solve the synchronization 7 | * problem. 
8 | * 9 | * Input: none 10 | * Output: message from each thread 11 | * 12 | * Compile: gcc -g -Wall -o pth_msg_sem pth_msg_sem.c -lpthread 13 | * Usage: ./pth_msg_sem 14 | * 15 | * Note: MacOS X (as of 10.6) doesn't have a working implementation 16 | * of unnamed semaphores. See pth_msg_sem_mac.c for an 17 | * alternative implementation. 18 | * 19 | * IPP: Section 4.7 (pp. 174 and ff.) 20 | */ 21 | 22 | #include 23 | #include 24 | #include 25 | #include /* Semaphores are not part of Pthreads */ 26 | 27 | const int MAX_THREADS = 1024; 28 | const int MSG_MAX = 100; 29 | 30 | /* Global variables: accessible to all threads */ 31 | int thread_count; 32 | char** messages; 33 | sem_t* semaphores; 34 | 35 | void Usage(char* prog_name); 36 | void *Send_msg(void* rank); /* Thread function */ 37 | 38 | /*--------------------------------------------------------------------*/ 39 | int main(int argc, char* argv[]) { 40 | long thread; 41 | pthread_t* thread_handles; 42 | 43 | if (argc != 2) Usage(argv[0]); 44 | thread_count = strtol(argv[1], NULL, 10); 45 | if (thread_count <= 0 || thread_count > MAX_THREADS) Usage(argv[0]); 46 | 47 | thread_handles = malloc (thread_count*sizeof(pthread_t)); 48 | messages = malloc(thread_count*sizeof(char*)); 49 | semaphores = malloc(thread_count*sizeof(sem_t)); 50 | for (thread = 0; thread < thread_count; thread++) { 51 | messages[thread] = NULL; 52 | /* Initialize all semaphores to 0 -- i.e., locked */ 53 | sem_init(&semaphores[thread], 0, 0); 54 | } 55 | 56 | for (thread = 0; thread < thread_count; thread++) 57 | pthread_create(&thread_handles[thread], (pthread_attr_t*) NULL, 58 | Send_msg, (void*) thread); 59 | 60 | for (thread = 0; thread < thread_count; thread++) { 61 | pthread_join(thread_handles[thread], NULL); 62 | } 63 | 64 | for (thread = 0; thread < thread_count; thread++) { 65 | free(messages[thread]); 66 | sem_destroy(&semaphores[thread]); 67 | } 68 | free(messages); 69 | free(semaphores); 70 | free(thread_handles); 71 | 72 | return 
0; 73 | } /* main */ 74 | 75 | 76 | /*-------------------------------------------------------------------- 77 | * Function: Usage 78 | * Purpose: Print command line for function and terminate 79 | * In arg: prog_name 80 | */ 81 | void Usage(char* prog_name) { 82 | 83 | fprintf(stderr, "usage: %s \n", prog_name); 84 | exit(0); 85 | } /* Usage */ 86 | 87 | 88 | /*------------------------------------------------------------------- 89 | * Function: Send_msg 90 | * Purpose: Create a message and ``send'' it by copying it 91 | * into the global messages array. Receive a message 92 | * and print it. 93 | * In arg: rank 94 | * Global in: thread_count 95 | * Global in/out: messages, semaphores 96 | * Return val: Ignored 97 | * Note: The my_msg buffer is freed in main 98 | */ 99 | void *Send_msg(void* rank) { 100 | long my_rank = (long) rank; 101 | long dest = (my_rank + 1) % thread_count; 102 | char* my_msg = (char*) malloc(MSG_MAX*sizeof(char)); 103 | 104 | sprintf(my_msg, "Hello to %ld from %ld", dest, my_rank); 105 | messages[dest] = my_msg; 106 | sem_post(&semaphores[dest]); /* "Unlock" the semaphore of dest */ 107 | 108 | sem_wait(&semaphores[my_rank]); /* Wait for our semaphore to be unlocked */ 109 | printf("Thread %ld > %s\n", my_rank, messages[my_rank]); 110 | 111 | return NULL; 112 | } /* Send_msg */ 113 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_msg_sem_mac.c: -------------------------------------------------------------------------------- 1 | /* File: pth_msg_sem_mac.c 2 | * 3 | * Purpose: Each thread ``sends a message'' to another thread and prints the 4 | * message it receives. This version uses named semaphores, since 5 | * unnamed semaphores aren't available in MacOS X (as of 10.6). 
6 | * 7 | * Compile: gcc -g -Wall -o pth_msg_sem_mac pth_msg_sem_mac.c -lpthread 8 | * Usage: ./pth_msg_sem_mac 9 | * 10 | * Input: none 11 | * Output: message from each thread 12 | * 13 | * Note: I'm grateful to Prof Gregory Benson of the University of 14 | * San Francisco for showing me how to use semaphores with 15 | * MacOS X. 16 | * 17 | * IPP: Section 4.7 (pp. 174 and ff.) 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | const int MAX_THREADS = 1024; 27 | const int MSG_MAX = 100; 28 | 29 | /* Global variable: accessible to all threads */ 30 | int thread_count; 31 | char** messages; 32 | char** snames; 33 | sem_t** sems; 34 | 35 | void Usage(char* prog_name); 36 | void *Send_msg(void* rank); /* Thread function */ 37 | 38 | /*--------------------------------------------------------------------*/ 39 | int main(int argc, char* argv[]) { 40 | long thread; 41 | pthread_t* thread_handles; 42 | 43 | if (argc != 2) Usage(argv[0]); 44 | thread_count = strtol(argv[1], NULL, 10); 45 | if (thread_count <= 0 || thread_count > MAX_THREADS) Usage(argv[0]); 46 | 47 | thread_handles = (pthread_t*) malloc (thread_count*sizeof(pthread_t)); 48 | messages = (char**) malloc(thread_count*sizeof(char*)); 49 | for (thread = 0; thread < thread_count; thread++) 50 | messages[thread] = NULL; 51 | 52 | sems = (sem_t**) malloc (thread_count*sizeof(sem_t *)); 53 | snames = (char **) malloc (thread_count*sizeof(char *)); 54 | 55 | /* Initialize semaphores to 0: they start "locked". So */ 56 | /* executing a sem_wait will block until they're "unlocked". 
*/ 57 | for (thread = 0; thread < thread_count; thread++) { 58 | snames[thread] = malloc(10*sizeof(char)); 59 | sprintf(snames[thread], "/sem%ld", thread); 60 | sems[thread] = sem_open(snames[thread], O_CREAT, 0777, 0); 61 | } 62 | 63 | for (thread = 0; thread < thread_count; thread++) 64 | pthread_create(&thread_handles[thread], NULL, 65 | Send_msg, (void*) thread); 66 | 67 | for (thread = 0; thread < thread_count; thread++) { 68 | pthread_join(thread_handles[thread], NULL); 69 | } 70 | 71 | for (thread = 0; thread < thread_count; thread++) { 72 | sem_unlink(snames[thread]); 73 | sem_close(sems[thread]); 74 | free(messages[thread]); 75 | free(snames[thread]); 76 | } 77 | 78 | free(sems); 79 | free(messages); 80 | free(thread_handles); 81 | return 0; 82 | } /* main */ 83 | 84 | 85 | /*-------------------------------------------------------------------- 86 | * Function: Usage 87 | * Purpose: Print command line for function and terminate 88 | * In arg: prog_name 89 | */ 90 | void Usage(char* prog_name) { 91 | 92 | fprintf(stderr, "usage: %s \n", prog_name); 93 | exit(0); 94 | } /* Usage */ 95 | 96 | 97 | /*------------------------------------------------------------------- 98 | * Function: Send_msg 99 | * Purpose: The function started by calls to pthread_create 100 | * In arg: rank 101 | * Global var: thread_count, sems 102 | * Return val: Ignored 103 | */ 104 | void *Send_msg(void* rank) { 105 | long my_rank = (long) rank; 106 | long dest = (my_rank + 1) % thread_count; 107 | long source = (my_rank - 1 + thread_count) % thread_count; 108 | char* my_msg = malloc(MSG_MAX*sizeof(char)); 109 | 110 | sprintf(my_msg, "Hello to %ld from %ld", dest, my_rank); 111 | messages[dest] = my_msg; 112 | /* Notify destination thread that it can proceed */ 113 | sem_post(sems[dest]); 114 | 115 | /* Wait for source thread to say OK */ 116 | sem_wait(sems[my_rank]); 117 | if (messages[my_rank] != NULL) 118 | printf("Thread %ld > %s\n", my_rank, messages[my_rank]); 119 | else 120 | 
printf("Thread %ld > No message from %ld\n", my_rank, source); 121 | 122 | return NULL; 123 | } /* hello */ 124 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_busy1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_busy1 -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_busy1.dSYM/Contents/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | English 7 | CFBundleIdentifier 8 | com.apple.xcode.dsym.pth_pi_busy1 9 | CFBundleInfoDictionaryVersion 10 | 6.0 11 | CFBundlePackageType 12 | dSYM 13 | CFBundleSignature 14 | ???? 
15 | CFBundleShortVersionString 16 | 1.0 17 | CFBundleVersion 18 | 1 19 | 20 | 21 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_busy1.dSYM/Contents/Resources/DWARF/pth_pi_busy1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_busy1.dSYM/Contents/Resources/DWARF/pth_pi_busy1 -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_mutex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_mutex -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_mutex.dSYM/Contents/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | English 7 | CFBundleIdentifier 8 | com.apple.xcode.dsym.pth_pi_mutex 9 | CFBundleInfoDictionaryVersion 10 | 6.0 11 | CFBundlePackageType 12 | dSYM 13 | CFBundleSignature 14 | ???? 
15 | CFBundleShortVersionString 16 | 1.0 17 | CFBundleVersion 18 | 1 19 | 20 | 21 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_mutex.dSYM/Contents/Resources/DWARF/pth_pi_mutex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_mutex.dSYM/Contents/Resources/DWARF/pth_pi_mutex -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_sem_bar.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * pth_sem_bar.c 3 | * 4 | * Purpose: 5 | * Use semaphore barriers to synchronize threads. 6 | * 7 | * Input: 8 | * none 9 | * Output: 10 | * Time for BARRIER_COUNT barriers 11 | * 12 | * Compile: 13 | * gcc -g -Wall -o pth_sem_bar pth_sem_bar.c -lpthread 14 | * timer.h needs to be available 15 | * 16 | * Usage: 17 | * ./pth_sem_bar 18 | * 19 | * Note: 20 | * Setting compile flag -DDEBUG will cause a message to be 21 | * printed after completion of each barrier. 22 | * 23 | * IPP: Section 4.8.2 (pp. 177 and ff.) 
24 | */ 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "timer.h" 31 | 32 | #define BARRIER_COUNT 100 33 | 34 | int thread_count; 35 | int counter; 36 | sem_t barrier_sems[BARRIER_COUNT]; 37 | sem_t count_sem; 38 | 39 | void Usage(char* prog_name); 40 | void *Thread_work(void* rank); 41 | 42 | /*--------------------------------------------------------------------*/ 43 | int main(int argc, char* argv[]) { 44 | long thread, i; 45 | pthread_t* thread_handles; 46 | double start, finish; 47 | 48 | if (argc != 2) 49 | Usage(argv[0]); 50 | thread_count = strtol(argv[1], NULL, 10); 51 | 52 | thread_handles = malloc (thread_count*sizeof(pthread_t)); 53 | for (i = 0; i < BARRIER_COUNT; i++) 54 | sem_init(&barrier_sems[i], 0, 0); 55 | sem_init(&count_sem, 0, 1); 56 | 57 | GET_TIME(start); 58 | for (thread = 0; thread < thread_count; thread++) 59 | pthread_create(&thread_handles[thread], (pthread_attr_t*) NULL, 60 | Thread_work, (void*) thread); 61 | 62 | for (thread = 0; thread < thread_count; thread++) { 63 | pthread_join(thread_handles[thread], NULL); 64 | } 65 | GET_TIME(finish); 66 | printf("Elapsed time = %e seconds\n", finish - start); 67 | 68 | sem_destroy(&count_sem); 69 | for (i = 0; i < BARRIER_COUNT; i++) 70 | sem_destroy(&barrier_sems[i]); 71 | free(thread_handles); 72 | return 0; 73 | } /* main */ 74 | 75 | 76 | /*-------------------------------------------------------------------- 77 | * Function: Usage 78 | * Purpose: Print command line for function and terminate 79 | * In arg: prog_name 80 | */ 81 | void Usage(char* prog_name) { 82 | 83 | fprintf(stderr, "usage: %s \n", prog_name); 84 | exit(0); 85 | } /* Usage */ 86 | 87 | 88 | /*------------------------------------------------------------------- 89 | * Function: Thread_work 90 | * Purpose: Run BARRIER_COUNT barriers 91 | * In arg: rank 92 | * Global var: thread_count, count, barrier_sems, count_sem 93 | * Return val: Ignored 94 | */ 95 | void *Thread_work(void* rank) { 96 | # ifdef 
DEBUG 97 | long my_rank = (long) rank; 98 | # endif 99 | int i, j; 100 | 101 | for (i = 0; i < BARRIER_COUNT; i++) { 102 | sem_wait(&count_sem); 103 | if (counter == thread_count - 1) { 104 | counter = 0; 105 | sem_post(&count_sem); 106 | for (j = 0; j < thread_count-1; j++) 107 | sem_post(&barrier_sems[i]); 108 | } else { 109 | counter++; 110 | sem_post(&count_sem); 111 | sem_wait(&barrier_sems[i]); 112 | } 113 | # ifdef DEBUG 114 | if (my_rank == 0) { 115 | printf("All threads completed barrier %d\n", i); 116 | fflush(stdout); 117 | } 118 | # endif 119 | } 120 | 121 | return NULL; 122 | } /* Thread_work */ 123 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_tokenize.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * pth_tokenize.c 3 | * 4 | * Purpose: 5 | * Try to use threads to tokenize text input. Illustrate problems 6 | * with function that isn't threadsafe. 7 | * 8 | * Warning: 9 | * This program definitely has problems. 10 | * 11 | * Input: 12 | * Lines of text 13 | * Output: 14 | * For each line of input: 15 | * the line read by the program, and the tokens identified by 16 | * strtok 17 | * 18 | * Compile: 19 | * gcc -g -Wall -o pth_tokenize pth_tokenize.c -lpthread 20 | * Usage: 21 | * pth_tokenize < 22 | * 23 | * Algorithm: 24 | * For each line of input, next thread reads the line and 25 | * "tokenizes" it. 26 | * 27 | * IPP: Section 4.11 (pp. 195 and ff.) 
28 | */ 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | const int MAX = 1000; 37 | 38 | int thread_count; 39 | sem_t* sems; 40 | 41 | void Usage(char* prog_name); 42 | void *Tokenize(void* rank); /* Thread function */ 43 | 44 | /*--------------------------------------------------------------------*/ 45 | int main(int argc, char* argv[]) { 46 | long thread; 47 | pthread_t* thread_handles; 48 | 49 | if (argc != 2) 50 | Usage(argv[0]); 51 | thread_count = atoi(argv[1]); 52 | 53 | thread_handles = (pthread_t*) malloc (thread_count*sizeof(pthread_t)); 54 | sems = (sem_t*) malloc(thread_count*sizeof(sem_t)); 55 | // sems[0] should be unlocked, the others should be locked 56 | sem_init(&sems[0], 0, 1); 57 | for (thread = 1; thread < thread_count; thread++) 58 | sem_init(&sems[thread], 0, 0); 59 | 60 | printf("Enter text\n"); 61 | for (thread = 0; thread < thread_count; thread++) 62 | pthread_create(&thread_handles[thread], (pthread_attr_t*) NULL, 63 | Tokenize, (void*) thread); 64 | 65 | for (thread = 0; thread < thread_count; thread++) { 66 | pthread_join(thread_handles[thread], NULL); 67 | } 68 | 69 | for (thread=0; thread < thread_count; thread++) 70 | sem_destroy(&sems[thread]); 71 | 72 | free(sems); 73 | free(thread_handles); 74 | return 0; 75 | } /* main */ 76 | 77 | 78 | /*-------------------------------------------------------------------- 79 | * Function: Usage 80 | * Purpose: Print command line for function and terminate 81 | * In arg: prog_name 82 | */ 83 | void Usage(char* prog_name) { 84 | 85 | fprintf(stderr, "usage: %s \n", prog_name); 86 | exit(0); 87 | } /* Usage */ 88 | 89 | 90 | /*------------------------------------------------------------------- 91 | * Function: Tokenize 92 | * Purpose: Tokenize lines of input 93 | * In arg: rank 94 | * Global vars: thread_count (in), sems (in/out) 95 | * Return val: Ignored 96 | */ 97 | void *Tokenize(void* rank) { 98 | long my_rank = (long) rank; 99 | int count; 100 | int next = 
(my_rank + 1) % thread_count; 101 | char *fg_rv; 102 | char my_line[MAX]; 103 | char *my_string; 104 | 105 | /* Force sequential reading of the input */ 106 | sem_wait(&sems[my_rank]); 107 | fg_rv = fgets(my_line, MAX, stdin); 108 | sem_post(&sems[next]); 109 | while (fg_rv != NULL) { 110 | printf("Thread %ld > my line = %s", my_rank, my_line); 111 | 112 | count = 0; 113 | my_string = strtok(my_line, " \t\n"); 114 | while ( my_string != NULL ) { 115 | count++; 116 | printf("Thread %ld > string %d = %s\n", my_rank, count, my_string); 117 | my_string = strtok(NULL, " \t\n"); 118 | } 119 | if (my_line != NULL) printf("Thread %ld > After tokenizing, my_line = %s\n", 120 | my_rank, my_line); 121 | 122 | sem_wait(&sems[my_rank]); 123 | fg_rv = fgets(my_line, MAX, stdin); 124 | sem_post(&sems[next]); 125 | } 126 | 127 | return NULL; 128 | } /* Tokenize */ 129 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_tokenize_r.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * pth_tokenize_r.c 3 | * 4 | * Purpose: 5 | * Use threads to tokenize text input. Fix original version 6 | * which wasn't threadsafe. 7 | * 8 | * Input: 9 | * Lines of text 10 | * Output: 11 | * For each line of input: 12 | * the line read by the program, and the tokens identified by 13 | * strtok_r 14 | * 15 | * Compile: 16 | * gcc -g -Wall -o pth_tokenize_r pth_tokenize_r.c -lpthread 17 | * Usage: 18 | * pth_tokenize_r < 19 | * 20 | * Algorithm: 21 | * For each line of input, next thread reads the line and 22 | * "tokenizes" it. 23 | * 24 | * IPP: Section 4.11 (pp. 197 and ff.) 
25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | const int MAX = 1000; 34 | 35 | int thread_count; 36 | sem_t* sems; 37 | 38 | void Usage(char* prog_name); 39 | void *Tokenize(void* rank); /* Thread function */ 40 | 41 | /*--------------------------------------------------------------------*/ 42 | int main(int argc, char* argv[]) { 43 | long thread; 44 | pthread_t* thread_handles; 45 | 46 | if (argc != 2) 47 | Usage(argv[0]); 48 | thread_count = atoi(argv[1]); 49 | 50 | thread_handles = (pthread_t*) malloc (thread_count*sizeof(pthread_t)); 51 | sems = (sem_t*) malloc(thread_count*sizeof(sem_t)); 52 | // sems[0] should be unlocked, the others should be locked 53 | sem_init(&sems[0], 0, 1); 54 | for (thread = 1; thread < thread_count; thread++) 55 | sem_init(&sems[thread], 0, 0); 56 | 57 | printf("Enter text\n"); 58 | for (thread = 0; thread < thread_count; thread++) 59 | pthread_create(&thread_handles[thread], (pthread_attr_t*) NULL, 60 | Tokenize, (void*) thread); 61 | 62 | for (thread = 0; thread < thread_count; thread++) { 63 | pthread_join(thread_handles[thread], NULL); 64 | } 65 | 66 | for (thread=0; thread < thread_count; thread++) 67 | sem_destroy(&sems[thread]); 68 | 69 | free(sems); 70 | free(thread_handles); 71 | return 0; 72 | } /* main */ 73 | 74 | 75 | /*-------------------------------------------------------------------- 76 | * Function: Usage 77 | * Purpose: Print command line for function and terminate 78 | * In arg: prog_name 79 | */ 80 | void Usage(char* prog_name) { 81 | 82 | fprintf(stderr, "usage: %s \n", prog_name); 83 | exit(0); 84 | } /* Usage */ 85 | 86 | 87 | /*------------------------------------------------------------------- 88 | * Function: Tokenize 89 | * Purpose: Tokenize lines of input 90 | * In arg: rank 91 | * Global vars: thread_count (in), sems (in/out) 92 | * Return val: Ignored 93 | */ 94 | void *Tokenize(void* rank) { 95 | long my_rank = (long) rank; 96 | int count; 97 | int next = 
(my_rank + 1) % thread_count; 98 | char *fg_rv; 99 | char my_line[MAX]; 100 | char *my_string; 101 | char *saveptr; 102 | 103 | /* Force sequential reading of the input */ 104 | sem_wait(&sems[my_rank]); 105 | fg_rv = fgets(my_line, MAX, stdin); 106 | sem_post(&sems[next]); 107 | while (fg_rv != NULL) { 108 | printf("Thread %ld > my line = %s", my_rank, my_line); 109 | 110 | count = 0; 111 | my_string = strtok_r(my_line, " \t\n", &saveptr); 112 | while ( my_string != NULL ) { 113 | count++; 114 | printf("Thread %ld > string %d = %s\n", my_rank, count, my_string); 115 | my_string = strtok_r(NULL, " \t\n", &saveptr); 116 | } 117 | 118 | sem_wait(&sems[my_rank]); 119 | fg_rv = fgets(my_line, MAX, stdin); 120 | sem_post(&sems[next]); 121 | } 122 | 123 | return NULL; 124 | } /* Tokenize */ 125 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/timer.h: -------------------------------------------------------------------------------- 1 | /* File: timer.h 2 | * 3 | * Purpose: Define a macro that returns the number of seconds that 4 | * have elapsed since some point in the past. The timer 5 | * should return times with microsecond accuracy. 6 | * 7 | * Note: The argument passed to the GET_TIME macro should be 8 | * a double, *not* a pointer to a double. 9 | * 10 | * Example: 11 | * #include "timer.h" 12 | * . . . 13 | * double start, finish, elapsed; 14 | * . . . 15 | * GET_TIME(start); 16 | * . . . 17 | * Code to be timed 18 | * . . . 19 | * GET_TIME(finish); 20 | * elapsed = finish - start; 21 | * printf("The code to be timed took %e seconds\n", elapsed); 22 | * 23 | * IPP: Section 3.6.1 (pp. 121 and ff.) and Section 6.1.2 (pp. 273 and ff.) 
24 | */ 25 | #ifndef _TIMER_H_ 26 | #define _TIMER_H_ 27 | 28 | #include 29 | 30 | /* The argument now should be a double (not a pointer to a double) */ 31 | #define GET_TIME(now) { \ 32 | struct timeval t; \ 33 | gettimeofday(&t, NULL); \ 34 | now = t.tv_sec + t.tv_usec/1000000.0; \ 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/bubble.c: -------------------------------------------------------------------------------- 1 | /* File: bubble.c 2 | * 3 | * Purpose: Use bubble sort to sort a list of ints. 4 | * 5 | * Compile: gcc -g -Wall -o bubble bubble.c 6 | * Usage: bubble 7 | * n: number of elements in list 8 | * 'g': generate list using a random number generator 9 | * 'i': user input list 10 | * 11 | * Input: list (optional) 12 | * Output: sorted list 13 | * 14 | * IPP: Section 3.7.1 (pp. 127 and ff.) and Section 5.6.1 15 | * (pp. 232 and ff.) 16 | */ 17 | #include 18 | #include 19 | 20 | /* For random list, 0 <= keys < RMAX */ 21 | const int RMAX = 100; 22 | 23 | void Usage(char* prog_name); 24 | void Get_args(int argc, char* argv[], int* n_p, char* g_i_p); 25 | void Generate_list(int a[], int n); 26 | void Print_list(int a[], int n, char* title); 27 | void Read_list(int a[], int n); 28 | void Bubble_sort(int a[], int n); 29 | 30 | /*-----------------------------------------------------------------*/ 31 | int main(int argc, char* argv[]) { 32 | int n; 33 | char g_i; 34 | int* a; 35 | 36 | Get_args(argc, argv, &n, &g_i); 37 | a = (int*) malloc(n*sizeof(int)); 38 | if (g_i == 'g') { 39 | Generate_list(a, n); 40 | Print_list(a, n, "Before sort"); 41 | } else { 42 | Read_list(a, n); 43 | } 44 | 45 | Bubble_sort(a, n); 46 | 47 | Print_list(a, n, "After sort"); 48 | 49 | free(a); 50 | return 0; 51 | } /* main */ 52 | 53 | 54 | /*----------------------------------------------------------------- 55 | * Function: Usage 56 | * Purpose: Summary 
of how to run program 57 | */ 58 | void Usage(char* prog_name) { 59 | fprintf(stderr, "usage: %s \n", prog_name); 60 | fprintf(stderr, " n: number of elements in list\n"); 61 | fprintf(stderr, " 'g': generate list using a random number generator\n"); 62 | fprintf(stderr, " 'i': user input list\n"); 63 | } /* Usage */ 64 | 65 | 66 | /*----------------------------------------------------------------- 67 | * Function: Get_args 68 | * Purpose: Get and check command line arguments 69 | * In args: argc, argv 70 | * Out args: n_p, g_i_p 71 | */ 72 | void Get_args(int argc, char* argv[], int* n_p, char* g_i_p) { 73 | if (argc != 3 ) { 74 | Usage(argv[0]); 75 | exit(0); 76 | } 77 | *n_p = atoi(argv[1]); 78 | *g_i_p = argv[2][0]; 79 | 80 | if (*n_p <= 0 || (*g_i_p != 'g' && *g_i_p != 'i') ) { 81 | Usage(argv[0]); 82 | exit(0); 83 | } 84 | } /* Get_args */ 85 | 86 | 87 | /*----------------------------------------------------------------- 88 | * Function: Generate_list 89 | * Purpose: Use random number generator to generate list elements 90 | * In args: n 91 | * Out args: a 92 | */ 93 | void Generate_list(int a[], int n) { 94 | int i; 95 | 96 | srandom(0); 97 | for (i = 0; i < n; i++) 98 | a[i] = random() % RMAX; 99 | } /* Generate_list */ 100 | 101 | 102 | /*----------------------------------------------------------------- 103 | * Function: Print_list 104 | * Purpose: Print the elements in the list 105 | * In args: a, n 106 | */ 107 | void Print_list(int a[], int n, char* title) { 108 | int i; 109 | 110 | printf("%s:\n", title); 111 | for (i = 0; i < n; i++) 112 | printf("%d ", a[i]); 113 | printf("\n\n"); 114 | } /* Print_list */ 115 | 116 | 117 | /*----------------------------------------------------------------- 118 | * Function: Read_list 119 | * Purpose: Read elements of list from stdin 120 | * In args: n 121 | * Out args: a 122 | */ 123 | void Read_list(int a[], int n) { 124 | int i; 125 | 126 | printf("Please enter the elements of the list\n"); 127 | for (i = 0; i < n; 
i++) 128 | scanf("%d", &a[i]); 129 | } /* Read_list */ 130 | 131 | 132 | /*----------------------------------------------------------------- 133 | * Function: Bubble_sort 134 | * Purpose: Sort list using bubble sort 135 | * In args: n 136 | * In/out args: a 137 | */ 138 | void Bubble_sort( 139 | int a[] /* in/out */, 140 | int n /* in */) { 141 | int list_length, i, temp; 142 | 143 | for (list_length = n; list_length >= 2; list_length--) 144 | for (i = 0; i < list_length-1; i++) 145 | if (a[i] > a[i+1]) { 146 | temp = a[i]; 147 | a[i] = a[i+1]; 148 | a[i+1] = temp; 149 | } 150 | 151 | } /* Bubble_sort */ 152 | 153 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_fibo.c: -------------------------------------------------------------------------------- 1 | /* File: omp_fibo.c 2 | * 3 | * Purpose: Try to compute n Fibonacci numbers using OpenMP. Show 4 | * what happens if we try to parallelize a loop 5 | * with dependences among the iterations. The program 6 | * has a serious bug. 7 | * 8 | * Compile: gcc -g -Wall -fopenmp -o omp_fibo omp_fibo.c 9 | * Run: ./omp_fibo 10 | * 11 | * Input: none 12 | * Output: A list of Fibonacci numbers 13 | * 14 | * Note: If your output seems to be OK, try increasing the number of 15 | * threads and/or n. 16 | * 17 | * IPP: Section 5.5.2 (pp. 227 and ff.) 
18 | */ 19 | #include 20 | #include 21 | #include 22 | 23 | void Usage(char prog_name[]); 24 | 25 | int main(int argc, char* argv[]) { 26 | int thread_count, n, i; 27 | long long* fibo; 28 | 29 | if (argc != 3) Usage(argv[0]); 30 | thread_count = strtol(argv[1], NULL, 10); 31 | n = strtol(argv[2], NULL, 10); 32 | 33 | fibo = malloc(n*sizeof(long long)); 34 | fibo[0] = fibo[1] = 1; 35 | # pragma omp parallel for num_threads(thread_count) 36 | for (i = 2; i < n; i++) 37 | fibo[i] = fibo[i-1] + fibo[i-2]; 38 | 39 | printf("The first n Fibonacci numbers:\n"); 40 | for (i = 0; i < n; i++) 41 | printf("%d\t%lld\n", i, fibo[i]); 42 | 43 | free(fibo); 44 | return 0; 45 | } /* main */ 46 | 47 | void Usage(char prog_name[]) { 48 | fprintf(stderr, "usage: %s \n", 49 | prog_name); 50 | exit(0); 51 | } /* Usage */ 52 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_hello.c: -------------------------------------------------------------------------------- 1 | /* File: omp_hello.c 2 | * 3 | * Purpose: A parallel hello, world program that uses OpenMP 4 | * 5 | * Compile: gcc -g -Wall -fopenmp -o omp_hello omp_hello.c 6 | * Run: ./omp_hello 7 | * 8 | * Input: none 9 | * Output: A message from each thread 10 | * 11 | * IPP: Section 5.1 (pp. 211 and ff.) 
12 | */ 13 | #include 14 | #include 15 | #include 16 | 17 | void Hello(void); /* Thread function */ 18 | 19 | /*--------------------------------------------------------------------*/ 20 | int main(int argc, char* argv[]) { 21 | int thread_count = strtol(argv[1], NULL, 10); 22 | 23 | # pragma omp parallel num_threads(thread_count) 24 | Hello(); 25 | 26 | return 0; 27 | } /* main */ 28 | 29 | /*------------------------------------------------------------------- 30 | * Function: Hello 31 | * Purpose: Thread function that prints message 32 | */ 33 | void Hello(void) { 34 | int my_rank = omp_get_thread_num(); 35 | int thread_count = omp_get_num_threads(); 36 | 37 | printf("Hello from thread %d of %d\n", my_rank, thread_count); 38 | 39 | } /* Hello */ 40 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_hello_err_chk.c: -------------------------------------------------------------------------------- 1 | /* File: omp_hello.c 2 | * 3 | * Purpose: A parallel hello, world program that uses OpenMP 4 | * 5 | * Compile: gcc -g -Wall -fopenmp -o omp_hello omp_hello.c 6 | * Run: ./omp_hello 7 | * 8 | * Input: none 9 | * Output: A message from each thread 10 | * 11 | * Note: This version does some basic error checking: it checks 12 | * the command line argument, and it checks the number of 13 | * threads started by the parallel directive. It also 14 | * checks for availability of OpenMP by testing for the 15 | * _OPENMP macro 16 | * 17 | * IPP: Section 5.1.3 (pp. 215 and ff.) 
18 | */ 19 | #include 20 | #include 21 | #ifdef _OPENMP 22 | # include 23 | #endif /* _OPENMP */ 24 | 25 | void Usage(char* prog_name); 26 | void Hello(int thread_count); /* Thread function */ 27 | 28 | /*--------------------------------------------------------------------*/ 29 | int main(int argc, char* argv[]) { 30 | int thread_count; 31 | 32 | if (argc != 2) Usage(argv[0]); 33 | thread_count = strtol(argv[1], NULL, 10); 34 | if (thread_count <= 0) Usage(argv[0]); 35 | 36 | # pragma omp parallel num_threads(thread_count) 37 | Hello(thread_count); 38 | 39 | return 0; 40 | } /* main */ 41 | 42 | /*-------------------------------------------------------------------- 43 | * Function: Usage 44 | * Purpose: Print a message indicating how program should be started 45 | * and terminate. 46 | */ 47 | void Usage(char *prog_name) { 48 | fprintf(stderr, "usage: %s \n", prog_name); 49 | fprintf(stderr, " thread_count should be positive\n"); 50 | exit(0); 51 | } /* Usage */ 52 | 53 | /*-------------------------------------------------------------------- 54 | * Function: Hello 55 | * Purpose: Thread function that prints message 56 | */ 57 | void Hello(int thread_count) { 58 | # ifdef _OPENMP 59 | int my_rank = omp_get_thread_num(); 60 | int actual_thread_count = omp_get_num_threads(); 61 | # else 62 | int my_rank = 0; 63 | int actual_thread_count = 1; 64 | # endif 65 | 66 | if (my_rank == 0 && thread_count != actual_thread_count) 67 | fprintf(stderr, "Number of threads started != %d\n", thread_count); 68 | printf("Hello from thread %d of %d\n", my_rank, actual_thread_count); 69 | 70 | } /* Hello */ 71 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_msg/omp_msgps.c: -------------------------------------------------------------------------------- 1 | /* File: omp_msgps.c 2 | * 3 | * Purpose: Simulate message-passing using OpenMP.
Uses critical and 4 | * atomic directives to protect critical sections. 5 | * 6 | * Compile: gcc -g -Wall -fopenmp 7 | * -o omp_msgps omp_msgps.c queue.c 8 | * needs queue.h 9 | * Usage: ./omp_msgps 11 | * 12 | * Input: None 13 | * Output: Source, destination and contents of each message received. 14 | * 15 | * Notes: 16 | * 1. DEBUG flag for more verbose output 17 | * 18 | * IPP: Section 5.8.2 (pp. 242 and ff.) 19 | */ 20 | #include 21 | #include 22 | #include 23 | #include "queue.h" 24 | 25 | const int MAX_MSG = 10000; 26 | 27 | void Usage(char* prog_name); 28 | void Send_msg(struct queue_s* msg_queues[], int my_rank, 29 | int thread_count, int msg_number); 30 | void Try_receive(struct queue_s* q_p, int my_rank); 31 | int Done(struct queue_s* q_p, int done_sending, int thread_count); 32 | 33 | /*-------------------------------------------------------------------*/ 34 | int main(int argc, char* argv[]) { 35 | int thread_count; 36 | int send_max; 37 | struct queue_s** msg_queues; 38 | int done_sending = 0; 39 | 40 | if (argc != 3) Usage(argv[0]); 41 | thread_count = strtol(argv[1], NULL, 10); 42 | send_max = strtol(argv[2], NULL, 10); 43 | if (thread_count <= 0 || send_max < 0) Usage(argv[0]); 44 | 45 | msg_queues = malloc(thread_count*sizeof(struct queue_s*)); /* element type is struct queue_s*, not queue_node_s* */ 46 | 47 | # pragma omp parallel num_threads(thread_count) \ 48 | default(none) shared(thread_count, send_max, msg_queues, done_sending) 49 | { 50 | int my_rank = omp_get_thread_num(); 51 | int msg_number; 52 | srandom(my_rank); 53 | msg_queues[my_rank] = Allocate_queue(); 54 | 55 | # pragma omp barrier /* Don't let any threads send messages */ 56 | /* until all queues are constructed */ 57 | 58 | for (msg_number = 0; msg_number < send_max; msg_number++) { 59 | Send_msg(msg_queues, my_rank, thread_count, msg_number); 60 | Try_receive(msg_queues[my_rank], my_rank); 61 | } 62 | # pragma omp atomic 63 | done_sending++; 64 | # ifdef DEBUG 65 | printf("Thread %d > done sending\n", my_rank); 66 | # endif 67 |
68 | while (!Done(msg_queues[my_rank], done_sending, thread_count)) 69 | Try_receive(msg_queues[my_rank], my_rank); 70 | 71 | /* My queue is empty, and everyone is done sending */ 72 | /* So my queue won't be accessed again, and it's OK to free it */ 73 | Free_queue(msg_queues[my_rank]); 74 | free(msg_queues[my_rank]); 75 | } /* omp parallel */ 76 | 77 | free(msg_queues); 78 | return 0; 79 | } /* main */ 80 | 81 | /*-------------------------------------------------------------------*/ 82 | void Usage(char *prog_name) { 83 | fprintf(stderr, "usage: %s \n", 84 | prog_name); 85 | fprintf(stderr, " number of messages = number sent by each thread\n"); 86 | exit(0); 87 | } /* Usage */ 88 | 89 | /*-------------------------------------------------------------------*/ 90 | void Send_msg(struct queue_s* msg_queues[], int my_rank, 91 | int thread_count, int msg_number) { 92 | // int mesg = random() % MAX_MSG; 93 | int mesg = -msg_number; 94 | int dest = random() % thread_count; 95 | # pragma omp critical 96 | Enqueue(msg_queues[dest], my_rank, mesg); 97 | # ifdef DEBUG 98 | printf("Thread %d > sent %d to %d\n", my_rank, mesg, dest); 99 | # endif 100 | } /* Send_msg */ 101 | 102 | /*-------------------------------------------------------------------*/ 103 | void Try_receive(struct queue_s* q_p, int my_rank) { 104 | int src, mesg; 105 | int queue_size = q_p->enqueued - q_p->dequeued; 106 | 107 | if (queue_size == 0) return; 108 | else if (queue_size == 1) 109 | # pragma omp critical 110 | Dequeue(q_p, &src, &mesg); 111 | else 112 | Dequeue(q_p, &src, &mesg); 113 | printf("Thread %d > received %d from %d\n", my_rank, mesg, src); 114 | } /* Try_receive */ 115 | 116 | /*-------------------------------------------------------------------*/ 117 | int Done(struct queue_s* q_p, int done_sending, int thread_count) { 118 | int queue_size = q_p->enqueued - q_p->dequeued; 119 | if (queue_size == 0 && done_sending == thread_count) 120 | return 1; 121 | else 122 | return 0; 123 | } /* Done 
*/ 124 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_msg/queue.h: -------------------------------------------------------------------------------- 1 | /* File: queue.h 2 | * Purpose: Header file for queue.c which implements a queue of messages 3 | * or pairs of ints (source + contents) as a linked list. 4 | */ 5 | #ifndef _QUEUE_H_ 6 | #define _QUEUE_H_ 7 | 8 | struct queue_node_s { 9 | int src; 10 | int mesg; 11 | struct queue_node_s* next_p; 12 | }; 13 | 14 | struct queue_s{ 15 | int enqueued; 16 | int dequeued; 17 | struct queue_node_s* front_p; 18 | struct queue_node_s* tail_p; 19 | }; 20 | 21 | struct queue_s* Allocate_queue(void); 22 | void Free_queue(struct queue_s* q_p); 23 | void Print_queue(struct queue_s* q_p); 24 | void Enqueue(struct queue_s* q_p, int src, int mesg); 25 | int Dequeue(struct queue_s* q_p, int* src_p, int* mesg_p); 26 | int Search(struct queue_s* q_p, int mesg, int* src_p); 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_msg/queue_lk.h: -------------------------------------------------------------------------------- 1 | /* File: queue_lk.h 2 | * Purpose: Header file for queue_lk.c, which implements a queue with 3 | * OpenMP locks 4 | */ 5 | #ifndef _QUEUE_LK_H_ 6 | #define _QUEUE_LK_H_ 7 | #include 8 | 9 | struct queue_node_s { 10 | int src; 11 | int mesg; 12 | struct queue_node_s* next_p; 13 | }; 14 | 15 | struct queue_s{ 16 | omp_lock_t lock; 17 | int enqueued; 18 | int dequeued; 19 | struct queue_node_s* front_p; 20 | struct queue_node_s* tail_p; 21 | }; 22 | 23 | struct queue_s* Allocate_queue(void); 24 | void Free_queue(struct queue_s* q_p); 25 | void Print_queue(struct queue_s* q_p); 26 | void Enqueue(struct queue_s* q_p, int src, int mesg); 27 | int Dequeue(struct queue_s* q_p, int* src_p, int* 
mesg_p); 28 | int Search(struct queue_s* q_p, int mesg, int* src_p); 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_pi.c: -------------------------------------------------------------------------------- 1 | /* File: omp_pi.c 2 | * Purpose: Estimate pi using OpenMP and the formula 3 | * 4 | * pi = 4*[1 - 1/3 + 1/5 - 1/7 + 1/9 - . . . ] 5 | * 6 | * Compile: gcc -g -Wall -fopenmp -o omp_pi omp_pi.c -lm 7 | * Run: omp_pi 8 | * thread_count is the number of threads 9 | * n is the number of terms of the series to use 10 | * 11 | * Input: none 12 | * Output: The estimate of pi and the value of pi computed by the 13 | * arctan function in the math library 14 | * 15 | * Notes: 16 | * 1. The radius of convergence is only 1. So the series converges 17 | * *very* slowly. 18 | * 19 | * IPP: Section 5.5.4 (pp. 229 and ff.) 20 | */ 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | void Usage(char* prog_name); 28 | 29 | int main(int argc, char* argv[]) { 30 | long long n, i; 31 | int thread_count; 32 | double factor; 33 | double sum = 0.0; 34 | 35 | if (argc != 3) Usage(argv[0]); 36 | thread_count = strtol(argv[1], NULL, 10); 37 | n = strtoll(argv[2], NULL, 10); 38 | if (thread_count < 1 || n < 1) Usage(argv[0]); 39 | 40 | # pragma omp parallel for num_threads(thread_count) \ 41 | reduction(+: sum) private(factor) 42 | for (i = 0; i < n; i++) { 43 | factor = (i % 2 == 0) ? 
1.0 : -1.0; 44 | sum += factor/(2*i+1); 45 | # ifdef DEBUG 46 | printf("Thread %d > i = %lld, my_sum = %f\n", my_rank, i, my_sum); 47 | # endif 48 | } 49 | 50 | sum = 4.0*sum; 51 | printf("With n = %lld terms and %d threads,\n", n, thread_count); 52 | printf(" Our estimate of pi = %.14f\n", sum); 53 | printf(" pi = %.14f\n", 4.0*atan(1.0)); 54 | return 0; 55 | } /* main */ 56 | 57 | /*------------------------------------------------------------------ 58 | * Function: Usage 59 | * Purpose: Print a message explaining how to run the program 60 | * In arg: prog_name 61 | */ 62 | void Usage(char* prog_name) { 63 | fprintf(stderr, "usage: %s \n", prog_name); /* Change */ 64 | fprintf(stderr, " thread_count is the number of threads >= 1\n"); /* Change */ 65 | fprintf(stderr, " n is the number of terms and should be >= 1\n"); 66 | exit(0); 67 | } /* Usage */ 68 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_private.c: -------------------------------------------------------------------------------- 1 | /* File: omp_private.c 2 | * 3 | * Purpose: Print the value of a private variable at the beginning 4 | * of a parallel block and after the end of the block 5 | * 6 | * Compile: gcc -g -Wall -fopenmp -o omp_private omp_private.c 7 | * Run: ./omp_private 8 | * 9 | * Input: none 10 | * Output: Value of int at various points in the program 11 | * 12 | * IPP: Section 5.5.4 (p. 
231) 13 | */ 14 | #include 15 | #include 16 | #include 17 | 18 | /*--------------------------------------------------------------------*/ 19 | int main(int argc, char* argv[]) { 20 | int x = 5; 21 | int thread_count = strtol(argv[1], NULL, 10); 22 | 23 | # pragma omp parallel num_threads(thread_count) \ 24 | private(x) 25 | { 26 | int my_rank = omp_get_thread_num(); 27 | printf("Thread %d > before initialization, x = %d\n", 28 | my_rank, x); 29 | x = 2*my_rank + 2; 30 | printf("Thread %d > after initialization, x = %d\n", 31 | my_rank, x); 32 | } 33 | printf("After parallel block, x = %d\n", x); 34 | 35 | return 0; 36 | } /* main */ 37 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_tokenize.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * omp_tokenize.c 3 | * 4 | * Purpose: 5 | * Try to use threads to tokenize text input. Illustrate problems 6 | * with function that isn't threadsafe. This program has a serious 7 | * bug. 8 | * 9 | * Compile: 10 | * gcc -g -Wall -fopenmp -o omp_tokenize omp_tokenize.c 11 | * Usage: 12 | * omp_tokenize < 13 | * 14 | * Input: 15 | * Lines of text 16 | * (Desired) Output: 17 | * For each line of input: 18 | * the line read by the program, and the tokens identified by 19 | * strtok 20 | * 21 | * Algorithm: 22 | * For each line of input, next thread reads the line and 23 | * "tokenizes" it. 24 | * 25 | * IPP: Section 5.10 (pp. 256 and ff.) 
26 | */ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | const int MAX_LINES = 1000; 34 | const int MAX_LINE = 80; 35 | 36 | void Usage(char* prog_name); 37 | void Get_text(char* lines[], int* line_count_p); 38 | void Tokenize(char* lines[], int line_count, int thread_count); 39 | 40 | /*--------------------------------------------------------------------*/ 41 | int main(int argc, char* argv[]) { 42 | int thread_count, i; 43 | char* lines[1000]; 44 | int line_count; 45 | 46 | if (argc != 2) Usage(argv[0]); 47 | thread_count = strtol(argv[1], NULL, 10); 48 | 49 | printf("Enter text\n"); 50 | Get_text(lines, &line_count); 51 | Tokenize(lines, line_count, thread_count); 52 | 53 | for (i = 0; i < line_count; i++) 54 | if (lines[i] != NULL) free(lines[i]); 55 | 56 | return 0; 57 | } /* main */ 58 | 59 | 60 | /*-------------------------------------------------------------------- 61 | * Function: Usage 62 | * Purpose: Print command line for function and terminate 63 | * In arg: prog_name 64 | */ 65 | void Usage(char* prog_name) { 66 | 67 | fprintf(stderr, "usage: %s \n", prog_name); 68 | exit(0); 69 | } /* Usage */ 70 | 71 | /*-------------------------------------------------------------------- 72 | * Function: Get_text 73 | * Purpose: Read text and store as an array of strings, one per line 74 | * of input text 75 | * Out args: lines, line_count_p 76 | */ 77 | void Get_text(char* lines[], int* line_count_p) { 78 | char* line = malloc(MAX_LINE*sizeof(char)); 79 | int i = 0; 80 | char* fg_rv; 81 | 82 | fg_rv = fgets(line, MAX_LINE, stdin); 83 | while (fg_rv != NULL) { 84 | lines[i++] = line; 85 | line = malloc(MAX_LINE*sizeof(char)); 86 | fg_rv = fgets(line, MAX_LINE, stdin); 87 | } 88 | *line_count_p = i; 89 | } /* Get_text */ 90 | 91 | /*------------------------------------------------------------------- 92 | * Function: Tokenize 93 | * Purpose: Tokenize lines of input 94 | * In args: line_count, thread_count 95 | * In/out arg: lines 96 | */ 97 | 
void Tokenize( 98 | char* lines[] /* in/out */, 99 | int line_count /* in */, 100 | int thread_count /* in */) { 101 | int my_rank, i, j; 102 | char *my_token; 103 | 104 | # pragma omp parallel num_threads(thread_count) \ 105 | default(none) private(my_rank, i, j, my_token) shared(lines, line_count) 106 | { 107 | my_rank = omp_get_thread_num(); 108 | # pragma omp for schedule(static, 1) 109 | for (i = 0; i < line_count; i++) { 110 | printf("Thread %d > line %d = %s", my_rank, i, lines[i]); 111 | j = 0; 112 | my_token = strtok(lines[i], " \t\n"); 113 | while ( my_token != NULL ) { 114 | printf("Thread %d > token %d = %s\n", my_rank, j, my_token); 115 | my_token = strtok(NULL, " \t\n"); 116 | j++; 117 | } 118 | if (lines[i] != NULL) 119 | printf("Thread %d > After tokenizing, my line = %s\n", 120 | my_rank, lines[i]); 121 | } /* for i */ 122 | } /* omp parallel */ 123 | 124 | } /* Tokenize */ 125 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_tokenize_r.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * omp_tokenize_r.c 3 | * 4 | * Purpose: 5 | * Try to use threads to tokenize text input. This version 6 | * uses the thread safe tokenizer strtok_r. 7 | * 8 | * Compile: 9 | * gcc -g -Wall -fopenmp -o omp_tokenize_r omp_tokenize_r.c 10 | * Usage: 11 | * omp_tokenize_r < 12 | * 13 | * Input: 14 | * Lines of text 15 | * Output: 16 | * For each line of input: 17 | * the line read by the program, and the tokens identified by 18 | * strtok 19 | * 20 | * Algorithm: 21 | * For each line of input, next thread reads the line and 22 | * "tokenizes" it. 23 | * 24 | * IPP: Section 5.10 (p. 
258) 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | const int MAX_LINES = 1000; 33 | const int MAX_LINE = 80; 34 | 35 | void Usage(char* prog_name); 36 | void Get_text(char* lines[], int* line_count_p); 37 | void Tokenize(char* lines[], int line_count, int thread_count); 38 | 39 | /*--------------------------------------------------------------------*/ 40 | int main(int argc, char* argv[]) { 41 | int thread_count, i; 42 | char* lines[1000]; 43 | int line_count; 44 | 45 | if (argc != 2) Usage(argv[0]); 46 | thread_count = strtol(argv[1], NULL, 10); 47 | 48 | printf("Enter text\n"); 49 | Get_text(lines, &line_count); 50 | Tokenize(lines, line_count, thread_count); 51 | 52 | for (i = 0; i < line_count; i++) 53 | if (lines[i] != NULL) free(lines[i]); 54 | 55 | return 0; 56 | } /* main */ 57 | 58 | 59 | /*-------------------------------------------------------------------- 60 | * Function: Usage 61 | * Purpose: Print command line for function and terminate 62 | * In arg: prog_name 63 | */ 64 | void Usage(char* prog_name) { 65 | 66 | fprintf(stderr, "usage: %s \n", prog_name); 67 | exit(0); 68 | } /* Usage */ 69 | 70 | /*-------------------------------------------------------------------- 71 | * Function: Get_text 72 | * Purpose: Read text and store as an array of strings, one per line 73 | * of input text 74 | * Out args: lines, line_count_p 75 | */ 76 | void Get_text(char* lines[], int* line_count_p) { 77 | char* line = malloc(MAX_LINE*sizeof(char)); 78 | int i = 0; 79 | char* fg_rv; 80 | 81 | fg_rv = fgets(line, MAX_LINE, stdin); 82 | while (fg_rv != NULL) { 83 | lines[i++] = line; 84 | line = malloc(MAX_LINE*sizeof(char)); 85 | fg_rv = fgets(line, MAX_LINE, stdin); 86 | } 87 | *line_count_p = i; 88 | } /* Get_text */ 89 | 90 | /*------------------------------------------------------------------- 91 | * Function: Tokenize 92 | * Purpose: Tokenize lines of input 93 | * In args: line_count, thread_count 94 | * In/out arg: lines 95 | */ 96 
| void Tokenize( 97 | char* lines[] /* in/out */, 98 | int line_count /* in */, 99 | int thread_count /* in */) { 100 | int my_rank, i, j; 101 | char *my_token, *saveptr; 102 | 103 | # pragma omp parallel num_threads(thread_count) \ 104 | default(none) private(my_rank, i, j, my_token, saveptr) \ 105 | shared(lines, line_count) 106 | { 107 | my_rank = omp_get_thread_num(); 108 | # pragma omp for schedule(static, 1) 109 | for (i = 0; i < line_count; i++) { 110 | printf("Thread %d > line %d = %s", my_rank, i, lines[i]); 111 | j = 0; 112 | my_token = strtok_r(lines[i], " \t\n", &saveptr); 113 | while ( my_token != NULL ) { 114 | printf("Thread %d > token %d = %s\n", my_rank, j, my_token); 115 | my_token = strtok_r(NULL, " \t\n", &saveptr); 116 | j++; 117 | } 118 | if (lines[i] != NULL) 119 | printf("Thread %d > After tokenizing, my line = %s\n", 120 | my_rank, lines[i]); 121 | } /* for i */ 122 | } /* omp parallel */ 123 | 124 | } /* Tokenize */ 125 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_trap1.c: -------------------------------------------------------------------------------- 1 | /* File: omp_trap1.c 2 | * Purpose: Estimate definite integral (or area under curve) using trapezoidal 3 | * rule. 4 | * 5 | * Input: a, b, n 6 | * Output: estimate of integral from a to b of f(x) 7 | * using n trapezoids. 8 | * 9 | * Compile: gcc -g -Wall -fopenmp -o omp_trap1 omp_trap1.c 10 | * Usage: ./omp_trap1 11 | * 12 | * Notes: 13 | * 1. The function f(x) is hardwired. 14 | * 2. In this version, each thread explicitly computes the integral 15 | * over its assigned subinterval, a critical directive is used 16 | * for the global sum. 17 | * 3. This version assumes that n is evenly divisible by the 18 | * number of threads 19 | * 20 | * IPP: Section 5.2.1 (pp. 216 and ff.) 
21 | */ 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | void Usage(char* prog_name); 29 | double f(double x); /* Function we're integrating */ 30 | void Trap(double a, double b, int n, double* global_result_p); 31 | 32 | int main(int argc, char* argv[]) { 33 | double global_result = 0.0; /* Store result in global_result */ 34 | double a, b; /* Left and right endpoints */ 35 | int n; /* Total number of trapezoids */ 36 | int thread_count; 37 | 38 | if (argc != 2) Usage(argv[0]); 39 | thread_count = strtol(argv[1], NULL, 10); 40 | printf("Enter a, b, and n\n"); 41 | scanf("%lf %lf %d", &a, &b, &n); 42 | if (n % thread_count != 0) Usage(argv[0]); 43 | # pragma omp parallel num_threads(thread_count) 44 | Trap(a, b, n, &global_result); 45 | 46 | printf("With n = %d trapezoids, our estimate\n", n); 47 | printf("of the integral from %f to %f = %.14e\n", 48 | a, b, global_result); 49 | return 0; 50 | } /* main */ 51 | 52 | /*-------------------------------------------------------------------- 53 | * Function: Usage 54 | * Purpose: Print command line for function and terminate 55 | * In arg: prog_name 56 | */ 57 | void Usage(char* prog_name) { 58 | 59 | fprintf(stderr, "usage: %s \n", prog_name); 60 | fprintf(stderr, " number of trapezoids must be evenly divisible by\n"); 61 | fprintf(stderr, " number of threads\n"); 62 | exit(0); 63 | } /* Usage */ 64 | 65 | /*------------------------------------------------------------------ 66 | * Function: f 67 | * Purpose: Compute value of function to be integrated 68 | * Input arg: x 69 | * Return val: f(x) 70 | */ 71 | double f(double x) { 72 | double return_val; 73 | 74 | return_val = x*x; 75 | return return_val; 76 | } /* f */ 77 | 78 | /*------------------------------------------------------------------ 79 | * Function: Trap 80 | * Purpose: Use trapezoidal rule to estimate definite integral 81 | * Input args: 82 | * a: left endpoint 83 | * b: right endpoint 84 | * n: number of trapezoids 85 | * Output arg: 
86 | * integral: estimate of integral from a to b of f(x) 87 | */ 88 | void Trap(double a, double b, int n, double* global_result_p) { 89 | double h, x, my_result; 90 | double local_a, local_b; 91 | int i, local_n; 92 | int my_rank = omp_get_thread_num(); 93 | int thread_count = omp_get_num_threads(); 94 | 95 | h = (b-a)/n; 96 | local_n = n/thread_count; 97 | local_a = a + my_rank*local_n*h; 98 | local_b = local_a + local_n*h; 99 | my_result = (f(local_a) + f(local_b))/2.0; 100 | for (i = 1; i <= local_n-1; i++) { 101 | x = local_a + i*h; 102 | my_result += f(x); 103 | } 104 | my_result = my_result*h; 105 | 106 | # pragma omp critical 107 | *global_result_p += my_result; 108 | } /* Trap */ 109 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_trap2a.c: -------------------------------------------------------------------------------- 1 | /* File: omp_trap2a.c 2 | * Purpose: Estimate definite integral (or area under curve) using trapezoidal 3 | * rule. This version uses a hand-coded reduction after the function 4 | * call. 5 | * 6 | * Input: a, b, n 7 | * Output: estimate of integral from a to b of f(x) 8 | * using n trapezoids. 9 | * 10 | * Compile: gcc -g -Wall -fopenmp -o omp_trap2a omp_trap2a.c -lm 11 | * Usage: ./omp_trap2a 12 | * 13 | * Notes: 14 | * 1. The function f(x) is hardwired. 15 | * 2. This version assumes that n is evenly divisible by the 16 | * number of threads 17 | * IPP: Section 5.4 (p. 
222) 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | void Usage(char* prog_name); 26 | double f(double x); /* Function we're integrating */ 27 | double Local_trap(double a, double b, int n); 28 | 29 | int main(int argc, char* argv[]) { 30 | double global_result; /* Store result in global_result */ 31 | double a, b; /* Left and right endpoints */ 32 | int n; /* Total number of trapezoids */ 33 | int thread_count; 34 | 35 | if (argc != 2) Usage(argv[0]); 36 | thread_count = strtol(argv[1], NULL, 10); 37 | printf("Enter a, b, and n\n"); 38 | scanf("%lf %lf %d", &a, &b, &n); 39 | if (n % thread_count != 0) Usage(argv[0]); 40 | 41 | global_result = 0.0; 42 | # pragma omp parallel num_threads(thread_count) 43 | { 44 | double my_result = 0.0; 45 | my_result += Local_trap(a, b, n); 46 | # pragma omp critical 47 | global_result += my_result; 48 | } 49 | 50 | printf("With n = %d trapezoids, our estimate\n", n); 51 | printf("of the integral from %f to %f = %.14e\n", 52 | a, b, global_result); 53 | return 0; 54 | } /* main */ 55 | 56 | /*-------------------------------------------------------------------- 57 | * Function: Usage 58 | * Purpose: Print command line for function and terminate 59 | * In arg: prog_name 60 | */ 61 | void Usage(char* prog_name) { 62 | 63 | fprintf(stderr, "usage: %s \n", prog_name); 64 | fprintf(stderr, " number of trapezoids must be evenly divisible by\n"); 65 | fprintf(stderr, " number of threads\n"); 66 | exit(0); 67 | } /* Usage */ 68 | 69 | /*------------------------------------------------------------------ 70 | * Function: f 71 | * Purpose: Compute value of function to be integrated 72 | * Input arg: x 73 | * Return val: f(x) 74 | */ 75 | double f(double x) { 76 | double return_val; 77 | 78 | return_val = x*x; 79 | return return_val; 80 | } /* f */ 81 | 82 | /*------------------------------------------------------------------ 83 | * Function: Local_trap 84 | * Purpose: Use trapezoidal rule to estimate part of a 
definite 85 | * integral 86 | * Input args: 87 | * a: left endpoint 88 | * b: right endpoint 89 | * n: number of trapezoids 90 | * Return val: estimate of integral from local_a to local_b 91 | * 92 | * Note: return value should be added in to an OpenMP 93 | * reduction variable to get estimate of entire 94 | * integral 95 | */ 96 | double Local_trap(double a, double b, int n) { 97 | double h, x, my_result; 98 | double local_a, local_b; 99 | int i, local_n; 100 | int my_rank = omp_get_thread_num(); 101 | int thread_count = omp_get_num_threads(); 102 | 103 | h = (b-a)/n; 104 | local_n = n/thread_count; 105 | local_a = a + my_rank*local_n*h; 106 | local_b = local_a + local_n*h; 107 | my_result = (f(local_a) + f(local_b))/2.0; 108 | for (i = 1; i <= local_n-1; i++) { 109 | x = local_a + i*h; 110 | my_result += f(x); 111 | } 112 | my_result = my_result*h; 113 | 114 | return my_result; 115 | } /* Trap */ 116 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_trap2b.c: -------------------------------------------------------------------------------- 1 | /* File: omp_trap2b.c 2 | * Purpose: Estimate definite integral (or area under curve) using trapezoidal 3 | * rule. This version uses a reduction clause. 4 | * 5 | * Input: a, b, n 6 | * Output: estimate of integral from a to b of f(x) 7 | * using n trapezoids. 8 | * 9 | * Compile: gcc -g -Wall -fopenmp -o omp_trap2b omp_trap2b.c 10 | * Usage: ./omp_trap2b 11 | * 12 | * Notes: 13 | * 1. The function f(x) is hardwired. 14 | * 2. This version assumes that n is evenly divisible by the 15 | * number of threads 16 | * 17 | * IPP: Section 5.4 (pp. 223 and ff.) 
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | void Usage(char* prog_name); 26 | double f(double x); /* Function we're integrating */ 27 | double Local_trap(double a, double b, int n); 28 | 29 | int main(int argc, char* argv[]) { 30 | double global_result = 0.0; /* Store result in global_result */ 31 | double a, b; /* Left and right endpoints */ 32 | int n; /* Total number of trapezoids */ 33 | int thread_count; 34 | 35 | if (argc != 2) Usage(argv[0]); 36 | thread_count = strtol(argv[1], NULL, 10); 37 | printf("Enter a, b, and n\n"); 38 | scanf("%lf %lf %d", &a, &b, &n); 39 | if (n % thread_count != 0) Usage(argv[0]); 40 | 41 | # pragma omp parallel num_threads(thread_count) \ 42 | reduction(+: global_result) 43 | global_result += Local_trap(a, b, n); 44 | 45 | printf("With n = %d trapezoids, our estimate\n", n); 46 | printf("of the integral from %f to %f = %.14e\n", 47 | a, b, global_result); 48 | return 0; 49 | } /* main */ 50 | 51 | /*-------------------------------------------------------------------- 52 | * Function: Usage 53 | * Purpose: Print command line for function and terminate 54 | * In arg: prog_name 55 | */ 56 | void Usage(char* prog_name) { 57 | 58 | fprintf(stderr, "usage: %s \n", prog_name); 59 | fprintf(stderr, " number of trapezoids must be evenly divisible by\n"); 60 | fprintf(stderr, " number of threads\n"); 61 | exit(0); 62 | } /* Usage */ 63 | 64 | /*------------------------------------------------------------------ 65 | * Function: f 66 | * Purpose: Compute value of function to be integrated 67 | * Input arg: x 68 | * Return val: f(x) 69 | */ 70 | double f(double x) { 71 | double return_val; 72 | 73 | return_val = x*x; 74 | return return_val; 75 | } /* f */ 76 | 77 | /*------------------------------------------------------------------ 78 | * Function: Local_trap 79 | * Purpose: Use trapezoidal rule to estimate part of a definite 80 | * integral 81 | * Input args: 82 | * a: left endpoint 83 | * b: right 
endpoint 84 | * n: number of trapezoids 85 | * Return val: estimate of integral from local_a to local_b 86 | * 87 | * Note: return value should be added in to an OpenMP 88 | * reduction variable to get estimate of entire 89 | * integral 90 | */ 91 | double Local_trap(double a, double b, int n) { 92 | double h, x, my_result; 93 | double local_a, local_b; 94 | int i, local_n; 95 | int my_rank = omp_get_thread_num(); 96 | int thread_count = omp_get_num_threads(); 97 | 98 | h = (b-a)/n; 99 | local_n = n/thread_count; 100 | local_a = a + my_rank*local_n*h; 101 | local_b = local_a + local_n*h; 102 | my_result = (f(local_a) + f(local_b))/2.0; 103 | for (i = 1; i <= local_n-1; i++) { 104 | x = local_a + i*h; 105 | my_result += f(x); 106 | } 107 | my_result = my_result*h; 108 | 109 | return my_result; 110 | } /* Trap */ 111 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_trap3.c: -------------------------------------------------------------------------------- 1 | /* File: omp_trap3.c 2 | * Purpose: Estimate definite integral (or area under curve) using the 3 | * trapezoidal rule. This version uses a parallel for directive 4 | * 5 | * Input: a, b, n 6 | * Output: estimate of integral from a to b of f(x) 7 | * using n trapezoids. 8 | * 9 | * Compile: gcc -g -Wall -fopenmp -o omp_trap3 omp_trap3.c 10 | * Usage: ./omp_trap3 11 | * 12 | * Notes: 13 | * 1. The function f(x) is hardwired. 14 | * 2. In this version, it's not necessary for n to be 15 | * evenly divisible by thread_count. 16 | * 17 | * IPP: Section 5.5 (pp. 224 and ff.) 
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | void Usage(char* prog_name); 26 | double f(double x); /* Function we're integrating */ 27 | double Trap(double a, double b, int n, int thread_count); 28 | 29 | int main(int argc, char* argv[]) { 30 | double global_result = 0.0; /* Store result in global_result */ 31 | double a, b; /* Left and right endpoints */ 32 | int n; /* Total number of trapezoids */ 33 | int thread_count; 34 | 35 | if (argc != 2) Usage(argv[0]); 36 | thread_count = strtol(argv[1], NULL, 10); 37 | printf("Enter a, b, and n\n"); 38 | scanf("%lf %lf %d", &a, &b, &n); 39 | 40 | global_result = Trap(a, b, n, thread_count); 41 | 42 | printf("With n = %d trapezoids, our estimate\n", n); 43 | printf("of the integral from %f to %f = %.14e\n", 44 | a, b, global_result); 45 | return 0; 46 | } /* main */ 47 | 48 | /*-------------------------------------------------------------------- 49 | * Function: Usage 50 | * Purpose: Print command line for function and terminate 51 | * In arg: prog_name 52 | */ 53 | void Usage(char* prog_name) { 54 | 55 | fprintf(stderr, "usage: %s \n", prog_name); 56 | exit(0); 57 | } /* Usage */ 58 | 59 | /*------------------------------------------------------------------ 60 | * Function: f 61 | * Purpose: Compute value of function to be integrated 62 | * Input arg: x 63 | * Return val: f(x) 64 | */ 65 | double f(double x) { 66 | double return_val; 67 | 68 | return_val = x*x; 69 | return return_val; 70 | } /* f */ 71 | 72 | /*------------------------------------------------------------------ 73 | * Function: Trap 74 | * Purpose: Use trapezoidal rule to estimate definite integral 75 | * Input args: 76 | * a: left endpoint 77 | * b: right endpoint 78 | * n: number of trapezoids 79 | * Return val: 80 | * approx: estimate of integral from a to b of f(x) 81 | */ 82 | double Trap(double a, double b, int n, int thread_count) { 83 | double h, approx; 84 | int i; 85 | 86 | h = (b-a)/n; 87 | approx = (f(a) + 
f(b))/2.0; 88 | # pragma omp parallel for num_threads(thread_count) \ 89 | reduction(+: approx) 90 | for (i = 1; i <= n-1; i++) 91 | approx += f(a + i*h); 92 | approx = h*approx; 93 | 94 | return approx; 95 | } /* Trap */ 96 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/trap.c: -------------------------------------------------------------------------------- 1 | /* File: trap.c 2 | * Purpose: Calculate definite integral using trapezoidal 3 | * rule. 4 | * 5 | * Input: a, b, n 6 | * Output: Estimate of integral from a to b of f(x) 7 | * using n trapezoids. 8 | * 9 | * Compile: gcc -g -Wall -o trap trap.c 10 | * Usage: ./trap 11 | * 12 | * Note: The function f(x) is hardwired. 13 | * 14 | * IPP: Section 3.2.1 (pp. 94 and ff.) and 5.2 (p. 216) 15 | */ 16 | 17 | #include 18 | 19 | double f(double x); /* Function we're integrating */ 20 | double Trap(double a, double b, int n, double h); 21 | 22 | int main(void) { 23 | double integral; /* Store result in integral */ 24 | double a, b; /* Left and right endpoints */ 25 | int n; /* Number of trapezoids */ 26 | double h; /* Height of trapezoids */ 27 | 28 | printf("Enter a, b, and n\n"); 29 | scanf("%lf", &a); 30 | scanf("%lf", &b); 31 | scanf("%d", &n); 32 | 33 | h = (b-a)/n; 34 | integral = Trap(a, b, n, h); 35 | 36 | printf("With n = %d trapezoids, our estimate\n", n); 37 | printf("of the integral from %f to %f = %.15f\n", 38 | a, b, integral); 39 | 40 | return 0; 41 | } /* main */ 42 | 43 | /*------------------------------------------------------------------ 44 | * Function: Trap 45 | * Purpose: Estimate integral from a to b of f using trap rule and 46 | * n trapezoids 47 | * Input args: a, b, n, h 48 | * Return val: Estimate of the integral 49 | */ 50 | double Trap(double a, double b, int n, double h) { 51 | double integral; 52 | int k; 53 | 54 | integral = (f(a) + f(b))/2.0; 55 | for (k = 1; k <= n-1; k++) { 56 | 
integral += f(a+k*h); 57 | } 58 | integral = integral*h; 59 | 60 | return integral; 61 | } /* Trap */ 62 | 63 | /*------------------------------------------------------------------ 64 | * Function: f 65 | * Purpose: Compute value of function to be integrated 66 | * Input args: x 67 | */ 68 | double f(double x) { 69 | double return_val; 70 | 71 | return_val = x*x; 72 | return return_val; 73 | } /* f */ 74 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/frac.h: -------------------------------------------------------------------------------- 1 | /* File: frac.h 2 | * Purpose: Header file for frac.c, which implement common fractions and 3 | * certain operations on common fractions in which the denominator 4 | * is a power of 2 5 | * 6 | * IPP: Section 6.2.12 (pp. 331 and ff.) 7 | */ 8 | #ifndef _FRAC_H_ 9 | #define _FRAC_H_ 10 | 11 | typedef struct { 12 | char* num; // bit array representing numerator 13 | unsigned denom; // base 2 log of denominator 14 | int alloc; // size of bit array 15 | int least_sig_bit; // first nonzero bit 16 | int most_sig_bit; // last nonzero bit 17 | } frac_struct; 18 | typedef frac_struct* frac_t; 19 | 20 | frac_t Alloc_frac(void); 21 | void Free_frac(frac_t frac); 22 | void Add(frac_t frac1, unsigned frac2); 23 | void Left_shift_num(frac_t frac, unsigned b); 24 | void Add_to_num(frac_t frac, unsigned power); 25 | void Reduce(frac_t frac); 26 | void Right_shift_num(frac_t frac, int bits); 27 | void Find_sig_bits(frac_t frac); 28 | int Equals(frac_t frac, unsigned val); 29 | int Equals_bit_array(frac_t frac, unsigned val); 30 | unsigned Convert_num_to_unsigned(frac_t frac); 31 | void Print_frac(frac_t frac, int my_rank, char title[]); 32 | 33 | void Debug_print_frac(frac_t frac); 34 | void Assign(frac_t frac, unsigned num, unsigned denom); 35 | #endif 36 | -------------------------------------------------------------------------------- 
/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mat_17e: -------------------------------------------------------------------------------- 1 | 17 2 | 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 3 | 2 0 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 4 | 2 2 0 2 1 2 2 2 2 2 2 2 2 2 2 2 2 5 | 2 2 2 0 2 1 2 2 2 2 2 2 2 2 2 2 2 6 | 2 2 2 2 0 2 1 2 2 2 2 2 2 2 2 2 2 7 | 2 2 2 2 2 0 2 1 2 2 2 2 2 2 2 2 2 8 | 2 2 2 2 2 2 0 2 1 2 2 2 2 2 2 2 2 9 | 2 2 2 2 2 2 2 0 2 1 2 2 2 2 2 2 2 10 | 2 2 2 2 2 2 2 2 0 2 1 2 2 2 2 2 2 11 | 2 2 2 2 2 2 2 2 2 0 2 1 2 2 2 2 2 12 | 2 2 2 2 2 2 2 2 2 2 0 2 1 2 2 2 2 13 | 2 2 2 2 2 2 2 2 2 2 2 0 2 1 2 2 2 14 | 2 2 2 2 2 2 2 2 2 2 2 2 0 2 1 2 2 15 | 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 1 2 16 | 1 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 17 | 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 0 2 18 | 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 19 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mat_17e-mpi-dyn-spl20-cut5.out: -------------------------------------------------------------------------------- 1 | Proc 0 > Best tour 0x1002344b0: 0 16 1 3 5 7 9 11 13 15 2 4 6 8 10 12 14 0 2 | 3 | Cost = 17 4 | Elapsed time = 4.528709e+03 seconds 5 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mat_17e-mpi-dyn-spl8-cut8.out: -------------------------------------------------------------------------------- 1 | Proc 0 > Best tour 0x100234480: 0 16 1 3 5 7 9 11 13 15 2 4 6 8 10 12 14 0 2 | 3 | Cost = 17 4 | Elapsed time = 4.594413e+03 seconds 5 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mat_17e-mpi-stat.out: -------------------------------------------------------------------------------- 1 | Proc 0 > Best tour 0x1002342d0: 0 16 1 3 5 7 9 11 13 15 2 4 6 8 10 12 14 0 2 | 3 | Cost = 17 4 | Elapsed time = 
1.748393e+03 seconds 5 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mat_17e-pth-dyn-8-spl.out: -------------------------------------------------------------------------------- 1 | Best tour: 0 16 1 3 5 7 9 11 13 15 2 4 6 8 10 12 14 0 2 | 3 | Cost = 17 4 | Elapsed time = 1.633299e+03 seconds 5 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mat_17e-pth-stat.out: -------------------------------------------------------------------------------- 1 | Best tour: 0 16 1 3 5 7 9 11 13 15 2 4 6 8 10 12 14 0 2 | 3 | Cost = 17 4 | Elapsed time = 1.621996e+03 seconds 5 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mpi_tsp_dyn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mpi_tsp_dyn -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mpi_tsp_stat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mpi_tsp_stat -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/nbody_basic: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/nbody_basic -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/nbody_red: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/nbody_red -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/nbody_red.dSYM/Contents/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | English 7 | CFBundleIdentifier 8 | com.apple.xcode.dsym.nbody_red 9 | CFBundleInfoDictionaryVersion 10 | 6.0 11 | CFBundlePackageType 12 | dSYM 13 | CFBundleSignature 14 | ???? 
15 | CFBundleShortVersionString 16 | 1.0 17 | CFBundleVersion 18 | 1 19 | 20 | 21 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/nbody_red.dSYM/Contents/Resources/DWARF/nbody_red: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/nbody_red.dSYM/Contents/Resources/DWARF/nbody_red -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/pth_tsp_dyn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/pth_tsp_dyn -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/pth_tsp_stat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/pth_tsp_stat -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/timer.h: -------------------------------------------------------------------------------- 1 | /* File: timer.h 2 | * 3 | * Purpose: Define a macro that returns the number of seconds that 4 | * have elapsed since some point in the past. The timer 5 | * should return times with microsecond accuracy. 
6 | * 7 | * Note: The argument passed to the GET_TIME macro should be 8 | * a double, *not* a pointer to a double. 9 | * 10 | * Example: 11 | * #include "timer.h" 12 | * . . . 13 | * double start, finish, elapsed; 14 | * . . . 15 | * GET_TIME(start); 16 | * . . . 17 | * Code to be timed 18 | * . . . 19 | * GET_TIME(finish); 20 | * elapsed = finish - start; 21 | * printf("The code to be timed took %e seconds\n", elapsed); 22 | * 23 | * IPP: Section 3.6.1 (pp. 121 and ff.) and Section 6.1.2 (pp. 273 and ff.) 24 | */ 25 | #ifndef _TIMER_H_ 26 | #define _TIMER_H_ 27 | 28 | #include 29 | 30 | /* The argument now should be a double (not a pointer to a double) */ 31 | #define GET_TIME(now) { \ 32 | struct timeval t; \ 33 | gettimeofday(&t, NULL); \ 34 | now = t.tv_sec + t.tv_usec/1000000.0; \ 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /Homeworks/ExampleCodes/Code2/Prime/Solution/performance test/primeMD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/ExampleCodes/Code2/Prime/Solution/performance test/primeMD -------------------------------------------------------------------------------- /Homeworks/ExampleCodes/Code2/Prime/Solution/performance test/result.txt: -------------------------------------------------------------------------------- 1 | // condition 2 | 3 | 4 | // define parameters 5 | #define NANO 1000000000 6 | #define Max_Thread_Num 256 // define using how many threads 7 | #define MAXIMUM 0x7fffffffffffffff 8 | #define BLOCK_SIZE 65536 9 | 10 | 11 | // global vars 12 | long int n = 30000000; // how many prime number 13 | 14 | 15 | // result 16 | //---------------------------------------------------------------------------------------------------------------- 17 | 18 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel 
Programming/Homeworks/Homework 2/prime/Solution/performance test$ icpc -lrt -lpthread primeMD.cpp -o primeMD 19 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 20 | serial: found 1857859 primes cost = 9.1509818700 21 | mtx : found 2380569 primes cost = 1.3733918010 speedup = 6.663053 22 | atomic: found 2022701 primes cost = 1.8234812670 speedup = 5.018413 23 | dup : found 2191429 primes cost = 1.2591722410 speedup = 7.267458 24 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 25 | serial: found 1857859 primes cost = 9.1485292310 26 | mtx : found 2341218 primes cost = 1.2717440140 speedup = 7.193688 27 | atomic: found 2213863 primes cost = 1.2649233010 speedup = 7.232477 28 | dup : found 2365562 primes cost = 1.2777124400 speedup = 7.160085 29 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 30 | serial: found 1857859 primes cost = 9.1639542380 31 | mtx : found 2377058 primes cost = 1.2748670420 speedup = 7.188165 32 | atomic: found 2206272 primes cost = 1.2579527460 speedup = 7.284816 33 | dup : found 2155165 primes cost = 1.9573085450 speedup = 4.681916 34 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 35 | serial: found 1857859 primes cost = 9.1582943440 36 | mtx : found 2265501 primes cost = 1.3116203400 speedup = 6.982428 37 | atomic: found 2196891 primes cost = 1.2740057870 speedup = 7.188581 38 | dup : found 2013231 primes cost = 1.8049689870 speedup = 5.073934 39 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 40 | serial: found 1857859 primes cost = 9.1469963760 41 | mtx : found 2294324 primes cost = 1.3071436100 speedup = 6.997698 42 | atomic: found 2006163 primes cost = 1.7818589700 
speedup = 5.133401 43 | dup : found 2375195 primes cost = 1.3314744890 speedup = 6.869825 44 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 45 | serial: found 1857859 primes cost = 9.1476415140 46 | mtx : found 2206209 primes cost = 1.2591002780 speedup = 7.265221 47 | atomic: found 2216222 primes cost = 1.2994178030 speedup = 7.039800 48 | dup : found 2359980 primes cost = 1.2721454700 speedup = 7.190720 49 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 50 | serial: found 1857859 primes cost = 9.1980578450 51 | mtx : found 2374529 primes cost = 1.3017162810 speedup = 7.066100 52 | atomic: found 2208447 primes cost = 1.3069308060 speedup = 7.037907 53 | dup : found 2072645 primes cost = 1.8827981040 speedup = 4.885313 54 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 55 | serial: found 1857859 primes cost = 9.1652981570 56 | mtx : found 2213662 primes cost = 1.2680693800 speedup = 7.227758 57 | atomic: found 2188975 primes cost = 1.3110168430 speedup = 6.990984 58 | dup : found 2173835 primes cost = 2.0233755280 speedup = 4.529707 59 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ 60 | 61 | 62 | -------------------------------------------------------------------------------- /Homeworks/ExampleCodes/Code2/Prime/Solution/result verify/primeMD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/ExampleCodes/Code2/Prime/Solution/result verify/primeMD -------------------------------------------------------------------------------- /Homeworks/ExampleCodes/Code2/Prime/source code/prime: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/ExampleCodes/Code2/Prime/source code/prime -------------------------------------------------------------------------------- /Homeworks/ExampleCodes/Code2/Prime/source code/testResult.txt: -------------------------------------------------------------------------------- 1 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ icpc -lrt -lpthread prime.cpp -o prime 2 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 3 | serial: found 1857859 primes cost = 9.1461615650 4 | mtx : found 1857859 primes cost = 1.8300433560 speedup = 4.997784 5 | atomic: found 1857859 primes cost = 1.8292944590 speedup = 4.999830 6 | dup : found 1857859 primes cost = 1.8282121360 speedup = 5.002790 7 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 8 | serial: found 1857859 primes cost = 9.1455496690 9 | mtx : found 1857859 primes cost = 1.8180072810 speedup = 5.030535 10 | atomic: found 1857859 primes cost = 1.8124327890 speedup = 5.046008 11 | dup : found 1857859 primes cost = 1.8262520770 speedup = 5.007824 12 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 13 | serial: found 1857859 primes cost = 9.1607252410 14 | mtx : found 1857859 primes cost = 1.8387578680 speedup = 4.982018 15 | atomic: found 1857859 primes cost = 1.8407987380 speedup = 4.976495 16 | dup : found 1857859 primes cost = 1.8388292810 speedup = 4.981825 17 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 18 | serial: found 1857859 primes cost = 9.1694271300 19 | mtx : found 1857859 primes cost = 1.7892989500 speedup = 5.124592 20 | atomic: 
found 1857859 primes cost = 1.8102597470 speedup = 5.065255 21 | dup : found 1857859 primes cost = 1.8344786180 speedup = 4.998383 22 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 23 | serial: found 1857859 primes cost = 9.1465545060 24 | mtx : found 1857859 primes cost = 1.8231061690 speedup = 5.017017 25 | atomic: found 1857859 primes cost = 1.8275519580 speedup = 5.004812 26 | dup : found 1857859 primes cost = 1.8277835940 speedup = 5.004178 27 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 28 | serial: found 1857859 primes cost = 9.1603896740 29 | mtx : found 1857859 primes cost = 1.8346655100 speedup = 4.992948 30 | atomic: found 1857859 primes cost = 1.8453990010 speedup = 4.963907 31 | dup : found 1857859 primes cost = 1.8232074830 speedup = 5.024327 32 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ 33 | 34 | -------------------------------------------------------------------------------- /Homeworks/ExampleCodes/Code3/multiBody.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int BodyNum=0; 7 | int TimeSteps=0; 8 | 9 | int main(int argc, char** argv ) 10 | { 11 | 12 | int n, t, i, j; 13 | double *pBody;//´æ´¢Á£×ӵĻù±¾ÐÅÏ¢£¬Ã¿¸öÁ£×ÓÕ¼ÓÃ4¸öÁ¬ÐøµÄ¸¡µãÊý£ºmass¡¢x¡¢y¡¢z 14 | double *pForce;//´æ´¢Á£×ÓµÄÊÜÁ¦£¬Ã¿¸öÁ£×ÓÕ¼ÓÃ3¸öÁ¬ÐøµÄ¸¡µãÊý£ºFx¡¢Fy¡¢Fz 15 | double fac, fx, fy, fz; 16 | double dx, dy, dz, sq, dist; 17 | clock_t c_start, c_end; 18 | double run_time; 19 | char *pStr; 20 | FILE *fResult; 21 | 22 | for ( i=1; i 13 | #include 14 | #define NXPROB 1000 15 | #define NYPROB 1000 16 | struct Parms 17 | { 18 | float cx; 19 | float cy; 20 | int nts; 21 | } parms = {0.1, 0.1, 50}; 22 | 23 | main() 24 | { 25 | float u[2][NXPROB][NYPROB]; 26 | int ix, iy, iz, it; 27 | void inidat(), prtdat(), update(); 
28 | 29 | /************************************************************************ 30 | ** Initialize grid. 31 | *************************************************************************/ 32 | inidat(NXPROB, NYPROB, u); 33 | prtdat(NXPROB, NYPROB, u, "initial.dat"); 34 | for (ix = 0; ix <= NXPROB-1; ix++) 35 | { 36 | u[1][ix][0] = u[0][ix][0]; 37 | u[1][ix][NYPROB-1] = u[0][ix][NYPROB-1]; 38 | } 39 | for (iy = 0; iy <= NYPROB-1; iy++) 40 | { 41 | u[1][0][iy] = u[0][0][iy]; 42 | u[1][NXPROB-1][iy] = u[0][NXPROB-1][iy]; 43 | } 44 | 45 | /*********************************************************************** 46 | ** Iterate over all timesteps. 47 | ************************************************************************/ 48 | iz = 0; 49 | for (it = 1; it <= parms.nts; it++) 50 | { 51 | update(NXPROB, NYPROB, &u[iz][0][0], &u[1-iz][0][0]); 52 | iz = 1 - iz; 53 | } 54 | 55 | prtdat(NXPROB, NYPROB, &u[iz][0][0], "final.dat"); 56 | } 57 | 58 | /**************************************************************************** 59 | * subroutine update 60 | ****************************************************************************/ 61 | void 62 | update(nx, ny, u1, u2) 63 | int nx, ny; 64 | /*float u1[nx][ny], u2[nx][ny];*/ 65 | float *u1, *u2; 66 | { 67 | int ix, iy; 68 | 69 | for (ix = 1; ix <= nx-2; ix++) 70 | { 71 | for (iy = 1; iy <= ny-2; iy++) 72 | { 73 | *(u2+ix*ny+iy) = *(u1+ix*ny+iy) + 74 | parms.cx * (*(u1+(ix+1)*ny+iy) + *(u1+(ix-1)*ny+iy) - 75 | 2.0 * *(u1+ix*ny+iy) ) + 76 | parms.cy * (*(u1+ix*ny+iy+1) + *(u1+ix*ny+iy-1) - 77 | 2.0 * *(u1+ix*ny+iy) ); 78 | } 79 | } 80 | } 81 | 82 | /***************************************************************************** 83 | * subroutine inidat 84 | *****************************************************************************/ 85 | void 86 | inidat(nx, ny, u1) 87 | int nx, ny; 88 | /*float u1[nx][ny];*/ 89 | float *u1; 90 | { 91 | int ix, iy; 92 | 93 | for (ix = 0; ix <= nx-1; ix++) 94 | { 95 | for (iy = 0; iy <= ny-1; 
iy++) 96 | { 97 | /* u1[ix][iy] = (float)(ix * (nx - ix - 1) * iy * (ny - iy - 1)); */ 98 | *(u1+ix*ny+iy) = (float)(ix * (nx - ix - 1) * iy * (ny - iy - 1)); 99 | } 100 | } 101 | } 102 | 103 | /************************************************************************** 104 | * subroutine prtdat 105 | **************************************************************************/ 106 | void 107 | prtdat(nx, ny, u1, fnam) 108 | int nx, ny; 109 | /*float u1[nx][ny];*/ 110 | float *u1; 111 | char *fnam; 112 | { 113 | int ix, iy; 114 | FILE *fp; 115 | 116 | fp = fopen(fnam, "w"); 117 | for (iy = ny-1; iy >= 0; iy--) 118 | { 119 | for (ix = 0; ix <= nx-1; ix++) 120 | { 121 | fprintf(fp, "%8.3f", *(u1+ix*ny+iy)); 122 | if (ix != nx-1) 123 | { 124 | fprintf(fp, " "); 125 | } 126 | else 127 | { 128 | fprintf(fp, "\n"); 129 | } 130 | } 131 | } 132 | fclose(fp); 133 | printf("Wrote file: %s\n",fnam); 134 | } 135 | -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/code_debug/primeModified: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/code_debug/primeModified -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/code_debug/primeTemp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/code_debug/primeTemp -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/code_debug/primeTest: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/code_debug/primeTest -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/code_debug/primeTest2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/code_debug/primeTest2 -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/code_debug/sortBucket: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/code_debug/sortBucket -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/original_codes/prime: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/original_codes/prime -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/original_codes/testResult.txt: -------------------------------------------------------------------------------- 1 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ icpc -lrt -lpthread prime.cpp -o prime 2 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 3 | serial: found 1857859 primes cost = 9.1461615650 4 | mtx : found 1857859 primes cost = 1.8300433560 speedup = 4.997784 5 | atomic: found 1857859 primes cost = 1.8292944590 
speedup = 4.999830 6 | dup : found 1857859 primes cost = 1.8282121360 speedup = 5.002790 7 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 8 | serial: found 1857859 primes cost = 9.1455496690 9 | mtx : found 1857859 primes cost = 1.8180072810 speedup = 5.030535 10 | atomic: found 1857859 primes cost = 1.8124327890 speedup = 5.046008 11 | dup : found 1857859 primes cost = 1.8262520770 speedup = 5.007824 12 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 13 | serial: found 1857859 primes cost = 9.1607252410 14 | mtx : found 1857859 primes cost = 1.8387578680 speedup = 4.982018 15 | atomic: found 1857859 primes cost = 1.8407987380 speedup = 4.976495 16 | dup : found 1857859 primes cost = 1.8388292810 speedup = 4.981825 17 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 18 | serial: found 1857859 primes cost = 9.1694271300 19 | mtx : found 1857859 primes cost = 1.7892989500 speedup = 5.124592 20 | atomic: found 1857859 primes cost = 1.8102597470 speedup = 5.065255 21 | dup : found 1857859 primes cost = 1.8344786180 speedup = 4.998383 22 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 23 | serial: found 1857859 primes cost = 9.1465545060 24 | mtx : found 1857859 primes cost = 1.8231061690 speedup = 5.017017 25 | atomic: found 1857859 primes cost = 1.8275519580 speedup = 5.004812 26 | dup : found 1857859 primes cost = 1.8277835940 speedup = 5.004178 27 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 28 | serial: found 1857859 primes cost = 9.1603896740 29 | mtx : found 1857859 primes cost = 1.8346655100 speedup = 4.992948 30 | atomic: found 1857859 primes cost = 1.8453990010 speedup = 4.963907 31 | dup : found 1857859 primes cost = 1.8232074830 speedup = 5.024327 32 | 
yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ 33 | 34 | -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/solutions/performance_test/primeMD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/solutions/performance_test/primeMD -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/solutions/performance_test/result.txt: -------------------------------------------------------------------------------- 1 | // condition 2 | 3 | 4 | // define parameters 5 | #define NANO 1000000000 6 | #define Max_Thread_Num 256 // define using how many threads 7 | #define MAXIMUM 0x7fffffffffffffff 8 | #define BLOCK_SIZE 65536 9 | 10 | 11 | // global vars 12 | long int n = 30000000; // how many prime number 13 | 14 | 15 | // result 16 | //---------------------------------------------------------------------------------------------------------------- 17 | 18 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ icpc -lrt -lpthread primeMD.cpp -o primeMD 19 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 20 | serial: found 1857859 primes cost = 9.1509818700 21 | mtx : found 2380569 primes cost = 1.3733918010 speedup = 6.663053 22 | atomic: found 2022701 primes cost = 1.8234812670 speedup = 5.018413 23 | dup : found 2191429 primes cost = 1.2591722410 speedup = 7.267458 24 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 25 | serial: found 1857859 primes cost = 9.1485292310 26 | mtx : found 2341218 primes cost = 1.2717440140 speedup 
= 7.193688 27 | atomic: found 2213863 primes cost = 1.2649233010 speedup = 7.232477 28 | dup : found 2365562 primes cost = 1.2777124400 speedup = 7.160085 29 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 30 | serial: found 1857859 primes cost = 9.1639542380 31 | mtx : found 2377058 primes cost = 1.2748670420 speedup = 7.188165 32 | atomic: found 2206272 primes cost = 1.2579527460 speedup = 7.284816 33 | dup : found 2155165 primes cost = 1.9573085450 speedup = 4.681916 34 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 35 | serial: found 1857859 primes cost = 9.1582943440 36 | mtx : found 2265501 primes cost = 1.3116203400 speedup = 6.982428 37 | atomic: found 2196891 primes cost = 1.2740057870 speedup = 7.188581 38 | dup : found 2013231 primes cost = 1.8049689870 speedup = 5.073934 39 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 40 | serial: found 1857859 primes cost = 9.1469963760 41 | mtx : found 2294324 primes cost = 1.3071436100 speedup = 6.997698 42 | atomic: found 2006163 primes cost = 1.7818589700 speedup = 5.133401 43 | dup : found 2375195 primes cost = 1.3314744890 speedup = 6.869825 44 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 45 | serial: found 1857859 primes cost = 9.1476415140 46 | mtx : found 2206209 primes cost = 1.2591002780 speedup = 7.265221 47 | atomic: found 2216222 primes cost = 1.2994178030 speedup = 7.039800 48 | dup : found 2359980 primes cost = 1.2721454700 speedup = 7.190720 49 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 50 | serial: found 1857859 primes cost = 9.1980578450 51 | mtx : found 2374529 primes cost = 1.3017162810 speedup = 7.066100 52 | 
atomic: found 2208447 primes cost = 1.3069308060 speedup = 7.037907 53 | dup : found 2072645 primes cost = 1.8827981040 speedup = 4.885313 54 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 55 | serial: found 1857859 primes cost = 9.1652981570 56 | mtx : found 2213662 primes cost = 1.2680693800 speedup = 7.227758 57 | atomic: found 2188975 primes cost = 1.3110168430 speedup = 6.990984 58 | dup : found 2173835 primes cost = 2.0233755280 speedup = 4.529707 59 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ 60 | 61 | 62 | -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/solutions/result_verify/primeMD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/solutions/result_verify/primeMD -------------------------------------------------------------------------------- /Homeworks/Homework_2/第二次作业.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/第二次作业.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_3/Homework 3: -------------------------------------------------------------------------------- 1 | 作业3 并行算法的性能评估(10月31日) 2 | 3 | 4 | 假设有一个计算问题,其中串行计算量占15%.为实现并行计算,需要增加1.5%的计算量,这部分计算量是不能并行执行的,并且与所使用处理器/执行内核的数量无关. 此外,每个处理器/执行内核在执行并行计算任务的过程中,还需要执行为所承担的并行任务执行一定的额外操作.这些额外操作的计算量是所承担并行任务量的0.1%. 
请问 5 | 6 | a) 在一个有M颗处理器/执行内核的计算平台上,并行程序可取得的最大加速比是多少 7 | b) 为了使得并行计算效率至少为70%,M最大可为多少 8 | -------------------------------------------------------------------------------- /Homeworks/Homework_3/第三次作业.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_3/第三次作业.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_4/Homework_4.txt: -------------------------------------------------------------------------------- 1 | 作业 4 Pthreads实现流水并行算法(11月14日) 2 | 已附加文件: 3 | 文件 简化N-body计算的串行程序 (3.158 KB) 4 | N-Body问题是分子化学和天体物理学的基本计算模型.在问题域中有N个粒子,这些粒子之间互相存在万有引力、以及其他作用力,使得这些粒子发生运动. N-Body计算的目的是预测在未来某个时刻,这些粒子所处的状态.请采用pthread,对附件中的简化N-body串行计算程序并行化.请给出并行算法和测试结果. 5 | 6 | 7 | -------------------------------------------------------------------------------- /Homeworks/Homework_4/SourceCodes/multiBody.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int BodyNum=0; 7 | int TimeSteps=0; 8 | 9 | int main(int argc, char** argv ) 10 | { 11 | 12 | int n, t, i, j; 13 | double *pBody;//´æ´¢Á£×ӵĻù±¾ÐÅÏ¢£¬Ã¿¸öÁ£×ÓÕ¼ÓÃ4¸öÁ¬ÐøµÄ¸¡µãÊý£ºmass¡¢x¡¢y¡¢z 14 | double *pForce;//´æ´¢Á£×ÓµÄÊÜÁ¦£¬Ã¿¸öÁ£×ÓÕ¼ÓÃ3¸öÁ¬ÐøµÄ¸¡µãÊý£ºFx¡¢Fy¡¢Fz 15 | double fac, fx, fy, fz; 16 | double dx, dy, dz, sq, dist; 17 | clock_t c_start, c_end; 18 | double run_time; 19 | char *pStr; 20 | FILE *fResult; 21 | 22 | for ( i=1; i 2 | #include 3 | #include 4 | 5 | int main(int argc, char ** argv) 6 | { 7 | MPI_Comm intra_gcomm, inter_lcomm, inter_rcomm, intra_lcomm, intra_rcomm; 8 | MPI_Status status; 9 | int keyid, flag; 10 | int myid, size; 11 | char message[100]; 12 | 13 | MPI_Init( &argc, &argv ); 14 | MPI_Comm_rank( MPI_COMM_WORLD, &myid ); 15 | MPI_Comm_size( MPI_COMM_WORLD, &size ); 16 | 17 | if (myid==0) printf("Total size : 
%5d\n",size); 18 | //printf("%d\n",myid); 19 | if ( myid > 3*(size/3) - 1 ) 20 | { 21 | keyid = MPI_UNDEFINED; 22 | flag = 0; 23 | } 24 | else 25 | { 26 | keyid = myid % 3; 27 | flag = 1; 28 | } 29 | 30 | MPI_Comm_split( MPI_COMM_WORLD, keyid, myid, &intra_gcomm ); 31 | if ( keyid == 0 ) 32 | { 33 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 1, 1, &inter_lcomm ); 34 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 2, 2, &inter_rcomm ); 35 | MPI_Intercomm_merge( inter_lcomm, keyid, &intra_lcomm ); 36 | MPI_Intercomm_merge( inter_rcomm, keyid, &intra_rcomm ); 37 | printf("keyid -%d\n",myid); 38 | } 39 | else if ( keyid == 1 ) 40 | { 41 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 0, 1, &inter_rcomm ); 42 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 2, 3, &inter_lcomm ); 43 | MPI_Intercomm_merge( inter_rcomm, keyid, &intra_rcomm ); 44 | MPI_Intercomm_merge( inter_lcomm, keyid, &intra_lcomm ); 45 | printf("keyid -%d\n",myid); 46 | } 47 | else if (keyid == 2 ) 48 | { 49 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 0, 3, &inter_lcomm ); 50 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 1, 3, &inter_rcomm ); 51 | MPI_Intercomm_merge( inter_lcomm, keyid, &intra_lcomm ); 52 | MPI_Intercomm_merge( inter_rcomm, keyid, &intra_rcomm ); 53 | printf("keyid -%d\n",myid); 54 | } 55 | 56 | //MPI_Barrier(MPI_COMM_WORLD); 57 | //printf("barrier-myid %d\n",myid); 58 | int rrank, lrank, rsize, lsize, gsize, grank; 59 | if (flag==0) 60 | printf("process %d is excluded !\n", myid); 61 | else if (flag==1) 62 | { 63 | printf("%d\n",myid); 64 | //if (myid==0) printf("1\n"); 65 | MPI_Comm_size( intra_lcomm, &lsize ); 66 | MPI_Comm_rank( intra_lcomm, &lrank ); 67 | //if (myid==0) printf("2\n"); 68 | MPI_Comm_size( intra_rcomm, &rsize ); 69 | MPI_Comm_rank( intra_rcomm, &rrank ); 70 | //if (myid==0) printf("3\n"); 71 | MPI_Comm_size( intra_gcomm, &gsize ); 72 | MPI_Comm_rank( intra_gcomm, &grank ); 73 | 74 | 75 | if ( myid == 0 ) 76 | 
{ 77 | printf( "color myid size lrank lsize rrank rsize grank gsize\n" ); 78 | printf( "%5d %4d %4d %5d %5d %5d %5d %5d %5d\n",keyid, myid, size, lrank, lsize, rrank, rsize, grank, gsize ); 79 | for ( int i = 1; i < 3*(size/3); i++) 80 | { 81 | //mpi_any_source不会堵塞,否则会堵塞 82 | MPI_Recv( message, 100, MPI_CHAR, MPI_ANY_SOURCE, 10, MPI_COMM_WORLD, &status ); 83 | printf( "%s\n", message ); 84 | } 85 | } 86 | else 87 | { 88 | printf("%d\n",myid); 89 | sprintf( message, "%5d %4d %4d %5d %5d %5d %5d %5d %5d",keyid, myid, size, lrank, lsize, rrank, rsize, grank, gsize ); 90 | MPI_Send( message, strlen(message)+1, MPI_CHAR, 0, 10, MPI_COMM_WORLD ); 91 | } 92 | } 93 | 94 | if (myid==0) printf("1\n"); 95 | int ndims=2, cart_rank; 96 | int dims[2], periods[2], coords[2]; 97 | MPI_Comm comm_cart, comm_new; 98 | MPI_Comm_split( MPI_COMM_WORLD, flag, myid, &comm_new ); 99 | if (flag==1) 100 | { 101 | dims[0] = 3; 102 | dims[1] = size / 3; 103 | periods[0] = false; 104 | periods[1] = false; 105 | MPI_Cart_create( comm_new, ndims, dims, periods, false, &comm_cart ); 106 | if ( myid == 0 ) 107 | { 108 | for ( int i = 0; i < 3; i++) 109 | { 110 | for(int j = 0; j < size/3; j++) 111 | { 112 | coords[0] = i; 113 | coords[1] = j; 114 | MPI_Cart_rank( comm_cart, coords, &cart_rank ); 115 | printf( "{%d, %d} myid=%d\n", coords[0], coords[1], cart_rank ); 116 | } 117 | } 118 | } 119 | 120 | MPI_Comm_free( &inter_lcomm ); 121 | MPI_Comm_free( &inter_rcomm ); 122 | MPI_Comm_free( &intra_lcomm ); 123 | MPI_Comm_free( &intra_rcomm ); 124 | MPI_Comm_free( &intra_gcomm ); 125 | MPI_Comm_free( &comm_cart ); 126 | MPI_Comm_free( &comm_new ); 127 | } 128 | MPI_Finalize(); 129 | } 130 | 131 | -------------------------------------------------------------------------------- /Homeworks/Homework_6/homework 6.txt: -------------------------------------------------------------------------------- 1 | 1. 
作业信息 2 | 3 | 名称: 4 | 作业6 MPI程序的计算资源管理(11月28日) 5 | 说明 6 | 改写PDF文档4.3.1中的示例程序。从mpiexec创建的进程组中,使用MPI_Comm_split()实现示例程序要求的“环”。若mpiexec所创建的进程数不为3的整数倍,则将序号高的进程余留出来不参加“环” 7 | 截止日期 2016年11月28日 下午11时59分00秒 8 | 满分 100 9 | -------------------------------------------------------------------------------- /Homeworks/Homework_7/第七次作业.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_7/第七次作业.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_7/第七次作业.txt: -------------------------------------------------------------------------------- 1 | {\rtf1\ansi\ansicpg936\cocoartf1404\cocoasubrtf470 2 | {\fonttbl\f0\fnil\fcharset0 LucidaGrande;\f1\fnil\fcharset134 PingFangSC-Regular;} 3 | {\colortbl;\red255\green255\blue255;\red154\green154\blue154;\red23\green152\blue185;\red52\green52\blue52; 4 | \red22\green125\blue151;} 5 | {\*\listtable{\list\listtemplateid1\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{none\}}{\leveltext\leveltemplateid1\'00;}{\levelnumbers;}\fi-360\li720\lin720 }{\listname ;}\listid1}} 6 | {\*\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}} 7 | \paperw11900\paperh16840\margl1440\margr1440\vieww20300\viewh10220\viewkind0 8 | \deftab720 9 | \pard\pardeftab720\partightenfactor0 10 | 11 | \f0\b\fs44 \cf2 \expnd0\expndtw0\kerning0 12 | 1. 
13 | \fs32 \cf3 \'a0 14 | \f1 \'d7\'f7\'d2\'b5\'d0\'c5\'cf\'a2 15 | \f0 \ 16 | \pard\tx220\tx720\pardeftab720\li720\fi-720\partightenfactor0 17 | \ls1\ilvl0 18 | \b0\fs24 \cf4 \kerning1\expnd0\expndtw0 19 | \f1 \expnd0\expndtw0\kerning0 20 | \'c3\'fb\'b3\'c6 21 | \f0 : \uc0\u8232 22 | \f1 \'d7\'f7\'d2\'b5 23 | \f0 7 MPI 24 | \f1 \'b5\'e3\'b5\'bd\'b5\'e3\'cd\'a8\'d0\'c5\'a3\'a8 25 | \f0 12 26 | \f1 \'d4\'c2 27 | \f0 5 28 | \f1 \'c8\'d5\'a3\'a9 29 | \f0 \cf0 \uc0\u8232 \cf4 \ 30 | \pard\tx220\tx720\pardeftab720\li720\fi-720\partightenfactor0 31 | \ls1\ilvl0\cf4 \kerning1\expnd0\expndtw0 32 | \f1 \expnd0\expndtw0\kerning0 33 | \'cb\'b5\'c3\'f7 34 | \f0 \uc0\u8232 35 | \f1\fs26 \'c0\'fb\'d3\'c3 36 | \f0 MPI 37 | \f1 \'a3\'ac\'b7\'d6\'b1\'f0\'d3\'c3\'d7\'e8\'c8\'fb\'ca\'bd\'cd\'a8\'d0\'c5\'a1\'a2\'b7\'c7\'d7\'e8\'c8\'fb\'ca\'b9\'cd\'a8\'d0\'c5\'d6\'d8\'d0\'c2\'b1\'e0\'c2\'eb\'ca\'b5\'cf\'d6\'d7\'f7\'d2\'b5 38 | \f0 N-body 39 | \f1 \'bc\'c6\'cb\'e3\'ce\'ca\'cc\'e2\'a1\'a3\'d4\'da\'cd\'ac\'d2\'bb\'b8\'f6\'b6\'e0\'b4\'a6\'c0\'ed\'bb\'fa\'cf\'b5\'cd\'b3\'c9\'cf\'a3\'ac\'ca\'b9\'d3\'c3\'b2\'bb\'cd\'ac\'b5\'c4\'ca\'fd\'be\'dd\'b9\'e6\'c4\'a3\'a3\'ac\'b6\'d4\'b1\'c8 40 | \f0 MPI 41 | \f1 \'b2\'a2\'d0\'d0\'b3\'cc\'d0\'f2\'d3\'eb 42 | \f0 pthread 43 | \f1 \'b2\'a2\'d0\'d0\'b3\'cc\'d0\'f2\'b5\'c4\'bc\'d3\'cb\'d9\'b1\'c8\'a3\'ac\'b7\'d6\'ce\'f6\'c6\'e4\'bc\'d3\'cb\'d9\'b1\'c8\'b2\'ee\'d2\'ec\'b5\'c4\'d4\'ad\'d2\'f2\'a1\'a3 44 | \f0 \uc0\u8232 45 | \f1 \'b4\'ae\'d0\'d0\'a1\'a2 46 | \f0 pthread 47 | \f1 \'b2\'a2\'d0\'d0 48 | \f0 N-Body 49 | \f1 \'bc\'c6\'cb\'e3\'b5\'c4\'b2\'ce\'bf\'bc\'ca\'b5\'cf\'d6\'b4\'fa\'c2\'eb{\field{\*\fldinst{HYPERLINK "http://course.pku.edu.cn/bbcswebdav/pid-208694-dt-content-rid-1288381_2/xid-1288381_2"}}{\fldrslt 50 | \f0 \cf5 NBody.cpp}} 51 | \f0 \'a0\uc0\u8232 \u8232 52 | \fs24 \ 53 | \ls1\ilvl0\kerning1\expnd0\expndtw0 54 | \f1 \expnd0\expndtw0\kerning0 55 | \'bd\'d8\'d6\'b9\'c8\'d5\'c6\'da 56 | \f0 \uc0\u8232 2016 57 | \f1 \'c4\'ea 58 | \f0 12 59 | 
\f1 \'d4\'c2 60 | \f0 5 61 | \f1 \'c8\'d5 62 | \f0 63 | \f1 \'cf\'c2\'ce\'e7 64 | \f0 11 65 | \f1 \'ca\'b1 66 | \f0 59 67 | \f1 \'b7\'d6 68 | \f0 00 69 | \f1 \'c3\'eb 70 | \f0 \ 71 | \ls1\ilvl0\kerning1\expnd0\expndtw0 72 | \f1 \expnd0\expndtw0\kerning0 73 | \'c2\'fa\'b7\'d6 74 | \f0 \uc0\u8232 100} -------------------------------------------------------------------------------- /Homeworks/Homework_8/第八次作业.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_8/第八次作业.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_8/第八次作业.txt: -------------------------------------------------------------------------------- 1 | {\rtf1\ansi\ansicpg936\cocoartf1404\cocoasubrtf470 2 | {\fonttbl\f0\fnil\fcharset0 LucidaGrande;\f1\fnil\fcharset134 PingFangSC-Regular;} 3 | {\colortbl;\red255\green255\blue255;\red154\green154\blue154;\red23\green152\blue185;\red52\green52\blue52; 4 | } 5 | {\*\listtable{\list\listtemplateid1\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{none\}}{\leveltext\leveltemplateid1\'00;}{\levelnumbers;}\fi-360\li720\lin720 }{\listname ;}\listid1}} 6 | {\*\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}} 7 | \paperw11900\paperh16840\margl1440\margr1440\vieww10800\viewh8400\viewkind0 8 | \deftab720 9 | \pard\pardeftab720\partightenfactor0 10 | 11 | \f0\b\fs43\fsmilli21840 \cf2 \expnd0\expndtw0\kerning0 12 | 1. 
13 | \fs31\fsmilli15600 \cf3 \'a0 14 | \f1 \'d7\'f7\'d2\'b5\'d0\'c5\'cf\'a2 15 | \f0 \ 16 | \pard\tx220\tx720\pardeftab720\li720\fi-720\partightenfactor0 17 | \ls1\ilvl0 18 | \b0\fs25 \cf4 \kerning1\expnd0\expndtw0 19 | \f1 \expnd0\expndtw0\kerning0 20 | \'c3\'fb\'b3\'c6 21 | \f0 : \uc0\u8232 22 | \f1 \'d7\'f7\'d2\'b5 23 | \f0 8 MPI 24 | \f1 \'b5\'a5\'b1\'df\'cd\'a8\'d0\'c5\'ba\'cd\'b9\'b2\'cf\'ed\'ce\'c4\'bc\'fe\'b7\'c3\'ce\'ca 25 | \f0 (12 26 | \f1 \'d4\'c2 27 | \f0 12 28 | \f1 \'c8\'d5 29 | \f0 )\cf0 \uc0\u8232 \cf4 \ 30 | \pard\tx220\tx720\pardeftab720\li720\fi-720\partightenfactor0 31 | \ls1\ilvl0\cf4 \kerning1\expnd0\expndtw0 32 | \f1 \expnd0\expndtw0\kerning0 33 | \'cb\'b5\'c3\'f7 34 | \f0 \uc0\u8232 35 | \fs26 \'a0 36 | \f1 \'b2\'ce\'bf\'bc 37 | \f0 4.5.1 38 | \f1 \'bd\'da\'d6\'d0 39 | \f0 2D5P 40 | \f1 \'c4\'a3\'b0\'e5\'bc\'c6\'cb\'e3\'b5\'c4 41 | \f0 MPI 42 | \f1 \'b5\'e3\'b6\'d4\'b5\'e3\'cd\'a8\'d0\'c5\'b2\'a2\'d0\'d0\'b3\'cc\'d0\'f2\'a3\'ac\'d2\'d4\'b5\'a5\'b1\'df\'cd\'a8\'d0\'c5\'ca\'b5\'cf\'d6\'d2\'bb\'b8\'f6 43 | \f0 2D5P 44 | \f1 \'c4\'a3\'b0\'e5\'bc\'c6\'cb\'e3\'b5\'c4 45 | \f0 MPI 46 | \f1 \'b2\'a2\'d0\'d0\'b3\'cc\'d0\'f2\'a3\'ac\'b2\'a2\'bd\'ab\'bc\'c6\'cb\'e3\'bd\'e1\'b9\'fb\'ca\'e4\'b3\'f6\'b5\'bd\'d2\'bb\'b8\'f6\'b6\'fe\'bd\'f8\'d6\'c6\'ce\'c4\'bc\'fe\'d6\'d0\'a1\'a3\'be\'d8\'d5\'f3\'d4\'da\'ca\'e4\'b3\'f6\'ce\'c4\'bc\'fe\'d6\'d0\'b0\'b4\'d5\'d5\'d0\'d0\'d3\'c5\'cf\'c8\'b4\'e6\'b4\'a2\'a1\'a3 47 | \f0 \uc0\u8232 48 | \fs25 \ 49 | \ls1\ilvl0\kerning1\expnd0\expndtw0 50 | \f1 \expnd0\expndtw0\kerning0 51 | \'bd\'d8\'d6\'b9\'c8\'d5\'c6\'da 52 | \f0 \uc0\u8232 2016 53 | \f1 \'c4\'ea 54 | \f0 12 55 | \f1 \'d4\'c2 56 | \f0 12 57 | \f1 \'c8\'d5 58 | \f0 59 | \f1 \'cf\'c2\'ce\'e7 60 | \f0 11 61 | \f1 \'ca\'b1 62 | \f0 59 63 | \f1 \'b7\'d6 64 | \f0 00 65 | \f1 \'c3\'eb 66 | \f0 \ 67 | \ls1\ilvl0\kerning1\expnd0\expndtw0 68 | \f1 \expnd0\expndtw0\kerning0 69 | \'c2\'fa\'b7\'d6 70 | \f0 \uc0\u8232 100} 
-------------------------------------------------------------------------------- /Homeworks/Homework_9/Documents/01531136.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/Documents/01531136.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_9/Documents/24.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/Documents/24.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_9/Documents/Fox_example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/Documents/Fox_example.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_9/Documents/Li-Fall-2012-CSE633.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/Documents/Li-Fall-2012-CSE633.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_9/Documents/lawn129.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/Documents/lawn129.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_9/Documents/matrixmult.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/Documents/matrixmult.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_9/Documents/ppagerank_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/Documents/ppagerank_report.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_9/第九次作业.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/第九次作业.pdf -------------------------------------------------------------------------------- /Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/150*******李师尧-作业七.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/150*******李师尧-作业七.pdf -------------------------------------------------------------------------------- /Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/mpi_process.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char ** argv) 6 | { 7 | MPI_Comm intra_gcomm, inter_lcomm, inter_rcomm, intra_lcomm, intra_rcomm; 8 | MPI_Status status; 9 | int keyid, flag; 10 | int myid, size; 11 | char message[100]; 12 | 13 | MPI_Init( &argc, &argv ); 14 | MPI_Comm_rank( MPI_COMM_WORLD, &myid ); 15 | MPI_Comm_size( MPI_COMM_WORLD, &size ); 16 | 17 | if 
(myid==0) printf("Total size : %5d\n",size); 18 | //printf("%d\n",myid); 19 | if ( myid > 3*(size/3) - 1 ) 20 | { 21 | keyid = MPI_UNDEFINED; 22 | flag = 0; 23 | } 24 | else 25 | { 26 | keyid = myid % 3; 27 | flag = 1; 28 | } 29 | 30 | MPI_Comm_split( MPI_COMM_WORLD, keyid, myid, &intra_gcomm ); 31 | if ( keyid == 0 ) 32 | { 33 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 1, 1, &inter_lcomm ); 34 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 2, 2, &inter_rcomm ); 35 | MPI_Intercomm_merge( inter_lcomm, keyid, &intra_lcomm ); 36 | MPI_Intercomm_merge( inter_rcomm, keyid, &intra_rcomm ); 37 | printf("keyid -%d\n",myid); 38 | } 39 | else if ( keyid == 1 ) 40 | { 41 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 0, 1, &inter_rcomm ); 42 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 2, 3, &inter_lcomm ); 43 | MPI_Intercomm_merge( inter_rcomm, keyid, &intra_rcomm ); 44 | MPI_Intercomm_merge( inter_lcomm, keyid, &intra_lcomm ); 45 | printf("keyid -%d\n",myid); 46 | } 47 | else if (keyid == 2 ) 48 | { 49 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 0, 3, &inter_lcomm ); 50 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 1, 3, &inter_rcomm ); 51 | MPI_Intercomm_merge( inter_lcomm, keyid, &intra_lcomm ); 52 | MPI_Intercomm_merge( inter_rcomm, keyid, &intra_rcomm ); 53 | printf("keyid -%d\n",myid); 54 | } 55 | 56 | //MPI_Barrier(MPI_COMM_WORLD); 57 | //printf("barrier-myid %d\n",myid); 58 | int rrank, lrank, rsize, lsize, gsize, grank; 59 | if (flag==0) 60 | printf("process %d is excluded !\n", myid); 61 | else if (flag==1) 62 | { 63 | printf("%d\n",myid); 64 | //if (myid==0) printf("1\n"); 65 | MPI_Comm_size( intra_lcomm, &lsize ); 66 | MPI_Comm_rank( intra_lcomm, &lrank ); 67 | //if (myid==0) printf("2\n"); 68 | MPI_Comm_size( intra_rcomm, &rsize ); 69 | MPI_Comm_rank( intra_rcomm, &rrank ); 70 | //if (myid==0) printf("3\n"); 71 | MPI_Comm_size( intra_gcomm, &gsize ); 72 | MPI_Comm_rank( intra_gcomm, &grank ); 73 | 
74 | 75 | if ( myid == 0 ) 76 | { 77 | printf( "color myid size lrank lsize rrank rsize grank gsize\n" ); 78 | printf( "%5d %4d %4d %5d %5d %5d %5d %5d %5d\n",keyid, myid, size, lrank, lsize, rrank, rsize, grank, gsize ); 79 | for ( int i = 1; i < 3*(size/3); i++) 80 | { 81 | //mpi_any_source不会堵塞,否则会堵塞 82 | MPI_Recv( message, 100, MPI_CHAR, MPI_ANY_SOURCE, 10, MPI_COMM_WORLD, &status ); 83 | printf( "%s\n", message ); 84 | } 85 | } 86 | else 87 | { 88 | printf("%d\n",myid); 89 | sprintf( message, "%5d %4d %4d %5d %5d %5d %5d %5d %5d",keyid, myid, size, lrank, lsize, rrank, rsize, grank, gsize ); 90 | MPI_Send( message, strlen(message)+1, MPI_CHAR, 0, 10, MPI_COMM_WORLD ); 91 | } 92 | } 93 | 94 | if (myid==0) printf("1\n"); 95 | int ndims=2, cart_rank; 96 | int dims[2], periods[2], coords[2]; 97 | MPI_Comm comm_cart, comm_new; 98 | MPI_Comm_split( MPI_COMM_WORLD, flag, myid, &comm_new ); 99 | if (flag==1) 100 | { 101 | dims[0] = 3; 102 | dims[1] = size / 3; 103 | periods[0] = false; 104 | periods[1] = false; 105 | MPI_Cart_create( comm_new, ndims, dims, periods, false, &comm_cart ); 106 | if ( myid == 0 ) 107 | { 108 | for ( int i = 0; i < 3; i++) 109 | { 110 | for(int j = 0; j < size/3; j++) 111 | { 112 | coords[0] = i; 113 | coords[1] = j; 114 | MPI_Cart_rank( comm_cart, coords, &cart_rank ); 115 | printf( "{%d, %d} myid=%d\n", coords[0], coords[1], cart_rank ); 116 | } 117 | } 118 | } 119 | 120 | MPI_Comm_free( &inter_lcomm ); 121 | MPI_Comm_free( &inter_rcomm ); 122 | MPI_Comm_free( &intra_lcomm ); 123 | MPI_Comm_free( &intra_rcomm ); 124 | MPI_Comm_free( &intra_gcomm ); 125 | MPI_Comm_free( &comm_cart ); 126 | MPI_Comm_free( &comm_new ); 127 | } 128 | MPI_Finalize(); 129 | } 130 | 131 | -------------------------------------------------------------------------------- /Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/作业九 FOX并行算法和PageRank算法.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/作业九 FOX并行算法和PageRank算法.pdf -------------------------------------------------------------------------------- /Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/作业八150*******李师尧.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/作业八150*******李师尧.pdf -------------------------------------------------------------------------------- /Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/作业六150*******李师尧.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/作业六150*******李师尧.pdf -------------------------------------------------------------------------------- /Homeworks/Yao_Hong_s_Homework/Homework_1/README.md: -------------------------------------------------------------------------------- 1 | # Yao Hong's Parallel Computing Homework 2 | Parallel programming course at Peking University 3 | 4 | ## Contents 5 | 1. Homeworks and Projects 6 | 7 | * xxxxxxx 8 | 9 | * xxxxxxxx 10 | 11 | * xxxxxxxx 12 | 13 | * xxxxxxxx 14 | 15 | * xxxxxxxxx 16 | 17 | * xxxxxxxxx 18 | 19 | 2. xxxxxxxx 20 | 21 | 22 | ## Warranty 23 | Maybe, there are many mistakes in the both documents and Codes, because of the limitation of our knowledge and strength. As a result: THESE DOCUMENTS AND CODES ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. 24 | I MAKE NO WARRANTIES, EXPRESS OR IMPLIED, THAT THEY ARE FREE OF ERROR. 
25 | 26 | ## Copyright 27 | You can use and copy these works for any academic purpose, Except just copy to finish your homework or republish these works without proper declare their original author. 28 | -------------------------------------------------------------------------------- /Homeworks/Yao_Hong_s_Homework/Homework_1/并行程序设计作业-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Yao_Hong_s_Homework/Homework_1/并行程序设计作业-1.pdf -------------------------------------------------------------------------------- /Homeworks/Yao_Hong_s_Homework/Homework_2/README.md: -------------------------------------------------------------------------------- 1 | # Yao Hong's Parallel Computing Homework 2 | Parallel programming course at Peking University 3 | 4 | ## Contents 5 | 1. Homeworks and Projects 6 | 7 | * xxxxxxx 8 | 9 | * xxxxxxxx 10 | 11 | * xxxxxxxx 12 | 13 | * xxxxxxxx 14 | 15 | * xxxxxxxxx 16 | 17 | * xxxxxxxxx 18 | 19 | 2. xxxxxxxx 20 | 21 | 22 | ## Warranty 23 | Maybe, there are many mistakes in the both documents and Codes, because of the limitation of our knowledge and strength. As a result: THESE DOCUMENTS AND CODES ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. 24 | I MAKE NO WARRANTIES, EXPRESS OR IMPLIED, THAT THEY ARE FREE OF ERROR. 25 | 26 | ## Copyright 27 | You can use and copy these works for any academic purpose, Except just copy to finish your homework or republish these works without proper declare their original author. 28 | -------------------------------------------------------------------------------- /Homeworks/Yao_Hong_s_Homework/Homework_2/saxpy.c: -------------------------------------------------------------------------------- 1 | // 2 | // Created by hongyao on 2018/10/15. 
3 | // 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "fio.h" 13 | 14 | int64_t n,n_num,sizeofA,sizeofB,single_num; 15 | int32_t thread_num,threadid; 16 | float *A,*B,alfa; 17 | pthread_barrier_t barrier; 18 | 19 | void *worker(void *arg){ 20 | int64_t i; 21 | int myID = __sync_fetch_and_add(&threadid, 1); 22 | for (i=myID*single_num;i<(myID+1)*single_num;i++) *(B+i)=(*(A+i))*alfa+*(B+i); 23 | return (void*)0; 24 | } 25 | 26 | int main(int argc, char *argv[]){ 27 | int i; 28 | thread_num=atoi(argv[1]); // 设置P 29 | n=atoll(argv[2]); // 设置N 30 | n_num=((int64_t)1< 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "fio.h" 9 | 10 | int64_t n,n_num,m,m_num,sizeofA,sizeofB,sizeofC; 11 | int32_t thread_num,threadid; 12 | int64_t *A,*B,*C,*pp,*qq,*qqa; 13 | pthread_barrier_t barrier; 14 | struct SGROUP {int64_t pianduan;int64_t geshu;int64_t locA;}; //pianduan为该片段,geshu为该片段的个数,locA为排序后该片段的首位在A中的位置 15 | struct SGROUP *groupA; 16 | //比较函数指针 17 | int myCompar(const void *arg1,const void *arg2){ 18 | int64_t *pa=(int64_t*)arg1,*pb=(int64_t*)arg2; 19 | return *pa>*pb; 20 | } 21 | //***********************// 22 | //**********子***********// 23 | //**********线***********// 24 | //**********程***********// 25 | //***********************// 26 | void *worker(void *arg) { 27 | int64_t i,j,lb,ub; 28 | int myID = __sync_fetch_and_add(&threadid, 1); 29 | // printf("xianchengyfhdshd s"); 30 | int64_t loc_size = (m_num /2)/thread_num; 31 | int64_t rest = (m_num /2)%thread_num; 32 | 33 | //**********给线程分配计算资源************* 34 | if (myID < rest) { 35 | lb = loc_size * myID + myID; 36 | ub = lb + loc_size + 1; 37 | } else { 38 | lb = loc_size * myID + rest; 39 | ub = lb + loc_size; 40 | } 41 | 42 | //**********将B数组两位两位地保存在pp中*********** 43 | for (i =lb; i < ub; i++) { 44 | for (j = 0; j < m_num /2; j++) { 45 | if (B[2*j] == i) { 46 | pp[2*i]=B[2*j]; 47 | pp[2*i+1]=B[2*j+1]; 48 | } 49 | } 50 
| } 51 | pthread_barrier_wait(&barrier); 52 | //**************开辟空间得到qqa(排序前累积)以及qq(排序后累积)*************** 53 | if (pthread_barrier_wait(&barrier) == PTHREAD_BARRIER_SERIAL_THREAD) { 54 | for (i = 1; i < m_num / 2; i++) { 55 | qqa[i] = qqa[i - 1] + B[2*i - 1]; 56 | qq[i] = qq[i - 1] + pp[2*i - 1]; 57 | } 58 | } 59 | pthread_barrier_wait(&barrier); 60 | //***************保存结构体的前两个数据************** 61 | for (i = lb; i 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "fio.h" 11 | 12 | //Global variable 13 | int64_t n,m,size,size_center; 14 | int thread_num,threadid; 15 | pthread_barrier_t barrier; 16 | float *pa, *pb; 17 | 18 | //********************************// 19 | //**************子****************// 20 | //**************线****************// 21 | //**************程****************// 22 | //********************************// 23 | void *worker(void *arg) { 24 | int64_t i,j,lb,ub; 25 | int myID = __sync_fetch_and_add(&threadid,1); 26 | int64_t loc_size = (n-2)/thread_num; 27 | int64_t rest = (n-2)%thread_num; 28 | //**********给线程分配计算资源************* 29 | if (myID < rest) { 30 | lb = loc_size * myID + myID; 31 | ub = lb + loc_size + 1; 32 | } else { 33 | lb = loc_size * myID + rest; 34 | ub = lb + loc_size; 35 | } 36 | if (pthread_barrier_wait(&barrier) == PTHREAD_BARRIER_SERIAL_THREAD) { 37 | memcpy(pb, pa, sizeof(float) * m); 38 | memcpy(&pb[m*(n-1)], &pa[m*(n-1)], sizeof(float) * m); 39 | } 40 | 41 | for (i=lb+1;i 5 | 6 | 7 | 8 | Yes We Code 9 |
10 | 11 |
12 | 13 | ## Contents 14 | 1. Homeworks and Projects 15 | 16 | * Nine times of homeworks, both my homeworks and my partner Shiyao Li's homeworks. These homeworks including 17 | Pthreads, MPI and CUDA Parallel Programming Interface, and Fortran/C Programming Language. 18 | 19 | * Yao Hong's homeworks. [Yao Hong](https://github.com/hong-yao) 20 | 21 | * Reference Codes and Training Materials are come from Lawrence Livermore National Laboratory, which written by Blaise Barney. Thanks a lot for the open resource. 22 | 23 | * My first and second CUDA Programs. 24 | 25 | * Example codes of my Lectures on HPC to Prof. Shan Tang's group. 26 | 27 | * We Gratefully Acknowledge Associate Prof. Hua-shan Yu from School of Electronics Engineering and Computer Science at Peking University for his help both in course and final project. 28 | 29 | 2. Reference Material's Programs 30 | * Peter Pacheco's Book (An introduction to Parallel Programming)'s Materials. Codes of each chapter. 31 | * Other materials will be updated in the future. 32 | 33 | 3. Reference Papers 34 | * Ristov S, Prodan R, Gusev M, et al. Superlinear speedup in HPC systems: Why and when?[C]. federated conference on computer science and information systems, 2016: 889-898. 35 | 36 | ## Warranty 37 | Maybe, there are many mistakes in the both documents and Codes, because of the limitation of our knowledge and strength. As a result: THESE DOCUMENTS AND CODES ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. 38 | I MAKE NO WARRANTIES, EXPRESS OR IMPLIED, THAT THEY ARE FREE OF ERROR. 39 | 40 | ## Copyright 41 | You can use and copy these works for any academic purpose, Except just copy to finish your homework or republish these works without proper declare their original author. 
42 | -------------------------------------------------------------------------------- /ReferencePapers/Superlinear Speedup in HPC Systems why and when.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/ReferencePapers/Superlinear Speedup in HPC Systems why and when.pdf --------------------------------------------------------------------------------