├── AnIntroductiontoParallelProgrammingPeterPacheco └── ipp-source-use │ ├── INDEX │ ├── INDEX~ │ ├── README │ ├── ch2 │ └── histogram.c │ ├── ch3 │ ├── bubble.c │ ├── mat_vect_mult.c │ ├── mpi_hello │ ├── mpi_hello.c │ ├── mpi_hello.c~ │ ├── mpi_hello.dSYM │ │ └── Contents │ │ │ ├── Info.plist │ │ │ └── Resources │ │ │ └── DWARF │ │ │ └── mpi_hello │ ├── mpi_many_msgs.c │ ├── mpi_mat_vect_mult.c │ ├── mpi_mat_vect_time.c │ ├── mpi_odd_even.c │ ├── mpi_output.c │ ├── mpi_trap1.c │ ├── mpi_trap2.c │ ├── mpi_trap3.c │ ├── mpi_trap4.c │ ├── mpi_vector_add.c │ ├── odd_even.c │ ├── timer.h │ ├── trap.c │ └── vector_add.c │ ├── ch4 │ ├── linked_list.c │ ├── mat_vect_mult.c │ ├── my_rand.c │ ├── my_rand.h │ ├── pth_busy_bar.c │ ├── pth_cond_bar.c │ ├── pth_do_nothing.c │ ├── pth_hello.c │ ├── pth_ll_mult_mut.c │ ├── pth_ll_one_mut.c │ ├── pth_ll_rwl.c │ ├── pth_mat_vect.c │ ├── pth_mat_vect_rand_split.c │ ├── pth_mat_vect_rand_split.c~ │ ├── pth_msg.c │ ├── pth_msg_sem.c │ ├── pth_msg_sem_mac.c │ ├── pth_pi.c │ ├── pth_pi_busy1 │ ├── pth_pi_busy1.c │ ├── pth_pi_busy1.dSYM │ │ └── Contents │ │ │ ├── Info.plist │ │ │ └── Resources │ │ │ └── DWARF │ │ │ └── pth_pi_busy1 │ ├── pth_pi_busy2.c │ ├── pth_pi_mutex │ ├── pth_pi_mutex.c │ ├── pth_pi_mutex.dSYM │ │ └── Contents │ │ │ ├── Info.plist │ │ │ └── Resources │ │ │ └── DWARF │ │ │ └── pth_pi_mutex │ ├── pth_sem_bar.c │ ├── pth_tokenize.c │ ├── pth_tokenize_r.c │ └── timer.h │ ├── ch5 │ ├── bubble.c │ ├── mat_vect_mult.c │ ├── odd_even.c │ ├── omp_fibo.c │ ├── omp_hello.c │ ├── omp_hello_err_chk.c │ ├── omp_mat_vect.c │ ├── omp_mat_vect_rand_split.c │ ├── omp_mat_vect_rand_split.c~ │ ├── omp_msg │ │ ├── omp_msglk.c │ │ ├── omp_msgps.c │ │ ├── queue.c │ │ ├── queue.h │ │ ├── queue_lk.c │ │ └── queue_lk.h │ ├── omp_odd_even1.c │ ├── omp_odd_even2.c │ ├── omp_pi.c │ ├── omp_private.c │ ├── omp_sin_sum.c │ ├── omp_tokenize.c │ ├── omp_tokenize_r.c │ ├── omp_trap1.c │ ├── omp_trap2a.c │ ├── omp_trap2b.c │ ├── omp_trap3.c │ └── 
trap.c │ └── ch6 │ ├── cyclic_derived.c │ ├── frac.c │ ├── frac.h │ ├── mat_17e │ ├── mat_17e-mpi-dyn-spl20-cut5.out │ ├── mat_17e-mpi-dyn-spl8-cut8.out │ ├── mat_17e-mpi-stat.out │ ├── mat_17e-pth-dyn-8-spl.out │ ├── mat_17e-pth-stat.out │ ├── mpi_nbody_basic.c │ ├── mpi_nbody_red.c │ ├── mpi_tsp_dyn │ ├── mpi_tsp_dyn.c │ ├── mpi_tsp_stat │ ├── mpi_tsp_stat.c │ ├── mpi_tsp_stat.c~ │ ├── nbody_basic │ ├── nbody_basic.c │ ├── nbody_basic.c~ │ ├── nbody_red │ ├── nbody_red.c │ ├── nbody_red.dSYM │ └── Contents │ │ ├── Info.plist │ │ └── Resources │ │ └── DWARF │ │ └── nbody_red │ ├── omp_nbody_basic.c │ ├── omp_nbody_red.c │ ├── omp_tsp_dyn.c │ ├── omp_tsp_stat.c │ ├── pth_nbody_basic.c │ ├── pth_nbody_red.c │ ├── pth_tsp_dyn │ ├── pth_tsp_dyn.c │ ├── pth_tsp_stat │ ├── pth_tsp_stat.c │ ├── timer.h │ ├── tsp_iter1.c │ ├── tsp_iter2.c │ └── tsp_rec.c ├── Homeworks ├── CUDA │ └── ThreadBlocksAndGrids │ │ ├── cudaThreadBlock1.cu │ │ └── cudaThreadGrid1.cu ├── ExampleCodes │ ├── Code1 │ │ └── PI.cpp │ ├── Code2 │ │ ├── Prime │ │ │ ├── Solution │ │ │ │ ├── performance test │ │ │ │ │ ├── primeMD │ │ │ │ │ ├── primeMD.cpp │ │ │ │ │ └── result.txt │ │ │ │ ├── primeMD.cpp │ │ │ │ └── result verify │ │ │ │ │ ├── primeMD │ │ │ │ │ ├── primeMD.cpp │ │ │ │ │ └── result.txt │ │ │ └── source code │ │ │ │ ├── prime │ │ │ │ ├── prime.cpp │ │ │ │ └── testResult.txt │ │ ├── SplitInOneDimension │ │ │ └── pthread_heat2D.cpp │ │ ├── SplitInTwoDimension │ │ │ ├── pthreadBarried │ │ │ │ ├── pthread_Barried.txt │ │ │ │ └── pthread_heat2D.c │ │ │ └── pthread_heat2D.c │ │ └── threadsCommunicationMode │ │ │ └── pthread_heat2D.c │ ├── Code3 │ │ ├── multiBody.c │ │ └── pthread_MultiBody.cpp │ ├── Code4 │ │ ├── mpi_heat2D.c │ │ └── mpi_heat2D_Origin.c │ └── Code5 │ │ ├── mpiMC │ │ ├── mpiMC.c │ │ ├── mpiMCfort │ │ └── mpiMCfort.f ├── HeatTransferProblem │ └── heat2DCodes │ │ ├── mpi_heat2D.c │ │ ├── mpi_heat2D_Origin.c │ │ └── pthread_heat2D.c ├── Homework_1 │ ├── HomeworkDebug │ │ ├── PI │ │ ├── 
PI.cpp │ │ ├── PIintel │ │ ├── numstep 1000.dat │ │ ├── numstep 1000000.dat │ │ └── numstep 1000000000.dat │ ├── PI.cpp │ ├── numstep 1000 │ ├── numstep 1000000 │ ├── numstep 1000000000 │ ├── 第一次作业.pdf │ └── 第一次作业题目.pdf ├── Homework_2 │ ├── Homework_2 │ ├── heat2D │ │ ├── code_debug │ │ │ ├── Performance.m │ │ │ ├── SpeedUp.jpg │ │ │ ├── Time Cost.jpg │ │ │ ├── heat2DRun.txt │ │ │ └── pthread_heat2DRun.txt │ │ ├── original_code │ │ │ └── ser_heat2D.c │ │ └── solution │ │ │ ├── SplitInOneDimension │ │ │ └── pthread_heat2D.cpp │ │ │ ├── SplitInTwoDimension │ │ │ ├── pthreadBarried │ │ │ │ ├── pthread_Barried.txt │ │ │ │ └── pthread_heat2D.c │ │ │ └── pthread_heat2D.c │ │ │ └── threadsCommunicationMode │ │ │ └── pthread_heat2D.c │ ├── prime_number │ │ ├── code_debug │ │ │ ├── bucketSort.cpp │ │ │ ├── primeModified │ │ │ ├── primeModified.cpp │ │ │ ├── primeModified2.cpp │ │ │ ├── primeTemp │ │ │ ├── primeTemp.cpp │ │ │ ├── primeTest │ │ │ ├── primeTest.cpp │ │ │ ├── primeTest2 │ │ │ ├── primeTest2.cpp │ │ │ ├── primeTest2Result.txt │ │ │ └── sortBucket │ │ ├── original_codes │ │ │ ├── prime │ │ │ ├── prime.cpp │ │ │ └── testResult.txt │ │ └── solutions │ │ │ ├── performance_test │ │ │ ├── primeMD │ │ │ ├── primeMD.cpp │ │ │ └── result.txt │ │ │ ├── primeMD.cpp │ │ │ └── result_verify │ │ │ ├── primeMD │ │ │ ├── primeMD.cpp │ │ │ └── result.txt │ └── 第二次作业.pdf ├── Homework_3 │ ├── Homework 3 │ └── 第三次作业.pdf ├── Homework_4 │ ├── CodeTest │ │ └── TestResult.txt │ ├── Homework_4.txt │ ├── SourceCodes │ │ ├── multiBody.c │ │ └── pthread_MultiBody.cpp │ └── 第四次作业.pdf ├── Homework_5 │ ├── CodeTest │ │ ├── Performace.m │ │ ├── SpeedUp.jpg │ │ ├── Test1 │ │ │ ├── draw_heatf.c │ │ │ ├── heat2D.cpp │ │ │ ├── mpi_heat2D.c │ │ │ └── mpi_heat2D.f │ │ ├── TimeCost.jpg │ │ ├── mpi_heat2D.txt │ │ └── pthread_heat2D.txt │ ├── MyCodes │ │ ├── mpi_heat2D.c │ │ └── mpi_heat2D_Origin.c │ ├── RefrenceCode │ │ ├── mpi_heat2D.c │ │ └── mpi_heat2D.f │ ├── pthread_heat2D.cpp │ ├── 第五次作业.pdf │ 
└── 第五次作业.txt ├── Homework_6 │ ├── Code │ │ └── mpi_process.cpp │ └── homework 6.txt ├── Homework_7 │ ├── OriginalCodes │ │ ├── NBody(3).cpp │ │ └── mpi_MultiBody.cpp │ ├── 第七次作业.pdf │ └── 第七次作业.txt ├── Homework_8 │ ├── Code │ │ └── 2d5p-win.cpp │ ├── 第八次作业.pdf │ └── 第八次作业.txt ├── Homework_9 │ ├── Documents │ │ ├── 01531136.pdf │ │ ├── 24.pdf │ │ ├── Fox_example.pdf │ │ ├── Li-Fall-2012-CSE633.pdf │ │ ├── lawn129.pdf │ │ ├── matrixmult.pdf │ │ └── ppagerank_report.pdf │ └── 第九次作业.pdf ├── Shiyao_Li_s_MPI_homework │ └── 李师尧MPI作业参考 │ │ ├── 150*******李师尧-作业七.pdf │ │ ├── 2d5p-win.cpp │ │ ├── NBody-mpi.cpp │ │ ├── mpi_process.cpp │ │ ├── 作业九 FOX并行算法和PageRank算法.pdf │ │ ├── 作业八150*******李师尧.pdf │ │ └── 作业六150*******李师尧.pdf └── Yao_Hong_s_Homework │ ├── Homework_1 │ ├── README.md │ └── 并行程序设计作业-1.pdf │ ├── Homework_2 │ ├── README.md │ ├── saxpy.c │ └── 稠密向量的SAXPY_测评报告.pdf │ ├── Homework_3 │ ├── README.md │ ├── shuzu.cpp │ └── 数组拼接_测评报告.pdf │ ├── Homework_4 │ ├── 2D5P.cpp │ ├── 2D5P_测评报告.pdf │ └── README.md │ ├── Homework_5 │ ├── HEAT-TRANSFER_测评报告.pdf │ ├── README.md │ └── rechuandao.cpp │ ├── Homework_6 │ ├── N-body_测评报告.pdf │ ├── N_body.cpp │ └── README.md │ ├── Homework_7 │ └── README.md │ ├── Homework_8 │ └── README.md │ ├── Homework_9 │ └── README.md │ └── README.md ├── LICENSE ├── README.md └── ReferencePapers └── Superlinear Speedup in HPC Systems why and when.pdf /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/README: -------------------------------------------------------------------------------- 1 | The files in the various subdirectories are source code from the 2 | book "An Introduction to Parallel Programming" by Peter Pacheco, 3 | Morgan Kaufmann Publishers. 4 | 5 | COMPILING 6 | --------- 7 | Most of the programs are self-contained and should compile by 8 | themselves provided you make sure that the compiler can find the 9 | appropriate header files (e.g., mpi.h, pthread.h, semaphore.h, 10 | timer.h) and libraries. 
Documentation at the beginning of each 11 | program provides a sample command line for compilation on a typical 12 | Linux or MacOS X system. 13 | 14 | RUNNING 15 | ------- 16 | The exact syntax for starting the programs may be system dependent. 17 | See your local expert for details. Any input needed by the program 18 | can be determined from comments at the beginning of the source 19 | file. 20 | 21 | I/O 22 | --- 23 | All of the longer applications only use process/thread 0 for I/O. 24 | However, some short test programs (e.g., hello, world) print data 25 | from processes or threads other than 0. Also in many of the programs, 26 | if a DEBUG macro is defined, the program may print data from more 27 | than one process/thread. 28 | 29 | DISCLAIMER 30 | ---------- 31 | These programs are not production quality. Their main purpose is 32 | to help the reader explore parallel programming. They should be 33 | easy to modify for a specific application and you are welcome to 34 | do so. If you have suggestions or discover any bugs it would be 35 | greatly appreciated if you could let us know by sending email to 36 | peter at usfca dot edu 37 | 38 | COPYING 39 | ------- 40 | All of the programs in these directories are copyright Morgan 41 | Kaufmann Publishers and Peter Pacheco. However, they may be freely 42 | copied and used in non-commercial software, provided credit is given 43 | to both Morgan Kaufmann Publishers and Peter Pacheco. 44 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/bubble.c: -------------------------------------------------------------------------------- 1 | /* File: bubble.c 2 | * 3 | * Purpose: Use bubble sort to sort a list of ints. 
4 | * 5 | * Compile: gcc -g -Wall -o bubble bubble.c 6 | * Usage: bubble 7 | * n: number of elements in list 8 | * 'g': generate list using a random number generator 9 | * 'i': user input list 10 | * 11 | * Input: list (optional) 12 | * Output: sorted list 13 | * 14 | * IPP: Section 3.7.1 (pp. 127 and ff.) and Section 5.6.1 15 | * (pp. 232 and ff.) 16 | */ 17 | #include 18 | #include 19 | 20 | /* For random list, 0 <= keys < RMAX */ 21 | const int RMAX = 100; 22 | 23 | void Usage(char* prog_name); 24 | void Get_args(int argc, char* argv[], int* n_p, char* g_i_p); 25 | void Generate_list(int a[], int n); 26 | void Print_list(int a[], int n, char* title); 27 | void Read_list(int a[], int n); 28 | void Bubble_sort(int a[], int n); 29 | 30 | /*-----------------------------------------------------------------*/ 31 | int main(int argc, char* argv[]) { 32 | int n; 33 | char g_i; 34 | int* a; 35 | 36 | Get_args(argc, argv, &n, &g_i); 37 | a = (int*) malloc(n*sizeof(int)); 38 | if (g_i == 'g') { 39 | Generate_list(a, n); 40 | Print_list(a, n, "Before sort"); 41 | } else { 42 | Read_list(a, n); 43 | } 44 | 45 | Bubble_sort(a, n); 46 | 47 | Print_list(a, n, "After sort"); 48 | 49 | free(a); 50 | return 0; 51 | } /* main */ 52 | 53 | 54 | /*----------------------------------------------------------------- 55 | * Function: Usage 56 | * Purpose: Summary of how to run program 57 | */ 58 | void Usage(char* prog_name) { 59 | fprintf(stderr, "usage: %s \n", prog_name); 60 | fprintf(stderr, " n: number of elements in list\n"); 61 | fprintf(stderr, " 'g': generate list using a random number generator\n"); 62 | fprintf(stderr, " 'i': user input list\n"); 63 | } /* Usage */ 64 | 65 | 66 | /*----------------------------------------------------------------- 67 | * Function: Get_args 68 | * Purpose: Get and check command line arguments 69 | * In args: argc, argv 70 | * Out args: n_p, g_i_p 71 | */ 72 | void Get_args(int argc, char* argv[], int* n_p, char* g_i_p) { 73 | if (argc != 3 ) { 74 
| Usage(argv[0]); 75 | exit(0); 76 | } 77 | *n_p = atoi(argv[1]); 78 | *g_i_p = argv[2][0]; 79 | 80 | if (*n_p <= 0 || (*g_i_p != 'g' && *g_i_p != 'i') ) { 81 | Usage(argv[0]); 82 | exit(0); 83 | } 84 | } /* Get_args */ 85 | 86 | 87 | /*----------------------------------------------------------------- 88 | * Function: Generate_list 89 | * Purpose: Use random number generator to generate list elements 90 | * In args: n 91 | * Out args: a 92 | */ 93 | void Generate_list(int a[], int n) { 94 | int i; 95 | 96 | srandom(0); 97 | for (i = 0; i < n; i++) 98 | a[i] = random() % RMAX; 99 | } /* Generate_list */ 100 | 101 | 102 | /*----------------------------------------------------------------- 103 | * Function: Print_list 104 | * Purpose: Print the elements in the list 105 | * In args: a, n 106 | */ 107 | void Print_list(int a[], int n, char* title) { 108 | int i; 109 | 110 | printf("%s:\n", title); 111 | for (i = 0; i < n; i++) 112 | printf("%d ", a[i]); 113 | printf("\n\n"); 114 | } /* Print_list */ 115 | 116 | 117 | /*----------------------------------------------------------------- 118 | * Function: Read_list 119 | * Purpose: Read elements of list from stdin 120 | * In args: n 121 | * Out args: a 122 | */ 123 | void Read_list(int a[], int n) { 124 | int i; 125 | 126 | printf("Please enter the elements of the list\n"); 127 | for (i = 0; i < n; i++) 128 | scanf("%d", &a[i]); 129 | } /* Read_list */ 130 | 131 | 132 | /*----------------------------------------------------------------- 133 | * Function: Bubble_sort 134 | * Purpose: Sort list using bubble sort 135 | * In args: n 136 | * In/out args: a 137 | */ 138 | void Bubble_sort( 139 | int a[] /* in/out */, 140 | int n /* in */) { 141 | int list_length, i, temp; 142 | 143 | for (list_length = n; list_length >= 2; list_length--) 144 | for (i = 0; i < list_length-1; i++) 145 | if (a[i] > a[i+1]) { 146 | temp = a[i]; 147 | a[i] = a[i+1]; 148 | a[i+1] = temp; 149 | } 150 | 151 | } /* Bubble_sort */ 152 | 153 | 
-------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_hello: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_hello -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_hello.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * mpi_hello.c 3 | * 4 | * Purpose: 5 | * A "hello,world" program that uses MPI 6 | * 7 | * Compile: 8 | * mpicc -g -Wall -std=C99 -o mpi_hello mpi_hello.c 9 | * Usage: 10 | * mpiexec -n ./mpi_hello 11 | * 12 | * Input: 13 | * None 14 | * Output: 15 | * A greeting from each process 16 | * 17 | * Algorithm: 18 | * Each process sends a message to process 0, which prints 19 | * the messages it has received, as well as its own message. 20 | * 21 | * IPP: Section 3.1 (pp. 84 and ff.) 
22 | */ 23 | #include 24 | #include /* For strlen */ 25 | #include /* For MPI functions, etc */ 26 | 27 | const int MAX_STRING = 100; 28 | 29 | int main(void) { 30 | char greeting[MAX_STRING]; /* String storing message */ 31 | int comm_sz; /* Number of processes */ 32 | int my_rank; /* My process rank */ 33 | 34 | /* Start up MPI */ 35 | MPI_Init(NULL, NULL); 36 | 37 | /* Get the number of processes */ 38 | MPI_Comm_size(MPI_COMM_WORLD, &comm_sz); 39 | 40 | /* Get my rank among all the processes */ 41 | MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 42 | 43 | if (my_rank != 0) { 44 | /* Create message */ 45 | sprintf(greeting, "Greetings from process %d of %d!", 46 | my_rank, comm_sz); 47 | /* Send message to process 0 */ 48 | MPI_Send(greeting, strlen(greeting)+1, MPI_CHAR, 0, 0, 49 | MPI_COMM_WORLD); 50 | } else { 51 | /* Print my message */ 52 | printf("Greetings from process %d of %d!\n", my_rank, comm_sz); 53 | for (int q = 1; q < comm_sz; q++) { 54 | /* Receive message from process q */ 55 | MPI_Recv(greeting, MAX_STRING, MPI_CHAR, q, 56 | 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 57 | /* Print message from process q */ 58 | printf("%s\n", greeting); 59 | } 60 | } 61 | 62 | /* Shut down MPI */ 63 | MPI_Finalize(); 64 | 65 | return 0; 66 | } /* main */ 67 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_hello.c~: -------------------------------------------------------------------------------- 1 | /* File: 2 | * mpi_hello.c 3 | * 4 | * Purpose: 5 | * A "hello,world" program that uses MPI 6 | * 7 | * Compile: 8 | * mpicc -g -Wall -std=C99 -o mpi_hello mpi_hello.c 9 | * Usage: 10 | * mpiexec -n ./mpi_hello 11 | * 12 | * Input: 13 | * None 14 | * Output: 15 | * A greeting from each process 16 | * 17 | * Algorithm: 18 | * Each process sends a message to process 0, which prints 19 | * the messages it has received, as well as its own message. 
20 | * 21 | * IPP: Section 3.1 (pp. 84 and ff.) 22 | */ 23 | #include 24 | #include /* For strlen */ 25 | #include /* For MPI functions, etc */ 26 | 27 | const int MAX_STRING = 100; 28 | 29 | int main(void) { 30 | char greeting[MAX_STRING]; /* String storing message */ 31 | int comm_sz; /* Number of processes */ 32 | int my_rank; /* My process rank */ 33 | 34 | /* Start up MPI */ 35 | MPI_Init(NULL, NULL); 36 | 37 | /* Get the number of processes */ 38 | MPI_Comm_size(MPI_COMM_WORLD, &comm_sz); 39 | 40 | /* Get my rank among all the processes */ 41 | MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 42 | 43 | if (my_rank != 0) { 44 | /* Create message */ 45 | sprintf(greeting, "Greetings from process %d of %d!", 46 | my_rank, comm_sz); 47 | /* Send message to process 0 */ 48 | MPI_Send(greeting, strlen(greeting), MPI_CHAR, 0, 0, 49 | MPI_COMM_WORLD); 50 | } else { 51 | /* Print my message */ 52 | printf("Greetings from process %d of %d!\n", my_rank, comm_sz); 53 | for (int q = 1; q < comm_sz; q++) { 54 | /* Receive message from process q */ 55 | MPI_Recv(greeting, MAX_STRING, MPI_CHAR, q, 56 | 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 57 | /* Print message from process q */ 58 | printf("%s\n", greeting); 59 | } 60 | } 61 | 62 | /* Shut down MPI */ 63 | MPI_Finalize(); 64 | 65 | return 0; 66 | } /* main */ 67 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_hello.dSYM/Contents/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | English 7 | CFBundleIdentifier 8 | com.apple.xcode.dsym.mpi_hello 9 | CFBundleInfoDictionaryVersion 10 | 6.0 11 | CFBundlePackageType 12 | dSYM 13 | CFBundleSignature 14 | ???? 
15 | CFBundleShortVersionString 16 | 1.0 17 | CFBundleVersion 18 | 1 19 | 20 | 21 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_hello.dSYM/Contents/Resources/DWARF/mpi_hello: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_hello.dSYM/Contents/Resources/DWARF/mpi_hello -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_many_msgs.c: -------------------------------------------------------------------------------- 1 | /* File: mpi_many_msgs.c 2 | * Purpose: Compare the time needed to send n messages consisting of 3 | * a single double each with the time needed to send one message 4 | * consisting of n doubles. 5 | * 6 | * Compile: mpicc -g -Wall -O2 -o mpi_many_msgs mpi_many_msgs.c 7 | * Run: mpiexec -n 2 mpi_many_msgs 8 | * 9 | * Input: none 10 | * Output: Elapsed time for n messages of size 1 double and elapsed 11 | * time for 1 message of n doubles 12 | * 13 | * IPP: Section 3.5 (pp. 116 and ff.) 
14 | */ 15 | #include 16 | #include 17 | #include 18 | 19 | int my_rank; 20 | int comm_sz; 21 | MPI_Comm comm; 22 | 23 | void Get_arg(int argc, char* argv[], int* n_p); 24 | 25 | int main(int argc, char* argv[]) { 26 | double* x; 27 | int i, n; 28 | MPI_Status status; 29 | double start, finish; 30 | 31 | MPI_Init(&argc, &argv); 32 | comm = MPI_COMM_WORLD; 33 | MPI_Comm_size(comm, &comm_sz); 34 | MPI_Comm_rank(comm, &my_rank); 35 | 36 | Get_arg(argc, argv, &n); 37 | 38 | x = malloc(n*sizeof(double)); 39 | 40 | if (my_rank == 0) 41 | for (i = 0; i < n; i++) x[i] = i; 42 | else /* my_rank == 1 */ 43 | for (i = 0; i < n; i++) x[i] = -1; 44 | 45 | MPI_Barrier(comm); 46 | start = MPI_Wtime(); 47 | if (my_rank == 0) 48 | for (i = 0; i < n; i++) 49 | MPI_Send(&x[i], 1, MPI_DOUBLE, 1, 0, comm); 50 | else /* my_rank == 1 */ 51 | for (i = 0; i < n; i++) 52 | MPI_Recv(&x[i], 1, MPI_DOUBLE, 0, 0, comm, &status); 53 | finish = MPI_Wtime(); 54 | printf("Proc %d > First comm took %e seconds\n", my_rank, 55 | finish-start); 56 | fflush(stdout); 57 | 58 | MPI_Barrier(comm); 59 | start = MPI_Wtime(); 60 | if (my_rank == 0) 61 | MPI_Send(x, n, MPI_DOUBLE, 1, 0, comm); 62 | else /* my_rank == 1 */ 63 | MPI_Recv(x, n, MPI_DOUBLE, 0, 0, comm, &status); 64 | finish = MPI_Wtime(); 65 | printf("Proc %d > Second comm took %e seconds\n", my_rank, 66 | finish-start); 67 | fflush(stdout); 68 | 69 | free(x); 70 | MPI_Finalize(); 71 | return 0; 72 | } /* main */ 73 | 74 | /*-------------------------------------------------------------------*/ 75 | void Get_arg( 76 | int argc /* in */, 77 | char* argv[] /* out */, 78 | int* n_p /* out */) { 79 | 80 | if (my_rank == 0) { 81 | if (argc != 2 || comm_sz != 2) { 82 | fprintf(stderr, "usage: mpiexec -n 2 %s \n", 83 | argv[0]); 84 | *n_p = 0; 85 | } else { 86 | *n_p = strtol(argv[1], NULL, 10); 87 | } 88 | } 89 | MPI_Bcast(n_p, 1, MPI_INT, 0, comm); 90 | if (*n_p <= 0 || comm_sz != 2) { 91 | MPI_Finalize(); 92 | exit(0); 93 | } 94 | } /* Get_arg */ 95 | 
-------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_output.c: -------------------------------------------------------------------------------- 1 | /* File: mpi_output.c 2 | * 3 | * Purpose: A program in which multiple MPI processes try to print 4 | * a message. 5 | * 6 | * Compile: mpicc -g -Wall -o mpi_output mpi_output.c 7 | * Usage: mpiexec -n ./mpi_output 8 | * 9 | * Input: None 10 | * Output: A message from each process 11 | * 12 | * IPP: Section 3.3.1 (pp. 97 and ff.) 13 | */ 14 | #include 15 | #include 16 | 17 | int main(void) { 18 | int my_rank, comm_sz; 19 | 20 | MPI_Init(NULL, NULL); 21 | MPI_Comm_size(MPI_COMM_WORLD, &comm_sz); 22 | MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 23 | 24 | printf("Proc %d of %d > Does anyone have a toothpick?\n", 25 | my_rank, comm_sz); 26 | 27 | MPI_Finalize(); 28 | return 0; 29 | } /* main */ 30 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/mpi_trap1.c: -------------------------------------------------------------------------------- 1 | /* File: mpi_trap1.c 2 | * Purpose: Use MPI to implement a parallel version of the trapezoidal 3 | * rule. In this version the endpoints of the interval and 4 | * the number of trapezoids are hardwired. 5 | * 6 | * Input: None. 7 | * Output: Estimate of the integral from a to b of f(x) 8 | * using the trapezoidal rule and n trapezoids. 9 | * 10 | * Compile: mpicc -g -Wall -o mpi_trap1 mpi_trap1.c 11 | * Run: mpiexec -n ./mpi_trap1 12 | * 13 | * Algorithm: 14 | * 1. Each process calculates "its" interval of 15 | * integration. 16 | * 2. Each process estimates the integral of f(x) 17 | * over its interval using the trapezoidal rule. 18 | * 3a. Each process != 0 sends its integral to 0. 19 | * 3b. 
Process 0 sums the calculations received from 20 | * the individual processes and prints the result. 21 | * 22 | * Note: f(x), a, b, and n are all hardwired. 23 | * 24 | * IPP: Section 3.2.2 (pp. 96 and ff.) 25 | */ 26 | #include 27 | 28 | /* We'll be using MPI routines, definitions, etc. */ 29 | #include 30 | 31 | /* Calculate local integral */ 32 | double Trap(double left_endpt, double right_endpt, int trap_count, 33 | double base_len); 34 | 35 | /* Function we're integrating */ 36 | double f(double x); 37 | 38 | int main(void) { 39 | int my_rank, comm_sz, n = 1024, local_n; 40 | double a = 0.0, b = 3.0, h, local_a, local_b; 41 | double local_int, total_int; 42 | int source; 43 | 44 | /* Let the system do what it needs to start up MPI */ 45 | MPI_Init(NULL, NULL); 46 | 47 | /* Get my process rank */ 48 | MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 49 | 50 | /* Find out how many processes are being used */ 51 | MPI_Comm_size(MPI_COMM_WORLD, &comm_sz); 52 | 53 | h = (b-a)/n; /* h is the same for all processes */ 54 | local_n = n/comm_sz; /* So is the number of trapezoids */ 55 | 56 | /* Length of each process' interval of 57 | * integration = local_n*h. 
So my interval 58 | * starts at: */ 59 | local_a = a + my_rank*local_n*h; 60 | local_b = local_a + local_n*h; 61 | local_int = Trap(local_a, local_b, local_n, h); 62 | 63 | /* Add up the integrals calculated by each process */ 64 | if (my_rank != 0) { 65 | MPI_Send(&local_int, 1, MPI_DOUBLE, 0, 0, 66 | MPI_COMM_WORLD); 67 | } else { 68 | total_int = local_int; 69 | for (source = 1; source < comm_sz; source++) { 70 | MPI_Recv(&local_int, 1, MPI_DOUBLE, source, 0, 71 | MPI_COMM_WORLD, MPI_STATUS_IGNORE); 72 | total_int += local_int; 73 | } 74 | } 75 | 76 | /* Print the result */ 77 | if (my_rank == 0) { 78 | printf("With n = %d trapezoids, our estimate\n", n); 79 | printf("of the integral from %f to %f = %.15e\n", 80 | a, b, total_int); 81 | } 82 | 83 | /* Shut down MPI */ 84 | MPI_Finalize(); 85 | 86 | return 0; 87 | } /* main */ 88 | 89 | 90 | /*------------------------------------------------------------------ 91 | * Function: Trap 92 | * Purpose: Serial function for estimating a definite integral 93 | * using the trapezoidal rule 94 | * Input args: left_endpt 95 | * right_endpt 96 | * trap_count 97 | * base_len 98 | * Return val: Trapezoidal rule estimate of integral from 99 | * left_endpt to right_endpt using trap_count 100 | * trapezoids 101 | */ 102 | double Trap( 103 | double left_endpt /* in */, 104 | double right_endpt /* in */, 105 | int trap_count /* in */, 106 | double base_len /* in */) { 107 | double estimate, x; 108 | int i; 109 | 110 | estimate = (f(left_endpt) + f(right_endpt))/2.0; 111 | for (i = 1; i <= trap_count-1; i++) { 112 | x = left_endpt + i*base_len; 113 | estimate += f(x); 114 | } 115 | estimate = estimate*base_len; 116 | 117 | return estimate; 118 | } /* Trap */ 119 | 120 | 121 | /*------------------------------------------------------------------ 122 | * Function: f 123 | * Purpose: Compute value of function to be integrated 124 | * Input args: x 125 | */ 126 | double f(double x) { 127 | return x*x; 128 | } /* f */ 129 | 
-------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/timer.h: -------------------------------------------------------------------------------- 1 | /* File: timer.h 2 | * 3 | * Purpose: Define a macro that returns the number of seconds that 4 | * have elapsed since some point in the past. The timer 5 | * should return times with microsecond accuracy. 6 | * 7 | * Note: The argument passed to the GET_TIME macro should be 8 | * a double, *not* a pointer to a double. 9 | * 10 | * Example: 11 | * #include "timer.h" 12 | * . . . 13 | * double start, finish, elapsed; 14 | * . . . 15 | * GET_TIME(start); 16 | * . . . 17 | * Code to be timed 18 | * . . . 19 | * GET_TIME(finish); 20 | * elapsed = finish - start; 21 | * printf("The code to be timed took %e seconds\n", elapsed); 22 | * 23 | * IPP: Section 3.6.1 (p. 121) and Section 6.1.2 (pp. 273 and ff.) 24 | */ 25 | #ifndef _TIMER_H_ 26 | #define _TIMER_H_ 27 | 28 | #include 29 | 30 | /* The argument now should be a double (not a pointer to a double) */ 31 | #define GET_TIME(now) { \ 32 | struct timeval t; \ 33 | gettimeofday(&t, NULL); \ 34 | now = t.tv_sec + t.tv_usec/1000000.0; \ 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/trap.c: -------------------------------------------------------------------------------- 1 | /* File: trap.c 2 | * Purpose: Calculate definite integral using trapezoidal 3 | * rule. 4 | * 5 | * Input: a, b, n 6 | * Output: Estimate of integral from a to b of f(x) 7 | * using n trapezoids. 8 | * 9 | * Compile: gcc -g -Wall -o trap trap.c 10 | * Usage: ./trap 11 | * 12 | * Note: The function f(x) is hardwired. 13 | * 14 | * IPP: Section 3.2.1 (pp. 94 and ff.) and 5.2 (p. 
216) 15 | */ 16 | 17 | #include 18 | 19 | double f(double x); /* Function we're integrating */ 20 | double Trap(double a, double b, int n, double h); 21 | 22 | int main(void) { 23 | double integral; /* Store result in integral */ 24 | double a, b; /* Left and right endpoints */ 25 | int n; /* Number of trapezoids */ 26 | double h; /* Height of trapezoids */ 27 | 28 | printf("Enter a, b, and n\n"); 29 | scanf("%lf", &a); 30 | scanf("%lf", &b); 31 | scanf("%d", &n); 32 | 33 | h = (b-a)/n; 34 | integral = Trap(a, b, n, h); 35 | 36 | printf("With n = %d trapezoids, our estimate\n", n); 37 | printf("of the integral from %f to %f = %.15f\n", 38 | a, b, integral); 39 | 40 | return 0; 41 | } /* main */ 42 | 43 | /*------------------------------------------------------------------ 44 | * Function: Trap 45 | * Purpose: Estimate integral from a to b of f using trap rule and 46 | * n trapezoids 47 | * Input args: a, b, n, h 48 | * Return val: Estimate of the integral 49 | */ 50 | double Trap(double a, double b, int n, double h) { 51 | double integral; 52 | int k; 53 | 54 | integral = (f(a) + f(b))/2.0; 55 | for (k = 1; k <= n-1; k++) { 56 | integral += f(a+k*h); 57 | } 58 | integral = integral*h; 59 | 60 | return integral; 61 | } /* Trap */ 62 | 63 | /*------------------------------------------------------------------ 64 | * Function: f 65 | * Purpose: Compute value of function to be integrated 66 | * Input args: x 67 | */ 68 | double f(double x) { 69 | double return_val; 70 | 71 | return_val = x*x; 72 | return return_val; 73 | } /* f */ 74 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch3/vector_add.c: -------------------------------------------------------------------------------- 1 | /* File: vector_add.c 2 | * 3 | * Purpose: Implement vector addition 4 | * 5 | * Compile: gcc -g -Wall -o vector_add vector_add.c 6 | * Run: ./vector_add 7 | * 8 | * Input: The order of the 
vectors, n, and the vectors x and y 9 | * Output: The sum vector z = x+y 10 | * 11 | * Note: 12 | * If the program detects an error (order of vector <= 0 or malloc 13 | * failure), it prints a message and terminates 14 | * 15 | * IPP: Section 3.4.6 (p. 109) 16 | */ 17 | #include 18 | #include 19 | 20 | void Read_n(int* n_p); 21 | void Allocate_vectors(double** x_pp, double** y_pp, double** z_pp, int n); 22 | void Read_vector(double a[], int n, char vec_name[]); 23 | void Print_vector(double b[], int n, char title[]); 24 | void Vector_sum(double x[], double y[], double z[], int n); 25 | 26 | /*---------------------------------------------------------------------*/ 27 | int main(void) { 28 | int n; 29 | double *x, *y, *z; 30 | 31 | Read_n(&n); 32 | Allocate_vectors(&x, &y, &z, n); 33 | 34 | Read_vector(x, n, "x"); 35 | Read_vector(y, n, "y"); 36 | 37 | Vector_sum(x, y, z, n); 38 | 39 | Print_vector(z, n, "The sum is"); 40 | 41 | free(x); 42 | free(y); 43 | free(z); 44 | 45 | return 0; 46 | } /* main */ 47 | 48 | /*--------------------------------------------------------------------- 49 | * Function: Read_n 50 | * Purpose: Get the order of the vectors from stdin 51 | * Out arg: n_p: the order of the vectors 52 | * 53 | * Errors: If n <= 0, the program terminates 54 | */ 55 | void Read_n(int* n_p /* out */) { 56 | printf("What's the order of the vectors?\n"); 57 | scanf("%d", n_p); 58 | if (*n_p <= 0) { 59 | fprintf(stderr, "Order should be positive\n"); 60 | exit(-1); 61 | } 62 | } /* Read_n */ 63 | 64 | /*--------------------------------------------------------------------- 65 | * Function: Allocate_vectors 66 | * Purpose: Allocate storage for the vectors 67 | * In arg: n: the order of the vectors 68 | * Out args: x_pp, y_pp, z_pp: pointers to storage for the vectors 69 | * 70 | * Errors: If one of the mallocs fails, the program terminates 71 | */ 72 | void Allocate_vectors( 73 | double** x_pp /* out */, 74 | double** y_pp /* out */, 75 | double** z_pp /* out */, 76 
| int n /* in */) { 77 | *x_pp = malloc(n*sizeof(double)); 78 | *y_pp = malloc(n*sizeof(double)); 79 | *z_pp = malloc(n*sizeof(double)); 80 | if (*x_pp == NULL || *y_pp == NULL || *z_pp == NULL) { 81 | fprintf(stderr, "Can't allocate vectors\n"); 82 | exit(-1); 83 | } 84 | } /* Allocate_vectors */ 85 | 86 | /*--------------------------------------------------------------------- 87 | * Function: Read_vector 88 | * Purpose: Read a vector from stdin 89 | * In args: n: order of the vector 90 | * vec_name: name of vector (e.g., x) 91 | * Out arg: a: the vector to be read in 92 | */ 93 | void Read_vector( 94 | double a[] /* out */, 95 | int n /* in */, 96 | char vec_name[] /* in */) { 97 | int i; 98 | printf("Enter the vector %s\n", vec_name); 99 | for (i = 0; i < n; i++) 100 | scanf("%lf", &a[i]); 101 | } /* Read_vector */ 102 | 103 | /*--------------------------------------------------------------------- 104 | * Function: Print_vector 105 | * Purpose: Print the contents of a vector 106 | * In args: b: the vector to be printed 107 | * n: the order of the vector 108 | * title: title for print out 109 | */ 110 | void Print_vector( 111 | double b[] /* in */, 112 | int n /* in */, 113 | char title[] /* in */) { 114 | int i; 115 | printf("%s\n", title); 116 | for (i = 0; i < n; i++) 117 | printf("%f ", b[i]); 118 | printf("\n"); 119 | } /* Print_vector */ 120 | 121 | /*--------------------------------------------------------------------- 122 | * Function: Vector_sum 123 | * Purpose: Add two vectors 124 | * In args: x: the first vector to be added 125 | * y: the second vector to be added 126 | * n: the order of the vectors 127 | * Out arg: z: the sum vector 128 | */ 129 | void Vector_sum( 130 | double x[] /* in */, 131 | double y[] /* in */, 132 | double z[] /* out */, 133 | int n /* in */) { 134 | int i; 135 | 136 | for (i = 0; i < n; i++) 137 | z[i] = x[i] + y[i]; 138 | } /* Vector_sum */ 139 | -------------------------------------------------------------------------------- 
/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/my_rand.c: -------------------------------------------------------------------------------- 1 | /* File: my_rand.c 2 | * 3 | * Purpose: implement a linear congruential random number generator 4 | * 5 | * my_rand: generates a random unsigned int in the range 0 - MR_MODULUS 6 | * my_drand: generates a random double in the range 0 - 1 7 | * 8 | * Notes: 9 | * 1. The generator is taken from the Wikipedia article "Linear congruential 10 | * generator" 11 | * 2. This is *not* a very good random number generator. However, unlike 12 | * the C library function random(), it *is* threadsafe: the "state" of 13 | * the generator is returned in the seed_p argument to each function. 14 | * 3. The main function is just a simple driver. 15 | * 16 | * IPP: Not discussed, but needed by the multithreaded linked list programs 17 | * discussed in Section 4.9.2-4.9.4 (pp. 183-190). 18 | */ 19 | #include 20 | #include 21 | #include "my_rand.h" 22 | 23 | #define MR_MULTIPLIER 279470273 24 | #define MR_INCREMENT 0 25 | #define MR_MODULUS 4294967291U 26 | #define MR_DIVISOR ((double) 4294967291U) 27 | 28 | 29 | #ifdef _MAIN_ 30 | int main(void) { 31 | int n, i; 32 | unsigned seed = 1, x; 33 | double y; 34 | 35 | printf("How many random numbers?\n"); 36 | scanf("%d", &n); 37 | 38 | x = my_rand(&seed); 39 | for (i = 0; i < n; i++) { 40 | x = my_rand(&x); 41 | printf("%u\n", x); 42 | } 43 | for (i = 0; i < n; i++) { 44 | y = my_drand(&x); 45 | printf("%e\n", y); 46 | } 47 | return 0; 48 | } 49 | #endif 50 | 51 | /* Function: my_rand 52 | * In/out arg: seed_p 53 | * Return value: A new pseudo-random unsigned int in the range 54 | * 0 - MR_MODULUS 55 | * 56 | * Notes: 57 | * 1. This is a slightly modified version of the generator in the 58 | * Wikipedia article "Linear congruential generator" 59 | * 2. The seed_p argument stores the "state" for the next call to 60 | * the function. 
61 | */ 62 | unsigned my_rand(unsigned* seed_p) { 63 | long long z = *seed_p; 64 | z *= MR_MULTIPLIER; 65 | // z += MR_INCREMENT; 66 | z %= MR_MODULUS; 67 | *seed_p = z; 68 | return *seed_p; 69 | } 70 | 71 | /* Function: my_drand 72 | * In/out arg: seed_p 73 | * Return value: A new pseudo-random double in the range 0 - 1 74 | */ 75 | double my_drand(unsigned* seed_p) { 76 | unsigned x = my_rand(seed_p); 77 | double y = x/MR_DIVISOR; 78 | return y; 79 | } 80 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/my_rand.h: -------------------------------------------------------------------------------- 1 | /* File: my_rand.h 2 | * Purpose: Header file for my_rand.c, which implements a simple 3 | * pseudo-random number generator. 4 | * 5 | * IPP: Not discussed, but needed by the multithreaded linked list programs 6 | * discussed in Section 4.9.2-4.9.4 (pp. 183-190). 7 | */ 8 | #ifndef _MY_RAND_H_ 9 | #define _MY_RAND_H_ 10 | 11 | unsigned my_rand(unsigned* a_p); 12 | double my_drand(unsigned* a_p); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_busy_bar.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * pth_busy_bar.c 3 | * 4 | * Purpose: 5 | * Use busy wait barriers to synchronize threads. 6 | * 7 | * Input: 8 | * none 9 | * Output: 10 | * Time for BARRIER_COUNT barriers 11 | * 12 | * Compile: 13 | * gcc -g -Wall -o pth_busy_bar pth_busy_bar.c -lpthread 14 | * Usage: 15 | * ./pth_busy_bar 16 | * 17 | * Note: 18 | * Compile flag DEBUG will print a message after each barrier 19 | * 20 | * IPP: Section 4.8.1 (p. 
177) 21 | */ 22 | 23 | #include 24 | #include 25 | #include 26 | #include "timer.h" 27 | 28 | #define BARRIER_COUNT 100 29 | 30 | int thread_count; 31 | int barrier_thread_counts[BARRIER_COUNT]; 32 | pthread_mutex_t barrier_mutex; 33 | 34 | void Usage(char* prog_name); 35 | void *Thread_work(void* rank); 36 | 37 | /*--------------------------------------------------------------------*/ 38 | int main(int argc, char* argv[]) { 39 | long thread, i; 40 | pthread_t* thread_handles; 41 | double start, finish; 42 | 43 | if (argc != 2) 44 | Usage(argv[0]); 45 | thread_count = strtol(argv[1], NULL, 10); 46 | 47 | thread_handles = malloc (thread_count*sizeof(pthread_t)); 48 | for (i = 0; i < BARRIER_COUNT; i++) 49 | barrier_thread_counts[i] = 0; 50 | pthread_mutex_init(&barrier_mutex, NULL); 51 | 52 | GET_TIME(start); 53 | for (thread = 0; thread < thread_count; thread++) 54 | pthread_create(&thread_handles[thread], NULL, 55 | Thread_work, (void*) thread); 56 | 57 | for (thread = 0; thread < thread_count; thread++) { 58 | pthread_join(thread_handles[thread], NULL); 59 | } 60 | GET_TIME(finish); 61 | printf("Elapsed time = %e seconds\n", finish - start); 62 | 63 | pthread_mutex_destroy(&barrier_mutex); 64 | free(thread_handles); 65 | return 0; 66 | } /* main */ 67 | 68 | 69 | /*-------------------------------------------------------------------- 70 | * Function: Usage 71 | * Purpose: Print command line for function and terminate 72 | * In arg: prog_name 73 | */ 74 | void Usage(char* prog_name) { 75 | 76 | fprintf(stderr, "usage: %s \n", prog_name); 77 | exit(0); 78 | } /* Usage */ 79 | 80 | 81 | /*------------------------------------------------------------------- 82 | * Function: Thread_work 83 | * Purpose: Run BARRIER_COUNT barriers 84 | * In arg: rank 85 | * Global var: thread_count, barrier_thread_counts, barrier_mutex 86 | * Return val: Ignored 87 | */ 88 | void *Thread_work(void* rank) { 89 | # ifdef DEBUG 90 | long my_rank = (long) rank; 91 | # endif 92 | int i; 93 | 
94 | for (i = 0; i < BARRIER_COUNT; i++) { 95 | pthread_mutex_lock(&barrier_mutex); 96 | barrier_thread_counts[i]++; 97 | pthread_mutex_unlock(&barrier_mutex); 98 | while (barrier_thread_counts[i] < thread_count); 99 | # ifdef DEBUG 100 | if (my_rank == 0) { 101 | printf("All threads entered barrier %d\n", i); 102 | fflush(stdout); 103 | } 104 | # endif 105 | } 106 | 107 | return NULL; 108 | } /* Thread_work */ 109 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_cond_bar.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * pth_cond_bar.c 3 | * 4 | * Purpose: 5 | * Use condition wait barriers to synchronize threads. 6 | * 7 | * Compile: 8 | * gcc -g -Wall -o pth_cond_bar pth_cond_bar.c -lpthread 9 | * timer.h must be available 10 | * 11 | * Usage: 12 | * ./pth_cond_bar 13 | * 14 | * Input: 15 | * none 16 | * Output: 17 | * Time for BARRIER_COUNT barriers 18 | * 19 | * Note: 20 | * Verbose output can be enabled with the compile flag -DDEBUG 21 | * 22 | * IPP: Section 4.8.3 (pp. 179 and ff.) 
23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | #include "timer.h" 29 | 30 | #define BARRIER_COUNT 100 31 | 32 | int thread_count; 33 | int barrier_thread_count = 0; 34 | pthread_mutex_t barrier_mutex; 35 | pthread_cond_t ok_to_proceed; 36 | 37 | void Usage(char* prog_name); 38 | void *Thread_work(void* rank); 39 | 40 | /*--------------------------------------------------------------------*/ 41 | int main(int argc, char* argv[]) { 42 | long thread; 43 | pthread_t* thread_handles; 44 | double start, finish; 45 | 46 | if (argc != 2) 47 | Usage(argv[0]); 48 | thread_count = strtol(argv[1], NULL, 10); 49 | 50 | thread_handles = malloc (thread_count*sizeof(pthread_t)); 51 | pthread_mutex_init(&barrier_mutex, NULL); 52 | pthread_cond_init(&ok_to_proceed, NULL); 53 | 54 | GET_TIME(start); 55 | for (thread = 0; thread < thread_count; thread++) 56 | pthread_create(&thread_handles[thread], NULL, 57 | Thread_work, (void*) thread); 58 | 59 | for (thread = 0; thread < thread_count; thread++) { 60 | pthread_join(thread_handles[thread], NULL); 61 | } 62 | GET_TIME(finish); 63 | printf("Elapsed time = %e seconds\n", finish - start); 64 | 65 | pthread_mutex_destroy(&barrier_mutex); 66 | pthread_cond_destroy(&ok_to_proceed); 67 | free(thread_handles); 68 | return 0; 69 | } /* main */ 70 | 71 | 72 | /*-------------------------------------------------------------------- 73 | * Function: Usage 74 | * Purpose: Print command line for function and terminate 75 | * In arg: prog_name 76 | */ 77 | void Usage(char* prog_name) { 78 | 79 | fprintf(stderr, "usage: %s \n", prog_name); 80 | exit(0); 81 | } /* Usage */ 82 | 83 | 84 | /*------------------------------------------------------------------- 85 | * Function: Thread_work 86 | * Purpose: Run BARRIER_COUNT barriers 87 | * In arg: rank 88 | * Global var: thread_count, barrier_thread_count, barrier_mutex, 89 | * ok_to_proceed 90 | * Return val: Ignored 91 | */ 92 | void *Thread_work(void* rank) { 93 | # ifdef DEBUG 94 | long 
my_rank = (long) rank; 95 | # endif 96 | int i; 97 | 98 | for (i = 0; i < BARRIER_COUNT; i++) { 99 | pthread_mutex_lock(&barrier_mutex); 100 | barrier_thread_count++; 101 | if (barrier_thread_count == thread_count) { 102 | barrier_thread_count = 0; 103 | # ifdef DEBUG 104 | printf("Thread %ld > Signalling other threads in barrier %d\n", 105 | my_rank, i); 106 | fflush(stdout); 107 | # endif 108 | pthread_cond_broadcast(&ok_to_proceed); 109 | } else { 110 | // Wait unlocks mutex and puts thread to sleep. 111 | // Put wait in while loop in case some other 112 | // event awakens thread. 113 | while (pthread_cond_wait(&ok_to_proceed, 114 | &barrier_mutex) != 0); 115 | // Mutex is relocked at this point. 116 | # ifdef DEBUG 117 | printf("Thread %ld > Awakened in barrier %d\n", my_rank, i); 118 | fflush(stdout); 119 | # endif 120 | } 121 | pthread_mutex_unlock(&barrier_mutex); 122 | # ifdef DEBUG 123 | if (my_rank == 0) { 124 | printf("All threads completed barrier %d\n", i); 125 | fflush(stdout); 126 | } 127 | # endif 128 | } 129 | 130 | return NULL; 131 | } /* Thread_work */ 132 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_do_nothing.c: -------------------------------------------------------------------------------- 1 | /* File: pth_do_nothing.c 2 | * Purpose: Estimate the overhead associated with starting threads. 3 | * 4 | * Compile: gcc -g -Wall -o pth_do_nothing pth_do_nothing.c -lpthreads 5 | * timer.h needs to be available 6 | * Run: ./pth_do_nothing 7 | * 8 | * Input: None 9 | * Output: Time elapsed from starting first thread to joining 10 | * last. 11 | * 12 | * IPP: Section 4.5 (pp. 167 and ff.) 
13 | */ 14 | #include 15 | #include 16 | #include 17 | #include "timer.h" 18 | 19 | const int MAX_THREADS = 1024; 20 | 21 | 22 | /* Thread function */ 23 | void *Thread_function(void* ignore); 24 | 25 | /* No use of shared variables */ 26 | void Usage(char* prog_name); 27 | 28 | int main(int argc, char* argv[]) { 29 | int thread_count; 30 | long thread; /* Use long in case of a 64-bit system */ 31 | pthread_t* thread_handles; 32 | double start, finish, elapsed; 33 | 34 | /* Get number of threads from command line */ 35 | if (argc != 2) Usage(argv[0]); 36 | thread_count = strtol(argv[1], NULL, 10); 37 | if (thread_count <= 0 || thread_count > MAX_THREADS) Usage(argv[0]); 38 | 39 | thread_handles = (pthread_t*) malloc (thread_count*sizeof(pthread_t)); 40 | 41 | GET_TIME(start); 42 | for (thread = 0; thread < thread_count; thread++) 43 | pthread_create(&thread_handles[thread], NULL, 44 | Thread_function, NULL); 45 | 46 | for (thread = 0; thread < thread_count; thread++) 47 | pthread_join(thread_handles[thread], NULL); 48 | GET_TIME(finish); 49 | elapsed = finish - start; 50 | 51 | printf("The elapsed time is %e seconds\n", elapsed); 52 | 53 | free(thread_handles); 54 | return 0; 55 | } /* main */ 56 | 57 | /*-------------------------------------------------------------------*/ 58 | void Usage(char* prog_name) { 59 | fprintf(stderr, "usage: %s \n", prog_name); 60 | fprintf(stderr, "0 < number of threads <= %d\n", MAX_THREADS); 61 | exit(0); 62 | } /* Usage */ 63 | 64 | /*-------------------------------------------------------------------*/ 65 | void* Thread_function(void* ignore) { 66 | return NULL; 67 | } /* Thread_function */ 68 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_hello.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * pth_hello.c 3 | * 4 | * Purpose: 5 | * Illustrate basic use of pthreads: create 
some threads, 6 | * each of which prints a message. 7 | * 8 | * Input: 9 | * none 10 | * Output: 11 | * message from each thread 12 | * 13 | * Compile: gcc -g -Wall -o pth_hello pth_hello.c -lpthread 14 | * Usage: ./pth_hello 15 | * 16 | * IPP: Section 4.2 (p. 153 and ff.) 17 | */ 18 | #include 19 | #include 20 | #include 21 | 22 | const int MAX_THREADS = 64; 23 | 24 | /* Global variable: accessible to all threads */ 25 | int thread_count; 26 | 27 | void Usage(char* prog_name); 28 | void *Hello(void* rank); /* Thread function */ 29 | 30 | /*--------------------------------------------------------------------*/ 31 | int main(int argc, char* argv[]) { 32 | long thread; /* Use long in case of a 64-bit system */ 33 | pthread_t* thread_handles; 34 | 35 | /* Get number of threads from command line */ 36 | if (argc != 2) Usage(argv[0]); 37 | thread_count = strtol(argv[1], NULL, 10); 38 | if (thread_count <= 0 || thread_count > MAX_THREADS) Usage(argv[0]); 39 | 40 | thread_handles = malloc (thread_count*sizeof(pthread_t)); 41 | 42 | for (thread = 0; thread < thread_count; thread++) 43 | pthread_create(&thread_handles[thread], NULL, 44 | Hello, (void*) thread); 45 | 46 | printf("Hello from the main thread\n"); 47 | 48 | for (thread = 0; thread < thread_count; thread++) 49 | pthread_join(thread_handles[thread], NULL); 50 | 51 | free(thread_handles); 52 | return 0; 53 | } /* main */ 54 | 55 | /*-------------------------------------------------------------------*/ 56 | void *Hello(void* rank) { 57 | long my_rank = (long) rank; /* Use long in case of 64-bit system */ 58 | 59 | printf("Hello from thread %ld of %d\n", my_rank, thread_count); 60 | 61 | return NULL; 62 | } /* Hello */ 63 | 64 | /*-------------------------------------------------------------------*/ 65 | void Usage(char* prog_name) { 66 | fprintf(stderr, "usage: %s \n", prog_name); 67 | fprintf(stderr, "0 < number of threads <= %d\n", MAX_THREADS); 68 | exit(0); 69 | } /* Usage */ 70 | 
-------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_msg.c: -------------------------------------------------------------------------------- 1 | /* File: pth_msg.c 2 | * 3 | * Purpose: Illustrate a synchronization problem with pthreads: create 4 | * some threads, each of which creates and prints a message. 5 | * 6 | * Input: none 7 | * Output: message from each thread 8 | * 9 | * Compile: gcc -g -Wall -o pth_msg pth_msg.c -lpthread 10 | * Usage: pth_msg 11 | * 12 | * IPP: Section 4.7 (pp. 172 and ff.) 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | const int MAX_THREADS = 1024; 20 | const int MSG_MAX = 100; 21 | 22 | /* Global variables: accessible to all threads */ 23 | int thread_count; 24 | char** messages; 25 | 26 | void Usage(char* prog_name); 27 | void *Send_msg(void* rank); /* Thread function */ 28 | 29 | /*--------------------------------------------------------------------*/ 30 | int main(int argc, char* argv[]) { 31 | long thread; 32 | pthread_t* thread_handles; 33 | 34 | if (argc != 2) Usage(argv[0]); 35 | thread_count = strtol(argv[1], NULL, 10); 36 | if (thread_count <= 0 || thread_count > MAX_THREADS) Usage(argv[0]); 37 | 38 | thread_handles = (pthread_t*) malloc (thread_count*sizeof(pthread_t)); 39 | messages = (char**) malloc(thread_count*sizeof(char*)); 40 | for (thread = 0; thread < thread_count; thread++) 41 | messages[thread] = NULL; 42 | 43 | for (thread = 0; thread < thread_count; thread++) 44 | pthread_create(&thread_handles[thread], (pthread_attr_t*) NULL, 45 | Send_msg, (void*) thread); 46 | 47 | for (thread = 0; thread < thread_count; thread++) { 48 | pthread_join(thread_handles[thread], NULL); 49 | } 50 | 51 | for (thread = 0; thread < thread_count; thread++) 52 | free(messages[thread]); 53 | free(messages); 54 | 55 | free(thread_handles); 56 | return 0; 57 | } /* main */ 58 | 59 | 60 | 
/*-------------------------------------------------------------------- 61 | * Function: Usage 62 | * Purpose: Print command line for function and terminate 63 | * In arg: prog_name 64 | */ 65 | void Usage(char* prog_name) { 66 | 67 | fprintf(stderr, "usage: %s \n", prog_name); 68 | exit(0); 69 | } /* Usage */ 70 | 71 | 72 | /*------------------------------------------------------------------- 73 | * Function: Send_msg 74 | * Purpose: Create a message and ``send'' it by copying it 75 | * into the global messages array. Receive a message 76 | * and print it. 77 | * In arg: rank 78 | * Global in: thread_count 79 | * Global in/out: messages 80 | * Return val: Ignored 81 | * Note: The my_msg buffer is freed in main 82 | */ 83 | void *Send_msg(void* rank) { 84 | long my_rank = (long) rank; 85 | long dest = (my_rank + 1) % thread_count; 86 | long source = (my_rank + thread_count - 1) % thread_count; 87 | char* my_msg = (char*) malloc(MSG_MAX*sizeof(char)); 88 | 89 | sprintf(my_msg, "Hello to %ld from %ld", dest, my_rank); 90 | messages[dest] = my_msg; 91 | 92 | if (messages[my_rank] != NULL) 93 | printf("Thread %ld > %s\n", my_rank, messages[my_rank]); 94 | else 95 | printf("Thread %ld > No message from %ld\n", my_rank, source); 96 | 97 | return NULL; 98 | } /* Send_msg */ 99 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_msg_sem.c: -------------------------------------------------------------------------------- 1 | /* File: pth_msg_sem.c 2 | * 3 | * Purpose: Illustrate a synchronization problem with pthreads: create 4 | * some threads, each of which creates a message and "sends" it 5 | * to another thread, by copying it into that thread's buffer. 6 | * This version uses semaphores to solve the synchronization 7 | * problem. 
8 | * 9 | * Input: none 10 | * Output: message from each thread 11 | * 12 | * Compile: gcc -g -Wall -o pth_msg_sem pth_msg_sem.c -lpthread 13 | * Usage: ./pth_msg_sem 14 | * 15 | * Note: MacOS X (as of 10.6) doesn't have a working implementation 16 | * of unnamed semaphores. See pth_msg_sem_mac.c for an 17 | * alternative implementation. 18 | * 19 | * IPP: Section 4.7 (pp. 174 and ff.) 20 | */ 21 | 22 | #include 23 | #include 24 | #include 25 | #include /* Semaphores are not part of Pthreads */ 26 | 27 | const int MAX_THREADS = 1024; 28 | const int MSG_MAX = 100; 29 | 30 | /* Global variables: accessible to all threads */ 31 | int thread_count; 32 | char** messages; 33 | sem_t* semaphores; 34 | 35 | void Usage(char* prog_name); 36 | void *Send_msg(void* rank); /* Thread function */ 37 | 38 | /*--------------------------------------------------------------------*/ 39 | int main(int argc, char* argv[]) { 40 | long thread; 41 | pthread_t* thread_handles; 42 | 43 | if (argc != 2) Usage(argv[0]); 44 | thread_count = strtol(argv[1], NULL, 10); 45 | if (thread_count <= 0 || thread_count > MAX_THREADS) Usage(argv[0]); 46 | 47 | thread_handles = malloc (thread_count*sizeof(pthread_t)); 48 | messages = malloc(thread_count*sizeof(char*)); 49 | semaphores = malloc(thread_count*sizeof(sem_t)); 50 | for (thread = 0; thread < thread_count; thread++) { 51 | messages[thread] = NULL; 52 | /* Initialize all semaphores to 0 -- i.e., locked */ 53 | sem_init(&semaphores[thread], 0, 0); 54 | } 55 | 56 | for (thread = 0; thread < thread_count; thread++) 57 | pthread_create(&thread_handles[thread], (pthread_attr_t*) NULL, 58 | Send_msg, (void*) thread); 59 | 60 | for (thread = 0; thread < thread_count; thread++) { 61 | pthread_join(thread_handles[thread], NULL); 62 | } 63 | 64 | for (thread = 0; thread < thread_count; thread++) { 65 | free(messages[thread]); 66 | sem_destroy(&semaphores[thread]); 67 | } 68 | free(messages); 69 | free(semaphores); 70 | free(thread_handles); 71 | 72 | return 
0; 73 | } /* main */ 74 | 75 | 76 | /*-------------------------------------------------------------------- 77 | * Function: Usage 78 | * Purpose: Print command line for function and terminate 79 | * In arg: prog_name 80 | */ 81 | void Usage(char* prog_name) { 82 | 83 | fprintf(stderr, "usage: %s \n", prog_name); 84 | exit(0); 85 | } /* Usage */ 86 | 87 | 88 | /*------------------------------------------------------------------- 89 | * Function: Send_msg 90 | * Purpose: Create a message and ``send'' it by copying it 91 | * into the global messages array. Receive a message 92 | * and print it. 93 | * In arg: rank 94 | * Global in: thread_count 95 | * Global in/out: messages, semaphores 96 | * Return val: Ignored 97 | * Note: The my_msg buffer is freed in main 98 | */ 99 | void *Send_msg(void* rank) { 100 | long my_rank = (long) rank; 101 | long dest = (my_rank + 1) % thread_count; 102 | char* my_msg = (char*) malloc(MSG_MAX*sizeof(char)); 103 | 104 | sprintf(my_msg, "Hello to %ld from %ld", dest, my_rank); 105 | messages[dest] = my_msg; 106 | sem_post(&semaphores[dest]); /* "Unlock" the semaphore of dest */ 107 | 108 | sem_wait(&semaphores[my_rank]); /* Wait for our semaphore to be unlocked */ 109 | printf("Thread %ld > %s\n", my_rank, messages[my_rank]); 110 | 111 | return NULL; 112 | } /* Send_msg */ 113 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_msg_sem_mac.c: -------------------------------------------------------------------------------- 1 | /* File: pth_msg_sem_mac.c 2 | * 3 | * Purpose: Each thread ``sends a message'' to another thread and prints the 4 | * message it receives. This version uses named semaphores, since 5 | * unnamed semaphores aren't available in MacOS X (as of 10.6). 
6 | * 7 | * Compile: gcc -g -Wall -o pth_msg_sem_mac pth_msg_sem_mac.c -lpthread 8 | * Usage: ./pth_msg_sem_mac 9 | * 10 | * Input: none 11 | * Output: message from each thread 12 | * 13 | * Note: I'm grateful to Prof Gregory Benson of the University of 14 | * San Francisco for showing me how to use semaphores with 15 | * MacOS X. 16 | * 17 | * IPP: Section 4.7 (pp. 174 and ff.) 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | const int MAX_THREADS = 1024; 27 | const int MSG_MAX = 100; 28 | 29 | /* Global variable: accessible to all threads */ 30 | int thread_count; 31 | char** messages; 32 | char** snames; 33 | sem_t** sems; 34 | 35 | void Usage(char* prog_name); 36 | void *Send_msg(void* rank); /* Thread function */ 37 | 38 | /*--------------------------------------------------------------------*/ 39 | int main(int argc, char* argv[]) { 40 | long thread; 41 | pthread_t* thread_handles; 42 | 43 | if (argc != 2) Usage(argv[0]); 44 | thread_count = strtol(argv[1], NULL, 10); 45 | if (thread_count <= 0 || thread_count > MAX_THREADS) Usage(argv[0]); 46 | 47 | thread_handles = (pthread_t*) malloc (thread_count*sizeof(pthread_t)); 48 | messages = (char**) malloc(thread_count*sizeof(char*)); 49 | for (thread = 0; thread < thread_count; thread++) 50 | messages[thread] = NULL; 51 | 52 | sems = (sem_t**) malloc (thread_count*sizeof(sem_t *)); 53 | snames = (char **) malloc (thread_count*sizeof(char *)); 54 | 55 | /* Initialize semaphores to 0: they start "locked". So */ 56 | /* executing a sem_wait will block until they're "unlocked". 
*/ 57 | for (thread = 0; thread < thread_count; thread++) { 58 | snames[thread] = malloc(10*sizeof(char)); 59 | sprintf(snames[thread], "/sem%ld", thread); 60 | sems[thread] = sem_open(snames[thread], O_CREAT, 0777, 0); 61 | } 62 | 63 | for (thread = 0; thread < thread_count; thread++) 64 | pthread_create(&thread_handles[thread], NULL, 65 | Send_msg, (void*) thread); 66 | 67 | for (thread = 0; thread < thread_count; thread++) { 68 | pthread_join(thread_handles[thread], NULL); 69 | } 70 | 71 | for (thread = 0; thread < thread_count; thread++) { 72 | sem_unlink(snames[thread]); 73 | sem_close(sems[thread]); 74 | free(messages[thread]); 75 | free(snames[thread]); 76 | } 77 | 78 | free(sems); 79 | free(messages); 80 | free(thread_handles); 81 | return 0; 82 | } /* main */ 83 | 84 | 85 | /*-------------------------------------------------------------------- 86 | * Function: Usage 87 | * Purpose: Print command line for function and terminate 88 | * In arg: prog_name 89 | */ 90 | void Usage(char* prog_name) { 91 | 92 | fprintf(stderr, "usage: %s \n", prog_name); 93 | exit(0); 94 | } /* Usage */ 95 | 96 | 97 | /*------------------------------------------------------------------- 98 | * Function: Send_msg 99 | * Purpose: The function started by calls to pthread_create 100 | * In arg: rank 101 | * Global var: thread_count, sems 102 | * Return val: Ignored 103 | */ 104 | void *Send_msg(void* rank) { 105 | long my_rank = (long) rank; 106 | long dest = (my_rank + 1) % thread_count; 107 | long source = (my_rank - 1 + thread_count) % thread_count; 108 | char* my_msg = malloc(MSG_MAX*sizeof(char)); 109 | 110 | sprintf(my_msg, "Hello to %ld from %ld", dest, my_rank); 111 | messages[dest] = my_msg; 112 | /* Notify destination thread that it can proceed */ 113 | sem_post(sems[dest]); 114 | 115 | /* Wait for source thread to say OK */ 116 | sem_wait(sems[my_rank]); 117 | if (messages[my_rank] != NULL) 118 | printf("Thread %ld > %s\n", my_rank, messages[my_rank]); 119 | else 120 | 
printf("Thread %ld > No message from %ld\n", my_rank, source); 121 | 122 | return NULL; 123 | } /* hello */ 124 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_busy1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_busy1 -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_busy1.dSYM/Contents/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | English 7 | CFBundleIdentifier 8 | com.apple.xcode.dsym.pth_pi_busy1 9 | CFBundleInfoDictionaryVersion 10 | 6.0 11 | CFBundlePackageType 12 | dSYM 13 | CFBundleSignature 14 | ???? 
15 | CFBundleShortVersionString 16 | 1.0 17 | CFBundleVersion 18 | 1 19 | 20 | 21 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_busy1.dSYM/Contents/Resources/DWARF/pth_pi_busy1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_busy1.dSYM/Contents/Resources/DWARF/pth_pi_busy1 -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_mutex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_mutex -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_mutex.dSYM/Contents/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | English 7 | CFBundleIdentifier 8 | com.apple.xcode.dsym.pth_pi_mutex 9 | CFBundleInfoDictionaryVersion 10 | 6.0 11 | CFBundlePackageType 12 | dSYM 13 | CFBundleSignature 14 | ???? 
15 | CFBundleShortVersionString 16 | 1.0 17 | CFBundleVersion 18 | 1 19 | 20 | 21 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_mutex.dSYM/Contents/Resources/DWARF/pth_pi_mutex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_pi_mutex.dSYM/Contents/Resources/DWARF/pth_pi_mutex -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_sem_bar.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * pth_sem_bar.c 3 | * 4 | * Purpose: 5 | * Use semaphore barriers to synchronize threads. 6 | * 7 | * Input: 8 | * none 9 | * Output: 10 | * Time for BARRIER_COUNT barriers 11 | * 12 | * Compile: 13 | * gcc -g -Wall -o pth_sem_bar pth_sem_bar.c -lpthread 14 | * timer.h needs to be available 15 | * 16 | * Usage: 17 | * ./pth_sem_bar 18 | * 19 | * Note: 20 | * Setting compile flag -DDEBUG will cause a message to be 21 | * printed after completion of each barrier. 22 | * 23 | * IPP: Section 4.8.2 (pp. 177 and ff.) 
24 | */ 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "timer.h" 31 | 32 | #define BARRIER_COUNT 100 33 | 34 | int thread_count; 35 | int counter; 36 | sem_t barrier_sems[BARRIER_COUNT]; 37 | sem_t count_sem; 38 | 39 | void Usage(char* prog_name); 40 | void *Thread_work(void* rank); 41 | 42 | /*--------------------------------------------------------------------*/ 43 | int main(int argc, char* argv[]) { 44 | long thread, i; 45 | pthread_t* thread_handles; 46 | double start, finish; 47 | 48 | if (argc != 2) 49 | Usage(argv[0]); 50 | thread_count = strtol(argv[1], NULL, 10); 51 | 52 | thread_handles = malloc (thread_count*sizeof(pthread_t)); 53 | for (i = 0; i < BARRIER_COUNT; i++) 54 | sem_init(&barrier_sems[i], 0, 0); 55 | sem_init(&count_sem, 0, 1); 56 | 57 | GET_TIME(start); 58 | for (thread = 0; thread < thread_count; thread++) 59 | pthread_create(&thread_handles[thread], (pthread_attr_t*) NULL, 60 | Thread_work, (void*) thread); 61 | 62 | for (thread = 0; thread < thread_count; thread++) { 63 | pthread_join(thread_handles[thread], NULL); 64 | } 65 | GET_TIME(finish); 66 | printf("Elapsed time = %e seconds\n", finish - start); 67 | 68 | sem_destroy(&count_sem); 69 | for (i = 0; i < BARRIER_COUNT; i++) 70 | sem_destroy(&barrier_sems[i]); 71 | free(thread_handles); 72 | return 0; 73 | } /* main */ 74 | 75 | 76 | /*-------------------------------------------------------------------- 77 | * Function: Usage 78 | * Purpose: Print command line for function and terminate 79 | * In arg: prog_name 80 | */ 81 | void Usage(char* prog_name) { 82 | 83 | fprintf(stderr, "usage: %s \n", prog_name); 84 | exit(0); 85 | } /* Usage */ 86 | 87 | 88 | /*------------------------------------------------------------------- 89 | * Function: Thread_work 90 | * Purpose: Run BARRIER_COUNT barriers 91 | * In arg: rank 92 | * Global var: thread_count, count, barrier_sems, count_sem 93 | * Return val: Ignored 94 | */ 95 | void *Thread_work(void* rank) { 96 | # ifdef 
DEBUG 97 | long my_rank = (long) rank; 98 | # endif 99 | int i, j; 100 | 101 | for (i = 0; i < BARRIER_COUNT; i++) { 102 | sem_wait(&count_sem); 103 | if (counter == thread_count - 1) { 104 | counter = 0; 105 | sem_post(&count_sem); 106 | for (j = 0; j < thread_count-1; j++) 107 | sem_post(&barrier_sems[i]); 108 | } else { 109 | counter++; 110 | sem_post(&count_sem); 111 | sem_wait(&barrier_sems[i]); 112 | } 113 | # ifdef DEBUG 114 | if (my_rank == 0) { 115 | printf("All threads completed barrier %d\n", i); 116 | fflush(stdout); 117 | } 118 | # endif 119 | } 120 | 121 | return NULL; 122 | } /* Thread_work */ 123 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_tokenize.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * pth_tokenize.c 3 | * 4 | * Purpose: 5 | * Try to use threads to tokenize text input. Illustrate problems 6 | * with function that isn't threadsafe. 7 | * 8 | * Warning: 9 | * This program definitely has problems. 10 | * 11 | * Input: 12 | * Lines of text 13 | * Output: 14 | * For each line of input: 15 | * the line read by the program, and the tokens identified by 16 | * strtok 17 | * 18 | * Compile: 19 | * gcc -g -Wall -o pth_tokenize pth_tokenize.c -lpthread 20 | * Usage: 21 | * pth_tokenize < 22 | * 23 | * Algorithm: 24 | * For each line of input, next thread reads the line and 25 | * "tokenizes" it. 26 | * 27 | * IPP: Section 4.11 (pp. 195 and ff.) 
28 | */ 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | const int MAX = 1000; 37 | 38 | int thread_count; 39 | sem_t* sems; 40 | 41 | void Usage(char* prog_name); 42 | void *Tokenize(void* rank); /* Thread function */ 43 | 44 | /*--------------------------------------------------------------------*/ 45 | int main(int argc, char* argv[]) { 46 | long thread; 47 | pthread_t* thread_handles; 48 | 49 | if (argc != 2) 50 | Usage(argv[0]); 51 | thread_count = atoi(argv[1]); 52 | 53 | thread_handles = (pthread_t*) malloc (thread_count*sizeof(pthread_t)); 54 | sems = (sem_t*) malloc(thread_count*sizeof(sem_t)); 55 | // sems[0] should be unlocked, the others should be locked 56 | sem_init(&sems[0], 0, 1); 57 | for (thread = 1; thread < thread_count; thread++) 58 | sem_init(&sems[thread], 0, 0); 59 | 60 | printf("Enter text\n"); 61 | for (thread = 0; thread < thread_count; thread++) 62 | pthread_create(&thread_handles[thread], (pthread_attr_t*) NULL, 63 | Tokenize, (void*) thread); 64 | 65 | for (thread = 0; thread < thread_count; thread++) { 66 | pthread_join(thread_handles[thread], NULL); 67 | } 68 | 69 | for (thread=0; thread < thread_count; thread++) 70 | sem_destroy(&sems[thread]); 71 | 72 | free(sems); 73 | free(thread_handles); 74 | return 0; 75 | } /* main */ 76 | 77 | 78 | /*-------------------------------------------------------------------- 79 | * Function: Usage 80 | * Purpose: Print command line for function and terminate 81 | * In arg: prog_name 82 | */ 83 | void Usage(char* prog_name) { 84 | 85 | fprintf(stderr, "usage: %s \n", prog_name); 86 | exit(0); 87 | } /* Usage */ 88 | 89 | 90 | /*------------------------------------------------------------------- 91 | * Function: Tokenize 92 | * Purpose: Tokenize lines of input 93 | * In arg: rank 94 | * Global vars: thread_count (in), sems (in/out) 95 | * Return val: Ignored 96 | */ 97 | void *Tokenize(void* rank) { 98 | long my_rank = (long) rank; 99 | int count; 100 | int next = 
(my_rank + 1) % thread_count; 101 | char *fg_rv; 102 | char my_line[MAX]; 103 | char *my_string; 104 | 105 | /* Force sequential reading of the input */ 106 | sem_wait(&sems[my_rank]); 107 | fg_rv = fgets(my_line, MAX, stdin); 108 | sem_post(&sems[next]); 109 | while (fg_rv != NULL) { 110 | printf("Thread %ld > my line = %s", my_rank, my_line); 111 | 112 | count = 0; 113 | my_string = strtok(my_line, " \t\n"); 114 | while ( my_string != NULL ) { 115 | count++; 116 | printf("Thread %ld > string %d = %s\n", my_rank, count, my_string); 117 | my_string = strtok(NULL, " \t\n"); 118 | } 119 | if (my_line != NULL) printf("Thread %ld > After tokenizing, my_line = %s\n", 120 | my_rank, my_line); 121 | 122 | sem_wait(&sems[my_rank]); 123 | fg_rv = fgets(my_line, MAX, stdin); 124 | sem_post(&sems[next]); 125 | } 126 | 127 | return NULL; 128 | } /* Tokenize */ 129 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/pth_tokenize_r.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * pth_tokenize_r.c 3 | * 4 | * Purpose: 5 | * Use threads to tokenize text input. Fix original version 6 | * which wasn't threadsafe. 7 | * 8 | * Input: 9 | * Lines of text 10 | * Output: 11 | * For each line of input: 12 | * the line read by the program, and the tokens identified by 13 | * strtok_r 14 | * 15 | * Compile: 16 | * gcc -g -Wall -o pth_tokenize_r pth_tokenize_r.c -lpthread 17 | * Usage: 18 | * pth_tokenize_r < 19 | * 20 | * Algorithm: 21 | * For each line of input, next thread reads the line and 22 | * "tokenizes" it. 23 | * 24 | * IPP: Section 4.11 (pp. 197 and ff.) 
25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | const int MAX = 1000; 34 | 35 | int thread_count; 36 | sem_t* sems; 37 | 38 | void Usage(char* prog_name); 39 | void *Tokenize(void* rank); /* Thread function */ 40 | 41 | /*--------------------------------------------------------------------*/ 42 | int main(int argc, char* argv[]) { 43 | long thread; 44 | pthread_t* thread_handles; 45 | 46 | if (argc != 2) 47 | Usage(argv[0]); 48 | thread_count = atoi(argv[1]); 49 | 50 | thread_handles = (pthread_t*) malloc (thread_count*sizeof(pthread_t)); 51 | sems = (sem_t*) malloc(thread_count*sizeof(sem_t)); 52 | // sems[0] should be unlocked, the others should be locked 53 | sem_init(&sems[0], 0, 1); 54 | for (thread = 1; thread < thread_count; thread++) 55 | sem_init(&sems[thread], 0, 0); 56 | 57 | printf("Enter text\n"); 58 | for (thread = 0; thread < thread_count; thread++) 59 | pthread_create(&thread_handles[thread], (pthread_attr_t*) NULL, 60 | Tokenize, (void*) thread); 61 | 62 | for (thread = 0; thread < thread_count; thread++) { 63 | pthread_join(thread_handles[thread], NULL); 64 | } 65 | 66 | for (thread=0; thread < thread_count; thread++) 67 | sem_destroy(&sems[thread]); 68 | 69 | free(sems); 70 | free(thread_handles); 71 | return 0; 72 | } /* main */ 73 | 74 | 75 | /*-------------------------------------------------------------------- 76 | * Function: Usage 77 | * Purpose: Print command line for function and terminate 78 | * In arg: prog_name 79 | */ 80 | void Usage(char* prog_name) { 81 | 82 | fprintf(stderr, "usage: %s \n", prog_name); 83 | exit(0); 84 | } /* Usage */ 85 | 86 | 87 | /*------------------------------------------------------------------- 88 | * Function: Tokenize 89 | * Purpose: Tokenize lines of input 90 | * In arg: rank 91 | * Global vars: thread_count (in), sems (in/out) 92 | * Return val: Ignored 93 | */ 94 | void *Tokenize(void* rank) { 95 | long my_rank = (long) rank; 96 | int count; 97 | int next = 
(my_rank + 1) % thread_count; 98 | char *fg_rv; 99 | char my_line[MAX]; 100 | char *my_string; 101 | char *saveptr; 102 | 103 | /* Force sequential reading of the input */ 104 | sem_wait(&sems[my_rank]); 105 | fg_rv = fgets(my_line, MAX, stdin); 106 | sem_post(&sems[next]); 107 | while (fg_rv != NULL) { 108 | printf("Thread %ld > my line = %s", my_rank, my_line); 109 | 110 | count = 0; 111 | my_string = strtok_r(my_line, " \t\n", &saveptr); 112 | while ( my_string != NULL ) { 113 | count++; 114 | printf("Thread %ld > string %d = %s\n", my_rank, count, my_string); 115 | my_string = strtok_r(NULL, " \t\n", &saveptr); 116 | } 117 | 118 | sem_wait(&sems[my_rank]); 119 | fg_rv = fgets(my_line, MAX, stdin); 120 | sem_post(&sems[next]); 121 | } 122 | 123 | return NULL; 124 | } /* Tokenize */ 125 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch4/timer.h: -------------------------------------------------------------------------------- 1 | /* File: timer.h 2 | * 3 | * Purpose: Define a macro that returns the number of seconds that 4 | * have elapsed since some point in the past. The timer 5 | * should return times with microsecond accuracy. 6 | * 7 | * Note: The argument passed to the GET_TIME macro should be 8 | * a double, *not* a pointer to a double. 9 | * 10 | * Example: 11 | * #include "timer.h" 12 | * . . . 13 | * double start, finish, elapsed; 14 | * . . . 15 | * GET_TIME(start); 16 | * . . . 17 | * Code to be timed 18 | * . . . 19 | * GET_TIME(finish); 20 | * elapsed = finish - start; 21 | * printf("The code to be timed took %e seconds\n", elapsed); 22 | * 23 | * IPP: Section 3.6.1 (pp. 121 and ff.) and Section 6.1.2 (pp. 273 and ff.) 
24 | */ 25 | #ifndef _TIMER_H_ 26 | #define _TIMER_H_ 27 | 28 | #include 29 | 30 | /* The argument now should be a double (not a pointer to a double) */ 31 | #define GET_TIME(now) { \ 32 | struct timeval t; \ 33 | gettimeofday(&t, NULL); \ 34 | now = t.tv_sec + t.tv_usec/1000000.0; \ 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/bubble.c: -------------------------------------------------------------------------------- 1 | /* File: bubble.c 2 | * 3 | * Purpose: Use bubble sort to sort a list of ints. 4 | * 5 | * Compile: gcc -g -Wall -o bubble bubble.c 6 | * Usage: bubble 7 | * n: number of elements in list 8 | * 'g': generate list using a random number generator 9 | * 'i': user input list 10 | * 11 | * Input: list (optional) 12 | * Output: sorted list 13 | * 14 | * IPP: Section 3.7.1 (pp. 127 and ff.) and Section 5.6.1 15 | * (pp. 232 and ff.) 16 | */ 17 | #include 18 | #include 19 | 20 | /* For random list, 0 <= keys < RMAX */ 21 | const int RMAX = 100; 22 | 23 | void Usage(char* prog_name); 24 | void Get_args(int argc, char* argv[], int* n_p, char* g_i_p); 25 | void Generate_list(int a[], int n); 26 | void Print_list(int a[], int n, char* title); 27 | void Read_list(int a[], int n); 28 | void Bubble_sort(int a[], int n); 29 | 30 | /*-----------------------------------------------------------------*/ 31 | int main(int argc, char* argv[]) { 32 | int n; 33 | char g_i; 34 | int* a; 35 | 36 | Get_args(argc, argv, &n, &g_i); 37 | a = (int*) malloc(n*sizeof(int)); 38 | if (g_i == 'g') { 39 | Generate_list(a, n); 40 | Print_list(a, n, "Before sort"); 41 | } else { 42 | Read_list(a, n); 43 | } 44 | 45 | Bubble_sort(a, n); 46 | 47 | Print_list(a, n, "After sort"); 48 | 49 | free(a); 50 | return 0; 51 | } /* main */ 52 | 53 | 54 | /*----------------------------------------------------------------- 55 | * Function: Usage 56 | * Purpose: Summary 
of how to run program 57 | */ 58 | void Usage(char* prog_name) { 59 | fprintf(stderr, "usage: %s \n", prog_name); 60 | fprintf(stderr, " n: number of elements in list\n"); 61 | fprintf(stderr, " 'g': generate list using a random number generator\n"); 62 | fprintf(stderr, " 'i': user input list\n"); 63 | } /* Usage */ 64 | 65 | 66 | /*----------------------------------------------------------------- 67 | * Function: Get_args 68 | * Purpose: Get and check command line arguments 69 | * In args: argc, argv 70 | * Out args: n_p, g_i_p 71 | */ 72 | void Get_args(int argc, char* argv[], int* n_p, char* g_i_p) { 73 | if (argc != 3 ) { 74 | Usage(argv[0]); 75 | exit(0); 76 | } 77 | *n_p = atoi(argv[1]); 78 | *g_i_p = argv[2][0]; 79 | 80 | if (*n_p <= 0 || (*g_i_p != 'g' && *g_i_p != 'i') ) { 81 | Usage(argv[0]); 82 | exit(0); 83 | } 84 | } /* Get_args */ 85 | 86 | 87 | /*----------------------------------------------------------------- 88 | * Function: Generate_list 89 | * Purpose: Use random number generator to generate list elements 90 | * In args: n 91 | * Out args: a 92 | */ 93 | void Generate_list(int a[], int n) { 94 | int i; 95 | 96 | srandom(0); 97 | for (i = 0; i < n; i++) 98 | a[i] = random() % RMAX; 99 | } /* Generate_list */ 100 | 101 | 102 | /*----------------------------------------------------------------- 103 | * Function: Print_list 104 | * Purpose: Print the elements in the list 105 | * In args: a, n 106 | */ 107 | void Print_list(int a[], int n, char* title) { 108 | int i; 109 | 110 | printf("%s:\n", title); 111 | for (i = 0; i < n; i++) 112 | printf("%d ", a[i]); 113 | printf("\n\n"); 114 | } /* Print_list */ 115 | 116 | 117 | /*----------------------------------------------------------------- 118 | * Function: Read_list 119 | * Purpose: Read elements of list from stdin 120 | * In args: n 121 | * Out args: a 122 | */ 123 | void Read_list(int a[], int n) { 124 | int i; 125 | 126 | printf("Please enter the elements of the list\n"); 127 | for (i = 0; i < n; 
i++) 128 | scanf("%d", &a[i]); 129 | } /* Read_list */ 130 | 131 | 132 | /*----------------------------------------------------------------- 133 | * Function: Bubble_sort 134 | * Purpose: Sort list using bubble sort 135 | * In args: n 136 | * In/out args: a 137 | */ 138 | void Bubble_sort( 139 | int a[] /* in/out */, 140 | int n /* in */) { 141 | int list_length, i, temp; 142 | 143 | for (list_length = n; list_length >= 2; list_length--) 144 | for (i = 0; i < list_length-1; i++) 145 | if (a[i] > a[i+1]) { 146 | temp = a[i]; 147 | a[i] = a[i+1]; 148 | a[i+1] = temp; 149 | } 150 | 151 | } /* Bubble_sort */ 152 | 153 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_fibo.c: -------------------------------------------------------------------------------- 1 | /* File: omp_fibo.c 2 | * 3 | * Purpose: Try to compute n Fibonacci numbers using OpenMP. Show 4 | * what happens if we try to parallelize a loop 5 | * with dependences among the iterations. The program 6 | * has a serious bug. 7 | * 8 | * Compile: gcc -g -Wall -fopenmp -o omp_fibo omp_fibo.c 9 | * Run: ./omp_fibo 10 | * 11 | * Input: none 12 | * Output: A list of Fibonacci numbers 13 | * 14 | * Note: If your output seems to be OK, try increasing the number of 15 | * threads and/or n. 16 | * 17 | * IPP: Section 5.5.2 (pp. 227 and ff.) 
18 | */ 19 | #include 20 | #include 21 | #include 22 | 23 | void Usage(char prog_name[]); 24 | 25 | int main(int argc, char* argv[]) { 26 | int thread_count, n, i; 27 | long long* fibo; 28 | 29 | if (argc != 3) Usage(argv[0]); 30 | thread_count = strtol(argv[1], NULL, 10); 31 | n = strtol(argv[2], NULL, 10); 32 | 33 | fibo = malloc(n*sizeof(long long)); 34 | fibo[0] = fibo[1] = 1; 35 | # pragma omp parallel for num_threads(thread_count) 36 | for (i = 2; i < n; i++) 37 | fibo[i] = fibo[i-1] + fibo[i-2]; 38 | 39 | printf("The first n Fibonacci numbers:\n"); 40 | for (i = 0; i < n; i++) 41 | printf("%d\t%lld\n", i, fibo[i]); 42 | 43 | free(fibo); 44 | return 0; 45 | } /* main */ 46 | 47 | void Usage(char prog_name[]) { 48 | fprintf(stderr, "usage: %s \n", 49 | prog_name); 50 | exit(0); 51 | } /* Usage */ 52 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_hello.c: -------------------------------------------------------------------------------- 1 | /* File: omp_hello.c 2 | * 3 | * Purpose: A parallel hello, world program that uses OpenMP 4 | * 5 | * Compile: gcc -g -Wall -fopenmp -o omp_hello omp_hello.c 6 | * Run: ./omp_hello 7 | * 8 | * Input: none 9 | * Output: A message from each thread 10 | * 11 | * IPP: Section 5.1 (pp. 211 and ff.) 
12 | */ 13 | #include 14 | #include 15 | #include 16 | 17 | void Hello(void); /* Thread function */ 18 | 19 | /*--------------------------------------------------------------------*/ 20 | int main(int argc, char* argv[]) { 21 | int thread_count = strtol(argv[1], NULL, 10); 22 | 23 | # pragma omp parallel num_threads(thread_count) 24 | Hello(); 25 | 26 | return 0; 27 | } /* main */ 28 | 29 | /*------------------------------------------------------------------- 30 | * Function: Hello 31 | * Purpose: Thread function that prints message 32 | */ 33 | void Hello(void) { 34 | int my_rank = omp_get_thread_num(); 35 | int thread_count = omp_get_num_threads(); 36 | 37 | printf("Hello from thread %d of %d\n", my_rank, thread_count); 38 | 39 | } /* Hello */ 40 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_hello_err_chk.c: -------------------------------------------------------------------------------- 1 | /* File: omp_hello.c 2 | * 3 | * Purpose: A parallel hello, world program that uses OpenMP 4 | * 5 | * Compile: gcc -g -Wall -fopenmp -o omp_hello omp_hello.c 6 | * Run: ./omp_hello 7 | * 8 | * Input: none 9 | * Output: A message from each thread 10 | * 11 | * Note: This version does some basic error checking: it checks 12 | * the command line argument, and it checks the number of 13 | * threads started by the parallel directive. It also 14 | * checks for availability of OpenMP by testing for the 15 | * _OPENMP macro 16 | * 17 | * IPP: Section 5.1.3 (pp. 215 and ff.) 
18 | */ 19 | #include 20 | #include 21 | #ifdef _OPENMP 22 | # include 23 | #endif /* _OPENMP */ 24 | 25 | void Usage(char* prog_name); 26 | void Hello(int thread_count); /* Thread function */ 27 | 28 | /*--------------------------------------------------------------------*/ 29 | int main(int argc, char* argv[]) { 30 | int thread_count; 31 | 32 | if (argc != 2) Usage(argv[0]); 33 | thread_count = strtol(argv[1], NULL, 10); 34 | if (thread_count <= 0) Usage(argv[0]); 35 | 36 | # pragma omp parallel num_threads(thread_count) 37 | Hello(thread_count); 38 | 39 | return 0; 40 | } /* main */ 41 | 42 | /*-------------------------------------------------------------------- 43 | * Function: Usage 44 | * Purpose: Print a message indicating how program should be started 45 | * and terminate. 46 | */ 47 | void Usage(char *prog_name) { 48 | fprintf(stderr, "usage: %s \n", prog_name); 49 | fprintf(stderr, " thread_count should be positive\n"); 50 | exit(0); 51 | } /* Usage */ 52 | 53 | /*-------------------------------------------------------------------- 54 | * Function: Hello 55 | * Purpose: Thread function that prints message 56 | */ 57 | void Hello(int thread_count) { 58 | # ifdef _OPENMP 59 | int my_rank = omp_get_thread_num(); 60 | int actual_thread_count = omp_get_num_threads(); 61 | # else 62 | int my_rank = 0; 63 | int actual_thread_count = 1; 64 | # endif 65 | 66 | if (my_rank == 0 && thread_count != actual_thread_count) 67 | fprintf(stderr, "Number of threads started != %d\n", thread_count); 68 | printf("Hello from thread %d of %d\n", my_rank, actual_thread_count); 69 | 70 | } /* Hello */ 71 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_msg/omp_msgps.c: -------------------------------------------------------------------------------- 1 | /* File: omp_msgps.c 2 | * 3 | * Purpose: Simulate message-passing using OpenMP.
Uses critical and 4 | * atomic directives to protect critical sections. 5 | * 6 | * Compile: gcc -g -Wall -fopenmp 7 | * -o omp_msgps omp_msgps.c queue.c 8 | * needs queue.h 9 | * Usage: ./omp_msgps 11 | * 12 | * Input: None 13 | * Output: Source, destination and contents of each message received. 14 | * 15 | * Notes: 16 | * 1. DEBUG flag for more verbose output 17 | * 18 | * IPP: Section 5.8.2 (pp. 242 and ff.) 19 | */ 20 | #include 21 | #include 22 | #include 23 | #include "queue.h" 24 | 25 | const int MAX_MSG = 10000; 26 | 27 | void Usage(char* prog_name); 28 | void Send_msg(struct queue_s* msg_queues[], int my_rank, 29 | int thread_count, int msg_number); 30 | void Try_receive(struct queue_s* q_p, int my_rank); 31 | int Done(struct queue_s* q_p, int done_sending, int thread_count); 32 | 33 | /*-------------------------------------------------------------------*/ 34 | int main(int argc, char* argv[]) { 35 | int thread_count; 36 | int send_max; 37 | struct queue_s** msg_queues; 38 | int done_sending = 0; 39 | 40 | if (argc != 3) Usage(argv[0]); 41 | thread_count = strtol(argv[1], NULL, 10); 42 | send_max = strtol(argv[2], NULL, 10); 43 | if (thread_count <= 0 || send_max < 0) Usage(argv[0]); 44 | 45 | msg_queues = malloc(thread_count*sizeof(struct queue_s*)); /* element type is struct queue_s*, not queue_node_s* */ 46 | 47 | # pragma omp parallel num_threads(thread_count) \ 48 | default(none) shared(thread_count, send_max, msg_queues, done_sending) 49 | { 50 | int my_rank = omp_get_thread_num(); 51 | int msg_number; 52 | srandom(my_rank); 53 | msg_queues[my_rank] = Allocate_queue(); 54 | 55 | # pragma omp barrier /* Don't let any threads send messages */ 56 | /* until all queues are constructed */ 57 | 58 | for (msg_number = 0; msg_number < send_max; msg_number++) { 59 | Send_msg(msg_queues, my_rank, thread_count, msg_number); 60 | Try_receive(msg_queues[my_rank], my_rank); 61 | } 62 | # pragma omp atomic 63 | done_sending++; 64 | # ifdef DEBUG 65 | printf("Thread %d > done sending\n", my_rank); 66 | # endif 67 |
68 | while (!Done(msg_queues[my_rank], done_sending, thread_count)) 69 | Try_receive(msg_queues[my_rank], my_rank); 70 | 71 | /* My queue is empty, and everyone is done sending */ 72 | /* So my queue won't be accessed again, and it's OK to free it */ 73 | Free_queue(msg_queues[my_rank]); 74 | free(msg_queues[my_rank]); 75 | } /* omp parallel */ 76 | 77 | free(msg_queues); 78 | return 0; 79 | } /* main */ 80 | 81 | /*-------------------------------------------------------------------*/ 82 | void Usage(char *prog_name) { 83 | fprintf(stderr, "usage: %s \n", 84 | prog_name); 85 | fprintf(stderr, " number of messages = number sent by each thread\n"); 86 | exit(0); 87 | } /* Usage */ 88 | 89 | /*-------------------------------------------------------------------*/ 90 | void Send_msg(struct queue_s* msg_queues[], int my_rank, 91 | int thread_count, int msg_number) { 92 | // int mesg = random() % MAX_MSG; 93 | int mesg = -msg_number; 94 | int dest = random() % thread_count; 95 | # pragma omp critical 96 | Enqueue(msg_queues[dest], my_rank, mesg); 97 | # ifdef DEBUG 98 | printf("Thread %d > sent %d to %d\n", my_rank, mesg, dest); 99 | # endif 100 | } /* Send_msg */ 101 | 102 | /*-------------------------------------------------------------------*/ 103 | void Try_receive(struct queue_s* q_p, int my_rank) { 104 | int src, mesg; 105 | int queue_size = q_p->enqueued - q_p->dequeued; 106 | 107 | if (queue_size == 0) return; 108 | else if (queue_size == 1) 109 | # pragma omp critical 110 | Dequeue(q_p, &src, &mesg); 111 | else 112 | Dequeue(q_p, &src, &mesg); 113 | printf("Thread %d > received %d from %d\n", my_rank, mesg, src); 114 | } /* Try_receive */ 115 | 116 | /*-------------------------------------------------------------------*/ 117 | int Done(struct queue_s* q_p, int done_sending, int thread_count) { 118 | int queue_size = q_p->enqueued - q_p->dequeued; 119 | if (queue_size == 0 && done_sending == thread_count) 120 | return 1; 121 | else 122 | return 0; 123 | } /* Done 
*/ 124 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_msg/queue.h: -------------------------------------------------------------------------------- 1 | /* File: queue.h 2 | * Purpose: Header file for queue.c which implements a queue of messages 3 | * or pairs of ints (source + contents) as a linked list. 4 | */ 5 | #ifndef _QUEUE_H_ 6 | #define _QUEUE_H_ 7 | 8 | struct queue_node_s { 9 | int src; 10 | int mesg; 11 | struct queue_node_s* next_p; 12 | }; 13 | 14 | struct queue_s{ 15 | int enqueued; 16 | int dequeued; 17 | struct queue_node_s* front_p; 18 | struct queue_node_s* tail_p; 19 | }; 20 | 21 | struct queue_s* Allocate_queue(void); 22 | void Free_queue(struct queue_s* q_p); 23 | void Print_queue(struct queue_s* q_p); 24 | void Enqueue(struct queue_s* q_p, int src, int mesg); 25 | int Dequeue(struct queue_s* q_p, int* src_p, int* mesg_p); 26 | int Search(struct queue_s* q_p, int mesg, int* src_p); 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_msg/queue_lk.h: -------------------------------------------------------------------------------- 1 | /* File: queue_lk.h 2 | * Purpose: Header file for queue_lk.c, which implements a queue with 3 | * OpenMP locks 4 | */ 5 | #ifndef _QUEUE_LK_H_ 6 | #define _QUEUE_LK_H_ 7 | #include 8 | 9 | struct queue_node_s { 10 | int src; 11 | int mesg; 12 | struct queue_node_s* next_p; 13 | }; 14 | 15 | struct queue_s{ 16 | omp_lock_t lock; 17 | int enqueued; 18 | int dequeued; 19 | struct queue_node_s* front_p; 20 | struct queue_node_s* tail_p; 21 | }; 22 | 23 | struct queue_s* Allocate_queue(void); 24 | void Free_queue(struct queue_s* q_p); 25 | void Print_queue(struct queue_s* q_p); 26 | void Enqueue(struct queue_s* q_p, int src, int mesg); 27 | int Dequeue(struct queue_s* q_p, int* src_p, int* 
mesg_p); 28 | int Search(struct queue_s* q_p, int mesg, int* src_p); 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_pi.c: -------------------------------------------------------------------------------- 1 | /* File: omp_pi.c 2 | * Purpose: Estimate pi using OpenMP and the formula 3 | * 4 | * pi = 4*[1 - 1/3 + 1/5 - 1/7 + 1/9 - . . . ] 5 | * 6 | * Compile: gcc -g -Wall -fopenmp -o omp_pi omp_pi.c -lm 7 | * Run: omp_pi 8 | * thread_count is the number of threads 9 | * n is the number of terms of the series to use 10 | * 11 | * Input: none 12 | * Output: The estimate of pi and the value of pi computed by the 13 | * arctan function in the math library 14 | * 15 | * Notes: 16 | * 1. The radius of convergence is only 1. So the series converges 17 | * *very* slowly. 18 | * 19 | * IPP: Section 5.5.4 (pp. 229 and ff.) 20 | */ 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | void Usage(char* prog_name); 28 | 29 | int main(int argc, char* argv[]) { 30 | long long n, i; 31 | int thread_count; 32 | double factor; 33 | double sum = 0.0; 34 | 35 | if (argc != 3) Usage(argv[0]); 36 | thread_count = strtol(argv[1], NULL, 10); 37 | n = strtoll(argv[2], NULL, 10); 38 | if (thread_count < 1 || n < 1) Usage(argv[0]); 39 | 40 | # pragma omp parallel for num_threads(thread_count) \ 41 | reduction(+: sum) private(factor) 42 | for (i = 0; i < n; i++) { 43 | factor = (i % 2 == 0) ? 
1.0 : -1.0; 44 | sum += factor/(2*i+1); 45 | # ifdef DEBUG 46 | printf("Thread %d > i = %lld, my_sum = %f\n", my_rank, i, my_sum); 47 | # endif 48 | } 49 | 50 | sum = 4.0*sum; 51 | printf("With n = %lld terms and %d threads,\n", n, thread_count); 52 | printf(" Our estimate of pi = %.14f\n", sum); 53 | printf(" pi = %.14f\n", 4.0*atan(1.0)); 54 | return 0; 55 | } /* main */ 56 | 57 | /*------------------------------------------------------------------ 58 | * Function: Usage 59 | * Purpose: Print a message explaining how to run the program 60 | * In arg: prog_name 61 | */ 62 | void Usage(char* prog_name) { 63 | fprintf(stderr, "usage: %s \n", prog_name); /* Change */ 64 | fprintf(stderr, " thread_count is the number of threads >= 1\n"); /* Change */ 65 | fprintf(stderr, " n is the number of terms and should be >= 1\n"); 66 | exit(0); 67 | } /* Usage */ 68 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_private.c: -------------------------------------------------------------------------------- 1 | /* File: omp_private.c 2 | * 3 | * Purpose: Print the value of a private variable at the beginning 4 | * of a parallel block and after the end of the block 5 | * 6 | * Compile: gcc -g -Wall -fopenmp -o omp_private omp_private.c 7 | * Run: ./omp_private 8 | * 9 | * Input: none 10 | * Output: Value of int at various points in the program 11 | * 12 | * IPP: Section 5.5.4 (p. 
231) 13 | */ 14 | #include 15 | #include 16 | #include 17 | 18 | /*--------------------------------------------------------------------*/ 19 | int main(int argc, char* argv[]) { 20 | int x = 5; 21 | int thread_count = strtol(argv[1], NULL, 10); 22 | 23 | # pragma omp parallel num_threads(thread_count) \ 24 | private(x) 25 | { 26 | int my_rank = omp_get_thread_num(); 27 | printf("Thread %d > before initialization, x = %d\n", 28 | my_rank, x); 29 | x = 2*my_rank + 2; 30 | printf("Thread %d > after initialization, x = %d\n", 31 | my_rank, x); 32 | } 33 | printf("After parallel block, x = %d\n", x); 34 | 35 | return 0; 36 | } /* main */ 37 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_tokenize.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * omp_tokenize.c 3 | * 4 | * Purpose: 5 | * Try to use threads to tokenize text input. Illustrate problems 6 | * with function that isn't threadsafe. This program has a serious 7 | * bug. 8 | * 9 | * Compile: 10 | * gcc -g -Wall -fopenmp -o omp_tokenize omp_tokenize.c 11 | * Usage: 12 | * omp_tokenize < 13 | * 14 | * Input: 15 | * Lines of text 16 | * (Desired) Output: 17 | * For each line of input: 18 | * the line read by the program, and the tokens identified by 19 | * strtok 20 | * 21 | * Algorithm: 22 | * For each line of input, next thread reads the line and 23 | * "tokenizes" it. 24 | * 25 | * IPP: Section 5.10 (pp. 256 and ff.) 
26 | */ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | const int MAX_LINES = 1000; 34 | const int MAX_LINE = 80; 35 | 36 | void Usage(char* prog_name); 37 | void Get_text(char* lines[], int* line_count_p); 38 | void Tokenize(char* lines[], int line_count, int thread_count); 39 | 40 | /*--------------------------------------------------------------------*/ 41 | int main(int argc, char* argv[]) { 42 | int thread_count, i; 43 | char* lines[1000]; 44 | int line_count; 45 | 46 | if (argc != 2) Usage(argv[0]); 47 | thread_count = strtol(argv[1], NULL, 10); 48 | 49 | printf("Enter text\n"); 50 | Get_text(lines, &line_count); 51 | Tokenize(lines, line_count, thread_count); 52 | 53 | for (i = 0; i < line_count; i++) 54 | if (lines[i] != NULL) free(lines[i]); 55 | 56 | return 0; 57 | } /* main */ 58 | 59 | 60 | /*-------------------------------------------------------------------- 61 | * Function: Usage 62 | * Purpose: Print command line for function and terminate 63 | * In arg: prog_name 64 | */ 65 | void Usage(char* prog_name) { 66 | 67 | fprintf(stderr, "usage: %s \n", prog_name); 68 | exit(0); 69 | } /* Usage */ 70 | 71 | /*-------------------------------------------------------------------- 72 | * Function: Get_text 73 | * Purpose: Read text and store as an array of strings, one per line 74 | * of input text 75 | * Out args: lines, line_count_p 76 | */ 77 | void Get_text(char* lines[], int* line_count_p) { 78 | char* line = malloc(MAX_LINE*sizeof(char)); 79 | int i = 0; 80 | char* fg_rv; 81 | 82 | fg_rv = fgets(line, MAX_LINE, stdin); 83 | while (fg_rv != NULL) { 84 | lines[i++] = line; 85 | line = malloc(MAX_LINE*sizeof(char)); 86 | fg_rv = fgets(line, MAX_LINE, stdin); 87 | } 88 | *line_count_p = i; 89 | } /* Get_text */ 90 | 91 | /*------------------------------------------------------------------- 92 | * Function: Tokenize 93 | * Purpose: Tokenize lines of input 94 | * In args: line_count, thread_count 95 | * In/out arg: lines 96 | */ 97 | 
void Tokenize( 98 | char* lines[] /* in/out */, 99 | int line_count /* in */, 100 | int thread_count /* in */) { 101 | int my_rank, i, j; 102 | char *my_token; 103 | 104 | # pragma omp parallel num_threads(thread_count) \ 105 | default(none) private(my_rank, i, j, my_token) shared(lines, line_count) 106 | { 107 | my_rank = omp_get_thread_num(); 108 | # pragma omp for schedule(static, 1) 109 | for (i = 0; i < line_count; i++) { 110 | printf("Thread %d > line %d = %s", my_rank, i, lines[i]); 111 | j = 0; 112 | my_token = strtok(lines[i], " \t\n"); 113 | while ( my_token != NULL ) { 114 | printf("Thread %d > token %d = %s\n", my_rank, j, my_token); 115 | my_token = strtok(NULL, " \t\n"); 116 | j++; 117 | } 118 | if (lines[i] != NULL) 119 | printf("Thread %d > After tokenizing, my line = %s\n", 120 | my_rank, lines[i]); 121 | } /* for i */ 122 | } /* omp parallel */ 123 | 124 | } /* Tokenize */ 125 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_tokenize_r.c: -------------------------------------------------------------------------------- 1 | /* File: 2 | * omp_tokenize_r.c 3 | * 4 | * Purpose: 5 | * Try to use threads to tokenize text input. This version 6 | * uses the thread safe tokenizer strtok_r. 7 | * 8 | * Compile: 9 | * gcc -g -Wall -fopenmp -o omp_tokenize_r omp_tokenize_r.c 10 | * Usage: 11 | * omp_tokenize_r < 12 | * 13 | * Input: 14 | * Lines of text 15 | * Output: 16 | * For each line of input: 17 | * the line read by the program, and the tokens identified by 18 | * strtok 19 | * 20 | * Algorithm: 21 | * For each line of input, next thread reads the line and 22 | * "tokenizes" it. 23 | * 24 | * IPP: Section 5.10 (p. 
258) 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | const int MAX_LINES = 1000; 33 | const int MAX_LINE = 80; 34 | 35 | void Usage(char* prog_name); 36 | void Get_text(char* lines[], int* line_count_p); 37 | void Tokenize(char* lines[], int line_count, int thread_count); 38 | 39 | /*--------------------------------------------------------------------*/ 40 | int main(int argc, char* argv[]) { 41 | int thread_count, i; 42 | char* lines[1000]; 43 | int line_count; 44 | 45 | if (argc != 2) Usage(argv[0]); 46 | thread_count = strtol(argv[1], NULL, 10); 47 | 48 | printf("Enter text\n"); 49 | Get_text(lines, &line_count); 50 | Tokenize(lines, line_count, thread_count); 51 | 52 | for (i = 0; i < line_count; i++) 53 | if (lines[i] != NULL) free(lines[i]); 54 | 55 | return 0; 56 | } /* main */ 57 | 58 | 59 | /*-------------------------------------------------------------------- 60 | * Function: Usage 61 | * Purpose: Print command line for function and terminate 62 | * In arg: prog_name 63 | */ 64 | void Usage(char* prog_name) { 65 | 66 | fprintf(stderr, "usage: %s \n", prog_name); 67 | exit(0); 68 | } /* Usage */ 69 | 70 | /*-------------------------------------------------------------------- 71 | * Function: Get_text 72 | * Purpose: Read text and store as an array of strings, one per line 73 | * of input text 74 | * Out args: lines, line_count_p 75 | */ 76 | void Get_text(char* lines[], int* line_count_p) { 77 | char* line = malloc(MAX_LINE*sizeof(char)); 78 | int i = 0; 79 | char* fg_rv; 80 | 81 | fg_rv = fgets(line, MAX_LINE, stdin); 82 | while (fg_rv != NULL) { 83 | lines[i++] = line; 84 | line = malloc(MAX_LINE*sizeof(char)); 85 | fg_rv = fgets(line, MAX_LINE, stdin); 86 | } 87 | *line_count_p = i; 88 | } /* Get_text */ 89 | 90 | /*------------------------------------------------------------------- 91 | * Function: Tokenize 92 | * Purpose: Tokenize lines of input 93 | * In args: line_count, thread_count 94 | * In/out arg: lines 95 | */ 96 
| void Tokenize( 97 | char* lines[] /* in/out */, 98 | int line_count /* in */, 99 | int thread_count /* in */) { 100 | int my_rank, i, j; 101 | char *my_token, *saveptr; 102 | 103 | # pragma omp parallel num_threads(thread_count) \ 104 | default(none) private(my_rank, i, j, my_token, saveptr) \ 105 | shared(lines, line_count) 106 | { 107 | my_rank = omp_get_thread_num(); 108 | # pragma omp for schedule(static, 1) 109 | for (i = 0; i < line_count; i++) { 110 | printf("Thread %d > line %d = %s", my_rank, i, lines[i]); 111 | j = 0; 112 | my_token = strtok_r(lines[i], " \t\n", &saveptr); 113 | while ( my_token != NULL ) { 114 | printf("Thread %d > token %d = %s\n", my_rank, j, my_token); 115 | my_token = strtok_r(NULL, " \t\n", &saveptr); 116 | j++; 117 | } 118 | if (lines[i] != NULL) 119 | printf("Thread %d > After tokenizing, my line = %s\n", 120 | my_rank, lines[i]); 121 | } /* for i */ 122 | } /* omp parallel */ 123 | 124 | } /* Tokenize */ 125 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_trap1.c: -------------------------------------------------------------------------------- 1 | /* File: omp_trap1.c 2 | * Purpose: Estimate definite integral (or area under curve) using trapezoidal 3 | * rule. 4 | * 5 | * Input: a, b, n 6 | * Output: estimate of integral from a to b of f(x) 7 | * using n trapezoids. 8 | * 9 | * Compile: gcc -g -Wall -fopenmp -o omp_trap1 omp_trap1.c 10 | * Usage: ./omp_trap1 11 | * 12 | * Notes: 13 | * 1. The function f(x) is hardwired. 14 | * 2. In this version, each thread explicitly computes the integral 15 | * over its assigned subinterval, a critical directive is used 16 | * for the global sum. 17 | * 3. This version assumes that n is evenly divisible by the 18 | * number of threads 19 | * 20 | * IPP: Section 5.2.1 (pp. 216 and ff.) 
21 | */ 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | void Usage(char* prog_name); 29 | double f(double x); /* Function we're integrating */ 30 | void Trap(double a, double b, int n, double* global_result_p); 31 | 32 | int main(int argc, char* argv[]) { 33 | double global_result = 0.0; /* Store result in global_result */ 34 | double a, b; /* Left and right endpoints */ 35 | int n; /* Total number of trapezoids */ 36 | int thread_count; 37 | 38 | if (argc != 2) Usage(argv[0]); 39 | thread_count = strtol(argv[1], NULL, 10); 40 | printf("Enter a, b, and n\n"); 41 | scanf("%lf %lf %d", &a, &b, &n); 42 | if (n % thread_count != 0) Usage(argv[0]); 43 | # pragma omp parallel num_threads(thread_count) 44 | Trap(a, b, n, &global_result); 45 | 46 | printf("With n = %d trapezoids, our estimate\n", n); 47 | printf("of the integral from %f to %f = %.14e\n", 48 | a, b, global_result); 49 | return 0; 50 | } /* main */ 51 | 52 | /*-------------------------------------------------------------------- 53 | * Function: Usage 54 | * Purpose: Print command line for function and terminate 55 | * In arg: prog_name 56 | */ 57 | void Usage(char* prog_name) { 58 | 59 | fprintf(stderr, "usage: %s \n", prog_name); 60 | fprintf(stderr, " number of trapezoids must be evenly divisible by\n"); 61 | fprintf(stderr, " number of threads\n"); 62 | exit(0); 63 | } /* Usage */ 64 | 65 | /*------------------------------------------------------------------ 66 | * Function: f 67 | * Purpose: Compute value of function to be integrated 68 | * Input arg: x 69 | * Return val: f(x) 70 | */ 71 | double f(double x) { 72 | double return_val; 73 | 74 | return_val = x*x; 75 | return return_val; 76 | } /* f */ 77 | 78 | /*------------------------------------------------------------------ 79 | * Function: Trap 80 | * Purpose: Use trapezoidal rule to estimate definite integral 81 | * Input args: 82 | * a: left endpoint 83 | * b: right endpoint 84 | * n: number of trapezoids 85 | * Output arg: 
86 | * integral: estimate of integral from a to b of f(x) 87 | */ 88 | void Trap(double a, double b, int n, double* global_result_p) { 89 | double h, x, my_result; 90 | double local_a, local_b; 91 | int i, local_n; 92 | int my_rank = omp_get_thread_num(); 93 | int thread_count = omp_get_num_threads(); 94 | 95 | h = (b-a)/n; 96 | local_n = n/thread_count; 97 | local_a = a + my_rank*local_n*h; 98 | local_b = local_a + local_n*h; 99 | my_result = (f(local_a) + f(local_b))/2.0; 100 | for (i = 1; i <= local_n-1; i++) { 101 | x = local_a + i*h; 102 | my_result += f(x); 103 | } 104 | my_result = my_result*h; 105 | 106 | # pragma omp critical 107 | *global_result_p += my_result; 108 | } /* Trap */ 109 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_trap2a.c: -------------------------------------------------------------------------------- 1 | /* File: omp_trap2a.c 2 | * Purpose: Estimate definite integral (or area under curve) using trapezoidal 3 | * rule. This version uses a hand-coded reduction after the function 4 | * call. 5 | * 6 | * Input: a, b, n 7 | * Output: estimate of integral from a to b of f(x) 8 | * using n trapezoids. 9 | * 10 | * Compile: gcc -g -Wall -fopenmp -o omp_trap2a omp_trap2a.c -lm 11 | * Usage: ./omp_trap2a 12 | * 13 | * Notes: 14 | * 1. The function f(x) is hardwired. 15 | * 2. This version assumes that n is evenly divisible by the 16 | * number of threads 17 | * IPP: Section 5.4 (p. 
222) 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | void Usage(char* prog_name); 26 | double f(double x); /* Function we're integrating */ 27 | double Local_trap(double a, double b, int n); 28 | 29 | int main(int argc, char* argv[]) { 30 | double global_result; /* Store result in global_result */ 31 | double a, b; /* Left and right endpoints */ 32 | int n; /* Total number of trapezoids */ 33 | int thread_count; 34 | 35 | if (argc != 2) Usage(argv[0]); 36 | thread_count = strtol(argv[1], NULL, 10); 37 | printf("Enter a, b, and n\n"); 38 | scanf("%lf %lf %d", &a, &b, &n); 39 | if (n % thread_count != 0) Usage(argv[0]); 40 | 41 | global_result = 0.0; 42 | # pragma omp parallel num_threads(thread_count) 43 | { 44 | double my_result = 0.0; 45 | my_result += Local_trap(a, b, n); 46 | # pragma omp critical 47 | global_result += my_result; 48 | } 49 | 50 | printf("With n = %d trapezoids, our estimate\n", n); 51 | printf("of the integral from %f to %f = %.14e\n", 52 | a, b, global_result); 53 | return 0; 54 | } /* main */ 55 | 56 | /*-------------------------------------------------------------------- 57 | * Function: Usage 58 | * Purpose: Print command line for function and terminate 59 | * In arg: prog_name 60 | */ 61 | void Usage(char* prog_name) { 62 | 63 | fprintf(stderr, "usage: %s \n", prog_name); 64 | fprintf(stderr, " number of trapezoids must be evenly divisible by\n"); 65 | fprintf(stderr, " number of threads\n"); 66 | exit(0); 67 | } /* Usage */ 68 | 69 | /*------------------------------------------------------------------ 70 | * Function: f 71 | * Purpose: Compute value of function to be integrated 72 | * Input arg: x 73 | * Return val: f(x) 74 | */ 75 | double f(double x) { 76 | double return_val; 77 | 78 | return_val = x*x; 79 | return return_val; 80 | } /* f */ 81 | 82 | /*------------------------------------------------------------------ 83 | * Function: Local_trap 84 | * Purpose: Use trapezoidal rule to estimate part of a 
definite 85 | * integral 86 | * Input args: 87 | * a: left endpoint 88 | * b: right endpoint 89 | * n: number of trapezoids 90 | * Return val: estimate of integral from local_a to local_b 91 | * 92 | * Note: return value should be added in to an OpenMP 93 | * reduction variable to get estimate of entire 94 | * integral 95 | */ 96 | double Local_trap(double a, double b, int n) { 97 | double h, x, my_result; 98 | double local_a, local_b; 99 | int i, local_n; 100 | int my_rank = omp_get_thread_num(); 101 | int thread_count = omp_get_num_threads(); 102 | 103 | h = (b-a)/n; 104 | local_n = n/thread_count; 105 | local_a = a + my_rank*local_n*h; 106 | local_b = local_a + local_n*h; 107 | my_result = (f(local_a) + f(local_b))/2.0; 108 | for (i = 1; i <= local_n-1; i++) { 109 | x = local_a + i*h; 110 | my_result += f(x); 111 | } 112 | my_result = my_result*h; 113 | 114 | return my_result; 115 | } /* Trap */ 116 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_trap2b.c: -------------------------------------------------------------------------------- 1 | /* File: omp_trap2b.c 2 | * Purpose: Estimate definite integral (or area under curve) using trapezoidal 3 | * rule. This version uses a reduction clause. 4 | * 5 | * Input: a, b, n 6 | * Output: estimate of integral from a to b of f(x) 7 | * using n trapezoids. 8 | * 9 | * Compile: gcc -g -Wall -fopenmp -o omp_trap2b omp_trap2b.c 10 | * Usage: ./omp_trap2b 11 | * 12 | * Notes: 13 | * 1. The function f(x) is hardwired. 14 | * 2. This version assumes that n is evenly divisible by the 15 | * number of threads 16 | * 17 | * IPP: Section 5.4 (pp. 223 and ff.) 
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | void Usage(char* prog_name); 26 | double f(double x); /* Function we're integrating */ 27 | double Local_trap(double a, double b, int n); 28 | 29 | int main(int argc, char* argv[]) { 30 | double global_result = 0.0; /* Store result in global_result */ 31 | double a, b; /* Left and right endpoints */ 32 | int n; /* Total number of trapezoids */ 33 | int thread_count; 34 | 35 | if (argc != 2) Usage(argv[0]); 36 | thread_count = strtol(argv[1], NULL, 10); 37 | printf("Enter a, b, and n\n"); 38 | scanf("%lf %lf %d", &a, &b, &n); 39 | if (n % thread_count != 0) Usage(argv[0]); 40 | 41 | # pragma omp parallel num_threads(thread_count) \ 42 | reduction(+: global_result) 43 | global_result += Local_trap(a, b, n); 44 | 45 | printf("With n = %d trapezoids, our estimate\n", n); 46 | printf("of the integral from %f to %f = %.14e\n", 47 | a, b, global_result); 48 | return 0; 49 | } /* main */ 50 | 51 | /*-------------------------------------------------------------------- 52 | * Function: Usage 53 | * Purpose: Print command line for function and terminate 54 | * In arg: prog_name 55 | */ 56 | void Usage(char* prog_name) { 57 | 58 | fprintf(stderr, "usage: %s \n", prog_name); 59 | fprintf(stderr, " number of trapezoids must be evenly divisible by\n"); 60 | fprintf(stderr, " number of threads\n"); 61 | exit(0); 62 | } /* Usage */ 63 | 64 | /*------------------------------------------------------------------ 65 | * Function: f 66 | * Purpose: Compute value of function to be integrated 67 | * Input arg: x 68 | * Return val: f(x) 69 | */ 70 | double f(double x) { 71 | double return_val; 72 | 73 | return_val = x*x; 74 | return return_val; 75 | } /* f */ 76 | 77 | /*------------------------------------------------------------------ 78 | * Function: Local_trap 79 | * Purpose: Use trapezoidal rule to estimate part of a definite 80 | * integral 81 | * Input args: 82 | * a: left endpoint 83 | * b: right 
endpoint 84 | * n: number of trapezoids 85 | * Return val: estimate of integral from local_a to local_b 86 | * 87 | * Note: return value should be added in to an OpenMP 88 | * reduction variable to get estimate of entire 89 | * integral 90 | */ 91 | double Local_trap(double a, double b, int n) { 92 | double h, x, my_result; 93 | double local_a, local_b; 94 | int i, local_n; 95 | int my_rank = omp_get_thread_num(); 96 | int thread_count = omp_get_num_threads(); 97 | 98 | h = (b-a)/n; 99 | local_n = n/thread_count; 100 | local_a = a + my_rank*local_n*h; 101 | local_b = local_a + local_n*h; 102 | my_result = (f(local_a) + f(local_b))/2.0; 103 | for (i = 1; i <= local_n-1; i++) { 104 | x = local_a + i*h; 105 | my_result += f(x); 106 | } 107 | my_result = my_result*h; 108 | 109 | return my_result; 110 | } /* Trap */ 111 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/omp_trap3.c: -------------------------------------------------------------------------------- 1 | /* File: omp_trap3.c 2 | * Purpose: Estimate definite integral (or area under curve) using the 3 | * trapezoidal rule. This version uses a parallel for directive 4 | * 5 | * Input: a, b, n 6 | * Output: estimate of integral from a to b of f(x) 7 | * using n trapezoids. 8 | * 9 | * Compile: gcc -g -Wall -fopenmp -o omp_trap3 omp_trap3.c 10 | * Usage: ./omp_trap3 11 | * 12 | * Notes: 13 | * 1. The function f(x) is hardwired. 14 | * 2. In this version, it's not necessary for n to be 15 | * evenly divisible by thread_count. 16 | * 17 | * IPP: Section 5.5 (pp. 224 and ff.) 
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | void Usage(char* prog_name); 26 | double f(double x); /* Function we're integrating */ 27 | double Trap(double a, double b, int n, int thread_count); 28 | 29 | int main(int argc, char* argv[]) { 30 | double global_result = 0.0; /* Store result in global_result */ 31 | double a, b; /* Left and right endpoints */ 32 | int n; /* Total number of trapezoids */ 33 | int thread_count; 34 | 35 | if (argc != 2) Usage(argv[0]); 36 | thread_count = strtol(argv[1], NULL, 10); 37 | printf("Enter a, b, and n\n"); 38 | scanf("%lf %lf %d", &a, &b, &n); 39 | 40 | global_result = Trap(a, b, n, thread_count); 41 | 42 | printf("With n = %d trapezoids, our estimate\n", n); 43 | printf("of the integral from %f to %f = %.14e\n", 44 | a, b, global_result); 45 | return 0; 46 | } /* main */ 47 | 48 | /*-------------------------------------------------------------------- 49 | * Function: Usage 50 | * Purpose: Print command line for function and terminate 51 | * In arg: prog_name 52 | */ 53 | void Usage(char* prog_name) { 54 | 55 | fprintf(stderr, "usage: %s \n", prog_name); 56 | exit(0); 57 | } /* Usage */ 58 | 59 | /*------------------------------------------------------------------ 60 | * Function: f 61 | * Purpose: Compute value of function to be integrated 62 | * Input arg: x 63 | * Return val: f(x) 64 | */ 65 | double f(double x) { 66 | double return_val; 67 | 68 | return_val = x*x; 69 | return return_val; 70 | } /* f */ 71 | 72 | /*------------------------------------------------------------------ 73 | * Function: Trap 74 | * Purpose: Use trapezoidal rule to estimate definite integral 75 | * Input args: 76 | * a: left endpoint 77 | * b: right endpoint 78 | * n: number of trapezoids 79 | * Return val: 80 | * approx: estimate of integral from a to b of f(x) 81 | */ 82 | double Trap(double a, double b, int n, int thread_count) { 83 | double h, approx; 84 | int i; 85 | 86 | h = (b-a)/n; 87 | approx = (f(a) + 
f(b))/2.0; 88 | # pragma omp parallel for num_threads(thread_count) \ 89 | reduction(+: approx) 90 | for (i = 1; i <= n-1; i++) 91 | approx += f(a + i*h); 92 | approx = h*approx; 93 | 94 | return approx; 95 | } /* Trap */ 96 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch5/trap.c: -------------------------------------------------------------------------------- 1 | /* File: trap.c 2 | * Purpose: Calculate definite integral using trapezoidal 3 | * rule. 4 | * 5 | * Input: a, b, n 6 | * Output: Estimate of integral from a to b of f(x) 7 | * using n trapezoids. 8 | * 9 | * Compile: gcc -g -Wall -o trap trap.c 10 | * Usage: ./trap 11 | * 12 | * Note: The function f(x) is hardwired. 13 | * 14 | * IPP: Section 3.2.1 (pp. 94 and ff.) and 5.2 (p. 216) 15 | */ 16 | 17 | #include 18 | 19 | double f(double x); /* Function we're integrating */ 20 | double Trap(double a, double b, int n, double h); 21 | 22 | int main(void) { 23 | double integral; /* Store result in integral */ 24 | double a, b; /* Left and right endpoints */ 25 | int n; /* Number of trapezoids */ 26 | double h; /* Height of trapezoids */ 27 | 28 | printf("Enter a, b, and n\n"); 29 | scanf("%lf", &a); 30 | scanf("%lf", &b); 31 | scanf("%d", &n); 32 | 33 | h = (b-a)/n; 34 | integral = Trap(a, b, n, h); 35 | 36 | printf("With n = %d trapezoids, our estimate\n", n); 37 | printf("of the integral from %f to %f = %.15f\n", 38 | a, b, integral); 39 | 40 | return 0; 41 | } /* main */ 42 | 43 | /*------------------------------------------------------------------ 44 | * Function: Trap 45 | * Purpose: Estimate integral from a to b of f using trap rule and 46 | * n trapezoids 47 | * Input args: a, b, n, h 48 | * Return val: Estimate of the integral 49 | */ 50 | double Trap(double a, double b, int n, double h) { 51 | double integral; 52 | int k; 53 | 54 | integral = (f(a) + f(b))/2.0; 55 | for (k = 1; k <= n-1; k++) { 56 | 
integral += f(a+k*h); 57 | } 58 | integral = integral*h; 59 | 60 | return integral; 61 | } /* Trap */ 62 | 63 | /*------------------------------------------------------------------ 64 | * Function: f 65 | * Purpose: Compute value of function to be integrated 66 | * Input args: x 67 | */ 68 | double f(double x) { 69 | double return_val; 70 | 71 | return_val = x*x; 72 | return return_val; 73 | } /* f */ 74 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/frac.h: -------------------------------------------------------------------------------- 1 | /* File: frac.h 2 | * Purpose: Header file for frac.c, which implement common fractions and 3 | * certain operations on common fractions in which the denominator 4 | * is a power of 2 5 | * 6 | * IPP: Section 6.2.12 (pp. 331 and ff.) 7 | */ 8 | #ifndef _FRAC_H_ 9 | #define _FRAC_H_ 10 | 11 | typedef struct { 12 | char* num; // bit array representing numerator 13 | unsigned denom; // base 2 log of denominator 14 | int alloc; // size of bit array 15 | int least_sig_bit; // first nonzero bit 16 | int most_sig_bit; // last nonzero bit 17 | } frac_struct; 18 | typedef frac_struct* frac_t; 19 | 20 | frac_t Alloc_frac(void); 21 | void Free_frac(frac_t frac); 22 | void Add(frac_t frac1, unsigned frac2); 23 | void Left_shift_num(frac_t frac, unsigned b); 24 | void Add_to_num(frac_t frac, unsigned power); 25 | void Reduce(frac_t frac); 26 | void Right_shift_num(frac_t frac, int bits); 27 | void Find_sig_bits(frac_t frac); 28 | int Equals(frac_t frac, unsigned val); 29 | int Equals_bit_array(frac_t frac, unsigned val); 30 | unsigned Convert_num_to_unsigned(frac_t frac); 31 | void Print_frac(frac_t frac, int my_rank, char title[]); 32 | 33 | void Debug_print_frac(frac_t frac); 34 | void Assign(frac_t frac, unsigned num, unsigned denom); 35 | #endif 36 | -------------------------------------------------------------------------------- 
/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mat_17e: -------------------------------------------------------------------------------- 1 | 17 2 | 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 3 | 2 0 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 4 | 2 2 0 2 1 2 2 2 2 2 2 2 2 2 2 2 2 5 | 2 2 2 0 2 1 2 2 2 2 2 2 2 2 2 2 2 6 | 2 2 2 2 0 2 1 2 2 2 2 2 2 2 2 2 2 7 | 2 2 2 2 2 0 2 1 2 2 2 2 2 2 2 2 2 8 | 2 2 2 2 2 2 0 2 1 2 2 2 2 2 2 2 2 9 | 2 2 2 2 2 2 2 0 2 1 2 2 2 2 2 2 2 10 | 2 2 2 2 2 2 2 2 0 2 1 2 2 2 2 2 2 11 | 2 2 2 2 2 2 2 2 2 0 2 1 2 2 2 2 2 12 | 2 2 2 2 2 2 2 2 2 2 0 2 1 2 2 2 2 13 | 2 2 2 2 2 2 2 2 2 2 2 0 2 1 2 2 2 14 | 2 2 2 2 2 2 2 2 2 2 2 2 0 2 1 2 2 15 | 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 1 2 16 | 1 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 17 | 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 0 2 18 | 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 19 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mat_17e-mpi-dyn-spl20-cut5.out: -------------------------------------------------------------------------------- 1 | Proc 0 > Best tour 0x1002344b0: 0 16 1 3 5 7 9 11 13 15 2 4 6 8 10 12 14 0 2 | 3 | Cost = 17 4 | Elapsed time = 4.528709e+03 seconds 5 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mat_17e-mpi-dyn-spl8-cut8.out: -------------------------------------------------------------------------------- 1 | Proc 0 > Best tour 0x100234480: 0 16 1 3 5 7 9 11 13 15 2 4 6 8 10 12 14 0 2 | 3 | Cost = 17 4 | Elapsed time = 4.594413e+03 seconds 5 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mat_17e-mpi-stat.out: -------------------------------------------------------------------------------- 1 | Proc 0 > Best tour 0x1002342d0: 0 16 1 3 5 7 9 11 13 15 2 4 6 8 10 12 14 0 2 | 3 | Cost = 17 4 | Elapsed time = 
1.748393e+03 seconds 5 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mat_17e-pth-dyn-8-spl.out: -------------------------------------------------------------------------------- 1 | Best tour: 0 16 1 3 5 7 9 11 13 15 2 4 6 8 10 12 14 0 2 | 3 | Cost = 17 4 | Elapsed time = 1.633299e+03 seconds 5 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mat_17e-pth-stat.out: -------------------------------------------------------------------------------- 1 | Best tour: 0 16 1 3 5 7 9 11 13 15 2 4 6 8 10 12 14 0 2 | 3 | Cost = 17 4 | Elapsed time = 1.621996e+03 seconds 5 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mpi_tsp_dyn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mpi_tsp_dyn -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mpi_tsp_stat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/mpi_tsp_stat -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/nbody_basic: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/nbody_basic -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/nbody_red: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/nbody_red -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/nbody_red.dSYM/Contents/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | English 7 | CFBundleIdentifier 8 | com.apple.xcode.dsym.nbody_red 9 | CFBundleInfoDictionaryVersion 10 | 6.0 11 | CFBundlePackageType 12 | dSYM 13 | CFBundleSignature 14 | ???? 
15 | CFBundleShortVersionString 16 | 1.0 17 | CFBundleVersion 18 | 1 19 | 20 | 21 | -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/nbody_red.dSYM/Contents/Resources/DWARF/nbody_red: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/nbody_red.dSYM/Contents/Resources/DWARF/nbody_red -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/pth_tsp_dyn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/pth_tsp_dyn -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/pth_tsp_stat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/pth_tsp_stat -------------------------------------------------------------------------------- /AnIntroductiontoParallelProgrammingPeterPacheco/ipp-source-use/ch6/timer.h: -------------------------------------------------------------------------------- 1 | /* File: timer.h 2 | * 3 | * Purpose: Define a macro that returns the number of seconds that 4 | * have elapsed since some point in the past. The timer 5 | * should return times with microsecond accuracy. 
6 | * 7 | * Note: The argument passed to the GET_TIME macro should be 8 | * a double, *not* a pointer to a double. 9 | * 10 | * Example: 11 | * #include "timer.h" 12 | * . . . 13 | * double start, finish, elapsed; 14 | * . . . 15 | * GET_TIME(start); 16 | * . . . 17 | * Code to be timed 18 | * . . . 19 | * GET_TIME(finish); 20 | * elapsed = finish - start; 21 | * printf("The code to be timed took %e seconds\n", elapsed); 22 | * 23 | * IPP: Section 3.6.1 (pp. 121 and ff.) and Section 6.1.2 (pp. 273 and ff.) 24 | */ 25 | #ifndef _TIMER_H_ 26 | #define _TIMER_H_ 27 | 28 | #include 29 | 30 | /* The argument now should be a double (not a pointer to a double) */ 31 | #define GET_TIME(now) { \ 32 | struct timeval t; \ 33 | gettimeofday(&t, NULL); \ 34 | now = t.tv_sec + t.tv_usec/1000000.0; \ 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /Homeworks/ExampleCodes/Code2/Prime/Solution/performance test/primeMD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/ExampleCodes/Code2/Prime/Solution/performance test/primeMD -------------------------------------------------------------------------------- /Homeworks/ExampleCodes/Code2/Prime/Solution/performance test/result.txt: -------------------------------------------------------------------------------- 1 | // condition 2 | 3 | 4 | // define parameters 5 | #define NANO 1000000000 6 | #define Max_Thread_Num 256 // define using how many threads 7 | #define MAXIMUM 0x7fffffffffffffff 8 | #define BLOCK_SIZE 65536 9 | 10 | 11 | // global vars 12 | long int n = 30000000; // how many prime number 13 | 14 | 15 | // result 16 | //---------------------------------------------------------------------------------------------------------------- 17 | 18 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel 
Programming/Homeworks/Homework 2/prime/Solution/performance test$ icpc -lrt -lpthread primeMD.cpp -o primeMD 19 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 20 | serial: found 1857859 primes cost = 9.1509818700 21 | mtx : found 2380569 primes cost = 1.3733918010 speedup = 6.663053 22 | atomic: found 2022701 primes cost = 1.8234812670 speedup = 5.018413 23 | dup : found 2191429 primes cost = 1.2591722410 speedup = 7.267458 24 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 25 | serial: found 1857859 primes cost = 9.1485292310 26 | mtx : found 2341218 primes cost = 1.2717440140 speedup = 7.193688 27 | atomic: found 2213863 primes cost = 1.2649233010 speedup = 7.232477 28 | dup : found 2365562 primes cost = 1.2777124400 speedup = 7.160085 29 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 30 | serial: found 1857859 primes cost = 9.1639542380 31 | mtx : found 2377058 primes cost = 1.2748670420 speedup = 7.188165 32 | atomic: found 2206272 primes cost = 1.2579527460 speedup = 7.284816 33 | dup : found 2155165 primes cost = 1.9573085450 speedup = 4.681916 34 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 35 | serial: found 1857859 primes cost = 9.1582943440 36 | mtx : found 2265501 primes cost = 1.3116203400 speedup = 6.982428 37 | atomic: found 2196891 primes cost = 1.2740057870 speedup = 7.188581 38 | dup : found 2013231 primes cost = 1.8049689870 speedup = 5.073934 39 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 40 | serial: found 1857859 primes cost = 9.1469963760 41 | mtx : found 2294324 primes cost = 1.3071436100 speedup = 6.997698 42 | atomic: found 2006163 primes cost = 1.7818589700 
speedup = 5.133401 43 | dup : found 2375195 primes cost = 1.3314744890 speedup = 6.869825 44 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 45 | serial: found 1857859 primes cost = 9.1476415140 46 | mtx : found 2206209 primes cost = 1.2591002780 speedup = 7.265221 47 | atomic: found 2216222 primes cost = 1.2994178030 speedup = 7.039800 48 | dup : found 2359980 primes cost = 1.2721454700 speedup = 7.190720 49 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 50 | serial: found 1857859 primes cost = 9.1980578450 51 | mtx : found 2374529 primes cost = 1.3017162810 speedup = 7.066100 52 | atomic: found 2208447 primes cost = 1.3069308060 speedup = 7.037907 53 | dup : found 2072645 primes cost = 1.8827981040 speedup = 4.885313 54 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 55 | serial: found 1857859 primes cost = 9.1652981570 56 | mtx : found 2213662 primes cost = 1.2680693800 speedup = 7.227758 57 | atomic: found 2188975 primes cost = 1.3110168430 speedup = 6.990984 58 | dup : found 2173835 primes cost = 2.0233755280 speedup = 4.529707 59 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ 60 | 61 | 62 | -------------------------------------------------------------------------------- /Homeworks/ExampleCodes/Code2/Prime/Solution/result verify/primeMD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/ExampleCodes/Code2/Prime/Solution/result verify/primeMD -------------------------------------------------------------------------------- /Homeworks/ExampleCodes/Code2/Prime/source code/prime: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/ExampleCodes/Code2/Prime/source code/prime -------------------------------------------------------------------------------- /Homeworks/ExampleCodes/Code2/Prime/source code/testResult.txt: -------------------------------------------------------------------------------- 1 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ icpc -lrt -lpthread prime.cpp -o prime 2 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 3 | serial: found 1857859 primes cost = 9.1461615650 4 | mtx : found 1857859 primes cost = 1.8300433560 speedup = 4.997784 5 | atomic: found 1857859 primes cost = 1.8292944590 speedup = 4.999830 6 | dup : found 1857859 primes cost = 1.8282121360 speedup = 5.002790 7 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 8 | serial: found 1857859 primes cost = 9.1455496690 9 | mtx : found 1857859 primes cost = 1.8180072810 speedup = 5.030535 10 | atomic: found 1857859 primes cost = 1.8124327890 speedup = 5.046008 11 | dup : found 1857859 primes cost = 1.8262520770 speedup = 5.007824 12 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 13 | serial: found 1857859 primes cost = 9.1607252410 14 | mtx : found 1857859 primes cost = 1.8387578680 speedup = 4.982018 15 | atomic: found 1857859 primes cost = 1.8407987380 speedup = 4.976495 16 | dup : found 1857859 primes cost = 1.8388292810 speedup = 4.981825 17 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 18 | serial: found 1857859 primes cost = 9.1694271300 19 | mtx : found 1857859 primes cost = 1.7892989500 speedup = 5.124592 20 | atomic: 
found 1857859 primes cost = 1.8102597470 speedup = 5.065255 21 | dup : found 1857859 primes cost = 1.8344786180 speedup = 4.998383 22 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 23 | serial: found 1857859 primes cost = 9.1465545060 24 | mtx : found 1857859 primes cost = 1.8231061690 speedup = 5.017017 25 | atomic: found 1857859 primes cost = 1.8275519580 speedup = 5.004812 26 | dup : found 1857859 primes cost = 1.8277835940 speedup = 5.004178 27 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 28 | serial: found 1857859 primes cost = 9.1603896740 29 | mtx : found 1857859 primes cost = 1.8346655100 speedup = 4.992948 30 | atomic: found 1857859 primes cost = 1.8453990010 speedup = 4.963907 31 | dup : found 1857859 primes cost = 1.8232074830 speedup = 5.024327 32 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ 33 | 34 | -------------------------------------------------------------------------------- /Homeworks/ExampleCodes/Code3/multiBody.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int BodyNum=0; 7 | int TimeSteps=0; 8 | 9 | int main(int argc, char** argv ) 10 | { 11 | 12 | int n, t, i, j; 13 | double *pBody;//´æ´¢Á£×ӵĻù±¾ÐÅÏ¢£¬Ã¿¸öÁ£×ÓÕ¼ÓÃ4¸öÁ¬ÐøµÄ¸¡µãÊý£ºmass¡¢x¡¢y¡¢z 14 | double *pForce;//´æ´¢Á£×ÓµÄÊÜÁ¦£¬Ã¿¸öÁ£×ÓÕ¼ÓÃ3¸öÁ¬ÐøµÄ¸¡µãÊý£ºFx¡¢Fy¡¢Fz 15 | double fac, fx, fy, fz; 16 | double dx, dy, dz, sq, dist; 17 | clock_t c_start, c_end; 18 | double run_time; 19 | char *pStr; 20 | FILE *fResult; 21 | 22 | for ( i=1; i 13 | #include 14 | #define NXPROB 1000 15 | #define NYPROB 1000 16 | struct Parms 17 | { 18 | float cx; 19 | float cy; 20 | int nts; 21 | } parms = {0.1, 0.1, 50}; 22 | 23 | main() 24 | { 25 | float u[2][NXPROB][NYPROB]; 26 | int ix, iy, iz, it; 27 | void inidat(), prtdat(), update(); 
28 | 29 | /************************************************************************ 30 | ** Initialize grid. 31 | *************************************************************************/ 32 | inidat(NXPROB, NYPROB, u); 33 | prtdat(NXPROB, NYPROB, u, "initial.dat"); 34 | for (ix = 0; ix <= NXPROB-1; ix++) 35 | { 36 | u[1][ix][0] = u[0][ix][0]; 37 | u[1][ix][NYPROB-1] = u[0][ix][NYPROB-1]; 38 | } 39 | for (iy = 0; iy <= NYPROB-1; iy++) 40 | { 41 | u[1][0][iy] = u[0][0][iy]; 42 | u[1][NXPROB-1][iy] = u[0][NXPROB-1][iy]; 43 | } 44 | 45 | /*********************************************************************** 46 | ** Iterate over all timesteps. 47 | ************************************************************************/ 48 | iz = 0; 49 | for (it = 1; it <= parms.nts; it++) 50 | { 51 | update(NXPROB, NYPROB, &u[iz][0][0], &u[1-iz][0][0]); 52 | iz = 1 - iz; 53 | } 54 | 55 | prtdat(NXPROB, NYPROB, &u[iz][0][0], "final.dat"); 56 | } 57 | 58 | /**************************************************************************** 59 | * subroutine update 60 | ****************************************************************************/ 61 | void 62 | update(nx, ny, u1, u2) 63 | int nx, ny; 64 | /*float u1[nx][ny], u2[nx][ny];*/ 65 | float *u1, *u2; 66 | { 67 | int ix, iy; 68 | 69 | for (ix = 1; ix <= nx-2; ix++) 70 | { 71 | for (iy = 1; iy <= ny-2; iy++) 72 | { 73 | *(u2+ix*ny+iy) = *(u1+ix*ny+iy) + 74 | parms.cx * (*(u1+(ix+1)*ny+iy) + *(u1+(ix-1)*ny+iy) - 75 | 2.0 * *(u1+ix*ny+iy) ) + 76 | parms.cy * (*(u1+ix*ny+iy+1) + *(u1+ix*ny+iy-1) - 77 | 2.0 * *(u1+ix*ny+iy) ); 78 | } 79 | } 80 | } 81 | 82 | /***************************************************************************** 83 | * subroutine inidat 84 | *****************************************************************************/ 85 | void 86 | inidat(nx, ny, u1) 87 | int nx, ny; 88 | /*float u1[nx][ny];*/ 89 | float *u1; 90 | { 91 | int ix, iy; 92 | 93 | for (ix = 0; ix <= nx-1; ix++) 94 | { 95 | for (iy = 0; iy <= ny-1; 
iy++) 96 | { 97 | /* u1[ix][iy] = (float)(ix * (nx - ix - 1) * iy * (ny - iy - 1)); */ 98 | *(u1+ix*ny+iy) = (float)(ix * (nx - ix - 1) * iy * (ny - iy - 1)); 99 | } 100 | } 101 | } 102 | 103 | /************************************************************************** 104 | * subroutine prtdat 105 | **************************************************************************/ 106 | void 107 | prtdat(nx, ny, u1, fnam) 108 | int nx, ny; 109 | /*float u1[nx][ny];*/ 110 | float *u1; 111 | char *fnam; 112 | { 113 | int ix, iy; 114 | FILE *fp; 115 | 116 | fp = fopen(fnam, "w"); 117 | for (iy = ny-1; iy >= 0; iy--) 118 | { 119 | for (ix = 0; ix <= nx-1; ix++) 120 | { 121 | fprintf(fp, "%8.3f", *(u1+ix*ny+iy)); 122 | if (ix != nx-1) 123 | { 124 | fprintf(fp, " "); 125 | } 126 | else 127 | { 128 | fprintf(fp, "\n"); 129 | } 130 | } 131 | } 132 | fclose(fp); 133 | printf("Wrote file: %s\n",fnam); 134 | } 135 | -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/code_debug/primeModified: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/code_debug/primeModified -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/code_debug/primeTemp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/code_debug/primeTemp -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/code_debug/primeTest: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/code_debug/primeTest -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/code_debug/primeTest2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/code_debug/primeTest2 -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/code_debug/sortBucket: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/code_debug/sortBucket -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/original_codes/prime: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/original_codes/prime -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/original_codes/testResult.txt: -------------------------------------------------------------------------------- 1 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ icpc -lrt -lpthread prime.cpp -o prime 2 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 3 | serial: found 1857859 primes cost = 9.1461615650 4 | mtx : found 1857859 primes cost = 1.8300433560 speedup = 4.997784 5 | atomic: found 1857859 primes cost = 1.8292944590 
speedup = 4.999830 6 | dup : found 1857859 primes cost = 1.8282121360 speedup = 5.002790 7 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 8 | serial: found 1857859 primes cost = 9.1455496690 9 | mtx : found 1857859 primes cost = 1.8180072810 speedup = 5.030535 10 | atomic: found 1857859 primes cost = 1.8124327890 speedup = 5.046008 11 | dup : found 1857859 primes cost = 1.8262520770 speedup = 5.007824 12 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 13 | serial: found 1857859 primes cost = 9.1607252410 14 | mtx : found 1857859 primes cost = 1.8387578680 speedup = 4.982018 15 | atomic: found 1857859 primes cost = 1.8407987380 speedup = 4.976495 16 | dup : found 1857859 primes cost = 1.8388292810 speedup = 4.981825 17 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 18 | serial: found 1857859 primes cost = 9.1694271300 19 | mtx : found 1857859 primes cost = 1.7892989500 speedup = 5.124592 20 | atomic: found 1857859 primes cost = 1.8102597470 speedup = 5.065255 21 | dup : found 1857859 primes cost = 1.8344786180 speedup = 4.998383 22 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 23 | serial: found 1857859 primes cost = 9.1465545060 24 | mtx : found 1857859 primes cost = 1.8231061690 speedup = 5.017017 25 | atomic: found 1857859 primes cost = 1.8275519580 speedup = 5.004812 26 | dup : found 1857859 primes cost = 1.8277835940 speedup = 5.004178 27 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ ./prime 28 | serial: found 1857859 primes cost = 9.1603896740 29 | mtx : found 1857859 primes cost = 1.8346655100 speedup = 4.992948 30 | atomic: found 1857859 primes cost = 1.8453990010 speedup = 4.963907 31 | dup : found 1857859 primes cost = 1.8232074830 speedup = 5.024327 32 | 
yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/source code$ 33 | 34 | -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/solutions/performance_test/primeMD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/solutions/performance_test/primeMD -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/solutions/performance_test/result.txt: -------------------------------------------------------------------------------- 1 | // condition 2 | 3 | 4 | // define parameters 5 | #define NANO 1000000000 6 | #define Max_Thread_Num 256 // define using how many threads 7 | #define MAXIMUM 0x7fffffffffffffff 8 | #define BLOCK_SIZE 65536 9 | 10 | 11 | // global vars 12 | long int n = 30000000; // how many prime number 13 | 14 | 15 | // result 16 | //---------------------------------------------------------------------------------------------------------------- 17 | 18 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ icpc -lrt -lpthread primeMD.cpp -o primeMD 19 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 20 | serial: found 1857859 primes cost = 9.1509818700 21 | mtx : found 2380569 primes cost = 1.3733918010 speedup = 6.663053 22 | atomic: found 2022701 primes cost = 1.8234812670 speedup = 5.018413 23 | dup : found 2191429 primes cost = 1.2591722410 speedup = 7.267458 24 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 25 | serial: found 1857859 primes cost = 9.1485292310 26 | mtx : found 2341218 primes cost = 1.2717440140 speedup 
= 7.193688 27 | atomic: found 2213863 primes cost = 1.2649233010 speedup = 7.232477 28 | dup : found 2365562 primes cost = 1.2777124400 speedup = 7.160085 29 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 30 | serial: found 1857859 primes cost = 9.1639542380 31 | mtx : found 2377058 primes cost = 1.2748670420 speedup = 7.188165 32 | atomic: found 2206272 primes cost = 1.2579527460 speedup = 7.284816 33 | dup : found 2155165 primes cost = 1.9573085450 speedup = 4.681916 34 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 35 | serial: found 1857859 primes cost = 9.1582943440 36 | mtx : found 2265501 primes cost = 1.3116203400 speedup = 6.982428 37 | atomic: found 2196891 primes cost = 1.2740057870 speedup = 7.188581 38 | dup : found 2013231 primes cost = 1.8049689870 speedup = 5.073934 39 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 40 | serial: found 1857859 primes cost = 9.1469963760 41 | mtx : found 2294324 primes cost = 1.3071436100 speedup = 6.997698 42 | atomic: found 2006163 primes cost = 1.7818589700 speedup = 5.133401 43 | dup : found 2375195 primes cost = 1.3314744890 speedup = 6.869825 44 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 45 | serial: found 1857859 primes cost = 9.1476415140 46 | mtx : found 2206209 primes cost = 1.2591002780 speedup = 7.265221 47 | atomic: found 2216222 primes cost = 1.2994178030 speedup = 7.039800 48 | dup : found 2359980 primes cost = 1.2721454700 speedup = 7.190720 49 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 50 | serial: found 1857859 primes cost = 9.1980578450 51 | mtx : found 2374529 primes cost = 1.3017162810 speedup = 7.066100 52 | 
atomic: found 2208447 primes cost = 1.3069308060 speedup = 7.037907 53 | dup : found 2072645 primes cost = 1.8827981040 speedup = 4.885313 54 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ ./primeMD 55 | serial: found 1857859 primes cost = 9.1652981570 56 | mtx : found 2213662 primes cost = 1.2680693800 speedup = 7.227758 57 | atomic: found 2188975 primes cost = 1.3110168430 speedup = 6.990984 58 | dup : found 2173835 primes cost = 2.0233755280 speedup = 4.529707 59 | yangyang@yangyang-XPS-8900:~/Desktop/Parallel Programming/Homeworks/Homework 2/prime/Solution/performance test$ 60 | 61 | 62 | -------------------------------------------------------------------------------- /Homeworks/Homework_2/prime_number/solutions/result_verify/primeMD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/prime_number/solutions/result_verify/primeMD -------------------------------------------------------------------------------- /Homeworks/Homework_2/第二次作业.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_2/第二次作业.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_3/Homework 3: -------------------------------------------------------------------------------- 1 | 作业3 并行算法的性能评估(10月31日) 2 | 3 | 4 | 假设有一个计算问题,其中串行计算量占15%.为实现并行计算,需要增加1.5%的计算量,这部分计算量是不能并行执行的,并且与所使用处理器/执行内核的数量无关. 此外,每个处理器/执行内核在执行并行计算任务的过程中,还需要执行为所承担的并行任务执行一定的额外操作.这些额外操作的计算量是所承担并行任务量的0.1%. 
请问 5 | 6 | a) 在一个有M颗处理器/执行内核的计算平台上,并行程序可取得的最大加速比是多少 7 | b) 为了使得并行计算效率至少为70%,M最大可为多少 8 | -------------------------------------------------------------------------------- /Homeworks/Homework_3/第三次作业.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_3/第三次作业.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_4/Homework_4.txt: -------------------------------------------------------------------------------- 1 | 作业 4 Pthreads实现流水并行算法(11月14日) 2 | 已附加文件: 3 | 文件 简化N-body计算的串行程序 (3.158 KB) 4 | N-Body问题是分子化学和天体物理学的基本计算模型.在问题域中有N个粒子,这些粒子之间互相存在万有引力、以及其他作用力,使得这些粒子发生运动. N-Body计算的目的是预测在未来某个时刻,这些粒子所处的状态.请采用pthread,对附件中的简化N-body串行计算程序并行化.请给出并行算法和测试结果. 5 | 6 | 7 | -------------------------------------------------------------------------------- /Homeworks/Homework_4/SourceCodes/multiBody.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int BodyNum=0; 7 | int TimeSteps=0; 8 | 9 | int main(int argc, char** argv ) 10 | { 11 | 12 | int n, t, i, j; 13 | double *pBody;//´æ´¢Á£×ӵĻù±¾ÐÅÏ¢£¬Ã¿¸öÁ£×ÓÕ¼ÓÃ4¸öÁ¬ÐøµÄ¸¡µãÊý£ºmass¡¢x¡¢y¡¢z 14 | double *pForce;//´æ´¢Á£×ÓµÄÊÜÁ¦£¬Ã¿¸öÁ£×ÓÕ¼ÓÃ3¸öÁ¬ÐøµÄ¸¡µãÊý£ºFx¡¢Fy¡¢Fz 15 | double fac, fx, fy, fz; 16 | double dx, dy, dz, sq, dist; 17 | clock_t c_start, c_end; 18 | double run_time; 19 | char *pStr; 20 | FILE *fResult; 21 | 22 | for ( i=1; i 2 | #include 3 | #include 4 | 5 | int main(int argc, char ** argv) 6 | { 7 | MPI_Comm intra_gcomm, inter_lcomm, inter_rcomm, intra_lcomm, intra_rcomm; 8 | MPI_Status status; 9 | int keyid, flag; 10 | int myid, size; 11 | char message[100]; 12 | 13 | MPI_Init( &argc, &argv ); 14 | MPI_Comm_rank( MPI_COMM_WORLD, &myid ); 15 | MPI_Comm_size( MPI_COMM_WORLD, &size ); 16 | 17 | if (myid==0) printf("Total size : 
%5d\n",size); 18 | //printf("%d\n",myid); 19 | if ( myid > 3*(size/3) - 1 ) 20 | { 21 | keyid = MPI_UNDEFINED; 22 | flag = 0; 23 | } 24 | else 25 | { 26 | keyid = myid % 3; 27 | flag = 1; 28 | } 29 | 30 | MPI_Comm_split( MPI_COMM_WORLD, keyid, myid, &intra_gcomm ); 31 | if ( keyid == 0 ) 32 | { 33 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 1, 1, &inter_lcomm ); 34 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 2, 2, &inter_rcomm ); 35 | MPI_Intercomm_merge( inter_lcomm, keyid, &intra_lcomm ); 36 | MPI_Intercomm_merge( inter_rcomm, keyid, &intra_rcomm ); 37 | printf("keyid -%d\n",myid); 38 | } 39 | else if ( keyid == 1 ) 40 | { 41 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 0, 1, &inter_rcomm ); 42 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 2, 3, &inter_lcomm ); 43 | MPI_Intercomm_merge( inter_rcomm, keyid, &intra_rcomm ); 44 | MPI_Intercomm_merge( inter_lcomm, keyid, &intra_lcomm ); 45 | printf("keyid -%d\n",myid); 46 | } 47 | else if (keyid == 2 ) 48 | { 49 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 0, 3, &inter_lcomm ); 50 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 1, 3, &inter_rcomm ); 51 | MPI_Intercomm_merge( inter_lcomm, keyid, &intra_lcomm ); 52 | MPI_Intercomm_merge( inter_rcomm, keyid, &intra_rcomm ); 53 | printf("keyid -%d\n",myid); 54 | } 55 | 56 | //MPI_Barrier(MPI_COMM_WORLD); 57 | //printf("barrier-myid %d\n",myid); 58 | int rrank, lrank, rsize, lsize, gsize, grank; 59 | if (flag==0) 60 | printf("process %d is excluded !\n", myid); 61 | else if (flag==1) 62 | { 63 | printf("%d\n",myid); 64 | //if (myid==0) printf("1\n"); 65 | MPI_Comm_size( intra_lcomm, &lsize ); 66 | MPI_Comm_rank( intra_lcomm, &lrank ); 67 | //if (myid==0) printf("2\n"); 68 | MPI_Comm_size( intra_rcomm, &rsize ); 69 | MPI_Comm_rank( intra_rcomm, &rrank ); 70 | //if (myid==0) printf("3\n"); 71 | MPI_Comm_size( intra_gcomm, &gsize ); 72 | MPI_Comm_rank( intra_gcomm, &grank ); 73 | 74 | 75 | if ( myid == 0 ) 76 | 
{ 77 | printf( "color myid size lrank lsize rrank rsize grank gsize\n" ); 78 | printf( "%5d %4d %4d %5d %5d %5d %5d %5d %5d\n",keyid, myid, size, lrank, lsize, rrank, rsize, grank, gsize ); 79 | for ( int i = 1; i < 3*(size/3); i++) 80 | { 81 | //mpi_any_source不会堵塞,否则会堵塞 82 | MPI_Recv( message, 100, MPI_CHAR, MPI_ANY_SOURCE, 10, MPI_COMM_WORLD, &status ); 83 | printf( "%s\n", message ); 84 | } 85 | } 86 | else 87 | { 88 | printf("%d\n",myid); 89 | sprintf( message, "%5d %4d %4d %5d %5d %5d %5d %5d %5d",keyid, myid, size, lrank, lsize, rrank, rsize, grank, gsize ); 90 | MPI_Send( message, strlen(message)+1, MPI_CHAR, 0, 10, MPI_COMM_WORLD ); 91 | } 92 | } 93 | 94 | if (myid==0) printf("1\n"); 95 | int ndims=2, cart_rank; 96 | int dims[2], periods[2], coords[2]; 97 | MPI_Comm comm_cart, comm_new; 98 | MPI_Comm_split( MPI_COMM_WORLD, flag, myid, &comm_new ); 99 | if (flag==1) 100 | { 101 | dims[0] = 3; 102 | dims[1] = size / 3; 103 | periods[0] = false; 104 | periods[1] = false; 105 | MPI_Cart_create( comm_new, ndims, dims, periods, false, &comm_cart ); 106 | if ( myid == 0 ) 107 | { 108 | for ( int i = 0; i < 3; i++) 109 | { 110 | for(int j = 0; j < size/3; j++) 111 | { 112 | coords[0] = i; 113 | coords[1] = j; 114 | MPI_Cart_rank( comm_cart, coords, &cart_rank ); 115 | printf( "{%d, %d} myid=%d\n", coords[0], coords[1], cart_rank ); 116 | } 117 | } 118 | } 119 | 120 | MPI_Comm_free( &inter_lcomm ); 121 | MPI_Comm_free( &inter_rcomm ); 122 | MPI_Comm_free( &intra_lcomm ); 123 | MPI_Comm_free( &intra_rcomm ); 124 | MPI_Comm_free( &intra_gcomm ); 125 | MPI_Comm_free( &comm_cart ); 126 | MPI_Comm_free( &comm_new ); 127 | } 128 | MPI_Finalize(); 129 | } 130 | 131 | -------------------------------------------------------------------------------- /Homeworks/Homework_6/homework 6.txt: -------------------------------------------------------------------------------- 1 | 1. 
作业信息 2 | 3 | 名称: 4 | 作业6 MPI程序的计算资源管理(11月28日) 5 | 说明 6 | 改写PDF文档4.3.1中的示例程序。从mpiexec创建的进程组中,使用MPI_Comm_split()实现示例程序要求的“环”。若mpiexec所创建的进程数不为3的整数倍,则将序号高的进程余留出来不参加“环” 7 | 截止日期 2016年11月28日 下午11时59分00秒 8 | 满分 100 9 | -------------------------------------------------------------------------------- /Homeworks/Homework_7/第七次作业.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_7/第七次作业.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_7/第七次作业.txt: -------------------------------------------------------------------------------- 1 | {\rtf1\ansi\ansicpg936\cocoartf1404\cocoasubrtf470 2 | {\fonttbl\f0\fnil\fcharset0 LucidaGrande;\f1\fnil\fcharset134 PingFangSC-Regular;} 3 | {\colortbl;\red255\green255\blue255;\red154\green154\blue154;\red23\green152\blue185;\red52\green52\blue52; 4 | \red22\green125\blue151;} 5 | {\*\listtable{\list\listtemplateid1\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{none\}}{\leveltext\leveltemplateid1\'00;}{\levelnumbers;}\fi-360\li720\lin720 }{\listname ;}\listid1}} 6 | {\*\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}} 7 | \paperw11900\paperh16840\margl1440\margr1440\vieww20300\viewh10220\viewkind0 8 | \deftab720 9 | \pard\pardeftab720\partightenfactor0 10 | 11 | \f0\b\fs44 \cf2 \expnd0\expndtw0\kerning0 12 | 1. 
13 | \fs32 \cf3 \'a0 14 | \f1 \'d7\'f7\'d2\'b5\'d0\'c5\'cf\'a2 15 | \f0 \ 16 | \pard\tx220\tx720\pardeftab720\li720\fi-720\partightenfactor0 17 | \ls1\ilvl0 18 | \b0\fs24 \cf4 \kerning1\expnd0\expndtw0 19 | \f1 \expnd0\expndtw0\kerning0 20 | \'c3\'fb\'b3\'c6 21 | \f0 : \uc0\u8232 22 | \f1 \'d7\'f7\'d2\'b5 23 | \f0 7 MPI 24 | \f1 \'b5\'e3\'b5\'bd\'b5\'e3\'cd\'a8\'d0\'c5\'a3\'a8 25 | \f0 12 26 | \f1 \'d4\'c2 27 | \f0 5 28 | \f1 \'c8\'d5\'a3\'a9 29 | \f0 \cf0 \uc0\u8232 \cf4 \ 30 | \pard\tx220\tx720\pardeftab720\li720\fi-720\partightenfactor0 31 | \ls1\ilvl0\cf4 \kerning1\expnd0\expndtw0 32 | \f1 \expnd0\expndtw0\kerning0 33 | \'cb\'b5\'c3\'f7 34 | \f0 \uc0\u8232 35 | \f1\fs26 \'c0\'fb\'d3\'c3 36 | \f0 MPI 37 | \f1 \'a3\'ac\'b7\'d6\'b1\'f0\'d3\'c3\'d7\'e8\'c8\'fb\'ca\'bd\'cd\'a8\'d0\'c5\'a1\'a2\'b7\'c7\'d7\'e8\'c8\'fb\'ca\'b9\'cd\'a8\'d0\'c5\'d6\'d8\'d0\'c2\'b1\'e0\'c2\'eb\'ca\'b5\'cf\'d6\'d7\'f7\'d2\'b5 38 | \f0 N-body 39 | \f1 \'bc\'c6\'cb\'e3\'ce\'ca\'cc\'e2\'a1\'a3\'d4\'da\'cd\'ac\'d2\'bb\'b8\'f6\'b6\'e0\'b4\'a6\'c0\'ed\'bb\'fa\'cf\'b5\'cd\'b3\'c9\'cf\'a3\'ac\'ca\'b9\'d3\'c3\'b2\'bb\'cd\'ac\'b5\'c4\'ca\'fd\'be\'dd\'b9\'e6\'c4\'a3\'a3\'ac\'b6\'d4\'b1\'c8 40 | \f0 MPI 41 | \f1 \'b2\'a2\'d0\'d0\'b3\'cc\'d0\'f2\'d3\'eb 42 | \f0 pthread 43 | \f1 \'b2\'a2\'d0\'d0\'b3\'cc\'d0\'f2\'b5\'c4\'bc\'d3\'cb\'d9\'b1\'c8\'a3\'ac\'b7\'d6\'ce\'f6\'c6\'e4\'bc\'d3\'cb\'d9\'b1\'c8\'b2\'ee\'d2\'ec\'b5\'c4\'d4\'ad\'d2\'f2\'a1\'a3 44 | \f0 \uc0\u8232 45 | \f1 \'b4\'ae\'d0\'d0\'a1\'a2 46 | \f0 pthread 47 | \f1 \'b2\'a2\'d0\'d0 48 | \f0 N-Body 49 | \f1 \'bc\'c6\'cb\'e3\'b5\'c4\'b2\'ce\'bf\'bc\'ca\'b5\'cf\'d6\'b4\'fa\'c2\'eb{\field{\*\fldinst{HYPERLINK "http://course.pku.edu.cn/bbcswebdav/pid-208694-dt-content-rid-1288381_2/xid-1288381_2"}}{\fldrslt 50 | \f0 \cf5 NBody.cpp}} 51 | \f0 \'a0\uc0\u8232 \u8232 52 | \fs24 \ 53 | \ls1\ilvl0\kerning1\expnd0\expndtw0 54 | \f1 \expnd0\expndtw0\kerning0 55 | \'bd\'d8\'d6\'b9\'c8\'d5\'c6\'da 56 | \f0 \uc0\u8232 2016 57 | \f1 \'c4\'ea 58 | \f0 12 59 | 
\f1 \'d4\'c2 60 | \f0 5 61 | \f1 \'c8\'d5 62 | \f0 63 | \f1 \'cf\'c2\'ce\'e7 64 | \f0 11 65 | \f1 \'ca\'b1 66 | \f0 59 67 | \f1 \'b7\'d6 68 | \f0 00 69 | \f1 \'c3\'eb 70 | \f0 \ 71 | \ls1\ilvl0\kerning1\expnd0\expndtw0 72 | \f1 \expnd0\expndtw0\kerning0 73 | \'c2\'fa\'b7\'d6 74 | \f0 \uc0\u8232 100} -------------------------------------------------------------------------------- /Homeworks/Homework_8/第八次作业.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_8/第八次作业.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_8/第八次作业.txt: -------------------------------------------------------------------------------- 1 | {\rtf1\ansi\ansicpg936\cocoartf1404\cocoasubrtf470 2 | {\fonttbl\f0\fnil\fcharset0 LucidaGrande;\f1\fnil\fcharset134 PingFangSC-Regular;} 3 | {\colortbl;\red255\green255\blue255;\red154\green154\blue154;\red23\green152\blue185;\red52\green52\blue52; 4 | } 5 | {\*\listtable{\list\listtemplateid1\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{none\}}{\leveltext\leveltemplateid1\'00;}{\levelnumbers;}\fi-360\li720\lin720 }{\listname ;}\listid1}} 6 | {\*\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}} 7 | \paperw11900\paperh16840\margl1440\margr1440\vieww10800\viewh8400\viewkind0 8 | \deftab720 9 | \pard\pardeftab720\partightenfactor0 10 | 11 | \f0\b\fs43\fsmilli21840 \cf2 \expnd0\expndtw0\kerning0 12 | 1. 
13 | \fs31\fsmilli15600 \cf3 \'a0 14 | \f1 \'d7\'f7\'d2\'b5\'d0\'c5\'cf\'a2 15 | \f0 \ 16 | \pard\tx220\tx720\pardeftab720\li720\fi-720\partightenfactor0 17 | \ls1\ilvl0 18 | \b0\fs25 \cf4 \kerning1\expnd0\expndtw0 19 | \f1 \expnd0\expndtw0\kerning0 20 | \'c3\'fb\'b3\'c6 21 | \f0 : \uc0\u8232 22 | \f1 \'d7\'f7\'d2\'b5 23 | \f0 8 MPI 24 | \f1 \'b5\'a5\'b1\'df\'cd\'a8\'d0\'c5\'ba\'cd\'b9\'b2\'cf\'ed\'ce\'c4\'bc\'fe\'b7\'c3\'ce\'ca 25 | \f0 (12 26 | \f1 \'d4\'c2 27 | \f0 12 28 | \f1 \'c8\'d5 29 | \f0 )\cf0 \uc0\u8232 \cf4 \ 30 | \pard\tx220\tx720\pardeftab720\li720\fi-720\partightenfactor0 31 | \ls1\ilvl0\cf4 \kerning1\expnd0\expndtw0 32 | \f1 \expnd0\expndtw0\kerning0 33 | \'cb\'b5\'c3\'f7 34 | \f0 \uc0\u8232 35 | \fs26 \'a0 36 | \f1 \'b2\'ce\'bf\'bc 37 | \f0 4.5.1 38 | \f1 \'bd\'da\'d6\'d0 39 | \f0 2D5P 40 | \f1 \'c4\'a3\'b0\'e5\'bc\'c6\'cb\'e3\'b5\'c4 41 | \f0 MPI 42 | \f1 \'b5\'e3\'b6\'d4\'b5\'e3\'cd\'a8\'d0\'c5\'b2\'a2\'d0\'d0\'b3\'cc\'d0\'f2\'a3\'ac\'d2\'d4\'b5\'a5\'b1\'df\'cd\'a8\'d0\'c5\'ca\'b5\'cf\'d6\'d2\'bb\'b8\'f6 43 | \f0 2D5P 44 | \f1 \'c4\'a3\'b0\'e5\'bc\'c6\'cb\'e3\'b5\'c4 45 | \f0 MPI 46 | \f1 \'b2\'a2\'d0\'d0\'b3\'cc\'d0\'f2\'a3\'ac\'b2\'a2\'bd\'ab\'bc\'c6\'cb\'e3\'bd\'e1\'b9\'fb\'ca\'e4\'b3\'f6\'b5\'bd\'d2\'bb\'b8\'f6\'b6\'fe\'bd\'f8\'d6\'c6\'ce\'c4\'bc\'fe\'d6\'d0\'a1\'a3\'be\'d8\'d5\'f3\'d4\'da\'ca\'e4\'b3\'f6\'ce\'c4\'bc\'fe\'d6\'d0\'b0\'b4\'d5\'d5\'d0\'d0\'d3\'c5\'cf\'c8\'b4\'e6\'b4\'a2\'a1\'a3 47 | \f0 \uc0\u8232 48 | \fs25 \ 49 | \ls1\ilvl0\kerning1\expnd0\expndtw0 50 | \f1 \expnd0\expndtw0\kerning0 51 | \'bd\'d8\'d6\'b9\'c8\'d5\'c6\'da 52 | \f0 \uc0\u8232 2016 53 | \f1 \'c4\'ea 54 | \f0 12 55 | \f1 \'d4\'c2 56 | \f0 12 57 | \f1 \'c8\'d5 58 | \f0 59 | \f1 \'cf\'c2\'ce\'e7 60 | \f0 11 61 | \f1 \'ca\'b1 62 | \f0 59 63 | \f1 \'b7\'d6 64 | \f0 00 65 | \f1 \'c3\'eb 66 | \f0 \ 67 | \ls1\ilvl0\kerning1\expnd0\expndtw0 68 | \f1 \expnd0\expndtw0\kerning0 69 | \'c2\'fa\'b7\'d6 70 | \f0 \uc0\u8232 100} 
-------------------------------------------------------------------------------- /Homeworks/Homework_9/Documents/01531136.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/Documents/01531136.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_9/Documents/24.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/Documents/24.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_9/Documents/Fox_example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/Documents/Fox_example.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_9/Documents/Li-Fall-2012-CSE633.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/Documents/Li-Fall-2012-CSE633.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_9/Documents/lawn129.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/Documents/lawn129.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_9/Documents/matrixmult.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/Documents/matrixmult.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_9/Documents/ppagerank_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/Documents/ppagerank_report.pdf -------------------------------------------------------------------------------- /Homeworks/Homework_9/第九次作业.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Homework_9/第九次作业.pdf -------------------------------------------------------------------------------- /Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/150*******李师尧-作业七.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/150*******李师尧-作业七.pdf -------------------------------------------------------------------------------- /Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/mpi_process.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char ** argv) 6 | { 7 | MPI_Comm intra_gcomm, inter_lcomm, inter_rcomm, intra_lcomm, intra_rcomm; 8 | MPI_Status status; 9 | int keyid, flag; 10 | int myid, size; 11 | char message[100]; 12 | 13 | MPI_Init( &argc, &argv ); 14 | MPI_Comm_rank( MPI_COMM_WORLD, &myid ); 15 | MPI_Comm_size( MPI_COMM_WORLD, &size ); 16 | 17 | if 
(myid==0) printf("Total size : %5d\n",size); 18 | //printf("%d\n",myid); 19 | if ( myid > 3*(size/3) - 1 ) 20 | { 21 | keyid = MPI_UNDEFINED; 22 | flag = 0; 23 | } 24 | else 25 | { 26 | keyid = myid % 3; 27 | flag = 1; 28 | } 29 | 30 | MPI_Comm_split( MPI_COMM_WORLD, keyid, myid, &intra_gcomm ); 31 | if ( keyid == 0 ) 32 | { 33 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 1, 1, &inter_lcomm ); 34 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 2, 2, &inter_rcomm ); 35 | MPI_Intercomm_merge( inter_lcomm, keyid, &intra_lcomm ); 36 | MPI_Intercomm_merge( inter_rcomm, keyid, &intra_rcomm ); 37 | printf("keyid -%d\n",myid); 38 | } 39 | else if ( keyid == 1 ) 40 | { 41 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 0, 1, &inter_rcomm ); 42 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 2, 3, &inter_lcomm ); 43 | MPI_Intercomm_merge( inter_rcomm, keyid, &intra_rcomm ); 44 | MPI_Intercomm_merge( inter_lcomm, keyid, &intra_lcomm ); 45 | printf("keyid -%d\n",myid); 46 | } 47 | else if (keyid == 2 ) 48 | { 49 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 0, 3, &inter_lcomm ); 50 | MPI_Intercomm_create( intra_gcomm, 0, MPI_COMM_WORLD, 1, 3, &inter_rcomm ); 51 | MPI_Intercomm_merge( inter_lcomm, keyid, &intra_lcomm ); 52 | MPI_Intercomm_merge( inter_rcomm, keyid, &intra_rcomm ); 53 | printf("keyid -%d\n",myid); 54 | } 55 | 56 | //MPI_Barrier(MPI_COMM_WORLD); 57 | //printf("barrier-myid %d\n",myid); 58 | int rrank, lrank, rsize, lsize, gsize, grank; 59 | if (flag==0) 60 | printf("process %d is excluded !\n", myid); 61 | else if (flag==1) 62 | { 63 | printf("%d\n",myid); 64 | //if (myid==0) printf("1\n"); 65 | MPI_Comm_size( intra_lcomm, &lsize ); 66 | MPI_Comm_rank( intra_lcomm, &lrank ); 67 | //if (myid==0) printf("2\n"); 68 | MPI_Comm_size( intra_rcomm, &rsize ); 69 | MPI_Comm_rank( intra_rcomm, &rrank ); 70 | //if (myid==0) printf("3\n"); 71 | MPI_Comm_size( intra_gcomm, &gsize ); 72 | MPI_Comm_rank( intra_gcomm, &grank ); 73 | 
74 | 75 | if ( myid == 0 ) 76 | { 77 | printf( "color myid size lrank lsize rrank rsize grank gsize\n" ); 78 | printf( "%5d %4d %4d %5d %5d %5d %5d %5d %5d\n",keyid, myid, size, lrank, lsize, rrank, rsize, grank, gsize ); 79 | for ( int i = 1; i < 3*(size/3); i++) 80 | { 81 | //mpi_any_source不会堵塞,否则会堵塞 82 | MPI_Recv( message, 100, MPI_CHAR, MPI_ANY_SOURCE, 10, MPI_COMM_WORLD, &status ); 83 | printf( "%s\n", message ); 84 | } 85 | } 86 | else 87 | { 88 | printf("%d\n",myid); 89 | sprintf( message, "%5d %4d %4d %5d %5d %5d %5d %5d %5d",keyid, myid, size, lrank, lsize, rrank, rsize, grank, gsize ); 90 | MPI_Send( message, strlen(message)+1, MPI_CHAR, 0, 10, MPI_COMM_WORLD ); 91 | } 92 | } 93 | 94 | if (myid==0) printf("1\n"); 95 | int ndims=2, cart_rank; 96 | int dims[2], periods[2], coords[2]; 97 | MPI_Comm comm_cart, comm_new; 98 | MPI_Comm_split( MPI_COMM_WORLD, flag, myid, &comm_new ); 99 | if (flag==1) 100 | { 101 | dims[0] = 3; 102 | dims[1] = size / 3; 103 | periods[0] = false; 104 | periods[1] = false; 105 | MPI_Cart_create( comm_new, ndims, dims, periods, false, &comm_cart ); 106 | if ( myid == 0 ) 107 | { 108 | for ( int i = 0; i < 3; i++) 109 | { 110 | for(int j = 0; j < size/3; j++) 111 | { 112 | coords[0] = i; 113 | coords[1] = j; 114 | MPI_Cart_rank( comm_cart, coords, &cart_rank ); 115 | printf( "{%d, %d} myid=%d\n", coords[0], coords[1], cart_rank ); 116 | } 117 | } 118 | } 119 | 120 | MPI_Comm_free( &inter_lcomm ); 121 | MPI_Comm_free( &inter_rcomm ); 122 | MPI_Comm_free( &intra_lcomm ); 123 | MPI_Comm_free( &intra_rcomm ); 124 | MPI_Comm_free( &intra_gcomm ); 125 | MPI_Comm_free( &comm_cart ); 126 | MPI_Comm_free( &comm_new ); 127 | } 128 | MPI_Finalize(); 129 | } 130 | 131 | -------------------------------------------------------------------------------- /Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/作业九 FOX并行算法和PageRank算法.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/作业九 FOX并行算法和PageRank算法.pdf -------------------------------------------------------------------------------- /Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/作业八150*******李师尧.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/作业八150*******李师尧.pdf -------------------------------------------------------------------------------- /Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/作业六150*******李师尧.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Shiyao_Li_s_MPI_homework/李师尧MPI作业参考/作业六150*******李师尧.pdf -------------------------------------------------------------------------------- /Homeworks/Yao_Hong_s_Homework/Homework_1/README.md: -------------------------------------------------------------------------------- 1 | # Yao Hong's Parallel Computing Homework 2 | Parallel programming course at Peking University 3 | 4 | ## Contents 5 | 1. Homeworks and Projects 6 | 7 | * xxxxxxx 8 | 9 | * xxxxxxxx 10 | 11 | * xxxxxxxx 12 | 13 | * xxxxxxxx 14 | 15 | * xxxxxxxxx 16 | 17 | * xxxxxxxxx 18 | 19 | 2. xxxxxxxx 20 | 21 | 22 | ## Warranty 23 | Maybe, there are many mistakes in the both documents and Codes, because of the limitation of our knowledge and strength. As a result: THESE DOCUMENTS AND CODES ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. 24 | I MAKE NO WARRANTIES, EXPRESS OR IMPLIED, THAT THEY ARE FREE OF ERROR. 
25 | 26 | ## Copyright 27 | You can use and copy these works for any academic purpose, Except just copy to finish your homework or republish these works without proper declare their original author. 28 | -------------------------------------------------------------------------------- /Homeworks/Yao_Hong_s_Homework/Homework_1/并行程序设计作业-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/Homeworks/Yao_Hong_s_Homework/Homework_1/并行程序设计作业-1.pdf -------------------------------------------------------------------------------- /Homeworks/Yao_Hong_s_Homework/Homework_2/README.md: -------------------------------------------------------------------------------- 1 | # Yao Hong's Parallel Computing Homework 2 | Parallel programming course at Peking University 3 | 4 | ## Contents 5 | 1. Homeworks and Projects 6 | 7 | * xxxxxxx 8 | 9 | * xxxxxxxx 10 | 11 | * xxxxxxxx 12 | 13 | * xxxxxxxx 14 | 15 | * xxxxxxxxx 16 | 17 | * xxxxxxxxx 18 | 19 | 2. xxxxxxxx 20 | 21 | 22 | ## Warranty 23 | Maybe, there are many mistakes in the both documents and Codes, because of the limitation of our knowledge and strength. As a result: THESE DOCUMENTS AND CODES ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. 24 | I MAKE NO WARRANTIES, EXPRESS OR IMPLIED, THAT THEY ARE FREE OF ERROR. 25 | 26 | ## Copyright 27 | You can use and copy these works for any academic purpose, Except just copy to finish your homework or republish these works without proper declare their original author. 28 | -------------------------------------------------------------------------------- /Homeworks/Yao_Hong_s_Homework/Homework_2/saxpy.c: -------------------------------------------------------------------------------- 1 | // 2 | // Created by hongyao on 2018/10/15. 
3 | // 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "fio.h" 13 | 14 | int64_t n,n_num,sizeofA,sizeofB,single_num; 15 | int32_t thread_num,threadid; 16 | float *A,*B,alfa; 17 | pthread_barrier_t barrier; 18 | 19 | void *worker(void *arg){ 20 | int64_t i; 21 | int myID = __sync_fetch_and_add(&threadid, 1); 22 | for (i=myID*single_num;i<(myID+1)*single_num;i++) *(B+i)=(*(A+i))*alfa+*(B+i); 23 | return (void*)0; 24 | } 25 | 26 | int main(int argc, char *argv[]){ 27 | int i; 28 | thread_num=atoi(argv[1]); // 设置P 29 | n=atoll(argv[2]); // 设置N 30 | n_num=((int64_t)1< 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "fio.h" 9 | 10 | int64_t n,n_num,m,m_num,sizeofA,sizeofB,sizeofC; 11 | int32_t thread_num,threadid; 12 | int64_t *A,*B,*C,*pp,*qq,*qqa; 13 | pthread_barrier_t barrier; 14 | struct SGROUP {int64_t pianduan;int64_t geshu;int64_t locA;}; //pianduan为该片段,geshu为该片段的个数,locA为排序后该片段的首位在A中的位置 15 | struct SGROUP *groupA; 16 | //比较函数指针 17 | int myCompar(const void *arg1,const void *arg2){ 18 | int64_t *pa=(int64_t*)arg1,*pb=(int64_t*)arg2; 19 | return *pa>*pb; 20 | } 21 | //***********************// 22 | //**********子***********// 23 | //**********线***********// 24 | //**********程***********// 25 | //***********************// 26 | void *worker(void *arg) { 27 | int64_t i,j,lb,ub; 28 | int myID = __sync_fetch_and_add(&threadid, 1); 29 | // printf("xianchengyfhdshd s"); 30 | int64_t loc_size = (m_num /2)/thread_num; 31 | int64_t rest = (m_num /2)%thread_num; 32 | 33 | //**********给线程分配计算资源************* 34 | if (myID < rest) { 35 | lb = loc_size * myID + myID; 36 | ub = lb + loc_size + 1; 37 | } else { 38 | lb = loc_size * myID + rest; 39 | ub = lb + loc_size; 40 | } 41 | 42 | //**********将B数组两位两位地保存在pp中*********** 43 | for (i =lb; i < ub; i++) { 44 | for (j = 0; j < m_num /2; j++) { 45 | if (B[2*j] == i) { 46 | pp[2*i]=B[2*j]; 47 | pp[2*i+1]=B[2*j+1]; 48 | } 49 | } 50 
| } 51 | pthread_barrier_wait(&barrier); 52 | //**************开辟空间得到qqa(排序前累积)以及qq(排序后累积)*************** 53 | if (pthread_barrier_wait(&barrier) == PTHREAD_BARRIER_SERIAL_THREAD) { 54 | for (i = 1; i < m_num / 2; i++) { 55 | qqa[i] = qqa[i - 1] + B[2*i - 1]; 56 | qq[i] = qq[i - 1] + pp[2*i - 1]; 57 | } 58 | } 59 | pthread_barrier_wait(&barrier); 60 | //***************保存结构体的前两个数据************** 61 | for (i = lb; i 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "fio.h" 11 | 12 | //Global variable 13 | int64_t n,m,size,size_center; 14 | int thread_num,threadid; 15 | pthread_barrier_t barrier; 16 | float *pa, *pb; 17 | 18 | //********************************// 19 | //**************子****************// 20 | //**************线****************// 21 | //**************程****************// 22 | //********************************// 23 | void *worker(void *arg) { 24 | int64_t i,j,lb,ub; 25 | int myID = __sync_fetch_and_add(&threadid,1); 26 | int64_t loc_size = (n-2)/thread_num; 27 | int64_t rest = (n-2)%thread_num; 28 | //**********给线程分配计算资源************* 29 | if (myID < rest) { 30 | lb = loc_size * myID + myID; 31 | ub = lb + loc_size + 1; 32 | } else { 33 | lb = loc_size * myID + rest; 34 | ub = lb + loc_size; 35 | } 36 | if (pthread_barrier_wait(&barrier) == PTHREAD_BARRIER_SERIAL_THREAD) { 37 | memcpy(pb, pa, sizeof(float) * m); 38 | memcpy(&pb[m*(n-1)], &pa[m*(n-1)], sizeof(float) * m); 39 | } 40 | 41 | for (i=lb+1;i 5 | 6 | 7 | 8 | Yes We Code 9 |
10 | 11 |
12 | 13 | ## Contents 14 | 1. Homeworks and Projects 15 | 16 | * Nine times of homeworks, both my homeworks and my partner Shiyao Li's homeworks. These homeworks including 17 | Pthreads, MPI and CUDA Parallel Programming Interface, and Fortran/C Programming Language. 18 | 19 | * Yao Hong's homeworks. [Yao Hong](https://github.com/hong-yao) 20 | 21 | * Reference Codes and Training Materials are come from Lawrence Livermore National Laboratory, which written by Blaise Barney. Thanks a lot for the open resource. 22 | 23 | * My first and second CUDA Programs. 24 | 25 | * Example codes of my Lectures on HPC to Prof. Shan Tang's group. 26 | 27 | * We Gratefully Acknowledge Associate Prof. Hua-shan Yu from School of Electronics Engineering and Computer Science at Peking University for his help both in course and final project. 28 | 29 | 2. Reference Material's Programs 30 | * Peter Pacheco's Book (An introduction to Parallel Programming)'s Materials. Codes of each chapter. 31 | * Other materials will be updated in the future. 32 | 33 | 3. Reference Papers 34 | * Ristov S, Prodan R, Gusev M, et al. Superlinear speedup in HPC systems: Why and when?[C]. federated conference on computer science and information systems, 2016: 889-898. 35 | 36 | ## Warranty 37 | Maybe, there are many mistakes in the both documents and Codes, because of the limitation of our knowledge and strength. As a result: THESE DOCUMENTS AND CODES ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. 38 | I MAKE NO WARRANTIES, EXPRESS OR IMPLIED, THAT THEY ARE FREE OF ERROR. 39 | 40 | ## Copyright 41 | You can use and copy these works for any academic purpose, Except just copy to finish your homework or republish these works without proper declare their original author. 
42 | -------------------------------------------------------------------------------- /ReferencePapers/Superlinear Speedup in HPC Systems why and when.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yangyang14641/ParallelProgrammingCourse/9d36d2fae1d5a42aa4d8d88ee884182ebc3ccd78/ReferencePapers/Superlinear Speedup in HPC Systems why and when.pdf --------------------------------------------------------------------------------