├── .DS_Store
├── 2DArray.h
├── README.md
├── README.txt
├── apple.def
├── hello.c
├── linked.c
├── linux_gnu.def
├── linux_intel.def
├── linux_pgi.def
├── make.def
├── makefile
├── mandel.c
├── matmul.c
├── matmul_recur.cpp
├── omp_hands_on.pdf
├── pi.c
├── pi_mc.c
├── prod_cons.c
├── random.c
├── random.h
├── solutions
    ├── hello_par.c
    ├── linked_omp25.c
    ├── linked_omp3_tasks.c
    ├── makefile
    ├── mandel_par.c
    ├── matmul_par.c
    ├── pi_loop.c
    ├── pi_mc.c
    ├── pi_mc_par.c
    ├── pi_spmd_final.c
    ├── pi_spmd_simple.c
    ├── prod_cons_par.c
    ├── random.h
    ├── random_par.c
    └── random_seq_lcg.c
└── win_intel.def


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tgmattso/OpenMP_intro_tutorial/56a6b0160e3f5c0a5711658436e3f9657ee57b09/.DS_Store


--------------------------------------------------------------------------------
/2DArray.h:
--------------------------------------------------------------------------------
 1 | template < typename T >
 2 | T **Allocate2DArray( int nRows, int nCols)
 3 | {
 4 |     //(step 1) allocate memory for array of elements of column
 5 |     T **ppi = (T **) malloc(sizeof(T *)*nRows);
 6 | 
 7 |     //(step 2) allocate memory for array of elements of each row
 8 |     T *curPtr = (T *) malloc(sizeof(T) * nRows * nCols);
 9 | 
10 |     // Now point the pointers in the right place
11 |     for( int i = 0; i < nRows; ++i)
12 |     {
13 |         *(ppi + i) = curPtr;
14 |          curPtr += nCols;
15 |     }
16 |     return ppi;
17 | }
18 | 
// Releases a matrix whose layout is a row-pointer table whose first entry
// points at the start of a single malloc'd element slab: the slab is freed
// through Array[0], then the table itself.
// Passing NULL is a no-op, mirroring free().
template < typename T >
void Free2DArray(T** Array)
{
    if (Array == NULL)
        return;

    free(*Array);   // the element slab (row 0 points at its start)
    free(Array);    // the row-pointer table
}


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | This directory contains exercises and solutions for a hands-on
 4 | OpenMP course.  Information about these programs can be found
 5 | in the comments and in the slides from the course.
 6 | 
 7 | To use these programs, copy the appropriate "def" file into
 8 | "make.def".  For example on a linux system running the gnu 
 9 | compilers, I'd type
10 | 
11 |   cp linux_gnu.def make.def
12 | 
13 | Then build the programs and test them
14 | 
15 |    make test
16 | 
17 | The solutions directory uses the same make.def file so to build
18 | the solutions, just type "make test".  The directory extras
19 | contains additional exercises for more advanced students.  These
20 | have not been as carefully tested and may have problems building
21 | and running on some systems.
22 | 
23 | We have tested these programs under Linux with the gnu and Intel compilers,
24 | and Windows 7 with the intel compiler. We also tested these
25 | on OS-X with the gnu environment loaded with Apple's xcode.   Apple's
26 | OpenMP environment at this time does not support threadprivate
27 | variables so the pi_mc solutions will not build (and need to 
28 | be commented out of the makefile).
29 | 
30 | We have used these programs with the PGI compiler (linux_pgi.def) 
31 | but we have not tested this case recently and it may need some work.
32 | 
33 | For windows users, to run these on Windows 7 we used the following
34 | procedure.  First go to the start menu, and select 
35 |    
36 |     Intel parallel studio 2011/command prompt/ ia64 visual studio 2010 mode
37 | 
38 | cd to the appropriate folder.  Copy win_intel.def to make.def and then
39 | use nmake to build
40 | 
41 | 


--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | This directory contains exercises and solutions for a hands-on
 4 | OpenMP course.  Information about these programs can be found
 5 | in the comments and in the slides from the course.
 6 | 
 7 | To use these programs, copy the appropriate "def" file into
 8 | "make.def".  For example on a linux system running the gnu 
 9 | compilers, I'd type
10 | 
11 |   cp linux_gnu.def make.def
12 | 
13 | Then build the programs and test them
14 | 
15 |    make test
16 | 
17 | The solutions directory uses the same make.def file so to build
18 | the solutions, just type "make test".  The directory extras
19 | contains additional exercises for more advanced students.  These
20 | have not been as carefully tested and may have problems building
21 | and running on some systems.
22 | 
23 | We have tested these programs under Linux with the gnu and Intel compilers,
24 | and Windows 7 with the intel compiler. We also tested these
25 | on OS-X with the gnu environment loaded with Apple's xcode.   Apple's
26 | OpenMP environment at this time does not support threadprivate
27 | variables so the pi_mc solutions will not build (and need to 
28 | be commented out of the makefile).
29 | 
30 | We have used these programs with the PGI compiler (linux_pgi.def) 
31 | but we have not tested this case recently and it may need some work.
32 | 
33 | For windows users, to run these on Windows 7 we used the following
34 | procedure.  First go to the start menu, and select 
35 |    
36 |     Intel parallel studio 2011/command prompt/ ia64 visual studio 2010 mode
37 | 
38 | cd to the appropriate folder.  Copy win_intel.def to make.def and then
39 | use nmake to build
40 | 
41 | 


--------------------------------------------------------------------------------
/apple.def:
--------------------------------------------------------------------------------
 1 | # for g++ compiler on Apple OS-X
 2 | # copy to make.def
 3 | CC          = g++
 4 | CLINKER     = $(CC)
 5 | OPTFLAGS    = -fopenmp -DAPPLE
 6 | LIBS        = -lm
 7 | PRE         = ./
 8 | 
 9 | CFLAGS	  = $(OPTFLAGS)
10 | 
11 | OBJ=o
12 | EXE=
13 | RM=rm
14 | 


--------------------------------------------------------------------------------
/hello.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
/* Serial starting point for the first exercise: writes the greeting once. */
int main(void)
{
  fputs("Hello World \n", stdout);
  return 0;
}
6 | 


--------------------------------------------------------------------------------
/linked.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <stdio.h>
 3 | #include <omp.h>
 4 | 
 5 | #ifndef N
 6 | #define N 5
 7 | #endif
 8 | #ifndef FS
 9 | #define FS 38
10 | #endif
11 | 
12 | struct node {
13 |    int data;
14 |    int fibdata;
15 |    struct node* next;
16 | };
17 | 
/*
 * Naive doubly-recursive Fibonacci: fib(0) = 0, fib(1) = 1,
 * fib(n) = fib(n-1) + fib(n-2).  Any n below 2 (including negative
 * values) is returned unchanged.
 */
int fib(int n) {
   if (n < 2)
      return n;

   return fib(n - 1) + fib(n - 2);
}
28 | 
29 | void processwork(struct node* p) 
30 | {
31 |    int n;
32 |    n = p->data;
33 |    p->fibdata = fib(n);
34 | }
35 | 
36 | struct node* init_list(struct node* p) {
37 |     int i;
38 |     struct node* head = NULL;
39 |     struct node* temp = NULL;
40 |     
41 |     head = (struct node*)malloc(sizeof(struct node));
42 |     p = head;
43 |     p->data = FS;
44 |     p->fibdata = 0;
45 |     for (i=0; i< N; i++) {
46 |        temp  =  (struct node*)malloc(sizeof(struct node));
47 |        p->next = temp;
48 |        p = temp;
49 |        p->data = FS + i + 1;
50 |        p->fibdata = i+1;
51 |     }
52 |     p->next = NULL;
53 |     return head;
54 | }
55 | 
56 | int main(int argc, char *argv[]) {
57 |      double start, end;
58 |      struct node *p=NULL;
59 |      struct node *temp=NULL;
60 |      struct node *head=NULL;
61 |      
62 | 	 printf("Process linked list\n");
63 |      printf("  Each linked list node will be processed by function 'processwork()'\n");
64 |      printf("  Each ll node will compute %d fibonacci numbers beginning with %d\n",N,FS);      
65 |  
66 |      p = init_list(p);
67 |      head = p;
68 | 
69 |      start = omp_get_wtime();
70 |      {
71 |         while (p != NULL) {
72 | 		   processwork(p);
73 | 		   p = p->next;
74 |         }
75 |      }
76 | 
77 |      end = omp_get_wtime();
78 |      p = head;
79 | 	 while (p != NULL) {
80 |         printf("%d : %d\n",p->data, p->fibdata);
81 |         temp = p->next;
82 |         free (p);
83 |         p = temp;
84 |      }  
85 | 	 free (p);
86 | 
87 |      printf("Compute Time: %f seconds\n", end - start);
88 | 
89 |      return 0;
90 | }
91 | 
92 | 


--------------------------------------------------------------------------------
/linux_gnu.def:
--------------------------------------------------------------------------------
 1 | # for g++ compiler on Linux
 2 | # copy to make.def
 3 | CC          = g++
 4 | CLINKER     = $(CC)
 5 | OPTFLAGS    = -fopenmp 
 6 | LIBS        = -lm
 7 | PRE         = ./
 8 | 
 9 | CFLAGS	  = $(OPTFLAGS)
10 | 
11 | OBJ=o
12 | EXE=
13 | RM=rm
14 | 


--------------------------------------------------------------------------------
/linux_intel.def:
--------------------------------------------------------------------------------
 1 | # for intel compiler on linux (most exercises work with icc as well)
 2 | # copy to make.def
 3 | CC         = icpc
 4 | CPP         = $(CC)
 5 | OPTFLAGS   = -openmp
 6 | 
 7 | CLINKER     = $(CC)
 8 | LIBS        = -lm
 9 | 
10 | CFLAGS    = $(OPTFLAGS)
11 | PRE= ./
12 | OBJ=o
13 | EXE=
14 | RM=rm
15 | 
16 | 


--------------------------------------------------------------------------------
/linux_pgi.def:
--------------------------------------------------------------------------------
 1 | CC          = pgcc
 2 | CLINKER     = $(CC)
 3 | OPTFLAGS    = -mp -fast 
 4 | LIBS        =
 5 | 
 6 | CFLAGS	  = $(OPTFLAGS) 
 7 | 
 8 | OBJ=o
 9 | EXE=.exe
10 | RM=rm 
11 | 


--------------------------------------------------------------------------------
/make.def:
--------------------------------------------------------------------------------
 1 | # for g++ compiler on Apple OS-X
 2 | # copy to make.def
 3 | CC          = g++
 4 | CLINKER     = $(CC)
 5 | OPTFLAGS    = -fopenmp -DAPPLE
 6 | LIBS        = -lm
 7 | PRE         = ./
 8 | 
 9 | CFLAGS	  = $(OPTFLAGS)
10 | 
11 | OBJ=o
12 | EXE=
13 | RM=rm
14 | 


--------------------------------------------------------------------------------
/makefile:
--------------------------------------------------------------------------------
 1 | # 
 2 | 
 3 | include make.def
 4 | 
 5 | EXES=hello$(EXE) pi$(EXE) matmul$(EXE) pi_mc$(EXE) prod_cons$(EXE) \
 6 |      matmul_recur$(EXE) mandel$(EXE) linked$(EXE)
 7 | 
 8 | all: $(EXES)
 9 | 
10 | hello$(EXE): hello.$(OBJ) 
11 | 	$(CLINKER) $(OPTFLAGS) -o hello hello.$(OBJ) $(LIBS)
12 | 
13 | pi$(EXE): pi.$(OBJ) 
14 | 	$(CLINKER) $(OPTFLAGS) -o pi pi.$(OBJ) $(LIBS)
15 | 
16 | pi_mc$(EXE): pi_mc.$(OBJ) random.$(OBJ)
17 | 	$(CLINKER) $(OPTFLAGS) -o pi_mc random.$(OBJ) pi_mc.$(OBJ) $(LIBS)
18 | 
19 | matmul$(EXE): matmul.$(OBJ) 
20 | 	$(CLINKER) $(OPTFLAGS) -o matmul matmul.$(OBJ) $(LIBS)
21 | 
22 | prod_cons$(EXE): prod_cons.$(OBJ)
23 | 	$(CLINKER) $(OPTFLAGS) -o prod_cons prod_cons.$(OBJ) $(LIBS)
24 | 
25 | matmul_recur$(EXE):  matmul_recur.$(OBJ) 
26 | 	$(CLINKER) $(OPTFLAGS) -o matmul_recur matmul_recur.$(OBJ) $(LIBS)
27 | 
28 | mandel$(EXE):  mandel.$(OBJ) 
29 | 	$(CLINKER) $(OPTFLAGS) -o mandel mandel.$(OBJ) $(LIBS)
30 | 
31 | linked$(EXE):  linked.$(OBJ) 
32 | 	$(CLINKER) $(OPTFLAGS) -o linked linked.$(OBJ) $(LIBS)
33 | 
34 | test: $(EXES)
35 | 	$(PRE)hello$(EXE) 
36 | 	$(PRE)pi$(EXE) 
37 | 	$(PRE)matmul$(EXE) 
38 | 	$(PRE)pi_mc$(EXE) 
39 | 	$(PRE)prod_cons$(EXE) 
40 | 	$(PRE)matmul_recur$(EXE) 
41 | 	$(PRE)mandel$(EXE)
42 | 	$(PRE)linked$(EXE)
43 | 
44 | clean:
45 | 	$(RM) $(EXES) *.$(OBJ)
46 | 
47 | .SUFFIXES:
48 | .SUFFIXES: .c .cpp .$(OBJ)
49 | 
50 | .c.$(OBJ):
51 | 	$(CC) $(CFLAGS) -c $<
52 | 
53 | .cpp.$(OBJ):
54 | 	$(CC) $(CFLAGS) -c $<
55 | 


--------------------------------------------------------------------------------
/mandel.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | **  PROGRAM: Mandelbrot area
 3 | **
 4 | **  PURPOSE: Program to compute the area of a  Mandelbrot set.
 5 | **           Correct answer should be around 1.510659.
 6 | **           WARNING: this program may contain errors
 7 | **
 8 | **  USAGE:   Program runs without input ... just run the executable
 9 | **            
10 | **  HISTORY: Written:  (Mark Bull, August 2011).
11 | **           Changed "complex" to "d_complex" to avoid collision with 
12 | **           math.h complex type (Tim Mattson, September 2011)
13 | */
14 | 
15 | #include <stdio.h>
16 | #include <stdlib.h>
17 | #include <math.h>
18 | #include <omp.h>
19 | 
20 | # define NPOINTS 1000
21 | # define MAXITER 1000
22 | 
23 | void testpoint(void);
24 | 
25 | struct d_complex{
26 |    double r;
27 |    double i;
28 | };
29 | 
30 | struct d_complex c;
31 | int numoutside = 0;
32 | 
33 | int main(){
34 |    int i, j;
35 |    double area, error, eps  = 1.0e-5;
36 | 
37 | 
38 | //   Loop over grid of points in the complex plane which contains the Mandelbrot set,
39 | //   testing each point to see whether it is inside or outside the set.
40 | 
41 | #pragma omp parallel for default(shared) private(c,eps)
42 |    for (i=0; i<NPOINTS; i++) {
43 |      for (j=0; j<NPOINTS; j++) {
44 |        c.r = -2.0+2.5*(double)(i)/(double)(NPOINTS)+eps;
45 |        c.i = 1.125*(double)(j)/(double)(NPOINTS)+eps;
46 |        testpoint();
47 |      }
48 |    }
49 | 
50 | // Calculate area of set and error estimate and output the results
51 |    
52 | area=2.0*2.5*1.125*(double)(NPOINTS*NPOINTS-numoutside)/(double)(NPOINTS*NPOINTS);
53 |    error=area/(double)NPOINTS;
54 | 
55 |    printf("Area of Mandlebrot set = %12.8f +/- %12.8f\n",area,error);
56 |    printf("Correct answer should be around 1.510659\n");
57 | 
58 | }
59 | 
60 | void testpoint(void){
61 | 
62 | // Does the iteration z=z*z+c, until |z| > 2 when point is known to be outside set
63 | // If loop count reaches MAXITER, point is considered to be inside the set
64 | 
65 |        struct d_complex z;
66 |        int iter;
67 |        double temp;
68 | 
69 |        z=c;
70 |        for (iter=0; iter<MAXITER; iter++){
71 |          temp = (z.r*z.r)-(z.i*z.i)+c.r;
72 |          z.i = z.r*z.i*2+c.i;
73 |          z.r = temp;
74 |          if ((z.r*z.r+z.i*z.i)>4.0) {
75 |            numoutside++;
76 |            break;
77 |          }
78 |        }
79 | 
80 | }
81 | 
82 | 


--------------------------------------------------------------------------------
/matmul.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | **  PROGRAM: Matrix Multiply
  3 | **
  4 | **  PURPOSE: This is a simple matrix multiply program. 
  5 | **           It will compute the product
  6 | **
  7 | **                C  = A * B
  8 | **
  9 | **           A and B are set to constant matrices so we
 10 | **           can make a quick test of the multiplication.
 11 | **
 12 | **  USAGE:   Right now, I hardwire the matrix dimensions. 
 13 | **           later, I'll take them from the command line.
 14 | **
 15 | **  HISTORY: Written by Tim Mattson, Nov 1999.
 16 | */
 17 | #ifdef APPLE
 18 | #include <stdlib.h>
 19 | #else
 20 | #include <malloc.h>
 21 | #endif
 22 | #include <stdio.h>
 23 | #include <omp.h>
 24 | 
 25 | #define ORDER 1000
 26 | #define AVAL 3.0
 27 | #define BVAL 5.0
 28 | #define TOL  0.001
 29 | 
 30 | int main(int argc, char **argv)
 31 | {
 32 | 	int Ndim, Pdim, Mdim;   /* A[N][P], B[P][M], C[N][M] */
 33 | 	int i,j,k;
 34 | 	double *A, *B, *C, cval, tmp, err, errsq;
 35 |       double dN, mflops;
 36 | 	double start_time, run_time;
 37 | 
 38 | 
 39 | 	Ndim = ORDER;
 40 | 	Pdim = ORDER;
 41 | 	Mdim = ORDER;
 42 | 
 43 |    	A = (double *)malloc(Ndim*Pdim*sizeof(double));
 44 |       B = (double *)malloc(Pdim*Mdim*sizeof(double));
 45 |       C = (double *)malloc(Ndim*Mdim*sizeof(double));
 46 | 
 47 | 	/* Initialize matrices */
 48 | 
 49 | 	for (i=0; i<Ndim; i++)
 50 | 		for (j=0; j<Pdim; j++)
 51 | 			*(A+(i*Ndim+j)) = AVAL;
 52 | 
 53 | 	for (i=0; i<Pdim; i++)
 54 | 		for (j=0; j<Mdim; j++)
 55 | 			*(B+(i*Pdim+j)) = BVAL;
 56 | 
 57 | 	for (i=0; i<Ndim; i++)
 58 | 		for (j=0; j<Mdim; j++)
 59 | 			*(C+(i*Ndim+j)) = 0.0;
 60 | 
 61 | 	/* Do the matrix product */
 62 | 
 63 | 	start_time = omp_get_wtime(); 
 64 | 	for (i=0; i<Ndim; i++){
 65 | 		for (j=0; j<Mdim; j++){
 66 | 			tmp = 0.0;
 67 | 			for(k=0;k<Pdim;k++){
 68 | 				/* C(i,j) = sum(over k) A(i,k) * B(k,j) */
 69 | 				tmp += *(A+(i*Ndim+k)) *  *(B+(k*Pdim+j));
 70 | 			}
 71 | 			*(C+(i*Ndim+j)) = tmp;
 72 | 		}
 73 | 	}
 74 | 	/* Check the answer */
 75 | 
 76 | 	run_time = omp_get_wtime() - start_time;
 77 |  
 78 | 	printf(" Order %d multiplication in %f seconds \n", ORDER, run_time);
 79 | 
 80 |       dN = (double)ORDER;
 81 |       mflops = 2.0 * dN * dN * dN/(1000000.0* run_time);
 82 |  
 83 | 	printf(" Order %d multiplication at %f mflops\n", ORDER, mflops);
 84 | 
 85 | 	cval = Pdim * AVAL * BVAL;
 86 | 	errsq = 0.0;
 87 | 	for (i=0; i<Ndim; i++){
 88 | 		for (j=0; j<Mdim; j++){
 89 | 			err = *(C+i*Ndim+j) - cval;
 90 | 		    errsq += err * err;
 91 | 		}
 92 | 	}
 93 | 
 94 | 	if (errsq > TOL) 
 95 | 		printf("\n Errors in multiplication: %f",errsq);
 96 | 	else
 97 | 		printf("\n Hey, it worked");
 98 | 
 99 | 	printf("\n all done \n");
100 | }
101 | 


--------------------------------------------------------------------------------
/matmul_recur.cpp:
--------------------------------------------------------------------------------
  1 | // Several versions of serial codes for matrix-matrix multiplication
  2 | 
  3 | #include <stdio.h>   
  4 | #include <stdlib.h> 
  5 | #include <omp.h>
  6 | #include "2DArray.h"
  7 | 
  8 | // define sizes of matrices to be used
  9 | #define MM 1000
 10 | #define NN 1000
 11 | #define PP 1000
 12 | 
 13 | double dabs(double d){return (d<0.0?d:(-d));}
 14 | 
 15 | // Default triple-nested loop for matrix-matrix multiplication
 16 | void matmult1(int m, int n, int p, double **A, double **B, double **C)   
 17 | {   
 18 | 	int i, j, k; 
 19 | 
 20 | 	for (i = 0; i < m; i++)   
 21 | 		for (j = 0; j < n; j++){          
 22 | 			C[i][j]=0;   
 23 | 			for (k = 0; k < p; k++)   
 24 | 				C[i][j] += A[i][k]*B[k][j];   
 25 | 		}   
 26 | }  
 27 | 
 28 | 
 29 | /*   
 30 |   Recursive code for matrix multiplication.   
 31 |   The recursion uses the formula  
 32 |   C00 = A00*B00 + A01*B10  
 33 |   C01 = A00*B01 + A01*B11  
 34 |   C10 = A10*B00 + A11*B10  
 35 |   C11 = A10*B01 + A11*B11  
 36 | */  
 37 |   
 38 | void matmultleaf(int mf, int ml, int nf, int nl, int pf, int pl, double **A, double **B, double **C)    
 39 | /*  
 40 |   subroutine that uses the simple triple loop to multiply  
 41 |   a submatrix from A with a submatrix from B and store the  
 42 |   result in a submatrix of C.   
 43 |   (We could use a tiled version,for better performance)  
 44 | */  
 45 | // mf, ml; /* first and last+1 i index */  
 46 | // nf, nl; /* first and last+1 j index */  
 47 | // pf, pl; /* first and last+1 k index */  
 48 | {
 49 | 	int i,j,k;   
 50 | 	for (i = mf; i < ml; i++)   
 51 | 		for (j = nf; j < nl; j++)   
 52 | 			for (k = pf; k < pl; k++)   
 53 | 				C[i][j] += A[i][k]*B[k][j];   
 54 | }   
 55 |   
 56 | #define GRAIN  32768 /* product size below which matmultleaf is used */  
 57 |   
 58 | void matmultrec(int mf, int ml, int nf, int nl, int pf, int pl, double **A, double **B, double **C)
 59 | /*    
 60 |   recursive subroutine to compute the product of two  
 61 |   submatrices of A and B and store the result in C  
 62 | */  
 63 | // mf, ml; /* first and last+1 i index */  
 64 | // nf, nl; /* first and last+1 j index */  
 65 | // pf, pl; /* first and last+1 k index */  
 66 | 
 67 | {     
 68 | //
 69 | // Check sizes of matrices; 
 70 | // if below threshold then compute product w/o recursion
 71 | //
 72 | 	if ((ml-mf)*(nl-nf)*(pl-pf) < GRAIN)   
 73 | 		matmultleaf(mf, ml, nf, nl, pf, pl, A, B, C);   
 74 | 	else {   
 75 | //
 76 | // Apply OpenMP tasks to the eight recursive calls below
 77 | //   be sure to not create data races between tasks
 78 | //
 79 |    // C00 += A00 * B00
 80 | 		matmultrec(mf, mf+(ml-mf)/2, nf, nf+(nl-nf)/2, pf, pf+(pl-pf)/2, A, B, C);   
 81 |    // C01 += A00 * B01
 82 | 		matmultrec(mf, mf+(ml-mf)/2, nf+(nl-nf)/2, nl, pf, pf+(pl-pf)/2, A, B, C);   
 83 |    // C00 += A01 * B10
 84 | 		matmultrec(mf, mf+(ml-mf)/2, nf, nf+(nl-nf)/2, pf+(pl-pf)/2, pl, A, B, C);   
 85 |    // C01 += A01 * B11
 86 | 		matmultrec(mf, mf+(ml-mf)/2, nf+(nl-nf)/2, nl, pf+(pl-pf)/2, pl, A, B, C);   
 87 |    // C10 += A10 * B00
 88 | 		matmultrec(mf+(ml-mf)/2, ml, nf, nf+(nl-nf)/2, pf, pf+(pl-pf)/2, A, B, C);   
 89 |    // C11 += A10 * B01
 90 | 		matmultrec(mf+(ml-mf)/2, ml, nf+(nl-nf)/2, nl, pf, pf+(pl-pf)/2, A, B, C);   
 91 |    // C10 += A11 * B10
 92 | 		matmultrec(mf+(ml-mf)/2, ml, nf, nf+(nl-nf)/2, pf+(pl-pf)/2, pl, A, B, C);   
 93 |    // C11 += A11 * B11
 94 | 		matmultrec(mf+(ml-mf)/2, ml, nf+(nl-nf)/2, nl, pf+(pl-pf)/2, pl, A, B, C);   
 95 | 	}   
 96 | }   
 97 |               
 98 | 
 99 | //
100 | //  "Helper" function to intialize C and start recursive routine
101 | //
102 | void matmultr(int m, int n, int p, double **A, double **B, double **C)
103 | {   
104 | 	int i,j; 
105 | 
106 | 	for (i = 0; i < m; i++)   
107 | 		for (j=0; j < n; j++)   
108 | 			C[i][j] = 0;   
109 | 
110 | 	matmultrec(0, m, 0, n, 0, p, A, B, C);   
111 | }  
112 | 
113 | int CheckResults(int m, int n, double **C, double **C1)
114 | {
115 | #define ERR_THRESHOLD 0.001
116 |    int code = 0;
117 | //
118 | //  May need to take into consideration the floating point roundoff error
119 | //    due to parallel execution
120 | //
121 |   for (int i = 0; i < m; i++) {
122 |     for (int j = 0; j < n; j++) {
123 |       if (dabs(C[i][j] - C1[i][j]) > ERR_THRESHOLD ) {
124 |         printf("%f  %f at [%d][%d]\n", C[i][j], C1[i][j], i, j);
125 |         code = 1;
126 |       }
127 |     }
128 |   }
129 |   return code;
130 | }
131 | 
132 |   
133 | int main(int argc, char* argv[])   
134 | {      
135 | 	int i, j; 
136 | 	double start, time1, time2;
137 | 
138 |    int M = MM;
139 |    int N = NN;
140 |    int P = PP;
141 |  
142 | //
143 | // If 3 values on command line, use those for matrix sizes
144 | //
145 |    if (argc != 4) {
146 |       printf("Suggested Usage: %s <M> <N> <P> \n", argv[0]);
147 |       printf("Using default values\n");
148 |    }
149 |    else {
150 |       M = atoi(argv[1]);
151 |       N = atoi(argv[2]);
152 |       P = atoi(argv[3]);
153 |    }
154 | 
155 | 	double  **A = Allocate2DArray< double >(M, P);
156 | 	double  **B = Allocate2DArray< double >(P, N);
157 | 
158 | 	double **C1 = Allocate2DArray< double >(M, N);
159 | 	double **C4 = Allocate2DArray< double >(M, N);
160 | 
161 | //
162 | // Initialize with random values
163 | //
164 | 	for (i = 0; i < M; i++) {   
165 | 		for (j = 0; j < P; j++) {   
166 | 			A[i][j] = (double)(rand()%100) / 10.0;   
167 | 		}      
168 | 	}   
169 | 
170 | 	for (i = 0; i < P; i++) {   
171 | 		for (j = 0; j < N; j++) {   
172 | 			B[i][j] = (double)(rand()%100) / 10.0;   
173 | 		}      
174 | 	}   
175 | 
176 |    printf("Matrix Dimensions: M = %d  P = %d  N = %d\n\n", M, P, N);
177 | 	printf("Execute matmult1\n");
178 | 	start = omp_get_wtime();
179 | 	matmult1(M, N, P, A, B, C1);
180 | 	time1 = omp_get_wtime() - start;
181 | 	printf("Time = %f seconds\n\n",time1);
182 | 
183 | 	printf("Execute matmultr\n");
184 | 	start = omp_get_wtime();
185 | 	matmultr(M, N, P, A, B, C4);
186 | 	time2 = omp_get_wtime() - start;
187 | 	printf("Time = %f seconds\n\n",time2);
188 | 
189 |    printf("Checking...");
190 |    if (CheckResults(M, N, C1, C4))
191 |      printf("Error in Recursive Matrix Multiplication\n\n");
192 |    else {
193 |      printf("OKAY\n\n");
194 |      printf("Speedup = %5.1fX\n", time1/time2);
195 |    }
196 | 
197 | 
198 | 	Free2DArray< double >(A);
199 | 	Free2DArray< double >(B);
200 | 	Free2DArray< double >(C1);
201 | 	Free2DArray< double >(C4);
202 | 
203 | 	return 0;   
204 | }  
205 | 


--------------------------------------------------------------------------------
/omp_hands_on.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tgmattso/OpenMP_intro_tutorial/56a6b0160e3f5c0a5711658436e3f9657ee57b09/omp_hands_on.pdf


--------------------------------------------------------------------------------
/pi.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | This program will numerically compute the integral of
 4 | 
 5 |                   4/(1+x*x) 
 6 | 				  
 7 | from 0 to 1.  The value of this integral is pi -- which 
 8 | is great since it gives us an easy way to check the answer.
 9 | 
10 | This is the original sequential program.  It uses the timer
11 | from the OpenMP runtime library
12 | 
13 | History: Written by Tim Mattson, 11/99.
14 | 
15 | */
16 | #include <stdio.h>
17 | #include <omp.h>
18 | static long num_steps = 100000000;
19 | double step;
20 | int main ()
21 | {
22 | 	  int i;
23 | 	  double x, pi, sum = 0.0;
24 | 	  double start_time, run_time;
25 | 
26 | 	  step = 1.0/(double) num_steps;
27 | 
28 |         	 
29 | 	  start_time = omp_get_wtime();
30 | 
31 | 	  for (i=1;i<= num_steps; i++){
32 | 		  x = (i-0.5)*step;
33 | 		  sum = sum + 4.0/(1.0+x*x);
34 | 	  }
35 | 
36 | 	  pi = step * sum;
37 | 	  run_time = omp_get_wtime() - start_time;
38 | 	  printf("\n pi with %ld steps is %lf in %lf seconds\n ",num_steps,pi,run_time);
39 | }	  
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/pi_mc.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | 
  3 | NAME:
  4 |    Pi_mc:  PI Monte Carlo
  5 | 
  6 | Purpose:
  7 |    This program uses a Monte Carlo algorithm to compute PI as an
  8 |    example of how random number generators are used to solve problems.
  9 |    Note that if your goal is to find digits of pi, there are much 
 10 |    better algorithms you could use.
 11 | 
 12 | Usage:
 13 |    To keep the program as simple as possible, you must edit the file
 14 |    and change the value of num_trials to change the number of samples
 15 |    used.  Then compile and run the program.
 16 | 
 17 | Algorithm:
 18 |    The basic idea behind the algorithm is easy to visualize.  Draw a 
 19 |    square on a wall.  Inside the square, draw a circle.  Now randomly throw 
 20 |    darts at the wall.  some darts will land inside the square.  Of those, 
 21 |    some will fall inside the circle.   The probability of landing inside
 22 |    the circle or the square is proportional to their areas.
 23 | 
 24 |    We can use a random number generator to "throw the darts" and count
 25 |    how many "darts" fall inside the square and how many inside the 
 26 |    circle.  Dividing these two numbers gives us the ratio of their areas
 27 |    and from that we can compute pi.
 28 | 
 29 | Algorithm details:
 30 |    To turn this into code, I need a bit more detail.  Assume the circle
 31 |    is centered inside the square.  The circle will have a radius of r and 
 32 |    each side of the square will be of length 2*r (i.e. the diameter of the
 33 |    circle).  
 34 | 
 35 |        A(circle) = pi * r^2
 36 |        A(square) = (2*r)*(2*r) = 4*r^2
 37 | 
 38 |        ratio = A(circle)/A(square) = pi/4
 39 | 
 40 |    Since the probability (P) of a dart falling inside a figure (i.e. the square 
 41 |    or the circle) is proportional to the area, we have
 42 | 
 43 |        ratio = P(circle)/P(square) = pi/4
 44 | 
 45 |    If I throw N darts as computed by random numbers evenly distributed 
 46 |    over the area of the square
 47 | 
 48 |       P(square) = N/N    .... i.e. every dart lands in the square
 49 |       P(circle) = N(circle)/N
 50 | 
 51 |       ratio = (N(circle)/N)/(N/N)  = N(circle)/N
 52 | 
 53 |    Hence, to find the area, I compute N random "darts" and count how many fall
 54 |    inside the circle.  The equation for a circle is
 55 | 
 56 |       x^2 + y^2 = r^2 
 57 | 
 58 |    So I randomly compute "x" and "y" evenly distributed from -r to r and 
 59 |    count the "dart" as falling inside the circle if
 60 | 
 61 |       x^2 + y^2 < or = r^2
 62 | 
 63 | Results:  
 64 |    Remember, our goal is to demonstrate a simple monte carlo algorithm, 
 65 |    not compute pi.  But just for the record, here are some results (Intel compiler
 66 |    version 10.0, Windows XP, core duo laptop)
 67 | 
 68 |        100        3.160000
 69 |        1000       3.148000
 70 |        10000      3.154000
 71 |        100000     3.139920
 72 |        1000000    3.141456
 73 |        10000000   3.141590
 74 |        100000000  3.141581
 75 | 
 76 |    As a point of reference, the first 7 digits of the true value of pi 
 77 |    is 3.141592 
 78 | 
 79 | 
 80 | History: 
 81 |    Written by Tim Mattson, 9/2007.
 82 | 
 83 | */
 84 | #include <stdio.h>
 85 | #include <omp.h>
 86 | #include "random.h"
 87 | 
 88 | // 
 89 | // The monte carlo pi program
 90 | //
 91 | 
 92 | static long num_trials = 10000;
 93 | 
 94 | int main ()
 95 | {
 96 |    long i;  long Ncirc = 0;
 97 |    double pi, x, y, test;
 98 |    double r = 1.0;   // radius of circle. Side of squrare is 2*r 
 99 | 
100 |    seed(-r, r);  // The circle and square are centered at the origin
101 | #pragma omp parallel for private(x,y,test) reduction(+:Ncirc)
102 |    for(i=0;i<num_trials; i++)
103 |    {
104 |       x = drandom(); 
105 |       y = drandom();
106 | 
107 |       test = x*x + y*y;
108 | 
109 |       if (test <= r*r) Ncirc++;
110 |     }
111 | 
112 |     pi = 4.0 * ((double)Ncirc/(double)num_trials);
113 | 
114 |     printf("\n %ld trials, pi is %lf \n",num_trials, pi);
115 | 
116 |     return 0;
117 | }
118 | 	  
119 | 
120 | 
121 | 
122 | 
123 | 
124 | 


--------------------------------------------------------------------------------
/prod_cons.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | **  PROGRAM: A simple serial producer/consumer program
 3 | **
 4 | **  One function generates (i.e. produces) an array of random values.  
 5 | **  A second function consumes that array and sums it.
 6 | **
 7 | **  HISTORY: Written by Tim Mattson, April 2007.
 8 | */
 9 | #include <omp.h>
10 | #ifdef APPLE
11 | #include <stdlib.h>
12 | #else
13 | #include <malloc.h>
14 | #endif
15 | #include <stdio.h>
16 | 
17 | #define N        10000
18 | 
19 | /* Some random number constants from Numerical Recipes */
20 | #define SEED       2531
21 | #define RAND_MULT  1366
22 | #define RAND_ADD   150889
23 | #define RAND_MOD   714025
24 | int randy = SEED;
25 | 
26 | /* function to fill an array with random numbers */
27 | void fill_rand(int length, double *a)
28 | {
29 |    int i; 
30 |    for (i=0;i<length;i++) {
31 |      randy = (RAND_MULT * randy + RAND_ADD) % RAND_MOD;
32 |      *(a+i) = ((double) randy)/((double) RAND_MOD);
33 |    }   
34 | }
35 | 
/* Consumer: return a[0] + a[1] + ... + a[length-1], added up in index order. */
double Sum_array(int length, double *a)
{
   double total = 0.0;
   int idx;

   for (idx = 0; idx < length; ++idx) {
      total += a[idx];
   }

   return total;
}
43 |   
44 | int main()
45 | {
46 |   double *A, sum, runtime;
47 |   int flag = 0;
48 | 
49 |   A = (double *)malloc(N*sizeof(double));
50 | 
51 |   runtime = omp_get_wtime();
52 | 
53 |   fill_rand(N, A);        // Producer: fill an array of data
54 | 
55 |   sum = Sum_array(N, A);  // Consumer: sum the array
56 |    
57 |   runtime = omp_get_wtime() - runtime;
58 | 
59 |   printf(" In %f seconds, The sum is %f \n",runtime,sum);
60 | }
61 |  
62 | 


--------------------------------------------------------------------------------
/random.c:
--------------------------------------------------------------------------------
 1 | 
 2 | //**********************************************************
 3 | // Pseudo random number generator:
 4 | //     double drandom
 5 | //     void seed (lower_limit, higher_limit)
 6 | //**********************************************************
 7 | //
 8 | // A simple linear congruential random number generator
 9 | // (Numerical Recipes chapter 7, 1st ed.) with parameters
10 | // from the table on page 198j.
11 | //
12 | //  Uses a linear congruential generator to return a value between
13 | //  0 and 1, then scales and shifts it to fill the desired range.  This
14 | //  range is set when the random number generator seed is called.
15 | // 
16 | // USAGE:
17 | //
18 | //      pseudo random sequence is seeded with a range
19 | //
20 | //            void seed(lower_limit, higher_limit)
21 | //   
22 | //      and then subsequent calls to the random number generator generates values
23 | //      in the sequence:
24 | //
25 | //            double drandom()
26 | //
27 | // History: 
28 | //      Written by Tim Mattson, 9/2007.
29 | //      changed to drandom() to avoid collision with standard libraries, 11/2011
30 | 
// Generator parameters and state.  random_last holds the most recent
// integer of the sequence; random_low and random_hi bound the output range.
static long MULTIPLIER  = 1366;
static long ADDEND      = 150889;
static long PMOD        = 714025;
long random_last = 0;
double random_low, random_hi;

//
// Advance the sequence by one step and return the new value scaled
// and shifted into the range random_low .. random_hi.
//
double drandom()
{
    double unit;   // new state as a fraction of PMOD

    random_last = (MULTIPLIER * random_last + ADDEND) % PMOD;
    unit = (double)random_last / (double)PMOD;

    return unit * (random_hi - random_low) + random_low;
}
54 | //
55 | // set the seed and the range
56 | //
57 | void seed(double low_in, double hi_in)
58 | {
59 |    if(low_in < hi_in)
60 |    { 
61 |       random_low = low_in;
62 |       random_hi  = hi_in;
63 |    }
64 |    else
65 |    {
66 |       random_low = hi_in;
67 |       random_hi  = low_in;
68 |    }
69 |    random_last = PMOD/ADDEND;  // just pick something
70 | 
71 | }
72 | //**********************************************************
73 | // end of pseudo random generator code.
74 | //**********************************************************
75 | 
76 | 


--------------------------------------------------------------------------------
/random.h:
--------------------------------------------------------------------------------
1 | double drandom();
2 | void seed(double low_in, double hi_in);
3 | 
4 | 


--------------------------------------------------------------------------------
/solutions/hello_par.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <omp.h>
 3 |  
 4 | int main ()  
 5 | {
 6 |    int nthreads = 4;
 7 |    omp_set_num_threads(nthreads);
 8 | 
 9 |    #pragma omp parallel
10 |    {
11 |       int id = omp_get_thread_num();
12 | 
13 |       printf("Hello World from thread = %d", id);
14 |       printf(" with %d threads\n",omp_get_num_threads());
15 |    }  
16 | 
17 |    printf("all done, with hopefully %d threads\n",nthreads);
18 | 
19 | }
20 | 
21 |  
22 | 
23 | 


--------------------------------------------------------------------------------
/solutions/linked_omp25.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <stdio.h>
  3 | #include "omp.h"
  4 | 
  5 | #define N 5
  6 | #define FS 38
  7 | #define NMAX 10
  8 | 
  9 | struct node {
 10 |    int data;
 11 |    int fibdata;
 12 |    struct node* next;
 13 | };
 14 | 
 15 | int fib(int n) {
 16 |    int x, y;
 17 |    if (n < 2) {
 18 |       return (n);
 19 |    } else {
 20 |       x = fib(n - 1);
 21 |       y = fib(n - 2);
 22 | 	  return (x + y);
 23 |    }
 24 | }
 25 | 
 26 | void processwork(struct node* p) 
 27 | {
 28 |    int n;
 29 |    n = p->data;
 30 |    p->fibdata = fib(n);
 31 | }
 32 | 
 33 | struct node* init_list(struct node* p) {
 34 |     int i;
 35 |     struct node* head = NULL;
 36 |     struct node* temp = NULL;
 37 |     
 38 |     head = malloc(sizeof(struct node));
 39 |     p = head;
 40 |     p->data = FS;
 41 |     p->fibdata = 0;
 42 |     for (i=0; i< N; i++) {
 43 |        temp  = malloc(sizeof(struct node));
 44 |        p->next = temp;
 45 |        p = temp;
 46 |        p->data = FS + i + 1;
 47 |        p->fibdata = i+1;
 48 |     }
 49 |     p->next = NULL;
 50 |     return head;
 51 | }
 52 | 
 53 | int main(int argc, char *argv[]) {
 54 |      double start, end;
 55 |      struct node *p=NULL;
 56 |      struct node *temp=NULL;
 57 |      struct node *head=NULL;
 58 |      struct node *parr[NMAX]; 
 59 |      int i, count=0;
 60 |      
 61 |      printf("Process linked list\n");
 62 |      printf("  Each linked list node will be processed by function 'processwork()'\n");
 63 |      printf("  Each ll node will compute %d fibonacci numbers beginning with %d\n",N,FS);      
 64 |  
 65 |      p = init_list(p);
 66 |      head = p;
 67 | 
 68 | 
 69 |      start = omp_get_wtime();
 70 |      {
 71 |         while (p != NULL) {
 72 | 		   processwork(p);
 73 | 		   p = p->next;
 74 |         }
 75 |      }
 76 | 
 77 |      end = omp_get_wtime();
 78 | 
 79 |      printf("serial Compute Time: %f seconds\n", end - start);
 80 | 
 81 | 
 82 |      p = head;
 83 | 
 84 |      start = omp_get_wtime();
 85 |      {
 86 |         // count number of items in the list.  Strictly speaking this isn't 
 87 |         // needed since we know there are N elements in the list.  But in 
 88 |         // most cases you don't know this and need to count nodes. 
 89 |         while (p != NULL) {
 90 | 	  	   p = p->next;
 91 |                count++;
 92 |         }
 93 |       
 94 |         // traverse the list and collect pointers into an array.
 95 |         p = head;
 96 |         for(i=0; i<count; i++) {
 97 |                parr[i] = p;
 98 |                p = p->next;
 99 |         }
100 |        
101 |         // do the work in parallel 
102 |         #pragma omp parallel 
103 |         {
104 |            #pragma omp single
105 |                printf(" %d threads \n",omp_get_num_threads());
106 |            #pragma omp for schedule(static,1)
107 |            for(i=0; i<count; i++)
108 | 		   processwork(parr[i]);
109 |         }
110 |      }
111 | 
112 |      end = omp_get_wtime();
113 |      p = head;
114 | 	 while (p != NULL) {
115 |         printf("%d : %d\n",p->data, p->fibdata);
116 |         temp = p->next;
117 |         free (p);
118 |         p = temp;
119 |      }  
120 |      free (p);
121 | 
122 |      printf("Compute Time: %f seconds\n", end - start);
123 | 
124 |      return 0;
125 | }
126 | 
127 | 


--------------------------------------------------------------------------------
/solutions/linked_omp3_tasks.c:
--------------------------------------------------------------------------------
  1 | #include <omp.h>
  2 | #include <stdlib.h>
  3 | #include <stdio.h>
  4 | 
  5 | 
  6 | #ifndef N
  7 | #define N 5
  8 | #endif
  9 | #ifndef FS
 10 | #define FS 38
 11 | #endif
 12 | 
 13 | struct node {
 14 |    int data;
 15 |    int fibdata;
 16 |    struct node* next;
 17 | };
 18 | 
 19 | struct node* init_list(struct node* p);
 20 | void processwork(struct node* p); 
 21 | int fib(int n); 
 22 | 
 23 | int fib(int n) 
 24 | {
 25 |    int x, y;
 26 |    if (n < 2) {
 27 |       return (n);
 28 |    } else {
 29 |       x = fib(n - 1);
 30 |       y = fib(n - 2);
 31 | 	  return (x + y);
 32 |    }
 33 | }
 34 | 
 35 | void processwork(struct node* p) 
 36 | {
 37 |    int n, temp;
 38 |    n = p->data;
 39 |    temp = fib(n);
 40 | 
 41 |    p->fibdata = temp;
 42 | 
 43 | }
 44 | 
 45 | struct node* init_list(struct node* p) 
 46 | {
 47 |     int i;
 48 |     struct node* head = NULL;
 49 |     struct node* temp = NULL;
 50 |     
 51 |     head = malloc(sizeof(struct node));
 52 |     p = head;
 53 |     p->data = FS;
 54 |     p->fibdata = 0;
 55 |     for (i=0; i< N; i++) {
 56 |        temp  = malloc(sizeof(struct node));
 57 |        p->next = temp;
 58 |        p = temp;
 59 |        p->data = FS + i + 1;
 60 |        p->fibdata = i+1;
 61 |     }
 62 |     p->next = NULL;
 63 |     return head;
 64 | }
 65 | 
 66 | int main() 
 67 | {
 68 |      double start, end;
 69 |      struct node *p=NULL;
 70 |      struct node *temp=NULL;
 71 |      struct node *head=NULL;
 72 | 
 73 |      printf("Process linked list\n");
 74 |      printf("  Each linked list node will be processed by function 'processwork()'\n");
 75 |      printf("  Each ll node will compute %d fibonacci numbers beginning with %d\n",N,FS);      
 76 | 
 77 |      p = init_list(p);
 78 |      head = p;
 79 | 
 80 |      start = omp_get_wtime();
 81 | 
 82 | 	#pragma omp parallel 
 83 | 	{
 84 |             #pragma omp master
 85 |                   printf("Threads:      %d\n", omp_get_num_threads());
 86 | 
 87 | 		#pragma omp single
 88 | 		{
 89 | 			p=head;
 90 | 			while (p) {
 91 | 				#pragma omp task firstprivate(p) //first private is required
 92 | 				{
 93 | 					processwork(p);
 94 | 				}
 95 | 			  p = p->next;
 96 | 		   }
 97 | 		}
 98 | 	}
 99 | 
100 |      end = omp_get_wtime();
101 |      p = head;
102 | 	 while (p != NULL) {
103 |         printf("%d : %d\n",p->data, p->fibdata);
104 |         temp = p->next;
105 |         free (p);
106 |         p = temp;
107 |      }  
108 | 	 free (p);
109 | 
110 |      printf("Compute Time: %f seconds\n", end - start);
111 | 
112 |      return 0;
113 | }
114 | 
115 | 


--------------------------------------------------------------------------------
/solutions/makefile:
--------------------------------------------------------------------------------
 1 | # 
 2 | include ../make.def
 3 | 
 4 | EXES=hello_par$(EXE) pi_spmd_simple$(EXE) pi_spmd_final$(EXE) \
 5 |      pi_loop$(EXE) matmul_par$(EXE) \
 6 |      prod_cons_par$(EXE)  mandel_par$(EXE) pi_mc$(EXE) \
 7 |      pi_mc_par$(EXE) linked_omp25$(EXE) linked_omp3_tasks$(EXE) 
 8 | 
 9 | all: $(EXES)
10 | 
11 | 
12 | hello_par$(EXE): hello_par.$(OBJ) 
13 | 	$(CLINKER) $(OPTFLAGS) -o hello_par hello_par.$(OBJ) $(LIBS)
14 | 
15 | pi_spmd_simple$(EXE): pi_spmd_simple.$(OBJ) 
16 | 	$(CLINKER) $(OPTFLAGS) -o pi_spmd_simple pi_spmd_simple.$(OBJ) $(LIBS)
17 | 
18 | pi_spmd_final$(EXE): pi_spmd_final.$(OBJ) 
19 | 	$(CLINKER) $(OPTFLAGS) -o pi_spmd_final pi_spmd_final.$(OBJ) $(LIBS)
20 | 
21 | pi_loop$(EXE): pi_loop.$(OBJ) 
22 | 	$(CLINKER) $(OPTFLAGS) -o pi_loop pi_loop.$(OBJ) $(LIBS)
23 | 
24 | pi_mc$(EXE): pi_mc.$(OBJ)
25 | 	$(CLINKER) $(OPTFLAGS) -o pi_mc pi_mc.$(OBJ) $(LIBS)
26 | 
27 | pi_mc_par$(EXE): pi_mc_par.$(OBJ) random_par.$(OBJ) 
28 | 	$(CLINKER) $(OPTFLAGS) -o pi_mc_par pi_mc_par.$(OBJ) random_par.$(OBJ) $(LIBS)
29 | 
30 | matmul_par$(EXE): matmul_par.$(OBJ) 
31 | 	$(CLINKER) $(OPTFLAGS) -o matmul_par matmul_par.$(OBJ) $(LIBS)
32 | 
33 | prod_cons_par$(EXE): prod_cons_par.$(OBJ)
34 | 	$(CLINKER) $(OPTFLAGS) -o prod_cons_par prod_cons_par.$(OBJ) $(LIBS)
35 | 
36 | mandel_par$(EXE): mandel_par.$(OBJ) 
37 | 	$(CLINKER) $(OPTFLAGS) -o mandel_par mandel_par.$(OBJ) $(LIBS)
38 | 
39 | linked_omp25$(EXE): linked_omp25.$(OBJ) 
40 | 	$(CLINKER) $(OPTFLAGS) -o linked_omp25 linked_omp25.$(OBJ) $(LIBS)
41 | 
42 | linked_omp3_tasks$(EXE): linked_omp3_tasks.$(OBJ) 
43 | 	$(CLINKER) $(OPTFLAGS) -o linked_omp3_tasks linked_omp3_tasks.$(OBJ) $(LIBS)
44 | 
45 | test: $(EXES)
46 | 	$(PRE)hello_par$(EXE) 
47 | 	$(PRE)pi_spmd_simple$(EXE) 
48 | 	$(PRE)pi_spmd_final$(EXE) 
49 | 	$(PRE)pi_loop$(EXE) 
50 | 	$(PRE)matmul_par$(EXE) 
51 | 	$(PRE)prod_cons_par$(EXE)  
52 | 	$(PRE)mandel_par$(EXE) 
53 | 	$(PRE)pi_mc$(EXE) 
54 | 	$(PRE)pi_mc_par$(EXE) 
55 | 	$(PRE)linked_omp25$(EXE) 
56 | 	$(PRE)linked_omp3_tasks$(EXE) 
57 | 
58 | clean:
59 | 	$(RM) $(EXES) *.$(OBJ)
60 | 
61 | .SUFFIXES:
62 | .SUFFIXES: .c .cpp  .$(OBJ)
63 | 
64 | .c.$(OBJ):
65 | 	$(CC) $(CFLAGS) -c $<
66 | 
67 | .cpp.$(OBJ):
68 | 	$(CC) $(CFLAGS) -c $<
69 | 


--------------------------------------------------------------------------------
/solutions/mandel_par.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | **  PROGRAM: Mandelbrot area (solution)
 3 | **
 4 | **  PURPOSE: Program to compute the area of a  Mandelbrot set.
 5 | **           The correct answer should be around 1.510659.
 6 | **
 7 | **  USAGE:   Program runs without input ... just run the executable
 8 | **
 9 | **  ADDITIONAL EXERCISES:  Experiment with the schedule clause to fix 
10 | **               the load imbalance.   Experiment with atomic vs. critical vs.
11 | **               reduction for numoutside.
12 | **            
13 | **  HISTORY: Written:  (Mark Bull, August 2011).
14 | **
15 | **           Changed "complex" to "d_complex" to avoid collision with 
16 | **           math.h complex type.   Fixed data environment errors
17 | **          (Tim Mattson, September 2011)
18 | */
19 | 
20 | #include <stdio.h>
21 | #include <stdlib.h>
22 | #include <math.h>
23 | #include <omp.h>
24 | 
25 | # define NPOINTS 1000
26 | # define MAXITER 1000
27 | 
28 | struct d_complex{
29 |    double r;
30 |    double i;
31 | };
32 | 
33 | void testpoint(struct d_complex);
34 | 
35 | struct d_complex c;
36 | int numoutside = 0;
37 | 
38 | int main(){
39 |    int i, j;
40 |    double area, error, eps  = 1.0e-5;
41 | 
42 | 
43 | //   Loop over grid of points in the complex plane which contains the Mandelbrot set,
44 | //   testing each point to see whether it is inside or outside the set.
45 |    omp_set_num_threads(4);
46 | #pragma omp parallel for default(shared) firstprivate(eps)  private(c, j)
47 |    for (i=0; i<NPOINTS; i++) {
48 |      for (j=0; j<NPOINTS; j++) {
49 |        c.r = -2.0+2.5*(double)(i)/(double)(NPOINTS)+eps;
50 |        c.i = 1.125*(double)(j)/(double)(NPOINTS)+eps;
51 |        testpoint(c);
52 |      }
53 |    }
54 | 
55 | // Calculate area of set and error estimate and output the results
56 |    
57 | area=2.0*2.5*1.125*(double)(NPOINTS*NPOINTS-numoutside)/(double)(NPOINTS*NPOINTS);
58 |    error=area/(double)NPOINTS;
59 | 
60 |    printf("Area of Mandlebrot set = %12.8f +/- %12.8f\n",area,error);
61 | 
62 | }
63 | 
64 | void testpoint(struct d_complex c){
65 | 
66 | // Does the iteration z=z*z+c, until |z| > 2 when point is known to be outside set
67 | // If loop count reaches MAXITER, point is considered to be inside the set
68 | 
69 |        struct d_complex z;
70 |        int iter;
71 |        double temp;
72 | 
73 |        z=c;
74 |        for (iter=0; iter<MAXITER; iter++){
75 |          temp = (z.r*z.r)-(z.i*z.i)+c.r;
76 |          z.i = z.r*z.i*2+c.i;
77 |          z.r = temp;
78 |          if ((z.r*z.r+z.i*z.i)>4.0) {
79 |         #pragma omp atomic
80 |            numoutside++;
81 |            break;
82 |          }
83 |        }
84 | 
85 | }
86 | 
87 | 


--------------------------------------------------------------------------------
/solutions/matmul_par.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | **  PROGRAM: Parallel Matrix Multiply (using OpenMP)
  3 | **
  4 | **  PURPOSE: This is a simple matrix multiply program. 
  5 | **           It will compute the product
  6 | **
  7 | **                C  = A * B
  8 | **
  9 | **           A and B are set to constant matrices so we
 10 | **           can make a quick test of the multiplication.
 11 | **
 12 | **  USAGE:   Right now, I hardwire the matrix dimensions. 
 13 | **           later, I'll take them from the command line.
 14 | **  
 15 | **  HISTORY: Written by Tim Mattson, Nov 1999.
 16 | */
 17 | #ifdef APPLE
 18 | #include <stdlib.h>
 19 | #else
 20 | #include <malloc.h>
 21 | #endif
 22 | #include <stdio.h>
 23 | #include <omp.h>
 24 | 
 25 | #define ORDER 1000
 26 | #define AVAL 3.0
 27 | #define BVAL 5.0
 28 | #define TOL  0.001
 29 | 
 30 | int main(int argc, char *argv[])
 31 | {
 32 | 	int Ndim, Pdim, Mdim;   /* A[N][P], B[P][M], C[N][M] */
 33 | 	int i,j,k;
 34 | 	
 35 | 	double *A, *B, *C, cval, tmp, err, errsq;
 36 |       double dN, mflops;
 37 | 	double start_time, run_time;
 38 | 
 39 | 
 40 | 	Ndim = ORDER;
 41 | 	Pdim = ORDER;
 42 | 	Mdim = ORDER;
 43 | 
 44 | 	A = (double *)malloc(Ndim*Pdim*sizeof(double));
 45 |       B = (double *)malloc(Pdim*Mdim*sizeof(double));
 46 |       C = (double *)malloc(Ndim*Mdim*sizeof(double));
 47 | 
 48 | 	/* Initialize matrices */
 49 | 
 50 | 	for (i=0; i<Ndim; i++)
 51 | 		for (j=0; j<Pdim; j++)
 52 | 			*(A+(i*Ndim+j)) = AVAL;
 53 | 
 54 | 	for (i=0; i<Pdim; i++)
 55 | 		for (j=0; j<Mdim; j++)
 56 | 			*(B+(i*Pdim+j)) = BVAL;
 57 | 
 58 | 	for (i=0; i<Ndim; i++)
 59 | 		for (j=0; j<Mdim; j++)
 60 | 			*(C+(i*Ndim+j)) = 0.0;
 61 | 	
 62 | 		start_time = omp_get_wtime();
 63 | 
 64 | 	/* Do the matrix product */
 65 | 
 66 | #pragma omp parallel for private(tmp, i, j, k)  
 67 | 	for (i=0; i<Ndim; i++){
 68 | 		for (j=0; j<Mdim; j++){
 69 |  
 70 |                         tmp = 0.0;
 71 | 
 72 | 			for(k=0;k<Pdim;k++){
 73 | 				/* C(i,j) = sum(over k) A(i,k) * B(k,j) */
 74 | 				tmp += *(A+(i*Ndim+k)) *  *(B+(k*Pdim+j));
 75 | 			}
 76 | 			*(C+(i*Ndim+j)) = tmp;
 77 | 		}
 78 | 	}
 79 | 	/* Check the answer */
 80 | 
 81 | 	run_time = omp_get_wtime() - start_time;
 82 | 
 83 | 	printf(" Order %d multiplication in %f seconds \n", ORDER, run_time);
 84 |       printf(" %d threads\n",omp_get_max_threads());
 85 |       dN = (double)ORDER;
 86 |       mflops = 2.0 * dN * dN * dN/(1000000.0* run_time);
 87 | 
 88 |       printf(" Order %d multiplication at %f mflops\n", ORDER, mflops);
 89 | 
 90 | 	cval = Pdim * AVAL * BVAL;
 91 | 	errsq = 0.0;
 92 | 	for (i=0; i<Ndim; i++){
 93 | 		for (j=0; j<Mdim; j++){
 94 | 			err = *(C+i*Ndim+j) - cval;
 95 | 		    errsq += err * err;
 96 | 		}
 97 | 	}
 98 | 
 99 | 	if (errsq > TOL) 
100 | 		printf("\n Errors in multiplication: %f",errsq);
101 | 	else
102 | 		printf("\n Hey, it worked");
103 | 
104 | 	printf("\n all done \n");
105 | }
106 | 


--------------------------------------------------------------------------------
/solutions/pi_loop.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | This program will numerically compute the integral of
 4 | 
 5 |                   4/(1+x*x) 
 6 | 				  
 7 | from 0 to 1.  The value of this integral is pi -- which 
 8 | is great since it gives us an easy way to check the answer.
 9 | 
10 | The program was parallelized using OpenMP by adding just
11 | four lines 
12 | 
13 | (1) A line to include omp.h -- the include file that 
14 | contains OpenMP's function prototypes and constants.
15 | 
16 | (2) A pragma that tells OpenMP to create a team of threads
17 | 
18 | (3) A pragma to cause one of the threads to print the
19 | number of threads being used by the program.
20 | 
21 | (4) A pragma to split up loop iterations among the team
22 | of threads.  This pragma includes 2 clauses to (1) create a 
23 | private variable and (2) to cause the threads to compute their
24 | sums locally and then combine their local sums into a 
25 | single global value.
26 | 
27 | History: Written by Tim Mattson, 11/99.
28 | 
29 | */
30 | #include <stdio.h>
31 | #include <omp.h>
32 | static long num_steps = 100000000;
33 | double step;
34 | int main ()
35 | {
36 | 	  int i;
37 | 	  double x, pi, sum = 0.0;
38 | 	  double start_time, run_time;
39 | 
40 | 	  step = 1.0/(double) num_steps;
41 | 	 for (i=1;i<=4;i++){
42 |           sum = 0.0;
43 |           omp_set_num_threads(i);
44 | 	  start_time = omp_get_wtime();
45 | #pragma omp parallel  
46 | {
47 | #pragma omp single
48 | 	  printf(" num_threads = %d",omp_get_num_threads());
49 | 
50 | #pragma omp for reduction(+:sum)
51 | 	  for (i=1;i<= num_steps; i++){
52 | 		  x = (i-0.5)*step;
53 | 		  sum = sum + 4.0/(1.0+x*x);
54 | 	  }
55 | }
56 | 	  pi = step * sum;
57 | 	  run_time = omp_get_wtime() - start_time;
58 | 	  printf("\n pi is %f in %f seconds and %d threads\n",pi,run_time,i);
59 | }
60 | }	  
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/solutions/pi_mc.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | 
  3 | NAME:
  4 |    Pi_mc:  PI Monte Carlo
  5 | 
  6 | Purpose:
  7 |    This program uses a Monte Carlo algorithm to compute PI as an
  8 |    example of how random number generators are used to solve problems.
  9 |    Note that if your goal is to find digits of pi, there are much 
 10 |    better algorithms you could use.
 11 | 
 12 | Usage:
 13 |    To keep the program as simple as possible, you must edit the file
 14 |    and change the value of num_trials to change the number of samples
 15 |    used.  Then compile and run the program.
 16 | 
 17 | Algorithm:
 18 |    The basic idea behind the algorithm is easy to visualize.  Draw a 
 19 |    square on a wall.  Inside the square, draw a circle.  Now randomly throw 
 20 |    darts at the wall.  some darts will land inside the square.  Of those, 
 21 |    some will fall inside the circle.   The probability of landing inside
 22 |    the circle or the square is proportional to their areas.
 23 | 
 24 |    We can use a random number generator to "throw the darts" and count
 25 |    how many "darts" fall inside the square and how many inside the 
 26 |    circle.  Dividing these two numbers gives us the ratio of their areas
 27 |    and from that we can compute pi.
 28 | 
 29 | Algorithm details:
 30 |    To turn this into code, I need a bit more detail.  Assume the circle
 31 |    is centered inside the square.  the circle will have a radius of r and 
 32 |    each side of the square will be of length 2*r (i.e. the diameter of the
 33 |    circle).  
 34 | 
 35 |        A(circle) = pi * r^2
 36 |        A(square) = (2*r)*(2*r) = 4*r^2
 37 | 
 38 |        ratio = A(circle)/A(square) = pi/4
 39 | 
 40 |    Since the probability (P) of a dart falling inside a figure (i.e. the square 
 41 |    or the circle) is proportional to the area, we have
 42 | 
 43 |        ratio = P(circle)/P(square) = pi/4
 44 | 
 45 |    If I throw N darts as computed by random numbers evenly distributed 
 46 |    over the area of the square
 47 | 
 48 |       P(square) = N/N    .... i.e. every dart lands in the square
 49 |       P(circle) = N(circle)/N
 50 | 
 51 |       ratio = (N(circle)/N)/(N/N)  = N(circle)/N
 52 | 
 53 |    Hence, to find the area, I compute N random "darts" and count how many fall
 54 |    inside the circle.  The equation for a circle is
 55 | 
 56 |       x^2 + y^2 = r^2 
 57 | 
 58 |    So I randomly compute "x" and "y" evenly distributed from -r to r and 
 59 |    count the "dart" as falling inside the circle if
 60 | 
 61 |       x^2 + y^2 < or = r^2
 62 | 
 63 | Supporting functions:
 64 |    For maximum portability, this file includes a very simple random number
 65 |    generator.  This is not a high quality generator and should not be used
 66 |    for serious work.
 67 | 
 68 |    The Generator is a linear congruential generator with constants selected
 69 |    to yield decent results for sequences with fewer than 2^28 numbers.  The
 70 |    pseudo random sequence is seeded with a range
 71 | 
 72 |        void seed(lower_limit, higher_limit)
 73 |    
 74 |    and then subsequent calls to the random number generator generates values
 75 |    in the sequence:
 76 | 
 77 |        double drandom()
 78 | 
 79 | Results:  
 80 |    Remember, our goal is to demonstrate a simple monte carlo algorithm, 
 81 |    not compute pi.  But just for the record, here are some results (Intel compiler
 82 |    version 10.0, Windows XP, core duo laptop)
 83 | 
 84 |        100        3.160000
 85 |        1000       3.148000
 86 |        10000      3.154000
 87 |        100000     3.139920
 88 |        1000000    3.141456
 89 |        10000000   3.141590
 90 |        100000000  3.141581
 91 | 
 92 |    As a point of reference, the first 7 digits of the true value of pi 
 93 |    is 3.141592 
 94 | 
 95 | 
 96 | History: 
 97 |    Written by Tim Mattson, 9/2007.
 98 | 
 99 | */
100 | #include <stdio.h>
101 | #include <omp.h>
102 | 
103 | //**********************************************************
104 | // Pseudo random number generator:
105 | //     double drandom
106 | //     void seed (lower_limit, higher_limit)
107 | //**********************************************************
108 | //
109 | // A simple linear congruential random number generator
110 | // (Numerical Recipes chapter 7, 1st ed.) with parameters
111 | // from the table on page 198j.
112 | //
113 | //  Uses a linear congruential generator to return a value between
114 | //  0 and 1, then scales and shifts it to fill the desired range.  This
115 | //  range is set when the random number generator seed is called.
116 | //
// Constants of the linear congruential step, followed by the generator
// state (random_last) and the bounds of the output range.
static long MULTIPLIER  = 1366;
static long ADDEND      = 150889;
static long PMOD        = 714025;
long random_last = 0;
double random_low, random_hi;

//
// Take one step of the sequence, remember it as the new state, and
// return it mapped from 0..PMOD onto random_low..random_hi.
//
double drandom()
{
    long stepped = (MULTIPLIER * random_last + ADDEND) % PMOD;
    double width = random_hi - random_low;

    random_last = stepped;

    return ((double)stepped / (double)PMOD) * width + random_low;
}
140 | //
141 | // set the seed and the range
142 | //
143 | void seed(double low_in, double hi_in)
144 | {
145 |    if(low_in < hi_in)
146 |    { 
147 |       random_low = low_in;
148 |       random_hi  = hi_in;
149 |    }
150 |    else
151 |    {
152 |       random_low = hi_in;
153 |       random_hi  = low_in;
154 |    }
155 |    random_last = PMOD/ADDEND;  // just pick something
156 | 
157 | }
158 | //**********************************************************
159 | // end of pseudo random generator code.
160 | //**********************************************************
161 | 
162 | // 
163 | // The monte carlo pi program
164 | //
165 | 
166 | static long num_trials = 100000;
167 | 
168 | int main ()
169 | {
170 |    long i;  long Ncirc = 0;
171 |    double pi, x, y, test;
172 |    double r = 1.0;   // radius of circle. Side of squrare is 2*r 
173 | 
174 |    seed(-r, r);  // The circle and square are centered at the origin
175 | 
176 |    for(i=0;i<num_trials; i++)
177 |    {
178 |       x = drandom(); 
179 |       y = drandom();
180 | 
181 |       test = x*x + y*y;
182 | 
183 |       if (test <= r*r) Ncirc++;
184 |     }
185 | 
186 |     pi = 4.0 * ((double)Ncirc/(double)num_trials);
187 | 
188 |     printf("\n %ld trials, pi is %lf \n",num_trials, pi);
189 | 
190 |     return 0;
191 | }
192 | 	  
193 | 
194 | 
195 | 
196 | 
197 | 
198 | 


--------------------------------------------------------------------------------
/solutions/pi_mc_par.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <stdio.h>
 3 | #include <omp.h>
 4 | #include "random.h"
 5 | 
 6 | 
 7 | static long num_trials = 1000000;
 8 | 
 9 | int main ()
10 | {
11 |    long i;  long Ncirc = 0;
12 |    double pi, x, y, test, time;
13 |    double r = 1.0;   // radius of circle. Side of squrare is 2*r 
14 | 
15 |    time = omp_get_wtime();
16 |    #pragma omp parallel
17 |    {
18 | 
19 |       #pragma omp single
20 |           printf(" %d threads ",omp_get_num_threads());
21 | 
22 |       seed(-r, r);  
23 |       #pragma omp for reduction(+:Ncirc) private(x,y,test)
24 |       for(i=0;i<num_trials; i++)
25 |       {
26 |          x = drandom(); 
27 |          y = drandom();
28 | 
29 |          test = x*x + y*y;
30 | 
31 |          if (test <= r*r) Ncirc++;
32 |        }
33 |     }
34 | 
35 |     pi = 4.0 * ((double)Ncirc/(double)num_trials);
36 | 
37 |     printf("\n %ld trials, pi is %lf ",num_trials, pi);
38 |     printf(" in %lf seconds\n",omp_get_wtime()-time);
39 | 
40 |     return 0;
41 | }
42 | 


--------------------------------------------------------------------------------
/solutions/pi_spmd_final.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | NAME:   PI SPMD final version without false sharing
 4 | 
 5 | This program will numerically compute the integral of
 6 | 
 7 |                   4/(1+x*x) 
 8 | 				  
 9 | from 0 to 1.  The value of this integral is pi -- which 
10 | is great since it gives us an easy way to check the answer.
11 | 
12 | The program was parallelized using OpenMP and an SPMD 
13 | algorithm.  The following OpenMP specific lines were 
14 | added: 
15 | 
16 | (1) A line to include omp.h -- the include file that 
17 | contains OpenMP's function prototypes and constants.
18 | 
19 | (2) A pragma that tells OpenMP to create a team of threads
20 | with an integer variable i being created for each thread.
21 | 
22 | (3) two function calls: one to get the thread ID (ranging
23 | from 0 to one less than the number of threads), and the other
24 | returning the total number of threads.
25 | 
26 | (4) A "single" construct so only one thread prints the number
27 | of threads.
28 | 
29 | (5) A cyclic distribution of the loop by changing loop control
30 | expressions to run from the thread ID incremented by the number 
31 | of threads.  Local sums accumulated into the private partial_sum.
32 | 
33 | (6) A critical construct so threads add their local sums into a
34 | single global sum one at a time.
35 | 
36 | (7) The implied barrier closing the parallel region, so everyone's done.
37 | 
38 | Note that this program avoids the false sharing problem
39 | by storing partial sums into a private scalar.
40 | 
41 | History: Written by Tim Mattson, 11/99.
42 | 
43 | */
44 | 
45 | #include <stdio.h>
46 | #include <omp.h>
47 | 
48 | #define MAX_THREADS 4   /* largest thread count tried by the timing loop */
49 | 
50 | static long num_steps = 100000000;   /* number of rectangles in the integration */
51 | double step;                         /* width of one rectangle: 1/num_steps */
52 | int main ()
53 | {
54 | 	  /* Time the pi integration with 1..MAX_THREADS threads and print each result. */
55 | 	  int j;
56 | 	  double pi, full_sum = 0.0;
57 | 	  double start_time, run_time;
58 | 
59 | 	  step = 1.0/(double) num_steps;
60 | 
61 | 	  /* j is the number of threads requested for this timing run */
62 | for(j=1;j<=MAX_THREADS ;j++){
63 |    omp_set_num_threads(j);
64 |    full_sum = 0.0;
65 | 	  start_time = omp_get_wtime();
66 | #pragma omp parallel
67 | {
68 | 	  int id = omp_get_thread_num();
69 | 	  int numthreads = omp_get_num_threads();
70 | 	  long i;                   /* loop index; long to match num_steps */
71 | 	  double x;
72 | 	  double partial_sum = 0;   /* private to each thread, so no false sharing */
73 | 
74 | #pragma omp single
75 | 	  printf(" num_threads = %d",numthreads);
76 | 	  /* cyclic distribution: thread id handles steps id, id+numthreads, ... */
77 | 	  for (i=id;i< num_steps; i+=numthreads){
78 | 		  x = (i+0.5)*step;
79 | 		  partial_sum += 4.0/(1.0+x*x);
80 | 	  }
81 | #pragma omp critical
82 | 		  full_sum += partial_sum;   /* one thread at a time updates the shared total */
83 | }
84 |       
85 | 	  pi = step * full_sum;
86 | 	  run_time = omp_get_wtime() - start_time;
87 | 	  printf("\n pi is %f in %f seconds %d threds \n ",pi,run_time,j);
88 | }
89 | }	  
90 | 
91 | 
92 | 
93 | 
94 | 
95 | 


--------------------------------------------------------------------------------
/solutions/pi_spmd_simple.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | NAME: PI SPMD ... a simple version.
 4 | 
 5 | This program will numerically compute the integral of
 6 | 
 7 |                   4/(1+x*x) 
 8 | 				  
 9 | from 0 to 1.  The value of this integral is pi -- which 
10 | is great since it gives us an easy way to check the answer.
11 | 
12 | The program was parallelized using OpenMP and an SPMD 
13 | algorithm.  The following OpenMP specific lines were 
14 | added: 
15 | 
16 | (1) A line to include omp.h -- the include file that 
17 | contains OpenMP's function prototypes and constants.
18 | 
19 | (2) A pragma that tells OpenMP to create a team of threads
20 | with an integer variable i being created for each thread.
21 | 
22 | (3) two function calls: one to get the thread ID (ranging
23 | from 0 to one less than the number of threads), and the other
24 | returning the total number of threads.
25 | 
26 | (4) A cyclic distribution of the loop by changing loop control
27 | expressions to run from the thread ID incremented by the number 
28 | of threads.  Local sums accumulated into sum[id].
29 | 
30 | Note that this program will show low performance due to 
31 | false sharing.  In particular, sum[id] is unique to each
32 | thread, but adjacent values of this array share a cache line
33 | causing cache thrashing as the program runs.
34 | 
35 | History: Written by Tim Mattson, 11/99.
36 | 
37 | */
38 | 
39 | #include <stdio.h>
40 | #include <omp.h>
41 | 
42 | #define MAX_THREADS 4   /* largest thread count tried; also the size of sum[] */
43 | 
44 | static long num_steps = 100000000;   /* number of rectangles in the integration */
45 | double step;                         /* width of one rectangle: 1/num_steps */
46 | int main ()
47 | {
48 | 	  int i,j;
49 | 	  double pi, full_sum = 0.0;
50 | 	  double start_time, run_time;
51 | 	  double sum[MAX_THREADS];   /* one partial sum per thread (adjacent slots: false sharing) */
52 | 	  int nthreads = 1;          /* team size actually granted by the runtime */
53 | 	  step = 1.0/(double) num_steps;
54 | 
55 |    /* time the integration with 1, 2, ... MAX_THREADS requested threads */
56 |    for (j=1;j<=MAX_THREADS ;j++) {
57 | 
58 |       omp_set_num_threads(j);
59 |       full_sum=0.0;
60 |       start_time = omp_get_wtime();
61 | 
62 |       #pragma omp parallel
63 |       {
64 |         long i;               /* private loop index; long to match num_steps */
65 | 	  int id = omp_get_thread_num();
66 | 	  int numthreads = omp_get_num_threads();
67 | 	  double x;
68 | 
69 | 	  sum[id] = 0.0;
70 |         if (id == 0) {        /* only one thread records and reports the team size */
71 |              nthreads = numthreads;
72 |              printf(" num_threads = %d",numthreads);
73 |         }
74 | 	  for (i=id;i< num_steps; i+=numthreads){
75 | 		  x = (i+0.5)*step;
76 | 		  sum[id] = sum[id] + 4.0/(1.0+x*x);
77 | 	  }
78 |       }
79 | 	/* add only the slots that were written: the team may be smaller than j */
80 | 	for(full_sum = 0.0, i=0;i<nthreads;i++)
81 | 	    full_sum += sum[i];
82 | 
83 |       pi = step * full_sum;
84 |       run_time = omp_get_wtime() - start_time;
85 |       printf("\n pi is %f in %f seconds %d thrds \n",pi,run_time,j);
86 |    }
87 | }	  
88 | 
89 | 
90 | 
91 | 
92 | 
93 | 


--------------------------------------------------------------------------------
/solutions/prod_cons_par.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | **  PROGRAM: A simple SPMD producer/consumer program
 3 | **
 4 | **  PURPOSE: this is just a stupid little program to play around
 5 | **  with different ways data is shared between threads.
 6 | **
 7 | **  HISTORY: Written by Tim Mattson, April 2007.
 8 | */
 9 | #include "omp.h"
10 | #ifndef APPLE
11 | #include <malloc.h>
12 | #endif
13 | #include <stdio.h>
14 | #include <stdlib.h>
15 | 
16 | #define N        10000   /* number of doubles in the shared array */
17 | #define Nthreads 2       /* thread count requested in main */
18 | 
19 | /* Some random number constants from Numerical Recipes */
20 | #define SEED       2531
21 | #define RAND_MULT  1366
22 | #define RAND_ADD   150889
23 | #define RAND_MOD   714025
24 | int randy = SEED;        /* current generator state; advanced by fill_rand */
25 | 
26 | /* Fill a[0..length-1] with pseudo random values in [0,1), advancing the global randy. */
27 | void fill_rand(int length, double *a)
28 | {
29 |    int i; 
30 |    for (i=0;i<length;i++) {
31 |      randy = (RAND_MULT * randy + RAND_ADD) % RAND_MOD;   /* linear congruential step */
32 |      *(a+i) = ((double) randy)/((double) RAND_MOD);       /* scale by the modulus */
33 |    }   
34 | }
35 | 
36 | /* Return the sum of the first length elements of a (0.0 when length <= 0). */
37 | double Sum_array(int length, double *a)
38 | {
39 |    int i;  double sum = 0.0;
40 |    for (i=0;i<length;i++)  sum += *(a+i);  
41 |    return sum; 
42 | }
43 |   
44 | int main()
45 | {
46 |   double *A, sum, runtime;
47 |   int numthreads, flag = 0;   /* flag stays 0 until the producer has filled A */
48 |   /* Two sections hand array A from a producer to a consumer through flag. */
49 |   omp_set_num_threads(Nthreads);
50 | 
51 |   A = (double *)malloc(N*sizeof(double));
52 | 
53 |   #pragma omp parallel
54 |   {
55 |      #pragma omp master
56 |      {
57 |         numthreads = omp_get_num_threads();
58 |         if(numthreads != 2)
59 |         {
60 |            printf("error: incorect number of threads, %d \n",numthreads);
61 |            exit(-1);
62 |         }
63 |         runtime = omp_get_wtime();
64 |      }
65 |      #pragma omp barrier
66 |      #pragma omp sections
67 |      {
68 |         #pragma omp section
69 |         {  /* producer: fill A, make it visible, then raise flag */
70 |            fill_rand(N, A);
71 |            #pragma omp flush
72 |            #pragma omp atomic write
73 |            flag = 1;
74 |            #pragma omp flush (flag)
75 |         }
76 |         #pragma omp section
77 |         {  /* consumer: spin on an atomic read of flag, then sum A */
78 |            int flag_tmp = 0;   /* private snapshot of the shared flag */
79 |            while (flag_tmp != 1){
80 |               #pragma omp flush (flag)
81 |               #pragma omp atomic read
82 |               flag_tmp = flag;
83 |            }
84 |            #pragma omp flush
85 |            sum = Sum_array(N, A);
86 |         }
87 |       }
88 |       #pragma omp master
89 |          runtime = omp_get_wtime() - runtime;
90 |    }
91 |    printf(" with %d threads and %lf seconds, The sum is %lf \n",numthreads,runtime,sum);
92 | }
93 |  
94 | 


--------------------------------------------------------------------------------
/solutions/random.h:
--------------------------------------------------------------------------------
1 | double drandom();                       /* next pseudo random value in the range set by seed() */
2 | void seed(double low_in, double hi_in); /* set the output range (limits in either order) and restart the sequence */
3 | 
4 | 


--------------------------------------------------------------------------------
/solutions/random_par.c:
--------------------------------------------------------------------------------
  1 | 
  2 | //**********************************************************
  3 | // Parallel Pseudo random number generator:
  4 | //
  5 | // USAGE:
  6 | //
  7 | //  The pseudo random sequence is seeded with a range
  8 | //
  9 | //            void seed(lower_limit, higher_limit)
 10 | //   
 11 | //  and then subsequent calls to the random number generator 
 12 | //  generates values in the sequence:
 13 | //
 14 | //            double drandom()
 15 | //
 16 | //  A leap frog method is used to assure non-overlapping
 17 | //  sequences for each thread.
 18 | //
 19 | //  Note: these functions are to be called from inside the
 20 | //  OpenMP parallel region that will use the sequence.
 21 | //
 22 | //  BACKGROUND:
 23 | //
 24 | //  We are using a modulus of 2^31-1 and a multiplier from 
 25 | //  the Hoaglin LCGs in the following article:
 26 | //
 27 | //    http://random.mat.sbg.ac.at/~charly/server/node3.html#lcg
 28 | //
 29 | //   we are using a zero addend just to make the leap frog 
 30 | //   algorithm easier to implement.
 31 | //
 32 | //  HISTORY:
 33 | //
 34 | //  9/2008: Written by Tim Mattson by cutting and pasting 
 35 | //  from a generator written by Larry Meadows
 36 | //
 37 | //***********************************************************
 38 | #include <omp.h>
 39 | 
 40 | static unsigned long long MULTIPLIER  = 764261123;   // LCG multiplier
 41 | static unsigned long long PMOD        = 2147483647;  // LCG modulus, 2^31-1
 42 | static unsigned long long mult_n;                    // leap-frog multiplier, set by seed()
 43 | double random_low, random_hi;                        // output range, set by seed()
 44 | 
 45 | #define MAX_THREADS 128
 46 | static unsigned long long pseed[MAX_THREADS][4]; //[4] to pad to cache line
 47 |                                                  //size to avoid false sharing
 48 | unsigned long long random_last = 0;              // last raw value; one copy per thread
 49 | #pragma omp threadprivate(random_last)
 50 | 
 51 | 
 52 | double drandom()
 53 | {
 54 |     unsigned long long random_next;
 55 |     double ret_val;
 56 | 
 57 | // Returns the next pseudo random double for the calling thread:
 58 | // compute an integer random number from zero to mod
 59 | // (mult_n and random_last are both set up by seed()).
 60 |     random_next = (unsigned long long)((mult_n  * random_last)% PMOD);
 61 |     random_last = random_next;
 62 | 
 63 | //
 64 | // shift into preset range: scale to [0,1), then map onto [random_low, random_hi)
 65 | //
 66 |     ret_val = ((double)random_next/(double)PMOD)*(random_hi-random_low)+random_low;
 67 |     return ret_val;
 68 | }
 69 | 
 70 | // seed(low_in, hi_in): call from inside the parallel region that will use drandom().
 71 | // set the seed, the multiplier and the range
 72 | // NOTE(review): pseed has MAX_THREADS rows; a larger team would index past it.
 73 | void seed(double low_in, double hi_in)
 74 | {
 75 |    int i, id, nthreads;
 76 |    unsigned long long iseed;
 77 |    id = omp_get_thread_num();
 78 | 
 79 |    #pragma omp single
 80 |    {
 81 |       if(low_in < hi_in)
 82 |       { 
 83 |          random_low = low_in;
 84 |          random_hi  = hi_in;
 85 |       }
 86 |       else
 87 |       {
 88 |          random_low = hi_in;
 89 |          random_hi  = low_in;
 90 |       }
 91 |   
 92 | //
 93 | // The Leapfrog method ... adjust the multiplier so you stride through
 94 | // the sequence by increments of "nthreads" and adjust seeds so each 
 95 | // thread starts with the right offset
 96 | //
 97 | 
 98 |       nthreads = omp_get_num_threads();
 99 |       iseed = PMOD/MULTIPLIER;     // just pick a reasonable seed
100 |       pseed[0][0] = iseed;
101 |       mult_n = MULTIPLIER;
102 |       for (i = 1; i < nthreads; ++i)
103 |       {
104 | 	iseed = (unsigned long long)((MULTIPLIER * iseed) % PMOD);   // seed for thread i
105 | 	pseed[i][0] = iseed;
106 | 	mult_n = (mult_n * MULTIPLIER) % PMOD;   // ends as MULTIPLIER^nthreads mod PMOD
107 |       }
108 | 
109 |    }   // implicit barrier at the end of single: pseed is filled before it is read
110 |    random_last = (unsigned long long) pseed[id][0];   // each thread takes its own start value
111 | }
112 | 
113 | 


--------------------------------------------------------------------------------
/solutions/random_seq_lcg.c:
--------------------------------------------------------------------------------
 1 | 
 2 | //**********************************************************
 3 | // Pseudo random number generator:
 4 | //     double drandom
 5 | //     void seed (lower_limit, higher_limit)
 6 | //**********************************************************
 7 | //
 8 | // A simple linear congruential random number generator
 9 | // (Numerical Recipes chapter 7, 1st ed.) with parameters
10 | // from the table on page 198j.
11 | //
12 | //  Uses a linear congruential generator to return a value between
13 | //  0 and 1, then scales and shifts it to fill the desired range.  This
14 | //  range is set when the random number generator seed is called.
15 | // 
16 | // USAGE:
17 | //
18 | //      pseudo random sequence is seeded with a range
19 | //
20 | //            void seed(lower_limit, higher_limit)
21 | //   
22 | //      and then subsequent calls to the random number generator generates values
23 | //      in the sequence:
24 | //
25 | //            double drandom()
26 | //
27 | // History: 
28 | //      Written by Tim Mattson, 9/2007.
29 | 
30 | static long MULTIPLIER  = 1366;     // LCG multiplier
31 | static long ADDEND      = 150889;   // LCG increment
32 | static long PMOD        = 714025;   // LCG modulus
33 | long random_last = 0.0;             // last raw value produced; reset by seed()
34 | double random_low, random_hi;       // output range, set by seed()
35 | 
36 | double drandom()
37 | {
38 |     long random_next;
39 |     double ret_val;
40 | 
41 | // Returns the next value of the sequence, mapped into the range set by seed():
42 | // compute an integer random number from zero to mod
43 | //
44 |     random_next = (MULTIPLIER  * random_last + ADDEND)% PMOD;
45 |     random_last = random_next;
46 | 
47 | //
48 | // shift into preset range: scale to [0,1), then map onto [random_low, random_hi)
49 | //
50 |     ret_val = ((double)random_next/(double)PMOD)*(random_hi-random_low)+random_low;
51 |     return ret_val;
52 | }
53 | //
54 | // set the seed and the range
55 | // (the two limits may be given in either order; the smaller becomes the lower limit)
56 | void seed(double low_in, double hi_in)
57 | {
58 |    if(low_in < hi_in)
59 |    { 
60 |       random_low = low_in;
61 |       random_hi  = hi_in;
62 |    }
63 |    else
64 |    {
65 |       random_low = hi_in;
66 |       random_hi  = low_in;
67 |    }
68 |    random_last = PMOD/ADDEND;  // just pick something (integer division; same start every call)
69 | 
70 | }
71 | //**********************************************************
72 | // end of pseudo random generator code.
73 | //**********************************************************
74 | 
75 | 


--------------------------------------------------------------------------------
/win_intel.def:
--------------------------------------------------------------------------------
 1 | # for Intel compiler on windows
 2 | # copy to make.def
 3 | CC          = icl
 4 | CLINKER     = $(CC)
 5 | OPTFLAGS    = /Qopenmp
 6 | LIBS        =
 7 | PRE         =
 8 | 
 9 | CFLAGS	  = $(OPTFLAGS) 
10 | 
11 | OBJ=obj
12 | EXE=.exe
13 | RM=del
14 | 


--------------------------------------------------------------------------------