├── .DS_Store ├── 2DArray.h ├── README.md ├── README.txt ├── apple.def ├── hello.c ├── linked.c ├── linux_gnu.def ├── linux_intel.def ├── linux_pgi.def ├── make.def ├── makefile ├── mandel.c ├── matmul.c ├── matmul_recur.cpp ├── omp_hands_on.pdf ├── pi.c ├── pi_mc.c ├── prod_cons.c ├── random.c ├── random.h ├── solutions ├── hello_par.c ├── linked_omp25.c ├── linked_omp3_tasks.c ├── makefile ├── mandel_par.c ├── matmul_par.c ├── pi_loop.c ├── pi_mc.c ├── pi_mc_par.c ├── pi_spmd_final.c ├── pi_spmd_simple.c ├── prod_cons_par.c ├── random.h ├── random_par.c └── random_seq_lcg.c └── win_intel.def /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgmattso/OpenMP_intro_tutorial/56a6b0160e3f5c0a5711658436e3f9657ee57b09/.DS_Store -------------------------------------------------------------------------------- /2DArray.h: -------------------------------------------------------------------------------- 1 | template < typename T > 2 | T **Allocate2DArray( int nRows, int nCols) 3 | { 4 | //(step 1) allocate memory for array of elements of column 5 | T **ppi = (T **) malloc(sizeof(T *)*nRows); 6 | 7 | //(step 2) allocate memory for array of elements of each row 8 | T *curPtr = (T *) malloc(sizeof(T) * nRows * nCols); 9 | 10 | // Now point the pointers in the right place 11 | for( int i = 0; i < nRows; ++i) 12 | { 13 | *(ppi + i) = curPtr; 14 | curPtr += nCols; 15 | } 16 | return ppi; 17 | } 18 | 19 | template < typename T > 20 | void Free2DArray(T** Array) 21 | { 22 | free(*Array); 23 | free(Array); 24 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | This directory contains exercises and solutions for a hands-on 4 | OpenMP course. Information about these programs can be found 5 | in the comments and in the slides from the course. 
6 | 7 | To use these programs, copy the appropriate "def" file into 8 | "make.def". For example, on a Linux system running the GNU 9 | compilers, I'd type 10 | 11 | cp linux_gnu.def make.def 12 | 13 | Then build the programs and test them 14 | 15 | make test 16 | 17 | The solutions directory uses the same make.def file, so to build 18 | the solutions, just type "make test". The directory extras 19 | contains additional exercises for more advanced students. These 20 | have not been as carefully tested and may have problems building 21 | and running on some systems. 22 | 23 | We have tested these programs under Linux with the GNU and Intel compilers, 24 | and Windows 7 with the Intel compiler. We also tested these 25 | on OS-X with the GNU environment loaded with Apple's Xcode. Apple's 26 | OpenMP environment at this time does not support threadprivate 27 | variables, so the pi_mc solutions will not build (and need to 28 | be commented out from the makefile). 29 | 30 | We have used these programs with the PGI compiler (linux_pgi.def) 31 | but we have not tested this case recently and it may need some work. 32 | 33 | For Windows users, to run these on Windows 7 we used the following 34 | procedure. First go to the start menu, and select 35 | 36 | Intel parallel studio 2011/command prompt/ ia64 visual studio 2010 mode 37 | 38 | cd to the appropriate folder. Copy win_intel.def to make.def and then 39 | use nmake to build 40 | 41 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | This directory contains exercises and solutions for a hands-on 4 | OpenMP course. Information about these programs can be found 5 | in the comments and in the slides from the course. 6 | 7 | To use these programs, copy the appropriate "def" file into 8 | "make.def".
For example, on a Linux system running the GNU 9 | compilers, I'd type 10 | 11 | cp linux_gnu.def make.def 12 | 13 | Then build the programs and test them 14 | 15 | make test 16 | 17 | The solutions directory uses the same make.def file, so to build 18 | the solutions, just type "make test". The directory extras 19 | contains additional exercises for more advanced students. These 20 | have not been as carefully tested and may have problems building 21 | and running on some systems. 22 | 23 | We have tested these programs under Linux with the GNU and Intel compilers, 24 | and Windows 7 with the Intel compiler. We also tested these 25 | on OS-X with the GNU environment loaded with Apple's Xcode. Apple's 26 | OpenMP environment at this time does not support threadprivate 27 | variables, so the pi_mc solutions will not build (and need to 28 | be commented out from the makefile). 29 | 30 | We have used these programs with the PGI compiler (linux_pgi.def) 31 | but we have not tested this case recently and it may need some work. 32 | 33 | For Windows users, to run these on Windows 7 we used the following 34 | procedure. First go to the start menu, and select 35 | 36 | Intel parallel studio 2011/command prompt/ ia64 visual studio 2010 mode 37 | 38 | cd to the appropriate folder.
Copy win_intel.def to make.def and then 39 | use nmake to build 40 | 41 | -------------------------------------------------------------------------------- /apple.def: -------------------------------------------------------------------------------- 1 | # for g++ compiler on Apple OS-X 2 | # copy to make.def 3 | CC = g++ 4 | CLINKER = $(CC) 5 | OPTFLAGS = -fopenmp -DAPPLE 6 | LIBS = -lm 7 | PRE = ./ 8 | 9 | CFLAGS = $(OPTFLAGS) 10 | 11 | OBJ=o 12 | EXE= 13 | RM=rm 14 | -------------------------------------------------------------------------------- /hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | int main () 3 | { 4 | printf("Hello World \n"); 5 | } 6 | -------------------------------------------------------------------------------- /linked.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #ifndef N 6 | #define N 5 7 | #endif 8 | #ifndef FS 9 | #define FS 38 10 | #endif 11 | 12 | struct node { 13 | int data; 14 | int fibdata; 15 | struct node* next; 16 | }; 17 | 18 | int fib(int n) { 19 | int x, y; 20 | if (n < 2) { 21 | return (n); 22 | } else { 23 | x = fib(n - 1); 24 | y = fib(n - 2); 25 | return (x + y); 26 | } 27 | } 28 | 29 | void processwork(struct node* p) 30 | { 31 | int n; 32 | n = p->data; 33 | p->fibdata = fib(n); 34 | } 35 | 36 | struct node* init_list(struct node* p) { 37 | int i; 38 | struct node* head = NULL; 39 | struct node* temp = NULL; 40 | 41 | head = (struct node*)malloc(sizeof(struct node)); 42 | p = head; 43 | p->data = FS; 44 | p->fibdata = 0; 45 | for (i=0; i< N; i++) { 46 | temp = (struct node*)malloc(sizeof(struct node)); 47 | p->next = temp; 48 | p = temp; 49 | p->data = FS + i + 1; 50 | p->fibdata = i+1; 51 | } 52 | p->next = NULL; 53 | return head; 54 | } 55 | 56 | int main(int argc, char *argv[]) { 57 | double start, end; 58 | struct node *p=NULL; 59 | struct node *temp=NULL; 60 | struct node 
*head=NULL; 61 | 62 | printf("Process linked list\n"); 63 | printf(" Each linked list node will be processed by function 'processwork()'\n"); 64 | printf(" Each ll node will compute %d fibonacci numbers beginning with %d\n",N,FS); 65 | 66 | p = init_list(p); 67 | head = p; 68 | 69 | start = omp_get_wtime(); 70 | { 71 | while (p != NULL) { 72 | processwork(p); 73 | p = p->next; 74 | } 75 | } 76 | 77 | end = omp_get_wtime(); 78 | p = head; 79 | while (p != NULL) { 80 | printf("%d : %d\n",p->data, p->fibdata); 81 | temp = p->next; 82 | free (p); 83 | p = temp; 84 | } 85 | free (p); 86 | 87 | printf("Compute Time: %f seconds\n", end - start); 88 | 89 | return 0; 90 | } 91 | 92 | -------------------------------------------------------------------------------- /linux_gnu.def: -------------------------------------------------------------------------------- 1 | # for g++ compiler on Linux 2 | # copy to make.def 3 | CC = g++ 4 | CLINKER = $(CC) 5 | OPTFLAGS = -fopenmp 6 | LIBS = -lm 7 | PRE = ./ 8 | 9 | CFLAGS = $(OPTFLAGS) 10 | 11 | OBJ=o 12 | EXE= 13 | RM=rm 14 | -------------------------------------------------------------------------------- /linux_intel.def: -------------------------------------------------------------------------------- 1 | # for intel compiler on linux (most exercises work with icc as well) 2 | # copy to make.def 3 | CC = icpc 4 | CPP = $(CC) 5 | OPTFLAGS = -openmp 6 | 7 | CLINKER = $(CC) 8 | LIBS = -lm 9 | 10 | CFLAGS = $(OPTFLAGS) 11 | PRE= ./ 12 | OBJ=o 13 | EXE= 14 | RM=rm 15 | 16 | -------------------------------------------------------------------------------- /linux_pgi.def: -------------------------------------------------------------------------------- 1 | CC = pgcc 2 | CLINKER = $(CC) 3 | OPTFLAGS = -mp -fast 4 | LIBS = 5 | 6 | CFLAGS = $(OPTFLAGS) 7 | 8 | OBJ=o 9 | EXE=.exe 10 | RM=rm 11 | -------------------------------------------------------------------------------- /make.def: 
-------------------------------------------------------------------------------- 1 | # for g++ compiler on Apple OS-X 2 | # copy to make.def 3 | CC = g++ 4 | CLINKER = $(CC) 5 | OPTFLAGS = -fopenmp -DAPPLE 6 | LIBS = -lm 7 | PRE = ./ 8 | 9 | CFLAGS = $(OPTFLAGS) 10 | 11 | OBJ=o 12 | EXE= 13 | RM=rm 14 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | # 2 | 3 | include make.def 4 | 5 | EXES=hello$(EXE) pi$(EXE) matmul$(EXE) pi_mc$(EXE) prod_cons$(EXE) \ 6 | matmul_recur$(EXE) mandel$(EXE) linked$(EXE) 7 | 8 | all: $(EXES) 9 | 10 | hello$(EXE): hello.$(OBJ) 11 | $(CLINKER) $(OPTFLAGS) -o hello hello.$(OBJ) $(LIBS) 12 | 13 | pi$(EXE): pi.$(OBJ) 14 | $(CLINKER) $(OPTFLAGS) -o pi pi.$(OBJ) $(LIBS) 15 | 16 | pi_mc$(EXE): pi_mc.$(OBJ) random.$(OBJ) 17 | $(CLINKER) $(OPTFLAGS) -o pi_mc random.$(OBJ) pi_mc.$(OBJ) $(LIBS) 18 | 19 | matmul$(EXE): matmul.$(OBJ) 20 | $(CLINKER) $(OPTFLAGS) -o matmul matmul.$(OBJ) $(LIBS) 21 | 22 | prod_cons$(EXE): prod_cons.$(OBJ) 23 | $(CLINKER) $(OPTFLAGS) -o prod_cons prod_cons.$(OBJ) $(LIBS) 24 | 25 | matmul_recur$(EXE): matmul_recur.$(OBJ) 26 | $(CLINKER) $(OPTFLAGS) -o matmul_recur matmul_recur.$(OBJ) $(LIBS) 27 | 28 | mandel$(EXE): mandel.$(OBJ) 29 | $(CLINKER) $(OPTFLAGS) -o mandel mandel.$(OBJ) $(LIBS) 30 | 31 | linked$(EXE): linked.$(OBJ) 32 | $(CLINKER) $(OPTFLAGS) -o linked linked.$(OBJ) $(LIBS) 33 | 34 | test: $(EXES) 35 | $(PRE)hello$(EXE) 36 | $(PRE)pi$(EXE) 37 | $(PRE)matmul$(EXE) 38 | $(PRE)pi_mc$(EXE) 39 | $(PRE)prod_cons$(EXE) 40 | $(PRE)matmul_recur$(EXE) 41 | $(PRE)mandel$(EXE) 42 | $(PRE)linked$(EXE) 43 | 44 | clean: 45 | $(RM) $(EXES) *.$(OBJ) 46 | 47 | .SUFFIXES: 48 | .SUFFIXES: .c .cpp .$(OBJ) 49 | 50 | .c.$(OBJ): 51 | $(CC) $(CFLAGS) -c $< 52 | 53 | .cpp.$(OBJ): 54 | $(CC) $(CFLAGS) -c $< 55 | -------------------------------------------------------------------------------- /mandel.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | ** PROGRAM: Mandelbrot area 3 | ** 4 | ** PURPOSE: Program to compute the area of a Mandelbrot set. 5 | ** Correct answer should be around 1.510659. 6 | ** WARNING: this program may contain errors 7 | ** 8 | ** USAGE: Program runs without input ... just run the executable 9 | ** 10 | ** HISTORY: Written: (Mark Bull, August 2011). 11 | ** Changed "comples" to "d_comples" to avoid collsion with 12 | ** math.h complex type (Tim Mattson, September 2011) 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | # define NPOINTS 1000 21 | # define MAXITER 1000 22 | 23 | void testpoint(void); 24 | 25 | struct d_complex{ 26 | double r; 27 | double i; 28 | }; 29 | 30 | struct d_complex c; 31 | int numoutside = 0; 32 | 33 | int main(){ 34 | int i, j; 35 | double area, error, eps = 1.0e-5; 36 | 37 | 38 | // Loop over grid of points in the complex plane which contains the Mandelbrot set, 39 | // testing each point to see whether it is inside or outside the set. 40 | 41 | #pragma omp parallel for default(shared) private(c,eps) 42 | for (i=0; i 2 when point is known to be outside set 63 | // If loop count reaches MAXITER, point is considered to be inside the set 64 | 65 | struct d_complex z; 66 | int iter; 67 | double temp; 68 | 69 | z=c; 70 | for (iter=0; iter4.0) { 75 | numoutside++; 76 | break; 77 | } 78 | } 79 | 80 | } 81 | 82 | -------------------------------------------------------------------------------- /matmul.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** PROGRAM: Matrix Multiply 3 | ** 4 | ** PURPOSE: This is a simple matrix multiply program. 5 | ** It will compute the product 6 | ** 7 | ** C = A * B 8 | ** 9 | ** A and B are set to constant matrices so we 10 | ** can make a quick test of the multiplication. 11 | ** 12 | ** USAGE: Right now, I hardwire the martix dimensions. 
13 | ** later, I'll take them from the command line. 14 | ** 15 | ** HISTORY: Written by Tim Mattson, Nov 1999. 16 | */ 17 | #ifdef APPLE 18 | #include 19 | #else 20 | #include 21 | #endif 22 | #include 23 | #include 24 | 25 | #define ORDER 1000 26 | #define AVAL 3.0 27 | #define BVAL 5.0 28 | #define TOL 0.001 29 | 30 | int main(int argc, char **argv) 31 | { 32 | int Ndim, Pdim, Mdim; /* A[N][P], B[P][M], C[N][M] */ 33 | int i,j,k; 34 | double *A, *B, *C, cval, tmp, err, errsq; 35 | double dN, mflops; 36 | double start_time, run_time; 37 | 38 | 39 | Ndim = ORDER; 40 | Pdim = ORDER; 41 | Mdim = ORDER; 42 | 43 | A = (double *)malloc(Ndim*Pdim*sizeof(double)); 44 | B = (double *)malloc(Pdim*Mdim*sizeof(double)); 45 | C = (double *)malloc(Ndim*Mdim*sizeof(double)); 46 | 47 | /* Initialize matrices */ 48 | 49 | for (i=0; i TOL) 95 | printf("\n Errors in multiplication: %f",errsq); 96 | else 97 | printf("\n Hey, it worked"); 98 | 99 | printf("\n all done \n"); 100 | } 101 | -------------------------------------------------------------------------------- /matmul_recur.cpp: -------------------------------------------------------------------------------- 1 | // Several versions of serial codes for matrix-matrix multiplication 2 | 3 | #include 4 | #include 5 | #include 6 | #include "2DArray.h" 7 | 8 | // define sizes of matrices to be used 9 | #define MM 1000 10 | #define NN 1000 11 | #define PP 1000 12 | 13 | double dabs(double d){return (d<0.0?d:(-d));} 14 | 15 | // Default triple-nested loop for matrix-matrix multiplication 16 | void matmult1(int m, int n, int p, double **A, double **B, double **C) 17 | { 18 | int i, j, k; 19 | 20 | for (i = 0; i < m; i++) 21 | for (j = 0; j < n; j++){ 22 | C[i][j]=0; 23 | for (k = 0; k < p; k++) 24 | C[i][j] += A[i][k]*B[k][j]; 25 | } 26 | } 27 | 28 | 29 | /* 30 | Recursive code for matrix multiplication. 
31 | The recursion uses the formula 32 | C00 = A00*B00 + A01*B10 33 | C01 = A00*B01 + B01*B11 34 | C10 = A10*B00 + A11*B10 35 | C11 = A10*B01 + A11*B11 36 | */ 37 | 38 | void matmultleaf(int mf, int ml, int nf, int nl, int pf, int pl, double **A, double **B, double **C) 39 | /* 40 | subroutine that uses the simple triple loop to multiply 41 | a submatrix from A with a submatrix from B and store the 42 | result in a submatrix of C. 43 | (We could use a tiled version,for better performance) 44 | */ 45 | // mf, ml; /* first and last+1 i index */ 46 | // nf, nl; /* first and last+1 j index */ 47 | // pf, pl; /* first and last+1 k index */ 48 | { 49 | int i,j,k; 50 | for (i = mf; i < ml; i++) 51 | for (j = nf; j < nl; j++) 52 | for (k = pf; k < pl; k++) 53 | C[i][j] += A[i][k]*B[k][j]; 54 | } 55 | 56 | #define GRAIN 32768 /* product size below which matmultleaf is used */ 57 | 58 | void matmultrec(int mf, int ml, int nf, int nl, int pf, int pl, double **A, double **B, double **C) 59 | /* 60 | recursive subroutine to compute the product of two 61 | submatrices of A and B and store the result in C 62 | */ 63 | // mf, ml; /* first and last+1 i index */ 64 | // nf, nl; /* first and last+1 j index */ 65 | // pf, pl; /* first and last+1 k index */ 66 | 67 | { 68 | // 69 | // Check sizes of matrices; 70 | // if below threshold then compute product w/o recursion 71 | // 72 | if ((ml-mf)*(nl-nf)*(pl-pf) < GRAIN) 73 | matmultleaf(mf, ml, nf, nl, pf, pl, A, B, C); 74 | else { 75 | // 76 | // Apply OpenMP tasks to the eight recursive calls below 77 | // be sure to not create data races between tasks 78 | // 79 | // C00 += A00 * B00 80 | matmultrec(mf, mf+(ml-mf)/2, nf, nf+(nl-nf)/2, pf, pf+(pl-pf)/2, A, B, C); 81 | // C01 += A00 * B01 82 | matmultrec(mf, mf+(ml-mf)/2, nf+(nl-nf)/2, nl, pf, pf+(pl-pf)/2, A, B, C); 83 | // C00 += A01 * B10 84 | matmultrec(mf, mf+(ml-mf)/2, nf, nf+(nl-nf)/2, pf+(pl-pf)/2, pl, A, B, C); 85 | // C01 += A01 * B11 86 | matmultrec(mf, mf+(ml-mf)/2, 
nf+(nl-nf)/2, nl, pf+(pl-pf)/2, pl, A, B, C); 87 | // C10 += A10 * B00 88 | matmultrec(mf+(ml-mf)/2, ml, nf, nf+(nl-nf)/2, pf, pf+(pl-pf)/2, A, B, C); 89 | // C11 += A10 * B01 90 | matmultrec(mf+(ml-mf)/2, ml, nf+(nl-nf)/2, nl, pf, pf+(pl-pf)/2, A, B, C); 91 | // C10 += A11 * B10 92 | matmultrec(mf+(ml-mf)/2, ml, nf, nf+(nl-nf)/2, pf+(pl-pf)/2, pl, A, B, C); 93 | // C11 += A11 * B11 94 | matmultrec(mf+(ml-mf)/2, ml, nf+(nl-nf)/2, nl, pf+(pl-pf)/2, pl, A, B, C); 95 | } 96 | } 97 | 98 | 99 | // 100 | // "Helper" function to intialize C and start recursive routine 101 | // 102 | void matmultr(int m, int n, int p, double **A, double **B, double **C) 103 | { 104 | int i,j; 105 | 106 | for (i = 0; i < m; i++) 107 | for (j=0; j < n; j++) 108 | C[i][j] = 0; 109 | 110 | matmultrec(0, m, 0, n, 0, p, A, B, C); 111 | } 112 | 113 | int CheckResults(int m, int n, double **C, double **C1) 114 | { 115 | #define ERR_THRESHOLD 0.001 116 | int code = 0; 117 | // 118 | // May need to take into consideration the floating point roundoff error 119 | // due to parallel execution 120 | // 121 | for (int i = 0; i < m; i++) { 122 | for (int j = 0; j < n; j++) { 123 | if (dabs(C[i][j] - C1[i][j]) > ERR_THRESHOLD ) { 124 | printf("%f %f at [%d][%d]\n", C[i][j], C1[i][j], i, j); 125 | code = 1; 126 | } 127 | } 128 | } 129 | return code; 130 | } 131 | 132 | 133 | int main(int argc, char* argv[]) 134 | { 135 | int i, j; 136 | double start, time1, time2; 137 | 138 | int M = MM; 139 | int N = NN; 140 | int P = PP; 141 | 142 | // 143 | // If 3 values on command line, use those for matrix sizes 144 | // 145 | if (argc != 4) { 146 | printf("Suggested Usage: %s

\n", argv[0]); 147 | printf("Using default values\n"); 148 | } 149 | else { 150 | M = atoi(argv[1]); 151 | N = atoi(argv[2]); 152 | P = atoi(argv[3]); 153 | } 154 | 155 | double **A = Allocate2DArray< double >(M, P); 156 | double **B = Allocate2DArray< double >(P, N); 157 | 158 | double **C1 = Allocate2DArray< double >(M, N); 159 | double **C4 = Allocate2DArray< double >(M, N); 160 | 161 | // 162 | // Initialize with random values 163 | // 164 | for (i = 0; i < M; i++) { 165 | for (j = 0; j < P; j++) { 166 | A[i][j] = (double)(rand()%100) / 10.0; 167 | } 168 | } 169 | 170 | for (i = 0; i < P; i++) { 171 | for (j = 0; j < N; j++) { 172 | B[i][j] = (double)(rand()%100) / 10.0; 173 | } 174 | } 175 | 176 | printf("Matrix Dimensions: M = %d P = %d N = %d\n\n", M, P, N); 177 | printf("Execute matmult1\n"); 178 | start = omp_get_wtime(); 179 | matmult1(M, N, P, A, B, C1); 180 | time1 = omp_get_wtime() - start; 181 | printf("Time = %f seconds\n\n",time1); 182 | 183 | printf("Execute matmultr\n"); 184 | start = omp_get_wtime(); 185 | matmultr(M, N, P, A, B, C4); 186 | time2 = omp_get_wtime() - start; 187 | printf("Time = %f seconds\n\n",time2); 188 | 189 | printf("Checking..."); 190 | if (CheckResults(M, N, C1, C4)) 191 | printf("Error in Recursive Matrix Multiplication\n\n"); 192 | else { 193 | printf("OKAY\n\n"); 194 | printf("Speedup = %5.1fX\n", time1/time2); 195 | } 196 | 197 | 198 | Free2DArray< double >(A); 199 | Free2DArray< double >(B); 200 | Free2DArray< double >(C1); 201 | Free2DArray< double >(C4); 202 | 203 | return 0; 204 | } 205 | -------------------------------------------------------------------------------- /omp_hands_on.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tgmattso/OpenMP_intro_tutorial/56a6b0160e3f5c0a5711658436e3f9657ee57b09/omp_hands_on.pdf -------------------------------------------------------------------------------- /pi.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | 3 | This program will numerically compute the integral of 4 | 5 | 4/(1+x*x) 6 | 7 | from 0 to 1. The value of this integral is pi -- which 8 | is great since it gives us an easy way to check the answer. 9 | 10 | The is the original sequential program. It uses the timer 11 | from the OpenMP runtime library 12 | 13 | History: Written by Tim Mattson, 11/99. 14 | 15 | */ 16 | #include 17 | #include 18 | static long num_steps = 100000000; 19 | double step; 20 | int main () 21 | { 22 | int i; 23 | double x, pi, sum = 0.0; 24 | double start_time, run_time; 25 | 26 | step = 1.0/(double) num_steps; 27 | 28 | 29 | start_time = omp_get_wtime(); 30 | 31 | for (i=1;i<= num_steps; i++){ 32 | x = (i-0.5)*step; 33 | sum = sum + 4.0/(1.0+x*x); 34 | } 35 | 36 | pi = step * sum; 37 | run_time = omp_get_wtime() - start_time; 38 | printf("\n pi with %ld steps is %lf in %lf seconds\n ",num_steps,pi,run_time); 39 | } 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /pi_mc.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | NAME: 4 | Pi_mc: PI Monte Carlo 5 | 6 | Purpose: 7 | This program uses a Monte Carlo algorithm to compute PI as an 8 | example of how random number generators are used to solve problems. 9 | Note that if your goal is to find digits of pi, there are much 10 | better algorithms you could use. 11 | 12 | Usage: 13 | To keep the program as simple as possible, you must edit the file 14 | and change the value of num_trials to change the number of samples 15 | used. Then compile and run the program. 16 | 17 | Algorithm: 18 | The basic idea behind the algorithm is easy to visualize. Draw a 19 | square on a wall. Inside the square, draw a circle. Now randomly throw 20 | darts at the wall. some darts will land inside the square. Of those, 21 | some will fall inside the circle. 
The probability of landing inside 22 | the circle or the square is proportional to their areas. 23 | 24 | We can use a random number generator to "throw the darts" and count 25 | how many "darts" fall inside the square and how many inside the 26 | circle. Dividing these two numbers gives us the ratio of their areas 27 | and from that we can compute pi. 28 | 29 | Algorithm details: 30 | To turn this into code, I need a bit more detail. Assume the circle 31 | is centered inside the square. The circle will have a radius of r and 32 | each side of the square will be of length 2*r (i.e. the diameter of the 33 | circle). 34 | 35 | A(circle) = pi * r^2 36 | A(square) = (2*r)*(2*r) = 4*r^2 37 | 38 | ratio = A(circle)/A(square) = pi/4 39 | 40 | Since the probability (P) of a dart falling inside a figure (i.e. the square 41 | or the circle) is proportional to the area, we have 42 | 43 | ratio = P(circle)/P(square) = pi/4 44 | 45 | If I throw N darts as computed by random numbers evenly distributed 46 | over the area of the square 47 | 48 | P(square) = N/N .... i.e. every dart lands in the square 49 | P(circle) = N(circle)/N 50 | 51 | ratio = (N(circle)/N)/(N/N) = N(circle)/N 52 | 53 | Hence, to find the area, I compute N random "darts" and count how many fall 54 | inside the circle. The equation for a circle is 55 | 56 | x^2 + y^2 = r^2 57 | 58 | So I randomly compute "x" and "y" evenly distributed from -r to r and 59 | count the "dart" as falling inside the circle if 60 | 61 | x^2 + y^2 < or = r^2 62 | 63 | Results: 64 | Remember, our goal is to demonstrate a simple Monte Carlo algorithm, 65 | not compute pi.
But just for the record, here are some results (Intel compiler 66 | version 10.0, Windows XP, core duo laptop) 67 | 68 | 100 3.160000 69 | 1000 3.148000 70 | 10000 3.154000 71 | 100000 3.139920 72 | 1000000 3.141456 73 | 10000000 3.141590 74 | 100000000 3.141581 75 | 76 | As a point of reference, the first 7 digits of the true value of pi 77 | is 3.141592 78 | 79 | 80 | History: 81 | Written by Tim Mattson, 9/2007. 82 | 83 | */ 84 | #include 85 | #include 86 | #include "random.h" 87 | 88 | // 89 | // The monte carlo pi program 90 | // 91 | 92 | static long num_trials = 10000; 93 | 94 | int main () 95 | { 96 | long i; long Ncirc = 0; 97 | double pi, x, y, test; 98 | double r = 1.0; // radius of circle. Side of squrare is 2*r 99 | 100 | seed(-r, r); // The circle and square are centered at the origin 101 | #pragma omp parallel for private(x,y,test) reduction(+:Ncirc) 102 | for(i=0;i 10 | #ifdef APPLE 11 | #include 12 | #else 13 | #include 14 | #endif 15 | #include 16 | 17 | #define N 10000 18 | 19 | /* Some random number constants from numerical recipies */ 20 | #define SEED 2531 21 | #define RAND_MULT 1366 22 | #define RAND_ADD 150889 23 | #define RAND_MOD 714025 24 | int randy = SEED; 25 | 26 | /* function to fill an array with random numbers */ 27 | void fill_rand(int length, double *a) 28 | { 29 | int i; 30 | for (i=0;i 2 | #include 3 | 4 | int main () 5 | { 6 | int nthreads = 4; 7 | omp_set_num_threads(nthreads); 8 | 9 | #pragma omp parallel 10 | { 11 | int id = omp_get_thread_num(); 12 | 13 | printf("Hello World from thread = %d", id); 14 | printf(" with %d threads\n",omp_get_num_threads()); 15 | } 16 | 17 | printf("all done, with hopefully %d threads\n",nthreads); 18 | 19 | } 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /solutions/linked_omp25.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "omp.h" 4 | 5 | #define N 5 6 | #define 
FS 38 7 | #define NMAX 10 8 | 9 | struct node { 10 | int data; 11 | int fibdata; 12 | struct node* next; 13 | }; 14 | 15 | int fib(int n) { 16 | int x, y; 17 | if (n < 2) { 18 | return (n); 19 | } else { 20 | x = fib(n - 1); 21 | y = fib(n - 2); 22 | return (x + y); 23 | } 24 | } 25 | 26 | void processwork(struct node* p) 27 | { 28 | int n; 29 | n = p->data; 30 | p->fibdata = fib(n); 31 | } 32 | 33 | struct node* init_list(struct node* p) { 34 | int i; 35 | struct node* head = NULL; 36 | struct node* temp = NULL; 37 | 38 | head = malloc(sizeof(struct node)); 39 | p = head; 40 | p->data = FS; 41 | p->fibdata = 0; 42 | for (i=0; i< N; i++) { 43 | temp = malloc(sizeof(struct node)); 44 | p->next = temp; 45 | p = temp; 46 | p->data = FS + i + 1; 47 | p->fibdata = i+1; 48 | } 49 | p->next = NULL; 50 | return head; 51 | } 52 | 53 | int main(int argc, char *argv[]) { 54 | double start, end; 55 | struct node *p=NULL; 56 | struct node *temp=NULL; 57 | struct node *head=NULL; 58 | struct node *parr[NMAX]; 59 | int i, count=0; 60 | 61 | printf("Process linked list\n"); 62 | printf(" Each linked list node will be processed by function 'processwork()'\n"); 63 | printf(" Each ll node will compute %d fibonacci numbers beginning with %d\n",N,FS); 64 | 65 | p = init_list(p); 66 | head = p; 67 | 68 | 69 | start = omp_get_wtime(); 70 | { 71 | while (p != NULL) { 72 | processwork(p); 73 | p = p->next; 74 | } 75 | } 76 | 77 | end = omp_get_wtime(); 78 | 79 | printf("serial Compute Time: %f seconds\n", end - start); 80 | 81 | 82 | p = head; 83 | 84 | start = omp_get_wtime(); 85 | { 86 | // count number of items in the list. Strictly speaking this isn't 87 | // needed since we know there are N elements in the list. But in 88 | // most cases you don't know this and need to count nodes. 89 | while (p != NULL) { 90 | p = p->next; 91 | count++; 92 | } 93 | 94 | // traverse the list and collect pointers into an array. 
95 | p = head; 96 | for(i=0; inext; 99 | } 100 | 101 | // do the work in parallel 102 | #pragma omp parallel 103 | { 104 | #pragma omp single 105 | printf(" %d threads \n",omp_get_num_threads()); 106 | #pragma omp for schedule(static,1) 107 | for(i=0; idata, p->fibdata); 116 | temp = p->next; 117 | free (p); 118 | p = temp; 119 | } 120 | free (p); 121 | 122 | printf("Compute Time: %f seconds\n", end - start); 123 | 124 | return 0; 125 | } 126 | 127 | -------------------------------------------------------------------------------- /solutions/linked_omp3_tasks.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | #ifndef N 7 | #define N 5 8 | #endif 9 | #ifndef FS 10 | #define FS 38 11 | #endif 12 | 13 | struct node { 14 | int data; 15 | int fibdata; 16 | struct node* next; 17 | }; 18 | 19 | struct node* init_list(struct node* p); 20 | void processwork(struct node* p); 21 | int fib(int n); 22 | 23 | int fib(int n) 24 | { 25 | int x, y; 26 | if (n < 2) { 27 | return (n); 28 | } else { 29 | x = fib(n - 1); 30 | y = fib(n - 2); 31 | return (x + y); 32 | } 33 | } 34 | 35 | void processwork(struct node* p) 36 | { 37 | int n, temp; 38 | n = p->data; 39 | temp = fib(n); 40 | 41 | p->fibdata = temp; 42 | 43 | } 44 | 45 | struct node* init_list(struct node* p) 46 | { 47 | int i; 48 | struct node* head = NULL; 49 | struct node* temp = NULL; 50 | 51 | head = malloc(sizeof(struct node)); 52 | p = head; 53 | p->data = FS; 54 | p->fibdata = 0; 55 | for (i=0; i< N; i++) { 56 | temp = malloc(sizeof(struct node)); 57 | p->next = temp; 58 | p = temp; 59 | p->data = FS + i + 1; 60 | p->fibdata = i+1; 61 | } 62 | p->next = NULL; 63 | return head; 64 | } 65 | 66 | int main() 67 | { 68 | double start, end; 69 | struct node *p=NULL; 70 | struct node *temp=NULL; 71 | struct node *head=NULL; 72 | 73 | printf("Process linked list\n"); 74 | printf(" Each linked list node will be processed by function 
'processwork()'\n"); 75 | printf(" Each ll node will compute %d fibonacci numbers beginning with %d\n",N,FS); 76 | 77 | p = init_list(p); 78 | head = p; 79 | 80 | start = omp_get_wtime(); 81 | 82 | #pragma omp parallel 83 | { 84 | #pragma omp master 85 | printf("Threads: %d\n", omp_get_num_threads()); 86 | 87 | #pragma omp single 88 | { 89 | p=head; 90 | while (p) { 91 | #pragma omp task firstprivate(p) //first private is required 92 | { 93 | processwork(p); 94 | } 95 | p = p->next; 96 | } 97 | } 98 | } 99 | 100 | end = omp_get_wtime(); 101 | p = head; 102 | while (p != NULL) { 103 | printf("%d : %d\n",p->data, p->fibdata); 104 | temp = p->next; 105 | free (p); 106 | p = temp; 107 | } 108 | free (p); 109 | 110 | printf("Compute Time: %f seconds\n", end - start); 111 | 112 | return 0; 113 | } 114 | 115 | -------------------------------------------------------------------------------- /solutions/makefile: -------------------------------------------------------------------------------- 1 | # 2 | include ../make.def 3 | 4 | EXES=hello_par$(EXE) pi_spmd_simple$(EXE) pi_spmd_final$(EXE) \ 5 | pi_loop$(EXE) matmul_par$(EXE) \ 6 | prod_cons_par$(EXE) mandel_par$(EXE) pi_mc$(EXE) \ 7 | pi_mc_par$(EXE) linked_omp25$(EXE) linked_omp3_tasks$(EXE) 8 | 9 | all: $(EXES) 10 | 11 | 12 | hello_par$(EXE): hello_par.$(OBJ) 13 | $(CLINKER) $(OPTFLAGS) -o hello_par hello_par.$(OBJ) $(LIBS) 14 | 15 | pi_spmd_simple$(EXE): pi_spmd_simple.$(OBJ) 16 | $(CLINKER) $(OPTFLAGS) -o pi_spmd_simple pi_spmd_simple.$(OBJ) $(LIBS) 17 | 18 | pi_spmd_final$(EXE): pi_spmd_final.$(OBJ) 19 | $(CLINKER) $(OPTFLAGS) -o pi_spmd_final pi_spmd_final.$(OBJ) $(LIBS) 20 | 21 | pi_loop$(EXE): pi_loop.$(OBJ) 22 | $(CLINKER) $(OPTFLAGS) -o pi_loop pi_loop.$(OBJ) $(LIBS) 23 | 24 | pi_mc$(EXE): pi_mc.$(OBJ) 25 | $(CLINKER) $(OPTFLAGS) -o pi_mc pi_mc.$(OBJ) $(LIBS) 26 | 27 | pi_mc_par$(EXE): pi_mc_par.$(OBJ) random_par.$(OBJ) 28 | $(CLINKER) $(OPTFLAGS) -o pi_mc_par pi_mc_par.$(OBJ) random_par.$(OBJ) $(LIBS) 29 | 30 | 
matmul_par$(EXE): matmul_par.$(OBJ) 31 | $(CLINKER) $(OPTFLAGS) -o matmul_par matmul_par.$(OBJ) $(LIBS) 32 | 33 | prod_cons_par$(EXE): prod_cons_par.$(OBJ) 34 | $(CLINKER) $(OPTFLAGS) -o prod_cons_par prod_cons_par.$(OBJ) $(LIBS) 35 | 36 | mandel_par$(EXE): mandel_par.$(OBJ) 37 | $(CLINKER) $(OPTFLAGS) -o mandel_par mandel_par.$(OBJ) $(LIBS) 38 | 39 | linked_omp25$(EXE): linked_omp25.$(OBJ) 40 | $(CLINKER) $(OPTFLAGS) -o linked_omp25 linked_omp25.$(OBJ) $(LIBS) 41 | 42 | linked_omp3_tasks$(EXE): linked_omp3_tasks.$(OBJ) 43 | $(CLINKER) $(OPTFLAGS) -o linked_omp3_tasks linked_omp3_tasks.$(OBJ) $(LIBS) 44 | 45 | test: $(EXES) 46 | $(PRE)hello_par$(EXE) 47 | $(PRE)pi_spmd_simple$(EXE) 48 | $(PRE)pi_spmd_final$(EXE) 49 | $(PRE)pi_loop$(EXE) 50 | $(PRE)matmul_par$(EXE) 51 | $(PRE)prod_cons_par$(EXE) 52 | $(PRE)mandel_par$(EXE) 53 | $(PRE)pi_mc$(EXE) 54 | $(PRE)pi_mc_par$(EXE) 55 | $(PRE)linked_omp25$(EXE) 56 | $(PRE)linked_omp3_tasks$(EXE) 57 | 58 | clean: 59 | $(RM) $(EXES) *.$(OBJ) 60 | 61 | .SUFFIXES: 62 | .SUFFIXES: .c .cpp .$(OBJ) 63 | 64 | .c.$(OBJ): 65 | $(CC) $(CFLAGS) -c $< 66 | 67 | .cpp.$(OBJ): 68 | $(CC) $(CFLAGS) -c $< 69 | -------------------------------------------------------------------------------- /solutions/mandel_par.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** PROGRAM: Mandelbrot area (solution) 3 | ** 4 | ** PURPOSE: Program to compute the area of a Mandelbrot set. 5 | ** The correct answer should be around 1.510659. 6 | ** 7 | ** USAGE: Program runs without input ... just run the executable 8 | ** 9 | ** ADDITIONAL EXERCISES: Experiment with the schedule clause to fix 10 | ** the load imbalance. Experiment with atomic vs. critical vs. 11 | ** reduction for numoutside. 12 | ** 13 | ** HISTORY: Written: (Mark Bull, August 2011). 14 | ** 15 | ** Changed "comples" to "d_comples" to avoid collsion with 16 | ** math.h complex type. 
Fixed data environment errors 17 | ** (Tim Mattson, September 2011) 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | # define NPOINTS 1000 26 | # define MAXITER 1000 27 | 28 | struct d_complex{ 29 | double r; 30 | double i; 31 | }; 32 | 33 | void testpoint(struct d_complex); 34 | 35 | struct d_complex c; 36 | int numoutside = 0; 37 | 38 | int main(){ 39 | int i, j; 40 | double area, error, eps = 1.0e-5; 41 | 42 | 43 | // Loop over grid of points in the complex plane which contains the Mandelbrot set, 44 | // testing each point to see whether it is inside or outside the set. 45 | omp_set_num_threads(4); 46 | #pragma omp parallel for default(shared) firstprivate(eps) private(c, j) 47 | for (i=0; i 2 when point is known to be outside set 67 | // If loop count reaches MAXITER, point is considered to be inside the set 68 | 69 | struct d_complex z; 70 | int iter; 71 | double temp; 72 | 73 | z=c; 74 | for (iter=0; iter4.0) { 79 | #pragma omp atomic 80 | numoutside++; 81 | break; 82 | } 83 | } 84 | 85 | } 86 | 87 | -------------------------------------------------------------------------------- /solutions/matmul_par.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** PROGRAM: Parallel Matrix Multiply (using OpenMP) 3 | ** 4 | ** PURPOSE: This is a simple matrix multiply program. 5 | ** It will compute the product 6 | ** 7 | ** C = A * B 8 | ** 9 | ** A and B are set to constant matrices so we 10 | ** can make a quick test of the multiplication. 11 | ** 12 | ** USAGE: Right now, I hardwire the martix dimensions. 13 | ** later, I'll take them from the command line. 14 | ** 15 | ** HISTORY: Written by Tim Mattson, Nov 1999. 
16 | */ 17 | #ifdef APPLE 18 | #include 19 | #else 20 | #include 21 | #endif 22 | #include 23 | #include 24 | 25 | #define ORDER 1000 26 | #define AVAL 3.0 27 | #define BVAL 5.0 28 | #define TOL 0.001 29 | 30 | int main(int argc, char *argv[]) 31 | { 32 | int Ndim, Pdim, Mdim; /* A[N][P], B[P][M], C[N][M] */ 33 | int i,j,k; 34 | 35 | double *A, *B, *C, cval, tmp, err, errsq; 36 | double dN, mflops; 37 | double start_time, run_time; 38 | 39 | 40 | Ndim = ORDER; 41 | Pdim = ORDER; 42 | Mdim = ORDER; 43 | 44 | A = (double *)malloc(Ndim*Pdim*sizeof(double)); 45 | B = (double *)malloc(Pdim*Mdim*sizeof(double)); 46 | C = (double *)malloc(Ndim*Mdim*sizeof(double)); 47 | 48 | /* Initialize matrices */ 49 | 50 | for (i=0; i TOL) 100 | printf("\n Errors in multiplication: %f",errsq); 101 | else 102 | printf("\n Hey, it worked"); 103 | 104 | printf("\n all done \n"); 105 | } 106 | -------------------------------------------------------------------------------- /solutions/pi_loop.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | This program will numerically compute the integral of 4 | 5 | 4/(1+x*x) 6 | 7 | from 0 to 1. The value of this integral is pi -- which 8 | is great since it gives us an easy way to check the answer. 9 | 10 | The program was parallelized using OpenMP by adding just 11 | four lines 12 | 13 | (1) A line to include omp.h -- the include file that 14 | contains OpenMP's function prototypes and constants. 15 | 16 | (2) A pragma that tells OpenMP to create a team of threads 17 | 18 | (3) A pragma to cause one of the threads to print the 19 | number of threads being used by the program. 20 | 21 | (4) A pragma to split up loop iterations among the team 22 | of threads. This pragma includes 2 clauses to (1) create a 23 | private variable and (2) to cause the threads to compute their 24 | sums locally and then combine their local sums into a 25 | single global value. 26 | 27 | History: Written by Tim Mattson, 11/99. 
28 | 29 | */ 30 | #include 31 | #include 32 | static long num_steps = 100000000; 33 | double step; 34 | int main () 35 | { 36 | int i; 37 | double x, pi, sum = 0.0; 38 | double start_time, run_time; 39 | 40 | step = 1.0/(double) num_steps; 41 | for (i=1;i<=4;i++){ 42 | sum = 0.0; 43 | omp_set_num_threads(i); 44 | start_time = omp_get_wtime(); 45 | #pragma omp parallel 46 | { 47 | #pragma omp single 48 | printf(" num_threads = %d",omp_get_num_threads()); 49 | 50 | #pragma omp for reduction(+:sum) 51 | for (i=1;i<= num_steps; i++){ 52 | x = (i-0.5)*step; 53 | sum = sum + 4.0/(1.0+x*x); 54 | } 55 | } 56 | pi = step * sum; 57 | run_time = omp_get_wtime() - start_time; 58 | printf("\n pi is %f in %f seconds and %d threads\n",pi,run_time,i); 59 | } 60 | } 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /solutions/pi_mc.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | NAME: 4 | Pi_mc: PI Monte Carlo 5 | 6 | Purpose: 7 | This program uses a Monte Carlo algorithm to compute PI as an 8 | example of how random number generators are used to solve problems. 9 | Note that if your goal is to find digits of pi, there are much 10 | better algorithms you could use. 11 | 12 | Usage: 13 | To keep the program as simple as possible, you must edit the file 14 | and change the value of num_trials to change the number of samples 15 | used. Then compile and run the program. 16 | 17 | Algorithm: 18 | The basic idea behind the algorithm is easy to visualize. Draw a 19 | square on a wall. Inside the square, draw a circle. Now randomly throw 20 | darts at the wall. some darts will land inside the square. Of those, 21 | some will fall inside the circle. The probability of landing inside 22 | the circle or the square is proportional to their areas. 
23 | 24 | We can use a random number generator to "throw the darts" and count 25 | how many "darts" fall inside the square and how many inside the 26 | circle. Dividing these two numbers gives us the ratio of their areas 27 | and from that we can compute pi. 28 | 29 | Algorithm details: 30 | To turn this into code, I need a bit more detail. Assume the circle 31 | is centered inside the square. The circle will have a radius of r and 32 | each side of the square will be of length 2*r (i.e. the diameter of the 33 | circle). 34 | 35 | A(circle) = pi * r^2 36 | A(square) = (2*r)*(2*r) = 4*r^2 37 | 38 | ratio = A(circle)/A(square) = pi/4 39 | 40 | Since the probability (P) of a dart falling inside a figure (i.e. the square 41 | or the circle) is proportional to the area, we have 42 | 43 | ratio = P(circle)/P(square) = pi/4 44 | 45 | If I throw N darts as computed by random numbers evenly distributed 46 | over the area of the square 47 | 48 | P(square) = N/N .... i.e. every dart lands in the square 49 | P(circle) = N(circle)/N 50 | 51 | ratio = (N(circle)/N)/(N/N) = N(circle)/N 52 | 53 | Hence, to find this ratio, I compute N random "darts" and count how many fall 54 | inside the circle. The equation for a circle is 55 | 56 | x^2 + y^2 = r^2 57 | 58 | So I randomly compute "x" and "y" evenly distributed from -r to r and 59 | count the "dart" as falling inside the circle if 60 | 61 | x^2 + y^2 < or = r^2 62 | 63 | Supporting functions: 64 | For maximum portability, this file includes a very simple random number 65 | generator. This is not a high quality generator and should not be used 66 | for serious work. 67 | 68 | The Generator is a linear congruential generator with constants selected 69 | to yield decent results for sequences with fewer than 2^28 numbers.
The 70 | pseudo random sequence is seeded with a range 71 | 72 | void seed(lower_limit, higher_limit) 73 | 74 | and then subsequent calls to the random number generator generate values 75 | in the sequence: 76 | 77 | double drandom() 78 | 79 | Results: 80 | Remember, our goal is to demonstrate a simple monte carlo algorithm, 81 | not compute pi. But just for the record, here are some results (Intel compiler 82 | version 10.0, Windows XP, core duo laptop) 83 | 84 | 100 3.160000 85 | 1000 3.148000 86 | 10000 3.154000 87 | 100000 3.139920 88 | 1000000 3.141456 89 | 10000000 3.141590 90 | 100000000 3.141581 91 | 92 | As a point of reference, the first 7 digits of the true value of pi 93 | are 3.141592 94 | 95 | 96 | History: 97 | Written by Tim Mattson, 9/2007. 98 | 99 | */ 100 | #include 101 | #include 102 | 103 | //********************************************************** 104 | // Pseudo random number generator: 105 | // double drandom 106 | // void seed (lower_limit, higher_limit) 107 | //********************************************************** 108 | // 109 | // A simple linear congruential random number generator 110 | // (Numerical Recipes chapter 7, 1st ed.) with parameters 111 | // from the table on page 198j. 112 | // 113 | // Uses a linear congruential generator to return a value between 114 | // 0 and 1, then scales and shifts it to fill the desired range. This 115 | // range is set when the random number generator seed is called.
116 | // 117 | static long MULTIPLIER = 1366; 118 | static long ADDEND = 150889; 119 | static long PMOD = 714025; 120 | long random_last = 0; 121 | double random_low, random_hi; 122 | 123 | double drandom() 124 | { 125 | long random_next; 126 | double ret_val; 127 | 128 | // 129 | // compute an integer random number from zero to mod 130 | // 131 | random_next = (MULTIPLIER * random_last + ADDEND)% PMOD; 132 | random_last = random_next; 133 | 134 | // 135 | // shift into preset range 136 | // 137 | ret_val = ((double)random_next/(double)PMOD)*(random_hi-random_low)+random_low; 138 | return ret_val; 139 | } 140 | // 141 | // set the seed and the range 142 | // 143 | void seed(double low_in, double hi_in) 144 | { 145 | if(low_in < hi_in) 146 | { 147 | random_low = low_in; 148 | random_hi = hi_in; 149 | } 150 | else 151 | { 152 | random_low = hi_in; 153 | random_hi = low_in; 154 | } 155 | random_last = PMOD/ADDEND; // just pick something 156 | 157 | } 158 | //********************************************************** 159 | // end of pseudo random generator code. 160 | //********************************************************** 161 | 162 | // 163 | // The monte carlo pi program 164 | // 165 | 166 | static long num_trials = 100000; 167 | 168 | int main () 169 | { 170 | long i; long Ncirc = 0; 171 | double pi, x, y, test; 172 | double r = 1.0; // radius of circle. Side of squrare is 2*r 173 | 174 | seed(-r, r); // The circle and square are centered at the origin 175 | 176 | for(i=0;i 3 | #include 4 | #include "random.h" 5 | 6 | 7 | static long num_trials = 1000000; 8 | 9 | int main () 10 | { 11 | long i; long Ncirc = 0; 12 | double pi, x, y, test, time; 13 | double r = 1.0; // radius of circle. 
Side of squrare is 2*r 14 | 15 | time = omp_get_wtime(); 16 | #pragma omp parallel 17 | { 18 | 19 | #pragma omp single 20 | printf(" %d threads ",omp_get_num_threads()); 21 | 22 | seed(-r, r); 23 | #pragma omp for reduction(+:Ncirc) private(x,y,test) 24 | for(i=0;i 46 | #include 47 | 48 | #define MAX_THREADS 4 49 | 50 | static long num_steps = 100000000; 51 | double step; 52 | int main () 53 | { 54 | int i,j; 55 | double pi, full_sum = 0.0; 56 | double start_time, run_time; 57 | double sum[MAX_THREADS]; 58 | 59 | step = 1.0/(double) num_steps; 60 | 61 | 62 | for(j=1;j<=MAX_THREADS ;j++){ 63 | omp_set_num_threads(j); 64 | full_sum = 0.0; 65 | start_time = omp_get_wtime(); 66 | #pragma omp parallel private(i) 67 | { 68 | int id = omp_get_thread_num(); 69 | int numthreads = omp_get_num_threads(); 70 | double x; 71 | 72 | double partial_sum = 0; 73 | 74 | #pragma omp single 75 | printf(" num_threads = %d",numthreads); 76 | 77 | for (i=id;i< num_steps; i+=numthreads){ 78 | x = (i+0.5)*step; 79 | partial_sum += + 4.0/(1.0+x*x); 80 | } 81 | #pragma omp critical 82 | full_sum += partial_sum; 83 | } 84 | 85 | pi = step * full_sum; 86 | run_time = omp_get_wtime() - start_time; 87 | printf("\n pi is %f in %f seconds %d threds \n ",pi,run_time,j); 88 | } 89 | } 90 | 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /solutions/pi_spmd_simple.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | NAME: PI SPMD ... a simple version. 4 | 5 | This program will numerically compute the integral of 6 | 7 | 4/(1+x*x) 8 | 9 | from 0 to 1. The value of this integral is pi -- which 10 | is great since it gives us an easy way to check the answer. 11 | 12 | The program was parallelized using OpenMP and an SPMD 13 | algorithm. 
The following OpenMP specific lines were 14 | added: 15 | 16 | (1) A line to include omp.h -- the include file that 17 | contains OpenMP's function prototypes and constants. 18 | 19 | (2) A pragma that tells OpenMP to create a team of threads 20 | with an integer variable i being created for each thread. 21 | 22 | (3) two function calls: one to get the thread ID (ranging 23 | from 0 to one less than the number of threads), and the other 24 | returning the total number of threads. 25 | 26 | (4) A cyclic distribution of the loop by changing loop control 27 | expressions to run from the thread ID incremented by the number 28 | of threads. Local sums accumlated into sum[id]. 29 | 30 | Note that this program will show low performance due to 31 | false sharing. In particular, sum[id] is unique to each 32 | thread, but adfacent values of this array share a cache line 33 | causing cache thrashing as the program runs. 34 | 35 | History: Written by Tim Mattson, 11/99. 36 | 37 | */ 38 | 39 | #include 40 | #include 41 | 42 | #define MAX_THREADS 4 43 | 44 | static long num_steps = 100000000; 45 | double step; 46 | int main () 47 | { 48 | int i,j; 49 | double pi, full_sum = 0.0; 50 | double start_time, run_time; 51 | double sum[MAX_THREADS]; 52 | 53 | step = 1.0/(double) num_steps; 54 | 55 | 56 | for (j=1;j<=MAX_THREADS ;j++) { 57 | 58 | omp_set_num_threads(j); 59 | full_sum=0.0; 60 | start_time = omp_get_wtime(); 61 | 62 | #pragma omp parallel 63 | { 64 | int i; 65 | int id = omp_get_thread_num(); 66 | int numthreads = omp_get_num_threads(); 67 | double x; 68 | 69 | sum[id] = 0.0; 70 | 71 | if (id == 0) 72 | printf(" num_threads = %d",numthreads); 73 | 74 | for (i=id;i< num_steps; i+=numthreads){ 75 | x = (i+0.5)*step; 76 | sum[id] = sum[id] + 4.0/(1.0+x*x); 77 | } 78 | } 79 | 80 | for(full_sum = 0.0, i=0;i 12 | #endif 13 | #include 14 | #include 15 | 16 | #define N 10000 17 | #define Nthreads 2 18 | 19 | /* Some random number constants from numerical recipies */ 20 | #define 
SEED 2531 21 | #define RAND_MULT 1366 22 | #define RAND_ADD 150889 23 | #define RAND_MOD 714025 24 | int randy = SEED; 25 | 26 | /* function to fill an array with random numbers */ 27 | void fill_rand(int length, double *a) 28 | { 29 | int i; 30 | for (i=0;i 39 | 40 | static unsigned long long MULTIPLIER = 764261123; 41 | static unsigned long long PMOD = 2147483647; 42 | static unsigned long long mult_n; 43 | double random_low, random_hi; 44 | 45 | #define MAX_THREADS 128 46 | static unsigned long long pseed[MAX_THREADS][4]; //[4] to padd to cache line 47 | //size to avoid false sharing 48 | unsigned long long random_last = 0; 49 | #pragma omp threadprivate(random_last) 50 | 51 | 52 | double drandom() 53 | { 54 | unsigned long long random_next; 55 | double ret_val; 56 | 57 | // 58 | // compute an integer random number from zero to mod 59 | // 60 | random_next = (unsigned long long)((mult_n * random_last)% PMOD); 61 | random_last = random_next; 62 | 63 | // 64 | // shift into preset range 65 | // 66 | ret_val = ((double)random_next/(double)PMOD)*(random_hi-random_low)+random_low; 67 | return ret_val; 68 | } 69 | 70 | // 71 | // set the seed, the multiplier and the range 72 | // 73 | void seed(double low_in, double hi_in) 74 | { 75 | int i, id, nthreads; 76 | unsigned long long iseed; 77 | id = omp_get_thread_num(); 78 | 79 | #pragma omp single 80 | { 81 | if(low_in < hi_in) 82 | { 83 | random_low = low_in; 84 | random_hi = hi_in; 85 | } 86 | else 87 | { 88 | random_low = hi_in; 89 | random_hi = low_in; 90 | } 91 | 92 | // 93 | // The Leapfrog method ... 
adjust the multiplier so you stride through 94 | // the sequence by increments of "nthreads" and adust seeds so each 95 | // thread starts with the right offset 96 | // 97 | 98 | nthreads = omp_get_num_threads(); 99 | iseed = PMOD/MULTIPLIER; // just pick a reasonable seed 100 | pseed[0][0] = iseed; 101 | mult_n = MULTIPLIER; 102 | for (i = 1; i < nthreads; ++i) 103 | { 104 | iseed = (unsigned long long)((MULTIPLIER * iseed) % PMOD); 105 | pseed[i][0] = iseed; 106 | mult_n = (mult_n * MULTIPLIER) % PMOD; 107 | } 108 | 109 | } 110 | random_last = (unsigned long long) pseed[id][0]; 111 | } 112 | 113 | -------------------------------------------------------------------------------- /solutions/random_seq_lcg.c: -------------------------------------------------------------------------------- 1 | 2 | //********************************************************** 3 | // Pseudo random number generator: 4 | // double drandom 5 | // void seed (lower_limit, higher_limit) 6 | //********************************************************** 7 | // 8 | // A simple linear congruential random number generator 9 | // (Numerical Recipies chapter 7, 1st ed.) with parameters 10 | // from the table on page 198j. 11 | // 12 | // Uses a linear congruential generator to return a value between 13 | // 0 and 1, then scales and shifts it to fill the desired range. This 14 | // range is set when the random number generator seed is called. 15 | // 16 | // USAGE: 17 | // 18 | // pseudo random sequence is seeded with a range 19 | // 20 | // void seed(lower_limit, higher_limit) 21 | // 22 | // and then subsequent calls to the random number generator generates values 23 | // in the sequence: 24 | // 25 | // double random() 26 | // 27 | // History: 28 | // Written by Tim Mattson, 9/2007. 
29 | 30 | static long MULTIPLIER = 1366; 31 | static long ADDEND = 150889; 32 | static long PMOD = 714025; 33 | long random_last = 0.0; 34 | double random_low, random_hi; 35 | 36 | double drandom() 37 | { 38 | long random_next; 39 | double ret_val; 40 | 41 | // 42 | // compute an integer random number from zero to mod 43 | // 44 | random_next = (MULTIPLIER * random_last + ADDEND)% PMOD; 45 | random_last = random_next; 46 | 47 | // 48 | // shift into preset range 49 | // 50 | ret_val = ((double)random_next/(double)PMOD)*(random_hi-random_low)+random_low; 51 | return ret_val; 52 | } 53 | // 54 | // set the seed and the range 55 | // 56 | void seed(double low_in, double hi_in) 57 | { 58 | if(low_in < hi_in) 59 | { 60 | random_low = low_in; 61 | random_hi = hi_in; 62 | } 63 | else 64 | { 65 | random_low = hi_in; 66 | random_hi = low_in; 67 | } 68 | random_last = PMOD/ADDEND; // just pick something 69 | 70 | } 71 | //********************************************************** 72 | // end of pseudo random generator code. 73 | //********************************************************** 74 | 75 | -------------------------------------------------------------------------------- /win_intel.def: -------------------------------------------------------------------------------- 1 | # for Intel compiler on windows 2 | # copy to make.def 3 | CC = icl 4 | CLINKER = $(CC) 5 | OPTFLAGS = /Qopenmp 6 | LIBS = 7 | PRE = 8 | 9 | CFLAGS = $(OPTFLAGS) 10 | 11 | OBJ=obj 12 | EXE=.exe 13 | RM=del 14 | --------------------------------------------------------------------------------