├── 00-hello.cpp ├── 08-lastprivate.cpp ├── 03-parallel-cout.cpp ├── 01-header-and-env.cpp ├── 05-parallel-for.cpp ├── 07-firstprivate.cpp ├── 16-loop-dependencies.cpp ├── 02-parallel.cpp ├── 15-ordered.cpp ├── 11-barrier.cpp ├── 06-scoping.cpp ├── 10-mutex.cpp ├── 04-nested.cpp ├── 09-single-master-critical.cpp ├── README.md ├── Makefile ├── 12-atomic.cpp ├── 14-scheduling.cpp ├── LICENSE ├── 13-reduction.cpp └── 17-sections.cpp /00-hello.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | 3 | int 4 | main() 5 | { 6 | #ifdef _OPENMP 7 | std::cout << "Hello, OpenMP!" << std::endl; 8 | #else 9 | std::cout << "OpenMP is not enabled." << std::endl; 10 | #endif 11 | return 0; 12 | } 13 | 14 | // XXX: try compiling with and without the -fopenmp compiler flag 15 | -------------------------------------------------------------------------------- /08-lastprivate.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | 3 | int 4 | main() 5 | { 6 | const int size = 1000; 7 | int i = -1, a[size]; 8 | 9 | #pragma omp parallel for private(i) 10 | for (i = 0; i < size; i++) 11 | a[i] = i; 12 | 13 | std::cout << "i = " << i << std::endl; 14 | return 0; 15 | } 16 | 17 | // XXX: try changing private to lastprivate 18 | -------------------------------------------------------------------------------- /03-parallel-cout.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <sstream> 3 | #include <omp.h> 4 | 5 | int 6 | main() 7 | { 8 | #pragma omp parallel 9 | { 10 | std::cout << "Hello from thread " << omp_get_thread_num() << " of " << 11 | omp_get_num_threads() << std::endl; 12 | } 13 | return 0; 14 | } 15 | 16 | // XXX: why is the output mangled? 
17 | // XXX: use std::stringstream to fix it 18 | -------------------------------------------------------------------------------- /01-header-and-env.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | 3 | // main OpenMP include header 4 | #include <omp.h> 5 | 6 | int 7 | main() 8 | { 9 | // omp_get_max_threads() and many other functions are declared in omp.h 10 | std::cout << "OpenMP will use " << omp_get_max_threads() << 11 | " threads maximum." << std::endl; 12 | return 0; 13 | } 14 | 15 | // XXX: execute with OMP_NUM_THREADS=2 and OMP_NUM_THREADS=3 16 | -------------------------------------------------------------------------------- /05-parallel-for.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | 3 | int 4 | main() 5 | { 6 | const int size = 50; 7 | int a[size]; 8 | 9 | #pragma omp parallel for 10 | for (int i = 0; i < size; i++) 11 | a[i] = i; // loop iterations are executed in parallel 12 | 13 | for (int i = 0; i < size; i++) 14 | std::cout << a[i] << std::endl; 15 | 16 | return 0; 17 | } 18 | 19 | // XXX: create a separate "omp parallel" region with a for-loop in it 20 | -------------------------------------------------------------------------------- /07-firstprivate.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <omp.h> 3 | 4 | int 5 | main() 6 | { 7 | int i = 10; 8 | 9 | #pragma omp parallel private(i) 10 | { 11 | // 'i' is uninitialized here since it is thread-private 12 | printf("thread %d, i = %d\n", omp_get_thread_num(), i); 13 | i = 200 + omp_get_thread_num(); 14 | } 15 | printf("i = %d\n", i); 16 | return 0; 17 | } 18 | 19 | // XXX: change private to firstprivate to initialize 'i' 20 | -------------------------------------------------------------------------------- /16-loop-dependencies.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <omp.h> 3 
| 4 | int 5 | main() 6 | { 7 | const int n = 10000; 8 | long a[n]; 9 | 10 | for (int i = 0; i < n; i++) 11 | a[i] = i; 12 | 13 | for (int i = 1; i < n; i++) 14 | a[i] += a[i-1]; 15 | 16 | std::cout << a[n-1] << std::endl; 17 | return 0; 18 | } 19 | 20 | // XXX: Parallelize the second loop. 21 | // XXX: Why is the result of the parallel program incorrect? How to fix it? 22 | -------------------------------------------------------------------------------- /02-parallel.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <omp.h> 3 | 4 | int 5 | main() 6 | { 7 | #pragma omp parallel // OpenMP parallel region 8 | { 9 | // this will be executed in parallel by N threads 10 | printf("Hello from thread %d of %d\n", omp_get_thread_num(), 11 | omp_get_num_threads()); 12 | } 13 | return 0; 14 | } 15 | 16 | // XXX: execute with OMP_NUM_THREADS=2 and OMP_NUM_THREADS=3 17 | // XXX: add num_threads(5) to pragma omp parallel 18 | -------------------------------------------------------------------------------- /15-ordered.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <omp.h> 3 | 4 | int 5 | main() 6 | { 7 | const int niter = 10; 8 | 9 | #pragma omp parallel for ordered // loop must be marked as ordered 10 | for (int i = 0; i < niter; i++) { 11 | int thr = omp_get_thread_num(); 12 | printf("unordered iter %d of %d on thread %d\n", i, niter, thr); 13 | #pragma omp ordered 14 | printf("ordered iter %d of %d on thread %d\n", i, niter, thr); 15 | } 16 | 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /11-barrier.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <omp.h> 3 | 4 | int 5 | main() 6 | { 7 | #pragma omp parallel 8 | { 9 | printf("Hello from thread %d of %d\n", omp_get_thread_num(), 10 | omp_get_num_threads()); 11 | #pragma omp barrier // all threads wait here 12 
| printf("Thread %d of %d have passed the barrier\n", 13 | omp_get_thread_num(), omp_get_num_threads()); 14 | } 15 | return 0; 16 | } 17 | 18 | // XXX: execute with OMP_NUM_THREADS=10 19 | // XXX: try removing the barrier 20 | -------------------------------------------------------------------------------- /06-scoping.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <omp.h> 3 | 4 | int 5 | main() 6 | { 7 | int a = -1; 8 | 9 | #pragma omp parallel 10 | { 11 | // "a" is shared by default. All threads have access to the same "a". 12 | 13 | int b; // not visible outside this scope; each thread has its own copy 14 | a = omp_get_thread_num() + 100; 15 | b = omp_get_thread_num() + 200; 16 | } 17 | std::cout << "a = " << a << std::endl; 18 | // b = 0; // error 19 | return 0; 20 | } 21 | 22 | // XXX: add default(none) 23 | // XXX: make "a" private 24 | -------------------------------------------------------------------------------- /10-mutex.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <unistd.h> 3 | #include <omp.h> 4 | 5 | int 6 | main() 7 | { 8 | omp_lock_t lock; 9 | 10 | omp_init_lock(&lock); 11 | 12 | #pragma omp parallel num_threads(4) 13 | { 14 | omp_set_lock(&lock); // mutual exclusion (mutex) 15 | std::cout << "Thread " << omp_get_thread_num() << 16 | " has acquired the lock. Sleeping 2 seconds..." << std::endl; 17 | sleep(2); 18 | std::cout << "Thread " << omp_get_thread_num() << 19 | " is releasing the lock..." 
<< std:: endl; 20 | omp_unset_lock(&lock); 21 | } 22 | omp_destroy_lock(&lock); 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /04-nested.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <omp.h> 3 | 4 | int 5 | main() 6 | { 7 | omp_set_nested(1); // enable nested parallelism (also, OMP_NESTED) 8 | 9 | #pragma omp parallel num_threads(2) 10 | { 11 | printf("Level 1, thread %d of %d\n", omp_get_thread_num(), 12 | omp_get_num_threads()); 13 | #pragma omp parallel num_threads(2) 14 | { 15 | printf("Level 2, thread %d of %d\n", omp_get_thread_num(), 16 | omp_get_num_threads()); 17 | } 18 | } 19 | return 0; 20 | } 21 | 22 | // XXX: try disabling nested parallelism 23 | // XXX: be careful - don't create too many threads in your programs 24 | -------------------------------------------------------------------------------- /09-single-master-critical.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <omp.h> 3 | 4 | // omp critical -> execute by one thread at a time 5 | // omp single -> execute by any one thread 6 | // omp master -> execute by the master thread (id == 0) 7 | 8 | int 9 | main() 10 | { 11 | #pragma omp parallel num_threads(8) 12 | { 13 | #pragma omp critical 14 | std::cout << "Hello from thread " << omp_get_thread_num() << " of " << 15 | omp_get_num_threads() << std::endl; 16 | } 17 | return 0; 18 | } 19 | 20 | // XXX: what happens when we change "critical" to "single"? 21 | // XXX: what happens when we change "critical" to "master"? 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenMP tutorial for MolSSI Summer School 2 | 3 | ### Getting the files 4 | 5 | Clone the git repository: 6 | 7 | git clone https://github.com/ilyak/openmp-tutorial.git 8 | 9 | ### Compilation 10 | 11 | Adjust CXX (C++ compiler) and CXXFLAGS (compilation flags) in the Makefile if 12 | necessary. 13 | 14 | To compile individual examples: 15 | 16 | make 00-hello 17 | make 01-header-and-env 18 | ... 19 | 20 | To compile all programs: 21 | 22 | make 23 | 24 | ### Useful links 25 | 26 | https://3ions.com/ 27 | 28 | https://www.openmp.org/specifications/ 29 | 30 | https://computing.llnl.gov/tutorials/openMP/ 31 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CXX= g++ 2 | CXXFLAGS= -Wall -fopenmp 3 | 4 | PROGS= 00-hello \ 5 | 01-header-and-env \ 6 | 02-parallel \ 7 | 03-parallel-cout \ 8 | 04-nested \ 9 | 05-parallel-for \ 10 | 06-scoping \ 11 | 07-firstprivate \ 12 | 08-lastprivate \ 13 | 09-single-master-critical \ 14 | 10-mutex \ 15 | 11-barrier \ 16 | 12-atomic \ 17 | 13-reduction \ 18 | 14-scheduling \ 19 | 15-ordered \ 20 | 16-loop-dependencies \ 21 | 17-sections 22 | 23 | all: $(PROGS) 24 | 25 | clean: 26 | rm -f *.o $(PROGS) 27 | 28 | .PHONY: all clean 29 | -------------------------------------------------------------------------------- /12-atomic.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | 3 | double 4 | two_body_energy(int i, int j) 5 | { 6 | return (2.0 * i + 3.0 * j) / 10.0; // some dummy return value 7 | } 8 | 9 | int 10 | main() 11 | { 12 | const int nbodies = 1000; 13 | double energy = 0.0; 14 | 15 | for (int i = 0; i < nbodies; i++) { 16 | for (int j = i+1; j < nbodies; j++) { 17 | double 
eij = two_body_energy(i, j); 18 | energy += eij; 19 | } 20 | } 21 | 22 | std::cout << "energy = " << energy << std::endl; 23 | return 0; 24 | } 25 | 26 | // XXX: parallelize the outer loop 27 | // XXX: why is the result incorrect? 28 | // XXX: use omp atomic to fix the bug 29 | -------------------------------------------------------------------------------- /14-scheduling.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <omp.h> 3 | 4 | #define CHUNK_SIZE 5 5 | 6 | // scheduling: 7 | // static -> statically preassign iterations to threads 8 | // dynamic -> each thread gets more work when it's done at runtime 9 | // guided -> similar to dynamic with automatically adjusted chunk size 10 | // auto -> let the compiler decide! 11 | 12 | int 13 | main() 14 | { 15 | const int niter = 25; 16 | 17 | #pragma omp parallel for schedule(static, CHUNK_SIZE) 18 | for (int i = 0; i < niter; i++) { 19 | int thr = omp_get_thread_num(); 20 | printf("iter %d of %d on thread %d\n", i, niter, thr); 21 | } 22 | 23 | return 0; 24 | } 25 | 26 | // XXX: play with the chunk size 27 | // XXX: try dynamic scheduling, guided scheduling 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Ilya Kaliman 2 | 3 | Permission to use, copy, modify, and distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /13-reduction.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | 3 | double 4 | two_body_energy(int i, int j) 5 | { 6 | return (2.0 * i + 3.0 * j) / 10.0; // some dummy return value 7 | } 8 | 9 | int 10 | main() 11 | { 12 | const int nbodies = 1000; 13 | double energy = 0.0; 14 | 15 | #pragma omp parallel for reduction(+:energy) 16 | for (int i = 0; i < nbodies; i++) { 17 | for (int j = i+1; j < nbodies; j++) { 18 | double eij = two_body_energy(i, j); 19 | energy += eij; 20 | } 21 | } 22 | 23 | std::cout << "energy = " << energy << std::endl; 24 | return 0; 25 | } 26 | 27 | // XXX: there are other reduction operations (* - && || max min ...) 28 | // XXX: reduction takes care of the scope 29 | // XXX: what happens when we change the initial value of energy? 30 | // XXX: what happens when we try to make energy "shared"? 
31 | -------------------------------------------------------------------------------- /17-sections.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <unistd.h> 3 | #include <omp.h> 4 | 5 | int 6 | main() 7 | { 8 | #pragma omp parallel sections num_threads(4) 9 | { 10 | #pragma omp section // independent thread 11 | { 12 | int thr = omp_get_thread_num(); 13 | printf("section 1, thread %d - sleeping 1 second\n", thr); 14 | sleep(1); 15 | printf("section 1 done\n"); 16 | } 17 | #pragma omp section // independent thread 18 | { 19 | int thr = omp_get_thread_num(); 20 | printf("section 2, thread %d - sleeping 2 second\n", thr); 21 | sleep(2); 22 | printf("section 2 done\n"); 23 | } 24 | #pragma omp section // independent thread 25 | { 26 | int thr = omp_get_thread_num(); 27 | printf("section 3, thread %d - sleeping 3 second\n", thr); 28 | sleep(3); 29 | printf("section 3 done\n"); 30 | } 31 | // printf("not in omp section"); // error - code must be in section 32 | } 33 | return 0; 34 | } 35 | 36 | // XXX: what happens when we change num_threads to 2 37 | --------------------------------------------------------------------------------