├── .gitignore ├── LICENSE ├── Matrix ├── CMakeLists.txt └── main.cpp ├── README.md ├── als-movie-recommender ├── CMakeLists.txt ├── README.md ├── data.txt └── main.cpp ├── avrage-reduction ├── CMakeLists.txt └── main.cpp ├── data-sharing ├── CMakeLists.txt └── main.cpp ├── heap-vs-stack ├── CMakeLists.txt └── main.cpp ├── hello-mp ├── CMakeLists.txt └── main.cpp ├── integral ├── CMakeLists.txt └── main.cpp ├── linkedlist ├── CMakeLists.txt └── main.cpp ├── parallel-loop-1 ├── CMakeLists.txt └── main.cpp └── scheduling ├── CMakeLists.txt └── main.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | CMakeCache.txt 2 | CMakeFiles 3 | CMakeScripts 4 | Makefile 5 | cmake_install.cmake 6 | install_manifest.txt 7 | CTestTestfile.cmake 8 | cmake-build-debug/ 9 | .idea/ 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Mustafa Atik 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Matrix/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | project(Matrix) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | set(SOURCE_FILES main.cpp) 7 | add_executable(Matrix ${SOURCE_FILES}) 8 | 9 | # for openmp 10 | set(CMAKE_CXX_COMPILER "/usr/local/bin/g++-4.9") 11 | set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -fopenmp") -------------------------------------------------------------------------------- /Matrix/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | struct Matrix { 7 | double *data; 8 | int rows, cols; 9 | 10 | Matrix(int rows, int cols) { 11 | this->rows = rows; 12 | this->cols = cols; 13 | data = new double[rows*cols]; 14 | } 15 | 16 | double get(int row, int col) { 17 | return data[row * cols + col]; 18 | } 19 | 20 | double set(int row, int col, double val) { 21 | data[row * cols + col] = val; 22 | } 23 | 24 | Matrix& operator +(const Matrix& m2) 25 | { 26 | return sum(m2); 27 | } 28 | 29 | Matrix& sum(Matrix matrix) { 30 | Matrix *s = new Matrix(rows, cols); 31 | for (int i = 0; i < rows * cols; ++i) { 32 | s->data[i] = data[i] + matrix.data[i]; 33 | } 34 | return *s; 35 | } 36 | 37 | Matrix& substract(Matrix matrix) { 38 | Matrix *s = new Matrix(rows, cols); 39 | for (int i = 0; i < rows * cols; ++i) { 40 | s->data[i] = data[i] - matrix.data[i]; 41 | } 42 | return *s; 43 | } 44 | 45 | Matrix& multiply(Matrix matrix) { 46 | if (!(shape()[0] == matrix.shape()[1] && shape()[1] == matrix.shape()[0])) 47 | throw std::invalid_argument("shape does not fit each other"); 48 | 49 | Matrix *s = new Matrix(rows, cols); 50 | for (int i = 0; i < rows * cols; ++i) { 51 | s->data[i] = data[i] - matrix.data[i]; 52 | } 53 | return *s; 54 | } 55 | 56 | Matrix& T() { 57 | Matrix *s = new Matrix(cols, rows); 58 | for (int i = 0; i < rows; ++i) { 59 | for (int j = 0; j < cols; ++j) { 60 | s->set(j, i, get(i, j)); 61 | } 62 | } 63 | return * s; 64 | } 65 | 66 | int* shape() { 67 | int * shape = new int[2]; 68 | shape[0] = rows; 69 | shape[1] = cols; 70 | return shape; 71 | } 72 | 73 | std::string toString() { 74 | std::stringstream ss; 75 | ss << "[" << std::endl << " ["; 76 | for (int i = 0; i < rows * cols; ++i) { 77 | if (i != 0 && i % cols == 0) 78 | ss << "]" << std::endl << " ["; 79 | ss << " " << data[i] << " "; 80 | } 81 | ss << "]" << std::endl << "]"; 82 | return ss.str(); 83 | } 84 | }; 85 | 86 | 87 | int main() { 88 | Matrix m1 = Matrix(2, 3); 89 | Matrix m2 = Matrix(2, 3); 90 | for (int i = 0; i < 6; ++i) { 91 | m1.set((int)i / 3, i % 3, i); 92 | m2.set((int)i / 3, i % 3, i); 93 | } 94 | 95 | m1.set(0, 1, 6); 96 | std::cout << m1.toString(); 97 | std::cout << m2.toString(); 98 | std::cout << (m1 + m2).toString(); 99 | std::cout << m2.T().T().T().toString(); 100 | return 0; 101 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # openmp-examples 2 | This repository contains OpenMP-examples which I created while learning OpenMP. 3 | 4 | I follow Tim Mattson's [Introduction to OpenMP](https://www.youtube.com/watch?v=nE-xN4Bf8XI&list=PLLX-Q6B8xqZ8n8bwjGdzBJ25X2utwnoEG) video playlist on youtube. 5 | 6 | Other useful links are as follows: 7 | - http://bisqwit.iki.fi/story/howto/openmp/ 8 | - https://www.viva64.com/en/a/0054/ 9 | 10 | ### Details 11 | I am learning OpenMP because I have a master degree program assignment about running machine learning algorithms in parallel. 12 | 13 | In addition to OpenMP, I am going to do same things by using Apache Spark. In summary, this is quite a personal playground repository. 14 | It does not mean to cover all the details or become a comprehensible tutorial guide. 15 | 16 | ### Repositories 17 | | Name | Description | 18 | |------------------|-----------------------------------------------------------------| 19 | | hello-mp | Hello world in open-mp | 20 | | parallel-loop-1 | traversing a loop in parallel | 21 | | avrage-reduction | calculating avg by using `atomic` and `reduction` concepts | 22 | | heap-vs-stack | sharing(global) and not sharing data(local) between threads. | 23 | | data-sharing | private, first-private, initialization concepts | 24 | | integral | calculating integral in various ways to demonstrate approaches | 25 | | scheduling | thread scheduling | 26 | | linkedlist | consuming linkedlist in multiple threads in a thread-safety way | 27 | | Als | movie recommender system using alternating least squares method | 28 | -------------------------------------------------------------------------------- /als-movie-recommender/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | project(als_list) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | set(SOURCE_FILES main.cpp) 7 | add_executable(als_list ${SOURCE_FILES}) 8 | 9 | 10 | 11 | # for openmp 12 | set(CMAKE_CXX_COMPILER "/usr/local/bin/g++-4.9") 13 | set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -fopenmp") 14 | include_directories("/Users/muatik/Downloads/eigen") -------------------------------------------------------------------------------- /als-movie-recommender/README.md: -------------------------------------------------------------------------------- 1 | # Movie recommender system using alternating least squares method 2 | 3 | For more information: 4 | https://goo.gl/sMihJu 5 | -------------------------------------------------------------------------------- /als-movie-recommender/data.txt: -------------------------------------------------------------------------------- 1 | 1,1,5 2 | 1,2,4 3 | 1,3,5 4 | 1,8,1 5 | 1,9,1 6 | 1,4,4 7 | 2,1,5 8 | 2,4,5 9 | 2,7,1 10 | 2,9,1 11 | 3,1,5 12 | 3,4,5 13 | 3,2,4 14 | 3,3,5 15 | 3,7,1 16 | 3,8,1 17 | 3,9,1 18 | 4,2,5 19 | 4,3,5 20 | 4,8,1 21 | 5,1,1 22 | 5,2,1 23 | 5,8,5 24 | 5,7,4 25 | 5,9,5 26 | 6,7,5 27 | 6,1,1 28 | 6,2,1 29 | 6,9,5 -------------------------------------------------------------------------------- /als-movie-recommender/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | //#include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | using std::cin; 11 | using std::cout; 12 | using std::endl; 13 | using namespace Eigen; 14 | using namespace std; 15 | 16 | // install eigen 17 | // mpic++ -I /Users/mustafaatik/Downloads/eigen/ main.cpp -o main && mpirun --allow-run-as-root -np 2 ./main 18 | // http://stackoverflow.com/questions/15939757/is-it-possible-to-run-openmpi-on-a-local-computer-and-a-remote-cluster 19 | // http://mpitutorial.com/tutorials/running-an-mpi-cluster-within-a-lan/ 20 | // https://www.open-mpi.org/papers/sc-2009/jjhursey-iu-booth.pdf 21 | // http://stackoverflow.com/questions/2546298/vector-usage-in-mpic 22 | // http://stackoverflow.com/questions/38592854/how-to-send-the-data-in-an-eigenmatrixxd-with-mpi 23 | 24 | void mat2csv(MatrixXd mat); 25 | void factorize(); 26 | void als(std::vector> ratings, int maxUserId, int maxMovieId, int rank, int threadNumber); 27 | 28 | std::vector< std::vector > load_csv (const std::string & path) { 29 | std::ifstream indata; 30 | indata.open(path); 31 | std::string line; 32 | std::vector values; 33 | uint rows = 0; 34 | std::vector< std::vector > ratings; 35 | while (std::getline(indata, line)) { 36 | std::stringstream lineStream(line); 37 | std::string cell; 38 | std::vector data; 39 | while (std::getline(lineStream, cell, ',')) { 40 | data.push_back(std::stod(cell)); 41 | } 42 | // user - movie -rating 43 | ratings.push_back(data); 44 | } 45 | return ratings; 46 | // return Map>(values.data(), rows, values.size()/rows); 47 | } 48 | 49 | 50 | int main(int argc, char ** argv) 51 | { 52 | factorize(); 53 | return 0; 54 | } 55 | 56 | 57 | 58 | void factorize() { 59 | std::vector< std::vector > ratings = load_csv("../data.txt"); 60 | 61 | int maxUserId = 7; 62 | int maxUMovieId= 9; 63 | int cols = maxUMovieId, rows = maxUserId; 64 | 65 | for (int i = 4; i < 5; ++i) { 66 | cout << "Thread number: " << i << endl; 67 | als(ratings, maxUserId, maxUMovieId, 4, 1); 68 | } 69 | } 70 | 71 | 72 | 73 | 74 | void als(std::vector> ratings, int maxUserId, int maxMovieId, int rank, int threadNumber) { 75 | 76 | MatrixXd U = MatrixXd::Random(maxUserId, rank); 77 | MatrixXd M = MatrixXd::Random(rank, maxMovieId); 78 | 79 | MatrixXd A = MatrixXd::Zero(maxUserId, maxMovieId); 80 | MatrixXd W = MatrixXd::Zero(maxUserId, maxMovieId); 81 | for (int i = 0; i < ratings.size(); ++i) { 82 | int ui = ratings.at(i).at(0) - 1; // userId index 83 | int mi = ratings.at(i).at(1) - 1; // movieId index 84 | int ri = ratings.at(i).at(2); // rating 85 | A(ui, mi) = ri; 86 | W(ui, mi) = 1; 87 | } 88 | 89 | cout << A << endl << endl; 90 | double Eta = 0.03; 91 | double eta = Eta; 92 | int maxIteration = 2000; 93 | cout << U * M << endl << endl; 94 | 95 | int ui, mi, ri; 96 | double error; 97 | Eigen::VectorXd tempU; 98 | std::vector errorHistory; 99 | 100 | double start = omp_get_wtime(); 101 | for (int iteration = 0; iteration < maxIteration; ++iteration) { 102 | 103 | #pragma omp parallel for num_threads(threadNumber) schedule(static) private(ui, mi, ri, error, tempU) 104 | for (int i = 0; i < ratings.size(); ++i) { 105 | ui = ratings.at(i).at(0) - 1; // userId index 106 | mi = ratings.at(i).at(1) - 1; // movieId index 107 | ri = ratings.at(i).at(2); // rating 108 | error = ri - U.row(ui) * M.col(mi); 109 | 110 | tempU = U.row(ui) + eta * error * M.col(mi).transpose(); 111 | M.col(mi) = M.col(mi) + eta * error * U.row(ui).transpose(); 112 | U.row(ui) = tempU; 113 | } 114 | 115 | if (iteration % ((int) maxIteration / 15) == 0) { 116 | eta *= 0.8; 117 | errorHistory.push_back(W.cwiseProduct(A - U * M).array().square().sum()); 118 | } 119 | } 120 | double elapsed = omp_get_wtime() - start; 121 | cout << "elapsed: " << elapsed << endl; 122 | cout << "error:" << endl; 123 | for (int j = 0; j < errorHistory.size(); ++j) { 124 | cout << "error: " << errorHistory.at(j) << endl; 125 | } 126 | cout << U * M << endl << endl; 127 | // mat2csv(A); 128 | mat2csv(U*M); 129 | } 130 | 131 | 132 | void mat2csv(MatrixXd mat) { 133 | std::stringstream buf; 134 | for (int i = 0; i < mat.rows(); ++i) { 135 | 136 | for (int j = 0; j < mat.cols(); ++j) { 137 | buf << mat(i, j) << ", "; 138 | } 139 | buf << "\n"; 140 | } 141 | cout << endl << buf.str() << endl; 142 | } -------------------------------------------------------------------------------- /avrage-reduction/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | project(avrage_reduction) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | set(SOURCE_FILES main.cpp) 7 | add_executable(avrage_reduction ${SOURCE_FILES}) 8 | 9 | 10 | # for openmp 11 | set(CMAKE_CXX_COMPILER "/usr/local/bin/g++-4.9") 12 | set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -fopenmp") -------------------------------------------------------------------------------- /avrage-reduction/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void avg(); 5 | void avg_reduction(); 6 | 7 | int main() { 8 | // avg(); 9 | avg_reduction(); 10 | return 0; 11 | } 12 | 13 | 14 | void avg_round_robin() { 15 | int N = 600000000; 16 | double tavg = 0; 17 | 18 | double timer_start = omp_get_wtime(); 19 | omp_set_num_threads(16); 20 | #pragma omp parallel 21 | { 22 | double avg; 23 | int id = omp_get_thread_num(); 24 | int nthreads = omp_get_num_threads(); 25 | 26 | for (int i = id; i < N; i+=nthreads) { 27 | avg += i; 28 | } 29 | #pragma omp atomic 30 | tavg += avg; 31 | } 32 | double timer_elapsed = omp_get_wtime() - timer_start; 33 | tavg = tavg / N; 34 | 35 | std::cout << tavg << " took " << timer_elapsed << std::endl; 36 | // 1 threads took 2.1 37 | // 4 threads took 0.7 38 | // 48 threads took 0.65 39 | } 40 | 41 | void avg_reduction() { 42 | int N = 600000000; 43 | int j = 0; 44 | double tavg = 0; 45 | 46 | double timer_start = omp_get_wtime(); 47 | omp_set_num_threads(48); 48 | 49 | #pragma omp parallel for reduction(+:tavg) 50 | for (j = 0; j < N; ++j) { 51 | tavg += j; 52 | } 53 | 54 | double timer_elapsed = omp_get_wtime() - timer_start; 55 | tavg = tavg / N; 56 | 57 | std::cout << tavg << " took " << timer_elapsed << std::endl; 58 | // 1 threads took 2.1 59 | // 4 threads took 0.69 60 | // 48 threads took 0.65 61 | } -------------------------------------------------------------------------------- /data-sharing/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | project(data_sharing) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | set(SOURCE_FILES main.cpp) 7 | add_executable(data_sharing ${SOURCE_FILES}) 8 | 9 | 10 | # for openmp 11 | set(CMAKE_CXX_COMPILER "/usr/local/bin/g++-4.9") 12 | set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -fopenmp") -------------------------------------------------------------------------------- /data-sharing/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void compare_cases(); 5 | using namespace std; 6 | 7 | double G = 2.1; 8 | 9 | int main() { 10 | compare_cases(); 11 | return 0; 12 | } 13 | 14 | void compare_cases() { 15 | int a=1, b=2, c=3, t=4; 16 | omp_set_num_threads(3); 17 | #pragma omp parallel private(a), firstprivate(b) 18 | { 19 | // a will be private and, but not be initialized 20 | // b will be private and initialized 21 | // c will be shared; 22 | // t will be local; that is it is private. 23 | int t = 5; 24 | 25 | static int s = 8; // will be shared. 26 | if (omp_get_thread_num() == 0) 27 | s = 2; 28 | 29 | printf("thread id: %d, a: %d, b: %d, c: %d, t: %d, s: %d, G: %f, \n", omp_get_thread_num(), a, b, c, t, s, G); 30 | a = 21; 31 | b = 22; 32 | c = 23; 33 | t = 24; 34 | } 35 | 36 | printf("\nout of the parallel region\n"); 37 | printf("thread id: %d, a: %d, b: %d, c: %d, t: %d, G: %f, \n", omp_get_thread_num(), a, b, c, t, G); 38 | 39 | // OUTPUT: 40 | // thread id: 0, a: 0, b: 2, c: 3, t: 5, s: 2, G: 2.100000, 41 | // thread id: 2, a: 32752, b: 2, c: 3, t: 5, s: 2, G: 2.100000, 42 | // thread id: 1, a: 32767, b: 2, c: 3, t: 5, s: 2, G: 2.100000, 43 | // 44 | // out of the parallel region 45 | // thread id: 0, a: 1, b: 2, c: 23, t: 4, G: 2.100000, 46 | } 47 | -------------------------------------------------------------------------------- /heap-vs-stack/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | project(heap_vs_stack) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | set(SOURCE_FILES main.cpp) 7 | add_executable(heap_vs_stack ${SOURCE_FILES}) 8 | 9 | # for openmp 10 | set(CMAKE_CXX_COMPILER "/usr/local/bin/g++-4.9") 11 | set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -fopenmp") -------------------------------------------------------------------------------- /heap-vs-stack/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | 6 | int heap_sum = 0; 7 | omp_set_num_threads(3); 8 | #pragma omp parallel 9 | { 10 | int stack_sum=0; 11 | stack_sum++; 12 | heap_sum++; 13 | printf("stack sum is %d\n", stack_sum); 14 | printf("heap sum is %d\n", heap_sum); 15 | } 16 | std::cout << "final heap sum is " << heap_sum << std::endl; 17 | 18 | // output: 19 | // stack sum is 1 20 | // stack sum is 1 21 | // stack sum is 1 22 | // heap sum is 3 23 | // heap sum is 3 24 | // heap sum is 3 25 | // final heap sum is 3 26 | return 0; 27 | } -------------------------------------------------------------------------------- /hello-mp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | project(hello_mp) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | set(SOURCE_FILES main.cpp) 7 | add_executable(hello_mp ${SOURCE_FILES}) 8 | 9 | # for openmp 10 | set(CMAKE_CXX_COMPILER "/usr/local/bin/g++-4.9") 11 | set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -fopenmp") -------------------------------------------------------------------------------- /hello-mp/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std; 5 | 6 | int main() { 7 | #pragma omp parallel 8 | { 9 | int ID = omp_get_thread_num(); 10 | printf("hello %d\n", ID); 11 | printf("hello again %d\n", ID); 12 | } 13 | // output: 14 | // hello 1 15 | // hello 0 16 | // hello 3 17 | // hello 2 18 | // hello again 1 19 | // hello again 0 20 | // hello again 3 21 | // hello again 2 22 | return 0; 23 | } -------------------------------------------------------------------------------- /integral/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | project(integral) 3 | 4 | set(CMAKE_CXX_STANDARD 11) 5 | 6 | set(SOURCE_FILES main.cpp) 7 | add_executable(integral ${SOURCE_FILES}) 8 | 9 | # for openmp 10 | set(CMAKE_CXX_COMPILER "/usr/local/bin/g++-4.9") 11 | set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -fopenmp") -------------------------------------------------------------------------------- /integral/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void integral_roundrobin(); 5 | 6 | void integral_atomic(); 7 | 8 | void integral_reduction(); 9 | void integral_better_reduction(); 10 | 11 | int main() { 12 | // integral_roundrobin(); 13 | integral_reduction(); 14 | } 15 | 16 | 17 | void integral_roundrobin() { 18 | int NTHREADS = 48, nthreads; 19 | long num_steps = 100000000; 20 | double step = 0; 21 | double pi = 0.0; 22 | double sum[NTHREADS]; 23 | 24 | step = 1.0 / (double) num_steps; 25 | double timer_start = omp_get_wtime(); 26 | omp_set_num_threads(NTHREADS); 27 | 28 | #pragma omp parallel 29 | { 30 | int i, id, lnthreads; 31 | double x; 32 | 33 | lnthreads = omp_get_num_threads(); 34 | id = omp_get_thread_num(); 35 | if (id == 0) 36 | nthreads = lnthreads; 37 | 38 | for (i = id, sum[id]=0; i < num_steps; i+=lnthreads) { 39 | x = (i+0.5) * step; 40 | sum[id] += 4.0 / (1.0 + x*x); 41 | } 42 | 43 | } 44 | for (int i = 0; i < nthreads; ++i) { 45 | pi += sum[i] * step; 46 | } 47 | 48 | double timer_took = omp_get_wtime() - timer_start; 49 | std::cout << pi << " took " << timer_took; 50 | 51 | // 1 threads --> 0.57 seconds. 52 | // 4 threads --> 1.34 seconds. 53 | // 24 threads --> 0.59 seconds. 54 | // 48 threads --> 0.46 seconds. 55 | } 56 | 57 | 58 | 59 | void integral_atomic() { 60 | int NTHREADS = 4; 61 | long num_steps = 100000000; 62 | double step = 0; 63 | double pi = 0.0; 64 | 65 | step = 1.0 / (double) num_steps; 66 | double timer_start = omp_get_wtime(); 67 | omp_set_num_threads(NTHREADS); 68 | 69 | #pragma omp parallel 70 | { 71 | int i, id, lnthreads; 72 | double x, sum = 0; 73 | 74 | lnthreads = omp_get_num_threads(); 75 | id = omp_get_thread_num(); 76 | 77 | for (i = id; i < num_steps; i+=lnthreads) { 78 | x = (i+0.5) * step; 79 | sum += 4.0 / (1.0 + x*x); 80 | } 81 | 82 | #pragma omp atomic 83 | pi += sum * step; 84 | 85 | } 86 | 87 | double timer_took = omp_get_wtime() - timer_start; 88 | std::cout << pi << " took " << timer_took; 89 | // 1 threads --> 0.53 seconds. 90 | // 4 threads --> 0.25 seconds. 91 | // 24 threads --> 0.24 seconds. 92 | // 48 threads --> 0.21 seconds. 93 | } 94 | 95 | 96 | 97 | void integral_reduction() { 98 | int NTHREADS = 48; 99 | long num_steps = 100000000; 100 | double step = 0; 101 | double pi = 0.0; 102 | double sum = 0; 103 | int i = 0; 104 | 105 | step = 1.0 / (double) num_steps; 106 | 107 | omp_set_num_threads(NTHREADS); 108 | double timer_start = omp_get_wtime(); 109 | 110 | #pragma omp parallel for reduction(+:sum) 111 | for (i = 0; i < num_steps; ++i) { 112 | int x = (i+0.5) * step; 113 | sum += 4.0 / (1.0 + x*x); 114 | } 115 | 116 | pi = sum * step; 117 | 118 | double timer_took = omp_get_wtime() - timer_start; 119 | std::cout << pi << " took " << timer_took; 120 | // 1 threads --> 0.55 seconds. 121 | // 4 threads --> 0.24 seconds. 122 | // 24 threads --> 0.24 seconds. 123 | // 48 threads --> 0.23 seconds. 124 | } 125 | 126 | void integral_better_reduction() { 127 | // this version is better because it can work in the case of non-threaded environments. 128 | int NTHREADS = 48; 129 | long num_steps = 100000000; 130 | double step = 0; 131 | double pi = 0.0; 132 | double sum = 0; 133 | int i = 0, x; 134 | 135 | step = 1.0 / (double) num_steps; 136 | 137 | omp_set_num_threads(NTHREADS); 138 | double timer_start = omp_get_wtime(); 139 | 140 | #pragma omp parallel for private(x) reduction(+:sum) 141 | for (i = 0; i < num_steps; ++i) { 142 | x = (i+0.5) * step; 143 | sum += 4.0 / (1.0 + x*x); 144 | } 145 | 146 | pi = sum * step; 147 | 148 | double timer_took = omp_get_wtime() - timer_start; 149 | std::cout << pi << " took " << timer_took; 150 | // 1 threads --> 0.55 seconds. 151 | // 4 threads --> 0.24 seconds. 152 | // 24 threads --> 0.24 seconds. 153 | // 48 threads --> 0.23 seconds. 154 | } -------------------------------------------------------------------------------- /linkedlist/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | project(linkedlist) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | set(SOURCE_FILES main.cpp) 7 | add_executable(linkedlist ${SOURCE_FILES}) 8 | 9 | 10 | 11 | # for openmp 12 | set(CMAKE_CXX_COMPILER "/usr/local/bin/g++-4.9") 13 | set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -fopenmp") 14 | -------------------------------------------------------------------------------- /linkedlist/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | struct Node { 7 | int data; 8 | struct Node* next = NULL; 9 | Node() {} 10 | Node(int data) { 11 | this->data = data; 12 | } 13 | Node(int data, Node* node) { 14 | this->data = data; 15 | this->next = node; 16 | } 17 | }; 18 | 19 | void processNode(Node *pNode); 20 | 21 | struct Queue { 22 | Node *head = NULL, *tail = NULL; 23 | 24 | Queue& add(int data) { 25 | add(new Node(data)); 26 | return *this; 27 | } 28 | 29 | void add(Node *node) { 30 | #pragma omp critical 31 | { 32 | if (head == NULL) { 33 | head = node; 34 | tail = node; 35 | } else { 36 | tail->next = node; 37 | tail = node; 38 | } 39 | } 40 | } 41 | 42 | Node* remove() { 43 | Node *node; 44 | #pragma omp critical 45 | { 46 | node = head; 47 | if (head != NULL) 48 | head = head->next; 49 | } 50 | return node; 51 | } 52 | 53 | }; 54 | 55 | int main() { 56 | srand(12); 57 | Queue queue; 58 | 59 | #pragma omp parallel for 60 | for (int i = 0; i < 500; ++i) { 61 | queue.add(i); 62 | } 63 | 64 | size_t totalProcessedCounter = 0; 65 | double timer_started = omp_get_wtime(); 66 | omp_set_num_threads(5); 67 | #pragma omp parallel 68 | { 69 | Node *n; 70 | size_t processedCounter = 0; 71 | double started = omp_get_wtime(); 72 | 73 | while ((n = queue.remove()) != NULL) { 74 | processNode(n); 75 | processedCounter++; 76 | } 77 | 78 | double elapsed = omp_get_wtime() - started; 79 | printf("Thread id: %d, processed: %d nodes, took: %f seconds \n", 80 | omp_get_thread_num(), processedCounter, elapsed); 81 | #pragma omp atomic 82 | totalProcessedCounter += processedCounter; 83 | } 84 | 85 | double elapsed = omp_get_wtime() - timer_started; 86 | std::cout << "end. processed " << totalProcessedCounter <<" nodes. took " << elapsed << " in total " << std::endl; 87 | // Thread id: 4, processed: 100 nodes, took: 5.114557 seconds 88 | // Thread id: 0, processed: 111 nodes, took: 5.118820 seconds 89 | // Thread id: 2, processed: 102 nodes, took: 5.125992 seconds 90 | // Thread id: 3, processed: 95 nodes, took: 5.158186 seconds 91 | // Thread id: 1, processed: 92 nodes, took: 5.173112 seconds 92 | // end. processed 500 nodes. took 5.17337 in total 93 | 94 | return 0; 95 | } 96 | 97 | void processNode(Node *node) { 98 | int r = rand() % 9 + 1; // between 1 and 9 99 | usleep(r * 10000); // sleeps 0.001 or 0.090 second 100 | } -------------------------------------------------------------------------------- /parallel-loop-1/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | project(openmp_example_1) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | set(SOURCE_FILES main.cpp) 7 | add_executable(openmp_example_1 ${SOURCE_FILES}) 8 | 9 | # for openmp 10 | set(CMAKE_CXX_COMPILER "/usr/local/bin/g++-4.9") 11 | set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -fopenmp") 12 | -------------------------------------------------------------------------------- /parallel-loop-1/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace std; 4 | 5 | int main() { 6 | cout << "the begin of loop" << endl; 7 | 8 | #pragma omp parallel for 9 | for (int i = 0; i < 10; ++i) { 10 | cout << i; 11 | } 12 | cout << endl << "the end of loop" << endl; 13 | 14 | // Expected output will be similar to this: 15 | // the begin of loop 16 | // 6378049152 17 | // the end of loop 18 | return 0; 19 | } -------------------------------------------------------------------------------- /scheduling/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | project(scheduling) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | set(SOURCE_FILES main.cpp) 7 | add_executable(scheduling ${SOURCE_FILES}) 8 | 9 | # for openmp 10 | set(CMAKE_CXX_COMPILER "/usr/local/bin/g++-4.9") 11 | set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -fopenmp") 12 | -------------------------------------------------------------------------------- /scheduling/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void schedule_static(); 5 | void schedule_dynamic(); 6 | void schedule_auto(); 7 | 8 | int main() { 9 | // schedule_static(); 10 | // schedule_dynamic(); 11 | // schedule_auto(); 12 | 13 | int i, N = 10; 14 | 15 | #pragma omp parallel for num_threads(2) schedule(static) 16 | for (i = 0; i < N; i++) { 17 | if (omp_get_thread_num() == 0) 18 | printf("Thread %d is doing iteration %d.\n", omp_get_thread_num( ), i); 19 | } 20 | // Thread 0 is doing iteration 0. 21 | // Thread 0 is doing iteration 1. 22 | // Thread 0 is doing iteration 2. 23 | // Thread 0 is doing iteration 3. 24 | // Thread 0 is doing iteration 4. 25 | 26 | #pragma omp parallel for num_threads(2) schedule(static) 27 | for (i = 0; i < N; i++) { 28 | if (omp_get_thread_num() == 1) 29 | printf("Thread %d is doing iteration %d.\n", omp_get_thread_num( ), i); 30 | } 31 | // Thread 1 is doing iteration 5. 32 | // Thread 1 is doing iteration 6. 33 | // Thread 1 is doing iteration 7. 34 | // Thread 1 is doing iteration 8. 35 | // Thread 1 is doing iteration 9. 36 | 37 | #pragma omp parallel for num_threads(2) schedule(static, 4) 38 | for (i = 0; i < N; i++) { 39 | if (omp_get_thread_num() == 0) 40 | printf("Thread %d is doing iteration %d.\n", omp_get_thread_num( ), i); 41 | } 42 | // Thread 0 is doing iteration 0. 43 | // Thread 0 is doing iteration 1. 44 | // Thread 0 is doing iteration 2. 45 | // Thread 0 is doing iteration 3. 46 | // Thread 0 is doing iteration 8. 47 | // Thread 0 is doing iteration 9. 48 | 49 | #pragma omp parallel for num_threads(2) schedule(dynamic, 3) 50 | for (i = 0; i < N; i++) { 51 | if (omp_get_thread_num() == 1) 52 | printf("Thread %d is doing iteration %d.\n", omp_get_thread_num( ), i); 53 | } 54 | // Thread 1 is doing iteration 3. 55 | // Thread 1 is doing iteration 4. 56 | // Thread 1 is doing iteration 5. 57 | // or 58 | // Thread 1 is doing iteration 0. 59 | // Thread 1 is doing iteration 1. 60 | // Thread 1 is doing iteration 2. 61 | 62 | return 0; 63 | } 64 | 65 | void schedule_static() { 66 | int N = 1000000000; 67 | int i; 68 | double avg = 0; 69 | 70 | omp_set_num_threads(16); 71 | 72 | double timer_started = omp_get_wtime(); 73 | 74 | #pragma omp parallel for reduction(+:avg) schedule(static) 75 | for (i = 0; i < N; ++i) { 76 | avg += i; 77 | } 78 | 79 | avg /= N; 80 | 81 | double elapsed = omp_get_wtime() - timer_started; 82 | 83 | std::cout << avg << " took " << elapsed << std::endl; 84 | // 5e+08 took 1.41788 85 | } 86 | 87 | void schedule_dynamic() { 88 | int N = 1000000000; 89 | int i; 90 | double avg = 0; 91 | 92 | omp_set_num_threads(16); 93 | 94 | double timer_started = omp_get_wtime(); 95 | #pragma omp parallel for reduction(+:avg) schedule(dynamic, 1000) 96 | for (i = 0; i < N; ++i) { 97 | avg += i; 98 | } 99 | 100 | avg /= N; 101 | 102 | double elapsed = omp_get_wtime() - timer_started; 103 | 104 | std::cout << avg << " took " << elapsed << std::endl; 105 | // 5e+08 took 1.34788 106 | } 107 | 108 | void schedule_auto() { 109 | int N = 1000000000; 110 | int i; 111 | double avg = 0; 112 | 113 | omp_set_num_threads(16); 114 | 115 | double timer_started = omp_get_wtime(); 116 | 117 | #pragma omp parallel for reduction(+:avg) schedule(auto) 118 | for (i = 0; i < N; ++i) { 119 | avg += i; 120 | } 121 | 122 | avg /= N; 123 | 124 | double elapsed = omp_get_wtime() - timer_started; 125 | 126 | std::cout << avg << " took " << elapsed << std::endl; 127 | // 5e+08 took 1.39788 128 | } 129 | --------------------------------------------------------------------------------