├── Concurrency ├── Concurrency.pdf ├── code │ ├── 1-Processes-and-Threads │ │ ├── 1-Single-Process.c │ │ ├── 2-Fork.c │ │ ├── 3-Multiple-Threads.c │ │ └── Makefile │ ├── 2-Create-and-Join │ │ ├── 1-no-Join.c │ │ ├── 2-Master-Join.c │ │ ├── 3-Multiple-Threads.c │ │ ├── 4-Detach.c │ │ ├── 5-Input-Output.c │ │ ├── 6-Input-Output-better.c │ │ └── Makefile │ ├── 3-Mutex │ │ ├── 1-No-Sync.c │ │ ├── 2-Atomic.c │ │ ├── 3-Mutex.c │ │ ├── 4-Timed-Lock.c │ │ ├── 5-Try-Lock.c │ │ ├── 6-Busy.c │ │ ├── 7-not-Busy.c │ │ ├── 8-Deadlock.c │ │ ├── 9-no-Deadlock-Hierarchy.c │ │ ├── A-no-Deadlock-Try-Backoff.c │ │ ├── Makefile │ │ ├── repeat.sh │ │ └── timing.h │ ├── 4-Condition-Variables │ │ ├── 1-no-Condition-Variable.c │ │ ├── 2-Minimal.c │ │ ├── 3-Better.c │ │ ├── 4-Condition-Variable.c │ │ ├── 5-Bad-Signal.c │ │ ├── 6-Broadcast.c │ │ └── Makefile │ ├── X-Circular-Buffer │ │ ├── Makefile │ │ ├── circbuf.c │ │ ├── circbuf.h │ │ └── test.c │ └── Y-Thread-Safe-CB │ │ ├── Makefile │ │ ├── circbuf.c │ │ ├── circbuf.h │ │ ├── test.c │ │ ├── tscircbuf.c │ │ └── tscircbuf.h └── readme.md ├── GPU ├── DSSC-EXAME-README.pdf ├── Jacobi-project │ ├── .DS_Store │ ├── aux │ │ ├── background.html │ │ ├── background.md │ │ ├── eqn.PNG │ │ ├── hints.html │ │ ├── hints.md │ │ ├── jacobiEq1.jpg │ │ ├── jacobiFigure1.jpg │ │ ├── jacobiFigure2.jpg │ │ ├── ref2.png │ │ └── ref_Init.png │ ├── code │ │ ├── Makefile │ │ ├── jacobi.c │ │ └── plot.plt │ └── readme.md ├── par_transp │ └── main.c └── readme.md ├── HPC ├── codes │ ├── 00_simple.c │ ├── 00_simple_nowait.c │ ├── 00_simple_taskwait.c │ ├── 02_tasks.c │ ├── 02_tasks_wrong.c │ ├── 03_variable_workload.c │ ├── 03_variable_workload.v2.c │ ├── 04_tasks_reduction.c │ ├── 04_unpredictable_pattern.c │ ├── 05_taskgroup_reduction.c │ ├── dag.c │ ├── linked_list.c │ ├── linked_list.deadlock.c │ ├── quicksort.v0.c │ ├── quicksort.v1.c │ ├── quicksort.v2.c │ └── readme.md ├── mpi.pdf ├── openmp_outline.pdf ├── readme.md ├── tasks.new.pdf └── tasks.pdf ├── 
README.md ├── access_Leonardo.pdf └── intro_to_course.pdf /Concurrency/Concurrency.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/Concurrency/Concurrency.pdf -------------------------------------------------------------------------------- /Concurrency/code/1-Processes-and-Threads/1-Single-Process.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | void huge_function() 8 | { 9 | sleep(600); 10 | } 11 | 12 | int main() 13 | { 14 | printf("PID: %ld, PPID: %ld\n", (long)getpid(), (long)getppid()); 15 | huge_function(); 16 | 17 | return EXIT_SUCCESS; 18 | } 19 | -------------------------------------------------------------------------------- /Concurrency/code/1-Processes-and-Threads/2-Fork.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | void huge_function() 8 | { 9 | sleep(600); 10 | } 11 | 12 | int main() 13 | { 14 | pid_t pid = fork(); 15 | switch (pid) { 16 | case -1: 17 | fprintf(stderr, "fork failure.\n"); 18 | return EXIT_FAILURE; 19 | break; 20 | 21 | case 0: 22 | printf("CHILD\n\tPID: %ld, PPID: %ld\n", (long)getpid(), (long)getppid()); 23 | huge_function(); 24 | break; 25 | 26 | default: 27 | printf("PARENT\n\tPID: %ld, PPID: %ld\n", (long)getpid(), (long)getppid()); 28 | huge_function(); 29 | break; 30 | } 31 | 32 | return EXIT_SUCCESS; 33 | } -------------------------------------------------------------------------------- /Concurrency/code/1-Processes-and-Threads/3-Multiple-Threads.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | void* thread_function(void* 
unused) 10 | { 11 | (void)unused; 12 | 13 | printf("PID: %ld, thread_id: %d\n", (long)getpid(), gettid()); 14 | sleep(1200); 15 | 16 | return NULL; 17 | } 18 | 19 | 20 | 21 | int main() 22 | { 23 | pthread_t thread; 24 | 25 | if (pthread_create(&thread, NULL, thread_function, NULL)) { 26 | fprintf(stderr, "pthread_create failure.\n"); 27 | return EXIT_FAILURE; 28 | } 29 | 30 | thread_function(NULL); 31 | 32 | if (pthread_join(thread, NULL)) { 33 | fprintf(stderr, "pthread_joint failure.\n"); 34 | return EXIT_FAILURE; 35 | } 36 | 37 | return EXIT_SUCCESS; 38 | } 39 | -------------------------------------------------------------------------------- /Concurrency/code/1-Processes-and-Threads/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -Wextra -pedantic -g 3 | 4 | .SUFFIXES : 5 | 6 | .PHONY : all 7 | all: 1-Single-Process.x 2-Fork.x 3-Multiple-Threads.x 8 | 9 | .PHONY : clean 10 | clean : 11 | rm -f *.x 12 | 13 | 14 | 1-Single-Process.x : 1-Single-Process.c 15 | $(CC) $(CFLAGS) -o $@ $< 16 | 17 | 2-Fork.x : 2-Fork.c 18 | $(CC) $(CFLAGS) -o $@ $< 19 | 20 | 3-Multiple-Threads.x : 3-Multiple-Threads.c 21 | $(CC) $(CFLAFS) -pthread -o $@ $< 22 | -------------------------------------------------------------------------------- /Concurrency/code/2-Create-and-Join/1-no-Join.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | void* thread_function(void* unused) 8 | { 9 | (void)unused; 10 | 11 | printf("Child thread\n"); 12 | 13 | return NULL; 14 | } 15 | 16 | 17 | int main() 18 | { 19 | printf("Main thread\n"); 20 | 21 | pthread_t thread; 22 | 23 | if (pthread_create(&thread, NULL, thread_function, NULL)) { 24 | fprintf(stderr, "pthread_create failure.\n"); 25 | return EXIT_FAILURE; 26 | } 27 | 28 | sleep(1); 29 | 30 | return EXIT_SUCCESS; 31 | } 32 | 
-------------------------------------------------------------------------------- /Concurrency/code/2-Create-and-Join/2-Master-Join.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | void* thread_function(void* unused) 7 | { 8 | (void)unused; 9 | 10 | printf("Child thread\n"); 11 | 12 | return NULL; 13 | } 14 | 15 | 16 | int main() 17 | { 18 | printf("Main thread\n"); 19 | 20 | pthread_t thread; 21 | 22 | if (pthread_create(&thread, NULL, thread_function, NULL)) { 23 | fprintf(stderr, "pthread_create failure.\n"); 24 | return EXIT_FAILURE; 25 | } 26 | 27 | if (pthread_join(thread, NULL)) { 28 | fprintf(stderr, "pthread_joint failure.\n"); 29 | return EXIT_FAILURE; 30 | } 31 | 32 | return EXIT_SUCCESS; 33 | } 34 | -------------------------------------------------------------------------------- /Concurrency/code/2-Create-and-Join/3-Multiple-Threads.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | pthread_t thread_2_handle; 8 | 9 | 10 | void* thread_2_fn(void* unused) 11 | { 12 | (void)unused; 13 | 14 | printf("Thread 2\n"); 15 | 16 | return NULL; 17 | } 18 | 19 | 20 | void* thread_1_fn(void* unused) 21 | { 22 | (void)unused; 23 | 24 | printf("Thread 1\n"); 25 | 26 | if (pthread_create(&thread_2_handle, NULL, thread_2_fn, NULL)) { 27 | fprintf(stderr, "pthread_create failure.\n"); 28 | } 29 | 30 | return NULL; 31 | } 32 | 33 | 34 | 35 | int main() 36 | { 37 | pthread_t thread_1_handle; 38 | 39 | printf("Main thread\n"); 40 | 41 | if (pthread_create(&thread_1_handle, NULL, thread_1_fn, NULL)) { 42 | fprintf(stderr, "pthread_create failure.\n"); 43 | } 44 | 45 | // BEWARE: this is just a poor man example 46 | // NEVER use sleep to synchronize! 
47 | sleep(1); 48 | 49 | if (pthread_join(thread_1_handle, NULL)) { 50 | fprintf(stderr, "pthread_join failure.\n"); 51 | } 52 | 53 | if (pthread_join(thread_2_handle, NULL)) { 54 | fprintf(stderr, "pthread_join failure.\n"); 55 | } 56 | 57 | return EXIT_SUCCESS; 58 | } 59 | -------------------------------------------------------------------------------- /Concurrency/code/2-Create-and-Join/4-Detach.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | void* thread_function(void* unused) 8 | { 9 | (void)unused; 10 | 11 | if (pthread_detach(pthread_self())) { 12 | fprintf(stderr, "pthread_detach failure.\n"); 13 | } 14 | 15 | printf("Child thread\n"); 16 | 17 | return NULL; 18 | } 19 | 20 | 21 | int main() 22 | { 23 | printf("Main thread\n"); 24 | 25 | pthread_t thread; 26 | 27 | if (pthread_create(&thread, NULL, thread_function, NULL)) { 28 | fprintf(stderr, "pthread_create failure.\n"); 29 | return EXIT_FAILURE; 30 | } 31 | 32 | sleep(1); 33 | 34 | return EXIT_SUCCESS; 35 | } 36 | -------------------------------------------------------------------------------- /Concurrency/code/2-Create-and-Join/5-Input-Output.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | struct thread_input { 6 | unsigned int numerator; 7 | unsigned int denominator; 8 | }; 9 | 10 | struct thread_output { 11 | unsigned int quotient; 12 | unsigned int reminder; 13 | }; 14 | 15 | 16 | void* thread_function(void* arg) 17 | { 18 | struct thread_input* input = (struct thread_input*)arg; 19 | struct thread_output* output = (struct thread_output*)malloc(sizeof(struct thread_output)); 20 | 21 | if (!output) 22 | return NULL; 23 | 24 | output->quotient = input->numerator / input->denominator; 25 | output->reminder = input->numerator % input->denominator; 26 | 27 | return output; 28 | } 29 | 30 | 31 | int main() 32 | { 33 | int 
exit_code = EXIT_FAILURE; 34 | 35 | struct thread_input* input = (struct thread_input*)malloc(sizeof(struct thread_input)); 36 | if (!input) { 37 | fprintf(stderr, "malloc failure.\n"); 38 | return exit_code; 39 | } 40 | 41 | input->numerator = 25; 42 | input->denominator = 7; 43 | 44 | pthread_t thread; 45 | if (pthread_create(&thread, NULL, thread_function, (void*)input)) { 46 | fprintf(stderr, "pthread_create failure.\n"); 47 | goto cleanup_input; 48 | } 49 | 50 | struct thread_output* output; 51 | if (pthread_join(thread, (void**)&output)) { 52 | fprintf(stderr, "pthread_join failure.\n"); 53 | goto cleanup_input; 54 | } 55 | 56 | if (!output) { 57 | fprintf(stderr, "thread malloc failure.\n"); 58 | goto cleanup_input; 59 | } 60 | 61 | printf("%d divided by %d is %d with reminder %d.\n", 62 | input->numerator, 63 | input->denominator, 64 | output->quotient, 65 | output->reminder); 66 | 67 | exit_code = EXIT_SUCCESS; 68 | 69 | free(output); 70 | 71 | cleanup_input: 72 | free(input); 73 | 74 | return exit_code; 75 | } 76 | -------------------------------------------------------------------------------- /Concurrency/code/2-Create-and-Join/6-Input-Output-better.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | struct thread_output; 7 | 8 | struct thread_input { 9 | unsigned int numerator; 10 | unsigned int denominator; 11 | struct thread_output* output; 12 | }; 13 | 14 | struct thread_output { 15 | unsigned int quotient; 16 | unsigned int reminder; 17 | }; 18 | 19 | 20 | void* thread_function(void* arg) 21 | { 22 | struct thread_input* input = (struct thread_input*)arg; 23 | 24 | input->output->quotient = input->numerator / input->denominator; 25 | input->output->reminder = input->numerator % input->denominator; 26 | 27 | return NULL; 28 | } 29 | 30 | 31 | int main() 32 | { 33 | int exit_code = EXIT_FAILURE; 34 | 35 | struct thread_input* input = (struct 
thread_input*)malloc(sizeof(struct thread_input)); 36 | if (!input) { 37 | fprintf(stderr, "malloc failure.\n"); 38 | return exit_code; 39 | } 40 | 41 | input->output = (struct thread_output*)malloc(sizeof(struct thread_output)); 42 | if (!input->output) { 43 | fprintf(stderr, "malloc failure.\n"); 44 | goto cleanup_input; 45 | } 46 | 47 | input->numerator = 25; 48 | input->denominator = 7; 49 | 50 | pthread_t thread; 51 | if (pthread_create(&thread, NULL, thread_function, (void*)input)) { 52 | fprintf(stderr, "pthread_create failure.\n"); 53 | goto cleanup_output; 54 | } 55 | 56 | if (pthread_join(thread, NULL)) { 57 | fprintf(stderr, "pthread_join failure.\n"); 58 | goto cleanup_output; 59 | } 60 | 61 | printf("%d divided by %d is %d with reminder %d.\n", 62 | input->numerator, 63 | input->denominator, 64 | input->output->quotient, 65 | input->output->reminder); 66 | 67 | exit_code = EXIT_SUCCESS; 68 | 69 | cleanup_output: 70 | free(input->output); 71 | 72 | cleanup_input: 73 | free(input); 74 | 75 | return exit_code; 76 | } 77 | -------------------------------------------------------------------------------- /Concurrency/code/2-Create-and-Join/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -Wextra -pedantic -g 3 | 4 | .SUFFIXES : 5 | 6 | .PHONY : all 7 | all: 1-no-Join.x 2-Master-Join.x 3-Multiple-Threads.x 4-Detach.x 5-Input-Output.x 6-Input-Output-better.x 8 | 9 | .PHONY : clean 10 | clean : 11 | rm -f *.x 12 | 13 | %.x : %.c 14 | $(CC) $(CFLAGS) -pthread -o $@ $< 15 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/1-No-Sync.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "timing.h" 6 | 7 | #define DEFAULT_LEN 10000 8 | #define DEFAULT_THREADS 2 9 | 10 | int len; 11 | int counter = 0; 12 | 13 | 14 | void* threadfunc(void* unused) 15 | { 
16 | (void)unused; 17 | 18 | for (int i = 0; i < len; ++i) 19 | ++counter; 20 | 21 | return NULL; 22 | } 23 | 24 | 25 | int main(int argc, char* argv[]) 26 | { 27 | int retcode = EXIT_FAILURE; 28 | double dt = 0.0; 29 | 30 | int nthread = DEFAULT_THREADS; 31 | len = DEFAULT_LEN; 32 | if (argc > 1) 33 | len = atoi(argv[1]); 34 | 35 | if (argc > 2) 36 | nthread = atoi(argv[2]); 37 | 38 | pthread_t* threads = (pthread_t*)malloc(nthread * sizeof(pthread_t)); 39 | if (!threads) 40 | return retcode; 41 | 42 | dt -= cputime_ms(); 43 | 44 | for (int i = 0; i < nthread; ++i) 45 | { 46 | if (pthread_create(&threads[i], NULL, threadfunc, NULL)) 47 | goto cleanup; 48 | } 49 | 50 | for (int i = 0; i < nthread; ++i) 51 | { 52 | if (pthread_join(threads[i], NULL)) 53 | goto cleanup; 54 | } 55 | 56 | dt += cputime_ms(); 57 | 58 | retcode = EXIT_SUCCESS; 59 | 60 | printf("counter = %d; expected = %d\n", counter, len * nthread); 61 | printf("elapsed time = %lfms\n", dt); 62 | 63 | cleanup: 64 | free (threads); 65 | 66 | return retcode; 67 | } 68 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/2-Atomic.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "timing.h" 6 | 7 | #define DEFAULT_LEN 10000 8 | #define DEFAULT_THREADS 2 9 | 10 | int len; 11 | _Atomic int counter = 0; 12 | 13 | 14 | void* threadfunc(void* unused) 15 | { 16 | (void)unused; 17 | 18 | for (int i = 0; i < len; ++i) 19 | ++counter; 20 | 21 | return NULL; 22 | } 23 | 24 | 25 | int main(int argc, char* argv[]) 26 | { 27 | int retcode = EXIT_FAILURE; 28 | double dt = 0.0; 29 | 30 | int nthread = DEFAULT_THREADS; 31 | len = DEFAULT_LEN; 32 | if (argc > 1) 33 | len = atoi(argv[1]); 34 | 35 | if (argc > 2) 36 | nthread = atoi(argv[2]); 37 | 38 | pthread_t* threads = (pthread_t*)malloc(nthread * sizeof(pthread_t)); 39 | if (!threads) 40 | return retcode; 41 | 42 | dt -= 
cputime_ms(); 43 | 44 | for (int i = 0; i < nthread; ++i) 45 | { 46 | if (pthread_create(&threads[i], NULL, threadfunc, NULL)) 47 | goto cleanup; 48 | } 49 | 50 | for (int i = 0; i < nthread; ++i) 51 | { 52 | if (pthread_join(threads[i], NULL)) 53 | goto cleanup; 54 | } 55 | 56 | dt += cputime_ms(); 57 | 58 | retcode = EXIT_SUCCESS; 59 | 60 | printf("counter = %d; expected = %d\n", counter, len * nthread); 61 | printf("elapsed time = %lfms\n", dt); 62 | 63 | cleanup: 64 | free (threads); 65 | 66 | return retcode; 67 | } 68 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/3-Mutex.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "timing.h" 6 | 7 | #define DEFAULT_LEN 10000 8 | #define DEFAULT_THREADS 2 9 | 10 | int len; 11 | int counter = 0; 12 | pthread_mutex_t counter_mtx = PTHREAD_MUTEX_INITIALIZER; 13 | 14 | 15 | void* threadfunc(void* unused) 16 | { 17 | (void)unused; 18 | 19 | for (int i = 0; i < len; ++i) { 20 | if (pthread_mutex_lock(&counter_mtx)) 21 | continue; 22 | 23 | ++counter; 24 | 25 | if (pthread_mutex_unlock(&counter_mtx)) 26 | continue; 27 | } 28 | 29 | return NULL; 30 | } 31 | 32 | 33 | int main(int argc, char* argv[]) 34 | { 35 | int retcode = EXIT_FAILURE; 36 | double dt = 0.0; 37 | 38 | int nthread = DEFAULT_THREADS; 39 | len = DEFAULT_LEN; 40 | if (argc > 1) 41 | len = atoi(argv[1]); 42 | 43 | if (argc > 2) 44 | nthread = atoi(argv[2]); 45 | 46 | pthread_t* threads = (pthread_t*)malloc(nthread * sizeof(pthread_t)); 47 | if (!threads) 48 | return retcode; 49 | 50 | dt -= cputime_ms(); 51 | 52 | for (int i = 0; i < nthread; ++i) 53 | { 54 | if (pthread_create(&threads[i], NULL, threadfunc, NULL)) 55 | goto cleanup; 56 | } 57 | 58 | for (int i = 0; i < nthread; ++i) 59 | { 60 | if (pthread_join(threads[i], NULL)) 61 | goto cleanup; 62 | } 63 | 64 | dt += cputime_ms(); 65 | 66 | retcode = 
EXIT_SUCCESS; 67 | 68 | printf("counter = %d; expected = %d\n", counter, len * nthread); 69 | printf("elapsed time = %lfms\n", dt); 70 | 71 | cleanup: 72 | free (threads); 73 | 74 | return retcode; 75 | } 76 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/4-Timed-Lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 9 | 10 | 11 | void* threadfunc(void* unused) 12 | { 13 | (void)unused; 14 | 15 | struct timespec ts; 16 | do { 17 | clock_gettime(CLOCK_REALTIME, &ts); 18 | ts.tv_sec += 1; 19 | printf("trying to lock the mutex...\n"); 20 | } while (pthread_mutex_timedlock(&mtx, &ts)); 21 | printf("finally I locked it!\n"); 22 | 23 | sleep(1); 24 | pthread_mutex_unlock(&mtx); 25 | 26 | return NULL; 27 | } 28 | 29 | 30 | int main() 31 | { 32 | int retcode = EXIT_FAILURE; 33 | 34 | pthread_t thread; 35 | 36 | pthread_mutex_lock(&mtx); 37 | 38 | if (pthread_create(&thread, NULL, threadfunc, NULL)) 39 | return retcode; 40 | 41 | sleep(7); 42 | pthread_mutex_unlock(&mtx); 43 | 44 | if (pthread_join(thread, NULL)) 45 | return retcode; 46 | 47 | retcode = EXIT_SUCCESS; 48 | 49 | return retcode; 50 | } 51 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/5-Try-Lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 8 | 9 | 10 | void* threadfunc(void* unused) 11 | { 12 | (void)unused; 13 | while (pthread_mutex_trylock(&mtx)) { 14 | printf("mutex is locked, I will try again...\n"); 15 | sleep(1); 16 | } 17 | printf("finally I locked it!\n"); 18 | 19 | sleep(1); 20 | pthread_mutex_unlock(&mtx); 21 | 22 | 23 | return NULL; 24 | } 25 | 26 | 27 | 28 | int main() 29 
| { 30 | int retcode = EXIT_FAILURE; 31 | 32 | pthread_t thread; 33 | 34 | pthread_mutex_lock(&mtx); 35 | 36 | if (pthread_create(&thread, NULL, threadfunc, NULL)) 37 | return retcode; 38 | 39 | sleep(7); 40 | pthread_mutex_unlock(&mtx); 41 | 42 | if (pthread_join(thread, NULL)) 43 | return retcode; 44 | 45 | retcode = EXIT_SUCCESS; 46 | 47 | return retcode; 48 | } 49 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/6-Busy.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | pthread_spinlock_t splk; 8 | 9 | void* threadfunc(void* unused) 10 | { 11 | (void)unused; 12 | 13 | printf("Acquiring the spinlock... "); 14 | fflush(stdout); 15 | 16 | pthread_spin_lock(&splk); 17 | printf("acquired\n"); 18 | sleep(4); 19 | pthread_spin_unlock(&splk); 20 | 21 | return NULL; 22 | } 23 | 24 | 25 | 26 | int main() 27 | { 28 | int retcode = EXIT_FAILURE; 29 | 30 | pthread_t thread; 31 | 32 | if (pthread_spin_init(&splk, PTHREAD_PROCESS_PRIVATE)) 33 | return retcode; 34 | 35 | pthread_spin_lock(&splk); 36 | 37 | if (pthread_create(&thread, NULL, threadfunc, NULL)) 38 | goto spinlock_cleanup; 39 | 40 | sleep(12); 41 | pthread_spin_unlock(&splk); 42 | 43 | if (!pthread_join(thread, NULL)) 44 | retcode = EXIT_SUCCESS; 45 | 46 | spinlock_cleanup: 47 | if (pthread_spin_destroy(&splk)) 48 | return retcode; 49 | 50 | return retcode; 51 | } 52 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/7-not-Busy.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 8 | 9 | void* threadfunc(void* unused) 10 | { 11 | (void)unused; 12 | 13 | printf("Acquiring the mutex... 
"); 14 | fflush(stdout); 15 | 16 | pthread_mutex_lock(&mtx); 17 | printf("acquired\n"); 18 | sleep(4); 19 | pthread_mutex_unlock(&mtx); 20 | 21 | return NULL; 22 | } 23 | 24 | 25 | 26 | int main() 27 | { 28 | int retcode = EXIT_FAILURE; 29 | 30 | pthread_t thread; 31 | 32 | pthread_mutex_lock(&mtx); 33 | 34 | if (pthread_create(&thread, NULL, threadfunc, NULL)) 35 | return retcode; 36 | 37 | sleep(12); 38 | pthread_mutex_unlock(&mtx); 39 | 40 | if (!pthread_join(thread, NULL)) 41 | retcode = EXIT_SUCCESS; 42 | 43 | return retcode; 44 | } 45 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/8-Deadlock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | struct resource { 8 | double value; 9 | pthread_mutex_t mtx; 10 | }; 11 | 12 | struct resource r1 = {.value = 0.0, 13 | .mtx = PTHREAD_MUTEX_INITIALIZER}; 14 | 15 | struct resource r2 = {.value = 0.0, 16 | .mtx = PTHREAD_MUTEX_INITIALIZER}; 17 | 18 | 19 | void* threadswap(void* resources) 20 | { 21 | struct resource* first = ((struct resource**)resources)[0]; 22 | struct resource* second = ((struct resource**)resources)[1]; 23 | 24 | pthread_mutex_lock(&first->mtx); 25 | usleep(10000); // just to simulate some load 26 | pthread_mutex_lock(&second->mtx); 27 | 28 | double tmp = first->value; 29 | first->value = second->value; 30 | second->value = tmp; 31 | 32 | pthread_mutex_unlock(&second->mtx); 33 | pthread_mutex_unlock(&first->mtx); 34 | 35 | return NULL; 36 | } 37 | 38 | 39 | int main() 40 | { 41 | int retcode = EXIT_FAILURE; 42 | 43 | pthread_mutex_lock(&r1.mtx); 44 | r1.value = 37.0; 45 | pthread_mutex_unlock(&r1.mtx); 46 | 47 | pthread_mutex_lock(&r2.mtx); 48 | r2.value = -4.0; 49 | pthread_mutex_unlock(&r2.mtx); 50 | 51 | pthread_t thread1, thread2; 52 | struct resource* resources_1[] = {&r1, &r2}; 53 | struct resource* resources_2[] = {&r2, &r1}; 54 | 55 | if 
(pthread_create(&thread1, NULL, threadswap, resources_1)) 56 | return retcode; 57 | 58 | if (pthread_create(&thread2, NULL, threadswap, resources_2)) 59 | goto thread1_cleanup; 60 | 61 | if (pthread_join(thread2, NULL)) 62 | goto thread1_cleanup; 63 | 64 | retcode = EXIT_SUCCESS; 65 | 66 | thread1_cleanup: 67 | if (pthread_join(thread1, NULL)) 68 | return EXIT_FAILURE; 69 | 70 | if (retcode == EXIT_FAILURE) 71 | return retcode; 72 | 73 | pthread_mutex_lock(&r1.mtx); 74 | printf("r1.value = %lf\n", r1.value); 75 | pthread_mutex_unlock(&r1.mtx); 76 | 77 | pthread_mutex_lock(&r2.mtx); 78 | printf("r2.value = %lf\n", r2.value); 79 | pthread_mutex_unlock(&r2.mtx); 80 | 81 | return EXIT_SUCCESS; 82 | } 83 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/9-no-Deadlock-Hierarchy.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | struct resource { 8 | double value; 9 | pthread_mutex_t mtx; 10 | }; 11 | 12 | struct resource r1 = {.value = 0.0, 13 | .mtx = PTHREAD_MUTEX_INITIALIZER}; 14 | 15 | struct resource r2 = {.value = 0.0, 16 | .mtx = PTHREAD_MUTEX_INITIALIZER}; 17 | 18 | 19 | void* threadswap(void* resources) 20 | { 21 | struct resource* first = ((struct resource**)resources)[0]; 22 | struct resource* second = ((struct resource**)resources)[1]; 23 | 24 | pthread_mutex_lock(&first->mtx); 25 | usleep(10000); // just to simulate some load 26 | pthread_mutex_lock(&second->mtx); 27 | 28 | double tmp = first->value; 29 | first->value = second->value; 30 | second->value = tmp; 31 | 32 | pthread_mutex_unlock(&second->mtx); 33 | pthread_mutex_unlock(&first->mtx); 34 | 35 | return NULL; 36 | } 37 | 38 | 39 | int main() 40 | { 41 | int retcode = EXIT_FAILURE; 42 | 43 | pthread_mutex_lock(&r1.mtx); 44 | r1.value = 37.0; 45 | pthread_mutex_unlock(&r1.mtx); 46 | 47 | pthread_mutex_lock(&r2.mtx); 48 | r2.value = -4.0; 49 | 
pthread_mutex_unlock(&r2.mtx); 50 | 51 | pthread_t thread1, thread2; 52 | struct resource* resources_1[] = {&r1, &r2}; 53 | struct resource* resources_2[] = {&r1, &r2}; 54 | 55 | if (pthread_create(&thread1, NULL, threadswap, resources_1)) 56 | return retcode; 57 | 58 | if (pthread_create(&thread2, NULL, threadswap, resources_2)) 59 | goto thread1_cleanup; 60 | 61 | if (pthread_join(thread2, NULL)) 62 | goto thread1_cleanup; 63 | 64 | retcode = EXIT_SUCCESS; 65 | 66 | thread1_cleanup: 67 | if (pthread_join(thread1, NULL)) 68 | return EXIT_FAILURE; 69 | 70 | if (retcode == EXIT_FAILURE) 71 | return retcode; 72 | 73 | pthread_mutex_lock(&r1.mtx); 74 | printf("r1.value = %lf\n", r1.value); 75 | pthread_mutex_unlock(&r1.mtx); 76 | 77 | pthread_mutex_lock(&r2.mtx); 78 | printf("r2.value = %lf\n", r2.value); 79 | pthread_mutex_unlock(&r2.mtx); 80 | 81 | return EXIT_SUCCESS; 82 | } 83 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/A-no-Deadlock-Try-Backoff.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | struct resource { 9 | double value; 10 | pthread_mutex_t mtx; 11 | }; 12 | 13 | struct resource r1 = {.value = 0.0, 14 | .mtx = PTHREAD_MUTEX_INITIALIZER}; 15 | 16 | struct resource r2 = {.value = 0.0, 17 | .mtx = PTHREAD_MUTEX_INITIALIZER}; 18 | 19 | 20 | void* threadswap(void* resources) 21 | { 22 | struct resource* first = ((struct resource**)resources)[0]; 23 | struct resource* second = ((struct resource**)resources)[1]; 24 | 25 | for (;;) { 26 | pthread_mutex_lock(&first->mtx); 27 | usleep(10000); // just to simulate some load 28 | if (pthread_mutex_trylock(&second->mtx)) { 29 | pthread_mutex_unlock(&first->mtx); 30 | sched_yield(); 31 | } else { 32 | break; 33 | } 34 | } 35 | 36 | double tmp = first->value; 37 | first->value = second->value; 38 | second->value = tmp; 39 | 40 | 
pthread_mutex_unlock(&second->mtx); 41 | pthread_mutex_unlock(&first->mtx); 42 | 43 | return NULL; 44 | } 45 | 46 | 47 | int main() 48 | { 49 | int retcode = EXIT_FAILURE; 50 | 51 | pthread_mutex_lock(&r1.mtx); 52 | r1.value = 37.0; 53 | pthread_mutex_unlock(&r1.mtx); 54 | 55 | pthread_mutex_lock(&r2.mtx); 56 | r2.value = -4.0; 57 | pthread_mutex_unlock(&r2.mtx); 58 | 59 | pthread_t thread1, thread2; 60 | struct resource* resources_1[] = {&r1, &r2}; 61 | struct resource* resources_2[] = {&r2, &r1}; 62 | 63 | if (pthread_create(&thread1, NULL, threadswap, resources_1)) 64 | return retcode; 65 | 66 | if (pthread_create(&thread2, NULL, threadswap, resources_2)) 67 | goto thread1_cleanup; 68 | 69 | if (pthread_join(thread2, NULL)) 70 | goto thread1_cleanup; 71 | 72 | retcode = EXIT_SUCCESS; 73 | 74 | thread1_cleanup: 75 | if (pthread_join(thread1, NULL)) 76 | return EXIT_FAILURE; 77 | 78 | if (retcode == EXIT_FAILURE) 79 | return retcode; 80 | 81 | pthread_mutex_lock(&r1.mtx); 82 | printf("r1.value = %lf\n", r1.value); 83 | pthread_mutex_unlock(&r1.mtx); 84 | 85 | pthread_mutex_lock(&r2.mtx); 86 | printf("r2.value = %lf\n", r2.value); 87 | pthread_mutex_unlock(&r2.mtx); 88 | 89 | return EXIT_SUCCESS; 90 | } 91 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -Wextra -pedantic -g 3 | 4 | .SUFFIXES : 5 | 6 | .PHONY : all 7 | all: 1-No-Sync.x 2-Atomic.x 3-Mutex.x 4-Timed-Lock.x \ 8 | 5-Try-Lock.x 6-Busy.x 7-not-Busy.x 8-Deadlock.x \ 9 | 9-no-Deadlock-Hierarchy.x A-no-Deadlock-Try-Backoff.x 10 | 11 | .PHONY : clean 12 | clean : 13 | rm -f *.x 14 | 15 | %.x : %.c timing.h 16 | $(CC) $(CFLAGS) -pthread -o $@ $< 17 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/repeat.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in $(seq $1) 4 | do 5 | echo "------- i = $i -------" 6 | $2 7 | done 8 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/timing.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | 6 | static inline double cputime_ms() 7 | { 8 | struct timespec ts; 9 | clock_gettime(CLOCK_MONOTONIC, &ts); 10 | 11 | return 1.0e3 * (double)ts.tv_sec + 1.0e-6 * (double)ts.tv_nsec; 12 | } 13 | -------------------------------------------------------------------------------- /Concurrency/code/4-Condition-Variables/1-no-Condition-Variable.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Adapted from: M. Kerrisk, The Linux Programming Interface. 3 | * 4 | * https://github.com/prm239/kerrisk/tree/master 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define DEFAULT_TOTAL 20 13 | 14 | 15 | int avail; 16 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 17 | 18 | 19 | void* producer(void* tot) 20 | { 21 | const int total = atoi((char*)tot); 22 | printf("[PRODUCER]: %d\n", total); 23 | 24 | for (int i = 0; i < total; ++i) { 25 | sleep(1); 26 | 27 | pthread_mutex_lock(&mtx); 28 | ++avail; 29 | pthread_mutex_unlock(&mtx); 30 | } 31 | 32 | printf("[PRODUCER]: end.\n"); 33 | 34 | return NULL; 35 | } 36 | 37 | 38 | void* consumer(void* tot) 39 | { 40 | int total = *(int *)tot; 41 | printf("[CONSUMER]: %d\n", total); 42 | 43 | while (total) { 44 | pthread_mutex_lock(&mtx); 45 | 46 | while (avail) { 47 | --avail; 48 | --total; 49 | } 50 | 51 | pthread_mutex_unlock(&mtx); 52 | } 53 | 54 | printf("[CONSUMER]: end.\n"); 55 | 56 | return NULL; 57 | } 58 | 59 | 60 | int main(int argc, char* argv[]) 61 | { 62 | int retcode = EXIT_FAILURE; 63 | 64 | int to_consume = 0; 65 | int producers = 
argc > 1 ? argc - 1 : 0; 66 | int started_producers = 0; 67 | 68 | pthread_t* producers_th = (pthread_t*)malloc(sizeof(pthread_t) * producers); 69 | if (!producers_th) 70 | return retcode; 71 | 72 | pthread_t consumer_th; 73 | 74 | for (int i = 0; i < producers; ++i) { 75 | to_consume += atoi(argv[i + 1]); 76 | if (pthread_create(&producers_th[i], NULL, producer, argv[i + 1])) 77 | goto producers_cleanup; 78 | ++started_producers; 79 | } 80 | 81 | if (pthread_create(&consumer_th, NULL, consumer, &to_consume)) 82 | goto producers_cleanup; 83 | 84 | retcode = EXIT_SUCCESS; 85 | 86 | pthread_join(consumer_th, NULL); 87 | 88 | producers_cleanup: 89 | for (int i = 0; i < started_producers; ++i) 90 | pthread_join(producers_th[i], NULL); 91 | 92 | free(producers_th); 93 | 94 | return retcode; 95 | } 96 | 97 | -------------------------------------------------------------------------------- /Concurrency/code/4-Condition-Variables/2-Minimal.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define DEFAULT_MAIN_SLEEP 3 7 | #define DEFAULT_THREAD_SLEEP 2 8 | 9 | 10 | pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 11 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 12 | 13 | 14 | void* threadfunc(void* vsleep_s) 15 | { 16 | int sleep_s = *((int *)vsleep_s); 17 | 18 | printf("Inside threadfunc:\n"); 19 | sleep(sleep_s); 20 | pthread_mutex_lock(&mtx); 21 | 22 | printf("Going to sleep... 
"); 23 | fflush(stdout); 24 | pthread_cond_wait(&cv, &mtx); 25 | printf("Waken up!\n"); 26 | 27 | pthread_mutex_unlock(&mtx); 28 | 29 | return NULL; 30 | } 31 | 32 | 33 | int main(int argc, char* argv[]) 34 | { 35 | int retcode = EXIT_FAILURE; 36 | 37 | int main_sleep = DEFAULT_MAIN_SLEEP; 38 | int thread_sleep = DEFAULT_THREAD_SLEEP; 39 | 40 | if (argc > 1) 41 | main_sleep = atoi(argv[1]); 42 | if (argc > 2) 43 | thread_sleep = atoi(argv[2]); 44 | 45 | 46 | pthread_t thread; 47 | 48 | if (pthread_create(&thread, NULL, threadfunc, &thread_sleep)) 49 | return retcode; 50 | 51 | sleep(main_sleep); 52 | pthread_cond_signal(&cv); 53 | 54 | pthread_join(thread, NULL); 55 | 56 | retcode = EXIT_SUCCESS; 57 | 58 | 59 | return retcode; 60 | } 61 | -------------------------------------------------------------------------------- /Concurrency/code/4-Condition-Variables/3-Better.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define DEFAULT_MAIN_SLEEP 3 8 | #define DEFAULT_THREAD_SLEEP 2 9 | 10 | 11 | pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 12 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 13 | bool set = false; 14 | 15 | void* threadfunc(void* vsleep_s) 16 | { 17 | int sleep_s = *((int *)vsleep_s); 18 | 19 | printf("Inside threadfunc:\n"); 20 | sleep(sleep_s); 21 | pthread_mutex_lock(&mtx); 22 | 23 | printf("Going to sleep... 
"); 24 | fflush(stdout); 25 | 26 | while (!set) 27 | pthread_cond_wait(&cv, &mtx); 28 | 29 | printf("Waken up!\n"); 30 | 31 | pthread_mutex_unlock(&mtx); 32 | 33 | return NULL; 34 | } 35 | 36 | 37 | int main(int argc, char* argv[]) 38 | { 39 | int retcode = EXIT_FAILURE; 40 | 41 | int main_sleep = DEFAULT_MAIN_SLEEP; 42 | int thread_sleep = DEFAULT_THREAD_SLEEP; 43 | 44 | if (argc > 1) 45 | main_sleep = atoi(argv[1]); 46 | if (argc > 2) 47 | thread_sleep = atoi(argv[2]); 48 | 49 | 50 | pthread_t thread; 51 | 52 | if (pthread_create(&thread, NULL, threadfunc, &thread_sleep)) 53 | return retcode; 54 | 55 | sleep(main_sleep); 56 | pthread_mutex_lock(&mtx); 57 | set = true; 58 | pthread_mutex_unlock(&mtx); 59 | 60 | pthread_cond_signal(&cv); 61 | 62 | pthread_join(thread, NULL); 63 | 64 | retcode = EXIT_SUCCESS; 65 | 66 | 67 | return retcode; 68 | } 69 | -------------------------------------------------------------------------------- /Concurrency/code/4-Condition-Variables/4-Condition-Variable.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Adapted from: M. Kerrisk, The Linux Programming Interface. 
3 | * 4 | * https://github.com/prm239/kerrisk/tree/master 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define DEFAULT_TOTAL 20 13 | 14 | 15 | int avail; 16 | pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 17 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 18 | 19 | 20 | void* producer(void* tot) 21 | { 22 | const int total = atoi((char*)tot); 23 | printf("[PRODUCER]: %d\n", total); 24 | 25 | for (int i = 0; i < total; ++i) { 26 | sleep(1); 27 | 28 | pthread_mutex_lock(&mtx); 29 | ++avail; 30 | pthread_mutex_unlock(&mtx); 31 | } 32 | 33 | pthread_cond_signal(&cv); 34 | 35 | printf("[PRODUCER]: end.\n"); 36 | 37 | return NULL; 38 | } 39 | 40 | 41 | void* consumer(void* tot) 42 | { 43 | int total = *(int *)tot; 44 | printf("[CONSUMER]: %d\n", total); 45 | 46 | while (total) { 47 | pthread_mutex_lock(&mtx); 48 | 49 | while (!avail) 50 | pthread_cond_wait(&cv, &mtx); 51 | 52 | while (avail) { 53 | --avail; 54 | --total; 55 | } 56 | 57 | pthread_mutex_unlock(&mtx); 58 | } 59 | 60 | printf("[CONSUMER]: end.\n"); 61 | 62 | return NULL; 63 | } 64 | 65 | 66 | int main(int argc, char* argv[]) 67 | { 68 | int retcode = EXIT_FAILURE; 69 | 70 | int to_consume = 0; 71 | int producers = argc > 1 ? 
argc - 1 : 0; 72 | int started_producers = 0; 73 | 74 | pthread_t* producers_th = (pthread_t*)malloc(sizeof(pthread_t) * producers); 75 | if (!producers_th) 76 | return retcode; 77 | 78 | pthread_t consumer_th; 79 | 80 | for (int i = 0; i < producers; ++i) { 81 | to_consume += atoi(argv[i + 1]); 82 | if (pthread_create(&producers_th[i], NULL, producer, argv[i + 1])) 83 | goto producers_cleanup; 84 | ++started_producers; 85 | } 86 | 87 | if (pthread_create(&consumer_th, NULL, consumer, &to_consume)) 88 | goto producers_cleanup; 89 | 90 | retcode = EXIT_SUCCESS; 91 | 92 | pthread_join(consumer_th, NULL); 93 | 94 | producers_cleanup: 95 | for (int i = 0; i < started_producers; ++i) 96 | pthread_join(producers_th[i], NULL); 97 | 98 | free(producers_th); 99 | 100 | return retcode; 101 | } 102 | 103 | -------------------------------------------------------------------------------- /Concurrency/code/4-Condition-Variables/5-Bad-Signal.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define WTHREADS 10 7 | 8 | pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 9 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 10 | int order = 0; 11 | 12 | 13 | void* threadfunc(void* vorder) 14 | { 15 | int myorder = *((int*)vorder); 16 | 17 | pthread_mutex_lock(&mtx); 18 | 19 | while (order != myorder) 20 | pthread_cond_wait(&cv, &mtx); 21 | 22 | printf("Set %d\n", myorder); 23 | ++order; 24 | 25 | pthread_mutex_unlock(&mtx); 26 | 27 | 28 | pthread_cond_signal(&cv); 29 | 30 | return NULL; 31 | } 32 | 33 | 34 | int main() 35 | { 36 | int retcode = EXIT_FAILURE; 37 | 38 | pthread_t threads[WTHREADS]; 39 | int all_order[WTHREADS]; 40 | 41 | int running_threads = 0; 42 | for (int i = 0; i < WTHREADS; ++i) { 43 | all_order[i] = WTHREADS - i; 44 | if (pthread_create(&threads[i], NULL, threadfunc, &all_order[i])) 45 | goto threads_cleanup; 46 | ++running_threads; 47 | } 48 | 49 | printf("About to 
start...\n"); 50 | sleep(1); 51 | pthread_mutex_lock(&mtx); 52 | ++order; 53 | pthread_mutex_unlock(&mtx); 54 | 55 | pthread_cond_signal(&cv); 56 | 57 | 58 | retcode = EXIT_SUCCESS; 59 | 60 | threads_cleanup: 61 | for (int i = 0; i < running_threads; ++i) 62 | pthread_join(threads[i], NULL); 63 | 64 | return retcode; 65 | } 66 | -------------------------------------------------------------------------------- /Concurrency/code/4-Condition-Variables/6-Broadcast.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define WTHREADS 10 7 | 8 | pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 9 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 10 | int order = 0; 11 | 12 | 13 | void* threadfunc(void* vorder) 14 | { 15 | int myorder = *((int*)vorder); 16 | 17 | pthread_mutex_lock(&mtx); 18 | 19 | while (order != myorder) 20 | pthread_cond_wait(&cv, &mtx); 21 | 22 | printf("Set %d\n", myorder); 23 | ++order; 24 | 25 | pthread_mutex_unlock(&mtx); 26 | 27 | 28 | pthread_cond_broadcast(&cv); 29 | 30 | return NULL; 31 | } 32 | 33 | 34 | int main() 35 | { 36 | int retcode = EXIT_FAILURE; 37 | 38 | pthread_t threads[WTHREADS]; 39 | int all_order[WTHREADS]; 40 | 41 | int running_threads = 0; 42 | for (int i = 0; i < WTHREADS; ++i) { 43 | all_order[i] = WTHREADS - i; 44 | if (pthread_create(&threads[i], NULL, threadfunc, &all_order[i])) 45 | goto threads_cleanup; 46 | ++running_threads; 47 | } 48 | 49 | printf("About to start...\n"); 50 | sleep(1); 51 | pthread_mutex_lock(&mtx); 52 | ++order; 53 | pthread_mutex_unlock(&mtx); 54 | 55 | pthread_cond_broadcast(&cv); 56 | 57 | 58 | retcode = EXIT_SUCCESS; 59 | 60 | threads_cleanup: 61 | for (int i = 0; i < running_threads; ++i) 62 | pthread_join(threads[i], NULL); 63 | 64 | return retcode; 65 | } 66 | -------------------------------------------------------------------------------- /Concurrency/code/4-Condition-Variables/Makefile: 
-------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -Wextra -pedantic -g 3 | 4 | .SUFFIXES : 5 | 6 | .PHONY : all 7 | all: 1-no-Condition-Variable.x 2-Minimal.x 3-Better.x 4-Condition-Variable.x \ 8 | 5-Bad-Signal.x 6-Broadcast.x 9 | 10 | .PHONY : clean 11 | clean : 12 | rm -f *.x 13 | 14 | %.x : %.c 15 | $(CC) $(CFLAGS) -pthread -o $@ $< 16 | -------------------------------------------------------------------------------- /Concurrency/code/X-Circular-Buffer/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -Wextra -pedantic -g 3 | 4 | .SUFFIXES : 5 | 6 | .PHONY : all 7 | all: libcircbuf.so test.x 8 | 9 | .PHONY : clean 10 | clean : 11 | rm -f *.x *.o *.so 12 | 13 | circbuf.o : circbuf.c 14 | $(CC) -c -fPIC $(CFLAGS) -o $@ $< 15 | 16 | libcircbuf.so : circbuf.o 17 | $(CC) -shared -o $@ $< 18 | 19 | test.x : test.c libcircbuf.so 20 | $(CC) $(CFLAGS) -I. -L. '-Wl,-rpath=$$ORIGIN' -lcircbuf -o $@ $< 21 | -------------------------------------------------------------------------------- /Concurrency/code/X-Circular-Buffer/circbuf.c: -------------------------------------------------------------------------------- 1 | #include "circbuf.h" 2 | 3 | #include 4 | 5 | 6 | inline static void ptr_advance(const struct circbuf* cb, 7 | void** ptr) 8 | { 9 | char* ptr_c = (char*)(*ptr); 10 | char* const end_buffer = (char*)cb->buffer + cb->capacity * cb->el_size; 11 | ptr_c += cb->el_size; 12 | *ptr = ptr_c < end_buffer ? 
ptr_c : cb->buffer; 13 | } 14 | 15 | 16 | void cb_init(struct circbuf* cb, 17 | void* buf, 18 | size_t el_size, 19 | size_t capacity) 20 | { 21 | cb->buffer = buf; 22 | cb->el_size = el_size; 23 | cb->capacity = capacity; 24 | 25 | cb->head = buf; 26 | cb->tail = buf; 27 | cb->size = 0; 28 | } 29 | 30 | void cb_unset(struct circbuf* cb) 31 | { 32 | cb_init(cb, NULL, 0, 0); 33 | } 34 | 35 | size_t cb_size(const struct circbuf* cb) 36 | { 37 | return cb->size; 38 | } 39 | 40 | size_t cb_capacity(const struct circbuf* cb) 41 | { 42 | return cb->capacity; 43 | } 44 | 45 | bool cb_push(struct circbuf* cb, const void* el) 46 | { 47 | if (cb->size == cb->capacity) 48 | return false; 49 | 50 | memcpy(cb->head, el, cb->el_size); 51 | ptr_advance(cb, &cb->head); 52 | ++cb->size; 53 | return true; 54 | } 55 | 56 | bool cb_pop(struct circbuf* cb, void* el) 57 | { 58 | if (!cb->size) 59 | return 0; 60 | 61 | memcpy(el, cb->tail, cb->el_size); 62 | ptr_advance(cb, &cb->tail); 63 | --cb->size; 64 | return true; 65 | } 66 | 67 | 68 | #ifdef DEBUG 69 | 70 | ptrdiff_t cb_head_offset(const struct circbuf* cb) 71 | { 72 | ptrdiff_t dptr = (char*)cb->head - (char*)cb->buffer; 73 | return dptr / cb->el_size; 74 | } 75 | 76 | ptrdiff_t cb_tail_offset(const struct circbuf* cb) 77 | { 78 | ptrdiff_t dptr = (char*)cb->tail - (char*)cb->buffer; 79 | return dptr / cb->el_size; 80 | } 81 | 82 | #endif //DEBUG 83 | -------------------------------------------------------------------------------- /Concurrency/code/X-Circular-Buffer/circbuf.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | struct circbuf { 7 | void* buffer; 8 | size_t el_size; 9 | size_t capacity; 10 | 11 | void* head; 12 | void* tail; 13 | size_t size; 14 | }; 15 | 16 | 17 | void cb_init(struct circbuf* cb, 18 | void* buf, 19 | size_t el_size, 20 | size_t capacity); 21 | void cb_unset(struct circbuf* cb); 22 | size_t cb_size(const struct 
circbuf* cb); 23 | size_t cb_capacity(const struct circbuf* cb); 24 | bool cb_push(struct circbuf* cb, const void* el); 25 | bool cb_pop(struct circbuf* cb, void* el); 26 | 27 | #ifdef DEBUG 28 | ptrdiff_t cb_head_offset(const struct circbuf* cb); 29 | ptrdiff_t cb_tail_offset(const struct circbuf* cb); 30 | #endif //DEBUG 31 | -------------------------------------------------------------------------------- /Concurrency/code/X-Circular-Buffer/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "circbuf.h" 5 | 6 | #define DEFAULT_LEN 10 7 | #define MULTIPLICITY_FACTOR 10 8 | 9 | #ifdef DEBUG 10 | #define OFFSET_ENABLED 1 11 | #else 12 | #define OFFSET_ENABLED 0 13 | #endif //DEBUG 14 | 15 | void print_test_result(size_t n, const char* name, bool passed) 16 | { 17 | printf("TEST %2lu: %-50s [%.6s]\n", 18 | n, 19 | name, 20 | passed ? "PASSED" : "FAILED"); 21 | } 22 | 23 | 24 | bool test_push_until_full(struct circbuf* cb, 25 | size_t* written, 26 | double* checkbuf) 27 | { 28 | const size_t capacity = cb_capacity(cb); 29 | const size_t size = cb_size(cb); 30 | const size_t empty = size < capacity ? 
capacity - size : 0; 31 | *written = 0; 32 | 33 | double el; 34 | for (size_t i = 0; i < empty; ++i) { 35 | el = i; 36 | if (!cb_push(cb, &el)) { 37 | return false; 38 | } else { 39 | checkbuf[*written] = el; 40 | ++*written; 41 | } 42 | } 43 | 44 | ++el; 45 | if (cb_push(cb, &el)) 46 | return false; 47 | 48 | return empty == *written; 49 | } 50 | 51 | 52 | bool test_pop_until_empty(struct circbuf* cb, 53 | const size_t* written, 54 | const double* checkbuf) 55 | { 56 | const size_t size = cb_size(cb); 57 | 58 | if (*written != size) 59 | return false; 60 | 61 | double el; 62 | for (size_t i = 0; i < size; ++i) { 63 | if (!cb_pop(cb, &el)) 64 | return false; 65 | 66 | if (el != checkbuf[i]) 67 | return false; 68 | } 69 | 70 | if (cb_pop(cb, &el)) 71 | return false; 72 | 73 | return !cb_size(cb); 74 | } 75 | 76 | 77 | bool test_push_then_pop(struct circbuf* cb) 78 | { 79 | const size_t capacity = cb_capacity(cb); 80 | const size_t steps = capacity * MULTIPLICITY_FACTOR; 81 | 82 | double el; 83 | while (cb_size(cb)) 84 | if (!cb_pop(cb, &el)) 85 | return false; 86 | 87 | for (size_t i = 0; i < steps; ++i) { 88 | el = i; 89 | if (!cb_push(cb, &el)) 90 | return false; 91 | 92 | if (!cb_pop(cb, &el)) 93 | return false; 94 | 95 | if (el != i) 96 | return false; 97 | 98 | #ifdef DEBUG 99 | if (cb_head_offset(cb) != cb_tail_offset(cb)) 100 | return false; 101 | #endif //DEBUG 102 | } 103 | 104 | return true; 105 | } 106 | 107 | 108 | bool test_multiple_push_then_pop(struct circbuf* cb, 109 | size_t batches, 110 | const size_t* push_n, 111 | const double* push_v, 112 | const size_t* pop_n, 113 | bool* inserted) 114 | { 115 | size_t push_idx = 0; 116 | size_t pop_idx = 0; 117 | 118 | for (size_t batch = 0; batch < batches; ++batch) 119 | { 120 | // push 121 | for (size_t i = 0; i < push_n[batch]; ++i) { 122 | if (cb_push(cb, &push_v[push_idx])) 123 | inserted[push_idx] = true; 124 | else 125 | inserted[push_idx] = false; 126 | 127 | ++push_idx; 128 | } 129 | 130 | // pop 131 
| for (size_t i = 0; i < pop_n[batch]; ++i) { 132 | double el; 133 | if (!cb_pop(cb, &el)) 134 | continue; 135 | 136 | if (inserted[pop_idx]) { 137 | if (el != push_v[pop_idx]) 138 | return false; 139 | } 140 | 141 | ++pop_idx; 142 | } 143 | } 144 | 145 | return true; 146 | } 147 | 148 | 149 | 150 | int main(int argc, char* argv[]) 151 | { 152 | int retcode = EXIT_FAILURE; 153 | 154 | int len = DEFAULT_LEN; 155 | if (argc == 2) 156 | len = atoi(argv[1]); 157 | 158 | double* buffer = (double*)malloc(len * sizeof(double)); 159 | if (!buffer) { 160 | fprintf(stderr, "Impossible to allocate buffer.\n"); 161 | return retcode; 162 | } 163 | 164 | double* checkbuf = (double*)malloc(len * sizeof(double)); 165 | if (!checkbuf) { 166 | fprintf(stderr, "Impossible to allocate checkbuf.\n"); 167 | goto free_buffer; 168 | } 169 | 170 | size_t written; 171 | 172 | retcode = EXIT_SUCCESS; 173 | 174 | struct circbuf cb; 175 | cb_init(&cb, buffer, sizeof(double), len); 176 | 177 | printf("circbuf test suite (offset tests %s)\n\n", 178 | OFFSET_ENABLED ? 
"ENABLED" : "DISABLED"); 179 | 180 | { 181 | // TEST 0: empty at the beginning 182 | bool passed = cb_size(&cb) == 0; 183 | #ifdef DEBUG 184 | passed &= cb_head_offset(&cb) == 0; 185 | passed &= cb_tail_offset(&cb) == 0; 186 | #endif //DEBUG 187 | print_test_result(0, "empty at the beginning", passed); 188 | } 189 | 190 | { 191 | // TEST 1: full circbuf from empty 192 | bool ret_test = test_push_until_full(&cb, &written, checkbuf); 193 | bool ok_written = written == (size_t)len; 194 | bool ok_inserted = true; 195 | for (size_t i = 0; i < written; ++i) { 196 | if (checkbuf[i] != i) { 197 | ok_inserted = false; 198 | break; 199 | } 200 | } 201 | 202 | bool passed = ret_test && ok_written && ok_inserted; 203 | #ifdef DEBUG 204 | passed &= cb_head_offset(&cb) == 0; 205 | passed &= cb_tail_offset(&cb) == 0; 206 | #endif //DEBUG 207 | print_test_result(1, "full circbuf from empty", passed); 208 | } 209 | 210 | { 211 | // TEST 2: empty circbuf from full 212 | bool passed = test_pop_until_empty(&cb, &written, checkbuf); 213 | #ifdef DEBUG 214 | passed &= cb_head_offset(&cb) == 0; 215 | passed &= cb_head_offset(&cb) == 0; 216 | #endif //DEBUG 217 | print_test_result(2, "empty circbuf from full", passed); 218 | } 219 | 220 | { 221 | // TEST 3: push then pop 222 | bool passed = test_push_then_pop(&cb); 223 | print_test_result(3, "push then pop", passed); 224 | } 225 | 226 | { 227 | // TEST 4: multiple push then pop 228 | size_t batches = 5; 229 | size_t push_n[] = {3, 4, 5, 0, 2}; 230 | size_t pop_n[] = {3, 3, 3, 3, 3}; 231 | double push_v[] = { 0.0, 1.0, 2.0, 3.0, 4.0, 232 | 5.0, 6.0, 7.0, 8.0, 9.0, 233 | 10.0, 11.0, 12.0, 13.0, 14.0}; 234 | bool inserted[15]; 235 | 236 | bool passed = test_multiple_push_then_pop(&cb, 237 | batches, 238 | push_n, 239 | push_v, 240 | pop_n, 241 | inserted); 242 | print_test_result(4, "multiple push then pop", passed); 243 | } 244 | 245 | 246 | cb_unset(&cb); 247 | free(checkbuf); 248 | free_buffer: 249 | free(buffer); 250 | 251 | return 
retcode; 252 | } 253 | -------------------------------------------------------------------------------- /Concurrency/code/Y-Thread-Safe-CB/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -Wextra -pedantic -g 3 | 4 | .SUFFIXES : 5 | 6 | .PHONY : all 7 | all: libtscircbuf.so test.x 8 | 9 | .PHONY : clean 10 | clean : 11 | rm -f *.x *.o *.so 12 | 13 | circbuf.o : circbuf.c circbuf.h 14 | $(CC) -c -fPIC $(CFLAGS) -o $@ $< 15 | 16 | tscircbuf.o : tscircbuf.c tscircbuf.h 17 | $(CC) -c -fPIC $(CFLAGS) -pthread -o $@ $< 18 | 19 | libtscircbuf.so : circbuf.o tscircbuf.o 20 | $(CC) -pthread -shared -o $@ $^ 21 | 22 | test.x: test.c libtscircbuf.so 23 | $(CC) $(CFLAGS) -pthread -I. -L. '-Wl,-rpath=$$ORIGIN' -ltscircbuf -o $@ $< 24 | -------------------------------------------------------------------------------- /Concurrency/code/Y-Thread-Safe-CB/circbuf.c: -------------------------------------------------------------------------------- 1 | #include "circbuf.h" 2 | 3 | #include 4 | 5 | 6 | inline static void ptr_advance(const struct circbuf* cb, 7 | void** ptr) 8 | { 9 | char* ptr_c = (char*)(*ptr); 10 | char* const end_buffer = (char*)cb->buffer + cb->capacity * cb->el_size; 11 | ptr_c += cb->el_size; 12 | *ptr = ptr_c < end_buffer ? 
ptr_c : cb->buffer; 13 | } 14 | 15 | 16 | void cb_init(struct circbuf* cb, 17 | void* buf, 18 | size_t el_size, 19 | size_t capacity) 20 | { 21 | cb->buffer = buf; 22 | cb->el_size = el_size; 23 | cb->capacity = capacity; 24 | 25 | cb->head = buf; 26 | cb->tail = buf; 27 | cb->size = 0; 28 | } 29 | 30 | void cb_unset(struct circbuf* cb) 31 | { 32 | cb_init(cb, NULL, 0, 0); 33 | } 34 | 35 | size_t cb_size(const struct circbuf* cb) 36 | { 37 | return cb->size; 38 | } 39 | 40 | size_t cb_capacity(const struct circbuf* cb) 41 | { 42 | return cb->capacity; 43 | } 44 | 45 | bool cb_push(struct circbuf* cb, const void* el) 46 | { 47 | if (cb->size == cb->capacity) 48 | return false; 49 | 50 | memcpy(cb->head, el, cb->el_size); 51 | ptr_advance(cb, &cb->head); 52 | ++cb->size; 53 | return true; 54 | } 55 | 56 | bool cb_pop(struct circbuf* cb, void* el) 57 | { 58 | if (!cb->size) 59 | return 0; 60 | 61 | memcpy(el, cb->tail, cb->el_size); 62 | ptr_advance(cb, &cb->tail); 63 | --cb->size; 64 | return true; 65 | } 66 | 67 | 68 | #ifdef DEBUG 69 | 70 | ptrdiff_t cb_head_offset(const struct circbuf* cb) 71 | { 72 | ptrdiff_t dptr = (char*)cb->head - (char*)cb->buffer; 73 | return dptr / cb->el_size; 74 | } 75 | 76 | ptrdiff_t cb_tail_offset(const struct circbuf* cb) 77 | { 78 | ptrdiff_t dptr = (char*)cb->tail - (char*)cb->buffer; 79 | return dptr / cb->el_size; 80 | } 81 | 82 | #endif //DEBUG 83 | -------------------------------------------------------------------------------- /Concurrency/code/Y-Thread-Safe-CB/circbuf.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | struct circbuf { 7 | void* buffer; 8 | size_t el_size; 9 | size_t capacity; 10 | 11 | void* head; 12 | void* tail; 13 | size_t size; 14 | }; 15 | 16 | 17 | void cb_init(struct circbuf* cb, 18 | void* buf, 19 | size_t el_size, 20 | size_t capacity); 21 | void cb_unset(struct circbuf* cb); 22 | size_t cb_size(const struct 
circbuf* cb); 23 | size_t cb_capacity(const struct circbuf* cb); 24 | bool cb_push(struct circbuf* cb, const void* el); 25 | bool cb_pop(struct circbuf* cb, void* el); 26 | 27 | #ifdef DEBUG 28 | ptrdiff_t cb_head_offset(const struct circbuf* cb); 29 | ptrdiff_t cb_tail_offset(const struct circbuf* cb); 30 | #endif //DEBUG 31 | -------------------------------------------------------------------------------- /Concurrency/code/Y-Thread-Safe-CB/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "tscircbuf.h" 7 | 8 | #define DEFAULT_LEN 1000 9 | #define DEFAULT_PRODUCERS 4 10 | #define DEFAULT_CONSUMERS 4 11 | 12 | #define TRUE (void*)true 13 | #define FALSE (void*)false 14 | 15 | 16 | void print_test_result(size_t n, const char* name, bool passed) 17 | { 18 | printf("TEST %2lu: %-50s [%.6s]\n", 19 | n, 20 | name, 21 | passed ? "PASSED" : "FAILED"); 22 | } 23 | 24 | 25 | void* producer_1(void* vtscb) 26 | { 27 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 28 | double el = 7.0; 29 | 30 | sleep(1); 31 | if (!tscb_try_push(tscb, &el)) 32 | return FALSE; 33 | 34 | return TRUE; 35 | } 36 | 37 | 38 | void* consumer_1(void* vtscb) 39 | { 40 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 41 | double el; 42 | 43 | if (tscb_size(tscb)) // BEWARE: fragile! sleep-safe! 
44 | return FALSE; 45 | 46 | if (!tscb_wait_and_pop(tscb, &el)) 47 | return FALSE; 48 | 49 | if (el != 7.0) 50 | return FALSE; 51 | 52 | if (tscb_size(tscb)) 53 | return FALSE; 54 | 55 | return TRUE; 56 | } 57 | 58 | 59 | void* producer_2(void* vtscb) 60 | { 61 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 62 | double el = 0.0; 63 | 64 | for (size_t i = 0; i < DEFAULT_LEN + 1; ++i) { 65 | if (!tscb_wait_and_push(tscb, &el)) { 66 | return FALSE; 67 | } 68 | ++el; 69 | } 70 | 71 | return TRUE; 72 | } 73 | 74 | 75 | void* consumer_2(void* vtscb) 76 | { 77 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 78 | double el; 79 | 80 | sleep(1); 81 | 82 | for (size_t i = 0; i < DEFAULT_LEN; ++i) { 83 | if (!tscb_try_pop(tscb, &el)) 84 | return FALSE; 85 | 86 | if (el != (double)i) 87 | return FALSE; 88 | } 89 | 90 | sleep(1); 91 | 92 | if (!tscb_try_pop(tscb, &el)) 93 | return FALSE; 94 | 95 | if (el != (double)DEFAULT_LEN) 96 | return FALSE; 97 | 98 | if (tscb_size(tscb)) 99 | return FALSE; 100 | 101 | return TRUE; 102 | } 103 | 104 | 105 | void* producer_3(void* vtscb) 106 | { 107 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 108 | double el = 0.0; 109 | 110 | for (size_t i = 0; i < DEFAULT_LEN / DEFAULT_PRODUCERS; ++i) { 111 | if (!tscb_try_push(tscb, &el)) { 112 | printf("producer_3 fail: %ld\n", i); 113 | return FALSE; 114 | } 115 | } 116 | 117 | return TRUE; 118 | } 119 | 120 | 121 | void* consumer_3(void* vtscb) 122 | { 123 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 124 | double el; 125 | 126 | for (size_t i = 0; i < DEFAULT_LEN / DEFAULT_CONSUMERS; ++i) { 127 | if (!tscb_wait_and_pop(tscb, &el)) { 128 | printf("consumer_3 fail: %ld\n", i); 129 | return FALSE; 130 | } 131 | } 132 | 133 | return TRUE; 134 | } 135 | 136 | 137 | void* producer_4(void* vtscb) 138 | { 139 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 140 | 141 | sleep(5); 142 | tscb_abort_wait(tscb); 143 | 144 | return TRUE; 145 | } 146 | 147 | 148 | void* 
consumer_4(void* vtscb) 149 | { 150 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 151 | double el; 152 | 153 | tscb_wait_and_pop(tscb, &el); 154 | 155 | return TRUE; 156 | } 157 | 158 | 159 | int main(int argc, char* argv[]) 160 | { 161 | int retcode = EXIT_FAILURE; 162 | 163 | int len = DEFAULT_LEN; 164 | if (argc == 2) 165 | len = atoi(argv[1]); 166 | 167 | 168 | double* buffer = (double*)malloc(len * sizeof(double)); 169 | if (!buffer) { 170 | fprintf(stderr, "Impossible to allocate buffer.\n"); 171 | return retcode; 172 | } 173 | 174 | struct tscircbuf tscb; 175 | if (!tscb_init(&tscb, buffer, sizeof(double), len)) 176 | goto free_buffer; 177 | 178 | 179 | pthread_t producers[DEFAULT_PRODUCERS], consumers[DEFAULT_CONSUMERS]; 180 | 181 | pthread_create(&producers[0], NULL, producer_1, (void*)&tscb); 182 | pthread_create(&consumers[0], NULL, consumer_1, (void*)&tscb); 183 | 184 | void *ret_consumer, *ret_producer; 185 | pthread_join(consumers[0], &ret_consumer); 186 | pthread_join(producers[0], &ret_producer); 187 | 188 | bool passed = (bool)ret_producer & (bool)ret_consumer; 189 | print_test_result(1, "producer: try, consumer: wait", passed); 190 | 191 | 192 | pthread_create(&producers[0], NULL, producer_2, (void*)&tscb); 193 | pthread_create(&consumers[0], NULL, consumer_2, (void*)&tscb); 194 | 195 | pthread_join(consumers[0], &ret_consumer); 196 | pthread_join(producers[0], &ret_producer); 197 | 198 | passed = (bool)ret_producer & (bool)ret_consumer; 199 | print_test_result(2, "producer: wait, consumer: try", passed); 200 | 201 | 202 | for (size_t i = 0; i < DEFAULT_PRODUCERS; ++i) 203 | pthread_create(&producers[i], NULL, producer_3, (void*)&tscb); 204 | 205 | passed = true; 206 | for (size_t i = 0; i < DEFAULT_PRODUCERS; ++i) { 207 | pthread_join(producers[i], &ret_producer); 208 | passed &= (bool)ret_producer; 209 | } 210 | 211 | size_t expected_size = DEFAULT_LEN / DEFAULT_PRODUCERS * DEFAULT_PRODUCERS; 212 | passed &= (tscb_size(&tscb) == 
expected_size); 213 | 214 | for (size_t i = 0; i < DEFAULT_CONSUMERS; ++i) { 215 | pthread_create(&consumers[i], NULL, consumer_3, (void*)&tscb); 216 | } 217 | 218 | for (size_t i = 0; i < DEFAULT_CONSUMERS; ++i) { 219 | pthread_join(consumers[i], &ret_consumer); 220 | passed &= (bool)ret_consumer; 221 | } 222 | 223 | expected_size -= DEFAULT_LEN / DEFAULT_CONSUMERS * DEFAULT_CONSUMERS; 224 | size_t actual_size = tscb_size(&tscb); 225 | passed &= (actual_size == expected_size); 226 | 227 | print_test_result(3, "multiple producers: try, multiple consumers: wait", passed); 228 | 229 | for (size_t i = 0; i < actual_size; ++i) { 230 | double el; 231 | if (!tscb_try_pop(&tscb, &el)) 232 | break; 233 | } 234 | 235 | passed = tscb_size(&tscb) == 0; 236 | 237 | print_test_result(4, "empty buffer", passed); 238 | 239 | 240 | pthread_create(&producers[0], NULL, producer_4, (void*)&tscb); 241 | pthread_create(&consumers[0], NULL, consumer_4, (void*)&tscb); 242 | pthread_create(&consumers[1], NULL, consumer_4, (void*)&tscb); 243 | 244 | pthread_join(consumers[1], &ret_consumer); 245 | passed = (bool)ret_consumer; 246 | pthread_join(consumers[0], &ret_consumer); 247 | passed &= (bool)ret_consumer; 248 | pthread_join(producers[0], &ret_producer); 249 | passed &= (bool)ret_producer; 250 | 251 | print_test_result(5, "abort wait", passed); 252 | 253 | 254 | tscb_unset(&tscb); 255 | 256 | free_buffer: 257 | free(buffer); 258 | 259 | return retcode; 260 | } 261 | -------------------------------------------------------------------------------- /Concurrency/code/Y-Thread-Safe-CB/tscircbuf.c: -------------------------------------------------------------------------------- 1 | #include "tscircbuf.h" 2 | 3 | 4 | bool tscb_init(struct tscircbuf* tscb, 5 | void* buf, 6 | size_t el_size, 7 | size_t capacity) 8 | { 9 | cb_init(&tscb->cb, buf, el_size, capacity); 10 | 11 | if (pthread_mutex_init(&tscb->mtx, NULL)) 12 | return false; 13 | 14 | if (pthread_cond_init(&tscb->cv, NULL)) 15 | return 
false; 16 | 17 | tscb->abort = false; 18 | 19 | return true; 20 | } 21 | 22 | 23 | bool tscb_unset(struct tscircbuf* tscb) 24 | { 25 | if (pthread_cond_destroy(&tscb->cv)) 26 | return false; 27 | 28 | if (pthread_mutex_destroy(&tscb->mtx)) 29 | return false; 30 | 31 | tscb->abort = true; 32 | 33 | cb_unset(&tscb->cb); 34 | 35 | return true; 36 | } 37 | 38 | 39 | size_t tscb_size(struct tscircbuf* tscb) 40 | { 41 | pthread_mutex_lock(&tscb->mtx); 42 | size_t size = cb_size(&tscb->cb); 43 | pthread_mutex_unlock(&tscb->mtx); 44 | 45 | return size; 46 | } 47 | 48 | 49 | size_t tscb_capacity(const struct tscircbuf* tscb) 50 | { 51 | return cb_capacity(&tscb->cb); 52 | } 53 | 54 | 55 | bool tscb_try_push(struct tscircbuf* tscb, const void* el) 56 | { 57 | pthread_mutex_lock(&tscb->mtx); 58 | 59 | if (cb_size(&tscb->cb) == cb_capacity(&tscb->cb)) { 60 | pthread_mutex_unlock(&tscb->mtx); 61 | return false; 62 | } 63 | 64 | bool retval = cb_push(&tscb->cb, el); 65 | pthread_mutex_unlock(&tscb->mtx); 66 | pthread_cond_broadcast(&tscb->cv); 67 | 68 | return retval; 69 | } 70 | 71 | 72 | bool tscb_wait_and_push(struct tscircbuf* tscb, const void* el) 73 | { 74 | pthread_mutex_lock(&tscb->mtx); 75 | while (cb_size(&tscb->cb) == cb_capacity(&tscb->cb) && !tscb->abort) 76 | pthread_cond_wait(&tscb->cv, &tscb->mtx); 77 | 78 | if (tscb->abort) { 79 | pthread_mutex_unlock(&tscb->mtx); 80 | return false; 81 | } 82 | 83 | bool retval = cb_push(&tscb->cb, el); 84 | pthread_mutex_unlock(&tscb->mtx); 85 | pthread_cond_broadcast(&tscb->cv); 86 | 87 | return retval; 88 | } 89 | 90 | 91 | bool tscb_try_pop(struct tscircbuf* tscb, void* el) 92 | { 93 | pthread_mutex_lock(&tscb->mtx); 94 | 95 | if (cb_size(&tscb->cb) == 0) { 96 | pthread_mutex_unlock(&tscb->mtx); 97 | return false; 98 | } 99 | 100 | bool retval = cb_pop(&tscb->cb, el); 101 | pthread_mutex_unlock(&tscb->mtx); 102 | pthread_cond_broadcast(&tscb->cv); 103 | 104 | return retval; 105 | } 106 | 107 | 108 | bool 
tscb_wait_and_pop(struct tscircbuf* tscb, void* el) 109 | { 110 | pthread_mutex_lock(&tscb->mtx); 111 | while (cb_size(&tscb->cb) == 0 && !tscb->abort) 112 | pthread_cond_wait(&tscb->cv, &tscb->mtx); 113 | 114 | if (tscb->abort) { 115 | pthread_mutex_unlock(&tscb->mtx); 116 | return false; 117 | } 118 | 119 | bool retval = cb_pop(&tscb->cb, el); 120 | pthread_mutex_unlock(&tscb->mtx); 121 | pthread_cond_broadcast(&tscb->cv); 122 | 123 | return retval; 124 | } 125 | 126 | 127 | void tscb_abort_wait(struct tscircbuf* tscb) 128 | { 129 | pthread_mutex_lock(&tscb->mtx); 130 | tscb->abort = true; 131 | pthread_mutex_unlock(&tscb->mtx); 132 | 133 | pthread_cond_broadcast(&tscb->cv); 134 | } 135 | 136 | 137 | void tscb_reset_abort(struct tscircbuf* tscb) 138 | { 139 | pthread_mutex_lock(&tscb->mtx); 140 | tscb->abort = false; 141 | pthread_mutex_unlock(&tscb->mtx); 142 | 143 | pthread_cond_broadcast(&tscb->cv); 144 | } 145 | 146 | -------------------------------------------------------------------------------- /Concurrency/code/Y-Thread-Safe-CB/tscircbuf.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "circbuf.h" 4 | #include 5 | 6 | 7 | struct tscircbuf { 8 | struct circbuf cb; 9 | pthread_mutex_t mtx; 10 | pthread_cond_t cv; 11 | bool abort; 12 | }; 13 | 14 | bool tscb_init(struct tscircbuf* tscb, 15 | void* buf, 16 | size_t el_size, 17 | size_t capacity); 18 | bool tscb_unset(struct tscircbuf* tscb); 19 | size_t tscb_size(struct tscircbuf* tscb); 20 | size_t tscb_capacity(const struct tscircbuf* tscb); 21 | bool tscb_try_push(struct tscircbuf* tscb, const void* el); 22 | bool tscb_wait_and_push(struct tscircbuf* tscb, const void* el); 23 | bool tscb_try_pop(struct tscircbuf* tscb, void* el); 24 | bool tscb_wait_and_pop(struct tscircbuf* tscb, void* el); 25 | void tscb_abort_wait(struct tscircbuf* tscb); 26 | void tscb_reset_abort(struct tscircbuf* tscb); 27 | 
-------------------------------------------------------------------------------- /Concurrency/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /GPU/DSSC-EXAME-README.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/DSSC-EXAME-README.pdf -------------------------------------------------------------------------------- /GPU/Jacobi-project/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/Jacobi-project/.DS_Store -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/background.html: -------------------------------------------------------------------------------- 1 |

Background

2 |

The algorithm used in this program solves Laplace’s equation on an evenly spaced grid through the use of a simple Jacobi iteration technique. The equation is a partial differential equation named after Pierre-Simon Laplace and is important in many fields of science: namely the fields of electromagnetism, astronomy and fluid dynamics. The physical interpretation of the equation is that it describes the behavior of potentials.

3 |

The equation has the form: Equation 1

4 |

A practical solution to this equation is the use of a Jacobi iteration that employs numerical second derivatives. Let's assume that we would like to know the state of heat flow across a metal surface where the source is coming from one of the corners of the square surface.

5 |

To tackle this, we would set up a two dimensional grid to represent the surface, and we will divide it evenly into square regions. We can simulate the heat source by defining the boundary conditions along the sides of the grid. In this case, we will be setting the bottom left corner to 100.0, with a gradient decreasing toward the other corners until it is zero. Once these conditions are set, the algorithm will use numerical solutions to the second derivatives in each direction to update the current matrix elements. And although we won’t check for convergence, the flow of the surface will eventually hit a steady-state.

6 |

The Algorithm

7 |

Following is a high-level description of the algorithm you will be implementing. Figure 1 shows a diagram of the grid that will result from the algorithm.

8 |
    9 |
  1. Allocate and specify a 2D array defining an evenly spaced grid of square dimension. You will need to leave space for the boundaries, as they do not belong to the main grid (i.e. a 1024 x 1024 matrix would need to be allocated as 1026x1026 to leave room for the borders).

  2. 10 |
  3. Set up the initial constant boundary conditions. The value at the lower left hand corner of the grid will be fixed at 100.00, and the value ascending and to the right will be set to a linear gradient reaching zero at the opposite corners (see Figure 1). The rest of the borders will be fixed at zero. Please note, these boundaries will remain constant throughout the simulation.

  4. 11 |
  5. Setup the initial condition of the inner grid elements as 0.5.

  6. 12 |
  7. Begin the iterative process and continue it for a fixed number of cycles. At each iteration, the value of each inner matrix element needs to be recomputed from elements of the current iteration. The updating formula, based on numerical computation of second derivatives, is:

    13 |
    14 | Equation 2
    Equation 2
    15 |
  8. 16 |
  9. After updating, copy the new matrix into the old’s memory and continue iterations until completion.

  10. 17 |
18 |
19 | Figure1
Figure1
20 |
21 | -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/background.md: -------------------------------------------------------------------------------- 1 | ## Background 2 | 3 | The algorithm used in this program solves Laplace’s equation on an 4 | evenly spaced grid through the use of a simple Jacobi iteration 5 | technique. The equation is a partial differential equation named after 6 | Pierre-Simon Laplace and are important in many fields of science: namely 7 | the fields of electromagnetism, astronomy and fluid dynamics. The 8 | physical interpretation of the equations is they describe the behavior 9 | of potentials. 10 | 11 | The equation has the form: 12 | ![Equation 1](./jacobiEq1.jpg) 13 | 14 | A practical solution to this equation is the use of a Jacobi iteration 15 | that employs numerical second derivatives. Lets assume that we would 16 | like to know the state of heat flow across a metal surface where the 17 | source is coming from one of the corners of the square surface. 18 | 19 | To tackle this, we would set up a two dimensional grid to represent the 20 | surface, and we will divide it evenly into square regions. We can 21 | simulate the heat source by defining the boundary conditions along the 22 | sides of the grid. In this case, we will be setting the bottom left 23 | corner to 100.0 and with an increasing gradient toward the other corners 24 | until it is zero. Once these conditions are set, the algorithm will use 25 | numerical solutions to the second derivatives in each direction to 26 | update the current matrix elements. And although we won’t check for 27 | convergence, the flow of the surface will eventually hit a 28 | steady-state. 29 | 30 | 31 | ### The Algorithm 32 | 33 | Following is a high-level description of the algorithm you will be 34 | implementing. Figure 1 shows a diagram of the grid that will result from 35 | the algorithm. 36 | 37 | 1. 
Allocate and specify a 2D array defining an evenly spaced grid of 38 | square dimension. You will need to leave space for the boundaries, 39 | as they do not belong to the main grid (i.e. a 1024 x 1024 matrix 40 | would need to be allocated as 1026x1026 to leave room for the 41 | borders. 42 | 43 | 2. Setup the initial constant boundary conditions. The value at the 44 | lower left hand corner of the of the grid will be fixed at 100.00, 45 | and the value ascending and to the right will be set to a linear 46 | gradient reaching zero at the opposite corners (see Figure 1). The 47 | rest of the borders will be fixed at zero. Please note, these 48 | boundaries will remain constant throughout the simulation. 49 | 50 | 3. Setup the initial condition of the inner grid elements as 0.5. 51 | 52 | 4. Begin and continue for a fixed number of cycles the iterative 53 | process. At each iteration, the value of each inner matrix element 54 | needs to be recomputed from elements of the current iteration. The 55 | updating formula, based on numerical computation of second 56 | derivatives, is: 57 | 58 | ![Equation 2](eqn.PNG) 59 | 60 | 5. After updating, copy the new matrix into the old's memory and 61 | continue iterations until completion. 62 | 63 | ![Figure1](jacobiFigure1.jpg) 64 | 65 | 66 | -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/eqn.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/Jacobi-project/aux/eqn.PNG -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/hints.html: -------------------------------------------------------------------------------- 1 |

MPI - 1D Decomposition

2 |

Assignments

3 |

The parameters of the algorithm are such:

4 |
    5 |
  1. The grid matrix must be completely distributed, no replicating the matrix on all processors. In this exercise, only use a 1 dimensional decomposition (see Figure 2).

    6 |
    7 | Figure 2
    Figure 2
    8 |
  2. 9 |
  3. The whole process must be parallel, that includes initialization of the grid and boundary conditions, the iterative evolution and the final dump on file of the resulting grid.

  4. 10 |
  5. Implement an efficient data exchange between processes.

  6. 11 |
  7. Handle dimensions even if not multiple of the number of processes (optional for merit).

  8. 12 |
13 |

Here is a guideline for the process that parallel programmers use to do this:

14 |
    15 |
  1. Study the serial algorithm and see where parallelism can be exploited. Also think about how the data can be divided. Best way to do this is on a piece of paper, drawing out the layout conceptually before you even touch the code.

  2. 16 |
  3. Still on paper, figure out how this conceptualization moves to being expressed in the parallel programming language you want to use. What MPI calls do you need to use? Which processors will be doing what work? STILL ON PAPER.

  4. 17 |
  5. Now begin programming the algorithm up in MPI.

  6. 18 |
  7. Test the program on a small matrix and processor count to make sure it is doing what you expect it to do.

  8. 19 |
  9. Once you are satisfied it works, scale it up.

  10. 20 |
21 |

With this in mind, go through this process to implement a 1-D decomposition of the Jacobi iteration algorithm.

22 |

Tips

23 |
    24 |
  • To set up the initial matrix, you will need to figure out which values go in what chunk of the distributed matrix. Think carefully about the data that each parallel chunk of work needs to have on it.

  • 25 |
  • Notice the value of each matrix element depends on the adjacent elements from the previous matrix. In the distributed matrix, this has consequences for the boundary elements, in that if you straightforwardly divide the matrix up by rows, elements that are needed to compute a matrix element will reside on a different processor. Think carefully about how to allocate the piece of the matrix on the current processor, and what communication needs to be performed before computing the matrix elements. Figure 2 is an illustration of one communication pattern that can be used.

  • 26 |
  • You are asked to write a function that prints the distributed matrix, so that you can check whether things are going the way you want.

  • 27 |
  • To perform a data exchange with a “dummy” process you can use MPI_PROC_NULL

  • 28 |
  • A reference of MPI routines can be found at: http://mpi-forum.org/docs/mpi-1.1/mpi-11-html/node182.html

  • 29 |
30 | -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/hints.md: -------------------------------------------------------------------------------- 1 | ## MPI - 1D Decomposition 2 | 3 | ### Assignments 4 | 5 | The parameters of the algorithm are such: 6 | 7 | 8 | 1. The grid matrix must be completely distributed, no replicating the 9 | matrix on all processors. In this exercise, only use a 1 dimensional decomposition (see 10 | [Figure 2](#Figure_2)). 11 | 12 | ![Figure 2](jacobiFigure2.jpg) 13 | 14 | 2. The whole process must be parallel, that includes initialization of 15 | the grid and boundary conditions, the iterative evolution and the final dump on file of the resulting grid. 16 | 17 | 3. Implement an efficient data exchange between processes. 18 | 19 | 4. Handle dimensions even if not multiple of the number of processes. 20 | 21 | Here is a guideline for the process that parallel programmers use to do 22 | this: 23 | 24 | 1. Study the serial algorithm and see where parallelism can be 25 | exploited. Also think about how the data can be divided. Best way 26 | to do this is on a piece of paper, drawing out the layout 27 | conceptually before you even touch the code. 28 | 29 | 2. Still on paper, figure out how this conceptualization moves to being 30 | expressed in the parallel programming language you want to use. 31 | What MPI calls do you need to use? Which processors will be doing 32 | what work? STILL ON PAPER. 33 | 34 | 3. Now begin programming the algorithm up in MPI. 35 | 36 | 4. Test the program on a small matrix and processor count to make sure 37 | it is doing what you expect it to do. 38 | 39 | 5. Once you are satisfied it works, scale it up. 40 | 41 | With this in mind, go through this process to implement a 1-D 42 | decomposition of the Jacobi iteration algorithm. 
43 | 44 | 45 | ### Tips 46 | 47 | - To set up the initial matrix, you will need to figure out which 48 | values go in what chunk of the distributed matrix. Think carefully 49 | about the data that each parallel chunk of work needs to have on 50 | it. 51 | 52 | - Notice the value of each matrix element depends on the adjacent 53 | elements from the previous matrix. In the distributed matrix, this 54 | has consequences for the boundary elements, in that if you 55 | straightforwardly divide the matrix up by rows, elements that are 56 | needed to compute a matrix element will reside on a different 57 | processor. Think carefully about how to allocate the piece of the 58 | matrix on the current processor, and what communication needs to be 59 | performed before computing the matrix elements. [Figure 60 | 2](#Figure_2). is an illustration of one communication patter that 61 | can be used. 62 | 63 | 64 | - It is requested to write a function that will print the 65 | distributed matrix, so that you have the ability to check to see 66 | if things are going the way you want. 
67 | 68 | - To perform a data exchange with a “dummy” process you can use 69 | [MPI_PROC_NULL](http://mpi-forum.org/docs/mpi-1.1/mpi-11-html/node53.html) 70 | 71 | - A reference of MPI routines can be found at: 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/jacobiEq1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/Jacobi-project/aux/jacobiEq1.jpg -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/jacobiFigure1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/Jacobi-project/aux/jacobiFigure1.jpg -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/jacobiFigure2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/Jacobi-project/aux/jacobiFigure2.jpg -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/ref2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/Jacobi-project/aux/ref2.png -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/ref_Init.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/Jacobi-project/aux/ref_Init.png -------------------------------------------------------------------------------- /GPU/Jacobi-project/code/Makefile: -------------------------------------------------------------------------------- 1 | # inputs for the executables 2 | 3 | dim = 10 4 | it = 100 5 | r = 3 6 | c = 3 7 | 8 | CC=cc 9 | CFLAGS=-O3 10 | 11 | SRCS=$(wildcard *.c) 12 | EXE=$(SRCS:.c=.x) 13 | 14 | all: $(EXE) 15 | 16 | run: clean all 17 | ./$(EXE) $(dim) $(it) $(r) $(c) 18 | 19 | %.x: %.c 20 | $(CC) $< $(CFLAGS) -o $@ 21 | 22 | clean: 23 | @rm -f *~ $(EXE) solution.dat 24 | 25 | plot: 26 | @gnuplot -p plot.plt 27 | 28 | .PHONY: clean plot all 29 | 30 | -------------------------------------------------------------------------------- /GPU/Jacobi-project/code/jacobi.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | /*** function declarations ***/ 8 | 9 | // save matrix to file 10 | void save_gnuplot( double *M, size_t dim ); 11 | 12 | // evolve Jacobi 13 | void evolve( double * matrix, double *matrix_new, size_t dimension ); 14 | 15 | // return the elapsed time 16 | double seconds( void ); 17 | 18 | /*** end function declaration ***/ 19 | 20 | int main(int argc, char* argv[]){ 21 | 22 | // timing variables 23 | double t_start, t_end, increment; 24 | 25 | // indexes for loops 26 | size_t i, j, it; 27 | 28 | // initialize matrix 29 | double *matrix, *matrix_new, *tmp_matrix; 30 | 31 | size_t dimension = 0, iterations = 0, row_peek = 0, col_peek = 0; 32 | size_t byte_dimension = 0; 33 | 34 | // check on input parameters 35 | if(argc != 5) { 36 | fprintf(stderr,"\nwrong number of arguments. 
Usage: ./a.out dim it n m\n"); 37 | return 1; 38 | } 39 | 40 | dimension = atoi(argv[1]); 41 | iterations = atoi(argv[2]); 42 | row_peek = atoi(argv[3]); 43 | col_peek = atoi(argv[4]); 44 | 45 | printf("matrix size = %zu\n", dimension); 46 | printf("number of iterations = %zu\n", iterations); 47 | printf("element for checking = Mat[%zu,%zu]\n",row_peek, col_peek); 48 | 49 | if((row_peek > dimension) || (col_peek > dimension)){ 50 | fprintf(stderr, "Cannot Peek a matrix element outside of the matrix dimension\n"); 51 | fprintf(stderr, "Arguments n and m must be smaller than %zu\n", dimension); 52 | return 1; 53 | } 54 | 55 | 56 | byte_dimension = sizeof(double) * ( dimension + 2 ) * ( dimension + 2 ); 57 | matrix = ( double* )malloc( byte_dimension ); 58 | matrix_new = ( double* )malloc( byte_dimension ); 59 | 60 | memset( matrix, 0, byte_dimension ); 61 | memset( matrix_new, 0, byte_dimension ); 62 | 63 | //fill initial values 64 | for( i = 1; i <= dimension; ++i ) 65 | for( j = 1; j <= dimension; ++j ) 66 | matrix[ ( i * ( dimension + 2 ) ) + j ] = 0.5; 67 | 68 | // set up borders 69 | increment = 100.0 / ( dimension + 1 ); 70 | 71 | for( i=1; i <= dimension+1; ++i ){ 72 | matrix[ i * ( dimension + 2 ) ] = i * increment; 73 | matrix[ ( ( dimension + 1 ) * ( dimension + 2 ) ) + ( dimension + 1 - i ) ] = i * increment; 74 | matrix_new[ i * ( dimension + 2 ) ] = i * increment; 75 | matrix_new[ ( ( dimension + 1 ) * ( dimension + 2 ) ) + ( dimension + 1 - i ) ] = i * increment; 76 | } 77 | 78 | // start algorithm 79 | t_start = seconds(); 80 | for( it = 0; it < iterations; ++it ){ 81 | 82 | evolve( matrix, matrix_new, dimension ); 83 | 84 | // swap the pointers 85 | tmp_matrix = matrix; 86 | matrix = matrix_new; 87 | matrix_new = tmp_matrix; 88 | 89 | } 90 | t_end = seconds(); 91 | 92 | printf( "\nelapsed time = %f seconds\n", t_end - t_start ); 93 | printf( "\nmatrix[%zu,%zu] = %f\n", row_peek, col_peek, matrix[ ( row_peek + 1 ) * ( dimension + 2 ) + ( col_peek + 1 ) 
] ); 94 | 95 | save_gnuplot( matrix, dimension ); 96 | 97 | free( matrix ); 98 | free( matrix_new ); 99 | 100 | return 0; 101 | } 102 | 103 | void evolve( double * matrix, double *matrix_new, size_t dimension ){ 104 | 105 | size_t i , j; 106 | 107 | //This will be a row dominant program. 108 | for( i = 1 ; i <= dimension; ++i ) 109 | for( j = 1; j <= dimension; ++j ) 110 | matrix_new[ ( i * ( dimension + 2 ) ) + j ] = ( 0.25 ) * 111 | ( matrix[ ( ( i - 1 ) * ( dimension + 2 ) ) + j ] + 112 | matrix[ ( i * ( dimension + 2 ) ) + ( j + 1 ) ] + 113 | matrix[ ( ( i + 1 ) * ( dimension + 2 ) ) + j ] + 114 | matrix[ ( i * ( dimension + 2 ) ) + ( j - 1 ) ] ); 115 | } 116 | 117 | void save_gnuplot( double *M, size_t dimension ){ 118 | 119 | size_t i , j; 120 | const double h = 0.1; 121 | FILE *file; 122 | 123 | file = fopen( "solution.dat", "w" ); 124 | 125 | for( i = 0; i < dimension + 2; ++i ) 126 | for( j = 0; j < dimension + 2; ++j ) 127 | fprintf(file, "%f\t%f\t%f\n", h * j, -h * i, M[ ( i * ( dimension + 2 ) ) + j ] ); 128 | 129 | fclose( file ); 130 | 131 | } 132 | 133 | // A Simple timer for measuring the walltime 134 | double seconds(){ 135 | 136 | struct timeval tmp; 137 | double sec; 138 | gettimeofday( &tmp, (struct timezone *)0 ); 139 | sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; 140 | return sec; 141 | } 142 | 143 | -------------------------------------------------------------------------------- /GPU/Jacobi-project/code/plot.plt: -------------------------------------------------------------------------------- 1 | unset colorbox 2 | set palette rgb 33,13,10 3 | set size square 4 | plot 'solution.dat' with image -------------------------------------------------------------------------------- /GPU/Jacobi-project/readme.md: -------------------------------------------------------------------------------- 1 | # Laplace Equation by Jacobi method 2 | ## Background 3 | 4 | Please refer to [background](./aux/background.md) 5 | 6 | ## Exercises 7 | 1. 
Parallelize and optimize your C++ Jacobi code version following 8 | [**these assignments**](./aux/hints.md) 9 | 10 | 2. Perform a performance analysis of the code scaling, provide 11 | scalability charts and a brief explanation (matrix size 1200 and 12 | 12000, 10 iterations) 13 | 14 | 15 | ## Reference result (matrix size 60, 2000 iterations) 16 | ### Initial distribution of temperature 17 | 18 | Drawing 19 | 20 | ### Final distribution of temperature (after 2000 iterations) 21 | 22 | Drawing 23 | -------------------------------------------------------------------------------- /GPU/par_transp/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #ifdef DEBUG 7 | #define SIZE 8 8 | #else 9 | #define SIZE 32000 10 | #endif 11 | 12 | void print_loc( double * mat, int loc_size ){ 13 | 14 | int i, j; 15 | for( i = 0; i < loc_size; i++ ){ 16 | for( j = 0; j < SIZE; j++ ){ 17 | fprintf( stdout, "%.3g ", mat[ j + ( i * SIZE ) ] ); 18 | } 19 | fprintf( stdout, "\n" ); 20 | 21 | } 22 | } 23 | 24 | void print_par( double * mat, int loc_size, int rank, int npes ){ 25 | 26 | int count; 27 | 28 | if( rank ) MPI_Send( mat, loc_size * SIZE, MPI_DOUBLE, 0, rank, MPI_COMM_WORLD ); 29 | else{ 30 | 31 | double * buf = (double *) calloc( loc_size * SIZE, sizeof(double) ); 32 | print_loc( mat, loc_size ); 33 | for( count = 1; count < npes; count ++){ 34 | MPI_Recv( buf, loc_size * SIZE, MPI_DOUBLE, count, count, MPI_COMM_WORLD, MPI_STATUS_IGNORE ); 35 | print_loc( buf, loc_size ); 36 | } 37 | free( buf ); 38 | } 39 | } 40 | 41 | int main( int argc, char * argv[] ){ 42 | 43 | int npes, rank; 44 | int i, j, count; 45 | double * mat, * buf; 46 | 47 | MPI_Init( &argc, & argv ); 48 | MPI_Comm_size( MPI_COMM_WORLD, &npes ); 49 | MPI_Comm_rank( MPI_COMM_WORLD, &rank ); 50 | 51 | #ifdef OPENACC 52 | int ngpu = acc_get_num_devices(acc_device_nvidia); 53 | int igpu = rank % ngpu; 54 | 
acc_set_device_num(igpu, acc_device_nvidia); 55 | acc_init(acc_device_nvidia); 56 | if( !rank ) fprintf(stdout, "NUM GPU: %d\n", ngpu); 57 | fprintf(stdout, "GPU ID: %d, PID: %d\n", igpu, rank); 58 | fflush( stdout ); 59 | #endif 60 | 61 | MPI_Barrier( MPI_COMM_WORLD ); 62 | 63 | int loc_size = SIZE / npes; 64 | 65 | mat = (double *) calloc( loc_size * SIZE, sizeof(double) ); 66 | buf = (double *) calloc( SIZE * loc_size, sizeof(double) ); 67 | 68 | #pragma acc enter data create ( mat[ 0 : loc_size * SIZE ], buf[ 0 : SIZE * loc_size ] ) 69 | 70 | #pragma acc parallel loop collapse(2) present( mat ) 71 | for( i = 0; i < loc_size; i++ ){ 72 | for( j = 0; j < SIZE; j++ ){ 73 | mat[ j + ( i * SIZE ) ] = j + ( ( ( rank * loc_size ) + i ) * SIZE ) ; 74 | } 75 | } 76 | 77 | #ifdef DEBUG 78 | #pragma acc update self ( mat[ 0 : loc_size * SIZE ] ) 79 | print_par( mat, loc_size, rank, npes ); 80 | #endif 81 | 82 | //1) prepare the contigous block of data for the All2all 83 | #pragma acc parallel loop collapse(3) present( mat, buf ) 84 | for( count = 0; count < npes; count ++ ){ 85 | for( i = 0; i < loc_size; i++ ){ 86 | for( j = 0; j < loc_size; j++ ){ 87 | int i_g = i + ( count * loc_size ); 88 | int j_g = j + ( count * loc_size ); 89 | buf[ j + ( i_g * loc_size ) ] = mat[ j_g + ( i * SIZE ) ]; 90 | } 91 | } 92 | } 93 | 94 | //2) perform all2all in place 95 | #pragma acc host_data use_device( buf ) 96 | MPI_Alltoall( MPI_IN_PLACE, loc_size * loc_size, MPI_DOUBLE, buf, loc_size * loc_size, MPI_DOUBLE, MPI_COMM_WORLD); 97 | 98 | //3) local_tranposition of data into blocks 99 | #pragma acc parallel loop collapse(3) present( mat, buf ) 100 | for( count = 0; count < npes; count ++ ){ 101 | for( i = 0; i < loc_size; i++ ){ 102 | for( j = 0; j < loc_size; j++ ){ 103 | int i_g = i + ( count * loc_size ); 104 | mat[ i_g + ( j * SIZE ) ] = buf[ j + ( i_g * loc_size ) ]; 105 | } 106 | } 107 | } 108 | 109 | #ifdef DEBUG 110 | #pragma acc update self ( mat[ 0 : loc_size * SIZE ] ) 111 | 
print_par( mat, loc_size, rank, npes ); 112 | #endif 113 | 114 | MPI_Finalize(); 115 | 116 | return 0; 117 | } 118 | 119 | 120 | -------------------------------------------------------------------------------- /GPU/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /HPC/codes/00_simple.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. 
If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | int main( int argc, char **argv ) 42 | { 43 | #pragma omp parallel 44 | { 45 | 46 | #pragma omp single 47 | { 48 | printf( " »Yuk yuk, here is thread %d from " 49 | "within the single region\n", omp_get_thread_num() ); 50 | 51 | #pragma omp task 52 | { 53 | printf( "\tHi, here is thread %d " 54 | "running task A\n", omp_get_thread_num() ); 55 | } 56 | 57 | #pragma omp task 58 | { 59 | printf( "\tHi, here is thread %d " 60 | "running task B\n", omp_get_thread_num() ); 61 | } 62 | 63 | } 64 | 65 | printf(" :Hi, here is thread %d after the end " 66 | "of the single region, I was stuck waiting " 67 | "all the others\n", omp_get_thread_num() ); 68 | } 69 | 70 | return 0; 71 | 72 | } 73 | -------------------------------------------------------------------------------- /HPC/codes/00_simple_nowait.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. 
│ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | 42 | int main( int argc, char **argv ) 43 | { 44 | 45 | #pragma omp parallel 46 | { 47 | 48 | #pragma omp single nowait 49 | { 50 | printf( " »Yuk yuk, here is thread %d from " 51 | "within the single region\n", omp_get_thread_num() ); 52 | 53 | #pragma omp task 54 | { 55 | printf( "\tHi, here is thread %d " 56 | "running task A\n", omp_get_thread_num() ); 57 | } 58 | 59 | #pragma omp task 60 | { 61 | printf( "\tHi, here is thread %d " 62 | "running task B\n", omp_get_thread_num() ); 63 | } 64 | 65 | } 66 | 67 | printf(" :Hi, here is thread %d after the end " 68 | "of the single region, I'm stuck waiting " 69 | "all the others\n", omp_get_thread_num() ); 70 | } 71 | 72 | return 0; 73 | 74 | } 75 | -------------------------------------------------------------------------------- /HPC/codes/00_simple_taskwait.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | 
│ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | 42 | int main( int argc, char **argv ) 43 | { 44 | 45 | #pragma omp parallel 46 | { 47 | int me = omp_get_thread_num(); 48 | 49 | #pragma omp single nowait 50 | { 51 | printf( " »Yuk yuk, here is thread %d from " 52 | "within the single region\n", omp_get_thread_num() ); 53 | 54 | #pragma omp task 55 | { 56 | printf( "\tHi, here is thread %d " 57 | "running task A\n", omp_get_thread_num() ); 58 | } 59 | 60 | #pragma omp task 61 | { 62 | printf( "\tHi, here is thread %d " 63 | "running task B\n", omp_get_thread_num() ); 64 | } 65 | 66 | #pragma omp taskwait 67 | printf(" «Yuk yuk, it is still me, thread %d " 68 | "inside single region after all tasks ended\n", me); 69 | 70 | } 71 | 72 | printf(" :Hi, here is thread %d after the end " 73 | "of the single region, I'm stuck waiting " 74 | "all the others\n", omp_get_thread_num() ); 75 | } 76 | 77 | return 0; 78 | 79 | } 80 | 
-------------------------------------------------------------------------------- /HPC/codes/02_tasks.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | #define N_default 2000 // how long is the main array 42 | #define max_default 2000 // the maximum argument to heavy_work_? 
functions 43 | 44 | #if defined(_OPENMP) 45 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 46 | (double)ts.tv_nsec * 1e-9) 47 | 48 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 49 | (double)myts.tv_nsec * 1e-9) 50 | 51 | #else 52 | 53 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 54 | (double)ts.tv_nsec * 1e-9) 55 | #endif 56 | 57 | #if !defined(NTHREADS) // threads for the first level of parallelism 58 | // this value should be equal to the number_of_sockets-1 59 | #define NTHREADS 3 60 | #endif 61 | 62 | 63 | 64 | #if defined(DEBUG) 65 | #define PRINTF(...) printf(__VA_ARGS__) 66 | #else 67 | #define PRINTF(...) 68 | #endif 69 | 70 | typedef unsigned int uint; 71 | double heavy_work_0( uint ); 72 | double heavy_work_1( uint ); 73 | double heavy_work_2( uint ); 74 | 75 | 76 | 77 | int main( int argc, char **argv ) 78 | { 79 | 80 | int N = N_default; 81 | int max_value = max_default; 82 | int nthreads = 1; 83 | 84 | struct timespec ts; 85 | 86 | /* ----------------------------------------------------------------------------- 87 | * initialize 88 | * ----------------------------------------------------------------------------- 89 | */ 90 | 91 | // check whether some arg has been passed on 92 | if ( argc > 1 ) 93 | { 94 | N = atoi( *(argv+1) ); 95 | if( argc > 2 ) 96 | max_value = atoi( *(argv+2) ); 97 | } 98 | 99 | srand48(1234321); 100 | double result = 0; 101 | 102 | int *array = (int*)malloc( N*sizeof(double) ); 103 | 104 | 105 | // this mimic a stream of number you can not 106 | // initialize in parallel 107 | // note: that also means that data resides 108 | // in one's thread DRAM 109 | // 110 | for( int ii = 0; ii < N; ii++ ) 111 | array[ii] = 100 + (lrand48() % max_value); 112 | 113 | 114 | printf("serial summation\n" ); 115 | 116 | double tstart = CPU_TIME; 117 | 118 | for( int ii = 0; ii < N; ii++ ) 119 | result += 
heavy_work_0(array[ii]) + 120 | heavy_work_1(array[ii]) + 121 | heavy_work_2(array[ii]) ; 122 | 123 | double tstop = CPU_TIME; 124 | printf("serial summation results to be %g and took %g sec\n", result, tstop-tstart); 125 | 126 | printf("omp summation\n" ); 127 | 128 | result = 0; 129 | tstart = CPU_TIME; 130 | 131 | #pragma omp parallel shared(result) 132 | { 133 | 134 | #pragma omp single // having or not a taskwait here is irrelevant 135 | // since there are no instructions after the 136 | // single region 137 | { 138 | 139 | #pragma omp task // result is shared, no need for "shared(result)" clause 140 | { 141 | double myresult = 0; 142 | for( int jj = 0; jj < N; jj++ ) 143 | myresult += heavy_work_0( array[jj] ); 144 | #pragma omp atomic update 145 | result += myresult; 146 | } 147 | 148 | #pragma omp task // result is shared 149 | { 150 | double myresult = 0; 151 | for( int jj = 0; jj < N; jj++ ) 152 | myresult += heavy_work_1( array[jj] ); 153 | #pragma omp atomic update 154 | result += myresult; 155 | } 156 | 157 | #pragma omp task // result is shared 158 | { 159 | double myresult = 0; 160 | for( int jj = 0; jj < N; jj++ ) 161 | myresult += heavy_work_2(array[jj] ); 162 | #pragma omp atomic update 163 | result += myresult; 164 | } 165 | 166 | } 167 | 168 | // all the threads will pile-up here, waiting for all 169 | // of them to arrive here. 
170 | } 171 | 172 | 173 | 174 | double tend = CPU_TIME; 175 | 176 | 177 | /* ----------------------------------------------------------------------------- 178 | * finalize 179 | * ----------------------------------------------------------------------------- 180 | */ 181 | 182 | free(array); 183 | 184 | printf("The result is %g\nrun took %g of wall-clock time\n\n", 185 | result, tend - tstart ); 186 | 187 | 188 | return 0; 189 | } 190 | 191 | 192 | 193 | double heavy_work_0( uint N ) 194 | { 195 | double guess = 3.141572 / 3; 196 | 197 | for( int i = 0; i < N; i++ ) 198 | { 199 | guess = exp( guess ); 200 | guess = sin( guess ); 201 | 202 | } 203 | 204 | return guess; 205 | } 206 | 207 | double heavy_work_1( uint N ) 208 | { 209 | double guess = 3.141572 / 3; 210 | 211 | for( int i = 0; i < N; i++ ) 212 | { 213 | guess = log( guess ); 214 | guess = exp( sqrt(guess)/guess ); 215 | } 216 | 217 | return guess; 218 | } 219 | 220 | double heavy_work_2( uint N ) 221 | { 222 | double guess = 3.141572 / 3; 223 | 224 | for( int i = 0; i < N; i++ ) 225 | { 226 | guess = sqrt( guess ); 227 | guess = exp( 1+1.0/guess ); 228 | } 229 | 230 | return guess; 231 | } 232 | -------------------------------------------------------------------------------- /HPC/codes/02_tasks_wrong.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, 
or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | #define N_default 2000 // how long is the main array 42 | #define max_default 2000 // the maximum argument to heavy_work_? functions 43 | 44 | #if defined(_OPENMP) 45 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 46 | (double)ts.tv_nsec * 1e-9) 47 | 48 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 49 | (double)myts.tv_nsec * 1e-9) 50 | 51 | #else 52 | 53 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 54 | (double)ts.tv_nsec * 1e-9) 55 | #endif 56 | 57 | #if !defined(NTHREADS) // threads for the first level of parallelism 58 | // this value should be equal to the number_of_sockets-1 59 | #define NTHREADS 3 60 | #endif 61 | 62 | 63 | 64 | #if defined(DEBUG) 65 | #define PRINTF(...) printf(__VA_ARGS__); 66 | #else 67 | #define PRINTF(...) 
68 | #endif 69 | 70 | typedef unsigned int uint; 71 | double heavy_work_0( uint ); 72 | double heavy_work_1( uint ); 73 | double heavy_work_2( uint ); 74 | 75 | 76 | 77 | int main( int argc, char **argv ) 78 | { 79 | 80 | int N = N_default; 81 | int max_value = max_default; 82 | int nthreads = 1; 83 | 84 | struct timespec ts, myts; 85 | 86 | /* ----------------------------------------------------------------------------- 87 | * initialize 88 | * ----------------------------------------------------------------------------- 89 | */ 90 | 91 | // check whether some arg has been passed on 92 | if ( argc > 1 ) 93 | { 94 | N = atoi( *(argv+1) ); 95 | if( argc > 2 ) 96 | max_value = atoi( *(argv+2) ); 97 | } 98 | 99 | srand48(1234321); 100 | double result = 0; 101 | 102 | int *array = (int*)malloc( N*sizeof(double) ); 103 | 104 | 105 | // this mimic a stream of number you can not 106 | // initialize in parallel 107 | // note: that also means that data resides 108 | // in one's thread DRAM 109 | // 110 | for( int ii = 0; ii < N; ii++ ) 111 | array[ii] = 100 + (lrand48() % max_value); 112 | 113 | 114 | printf("serial summation\n" ); 115 | 116 | double tstart = CPU_TIME; 117 | 118 | for( int ii = 0; ii < N; ii++ ) 119 | result += heavy_work_0(array[ii]) + 120 | heavy_work_1(array[ii]) + 121 | heavy_work_2(array[ii]) ; 122 | 123 | double tstop = CPU_TIME; 124 | printf("serial summation results to be %g and took %g sec\n", result, tstop-tstart); 125 | 126 | printf("omp summation\n" ); 127 | 128 | result = 0; 129 | tstart = CPU_TIME; 130 | 131 | #pragma omp parallel shared(result) 132 | { 133 | #if defined(DEBUG) 134 | int me = omp_get_thread_num(); 135 | #endif 136 | double result1, result2, result3; 137 | 138 | #pragma omp single 139 | { 140 | PRINTF(" : Thread %d is generating the tasks\n", me); 141 | 142 | #pragma omp task 143 | { 144 | PRINTF(" + Thread %d is executing T1\n", omp_get_thread_num()); 145 | for( int jj = 0; jj < N; jj++ ) 146 | result1 += heavy_work_0( 
array[jj] ); 147 | } 148 | 149 | #pragma omp task 150 | { 151 | PRINTF(" + Thread %d is executing T2\n", omp_get_thread_num()); 152 | for( int jj = 0; jj < N; jj++ ) 153 | result2 += heavy_work_1( array[jj] ); 154 | } 155 | 156 | #pragma omp task 157 | { 158 | PRINTF(" + Thread %d is executing T3\n", omp_get_thread_num()); 159 | for( int jj = 0; jj < N; jj++ ) 160 | result3 += heavy_work_2(array[jj] ); 161 | } 162 | 163 | } 164 | 165 | #pragma omp barrier 166 | PRINTF("\tThread %d is here (%g %g %g)\n", me, result1, result2, result3 ); 167 | 168 | #pragma omp atomic update 169 | result += result1; 170 | #pragma omp atomic update 171 | result += result2; 172 | #pragma omp atomic update 173 | result += result3; 174 | 175 | PRINTF("\tThread %d is here (%g)\n", me, result ); 176 | } 177 | 178 | 179 | 180 | double tend = CPU_TIME; 181 | 182 | 183 | /* ----------------------------------------------------------------------------- 184 | * finalize 185 | * ----------------------------------------------------------------------------- 186 | */ 187 | 188 | free(array); 189 | 190 | printf("The result is %g\nrun took %g of wall-clock time\n\n", 191 | result, tend - tstart ); 192 | 193 | 194 | return 0; 195 | } 196 | 197 | 198 | 199 | double heavy_work_0( uint N ) 200 | { 201 | double guess = 3.141572 / 3; 202 | 203 | for( int i = 0; i < N; i++ ) 204 | { 205 | guess = exp( guess ); 206 | guess = sin( guess ); 207 | 208 | } 209 | 210 | return guess; 211 | } 212 | 213 | double heavy_work_1( uint N ) 214 | { 215 | double guess = 3.141572 / 3; 216 | 217 | for( int i = 0; i < N; i++ ) 218 | { 219 | guess = log( guess ); 220 | guess = exp( sqrt(guess)/guess ); 221 | } 222 | 223 | return guess; 224 | } 225 | 226 | double heavy_work_2( uint N ) 227 | { 228 | double guess = 3.141572 / 3; 229 | 230 | for( int i = 0; i < N; i++ ) 231 | { 232 | guess = sqrt( guess ); 233 | guess = exp( 1+1.0/guess ); 234 | } 235 | 236 | return guess; 237 | } 238 | 
-------------------------------------------------------------------------------- /HPC/codes/03_variable_workload.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. 
If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | #if defined(_OPENMP) 42 | #define CPU_TIME ({struct timespec ts; clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 43 | (double)ts.tv_nsec * 1e-9};) 44 | 45 | #define CPU_TIME_th ({struct timespec ts; clock_gettime( CLOCK_THREAD_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 46 | (double)ts.tv_nsec * 1e-9;}) 47 | 48 | #else 49 | 50 | #define CPU_TIME ({struct timespec ts; clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 51 | (double)ts.tv_nsec * 1e-9;}) 52 | #endif 53 | 54 | // ------------------------------------------------------------- 55 | 56 | // using the compile-time parameter TASKS_GRANULARITY we 57 | // can regulate how many tasks we are crating and, consequently, 58 | // how much workload will be assigne to each task 59 | // see below, around lines 215, 245 60 | // 61 | 62 | #if !defined(TASKS_GRANULARITY ) 63 | 64 | 65 | // when compiling with this option, a single task will execute TASK_GRANULARITY 66 | // iterations od the for loop. 
67 | // To activate that, compile adding -DTASK_GRANULARITY=#INT_VALUE 68 | // 69 | 70 | #define ROUND_N_TO_GRANULARITY {N += (N%TASKS_GRANULARITY); printf("tasks will be created with granularity: %d\n", TASKS_GRANULARITY);} 71 | #define CREATE_TASKS for ( int i = 0; i < N; i+= TASKS_GRANULARITY ) 72 | #define TASK_FOR for ( int JJ = i; JJ < i+TASKS_GRANULARITY; JJ++ ) 73 | #define TASKS_SIZE TASKS_GRANULARITY 74 | 75 | #else 76 | 77 | #define ROUND_N_TO_GRANULARITY 78 | #define CREATE_TASKS for ( int i = 0; i < N; i++ ) 79 | #define TASK_FOR 80 | #define JJ i 81 | #define TASKS_SIZE 1 82 | 83 | #endif 84 | 85 | // ------------------------------------------------------------- 86 | 87 | // if RANDOMLY_DECREASING is defined, the decreasing work case 88 | // will be randomized with a work that will be decreasing but with 89 | // some randomization added 90 | // 91 | 92 | #if defined(RANDOMLY_DECREASING) 93 | #define DECREASING_WORK( I ) workload - ((I) + rand_r(&seeds[me]) % (10+((I)/10))) 94 | #else 95 | #define DECREASING_WORK( I ) workload - (I) 96 | #endif 97 | 98 | // ------------------------------------------------------------- 99 | 100 | 101 | #define REPETITIONS 10 102 | 103 | #define NSTRATEGIES 2 104 | #define FOR 0 105 | #define TASKS 1 106 | char *STRATEGIES_NAMES[] = {"FORloop", "TASKS"}; 107 | 108 | #define NTIMINGS 2 109 | #define RND_WORK 0 110 | #define DECR_WORK 1 111 | char *TIMINGS_NAMES[] = {"RANDOM work", "DECREASING work"}; 112 | 113 | 114 | // ------------------------------------------------------------- 115 | 116 | double heavy_work( int N ); // a nonsense routine that just crunches floating point ops 117 | 118 | 119 | 120 | int main( int argc, char **argv ) 121 | { 122 | 123 | int nthreads; 124 | int N = 10000; 125 | int workload = 40000; 126 | double wtstart, wtend; 127 | 128 | 129 | 130 | if ( argc > 1 ) 131 | { 132 | N = atoi( *(argv+1) ); 133 | if ( argc > 2 ) 134 | workload = atoi(*(argv+2)); 135 | } 136 | 137 | #pragma omp parallel 138 
| #pragma omp master 139 | nthreads = omp_get_num_threads(); 140 | 141 | printf("using %d threads with N = %d\n\n", nthreads, N); 142 | 143 | double timings[NTIMINGS][NSTRATEGIES][nthreads]; 144 | double wtimings[NTIMINGS][NSTRATEGIES] = {0.0}; 145 | double min_timings[NTIMINGS][NSTRATEGIES] = {0.0}; 146 | double max_timings[NTIMINGS][NSTRATEGIES] = {0.0}; 147 | 148 | #if defined(DEBUG) 149 | unsigned int howmanytasks[nthreads]; 150 | #endif 151 | 152 | memset( timings, 0, NTIMINGS*NSTRATEGIES*nthreads*sizeof(double)); 153 | #if defined(DEBUG) 154 | memset( howmanytasks, 0, nthreads*sizeof(int)); 155 | #endif 156 | 157 | ROUND_N_TO_GRANULARITY ; 158 | 159 | for ( int R = 0; R < REPETITIONS; R++ ) 160 | { 161 | printf("shot %d/%d.. ", R+1, REPETITIONS); 162 | fflush(stdout); 163 | 164 | /* ······················································ * 165 | * * 166 | * First, we run the random work and the randomly * 167 | * decreasing work with standard for loops * 168 | * * 169 | * ······················································ */ 170 | 171 | 172 | // ----------------------------------------------------- random work, FOR 173 | wtstart = CPU_TIME; 174 | #pragma omp parallel shared(N, workload) 175 | { 176 | struct timespec myts; 177 | int myid = omp_get_thread_num(); 178 | int status = myid; 179 | 180 | srand( myid*123 ); 181 | double tstart = CPU_TIME_th; 182 | #pragma omp for schedule(dynamic, TASKS_SIZE) 183 | for( int i = 0; i < N; i++ ) 184 | heavy_work( 10 + rand_r(&status) % workload ); 185 | 186 | double tend = CPU_TIME_th; 187 | timings[RND_WORK][FOR][myid] += tend - tstart; 188 | } 189 | wtend = CPU_TIME; 190 | wtimings[RND_WORK][FOR] += wtend - wtstart; 191 | 192 | 193 | // ----------------------------------------------------- randomly decreasing work, FOR 194 | wtstart = CPU_TIME; 195 | #pragma omp parallel shared(N, workload) 196 | { 197 | struct timespec myts; 198 | int myid = omp_get_thread_num(); 199 | int status = myid; 200 | 201 | srand( 
myid*123 ); 202 | double tstart = CPU_TIME_th; 203 | 204 | #pragma omp for schedule(dynamic, TASKS_SIZE) 205 | for( int i = 0; i < N; i++ ) 206 | heavy_work( DECREASING_WORK(i) ); 207 | 208 | double tend = CPU_TIME_th; 209 | timings[DECR_WORK][FOR][myid] += tend - tstart; 210 | } 211 | wtend = CPU_TIME; 212 | wtimings[DECR_WORK][FOR] += wtend - wtstart; 213 | 214 | 215 | /* ······················································ * 216 | * * 217 | * Now, we run the random work and the randomly * 218 | * decreasing work using TASKS * 219 | * * 220 | * ······················································ */ 221 | 222 | unsigned int seeds[nthreads]; 223 | 224 | // ----------------------------------------------------- random work, TASKS 225 | 226 | wtstart = CPU_TIME; 227 | #pragma omp parallel shared(N, workload) 228 | { 229 | struct timespec myts; 230 | int myid = omp_get_thread_num(); 231 | 232 | srand( myid*123 ); 233 | double tstart = CPU_TIME_th; 234 | #pragma omp single nowait 235 | { 236 | CREATE_TASKS 237 | #pragma omp task 238 | { 239 | int me = omp_get_thread_num(); 240 | TASK_FOR 241 | heavy_work( 10 + rand_r(&seeds[me]) % workload ); 242 | } 243 | } 244 | #pragma omp barrier 245 | 246 | double tend = CPU_TIME_th; 247 | timings[RND_WORK][TASKS][myid] += tend - tstart; 248 | } 249 | wtend = CPU_TIME; 250 | wtimings[RND_WORK][TASKS] += wtend - wtstart; 251 | 252 | 253 | // ----------------------------------------------------- randomly decreasing work, TASKS 254 | 255 | wtstart = CPU_TIME; 256 | #pragma omp parallel shared(N, workload) 257 | { 258 | struct timespec myts; 259 | int myid = omp_get_thread_num(); 260 | 261 | srand( myid*123 ); 262 | double tstart = CPU_TIME_th; 263 | 264 | #pragma omp single nowait 265 | { 266 | CREATE_TASKS 267 | #pragma omp task 268 | { 269 | int me = omp_get_thread_num(); 270 | #if defined(DEBUG) 271 | howmanytasks[me]++; 272 | #endif 273 | TASK_FOR 274 | heavy_work( DECREASING_WORK(JJ) ); 275 | } 276 | 277 | } 278 | #pragma 
omp barrier 279 | double tend = CPU_TIME_th; 280 | timings[DECR_WORK][TASKS][myid] += tend - tstart; 281 | } 282 | wtend = CPU_TIME; 283 | wtimings[DECR_WORK][TASKS] += wtend - wtstart; 284 | } 285 | 286 | 287 | /* ······················································ * 288 | * * 289 | * Here below we collect the data on timings * 290 | * * 291 | * ······················································ */ 292 | 293 | 294 | double INV_REP = 1.0 / REPETITIONS; 295 | for ( int k = 0; k < NTIMINGS; k++ ) 296 | { 297 | printf("\ntimings %s:\n", TIMINGS_NAMES[k] ); 298 | double std_dev = 0; 299 | for ( int j = 0; j < NSTRATEGIES; j++ ) 300 | { 301 | min_timings[k][j] = timings[k][j][0]; 302 | max_timings[k][j] = timings[k][j][0]; 303 | std_dev = timings[k][j][0]*timings[k][j][0]; 304 | for( int i = 1; i < nthreads; i++) 305 | { 306 | timings[k][j][0] += timings[k][j][i]; 307 | std_dev += timings[k][j][i] * timings[k][j][i]; 308 | min_timings[k][j] = (min_timings[k][j] < timings[k][j][i]) ? min_timings[k][j] : timings[k][j][i]; 309 | max_timings[k][j] = (max_timings[k][j] > timings[k][j][i]) ? 
max_timings[k][j] : timings[k][j][i]; 310 | } 311 | timings[k][j][0] /= nthreads; 312 | std_dev = sqrt( std_dev/(nthreads-1) - nthreads/(nthreads-1)*timings[k][j][0]*timings[k][j][0] ); 313 | 314 | printf("\t%16s : w-clock %9.7g, avg %9.7g +- %9.7g, min: %9.7g, max: %9.7g\n", 315 | STRATEGIES_NAMES[j], 316 | wtimings[k][j]*INV_REP, timings[k][j][0]*INV_REP, std_dev*INV_REP, min_timings[k][j]*INV_REP, max_timings[k][j]*INV_REP ); 317 | } 318 | } 319 | 320 | 321 | #if defined(DEBUG) 322 | for ( int t = 0; t < nthreads; t++ ) 323 | printf("thread %d has processed %u tasks\n", t, howmanytasks[t] ); 324 | #endif 325 | 326 | 327 | return 0; 328 | } 329 | 330 | 331 | // 332 | // ---------------------------------------------- 333 | // 334 | // crunching numbers without having either 335 | // overflows or underflows 336 | // 337 | 338 | double heavy_work( int N ) 339 | { 340 | double guess = 3.141572 / 5 * N; 341 | guess = ( guess > 200 ? 111 : guess); 342 | 343 | for( int i = 0; i < N; i++ ) 344 | { 345 | guess = exp( guess ); 346 | guess = sin( guess ); 347 | 348 | } 349 | return guess; 350 | } 351 | -------------------------------------------------------------------------------- /HPC/codes/03_variable_workload.v2.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. 
│ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | #define N_default 10000 // how long is the main array 42 | #define min_default 100 // the minimum argument to heavy_work_? functions 43 | #define max_default 20000 // the maximum argument to heavy_work_? functions 44 | #define chunk_default 10 // the size of the small work chunks 45 | 46 | #define NANO_PAUSE 100 // the sleeping time when checking for initialization 47 | #define uSEC 1000 // a microsecond 48 | 49 | 50 | #if defined(_OPENMP) 51 | #define CPU_TIME ({struct timespec ts; clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 52 | (double)ts.tv_nsec * 1e-9};) 53 | 54 | #define CPU_TIME_th ({struct timespec ts; clock_gettime( CLOCK_THREAD_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 55 | (double)ts.tv_nsec * 1e-9;}) 56 | 57 | #else 58 | 59 | #define CPU_TIME ({struct timespec ts; clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 60 | (double)ts.tv_nsec * 1e-9;}) 61 | #endif 62 | 63 | 64 | #if defined(DEBUG) 65 | #define PRINTF(...) printf(__VA_ARGS__); 66 | #define PRINTFS(...) _Pragma("omp single") \ 67 | printf(__VA_ARGS__); 68 | #else 69 | #define PRINTF(...) 70 | #define PRINTFS(...) 
71 | #endif 72 | 73 | typedef unsigned int uint; 74 | double heavy_work_0( uint ); 75 | double heavy_work_1( uint ); 76 | double heavy_work_2( uint ); 77 | 78 | 79 | 80 | int main( int argc, char **argv ) 81 | { 82 | 83 | int N = N_default; 84 | int min_value = min_default; 85 | int max_value = max_default; 86 | int chunk = N / chunk_default; 87 | 88 | 89 | /* ----------------------------------------------------------------------------- 90 | * initialize 91 | * ----------------------------------------------------------------------------- 92 | */ 93 | 94 | // check whether some arg has been passed on 95 | if ( argc > 1 ) 96 | { 97 | N = atoi( *(argv+1) ); 98 | if( argc > 2 ) 99 | { 100 | max_value = atoi( *(argv+2) ); 101 | if( argc > 3 ) 102 | chunk = atoi( *(argv+3) ); 103 | } 104 | } 105 | 106 | srand48(1234321); 107 | double result = 0; 108 | 109 | int *array = (int*)malloc( N*sizeof(double) ); 110 | 111 | 112 | 113 | #if !defined(_OPENMP) 114 | 115 | /* ----------------------------------------------------------------------------- 116 | * SERIAL RUN 117 | * ----------------------------------------------------------------------------- 118 | */ 119 | 120 | 121 | printf("serial summation\n" ); 122 | 123 | double tstart = CPU_TIME; 124 | 125 | // this mimic a stream of number you can not 126 | // initialize in parallel 127 | // note: that also means that data resides 128 | // in one's thread DRAM 129 | // 130 | for( int ii = 0; ii < N; ii++ ) 131 | array[ii] = min_value + lrand48() % max_value; 132 | 133 | #if defined(DEBUG) 134 | double partial_result1, partial_result2, partial_result3 = 0; 135 | for( int ii = 0; ii < N; ii++ ) 136 | partial_result1 += heavy_work_0(array[ii]); 137 | 138 | for( int ii = 0; ii < N; ii++ ) 139 | partial_result2 += heavy_work_1(array[ii]); 140 | 141 | for( int ii = 0; ii < N; ii++ ) 142 | partial_result3 += heavy_work_2(array[ii]); 143 | 144 | result = partial_result1 + partial_result2 + partial_result3; 145 | 146 | double tend = 
CPU_TIME; 147 | 148 | printf("partial results are: %g %g %g\n", partial_result1, partial_result2, partial_result3 ); 149 | 150 | #else 151 | 152 | for( int ii = 0; ii < N; ii++ ) 153 | result += heavy_work_0(array[ii]) + 154 | heavy_work_1(array[ii]) + heavy_work_2(array[ii]); 155 | 156 | double tend = CPU_TIME; 157 | #endif 158 | 159 | #else 160 | 161 | /* ----------------------------------------------------------------------------- 162 | * PARALLEL RUN 163 | * ----------------------------------------------------------------------------- 164 | */ 165 | 166 | 167 | double tstart = CPU_TIME; 168 | 169 | #pragma omp parallel proc_bind(close) reduction(+:result) 170 | { 171 | 172 | #pragma omp single 173 | { 174 | int idx = 0; 175 | int first = 0; 176 | int last = chunk 177 | ; 178 | #if defined (MIMIC_SLOWER_INITIALIZATION) 179 | // 180 | // when compiling with this option, data are 181 | // initialized in random chunks with a random 182 | // pause within one chunk and the subsequeunt 183 | // 184 | 185 | struct timespec nanot = {0, 200*uSEC}; 186 | nanosleep( &nanot, NULL ); 187 | #endif 188 | 189 | #if defined(DEBUG) 190 | struct timespec myts; 191 | double tstart = CPU_TIME_th; 192 | int me = omp_get_thread_num(); 193 | #endif 194 | 195 | while( first < N ) 196 | { 197 | last = (last >= N)? 
N : last; 198 | for( int kk = first; kk < last; kk++, idx++ ) 199 | array[idx] = min_value + lrand48() % max_value; 200 | 201 | PRINTF("* initializer (thread %d) : %g sec, initialized chunk from %d to %d\n", 202 | me, CPU_TIME_th - tstart, first, last); 203 | 204 | #pragma omp task firstprivate(first, last) shared(result) untied 205 | // note: by default rules on the scope, variables "first" and "last" 206 | // would have been automatically firstprivate since they 207 | // are local private variables in the enclosing single 208 | // region 209 | { 210 | 211 | double myresult = 0; 212 | for( int ii = first; ii < last; ii++) 213 | myresult += heavy_work_0(array[ii]); 214 | #if defined(DEBUG) 215 | double current_result; 216 | #pragma omp atomic read 217 | current_result = result; 218 | PRINTF("\t thread %d runs chunk %d >> %d for work0 (result is %g, adding %g)\n", 219 | me, first, last, current_result, myresult); 220 | #endif 221 | #pragma omp atomic update 222 | result += myresult; 223 | } 224 | 225 | #pragma omp task firstprivate(first, last) shared(result) untied 226 | // note: by default rules on the scope, variables "first" and "last" 227 | // would have been automatically firstprivate since they 228 | // are local private variables in the enclosing single 229 | // region 230 | { 231 | double myresult = 0; 232 | for( int ii = first; ii < last; ii++) 233 | myresult += heavy_work_1(array[ii]); 234 | #if defined(DEBUG) 235 | double current_result; 236 | #pragma omp atomic read 237 | current_result = result; 238 | PRINTF("\t thread %d runs chunk %d >> %d for work1 (result is %g, adding %g)\n", 239 | me, first, last, current_result, myresult); 240 | #endif 241 | #pragma omp atomic update 242 | result += myresult; 243 | } 244 | 245 | #pragma omp task firstprivate(first, last) shared(result) untied 246 | // note: by default rules on the scope, variables "first" and "last" 247 | // would have been automatically firstprivate since they 248 | // are local private 
variables in the enclosing single 249 | // region 250 | { 251 | double myresult = 0; 252 | for( int ii = first; ii < last; ii++) 253 | myresult += heavy_work_2(array[ii]); 254 | #if defined(DEBUG) 255 | double current_result; 256 | #pragma omp atomic read 257 | current_result = result; 258 | PRINTF("\t thread %d runs chunk %d >> %d for work2 (result is %g, adding %g)\n", 259 | me, first, last, current_result, myresult); 260 | #endif 261 | #pragma omp atomic update 262 | result += myresult; 263 | } 264 | 265 | first += chunk; 266 | last += chunk; 267 | 268 | 269 | #if defined (MIMIC_SLOWER_INITIALIZATION) 270 | nanot.tv_nsec = 200*uSEC + lrand48() % 100*uSEC; 271 | nanosleep( &nanot, NULL ); 272 | #endif 273 | 274 | } 275 | PRINTF("* initializer thread: initialization lasted %g seconds\n", CPU_TIME_th - tstart ); 276 | } 277 | 278 | printf("thread waiting..\n"); 279 | 280 | // threads will wait here to receive the tasks 281 | } // close parallel region 282 | 283 | 284 | double tend = CPU_TIME; 285 | #endif 286 | 287 | 288 | 289 | /* ----------------------------------------------------------------------------- 290 | * finalize 291 | * ----------------------------------------------------------------------------- 292 | */ 293 | 294 | free(array); 295 | 296 | printf("The result is %g\nrun took %g of wall-clock time\n\n", 297 | result, tend - tstart ); 298 | 299 | 300 | return 0; 301 | } 302 | 303 | 304 | 305 | double heavy_work_0( uint N ) 306 | { 307 | double guess = 3.141572 / 3; 308 | 309 | for( int i = 0; i < N; i++ ) 310 | { 311 | guess = exp( guess ); 312 | guess = sin( guess ); 313 | 314 | } 315 | 316 | return guess; 317 | } 318 | 319 | double heavy_work_1( uint N ) 320 | { 321 | double guess = 3.141572 / 3; 322 | 323 | for( int i = 0; i < N; i++ ) 324 | { 325 | guess = log( guess ); 326 | guess = exp( sqrt(guess)/guess ); 327 | } 328 | 329 | return guess; 330 | } 331 | 332 | double heavy_work_2( uint N ) 333 | { 334 | double guess = 3.141572 / 3; 335 | 336 | 
for( int i = 0; i < N; i++ ) 337 | { 338 | guess = sqrt( guess ); 339 | guess = exp( 1+1.0/guess ); 340 | } 341 | 342 | return guess; 343 | } 344 | -------------------------------------------------------------------------------- /HPC/codes/04_tasks_reduction.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | #define N_default 20000 // how long is the main array 42 | #define min_default 100 // the minimum argument to heavy_work_? functions 43 | #define max_default 20000 // the maximum argument to heavy_work_? 
functions 44 | #define chunkf_default 10 // the size of the small work chunks 45 | 46 | 47 | #define NANO_PAUSE 100 // the sleeping time when checking for initialization 48 | #define uSEC 1000 // a microsecond 49 | 50 | #if defined(_OPENMP) 51 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 52 | (double)ts.tv_nsec * 1e-9) 53 | 54 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 55 | (double)myts.tv_nsec * 1e-9) 56 | 57 | #else 58 | 59 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 60 | (double)ts.tv_nsec * 1e-9) 61 | #endif 62 | 63 | 64 | #if defined(DEBUG) 65 | #define PRINTF(...) printf(__VA_ARGS__); 66 | #define PRINTFS(...) _Pragma("omp single") \ 67 | printf(__VA_ARGS__); 68 | #else 69 | #define PRINTF(...) 70 | #define PRINTFS(...) 71 | #endif 72 | 73 | typedef unsigned int uint; 74 | double heavy_work_0( uint ); 75 | double heavy_work_1( uint ); 76 | double heavy_work_2( uint ); 77 | 78 | 79 | 80 | int main( int argc, char **argv ) 81 | { 82 | 83 | int N = N_default; 84 | int min_value = min_default; 85 | int max_value = max_default; 86 | int chunkf = chunkf_default; 87 | int chunk = N / chunkf_default; 88 | 89 | struct timespec ts; 90 | 91 | /* ----------------------------------------------------------------------------- 92 | * initialize 93 | * ----------------------------------------------------------------------------- 94 | */ 95 | 96 | // check whether some arg has been passed on 97 | if ( argc > 1 ) 98 | { 99 | N = atoi( *(argv+1) ); 100 | if( argc > 2 ) 101 | { 102 | max_value = atoi( *(argv+2) ); 103 | if( argc > 3 ) 104 | chunkf = atoi( *(argv+3) ); 105 | } 106 | } 107 | 108 | srand48(1234321); 109 | double result = 0; 110 | 111 | int *array = (int*)malloc( N*sizeof(double) ); 112 | 113 | #if !defined(_OPENMP) 114 | 115 | printf("serial summation\n" ); 116 | 117 | double tstart = CPU_TIME; 118 | 119 | // this mimic a stream of 
number you can not 120 | // initialize in parallel 121 | // note: that also means that data resides 122 | // in one's thread DRAM 123 | // 124 | for( int ii = 0; ii < N; ii++ ) 125 | array[ii] = min_value + lrand48() % max_value; 126 | 127 | #if defined(DEBUG) 128 | double partial_result1, partial_result2, partial_result3 = 0; 129 | for( int ii = 0; ii < N; ii++ ) 130 | partial_result1 += heavy_work_0(array[ii]); 131 | 132 | for( int ii = 0; ii < N; ii++ ) 133 | partial_result2 += heavy_work_1(array[ii]); 134 | 135 | for( int ii = 0; ii < N; ii++ ) 136 | partial_result3 += heavy_work_2(array[ii]); 137 | 138 | result = partial_result1 + partial_result2 + partial_result3; 139 | 140 | double tend = CPU_TIME; 141 | 142 | printf("partial results are: %g %g %g\n", partial_result1, partial_result2, partial_result3 ); 143 | 144 | #else 145 | 146 | for( int ii = 0; ii < N; ii++ ) 147 | result += heavy_work_0(array[ii]) + 148 | heavy_work_1(array[ii]) + heavy_work_2(array[ii]); 149 | 150 | double tend = CPU_TIME; 151 | #endif 152 | 153 | #else 154 | 155 | 156 | 157 | double tstart = CPU_TIME; 158 | 159 | #pragma omp parallel proc_bind(close) reduction(+:result) 160 | { 161 | 162 | #pragma omp single nowait 163 | { 164 | int idx = 0; 165 | int first = 0; 166 | int last = chunk; 167 | #if defined (MIMIC_SLOWER_INITIALIZATION) 168 | struct timespec nanot = {0, 200*uSEC}; 169 | nanosleep( &nanot, NULL ); 170 | #endif 171 | #if defined(DEBUG) 172 | struct timespec myts; 173 | double tstart = CPU_TIME_th; 174 | int me = omp_get_thread_num(); 175 | #endif 176 | 177 | while( first < N ) 178 | { 179 | last = (last >= N)?N:last; 180 | for( int kk = first; kk < last; kk++, idx++ ) 181 | array[idx] = min_value + lrand48() % max_value; 182 | 183 | PRINTF("* initializer (thread %d) : %g sec, initialized chunk from %d to %d\n", 184 | me, CPU_TIME_th - tstart, first, last); 185 | 186 | #pragma omp task firstprivate(first, last) shared(result) untied 187 | { 188 | double myresult = 0; 189 | 
for( int ii = first; ii < last; ii++) 190 | myresult += heavy_work_0(array[ii]); 191 | #pragma omp atomic update 192 | result += myresult; 193 | } 194 | #pragma omp task firstprivate(first, last) shared(result) untied 195 | { 196 | double myresult = 0; 197 | for( int ii = first; ii < last; ii++) 198 | myresult += heavy_work_1(array[ii]); 199 | #pragma omp atomic update 200 | result += myresult; 201 | } 202 | #pragma omp task firstprivate(first, last) shared(result) untied 203 | { 204 | double myresult = 0; 205 | for( int ii = first; ii < last; ii++) 206 | myresult += heavy_work_2(array[ii]); 207 | #pragma omp atomic update 208 | result += myresult; 209 | } 210 | 211 | first += chunk; 212 | last += chunk; 213 | 214 | 215 | #if defined (MIMIC_SLOWER_INITIALIZATION) 216 | nanot.tv_nsec = 200*uSEC + lrand48() % 100*uSEC; 217 | nanosleep( &nanot, NULL ); 218 | #endif 219 | 220 | } 221 | PRINTF("* initializer thread: initialization lasted %g seconds\n", CPU_TIME_th - tstart ); 222 | } 223 | 224 | #pragma omp taskwait 225 | } // close parallel region 226 | 227 | 228 | double tend = CPU_TIME; 229 | #endif 230 | 231 | 232 | 233 | /* ----------------------------------------------------------------------------- 234 | * finalize 235 | * ----------------------------------------------------------------------------- 236 | */ 237 | 238 | free(array); 239 | 240 | printf("The result is %g\nrun took %g of wall-clock time\n\n", 241 | result, tend - tstart ); 242 | 243 | 244 | return 0; 245 | } 246 | 247 | 248 | 249 | double heavy_work_0( uint N ) 250 | { 251 | double guess = 3.141572 / 3; 252 | 253 | for( int i = 0; i < N; i++ ) 254 | { 255 | guess = exp( guess ); 256 | guess = sin( guess ); 257 | 258 | } 259 | 260 | return guess; 261 | } 262 | 263 | double heavy_work_1( uint N ) 264 | { 265 | double guess = 3.141572 / 3; 266 | 267 | for( int i = 0; i < N; i++ ) 268 | { 269 | guess = log( guess ); 270 | guess = exp( sqrt(guess)/guess ); 271 | } 272 | 273 | return guess; 274 | } 275 | 
276 | double heavy_work_2( uint N ) 277 | { 278 | double guess = 3.141572 / 3; 279 | 280 | for( int i = 0; i < N; i++ ) 281 | { 282 | guess = sqrt( guess ); 283 | guess = exp( 1+1.0/guess ); 284 | } 285 | 286 | return guess; 287 | } 288 | -------------------------------------------------------------------------------- /HPC/codes/04_unpredictable_pattern.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. 
If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | #if defined(_OPENMP) 42 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 43 | (double)ts.tv_nsec * 1e-9) 44 | 45 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 46 | (double)myts.tv_nsec * 1e-9) 47 | 48 | #else 49 | 50 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 51 | (double)ts.tv_nsec * 1e-9) 52 | #endif 53 | 54 | // ------------------------------------------------------------- 55 | 56 | // using the compile-time parameter TASKS_GRANULARITY we 57 | // can regulate how many tasks we are creating and, consequently, 58 | // how much workload will be assigned to each task 59 | // see below, around lines 215, 245 60 | // 61 | #if defined(TASKS_GRANULARITY ) 62 | 63 | #define ROUND_N_TO_GRANULARITY {N += (N%TASKS_GRANULARITY); printf("tasks will be created with granularity: %d\n", TASKS_GRANULARITY);} 64 | #define CREATE_TASKS for ( int i = 0; i < N; i+= TASKS_GRANULARITY ) 65 | #define TASK_FOR for ( int JJ = i; JJ < i+TASKS_GRANULARITY; JJ++ ) 66 | #define TASKS_SIZE TASKS_GRANULARITY 67 | 68 | #else 69 | 70 | #define ROUND_N_TO_GRANULARITY 71 | #define CREATE_TASKS for ( int i = 0; i < N; i++ ) 72 | #define TASK_FOR 73 | #define JJ i 74 | #define TASKS_SIZE 1 75 | 76 | #endif 77 | 78 | // ------------------------------------------------------------- 79 | 80 | // if RANDOMLY_DECREASING is defined, the decreasing work case 81 | // will be randomized with a work that will be decreasing but with 82 | // some randomization added 83 | // 84 | 85 | #if defined(RANDOMLY_DECREASING) 86 | #define DECREASING_WORK( I ) 
workload - ((I) + rand_r(&seeds[me]) % (10+((I)/10))) 87 | #else 88 | #define DECREASING_WORK( I ) workload - (I) 89 | #endif 90 | 91 | // ------------------------------------------------------------- 92 | 93 | 94 | #define REPETITIONS 10 95 | 96 | #define NSTRATEGIES 2 97 | #define FOR 0 98 | #define TASKS 1 99 | char *STRATEGIES_NAMES[] = {"FORloop", "TASKS"}; 100 | 101 | #define NTIMINGS 1 102 | #define RND_WORK 0 103 | char *TIMINGS_NAMES[] = {"RANDOM work"}; 104 | 105 | 106 | // ------------------------------------------------------------- 107 | 108 | double heavy_work( int N ); 109 | 110 | 111 | 112 | int main( int argc, char **argv ) 113 | { 114 | 115 | int nthreads; 116 | int N = 10000; 117 | int workload = 40000; 118 | double wtstart, wtend; 119 | struct timespec ts; 120 | 121 | 122 | if ( argc > 1 ) 123 | { 124 | N = atoi( *(argv+1) ); 125 | if ( argc > 2 ) 126 | workload = atoi(*(argv+2)); 127 | } 128 | 129 | #pragma omp parallel 130 | #pragma omp master 131 | nthreads = omp_get_num_threads(); 132 | 133 | printf("using %d threads with N = %d\n\n", nthreads, N); 134 | 135 | double timings[NTIMINGS][NSTRATEGIES][nthreads]; 136 | double wtimings[NTIMINGS][NSTRATEGIES] = {0.0}; 137 | double min_timings[NTIMINGS][NSTRATEGIES] = {0.0}; 138 | double max_timings[NTIMINGS][NSTRATEGIES] = {0.0}; 139 | 140 | #if defined(DEBUG) 141 | unsigned int howmanytasks[nthreads]; 142 | #endif 143 | 144 | memset( timings, 0, NTIMINGS*NSTRATEGIES*nthreads*sizeof(double)); 145 | #if defined(DEBUG) 146 | memset( howmanytasks, 0, nthreads*sizeof(int)); 147 | #endif 148 | 149 | ROUND_N_TO_GRANULARITY ; 150 | 151 | for ( int R = 0; R < REPETITIONS; R++ ) 152 | { 153 | printf("shot %d/%d.. 
", R+1, REPETITIONS); 154 | fflush(stdout); 155 | 156 | // ----------------------------------------------------- random work, FOR 157 | wtstart = CPU_TIME; 158 | #pragma omp parallel shared(N, workload) 159 | { 160 | struct timespec myts; 161 | int myid = omp_get_thread_num(); 162 | int status = myid; 163 | unsigned int half_workload = workload/2; 164 | 165 | srand( myid*123 + time(NULL) ); 166 | double tstart = CPU_TIME_th; 167 | #pragma omp for schedule(dynamic, TASKS_SIZE) 168 | for( int i = 0; i < N; i++ ) 169 | { 170 | unsigned int work = 10 + rand_r(&status) % workload; 171 | if ( work > half_workload ) 172 | heavy_work( work ); 173 | } 174 | double tend = CPU_TIME_th; 175 | timings[RND_WORK][FOR][myid] += tend - tstart; 176 | } 177 | wtend = CPU_TIME; 178 | wtimings[RND_WORK][FOR] += wtend - wtstart; 179 | 180 | 181 | // ----------------------------------------------------- TASKS 182 | 183 | unsigned int seeds[nthreads]; 184 | 185 | // ----------------------------------------------------- random work, TASKS 186 | 187 | wtstart = CPU_TIME; 188 | #pragma omp parallel shared(N, workload) 189 | { 190 | struct timespec myts; 191 | int myid = omp_get_thread_num(); 192 | 193 | srand( myid*123 + time(NULL) ); 194 | double tstart = CPU_TIME_th; 195 | #pragma omp single nowait 196 | { 197 | unsigned int half_workload = workload/2; 198 | 199 | CREATE_TASKS 200 | #pragma omp task 201 | { 202 | int me = omp_get_thread_num(); 203 | TASK_FOR 204 | { 205 | unsigned int work = 10 + rand_r(&seeds[me]) % workload; 206 | if ( work > half_workload ) 207 | heavy_work( work ); 208 | } 209 | } 210 | } 211 | #pragma omp barrier 212 | 213 | double tend = CPU_TIME_th; 214 | timings[RND_WORK][TASKS][myid] += tend - tstart; 215 | } 216 | wtend = CPU_TIME; 217 | wtimings[RND_WORK][TASKS] += wtend - wtstart; 218 | 219 | } 220 | 221 | double INV_REP = 1.0 / REPETITIONS; 222 | for ( int k = 0; k < NTIMINGS; k++ ) 223 | { 224 | printf("\ntimings %s:\n", TIMINGS_NAMES[k] ); 225 | double 
std_dev = 0; 226 | for ( int j = 0; j < NSTRATEGIES; j++ ) 227 | { 228 | min_timings[k][j] = timings[k][j][0]; 229 | max_timings[k][j] = timings[k][j][0]; 230 | std_dev = timings[k][j][0]*timings[k][j][0]; 231 | for( int i = 1; i < nthreads; i++) 232 | { 233 | timings[k][j][0] += timings[k][j][i]; 234 | std_dev += timings[k][j][i] * timings[k][j][i]; 235 | min_timings[k][j] = (min_timings[k][j] < timings[k][j][i]) ? min_timings[k][j] : timings[k][j][i]; 236 | max_timings[k][j] = (max_timings[k][j] > timings[k][j][i]) ? max_timings[k][j] : timings[k][j][i]; 237 | } 238 | timings[k][j][0] /= nthreads; 239 | std_dev = sqrt( std_dev/(nthreads-1) - nthreads/(nthreads-1)*timings[k][j][0]*timings[k][j][0] ); 240 | 241 | printf("\t%16s : w-clock %9.7g, avg %9.7g +- %9.7g, min: %9.7g, max: %9.7g\n", 242 | STRATEGIES_NAMES[j], 243 | wtimings[k][j]*INV_REP, timings[k][j][0]*INV_REP, std_dev*INV_REP, min_timings[k][j]*INV_REP, max_timings[k][j]*INV_REP ); 244 | } 245 | } 246 | 247 | #if defined(DEBUG) 248 | for ( int t = 0; t < nthreads; t++ ) 249 | printf("thread %d has processed %u tasks\n", t, howmanytasks[t] ); 250 | #endif 251 | return 0; 252 | } 253 | 254 | 255 | double heavy_work( int N ) 256 | { 257 | double guess = 3.141572 / 5 * N; 258 | guess = ( guess > 200 ? 
111 : guess); 259 | 260 | for( int i = 0; i < N; i++ ) 261 | { 262 | guess = exp( guess ); 263 | guess = sin( guess ); 264 | 265 | } 266 | return guess; 267 | } 268 | -------------------------------------------------------------------------------- /HPC/codes/05_taskgroup_reduction.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | #define N_default 20000 // how long is the main array 42 | #define min_default 100 // the minimum argument to heavy_work_? functions 43 | #define max_default 20000 // the maximum argument to heavy_work_? 
functions 44 | #define chunkf_default 10 // the size of the small work chunks 45 | 46 | 47 | #define NANO_PAUSE 100 // the sleeping time when checking for initialization 48 | #define uSEC 1000 // a microsecond 49 | 50 | #if defined(_OPENMP) 51 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 52 | (double)ts.tv_nsec * 1e-9) 53 | 54 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 55 | (double)myts.tv_nsec * 1e-9) 56 | 57 | #else 58 | 59 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 60 | (double)ts.tv_nsec * 1e-9) 61 | #endif 62 | 63 | 64 | #if defined(DEBUG) 65 | #define PRINTF(...) printf(__VA_ARGS__); 66 | #define PRINTFS(...) _Pragma("omp single") \ 67 | printf(__VA_ARGS__); 68 | #else 69 | #define PRINTF(...) 70 | #define PRINTFS(...) 71 | #endif 72 | 73 | typedef unsigned int uint; 74 | double heavy_work_0( uint ); 75 | double heavy_work_1( uint ); 76 | double heavy_work_2( uint ); 77 | 78 | 79 | 80 | int main( int argc, char **argv ) 81 | { 82 | 83 | int N = N_default; 84 | int min_value = min_default; 85 | int max_value = max_default; 86 | int chunkf = chunkf_default; 87 | int chunk = N / chunkf_default; 88 | 89 | struct timespec ts; 90 | 91 | /* ----------------------------------------------------------------------------- 92 | * initialize 93 | * ----------------------------------------------------------------------------- 94 | */ 95 | 96 | // check whether some arg has been passed on 97 | if ( argc > 1 ) 98 | { 99 | N = atoi( *(argv+1) ); 100 | if( argc > 2 ) 101 | { 102 | max_value = atoi( *(argv+2) ); 103 | if( argc > 3 ) 104 | chunkf = atoi( *(argv+3) ); 105 | } 106 | } 107 | 108 | srand48(1234321); 109 | double result = 0; 110 | 111 | int *array = (int*)malloc( N*sizeof(double) ); 112 | 113 | #if !defined(_OPENMP) 114 | 115 | printf("serial summation\n" ); 116 | 117 | double tstart = CPU_TIME; 118 | 119 | // this mimic a stream of 
number you can not 120 | // initialize in parallel 121 | // note: that also means that data resides 122 | // in one's thread DRAM 123 | // 124 | for( int ii = 0; ii < N; ii++ ) 125 | array[ii] = min_value + lrand48() % max_value; 126 | 127 | #if defined(DEBUG) 128 | double partial_result1, partial_result2, partial_result3 = 0; 129 | for( int ii = 0; ii < N; ii++ ) 130 | partial_result1 += heavy_work_0(array[ii]); 131 | 132 | for( int ii = 0; ii < N; ii++ ) 133 | partial_result2 += heavy_work_1(array[ii]); 134 | 135 | for( int ii = 0; ii < N; ii++ ) 136 | partial_result3 += heavy_work_2(array[ii]); 137 | 138 | result = partial_result1 + partial_result2 + partial_result3; 139 | 140 | double tend = CPU_TIME; 141 | 142 | printf("partial results are: %g %g %g\n", partial_result1, partial_result2, partial_result3 ); 143 | 144 | #else 145 | 146 | for( int ii = 0; ii < N; ii++ ) 147 | result += heavy_work_0(array[ii]) + 148 | heavy_work_1(array[ii]) + heavy_work_2(array[ii]); 149 | 150 | double tend = CPU_TIME; 151 | #endif 152 | 153 | #else 154 | 155 | 156 | 157 | double tstart = CPU_TIME; 158 | 159 | #pragma omp parallel proc_bind(close) 160 | { 161 | 162 | #pragma omp single nowait 163 | { 164 | #pragma omp taskgroup task_reduction(+:result) 165 | { 166 | int idx = 0; 167 | int first = 0; 168 | int last = chunk; 169 | #if defined (MIMIC_SLOWER_INITIALIZATION) 170 | struct timespec nanot = {0, 200*uSEC}; 171 | nanosleep( &nanot, NULL ); 172 | #endif 173 | #if defined(DEBUG) 174 | struct timespec myts; 175 | double tstart = CPU_TIME_th; 176 | int me = omp_get_thread_num(); 177 | #endif 178 | 179 | while( first < N ) 180 | { 181 | last = (last >= N)?N:last; 182 | for( int kk = first; kk < last; kk++, idx++ ) 183 | array[idx] = min_value + lrand48() % max_value; 184 | 185 | PRINTF("* initializer (thread %d) : %g sec, initialized chunk from %d to %d\n", 186 | me, CPU_TIME_th - tstart, first, last); 187 | 188 | #pragma omp task in_reduction(+:result) firstprivate(first, last) 
untied 189 | { 190 | double myresult = 0; 191 | for( int ii = first; ii < last; ii++) 192 | myresult += heavy_work_0(array[ii]); 193 | result += myresult; 194 | } 195 | #pragma omp task in_reduction(+:result) firstprivate(first, last) untied 196 | { 197 | double myresult = 0; 198 | for( int ii = first; ii < last; ii++) 199 | myresult += heavy_work_1(array[ii]); 200 | result += myresult; 201 | } 202 | #pragma omp task in_reduction(+:result) firstprivate(first, last) untied 203 | { 204 | double myresult = 0; 205 | for( int ii = first; ii < last; ii++) 206 | myresult += heavy_work_2(array[ii]); 207 | result += myresult; 208 | } 209 | 210 | first += chunk; 211 | last += chunk; 212 | 213 | 214 | #if defined (MIMIC_SLOWER_INITIALIZATION) 215 | nanot.tv_nsec = 200*uSEC + lrand48() % 100*uSEC; 216 | nanosleep( &nanot, NULL ); 217 | #endif 218 | 219 | } 220 | } 221 | PRINTF("* initializer thread: initialization lasted %g seconds\n", CPU_TIME_th - tstart ); 222 | } 223 | 224 | #pragma omp taskwait 225 | } // close parallel region 226 | 227 | 228 | double tend = CPU_TIME; 229 | #endif 230 | 231 | 232 | 233 | /* ----------------------------------------------------------------------------- 234 | * finalize 235 | * ----------------------------------------------------------------------------- 236 | */ 237 | 238 | free(array); 239 | 240 | printf("The result is %g\nrun took %g of wall-clock time\n\n", 241 | result, tend - tstart ); 242 | 243 | 244 | return 0; 245 | } 246 | 247 | 248 | 249 | double heavy_work_0( uint N ) 250 | { 251 | double guess = 3.141572 / 3; 252 | 253 | for( int i = 0; i < N; i++ ) 254 | { 255 | guess = exp( guess ); 256 | guess = sin( guess ); 257 | 258 | } 259 | 260 | return guess; 261 | } 262 | 263 | double heavy_work_1( uint N ) 264 | { 265 | double guess = 3.141572 / 3; 266 | 267 | for( int i = 0; i < N; i++ ) 268 | { 269 | guess = log( guess ); 270 | guess = exp( sqrt(guess)/guess ); 271 | } 272 | 273 | return guess; 274 | } 275 | 276 | double 
heavy_work_2( uint N ) 277 | { 278 | double guess = 3.141572 / 3; 279 | 280 | for( int i = 0; i < N; i++ ) 281 | { 282 | guess = sqrt( guess ); 283 | guess = exp( 1+1.0/guess ); 284 | } 285 | 286 | return guess; 287 | } 288 | -------------------------------------------------------------------------------- /HPC/codes/linked_list.c: -------------------------------------------------------------------------------- 1 | 2 | #if defined(__STDC__) 3 | # if (__STDC_VERSION__ >= 199901L) 4 | # define _XOPEN_SOURCE 700 5 | # endif 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | // ========================================================================= 17 | // 18 | // define useful quantities 19 | // 20 | 21 | typedef unsigned long long ull; 22 | #define TIME_CUT 1000000009 23 | 24 | 25 | #if defined(_OPENMP) 26 | 27 | int me; 28 | #pragma omp threadprivate(me) 29 | 30 | #define CPU_TIME ({ struct timespec ts; (clock_gettime( CLOCK_REALTIME, &ts ), \ 31 | (ull)ts.tv_sec * 1000000000 + \ 32 | (ull)ts.tv_nsec); }) 33 | 34 | #define CPU_TIME_th ({ struct timespec ts; (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), \ 35 | (ull)myts.tv_sec*1000000000 + \ 36 | (ull)myts.tv_nsec); }) 37 | 38 | #else 39 | 40 | #define CPU_TIME ({ struct timespec ts; (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), \ 41 | (ull)ts.tv_sec * 1000000000 + \ 42 | (ull)ts.tv_nsec); }) 43 | #endif 44 | 45 | 46 | #if defined(DEBUG) 47 | #define TIMESTAP (CPU_TIME % TIME_CUT) 48 | #define dbgout(...) printf( __VA_ARGS__ ); 49 | #else 50 | #define TIMESTAP 51 | #define dbgout(...) 
52 | #endif 53 | 54 | 55 | // 56 | // ========================================================================= 57 | // 58 | // define data structures 59 | // 60 | 61 | #define DONT_USE_TASKYIELD 0 62 | #define USE_TASKYIELD 1 63 | 64 | typedef struct llnode 65 | { 66 | int data; 67 | #if defined(_OPENMP) 68 | omp_lock_t lock; 69 | #endif 70 | 71 | struct llnode *next; 72 | struct llnode *prev; 73 | } llnode_t; 74 | 75 | // 76 | // ========================================================================= 77 | // 78 | // declare data structures 79 | // 80 | 81 | int clashes; 82 | 83 | // 84 | // ========================================================================= 85 | // 86 | // prototypes 87 | // 88 | 89 | llnode_t* get_head ( llnode_t *); 90 | int walk ( llnode_t *); 91 | int delete ( llnode_t * ); 92 | int find ( llnode_t *, int, llnode_t **, llnode_t ** ); 93 | int find_and_insert ( llnode_t *, int ); 94 | 95 | #if defined(_OPENMP) 96 | int find_and_insert_parallel ( llnode_t *, int, int ); 97 | #endif 98 | 99 | // 100 | // ========================================================================= 101 | // ========================================================================= 102 | 103 | 104 | // ······················································ 105 | 106 | llnode_t *get_head ( llnode_t *start ) 107 | /* 108 | * walk the list basck to find the list head 109 | * returns the head 110 | */ 111 | { 112 | while( start->prev != NULL ) 113 | start = start->prev; 114 | 115 | return start; 116 | } 117 | 118 | // ······················································ 119 | 120 | int walk ( llnode_t *start ) 121 | /* 122 | * walk the list starting from the node start 123 | * as first, the list is walked back until the list head 124 | * if mode == 1, the list is then walked ahed printing 125 | * the first 100 nodes. 
126 | */ 127 | { 128 | int n = 0; 129 | if ( start != NULL ) 130 | { 131 | n = 1; 132 | int prev_value = start->data; 133 | printf("%9d [-]", start->data ); 134 | start = start->next; 135 | while( start != NULL) 136 | { 137 | if (++n < 100 ) 138 | printf( "%9d %s ", 139 | start->data, 140 | (start->data < prev_value? "[!]":"[ok]") ); 141 | else if ( n == 100) 142 | printf( "..." ); 143 | prev_value = start->data; 144 | start = start->next; 145 | } 146 | } 147 | printf("\n"); 148 | return n; 149 | } 150 | 151 | 152 | // ······················································ 153 | 154 | int delete ( llnode_t *head ) 155 | /* 156 | * delete all the nodes 157 | * destroy every lock 158 | */ 159 | { 160 | while ( head != NULL ) 161 | { 162 | llnode_t *prev = head; 163 | head = head->next; 164 | #if defined(_OPENMP) 165 | omp_destroy_lock( &(prev->lock) ); 166 | #endif 167 | free( prev ); 168 | } 169 | return 0; 170 | } 171 | 172 | 173 | // ······················································ 174 | 175 | int find ( llnode_t *head, int value, llnode_t **prev, llnode_t **next ) 176 | { 177 | *prev = NULL, *next = NULL; 178 | 179 | if ( head == NULL ) 180 | // The first node must exist in this simple 181 | // implementation. 
182 | // To improve that, pass **head instead 183 | // of *head 184 | return -1; 185 | 186 | int nsteps = 0; 187 | llnode_t *ptr = NULL; 188 | 189 | if ( head-> data > value ) 190 | { 191 | // we need to walk back 192 | // 193 | ptr = head->prev; 194 | *next = head; 195 | while ( (ptr != NULL) && (ptr->data > value) ) 196 | { 197 | *next = ptr; 198 | ptr = ptr->prev; 199 | nsteps++; 200 | } 201 | *prev = ptr; 202 | } 203 | else 204 | { 205 | // we need to walk ahead 206 | // 207 | ptr = head->next; 208 | *prev = head; 209 | while ( (ptr != NULL) && (ptr->data < value) ) 210 | { 211 | *prev = ptr; 212 | ptr = ptr->next; 213 | nsteps++; 214 | } 215 | *next = ptr; 216 | } 217 | 218 | return nsteps; 219 | } 220 | 221 | 222 | int find_and_insert( llnode_t *head, int value ) 223 | { 224 | if ( head == NULL ) 225 | // The first node must exist in this simple 226 | // implementation. 227 | // To improve that, pass **head instead 228 | // of *head 229 | return -1; 230 | 231 | llnode_t *prev = NULL, *next = NULL; 232 | 233 | find ( head, value, &prev, &next ); 234 | 235 | llnode_t *new = (llnode_t*)malloc( sizeof(llnode_t) ); 236 | if ( new == NULL ) 237 | // signals a problem in mem alloc 238 | return -2; 239 | 240 | new->data = value; 241 | new->prev = prev; 242 | new->next = next; 243 | if( prev != NULL ) 244 | prev->next = new; 245 | if( next != NULL ) 246 | next->prev = new; 247 | 248 | return 0; 249 | } 250 | 251 | 252 | 253 | #if defined(_OPENMP) 254 | 255 | 256 | // ······················································ 257 | 258 | 259 | int find_and_insert_parallel( llnode_t *head, int value, int use_taskyield ) 260 | { 261 | if ( head == NULL ) 262 | return -1; 263 | 264 | llnode_t *prev = NULL, *next = NULL; 265 | 266 | dbgout("[ %llu ] > T %d process value %d\n", TIMESTAMP, me, value ); 267 | 268 | find ( head, value, &prev, &next ); 269 | 270 | dbgout("[ %llu ] T %d V %d found p: %d and n: %d\n", TIMESTAMP, me, value, 271 | prev!=NULL?prev->data:-1, 
next!=NULL?next->data:-1); 272 | 273 | // to our best knowledge, ptr is the first node with data > value 274 | // and prev is the last node with data < value 275 | // then, we should create a new node between prev and ptr 276 | 277 | // acquire the lock of prev and next 278 | // 279 | 280 | int locks_acquired = 0; 281 | while( !locks_acquired ) 282 | { 283 | if( prev != NULL ) 284 | { 285 | if ( use_taskyield ) { 286 | while ( omp_test_lock(&(prev->lock)) == 0 ) { 287 | #pragma omp taskyield 288 | } } 289 | else 290 | omp_set_lock(&(prev->lock)); 291 | 292 | locks_acquired = 1; 293 | } 294 | 295 | if ( next != NULL ) 296 | { 297 | locks_acquired = omp_test_lock(&(next->lock)); 298 | if( !locks_acquired && (prev!=NULL) ) 299 | omp_unset_lock(&(prev->lock)); 300 | if ( use_taskyield ) { 301 | #pragma omp taskyield 302 | } 303 | } 304 | } 305 | 306 | 307 | dbgout("[ %llu ] T %d V %d locked: (p: %d p>n: %d) (n: %d ndata:-1),((prev!=NULL)&&(prev->next!=NULL)?(prev->next)->data:-1), 310 | (next!=NULL?next->data:-1),((next!=NULL)&&(next->prev!=NULL)?(next->prev)->data:-1) ); 311 | 312 | // meanwhile, did somebody already insert a node between prev and next? 
313 | if( ( (prev != NULL) && (prev-> next != next) ) || 314 | ( (next != NULL) && (next-> prev != prev) ) ) 315 | { 316 | // yes, that happened 317 | // let's keep track of how many clashes 318 | // 319 | #pragma omp atomic update 320 | clashes++; 321 | 322 | if( (prev != NULL) && (prev-> next != next) ) 323 | { 324 | // the next pointer has changed 325 | // prev is not null, so that is our still valid point 326 | // we'll walk ahead from there 327 | // 328 | dbgout("[ %llu ]\t>> T %d V %d next has changed: from %d to %d\n", 329 | TIMESTAMP, me, value, 330 | (next!=NULL?next->data:-1),(prev->next!=NULL?(prev->next)->data:-1) ); 331 | 332 | if (next != NULL) 333 | // free the lock on the old next 334 | omp_unset_lock(&(next->lock)); 335 | 336 | dbgout("[ %llu ]\t\t>>> T %d V %d restart from %d to walk ahead\n", 337 | TIMESTAMP, me, value, prev->data); 338 | 339 | // search again, while always keeping prev locked 340 | next = prev->next; 341 | while(next) 342 | { 343 | dbgout("[ %llu ]\t\t\t>>> T %d V %d stepping into %d\n", 344 | TIMESTAMP, me, value, next->data ); 345 | omp_set_lock(&(next->lock)); 346 | 347 | if( next->data >= value ) 348 | break; 349 | omp_unset_lock(&(prev->lock)); 350 | prev = next; 351 | next = next->next; 352 | } 353 | } 354 | 355 | else if ( next->prev != prev ) 356 | // note that next can not be NULL 357 | { 358 | // the prev pointer has changed 359 | // next is not null, so that is our still valid point 360 | // we walk back from there 361 | // 362 | dbgout("[ %llu ]\t>> T %d V %d prev has changed: from %d to %d\n", 363 | TIMESTAMP, me, value, 364 | (prev!=NULL?prev->data:-1),(next->prev!=NULL?(next->prev)->data:-1) ); 365 | 366 | if (prev != NULL) 367 | // free the lock on the old next 368 | omp_unset_lock(&(prev->lock)); 369 | 370 | dbgout("[ %llu ]\t\t>> T %d V %d restart from %d to walk back\n", 371 | TIMESTAMP, me, value, next->data); 372 | 373 | // search again, while always keeping prev locked 374 | prev = next->prev; 375 | 
while(prev) 376 | { 377 | dbgout("[ %llu ]\t\t\t>>> T %d V %d stepping into %d\n", 378 | TIMESTAMP, me, value, prev->data); 379 | omp_set_lock(&(prev->lock)); 380 | if( prev->data <= value ) 381 | break; 382 | omp_unset_lock(&(next->lock)); 383 | next = prev; 384 | prev = prev->prev; 385 | } 386 | } 387 | else if ( next == NULL ) 388 | { 389 | printf("Some serious error occurred, a prev = next = NULL situation arose!\n"); 390 | return -3; 391 | } 392 | } 393 | 394 | // 395 | // insertion code 396 | // 397 | 398 | llnode_t *new = (llnode_t*)malloc( sizeof(llnode_t) ); 399 | if ( new == NULL ) 400 | return -2; 401 | 402 | new->data = value; 403 | new->prev = prev; 404 | new->next = next; 405 | omp_init_lock( &(new->lock) ); 406 | if ( prev != NULL ) 407 | prev->next = new; 408 | if ( next != NULL) 409 | next->prev = new; 410 | 411 | // release locks 412 | // 413 | if ( prev != NULL ) { 414 | omp_unset_lock(&(prev->lock)); 415 | dbgout("[ %llu ]\tthread %d processing %d releases lock for %d\n", 416 | TIMESTAMP, me, value, prev->data);} 417 | 418 | if( next != NULL ) { 419 | omp_unset_lock(&(next->lock)); 420 | dbgout("[ %llu ]\tthread %d processing %d releases lock for %d\n", 421 | TIMESTAMP, me, value, next->data);} 422 | 423 | dbgout("T %d V %d has done\n", me, value); 424 | return 0; 425 | } 426 | 427 | #endif 428 | 429 | 430 | // ······················································ 431 | 432 | int main ( int argc, char **argv ) 433 | { 434 | int N, mode; 435 | 436 | { 437 | int a = 1; 438 | N = ( argc > 1 ? atoi(*(argv+a++)) : 1000000 ); 439 | #if defined(_OPENMP) 440 | mode = ( argc > a ? atoi(*(argv+a++)) : DONT_USE_TASKYIELD ); 441 | #endif 442 | int seed = ( argc > a ? 
atoi(*(argv+a++)) : 98765 ); 443 | 444 | srand( seed ); 445 | } 446 | 447 | 448 | llnode_t *head = (llnode_t*)malloc(sizeof(llnode_t)); 449 | head->data = rand(); 450 | head->prev = NULL; 451 | head->next = NULL; 452 | #if defined(_OPENMP) 453 | omp_init_lock( &(head->lock) ); 454 | #endif 455 | 456 | ull timing = CPU_TIME; 457 | 458 | #if !defined(_OPENMP) 459 | 460 | int n = 1; 461 | while ( n < N ) 462 | { 463 | int new_value = rand(); 464 | int ret = find_and_insert( head, new_value ); 465 | if ( ret < 0 ) 466 | { 467 | printf("I've got a problem inserting node %d\n", n); 468 | delete( head ); 469 | } 470 | n++; 471 | } 472 | 473 | #else 474 | 475 | #pragma omp parallel 476 | { 477 | me = omp_get_thread_num(); 478 | #pragma omp single 479 | { 480 | printf("running with %d threads\n", omp_get_num_threads()); 481 | int n = 1; 482 | 483 | while ( n < N ) 484 | { 485 | int new_value = rand(); 486 | 487 | #pragma omp task 488 | find_and_insert_parallel( head, new_value, mode ); 489 | 490 | n++; 491 | } 492 | } 493 | } 494 | 495 | #endif 496 | 497 | timing = CPU_TIME - timing; 498 | 499 | head = get_head( head ); 500 | 501 | int actual_nodes = walk( head); 502 | if ( actual_nodes != N ) 503 | printf("shame on me! 
%d nodes instaed of %d have been found!", 504 | actual_nodes, N); 505 | 506 | delete ( head ); 507 | 508 | char string[23] = {0}; 509 | #if defined(_OPENMP) 510 | sprintf( string, " with %d clashes", clashes); 511 | #endif 512 | printf("generation took %g seconds (wtime) %s\n", ((double)timing/1e9), string); 513 | 514 | 515 | return 0; 516 | } 517 | -------------------------------------------------------------------------------- /HPC/codes/linked_list.deadlock.c: -------------------------------------------------------------------------------- 1 | 2 | #if defined(__STDC__) 3 | # if (__STDC_VERSION__ >= 199901L) 4 | # define _XOPEN_SOURCE 700 5 | # endif 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | // ========================================================================= 17 | // 18 | // define useful quantities 19 | // 20 | 21 | typedef unsigned long long ull; 22 | #define TIME_CUT 1000000009 23 | 24 | 25 | #if defined(_OPENMP) 26 | 27 | int me; 28 | #pragma omp threadprivate(me) 29 | 30 | #define CPU_TIME ({ struct timespec ts; (clock_gettime( CLOCK_REALTIME, &ts ), \ 31 | (ull)ts.tv_sec * 1000000000 + \ 32 | (ull)ts.tv_nsec); }) 33 | 34 | #define CPU_TIME_th ({ struct timespec ts; (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), \ 35 | (ull)myts.tv_sec*1000000000 + \ 36 | (ull)myts.tv_nsec); }) 37 | 38 | #else 39 | 40 | #define CPU_TIME ({ struct timespec ts; (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), \ 41 | (ull)ts.tv_sec * 1000000000 + \ 42 | (ull)ts.tv_nsec); }) 43 | #endif 44 | 45 | 46 | #if defined(DEBUG) 47 | #define TIMESTAP (CPU_TIME % TIME_CUT) 48 | #define dbgout(...) printf( __VA_ARGS__ ); 49 | #else 50 | #define TIMESTAP 51 | #define dbgout(...) 
52 | #endif 53 | 54 | 55 | // 56 | // ========================================================================= 57 | // 58 | // define data structures 59 | // 60 | 61 | #define DONT_USE_TASKYIELD 0 62 | #define USE_TASKYIELD 1 63 | 64 | typedef struct llnode 65 | { 66 | int data; 67 | #if defined(_OPENMP) 68 | omp_lock_t lock; 69 | #endif 70 | 71 | struct llnode *next; 72 | struct llnode *prev; 73 | } llnode_t; 74 | 75 | // 76 | // ========================================================================= 77 | // 78 | // declare data structures 79 | // 80 | 81 | int clashes; 82 | 83 | // 84 | // ========================================================================= 85 | // 86 | // prototypes 87 | // 88 | 89 | llnode_t* get_head ( llnode_t *); 90 | int walk ( llnode_t *); 91 | int delete ( llnode_t * ); 92 | int find ( llnode_t *, int, llnode_t **, llnode_t ** ); 93 | int find_and_insert ( llnode_t *, int ); 94 | 95 | #if defined(_OPENMP) 96 | int find_and_insert_parallel ( llnode_t *, int, int ); 97 | #endif 98 | 99 | // 100 | // ========================================================================= 101 | // ========================================================================= 102 | 103 | 104 | // ······················································ 105 | 106 | llnode_t *get_head ( llnode_t *start ) 107 | /* 108 | * walk the list basck to find the list head 109 | * returns the head 110 | */ 111 | { 112 | while( start->prev != NULL ) 113 | start = start->prev; 114 | 115 | return start; 116 | } 117 | 118 | // ······················································ 119 | 120 | int walk ( llnode_t *start ) 121 | /* 122 | * walk the list starting from the node start 123 | * as first, the list is walked back until the list head 124 | * if mode == 1, the list is then walked ahed printing 125 | * the first 100 nodes. 
126 | */ 127 | { 128 | int n = 0; 129 | if ( start != NULL ) 130 | { 131 | n = 1; 132 | int prev_value = start->data; 133 | printf("%9d [-]", start->data ); 134 | start = start->next; 135 | while( start != NULL) 136 | { 137 | if (++n < 100 ) 138 | printf( "%9d %s ", 139 | start->data, 140 | (start->data < prev_value? "[!]":"[ok]") ); 141 | else if ( n == 100) 142 | printf( "..." ); 143 | prev_value = start->data; 144 | start = start->next; 145 | } 146 | } 147 | printf("\n"); 148 | return n; 149 | } 150 | 151 | 152 | // ······················································ 153 | 154 | int delete ( llnode_t *head ) 155 | /* 156 | * delete all the nodes 157 | * destroy every lock 158 | */ 159 | { 160 | while ( head != NULL ) 161 | { 162 | llnode_t *prev = head; 163 | head = head->next; 164 | #if defined(_OPENMP) 165 | omp_destroy_lock( &(prev->lock) ); 166 | #endif 167 | free( prev ); 168 | } 169 | return 0; 170 | } 171 | 172 | 173 | // ······················································ 174 | 175 | int find ( llnode_t *head, int value, llnode_t **prev, llnode_t **next ) 176 | { 177 | *prev = NULL, *next = NULL; 178 | 179 | if ( head == NULL ) 180 | // The first node must exist in this simple 181 | // implementation. 
182 | // To improve that, pass **head instead 183 | // of *head 184 | return -1; 185 | 186 | int nsteps = 0; 187 | llnode_t *ptr = NULL; 188 | 189 | if ( head-> data > value ) 190 | { 191 | // we need to walk back 192 | // 193 | ptr = head->prev; 194 | *next = head; 195 | while ( (ptr != NULL) && (ptr->data > value) ) 196 | { 197 | *next = ptr; 198 | ptr = ptr->prev; 199 | nsteps++; 200 | } 201 | *prev = ptr; 202 | } 203 | else 204 | { 205 | // we need to walk ahead 206 | // 207 | ptr = head->next; 208 | *prev = head; 209 | while ( (ptr != NULL) && (ptr->data < value) ) 210 | { 211 | *prev = ptr; 212 | ptr = ptr->next; 213 | nsteps++; 214 | } 215 | *next = ptr; 216 | } 217 | 218 | return nsteps; 219 | } 220 | 221 | 222 | int find_and_insert( llnode_t *head, int value ) 223 | { 224 | if ( head == NULL ) 225 | // The first node must exist in this simple 226 | // implementation. 227 | // To improve that, pass **head instead 228 | // of *head 229 | return -1; 230 | 231 | llnode_t *prev = NULL, *next = NULL; 232 | 233 | find ( head, value, &prev, &next ); 234 | 235 | llnode_t *new = (llnode_t*)malloc( sizeof(llnode_t) ); 236 | if ( new == NULL ) 237 | // signals a problem in mem alloc 238 | return -2; 239 | 240 | new->data = value; 241 | new->prev = prev; 242 | new->next = next; 243 | if( prev != NULL ) 244 | prev->next = new; 245 | if( next != NULL ) 246 | next->prev = new; 247 | 248 | return 0; 249 | } 250 | 251 | 252 | 253 | #if defined(_OPENMP) 254 | 255 | 256 | // ······················································ 257 | 258 | 259 | int find_and_insert_parallel( llnode_t *head, int value, int use_taskyield ) 260 | { 261 | if ( head == NULL ) 262 | return -1; 263 | 264 | llnode_t *prev = NULL, *next = NULL; 265 | 266 | dbgout("[ %llu ] > T %d process value %d\n", TIMESTAMP, me, value ); 267 | 268 | find ( head, value, &prev, &next ); 269 | 270 | dbgout("[ %llu ] T %d V %d found p: %d and n: %d\n", TIMESTAMP, me, value, 271 | prev!=NULL?prev->data:-1, 
next!=NULL?next->data:-1); 272 | 273 | // to our best knowledge, ptr is the first node with data > value 274 | // and prev is the last node with data < value 275 | // then, we should create a new node between prev and ptr 276 | 277 | // acquire the lock of prev and next 278 | // 279 | if ( use_taskyield ) 280 | { 281 | if ( prev != NULL ) 282 | while ( omp_test_lock(&(prev->lock)) == 0 ) { 283 | #pragma omp taskyield 284 | } 285 | prev->owner=me; 286 | if ( next != NULL ) 287 | while ( omp_test_lock(&(next->lock)) == 0 ) { 288 | #pragma omp taskyield 289 | } 290 | } 291 | else 292 | { 293 | if( prev != NULL ) 294 | omp_set_lock(&(prev->lock)); 295 | 296 | if( next != NULL ) 297 | omp_set_lock(&(next->lock)); 298 | } 299 | 300 | 301 | dbgout("[ %llu ] T %d V %d locked: (p: %d p>n: %d) (n: %d ndata:-1),((prev!=NULL)&&(prev->next!=NULL)?(prev->next)->data:-1), 304 | (next!=NULL?next->data:-1),((next!=NULL)&&(next->prev!=NULL)?(next->prev)->data:-1) ); 305 | 306 | // meanwhile, did somebody already insert a node between prev and next? 
307 | if( ( (prev != NULL) && (prev-> next != next) ) || 308 | ( (next != NULL) && (next-> prev != prev) ) ) 309 | { 310 | // yes, that happened 311 | // let's keep track of how many clashes 312 | // 313 | #pragma omp atomic update 314 | clashes++; 315 | 316 | if( (prev != NULL) && (prev-> next != next) ) 317 | { 318 | // the next pointer has changed 319 | // prev is not null, so that is our still valid point 320 | // we'll walk ahead from there 321 | // 322 | dbgout("[ %llu ]\t>> T %d V %d next has changed: from %d to %d\n", 323 | TIMESTAMP, me, value, 324 | (next!=NULL?next->data:-1),(prev->next!=NULL?(prev->next)->data:-1) ); 325 | 326 | if (next != NULL) 327 | // free the lock on the old next 328 | omp_unset_lock(&(next->lock)); 329 | 330 | dbgout("[ %llu ]\t\t>>> T %d V %d restart from %d to walk ahead\n", 331 | TIMESTAMP, me, value, prev->data); 332 | 333 | // search again, while always keeping prev locked 334 | next = prev->next; 335 | while(next) 336 | { 337 | dbgout("[ %llu ]\t\t\t>>> T %d V %d stepping into %d\n", 338 | TIMESTAMP, me, value, next->data ); 339 | omp_set_lock(&(next->lock)); 340 | 341 | if( next->data >= value ) 342 | break; 343 | omp_unset_lock(&(prev->lock)); 344 | prev = next; 345 | next = next->next; 346 | } 347 | } 348 | 349 | else if ( next->prev != prev ) 350 | // note that next can not be NULL 351 | { 352 | // the prev pointer has changed 353 | // next is not null, so that is our still valid point 354 | // we walk back from there 355 | // 356 | dbgout("[ %llu ]\t>> T %d V %d prev has changed: from %d to %d\n", 357 | TIMESTAMP, me, value, 358 | (prev!=NULL?prev->data:-1),(next->prev!=NULL?(next->prev)->data:-1) ); 359 | 360 | if (prev != NULL) 361 | // free the lock on the old next 362 | omp_unset_lock(&(prev->lock)); 363 | 364 | dbgout("[ %llu ]\t\t>> T %d V %d restart from %d to walk back\n", 365 | TIMESTAMP, me, value, next->data); 366 | 367 | // search again, while always keeping prev locked 368 | prev = next->prev; 369 | 
while(prev) 370 | { 371 | dbgout("[ %llu ]\t\t\t>>> T %d V %d stepping into %d\n", 372 | TIMESTAMP, me, value, prev->data); 373 | omp_set_lock(&(prev->lock)); 374 | if( prev->data <= value ) 375 | break; 376 | omp_unset_lock(&(next->lock)); 377 | next = prev; 378 | prev = prev->prev; 379 | } 380 | } 381 | else if ( next == NULL ) 382 | { 383 | printf("Some serious error occurred, a prev = next = NULL situation arose!\n"); 384 | return -3; 385 | } 386 | } 387 | 388 | // 389 | // insertion code 390 | // 391 | 392 | llnode_t *new = (llnode_t*)malloc( sizeof(llnode_t) ); 393 | if ( new == NULL ) 394 | return -2; 395 | 396 | new->data = value; 397 | new->prev = prev; 398 | new->next = next; 399 | omp_init_lock( &(new->lock) ); 400 | if ( prev != NULL ) 401 | prev->next = new; 402 | if ( next != NULL) 403 | next->prev = new; 404 | 405 | // release locks 406 | // 407 | if ( prev != NULL ) { 408 | omp_unset_lock(&(prev->lock)); 409 | dbgout("[ %llu ]\tthread %d processing %d releases lock for %d\n", 410 | TIMESTAMP, me, value, prev->data);} 411 | 412 | if( next != NULL ) { 413 | omp_unset_lock(&(next->lock)); 414 | dbgout("[ %llu ]\tthread %d processing %d releases lock for %d\n", 415 | TIMESTAMP, me, value, next->data);} 416 | 417 | dbgout("T %d V %d has done\n", me, value); 418 | return 0; 419 | } 420 | 421 | #endif 422 | 423 | 424 | // ······················································ 425 | 426 | int main ( int argc, char **argv ) 427 | { 428 | int N, mode; 429 | 430 | { 431 | int a = 1; 432 | N = ( argc > 1 ? atoi(*(argv+a++)) : 1000000 ); 433 | #if defined(_OPENMP) 434 | mode = ( argc > a ? atoi(*(argv+a++)) : DONT_USE_TASKYIELD ); 435 | #endif 436 | int seed = ( argc > a ? 
atoi(*(argv+a++)) : 98765 ); 437 | 438 | srand( seed ); 439 | } 440 | 441 | 442 | llnode_t *head = (llnode_t*)malloc(sizeof(llnode_t)); 443 | head->data = rand(); 444 | head->prev = NULL; 445 | head->next = NULL; 446 | #if defined(_OPENMP) 447 | omp_init_lock( &(head->lock) ); 448 | #endif 449 | 450 | ull timing = CPU_TIME; 451 | 452 | #if !defined(_OPENMP) 453 | 454 | int n = 1; 455 | while ( n < N ) 456 | { 457 | int new_value = rand(); 458 | int ret = find_and_insert( head, new_value ); 459 | if ( ret < 0 ) 460 | { 461 | printf("I've got a problem inserting node %d\n", n); 462 | delete( head ); 463 | } 464 | n++; 465 | } 466 | 467 | #else 468 | 469 | #pragma omp parallel 470 | { 471 | me = omp_get_thread_num(); 472 | #pragma omp single 473 | { 474 | printf("running with %d threads\n", omp_get_num_threads()); 475 | int n = 1; 476 | 477 | while ( n < N ) 478 | { 479 | int new_value = rand(); 480 | 481 | #pragma omp task 482 | find_and_insert_parallel( head, new_value, mode ); 483 | 484 | n++; 485 | } 486 | } 487 | } 488 | 489 | #endif 490 | 491 | timing = CPU_TIME - timing; 492 | 493 | head = get_head( head ); 494 | 495 | int actual_nodes = walk( head); 496 | if ( actual_nodes != N ) 497 | printf("shame on me! 
%d nodes instaed of %d have been found!", 498 | actual_nodes, N); 499 | 500 | delete ( head ); 501 | 502 | char string[23] = {0}; 503 | #if defined(_OPENMP) 504 | sprintf( string, " with %d clashes", clashes); 505 | #endif 506 | printf("generation took %g seconds (wtime) %s\n", ((double)timing/1e9), string); 507 | 508 | 509 | return 0; 510 | } 511 | -------------------------------------------------------------------------------- /HPC/codes/quicksort.v0.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. 
If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | 42 | 43 | #if defined(_OPENMP) 44 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 45 | (double)ts.tv_nsec * 1e-9) 46 | 47 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 48 | (double)myts.tv_nsec * 1e-9) 49 | 50 | #else 51 | 52 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 53 | (double)ts.tv_nsec * 1e-9) 54 | #endif 55 | 56 | #if defined(DEBUG) 57 | #define VERBOSE 58 | #endif 59 | 60 | #if defined(VERBOSE) 61 | #define PRINTF(...) printf(__VA_ARGS__) 62 | #else 63 | #define PRINTF(...) 64 | #endif 65 | 66 | 67 | 68 | 69 | #define MAX( a, b ) ( (a)->data[HOT] >(b)->data[HOT]? (a) : (b) ); 70 | #define MIN( a, b ) ( (a)->data[HOT] <(b)->data[HOT]? 
(a) : (b) ); 71 | 72 | #if !defined(DATA_SIZE) 73 | #define DATA_SIZE 8 74 | #endif 75 | #define HOT 0 76 | 77 | #if (!defined(DEBUG) || defined(_OPENMP)) 78 | #define N_dflt 100000 79 | #else 80 | #define N_dflt 10000 81 | #endif 82 | 83 | typedef struct 84 | { 85 | double data[DATA_SIZE]; 86 | } data_t; 87 | 88 | typedef int (compare_t)(const void*, const void*); 89 | typedef int (verify_t)(data_t *, int, int, int); 90 | 91 | extern inline compare_t compare; 92 | extern inline compare_t compare_ge; 93 | verify_t verify_partitioning; 94 | verify_t verify_sorting; 95 | verify_t show_array; 96 | 97 | extern inline int partitioning( data_t *, int, int, compare_t ); 98 | void pqsort( data_t *, int, int, compare_t ); 99 | 100 | 101 | 102 | int main ( int argc, char **argv ) 103 | { 104 | 105 | 106 | // --------------------------------------------- 107 | // get the arguments 108 | // 109 | 110 | 111 | int N = N_dflt; 112 | 113 | /* check command-line arguments */ 114 | { 115 | int a = 0; 116 | 117 | if ( argc > ++a ) N = atoi(*(argv+a)); 118 | } 119 | 120 | // --------------------------------------------- 121 | // generate the array 122 | // 123 | 124 | data_t *data = (data_t*)malloc(N*sizeof(data_t)); 125 | long int seed; 126 | #if defined(_OPENMP) 127 | #pragma omp parallel 128 | { 129 | int me = omp_get_thread_num(); 130 | short int seed = time(NULL) % ( (1 << sizeof(short int))-1 ); 131 | short int seeds[3] = {seed-me, seed+me, seed+me*2}; 132 | 133 | #pragma omp for 134 | for ( int i = 0; i < N; i++ ) 135 | data[i].data[HOT] = erand48( seeds ); 136 | } 137 | #else 138 | { 139 | seed = time(NULL); 140 | srand48(seed); 141 | 142 | PRINTF("ssed is % ld\n", seed); 143 | 144 | for ( int i = 0; i < N; i++ ) 145 | data[i].data[HOT] = drand48(); 146 | } 147 | #endif 148 | 149 | 150 | // --------------------------------------------- 151 | // process 152 | // 153 | struct timespec ts; 154 | int nthreads = 1; 155 | double tstart = CPU_TIME; 156 | 157 | #if defined(_OPENMP) 
158 | 159 | #pragma omp parallel 160 | { 161 | #pragma omp single 162 | { 163 | nthreads = omp_get_num_threads(); 164 | pqsort( data, 0, N, compare_ge ); 165 | } 166 | } 167 | 168 | #else 169 | 170 | pqsort( data, 0, N, compare_ge ); 171 | #endif 172 | 173 | double tend = CPU_TIME; 174 | 175 | // --------------------------------------------- 176 | // release the memory and stop 177 | // 178 | 179 | if ( verify_sorting( data, 0, N, 0) ) 180 | printf("%d\t%g sec\n", nthreads, tend-tstart); 181 | else 182 | printf("the array is not sorted correctly\n"); 183 | 184 | free( data ); 185 | 186 | return 0; 187 | } 188 | 189 | 190 | #define SWAP(A,B,SIZE) do {int sz = (SIZE); char *a = (A); char *b = (B); \ 191 | do { char _temp = *a;*a++ = *b;*b++ = _temp;} while (--sz);} while (0) 192 | 193 | inline int partitioning( data_t *data, int start, int end, compare_t cmp_ge ) 194 | { 195 | 196 | // pick up the meadian of [0], [mid] and [end] as pivot 197 | // 198 | /* to be done */ 199 | 200 | // pick up the last element as pivot 201 | // 202 | --end; 203 | void *pivot = (void*)&data[end]; 204 | 205 | int pointbreak = end-1; 206 | for ( int i = start; i <= pointbreak; i++ ) 207 | if( cmp_ge( (void*)&data[i], pivot ) ) 208 | { 209 | while( (pointbreak > i) && cmp_ge( (void*)&data[pointbreak], pivot ) ) pointbreak--; 210 | if (pointbreak > i ) 211 | SWAP( (void*)&data[i], (void*)&data[pointbreak--], sizeof(data_t) ); 212 | } 213 | pointbreak += !cmp_ge( (void*)&data[pointbreak], pivot ) ; 214 | SWAP( (void*)&data[pointbreak], pivot, sizeof(data_t) ); 215 | 216 | return pointbreak; 217 | } 218 | 219 | 220 | void pqsort( data_t *data, int start, int end, compare_t cmp_ge ) 221 | { 222 | 223 | #if defined(DEBUG) 224 | #define CHECK { \ 225 | if ( verify_partitioning( data, start, end, mid ) ) { \ 226 | printf( "partitioning is wrong\n"); \ 227 | printf("%4d, %4d (%4d, %g) -> %4d, %4d + %4d, %4d\n", \ 228 | start, end, mid, data[mid].data[HOT],start, mid, mid+1, end); \ 229 | 
show_array( data, start, end, 0 ); }} 230 | #else 231 | #define CHECK 232 | #endif 233 | 234 | int size = end-start; 235 | if ( size > 2 ) 236 | { 237 | int mid = partitioning( data, start, end, cmp_ge ); 238 | 239 | CHECK; 240 | 241 | #pragma omp task shared(data) firstprivate(start, mid) 242 | pqsort( data, start, mid, cmp_ge ); 243 | #pragma omp task shared(data) firstprivate(mid, end) // note: this may not be a task 244 | pqsort( data, mid+1, end , cmp_ge ); 245 | } 246 | else 247 | { 248 | if ( (size == 2) && cmp_ge ( (void*)&data[start], (void*)&data[end-1] ) ) 249 | SWAP( (void*)&data[start], (void*)&data[end-1], sizeof(data_t) ); 250 | } 251 | } 252 | 253 | 254 | 255 | 256 | 257 | int verify_sorting( data_t *data, int start, int end, int not_used ) 258 | { 259 | int i = start; 260 | while( (++i < end) && (data[i].data[HOT] >= data[i-1].data[HOT]) ); 261 | return ( i == end ); 262 | } 263 | 264 | int verify_partitioning( data_t *data, int start, int end, int mid ) 265 | { 266 | int failure = 0; 267 | int fail = 0; 268 | 269 | for( int i = start; i < mid; i++ ) 270 | if ( compare( (void*)&data[i], (void*)&data[mid] ) >= 0 ) 271 | fail++; 272 | 273 | failure += fail; 274 | if ( fail ) 275 | { 276 | printf("failure in first half\n"); 277 | fail = 0; 278 | } 279 | 280 | for( int i = mid+1; i < end; i++ ) 281 | if ( compare( (void*)&data[i], (void*)&data[mid] ) < 0 ) 282 | fail++; 283 | 284 | failure += fail; 285 | if ( fail ) 286 | printf("failure in second half\n"); 287 | 288 | return failure; 289 | } 290 | 291 | 292 | int show_array( data_t *data, int start, int end, int not_used ) 293 | { 294 | for ( int i = start; i < end; i++ ) 295 | printf( "%f ", data[i].data[HOT] ); 296 | printf("\n"); 297 | return 0; 298 | } 299 | 300 | 301 | inline int compare( const void *A, const void *B ) 302 | { 303 | data_t *a = (data_t*)A; 304 | data_t *b = (data_t*)B; 305 | 306 | double diff = a->data[HOT] - b->data[HOT]; 307 | return ( (diff > 0) - (diff < 0) ); 308 | } 309 | 
310 | inline int compare_ge( const void *A, const void *B ) 311 | { 312 | data_t *a = (data_t*)A; 313 | data_t *b = (data_t*)B; 314 | 315 | return (a->data[HOT] >= b->data[HOT]); 316 | } 317 | -------------------------------------------------------------------------------- /HPC/codes/quicksort.v1.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. 
If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | 42 | 43 | #if defined(_OPENMP) 44 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 45 | (double)ts.tv_nsec * 1e-9) 46 | 47 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 48 | (double)myts.tv_nsec * 1e-9) 49 | 50 | #else 51 | 52 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 53 | (double)ts.tv_nsec * 1e-9) 54 | #endif 55 | 56 | #if defined(DEBUG) 57 | #define VERBOSE 58 | #endif 59 | 60 | #if defined(VERBOSE) 61 | #define PRINTF(...) printf(__VA_ARGS__) 62 | #else 63 | #define PRINTF(...) 64 | #endif 65 | 66 | 67 | 68 | 69 | #define MAX( a, b ) ( (a)->data[HOT] >(b)->data[HOT]? (a) : (b) ); 70 | #define MIN( a, b ) ( (a)->data[HOT] <(b)->data[HOT]? 
(a) : (b) ); 71 | 72 | #if !defined(DATA_SIZE) 73 | #define DATA_SIZE 8 74 | #endif 75 | #define HOT 0 76 | 77 | #if (!defined(DEBUG) || defined(_OPENMP)) 78 | #define N_dflt 100000 79 | #else 80 | #define N_dflt 10000 81 | #endif 82 | 83 | typedef struct 84 | { 85 | double data[DATA_SIZE]; 86 | } data_t; 87 | 88 | typedef int (compare_t)(const void*, const void*); 89 | typedef int (verify_t)(data_t *, int, int, int); 90 | 91 | extern inline compare_t compare; 92 | extern inline compare_t compare_ge; 93 | verify_t verify_partitioning; 94 | verify_t verify_sorting; 95 | verify_t show_array; 96 | 97 | extern inline int partitioning( data_t *, int, int, compare_t ); 98 | void pqsort( data_t *, int, int, compare_t ); 99 | 100 | 101 | 102 | int main ( int argc, char **argv ) 103 | { 104 | 105 | 106 | // --------------------------------------------- 107 | // get the arguments 108 | // 109 | 110 | 111 | int N = N_dflt; 112 | 113 | /* check command-line arguments */ 114 | { 115 | int a = 0; 116 | 117 | if ( argc > ++a ) N = atoi(*(argv+a)); 118 | } 119 | 120 | // --------------------------------------------- 121 | // generate the array 122 | // 123 | 124 | data_t *data = (data_t*)malloc(N*sizeof(data_t)); 125 | long int seed; 126 | #if defined(_OPENMP) 127 | #pragma omp parallel 128 | { 129 | int me = omp_get_thread_num(); 130 | short int seed = time(NULL) % ( (1 << sizeof(short int))-1 ); 131 | short int seeds[3] = {seed-me, seed+me, seed+me*2}; 132 | 133 | #pragma omp for 134 | for ( int i = 0; i < N; i++ ) 135 | data[i].data[HOT] = erand48( seeds ); 136 | } 137 | #else 138 | { 139 | seed = time(NULL); 140 | srand48(seed); 141 | 142 | PRINTF("ssed is % ld\n", seed); 143 | 144 | for ( int i = 0; i < N; i++ ) 145 | data[i].data[HOT] = drand48(); 146 | } 147 | #endif 148 | 149 | 150 | // --------------------------------------------- 151 | // process 152 | // 153 | struct timespec ts; 154 | int nthreads = 1; 155 | double tstart = CPU_TIME; 156 | 157 | #if defined(_OPENMP) 
158 | 159 | #pragma omp parallel 160 | { 161 | #pragma omp single 162 | { 163 | nthreads = omp_get_num_threads(); 164 | pqsort( data, 0, N, compare_ge ); 165 | } 166 | } 167 | 168 | #else 169 | 170 | pqsort( data, 0, N, compare_ge ); 171 | #endif 172 | 173 | double tend = CPU_TIME; 174 | 175 | // --------------------------------------------- 176 | // release the memory and stop 177 | // 178 | 179 | if ( verify_sorting( data, 0, N, 0) ) 180 | printf("%d\t%g sec\n", nthreads, tend-tstart); 181 | else 182 | printf("the array is not sorted correctly\n"); 183 | 184 | free( data ); 185 | 186 | return 0; 187 | } 188 | 189 | 190 | #define SWAP(A,B,SIZE) do {int sz = (SIZE); char *a = (A); char *b = (B); \ 191 | do { char _temp = *a;*a++ = *b;*b++ = _temp;} while (--sz);} while (0) 192 | 193 | inline int partitioning( data_t *data, int start, int end, compare_t cmp_ge ) 194 | { 195 | 196 | // pick up the meadian of [0], [mid] and [end] as pivot 197 | // 198 | /* to be done */ 199 | 200 | // pick up the last element as pivot 201 | // 202 | --end; 203 | void *pivot = (void*)&data[end]; 204 | 205 | int pointbreak = end-1; 206 | for ( int i = start; i <= pointbreak; i++ ) 207 | if( cmp_ge( (void*)&data[i], pivot ) ) 208 | { 209 | while( (pointbreak > i) && cmp_ge( (void*)&data[pointbreak], pivot ) ) pointbreak--; 210 | if (pointbreak > i ) 211 | SWAP( (void*)&data[i], (void*)&data[pointbreak--], sizeof(data_t) ); 212 | } 213 | pointbreak += !cmp_ge( (void*)&data[pointbreak], pivot ) ; 214 | SWAP( (void*)&data[pointbreak], pivot, sizeof(data_t) ); 215 | 216 | return pointbreak; 217 | } 218 | 219 | 220 | void pqsort( data_t *data, int start, int end, compare_t cmp_ge ) 221 | { 222 | 223 | #if defined(DEBUG) 224 | #define CHECK { \ 225 | if ( verify_partitioning( data, start, end, mid ) ) { \ 226 | printf( "partitioning is wrong\n"); \ 227 | printf("%4d, %4d (%4d, %g) -> %4d, %4d + %4d, %4d\n", \ 228 | start, end, mid, data[mid].data[HOT],start, mid, mid+1, end); \ 229 | 
show_array( data, start, end, 0 ); }} 230 | #else 231 | #define CHECK 232 | #endif 233 | 234 | #define CHECKSWAP( a, b) { if ( cmp_ge ( (void*)&data[start+(a)], (void*)&data[start+(b)] ) )\ 235 | SWAP( (void*)&data[start+(a)], (void*)&data[start+(b)], sizeof(data_t) );} 236 | 237 | int size = end-start; 238 | 239 | switch ( size ) 240 | { 241 | case 1: break; 242 | 243 | case 2: 244 | if ( cmp_ge ( (void*)&data[start], (void*)&data[end-1] ) ) 245 | SWAP( (void*)&data[start], (void*)&data[end-1], sizeof(data_t) ); 246 | break; 247 | 248 | case 3: 249 | CHECKSWAP( 1, 2 ); 250 | CHECKSWAP( 0, 2 ); 251 | CHECKSWAP( 0, 1 ); 252 | break; 253 | 254 | case 4: 255 | CHECKSWAP( 0, 1 ); 256 | CHECKSWAP( 2, 3 ); 257 | CHECKSWAP( 0, 2 ); 258 | CHECKSWAP( 1, 3 ); 259 | CHECKSWAP( 1, 2 ); 260 | break; 261 | 262 | case 5: 263 | CHECKSWAP( 0, 1 ); 264 | CHECKSWAP( 3, 4 ); 265 | CHECKSWAP( 2, 4 ); 266 | CHECKSWAP( 2, 3 ); 267 | CHECKSWAP( 0, 3 ); 268 | CHECKSWAP( 0, 2 ); 269 | CHECKSWAP( 1, 4 ); 270 | CHECKSWAP( 1, 3 ); 271 | CHECKSWAP( 1, 2 ); 272 | break; 273 | 274 | case 6: 275 | CHECKSWAP( 1, 2 ); 276 | CHECKSWAP( 0, 2 ); 277 | CHECKSWAP( 0, 1 ); 278 | CHECKSWAP( 4, 5 ); 279 | CHECKSWAP( 3, 5 ); 280 | CHECKSWAP( 3, 4 ); 281 | CHECKSWAP( 0, 3 ); 282 | CHECKSWAP( 1, 4 ); 283 | CHECKSWAP( 2, 5 ); 284 | CHECKSWAP( 2, 4 ); 285 | CHECKSWAP( 1, 3 ); 286 | CHECKSWAP( 2, 3 ); 287 | break; 288 | 289 | default: { 290 | int mid = partitioning( data, start, end, cmp_ge ); 291 | 292 | CHECK; 293 | 294 | if ( mid > start ) 295 | #pragma omp task default(none) shared(data, cmp_ge) firstprivate(start, mid) untied 296 | pqsort( data, start, mid, cmp_ge ); 297 | 298 | if ( end > mid+1 ) 299 | #pragma omp task default(none) shared(data, cmp_ge) firstprivate(mid, end) untied 300 | pqsort( data, mid+1, end , cmp_ge );} 301 | 302 | break; 303 | } 304 | 305 | } 306 | 307 | 308 | 309 | 310 | 311 | int verify_sorting( data_t *data, int start, int end, int not_used ) 312 | { 313 | int i = start; 314 | 
while( (++i < end) && (data[i].data[HOT] >= data[i-1].data[HOT]) ); 315 | return ( i == end ); 316 | } 317 | 318 | int verify_partitioning( data_t *data, int start, int end, int mid ) 319 | { 320 | int failure = 0; 321 | int fail = 0; 322 | 323 | for( int i = start; i < mid; i++ ) 324 | if ( compare( (void*)&data[i], (void*)&data[mid] ) >= 0 ) 325 | fail++; 326 | 327 | failure += fail; 328 | if ( fail ) 329 | { 330 | printf("failure in first half\n"); 331 | fail = 0; 332 | } 333 | 334 | for( int i = mid+1; i < end; i++ ) 335 | if ( compare( (void*)&data[i], (void*)&data[mid] ) < 0 ) 336 | fail++; 337 | 338 | failure += fail; 339 | if ( fail ) 340 | printf("failure in second half\n"); 341 | 342 | return failure; 343 | } 344 | 345 | 346 | int show_array( data_t *data, int start, int end, int not_used ) 347 | { 348 | for ( int i = start; i < end; i++ ) 349 | printf( "%f ", data[i].data[HOT] ); 350 | printf("\n"); 351 | return 0; 352 | } 353 | 354 | 355 | inline int compare( const void *A, const void *B ) 356 | { 357 | data_t *a = (data_t*)A; 358 | data_t *b = (data_t*)B; 359 | 360 | double diff = a->data[HOT] - b->data[HOT]; 361 | return ( (diff > 0) - (diff < 0) ); 362 | } 363 | 364 | inline int compare_ge( const void *A, const void *B ) 365 | { 366 | data_t *a = (data_t*)A; 367 | data_t *b = (data_t*)B; 368 | 369 | return (a->data[HOT] >= b->data[HOT]); 370 | } 371 | -------------------------------------------------------------------------------- /HPC/codes/quicksort.v2.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | 
│ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | 42 | 43 | #if defined(_OPENMP) 44 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 45 | (double)ts.tv_nsec * 1e-9) 46 | 47 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 48 | (double)myts.tv_nsec * 1e-9) 49 | 50 | #else 51 | 52 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 53 | (double)ts.tv_nsec * 1e-9) 54 | #endif 55 | 56 | #if defined(DEBUG) 57 | #define VERBOSE 58 | #endif 59 | 60 | #if defined(VERBOSE) 61 | #define PRINTF(...) printf(__VA_ARGS__) 62 | #else 63 | #define PRINTF(...) 64 | #endif 65 | 66 | 67 | 68 | 69 | #define MAX( a, b ) ( (a)->data[HOT] >(b)->data[HOT]? (a) : (b) ); 70 | #define MIN( a, b ) ( (a)->data[HOT] <(b)->data[HOT]? 
(a) : (b) ); 71 | 72 | #if !defined(DATA_SIZE) 73 | #define DATA_SIZE 8 74 | #endif 75 | #define HOT 0 76 | 77 | #if (!defined(DEBUG) || defined(_OPENMP)) 78 | #define N_dflt 100000 79 | #else 80 | #define N_dflt 10000 81 | #endif 82 | 83 | #define TaskTh_dflt 64 84 | 85 | typedef struct 86 | { 87 | double data[DATA_SIZE]; 88 | } data_t; 89 | 90 | typedef int (compare_t)(const void*, const void*); 91 | typedef int (verify_t)(data_t *, int, int, int); 92 | 93 | extern inline compare_t compare; 94 | extern inline compare_t compare_ge; 95 | extern inline compare_t compare_g; 96 | verify_t verify_partitioning; 97 | verify_t verify_sorting; 98 | verify_t show_array; 99 | 100 | extern inline int partitioning( data_t *, int, int ); 101 | void pqsort( data_t *, int, int ); 102 | void insertion_sort( data_t *, int, int ); 103 | 104 | int task_cutoff = TaskTh_dflt; 105 | int insertion_cutoff = TaskTh_dflt / 2; 106 | 107 | #pragma omp threadprivate( task_cutoff, insertion_cutoff ) 108 | 109 | int main ( int argc, char **argv ) 110 | { 111 | 112 | 113 | // --------------------------------------------- 114 | // get the arguments 115 | // 116 | 117 | 118 | int N = N_dflt; 119 | 120 | /* check command-line arguments */ 121 | { 122 | int a = 0; 123 | 124 | if ( argc > ++a ){ 125 | N = atoi(*(argv+a)); 126 | if ( argc > ++a ) { 127 | task_cutoff = atoi(*(argv+a)); 128 | if ( argc > ++a ) { 129 | insertion_cutoff = atoi(*(argv+a)); } 130 | else insertion_cutoff = task_cutoff/2;}} 131 | } 132 | 133 | // --------------------------------------------- 134 | // generate the array 135 | // 136 | 137 | data_t *data = (data_t*)malloc(N*sizeof(data_t)); 138 | long int seed; 139 | #if defined(_OPENMP) 140 | #pragma omp parallel 141 | { 142 | int me = omp_get_thread_num(); 143 | short int seed = time(NULL) % ( (1 << sizeof(short int))-1 ); 144 | short int seeds[3] = {seed-me, seed+me, seed+me*2}; 145 | 146 | #pragma omp for 147 | for ( int i = 0; i < N; i++ ) 148 | data[i].data[HOT] = erand48( 
seeds ); 149 | } 150 | #else 151 | { 152 | seed = time(NULL); 153 | srand48(seed); 154 | 155 | PRINTF("ssed is % ld\n", seed); 156 | 157 | for ( int i = 0; i < N; i++ ) 158 | data[i].data[HOT] = drand48(); 159 | } 160 | #endif 161 | 162 | 163 | // --------------------------------------------- 164 | // process 165 | // 166 | struct timespec ts; 167 | int nthreads = 1; 168 | double tstart = CPU_TIME; 169 | 170 | #if defined(_OPENMP) 171 | 172 | #pragma omp parallel copyin( task_cutoff, insertion_cutoff ) 173 | { 174 | #pragma omp single 175 | { 176 | nthreads = omp_get_num_threads(); 177 | pqsort( data, 0, N ); 178 | } 179 | } 180 | 181 | #else 182 | 183 | // uncomment the following call to use 184 | // exactly the same routine than the omp version 185 | pqsort( data, 0, N ); 186 | 187 | // uncomment the following call to test 188 | // the insertion sort routine 189 | /* insertion_sort( data, 0, N); */ 190 | 191 | // uncomment the following call to use 192 | // the library qsort routine 193 | /* qsort( data, N, sizeof(data_t), compare); */ 194 | 195 | #endif 196 | 197 | double tend = CPU_TIME; 198 | 199 | // --------------------------------------------- 200 | // release the memory and stop 201 | // 202 | 203 | if ( verify_sorting( data, 0, N, 0) ) 204 | printf("%d\t%g sec\n", nthreads, tend-tstart); 205 | else 206 | printf("the array is not sorted correctly\n"); 207 | 208 | free( data ); 209 | 210 | return 0; 211 | } 212 | 213 | 214 | #define SWAP(A,B,SIZE) do {int sz = (SIZE); char *a = (A); char *b = (B); \ 215 | do { char _temp = *a;*a++ = *b;*b++ = _temp;} while (--sz);} while (0) 216 | 217 | inline int partitioning( data_t *data, int start, int end ) 218 | { 219 | 220 | // pick up the meadian of [0], [mid] and [end] as pivot 221 | // 222 | /* to be done */ 223 | 224 | // pick up the last element as pivot 225 | // 226 | --end; 227 | void *pivot = (void*)&data[end]; 228 | 229 | int pointbreak = end-1; 230 | for ( int i = start; i <= pointbreak; i++ ) 231 | if( 
compare_ge( (void*)&data[i], pivot ) ) 232 | { 233 | while( (pointbreak > i) && compare_ge( (void*)&data[pointbreak], pivot ) ) pointbreak--; 234 | if (pointbreak > i ) 235 | SWAP( (void*)&data[i], (void*)&data[pointbreak--], sizeof(data_t) ); 236 | } 237 | pointbreak += !compare_ge( (void*)&data[pointbreak], pivot ) ; 238 | SWAP( (void*)&data[pointbreak], pivot, sizeof(data_t) ); 239 | 240 | return pointbreak; 241 | } 242 | 243 | 244 | void pqsort( data_t *data, int start, int end ) 245 | { 246 | 247 | #if defined(DEBUG) 248 | #define CHECK { \ 249 | if ( verify_partitioning( data, start, end, mid ) ) { \ 250 | printf( "partitioning is wrong\n"); \ 251 | printf("%4d, %4d (%4d, %g) -> %4d, %4d + %4d, %4d\n", \ 252 | start, end, mid, data[mid].data[HOT],start, mid, mid+1, end); \ 253 | show_array( data, start, end, 0 ); }} 254 | #define CHECK_S { \ 255 | if ( !verify_sorting( data, start, end, 0 ) ) \ 256 | printf("error between %d and %d\n", start, end ); } 257 | #else 258 | #define CHECK 259 | #define CHECK_S 260 | #endif 261 | 262 | #define CHECKSWAP( a, b) { if ( compare_ge ( (void*)&data[start+(a)], (void*)&data[start+(b)] ) )\ 263 | SWAP( (void*)&data[start+(a)], (void*)&data[start+(b)], sizeof(data_t) );} 264 | 265 | int size = end-start; 266 | 267 | switch ( size ) 268 | { 269 | case 1: break; 270 | case 2: { if ( compare_ge ( (void*)&data[start], (void*)&data[end-1] ) ) 271 | SWAP( (void*)&data[start], (void*)&data[end-1], sizeof(data_t) ); } break; 272 | case 3: { CHECKSWAP( 1, 2 ); 273 | CHECKSWAP( 0, 2 ); 274 | CHECKSWAP( 0, 1 ); } break; 275 | default: { if ( size < insertion_cutoff ) { 276 | insertion_sort( data, start, end ); 277 | CHECK_S; } 278 | else { 279 | 280 | int mid = partitioning( data, start, end ); 281 | 282 | CHECK; 283 | 284 | int mid_start = mid-start; 285 | if ( mid_start > 0 ) 286 | #pragma omp task default(none) final( mid_start < task_cutoff ) mergeable \ 287 | shared(data) firstprivate(start, mid) untied 288 | pqsort( data, start, 
mid ); 289 | 290 | int end_mid = end -(mid+1); 291 | if ( end_mid ) 292 | #pragma omp task default(none) final( end_mid < task_cutoff ) mergeable \ 293 | shared(data) firstprivate(mid, end) untied 294 | pqsort( data, mid+1, end ); 295 | } } break; 296 | } 297 | 298 | } 299 | 300 | 301 | 302 | void insertion_sort( data_t *data, int start, int end ) 303 | { 304 | { 305 | int min_idx = start; 306 | for ( int i = start+1; i < end; i++ ) 307 | if ( compare_g( (void*)&data[min_idx], (void*)&data[i] ) ) 308 | min_idx = i; 309 | 310 | SWAP( (void*)&data[start], (void*)&data[min_idx], sizeof(data_t) ); 311 | } 312 | 313 | for ( int head = start+1, run = start+1; (run = ++head) < end; ) 314 | { 315 | while ( (run > 0) && compare_g( (void*)&data[run-1], (void*)&data[run] ) ) { 316 | SWAP( (void*)&data[run-1], (void*)&data[run], sizeof(data_t) ); --run;} 317 | } 318 | 319 | } 320 | 321 | 322 | 323 | 324 | 325 | int verify_sorting( data_t *data, int start, int end, int mid ) 326 | { 327 | int i = start; 328 | while( (++i < end) && (data[i].data[HOT] >= data[i-1].data[HOT]) ); 329 | return ( i == end ); 330 | } 331 | 332 | int verify_partitioning( data_t *data, int start, int end, int mid ) 333 | { 334 | int failure = 0; 335 | int fail = 0; 336 | 337 | for( int i = start; i < mid; i++ ) 338 | if ( compare( (void*)&data[i], (void*)&data[mid] ) >= 0 ) 339 | fail++; 340 | 341 | failure += fail; 342 | if ( fail ) 343 | { 344 | printf("failure in first half\n"); 345 | fail = 0; 346 | } 347 | 348 | for( int i = mid+1; i < end; i++ ) 349 | if ( compare( (void*)&data[i], (void*)&data[mid] ) < 0 ) 350 | fail++; 351 | 352 | failure += fail; 353 | if ( fail ) 354 | printf("failure in second half\n"); 355 | 356 | return failure; 357 | } 358 | 359 | 360 | int show_array( data_t *data, int start, int end, int not_used ) 361 | { 362 | for ( int i = start; i < end; i++ ) 363 | printf( "%f ", data[i].data[HOT] ); 364 | printf("\n"); 365 | return 0; 366 | } 367 | 368 | 369 | inline int compare( 
const void *A, const void *B ) 370 | { 371 | data_t *a = (data_t*)A; 372 | data_t *b = (data_t*)B; 373 | 374 | double diff = a->data[HOT] - b->data[HOT]; 375 | return ( (diff > 0) - (diff < 0) ); 376 | } 377 | 378 | inline int compare_ge( const void *A, const void *B ) 379 | { 380 | data_t *a = (data_t*)A; 381 | data_t *b = (data_t*)B; 382 | 383 | return (a->data[HOT] >= b->data[HOT]); 384 | } 385 | 386 | inline int compare_g( const void *A, const void *B ) 387 | { 388 | data_t *a = (data_t*)A; 389 | data_t *b = (data_t*)B; 390 | 391 | return (a->data[HOT] > b->data[HOT]); 392 | } 393 | -------------------------------------------------------------------------------- /HPC/codes/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /HPC/mpi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/HPC/mpi.pdf -------------------------------------------------------------------------------- /HPC/openmp_outline.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/HPC/openmp_outline.pdf -------------------------------------------------------------------------------- /HPC/readme.md: -------------------------------------------------------------------------------- 1 | TBD 2 | -------------------------------------------------------------------------------- /HPC/tasks.new.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/HPC/tasks.new.pdf 
-------------------------------------------------------------------------------- /HPC/tasks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/HPC/tasks.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Advanced High Performance Computing 2023 2 | -------------------------------------------------------------------------------- /access_Leonardo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/access_Leonardo.pdf -------------------------------------------------------------------------------- /intro_to_course.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/intro_to_course.pdf --------------------------------------------------------------------------------