├── Concurrency ├── Concurrency.pdf ├── code │ ├── 1-Processes-and-Threads │ │ ├── 1-Single-Process.c │ │ ├── 2-Fork.c │ │ ├── 3-Multiple-Threads.c │ │ └── Makefile │ ├── 2-Create-and-Join │ │ ├── 1-no-Join.c │ │ ├── 2-Master-Join.c │ │ ├── 3-Multiple-Threads.c │ │ ├── 4-Detach.c │ │ ├── 5-Input-Output.c │ │ ├── 6-Input-Output-better.c │ │ └── Makefile │ ├── 3-Mutex │ │ ├── 1-No-Sync.c │ │ ├── 2-Atomic.c │ │ ├── 3-Mutex.c │ │ ├── 4-Timed-Lock.c │ │ ├── 5-Try-Lock.c │ │ ├── 6-Busy.c │ │ ├── 7-not-Busy.c │ │ ├── 8-Deadlock.c │ │ ├── 9-no-Deadlock-Hierarchy.c │ │ ├── A-no-Deadlock-Try-Backoff.c │ │ ├── Makefile │ │ ├── repeat.sh │ │ └── timing.h │ ├── 4-Condition-Variables │ │ ├── 1-no-Condition-Variable.c │ │ ├── 2-Minimal.c │ │ ├── 3-Better.c │ │ ├── 4-Condition-Variable.c │ │ ├── 5-Bad-Signal.c │ │ ├── 6-Broadcast.c │ │ └── Makefile │ ├── X-Circular-Buffer │ │ ├── Makefile │ │ ├── circbuf.c │ │ ├── circbuf.h │ │ └── test.c │ └── Y-Thread-Safe-CB │ │ ├── Makefile │ │ ├── circbuf.c │ │ ├── circbuf.h │ │ ├── test.c │ │ ├── tscircbuf.c │ │ └── tscircbuf.h └── readme.md ├── GPU ├── DSSC-EXAME-README.pdf ├── Jacobi-project │ ├── .DS_Store │ ├── aux │ │ ├── background.html │ │ ├── background.md │ │ ├── eqn.PNG │ │ ├── hints.html │ │ ├── hints.md │ │ ├── jacobiEq1.jpg │ │ ├── jacobiFigure1.jpg │ │ ├── jacobiFigure2.jpg │ │ ├── ref2.png │ │ └── ref_Init.png │ ├── code │ │ ├── Makefile │ │ ├── jacobi.c │ │ └── plot.plt │ └── readme.md ├── par_transp │ └── main.c └── readme.md ├── HPC ├── codes │ ├── 00_simple.c │ ├── 00_simple_nowait.c │ ├── 00_simple_taskwait.c │ ├── 02_tasks.c │ ├── 02_tasks_wrong.c │ ├── 03_variable_workload.c │ ├── 03_variable_workload.v2.c │ ├── 04_tasks_reduction.c │ ├── 04_unpredictable_pattern.c │ ├── 05_taskgroup_reduction.c │ ├── dag.c │ ├── linked_list.c │ ├── linked_list.deadlock.c │ ├── quicksort.v0.c │ ├── quicksort.v1.c │ ├── quicksort.v2.c │ └── readme.md ├── mpi.pdf ├── openmp_outline.pdf ├── readme.md ├── tasks.new.pdf └── tasks.pdf ├── 
README.md ├── access_Leonardo.pdf └── intro_to_course.pdf /Concurrency/Concurrency.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/Concurrency/Concurrency.pdf -------------------------------------------------------------------------------- /Concurrency/code/1-Processes-and-Threads/1-Single-Process.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | void huge_function() 8 | { 9 | sleep(600); 10 | } 11 | 12 | int main() 13 | { 14 | printf("PID: %ld, PPID: %ld\n", (long)getpid(), (long)getppid()); 15 | huge_function(); 16 | 17 | return EXIT_SUCCESS; 18 | } 19 | -------------------------------------------------------------------------------- /Concurrency/code/1-Processes-and-Threads/2-Fork.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | void huge_function() 8 | { 9 | sleep(600); 10 | } 11 | 12 | int main() 13 | { 14 | pid_t pid = fork(); 15 | switch (pid) { 16 | case -1: 17 | fprintf(stderr, "fork failure.\n"); 18 | return EXIT_FAILURE; 19 | break; 20 | 21 | case 0: 22 | printf("CHILD\n\tPID: %ld, PPID: %ld\n", (long)getpid(), (long)getppid()); 23 | huge_function(); 24 | break; 25 | 26 | default: 27 | printf("PARENT\n\tPID: %ld, PPID: %ld\n", (long)getpid(), (long)getppid()); 28 | huge_function(); 29 | break; 30 | } 31 | 32 | return EXIT_SUCCESS; 33 | } -------------------------------------------------------------------------------- /Concurrency/code/1-Processes-and-Threads/3-Multiple-Threads.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | void* thread_function(void* 
unused) 10 | { 11 | (void)unused; 12 | 13 | printf("PID: %ld, thread_id: %d\n", (long)getpid(), gettid()); 14 | sleep(1200); 15 | 16 | return NULL; 17 | } 18 | 19 | 20 | 21 | int main() 22 | { 23 | pthread_t thread; 24 | 25 | if (pthread_create(&thread, NULL, thread_function, NULL)) { 26 | fprintf(stderr, "pthread_create failure.\n"); 27 | return EXIT_FAILURE; 28 | } 29 | 30 | thread_function(NULL); 31 | 32 | if (pthread_join(thread, NULL)) { 33 | fprintf(stderr, "pthread_joint failure.\n"); 34 | return EXIT_FAILURE; 35 | } 36 | 37 | return EXIT_SUCCESS; 38 | } 39 | -------------------------------------------------------------------------------- /Concurrency/code/1-Processes-and-Threads/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -Wextra -pedantic -g 3 | 4 | .SUFFIXES : 5 | 6 | .PHONY : all 7 | all: 1-Single-Process.x 2-Fork.x 3-Multiple-Threads.x 8 | 9 | .PHONY : clean 10 | clean : 11 | rm -f *.x 12 | 13 | 14 | 1-Single-Process.x : 1-Single-Process.c 15 | $(CC) $(CFLAGS) -o $@ $< 16 | 17 | 2-Fork.x : 2-Fork.c 18 | $(CC) $(CFLAGS) -o $@ $< 19 | 20 | 3-Multiple-Threads.x : 3-Multiple-Threads.c 21 | $(CC) $(CFLAFS) -pthread -o $@ $< 22 | -------------------------------------------------------------------------------- /Concurrency/code/2-Create-and-Join/1-no-Join.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | void* thread_function(void* unused) 8 | { 9 | (void)unused; 10 | 11 | printf("Child thread\n"); 12 | 13 | return NULL; 14 | } 15 | 16 | 17 | int main() 18 | { 19 | printf("Main thread\n"); 20 | 21 | pthread_t thread; 22 | 23 | if (pthread_create(&thread, NULL, thread_function, NULL)) { 24 | fprintf(stderr, "pthread_create failure.\n"); 25 | return EXIT_FAILURE; 26 | } 27 | 28 | sleep(1); 29 | 30 | return EXIT_SUCCESS; 31 | } 32 | 
-------------------------------------------------------------------------------- /Concurrency/code/2-Create-and-Join/2-Master-Join.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | void* thread_function(void* unused) 7 | { 8 | (void)unused; 9 | 10 | printf("Child thread\n"); 11 | 12 | return NULL; 13 | } 14 | 15 | 16 | int main() 17 | { 18 | printf("Main thread\n"); 19 | 20 | pthread_t thread; 21 | 22 | if (pthread_create(&thread, NULL, thread_function, NULL)) { 23 | fprintf(stderr, "pthread_create failure.\n"); 24 | return EXIT_FAILURE; 25 | } 26 | 27 | if (pthread_join(thread, NULL)) { 28 | fprintf(stderr, "pthread_joint failure.\n"); 29 | return EXIT_FAILURE; 30 | } 31 | 32 | return EXIT_SUCCESS; 33 | } 34 | -------------------------------------------------------------------------------- /Concurrency/code/2-Create-and-Join/3-Multiple-Threads.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | pthread_t thread_2_handle; 8 | 9 | 10 | void* thread_2_fn(void* unused) 11 | { 12 | (void)unused; 13 | 14 | printf("Thread 2\n"); 15 | 16 | return NULL; 17 | } 18 | 19 | 20 | void* thread_1_fn(void* unused) 21 | { 22 | (void)unused; 23 | 24 | printf("Thread 1\n"); 25 | 26 | if (pthread_create(&thread_2_handle, NULL, thread_2_fn, NULL)) { 27 | fprintf(stderr, "pthread_create failure.\n"); 28 | } 29 | 30 | return NULL; 31 | } 32 | 33 | 34 | 35 | int main() 36 | { 37 | pthread_t thread_1_handle; 38 | 39 | printf("Main thread\n"); 40 | 41 | if (pthread_create(&thread_1_handle, NULL, thread_1_fn, NULL)) { 42 | fprintf(stderr, "pthread_create failure.\n"); 43 | } 44 | 45 | // BEWARE: this is just a poor man example 46 | // NEVER use sleep to synchronize! 
47 | sleep(1); 48 | 49 | if (pthread_join(thread_1_handle, NULL)) { 50 | fprintf(stderr, "pthread_join failure.\n"); 51 | } 52 | 53 | if (pthread_join(thread_2_handle, NULL)) { 54 | fprintf(stderr, "pthread_join failure.\n"); 55 | } 56 | 57 | return EXIT_SUCCESS; 58 | } 59 | -------------------------------------------------------------------------------- /Concurrency/code/2-Create-and-Join/4-Detach.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | void* thread_function(void* unused) 8 | { 9 | (void)unused; 10 | 11 | if (pthread_detach(pthread_self())) { 12 | fprintf(stderr, "pthread_detach failure.\n"); 13 | } 14 | 15 | printf("Child thread\n"); 16 | 17 | return NULL; 18 | } 19 | 20 | 21 | int main() 22 | { 23 | printf("Main thread\n"); 24 | 25 | pthread_t thread; 26 | 27 | if (pthread_create(&thread, NULL, thread_function, NULL)) { 28 | fprintf(stderr, "pthread_create failure.\n"); 29 | return EXIT_FAILURE; 30 | } 31 | 32 | sleep(1); 33 | 34 | return EXIT_SUCCESS; 35 | } 36 | -------------------------------------------------------------------------------- /Concurrency/code/2-Create-and-Join/5-Input-Output.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | struct thread_input { 6 | unsigned int numerator; 7 | unsigned int denominator; 8 | }; 9 | 10 | struct thread_output { 11 | unsigned int quotient; 12 | unsigned int reminder; 13 | }; 14 | 15 | 16 | void* thread_function(void* arg) 17 | { 18 | struct thread_input* input = (struct thread_input*)arg; 19 | struct thread_output* output = (struct thread_output*)malloc(sizeof(struct thread_output)); 20 | 21 | if (!output) 22 | return NULL; 23 | 24 | output->quotient = input->numerator / input->denominator; 25 | output->reminder = input->numerator % input->denominator; 26 | 27 | return output; 28 | } 29 | 30 | 31 | int main() 32 | { 33 | int 
exit_code = EXIT_FAILURE; 34 | 35 | struct thread_input* input = (struct thread_input*)malloc(sizeof(struct thread_input)); 36 | if (!input) { 37 | fprintf(stderr, "malloc failure.\n"); 38 | return exit_code; 39 | } 40 | 41 | input->numerator = 25; 42 | input->denominator = 7; 43 | 44 | pthread_t thread; 45 | if (pthread_create(&thread, NULL, thread_function, (void*)input)) { 46 | fprintf(stderr, "pthread_create failure.\n"); 47 | goto cleanup_input; 48 | } 49 | 50 | struct thread_output* output; 51 | if (pthread_join(thread, (void**)&output)) { 52 | fprintf(stderr, "pthread_join failure.\n"); 53 | goto cleanup_input; 54 | } 55 | 56 | if (!output) { 57 | fprintf(stderr, "thread malloc failure.\n"); 58 | goto cleanup_input; 59 | } 60 | 61 | printf("%d divided by %d is %d with reminder %d.\n", 62 | input->numerator, 63 | input->denominator, 64 | output->quotient, 65 | output->reminder); 66 | 67 | exit_code = EXIT_SUCCESS; 68 | 69 | free(output); 70 | 71 | cleanup_input: 72 | free(input); 73 | 74 | return exit_code; 75 | } 76 | -------------------------------------------------------------------------------- /Concurrency/code/2-Create-and-Join/6-Input-Output-better.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | struct thread_output; 7 | 8 | struct thread_input { 9 | unsigned int numerator; 10 | unsigned int denominator; 11 | struct thread_output* output; 12 | }; 13 | 14 | struct thread_output { 15 | unsigned int quotient; 16 | unsigned int reminder; 17 | }; 18 | 19 | 20 | void* thread_function(void* arg) 21 | { 22 | struct thread_input* input = (struct thread_input*)arg; 23 | 24 | input->output->quotient = input->numerator / input->denominator; 25 | input->output->reminder = input->numerator % input->denominator; 26 | 27 | return NULL; 28 | } 29 | 30 | 31 | int main() 32 | { 33 | int exit_code = EXIT_FAILURE; 34 | 35 | struct thread_input* input = (struct 
thread_input*)malloc(sizeof(struct thread_input)); 36 | if (!input) { 37 | fprintf(stderr, "malloc failure.\n"); 38 | return exit_code; 39 | } 40 | 41 | input->output = (struct thread_output*)malloc(sizeof(struct thread_output)); 42 | if (!input->output) { 43 | fprintf(stderr, "malloc failure.\n"); 44 | goto cleanup_input; 45 | } 46 | 47 | input->numerator = 25; 48 | input->denominator = 7; 49 | 50 | pthread_t thread; 51 | if (pthread_create(&thread, NULL, thread_function, (void*)input)) { 52 | fprintf(stderr, "pthread_create failure.\n"); 53 | goto cleanup_output; 54 | } 55 | 56 | if (pthread_join(thread, NULL)) { 57 | fprintf(stderr, "pthread_join failure.\n"); 58 | goto cleanup_output; 59 | } 60 | 61 | printf("%d divided by %d is %d with reminder %d.\n", 62 | input->numerator, 63 | input->denominator, 64 | input->output->quotient, 65 | input->output->reminder); 66 | 67 | exit_code = EXIT_SUCCESS; 68 | 69 | cleanup_output: 70 | free(input->output); 71 | 72 | cleanup_input: 73 | free(input); 74 | 75 | return exit_code; 76 | } 77 | -------------------------------------------------------------------------------- /Concurrency/code/2-Create-and-Join/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -Wextra -pedantic -g 3 | 4 | .SUFFIXES : 5 | 6 | .PHONY : all 7 | all: 1-no-Join.x 2-Master-Join.x 3-Multiple-Threads.x 4-Detach.x 5-Input-Output.x 6-Input-Output-better.x 8 | 9 | .PHONY : clean 10 | clean : 11 | rm -f *.x 12 | 13 | %.x : %.c 14 | $(CC) $(CFLAGS) -pthread -o $@ $< 15 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/1-No-Sync.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "timing.h" 6 | 7 | #define DEFAULT_LEN 10000 8 | #define DEFAULT_THREADS 2 9 | 10 | int len; 11 | int counter = 0; 12 | 13 | 14 | void* threadfunc(void* unused) 15 | { 
16 | (void)unused; 17 | 18 | for (int i = 0; i < len; ++i) 19 | ++counter; 20 | 21 | return NULL; 22 | } 23 | 24 | 25 | int main(int argc, char* argv[]) 26 | { 27 | int retcode = EXIT_FAILURE; 28 | double dt = 0.0; 29 | 30 | int nthread = DEFAULT_THREADS; 31 | len = DEFAULT_LEN; 32 | if (argc > 1) 33 | len = atoi(argv[1]); 34 | 35 | if (argc > 2) 36 | nthread = atoi(argv[2]); 37 | 38 | pthread_t* threads = (pthread_t*)malloc(nthread * sizeof(pthread_t)); 39 | if (!threads) 40 | return retcode; 41 | 42 | dt -= cputime_ms(); 43 | 44 | for (int i = 0; i < nthread; ++i) 45 | { 46 | if (pthread_create(&threads[i], NULL, threadfunc, NULL)) 47 | goto cleanup; 48 | } 49 | 50 | for (int i = 0; i < nthread; ++i) 51 | { 52 | if (pthread_join(threads[i], NULL)) 53 | goto cleanup; 54 | } 55 | 56 | dt += cputime_ms(); 57 | 58 | retcode = EXIT_SUCCESS; 59 | 60 | printf("counter = %d; expected = %d\n", counter, len * nthread); 61 | printf("elapsed time = %lfms\n", dt); 62 | 63 | cleanup: 64 | free (threads); 65 | 66 | return retcode; 67 | } 68 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/2-Atomic.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "timing.h" 6 | 7 | #define DEFAULT_LEN 10000 8 | #define DEFAULT_THREADS 2 9 | 10 | int len; 11 | _Atomic int counter = 0; 12 | 13 | 14 | void* threadfunc(void* unused) 15 | { 16 | (void)unused; 17 | 18 | for (int i = 0; i < len; ++i) 19 | ++counter; 20 | 21 | return NULL; 22 | } 23 | 24 | 25 | int main(int argc, char* argv[]) 26 | { 27 | int retcode = EXIT_FAILURE; 28 | double dt = 0.0; 29 | 30 | int nthread = DEFAULT_THREADS; 31 | len = DEFAULT_LEN; 32 | if (argc > 1) 33 | len = atoi(argv[1]); 34 | 35 | if (argc > 2) 36 | nthread = atoi(argv[2]); 37 | 38 | pthread_t* threads = (pthread_t*)malloc(nthread * sizeof(pthread_t)); 39 | if (!threads) 40 | return retcode; 41 | 42 | dt -= 
cputime_ms(); 43 | 44 | for (int i = 0; i < nthread; ++i) 45 | { 46 | if (pthread_create(&threads[i], NULL, threadfunc, NULL)) 47 | goto cleanup; 48 | } 49 | 50 | for (int i = 0; i < nthread; ++i) 51 | { 52 | if (pthread_join(threads[i], NULL)) 53 | goto cleanup; 54 | } 55 | 56 | dt += cputime_ms(); 57 | 58 | retcode = EXIT_SUCCESS; 59 | 60 | printf("counter = %d; expected = %d\n", counter, len * nthread); 61 | printf("elapsed time = %lfms\n", dt); 62 | 63 | cleanup: 64 | free (threads); 65 | 66 | return retcode; 67 | } 68 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/3-Mutex.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "timing.h" 6 | 7 | #define DEFAULT_LEN 10000 8 | #define DEFAULT_THREADS 2 9 | 10 | int len; 11 | int counter = 0; 12 | pthread_mutex_t counter_mtx = PTHREAD_MUTEX_INITIALIZER; 13 | 14 | 15 | void* threadfunc(void* unused) 16 | { 17 | (void)unused; 18 | 19 | for (int i = 0; i < len; ++i) { 20 | if (pthread_mutex_lock(&counter_mtx)) 21 | continue; 22 | 23 | ++counter; 24 | 25 | if (pthread_mutex_unlock(&counter_mtx)) 26 | continue; 27 | } 28 | 29 | return NULL; 30 | } 31 | 32 | 33 | int main(int argc, char* argv[]) 34 | { 35 | int retcode = EXIT_FAILURE; 36 | double dt = 0.0; 37 | 38 | int nthread = DEFAULT_THREADS; 39 | len = DEFAULT_LEN; 40 | if (argc > 1) 41 | len = atoi(argv[1]); 42 | 43 | if (argc > 2) 44 | nthread = atoi(argv[2]); 45 | 46 | pthread_t* threads = (pthread_t*)malloc(nthread * sizeof(pthread_t)); 47 | if (!threads) 48 | return retcode; 49 | 50 | dt -= cputime_ms(); 51 | 52 | for (int i = 0; i < nthread; ++i) 53 | { 54 | if (pthread_create(&threads[i], NULL, threadfunc, NULL)) 55 | goto cleanup; 56 | } 57 | 58 | for (int i = 0; i < nthread; ++i) 59 | { 60 | if (pthread_join(threads[i], NULL)) 61 | goto cleanup; 62 | } 63 | 64 | dt += cputime_ms(); 65 | 66 | retcode = 
EXIT_SUCCESS; 67 | 68 | printf("counter = %d; expected = %d\n", counter, len * nthread); 69 | printf("elapsed time = %lfms\n", dt); 70 | 71 | cleanup: 72 | free (threads); 73 | 74 | return retcode; 75 | } 76 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/4-Timed-Lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 9 | 10 | 11 | void* threadfunc(void* unused) 12 | { 13 | (void)unused; 14 | 15 | struct timespec ts; 16 | do { 17 | clock_gettime(CLOCK_REALTIME, &ts); 18 | ts.tv_sec += 1; 19 | printf("trying to lock the mutex...\n"); 20 | } while (pthread_mutex_timedlock(&mtx, &ts)); 21 | printf("finally I locked it!\n"); 22 | 23 | sleep(1); 24 | pthread_mutex_unlock(&mtx); 25 | 26 | return NULL; 27 | } 28 | 29 | 30 | int main() 31 | { 32 | int retcode = EXIT_FAILURE; 33 | 34 | pthread_t thread; 35 | 36 | pthread_mutex_lock(&mtx); 37 | 38 | if (pthread_create(&thread, NULL, threadfunc, NULL)) 39 | return retcode; 40 | 41 | sleep(7); 42 | pthread_mutex_unlock(&mtx); 43 | 44 | if (pthread_join(thread, NULL)) 45 | return retcode; 46 | 47 | retcode = EXIT_SUCCESS; 48 | 49 | return retcode; 50 | } 51 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/5-Try-Lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 8 | 9 | 10 | void* threadfunc(void* unused) 11 | { 12 | (void)unused; 13 | while (pthread_mutex_trylock(&mtx)) { 14 | printf("mutex is locked, I will try again...\n"); 15 | sleep(1); 16 | } 17 | printf("finally I locked it!\n"); 18 | 19 | sleep(1); 20 | pthread_mutex_unlock(&mtx); 21 | 22 | 23 | return NULL; 24 | } 25 | 26 | 27 | 28 | int main() 29 
| { 30 | int retcode = EXIT_FAILURE; 31 | 32 | pthread_t thread; 33 | 34 | pthread_mutex_lock(&mtx); 35 | 36 | if (pthread_create(&thread, NULL, threadfunc, NULL)) 37 | return retcode; 38 | 39 | sleep(7); 40 | pthread_mutex_unlock(&mtx); 41 | 42 | if (pthread_join(thread, NULL)) 43 | return retcode; 44 | 45 | retcode = EXIT_SUCCESS; 46 | 47 | return retcode; 48 | } 49 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/6-Busy.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | pthread_spinlock_t splk; 8 | 9 | void* threadfunc(void* unused) 10 | { 11 | (void)unused; 12 | 13 | printf("Acquiring the spinlock... "); 14 | fflush(stdout); 15 | 16 | pthread_spin_lock(&splk); 17 | printf("acquired\n"); 18 | sleep(4); 19 | pthread_spin_unlock(&splk); 20 | 21 | return NULL; 22 | } 23 | 24 | 25 | 26 | int main() 27 | { 28 | int retcode = EXIT_FAILURE; 29 | 30 | pthread_t thread; 31 | 32 | if (pthread_spin_init(&splk, PTHREAD_PROCESS_PRIVATE)) 33 | return retcode; 34 | 35 | pthread_spin_lock(&splk); 36 | 37 | if (pthread_create(&thread, NULL, threadfunc, NULL)) 38 | goto spinlock_cleanup; 39 | 40 | sleep(12); 41 | pthread_spin_unlock(&splk); 42 | 43 | if (!pthread_join(thread, NULL)) 44 | retcode = EXIT_SUCCESS; 45 | 46 | spinlock_cleanup: 47 | if (pthread_spin_destroy(&splk)) 48 | return retcode; 49 | 50 | return retcode; 51 | } 52 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/7-not-Busy.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 8 | 9 | void* threadfunc(void* unused) 10 | { 11 | (void)unused; 12 | 13 | printf("Acquiring the mutex... 
"); 14 | fflush(stdout); 15 | 16 | pthread_mutex_lock(&mtx); 17 | printf("acquired\n"); 18 | sleep(4); 19 | pthread_mutex_unlock(&mtx); 20 | 21 | return NULL; 22 | } 23 | 24 | 25 | 26 | int main() 27 | { 28 | int retcode = EXIT_FAILURE; 29 | 30 | pthread_t thread; 31 | 32 | pthread_mutex_lock(&mtx); 33 | 34 | if (pthread_create(&thread, NULL, threadfunc, NULL)) 35 | return retcode; 36 | 37 | sleep(12); 38 | pthread_mutex_unlock(&mtx); 39 | 40 | if (!pthread_join(thread, NULL)) 41 | retcode = EXIT_SUCCESS; 42 | 43 | return retcode; 44 | } 45 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/8-Deadlock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | struct resource { 8 | double value; 9 | pthread_mutex_t mtx; 10 | }; 11 | 12 | struct resource r1 = {.value = 0.0, 13 | .mtx = PTHREAD_MUTEX_INITIALIZER}; 14 | 15 | struct resource r2 = {.value = 0.0, 16 | .mtx = PTHREAD_MUTEX_INITIALIZER}; 17 | 18 | 19 | void* threadswap(void* resources) 20 | { 21 | struct resource* first = ((struct resource**)resources)[0]; 22 | struct resource* second = ((struct resource**)resources)[1]; 23 | 24 | pthread_mutex_lock(&first->mtx); 25 | usleep(10000); // just to simulate some load 26 | pthread_mutex_lock(&second->mtx); 27 | 28 | double tmp = first->value; 29 | first->value = second->value; 30 | second->value = tmp; 31 | 32 | pthread_mutex_unlock(&second->mtx); 33 | pthread_mutex_unlock(&first->mtx); 34 | 35 | return NULL; 36 | } 37 | 38 | 39 | int main() 40 | { 41 | int retcode = EXIT_FAILURE; 42 | 43 | pthread_mutex_lock(&r1.mtx); 44 | r1.value = 37.0; 45 | pthread_mutex_unlock(&r1.mtx); 46 | 47 | pthread_mutex_lock(&r2.mtx); 48 | r2.value = -4.0; 49 | pthread_mutex_unlock(&r2.mtx); 50 | 51 | pthread_t thread1, thread2; 52 | struct resource* resources_1[] = {&r1, &r2}; 53 | struct resource* resources_2[] = {&r2, &r1}; 54 | 55 | if 
(pthread_create(&thread1, NULL, threadswap, resources_1)) 56 | return retcode; 57 | 58 | if (pthread_create(&thread2, NULL, threadswap, resources_2)) 59 | goto thread1_cleanup; 60 | 61 | if (pthread_join(thread2, NULL)) 62 | goto thread1_cleanup; 63 | 64 | retcode = EXIT_SUCCESS; 65 | 66 | thread1_cleanup: 67 | if (pthread_join(thread1, NULL)) 68 | return EXIT_FAILURE; 69 | 70 | if (retcode == EXIT_FAILURE) 71 | return retcode; 72 | 73 | pthread_mutex_lock(&r1.mtx); 74 | printf("r1.value = %lf\n", r1.value); 75 | pthread_mutex_unlock(&r1.mtx); 76 | 77 | pthread_mutex_lock(&r2.mtx); 78 | printf("r2.value = %lf\n", r2.value); 79 | pthread_mutex_unlock(&r2.mtx); 80 | 81 | return EXIT_SUCCESS; 82 | } 83 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/9-no-Deadlock-Hierarchy.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | struct resource { 8 | double value; 9 | pthread_mutex_t mtx; 10 | }; 11 | 12 | struct resource r1 = {.value = 0.0, 13 | .mtx = PTHREAD_MUTEX_INITIALIZER}; 14 | 15 | struct resource r2 = {.value = 0.0, 16 | .mtx = PTHREAD_MUTEX_INITIALIZER}; 17 | 18 | 19 | void* threadswap(void* resources) 20 | { 21 | struct resource* first = ((struct resource**)resources)[0]; 22 | struct resource* second = ((struct resource**)resources)[1]; 23 | 24 | pthread_mutex_lock(&first->mtx); 25 | usleep(10000); // just to simulate some load 26 | pthread_mutex_lock(&second->mtx); 27 | 28 | double tmp = first->value; 29 | first->value = second->value; 30 | second->value = tmp; 31 | 32 | pthread_mutex_unlock(&second->mtx); 33 | pthread_mutex_unlock(&first->mtx); 34 | 35 | return NULL; 36 | } 37 | 38 | 39 | int main() 40 | { 41 | int retcode = EXIT_FAILURE; 42 | 43 | pthread_mutex_lock(&r1.mtx); 44 | r1.value = 37.0; 45 | pthread_mutex_unlock(&r1.mtx); 46 | 47 | pthread_mutex_lock(&r2.mtx); 48 | r2.value = -4.0; 49 | 
pthread_mutex_unlock(&r2.mtx); 50 | 51 | pthread_t thread1, thread2; 52 | struct resource* resources_1[] = {&r1, &r2}; 53 | struct resource* resources_2[] = {&r1, &r2}; 54 | 55 | if (pthread_create(&thread1, NULL, threadswap, resources_1)) 56 | return retcode; 57 | 58 | if (pthread_create(&thread2, NULL, threadswap, resources_2)) 59 | goto thread1_cleanup; 60 | 61 | if (pthread_join(thread2, NULL)) 62 | goto thread1_cleanup; 63 | 64 | retcode = EXIT_SUCCESS; 65 | 66 | thread1_cleanup: 67 | if (pthread_join(thread1, NULL)) 68 | return EXIT_FAILURE; 69 | 70 | if (retcode == EXIT_FAILURE) 71 | return retcode; 72 | 73 | pthread_mutex_lock(&r1.mtx); 74 | printf("r1.value = %lf\n", r1.value); 75 | pthread_mutex_unlock(&r1.mtx); 76 | 77 | pthread_mutex_lock(&r2.mtx); 78 | printf("r2.value = %lf\n", r2.value); 79 | pthread_mutex_unlock(&r2.mtx); 80 | 81 | return EXIT_SUCCESS; 82 | } 83 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/A-no-Deadlock-Try-Backoff.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | struct resource { 9 | double value; 10 | pthread_mutex_t mtx; 11 | }; 12 | 13 | struct resource r1 = {.value = 0.0, 14 | .mtx = PTHREAD_MUTEX_INITIALIZER}; 15 | 16 | struct resource r2 = {.value = 0.0, 17 | .mtx = PTHREAD_MUTEX_INITIALIZER}; 18 | 19 | 20 | void* threadswap(void* resources) 21 | { 22 | struct resource* first = ((struct resource**)resources)[0]; 23 | struct resource* second = ((struct resource**)resources)[1]; 24 | 25 | for (;;) { 26 | pthread_mutex_lock(&first->mtx); 27 | usleep(10000); // just to simulate some load 28 | if (pthread_mutex_trylock(&second->mtx)) { 29 | pthread_mutex_unlock(&first->mtx); 30 | sched_yield(); 31 | } else { 32 | break; 33 | } 34 | } 35 | 36 | double tmp = first->value; 37 | first->value = second->value; 38 | second->value = tmp; 39 | 40 | 
pthread_mutex_unlock(&second->mtx); 41 | pthread_mutex_unlock(&first->mtx); 42 | 43 | return NULL; 44 | } 45 | 46 | 47 | int main() 48 | { 49 | int retcode = EXIT_FAILURE; 50 | 51 | pthread_mutex_lock(&r1.mtx); 52 | r1.value = 37.0; 53 | pthread_mutex_unlock(&r1.mtx); 54 | 55 | pthread_mutex_lock(&r2.mtx); 56 | r2.value = -4.0; 57 | pthread_mutex_unlock(&r2.mtx); 58 | 59 | pthread_t thread1, thread2; 60 | struct resource* resources_1[] = {&r1, &r2}; 61 | struct resource* resources_2[] = {&r2, &r1}; 62 | 63 | if (pthread_create(&thread1, NULL, threadswap, resources_1)) 64 | return retcode; 65 | 66 | if (pthread_create(&thread2, NULL, threadswap, resources_2)) 67 | goto thread1_cleanup; 68 | 69 | if (pthread_join(thread2, NULL)) 70 | goto thread1_cleanup; 71 | 72 | retcode = EXIT_SUCCESS; 73 | 74 | thread1_cleanup: 75 | if (pthread_join(thread1, NULL)) 76 | return EXIT_FAILURE; 77 | 78 | if (retcode == EXIT_FAILURE) 79 | return retcode; 80 | 81 | pthread_mutex_lock(&r1.mtx); 82 | printf("r1.value = %lf\n", r1.value); 83 | pthread_mutex_unlock(&r1.mtx); 84 | 85 | pthread_mutex_lock(&r2.mtx); 86 | printf("r2.value = %lf\n", r2.value); 87 | pthread_mutex_unlock(&r2.mtx); 88 | 89 | return EXIT_SUCCESS; 90 | } 91 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -Wextra -pedantic -g 3 | 4 | .SUFFIXES : 5 | 6 | .PHONY : all 7 | all: 1-No-Sync.x 2-Atomic.x 3-Mutex.x 4-Timed-Lock.x \ 8 | 5-Try-Lock.x 6-Busy.x 7-not-Busy.x 8-Deadlock.x \ 9 | 9-no-Deadlock-Hierarchy.x A-no-Deadlock-Try-Backoff.x 10 | 11 | .PHONY : clean 12 | clean : 13 | rm -f *.x 14 | 15 | %.x : %.c timing.h 16 | $(CC) $(CFLAGS) -pthread -o $@ $< 17 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/repeat.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in $(seq $1) 4 | do 5 | echo "------- i = $i -------" 6 | $2 7 | done 8 | -------------------------------------------------------------------------------- /Concurrency/code/3-Mutex/timing.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | 6 | static inline double cputime_ms() 7 | { 8 | struct timespec ts; 9 | clock_gettime(CLOCK_MONOTONIC, &ts); 10 | 11 | return 1.0e3 * (double)ts.tv_sec + 1.0e-6 * (double)ts.tv_nsec; 12 | } 13 | -------------------------------------------------------------------------------- /Concurrency/code/4-Condition-Variables/1-no-Condition-Variable.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Adapted from: M. Kerrisk, The Linux Programming Interface. 3 | * 4 | * https://github.com/prm239/kerrisk/tree/master 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define DEFAULT_TOTAL 20 13 | 14 | 15 | int avail; 16 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 17 | 18 | 19 | void* producer(void* tot) 20 | { 21 | const int total = atoi((char*)tot); 22 | printf("[PRODUCER]: %d\n", total); 23 | 24 | for (int i = 0; i < total; ++i) { 25 | sleep(1); 26 | 27 | pthread_mutex_lock(&mtx); 28 | ++avail; 29 | pthread_mutex_unlock(&mtx); 30 | } 31 | 32 | printf("[PRODUCER]: end.\n"); 33 | 34 | return NULL; 35 | } 36 | 37 | 38 | void* consumer(void* tot) 39 | { 40 | int total = *(int *)tot; 41 | printf("[CONSUMER]: %d\n", total); 42 | 43 | while (total) { 44 | pthread_mutex_lock(&mtx); 45 | 46 | while (avail) { 47 | --avail; 48 | --total; 49 | } 50 | 51 | pthread_mutex_unlock(&mtx); 52 | } 53 | 54 | printf("[CONSUMER]: end.\n"); 55 | 56 | return NULL; 57 | } 58 | 59 | 60 | int main(int argc, char* argv[]) 61 | { 62 | int retcode = EXIT_FAILURE; 63 | 64 | int to_consume = 0; 65 | int producers = 
argc > 1 ? argc - 1 : 0; 66 | int started_producers = 0; 67 | 68 | pthread_t* producers_th = (pthread_t*)malloc(sizeof(pthread_t) * producers); 69 | if (!producers_th) 70 | return retcode; 71 | 72 | pthread_t consumer_th; 73 | 74 | for (int i = 0; i < producers; ++i) { 75 | to_consume += atoi(argv[i + 1]); 76 | if (pthread_create(&producers_th[i], NULL, producer, argv[i + 1])) 77 | goto producers_cleanup; 78 | ++started_producers; 79 | } 80 | 81 | if (pthread_create(&consumer_th, NULL, consumer, &to_consume)) 82 | goto producers_cleanup; 83 | 84 | retcode = EXIT_SUCCESS; 85 | 86 | pthread_join(consumer_th, NULL); 87 | 88 | producers_cleanup: 89 | for (int i = 0; i < started_producers; ++i) 90 | pthread_join(producers_th[i], NULL); 91 | 92 | free(producers_th); 93 | 94 | return retcode; 95 | } 96 | 97 | -------------------------------------------------------------------------------- /Concurrency/code/4-Condition-Variables/2-Minimal.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define DEFAULT_MAIN_SLEEP 3 7 | #define DEFAULT_THREAD_SLEEP 2 8 | 9 | 10 | pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 11 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 12 | 13 | 14 | void* threadfunc(void* vsleep_s) 15 | { 16 | int sleep_s = *((int *)vsleep_s); 17 | 18 | printf("Inside threadfunc:\n"); 19 | sleep(sleep_s); 20 | pthread_mutex_lock(&mtx); 21 | 22 | printf("Going to sleep... 
"); 23 | fflush(stdout); 24 | pthread_cond_wait(&cv, &mtx); 25 | printf("Waken up!\n"); 26 | 27 | pthread_mutex_unlock(&mtx); 28 | 29 | return NULL; 30 | } 31 | 32 | 33 | int main(int argc, char* argv[]) 34 | { 35 | int retcode = EXIT_FAILURE; 36 | 37 | int main_sleep = DEFAULT_MAIN_SLEEP; 38 | int thread_sleep = DEFAULT_THREAD_SLEEP; 39 | 40 | if (argc > 1) 41 | main_sleep = atoi(argv[1]); 42 | if (argc > 2) 43 | thread_sleep = atoi(argv[2]); 44 | 45 | 46 | pthread_t thread; 47 | 48 | if (pthread_create(&thread, NULL, threadfunc, &thread_sleep)) 49 | return retcode; 50 | 51 | sleep(main_sleep); 52 | pthread_cond_signal(&cv); 53 | 54 | pthread_join(thread, NULL); 55 | 56 | retcode = EXIT_SUCCESS; 57 | 58 | 59 | return retcode; 60 | } 61 | -------------------------------------------------------------------------------- /Concurrency/code/4-Condition-Variables/3-Better.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define DEFAULT_MAIN_SLEEP 3 8 | #define DEFAULT_THREAD_SLEEP 2 9 | 10 | 11 | pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 12 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 13 | bool set = false; 14 | 15 | void* threadfunc(void* vsleep_s) 16 | { 17 | int sleep_s = *((int *)vsleep_s); 18 | 19 | printf("Inside threadfunc:\n"); 20 | sleep(sleep_s); 21 | pthread_mutex_lock(&mtx); 22 | 23 | printf("Going to sleep... 
"); 24 | fflush(stdout); 25 | 26 | while (!set) 27 | pthread_cond_wait(&cv, &mtx); 28 | 29 | printf("Waken up!\n"); 30 | 31 | pthread_mutex_unlock(&mtx); 32 | 33 | return NULL; 34 | } 35 | 36 | 37 | int main(int argc, char* argv[]) 38 | { 39 | int retcode = EXIT_FAILURE; 40 | 41 | int main_sleep = DEFAULT_MAIN_SLEEP; 42 | int thread_sleep = DEFAULT_THREAD_SLEEP; 43 | 44 | if (argc > 1) 45 | main_sleep = atoi(argv[1]); 46 | if (argc > 2) 47 | thread_sleep = atoi(argv[2]); 48 | 49 | 50 | pthread_t thread; 51 | 52 | if (pthread_create(&thread, NULL, threadfunc, &thread_sleep)) 53 | return retcode; 54 | 55 | sleep(main_sleep); 56 | pthread_mutex_lock(&mtx); 57 | set = true; 58 | pthread_mutex_unlock(&mtx); 59 | 60 | pthread_cond_signal(&cv); 61 | 62 | pthread_join(thread, NULL); 63 | 64 | retcode = EXIT_SUCCESS; 65 | 66 | 67 | return retcode; 68 | } 69 | -------------------------------------------------------------------------------- /Concurrency/code/4-Condition-Variables/4-Condition-Variable.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Adapted from: M. Kerrisk, The Linux Programming Interface. 
3 | * 4 | * https://github.com/prm239/kerrisk/tree/master 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define DEFAULT_TOTAL 20 13 | 14 | 15 | int avail; 16 | pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 17 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 18 | 19 | 20 | void* producer(void* tot) 21 | { 22 | const int total = atoi((char*)tot); 23 | printf("[PRODUCER]: %d\n", total); 24 | 25 | for (int i = 0; i < total; ++i) { 26 | sleep(1); 27 | 28 | pthread_mutex_lock(&mtx); 29 | ++avail; 30 | pthread_mutex_unlock(&mtx); 31 | } 32 | 33 | pthread_cond_signal(&cv); 34 | 35 | printf("[PRODUCER]: end.\n"); 36 | 37 | return NULL; 38 | } 39 | 40 | 41 | void* consumer(void* tot) 42 | { 43 | int total = *(int *)tot; 44 | printf("[CONSUMER]: %d\n", total); 45 | 46 | while (total) { 47 | pthread_mutex_lock(&mtx); 48 | 49 | while (!avail) 50 | pthread_cond_wait(&cv, &mtx); 51 | 52 | while (avail) { 53 | --avail; 54 | --total; 55 | } 56 | 57 | pthread_mutex_unlock(&mtx); 58 | } 59 | 60 | printf("[CONSUMER]: end.\n"); 61 | 62 | return NULL; 63 | } 64 | 65 | 66 | int main(int argc, char* argv[]) 67 | { 68 | int retcode = EXIT_FAILURE; 69 | 70 | int to_consume = 0; 71 | int producers = argc > 1 ? 
argc - 1 : 0; 72 | int started_producers = 0; 73 | 74 | pthread_t* producers_th = (pthread_t*)malloc(sizeof(pthread_t) * producers); 75 | if (!producers_th) 76 | return retcode; 77 | 78 | pthread_t consumer_th; 79 | 80 | for (int i = 0; i < producers; ++i) { 81 | to_consume += atoi(argv[i + 1]); 82 | if (pthread_create(&producers_th[i], NULL, producer, argv[i + 1])) 83 | goto producers_cleanup; 84 | ++started_producers; 85 | } 86 | 87 | if (pthread_create(&consumer_th, NULL, consumer, &to_consume)) 88 | goto producers_cleanup; 89 | 90 | retcode = EXIT_SUCCESS; 91 | 92 | pthread_join(consumer_th, NULL); 93 | 94 | producers_cleanup: 95 | for (int i = 0; i < started_producers; ++i) 96 | pthread_join(producers_th[i], NULL); 97 | 98 | free(producers_th); 99 | 100 | return retcode; 101 | } 102 | 103 | -------------------------------------------------------------------------------- /Concurrency/code/4-Condition-Variables/5-Bad-Signal.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define WTHREADS 10 7 | 8 | pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 9 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 10 | int order = 0; 11 | 12 | 13 | void* threadfunc(void* vorder) 14 | { 15 | int myorder = *((int*)vorder); 16 | 17 | pthread_mutex_lock(&mtx); 18 | 19 | while (order != myorder) 20 | pthread_cond_wait(&cv, &mtx); 21 | 22 | printf("Set %d\n", myorder); 23 | ++order; 24 | 25 | pthread_mutex_unlock(&mtx); 26 | 27 | 28 | pthread_cond_signal(&cv); 29 | 30 | return NULL; 31 | } 32 | 33 | 34 | int main() 35 | { 36 | int retcode = EXIT_FAILURE; 37 | 38 | pthread_t threads[WTHREADS]; 39 | int all_order[WTHREADS]; 40 | 41 | int running_threads = 0; 42 | for (int i = 0; i < WTHREADS; ++i) { 43 | all_order[i] = WTHREADS - i; 44 | if (pthread_create(&threads[i], NULL, threadfunc, &all_order[i])) 45 | goto threads_cleanup; 46 | ++running_threads; 47 | } 48 | 49 | printf("About to 
start...\n"); 50 | sleep(1); 51 | pthread_mutex_lock(&mtx); 52 | ++order; 53 | pthread_mutex_unlock(&mtx); 54 | 55 | pthread_cond_signal(&cv); 56 | 57 | 58 | retcode = EXIT_SUCCESS; 59 | 60 | threads_cleanup: 61 | for (int i = 0; i < running_threads; ++i) 62 | pthread_join(threads[i], NULL); 63 | 64 | return retcode; 65 | } 66 | -------------------------------------------------------------------------------- /Concurrency/code/4-Condition-Variables/6-Broadcast.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define WTHREADS 10 7 | 8 | pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 9 | pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; 10 | int order = 0; 11 | 12 | 13 | void* threadfunc(void* vorder) 14 | { 15 | int myorder = *((int*)vorder); 16 | 17 | pthread_mutex_lock(&mtx); 18 | 19 | while (order != myorder) 20 | pthread_cond_wait(&cv, &mtx); 21 | 22 | printf("Set %d\n", myorder); 23 | ++order; 24 | 25 | pthread_mutex_unlock(&mtx); 26 | 27 | 28 | pthread_cond_broadcast(&cv); 29 | 30 | return NULL; 31 | } 32 | 33 | 34 | int main() 35 | { 36 | int retcode = EXIT_FAILURE; 37 | 38 | pthread_t threads[WTHREADS]; 39 | int all_order[WTHREADS]; 40 | 41 | int running_threads = 0; 42 | for (int i = 0; i < WTHREADS; ++i) { 43 | all_order[i] = WTHREADS - i; 44 | if (pthread_create(&threads[i], NULL, threadfunc, &all_order[i])) 45 | goto threads_cleanup; 46 | ++running_threads; 47 | } 48 | 49 | printf("About to start...\n"); 50 | sleep(1); 51 | pthread_mutex_lock(&mtx); 52 | ++order; 53 | pthread_mutex_unlock(&mtx); 54 | 55 | pthread_cond_broadcast(&cv); 56 | 57 | 58 | retcode = EXIT_SUCCESS; 59 | 60 | threads_cleanup: 61 | for (int i = 0; i < running_threads; ++i) 62 | pthread_join(threads[i], NULL); 63 | 64 | return retcode; 65 | } 66 | -------------------------------------------------------------------------------- /Concurrency/code/4-Condition-Variables/Makefile: 
-------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -Wextra -pedantic -g 3 | 4 | .SUFFIXES : 5 | 6 | .PHONY : all 7 | all: 1-no-Condition-Variable.x 2-Minimal.x 3-Better.x 4-Condition-Variable.x \ 8 | 5-Bad-Signal.x 6-Broadcast.x 9 | 10 | .PHONY : clean 11 | clean : 12 | rm -f *.x 13 | 14 | %.x : %.c 15 | $(CC) $(CFLAGS) -pthread -o $@ $< 16 | -------------------------------------------------------------------------------- /Concurrency/code/X-Circular-Buffer/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -Wextra -pedantic -g 3 | 4 | .SUFFIXES : 5 | 6 | .PHONY : all 7 | all: libcircbuf.so test.x 8 | 9 | .PHONY : clean 10 | clean : 11 | rm -f *.x *.o *.so 12 | 13 | circbuf.o : circbuf.c 14 | $(CC) -c -fPIC $(CFLAGS) -o $@ $< 15 | 16 | libcircbuf.so : circbuf.o 17 | $(CC) -shared -o $@ $< 18 | 19 | test.x : test.c libcircbuf.so 20 | $(CC) $(CFLAGS) -I. -L. '-Wl,-rpath=$$ORIGIN' -lcircbuf -o $@ $< 21 | -------------------------------------------------------------------------------- /Concurrency/code/X-Circular-Buffer/circbuf.c: -------------------------------------------------------------------------------- 1 | #include "circbuf.h" 2 | 3 | #include 4 | 5 | 6 | inline static void ptr_advance(const struct circbuf* cb, 7 | void** ptr) 8 | { 9 | char* ptr_c = (char*)(*ptr); 10 | char* const end_buffer = (char*)cb->buffer + cb->capacity * cb->el_size; 11 | ptr_c += cb->el_size; 12 | *ptr = ptr_c < end_buffer ? 
ptr_c : cb->buffer; 13 | } 14 | 15 | 16 | void cb_init(struct circbuf* cb, 17 | void* buf, 18 | size_t el_size, 19 | size_t capacity) 20 | { 21 | cb->buffer = buf; 22 | cb->el_size = el_size; 23 | cb->capacity = capacity; 24 | 25 | cb->head = buf; 26 | cb->tail = buf; 27 | cb->size = 0; 28 | } 29 | 30 | void cb_unset(struct circbuf* cb) 31 | { 32 | cb_init(cb, NULL, 0, 0); 33 | } 34 | 35 | size_t cb_size(const struct circbuf* cb) 36 | { 37 | return cb->size; 38 | } 39 | 40 | size_t cb_capacity(const struct circbuf* cb) 41 | { 42 | return cb->capacity; 43 | } 44 | 45 | bool cb_push(struct circbuf* cb, const void* el) 46 | { 47 | if (cb->size == cb->capacity) 48 | return false; 49 | 50 | memcpy(cb->head, el, cb->el_size); 51 | ptr_advance(cb, &cb->head); 52 | ++cb->size; 53 | return true; 54 | } 55 | 56 | bool cb_pop(struct circbuf* cb, void* el) 57 | { 58 | if (!cb->size) 59 | return 0; 60 | 61 | memcpy(el, cb->tail, cb->el_size); 62 | ptr_advance(cb, &cb->tail); 63 | --cb->size; 64 | return true; 65 | } 66 | 67 | 68 | #ifdef DEBUG 69 | 70 | ptrdiff_t cb_head_offset(const struct circbuf* cb) 71 | { 72 | ptrdiff_t dptr = (char*)cb->head - (char*)cb->buffer; 73 | return dptr / cb->el_size; 74 | } 75 | 76 | ptrdiff_t cb_tail_offset(const struct circbuf* cb) 77 | { 78 | ptrdiff_t dptr = (char*)cb->tail - (char*)cb->buffer; 79 | return dptr / cb->el_size; 80 | } 81 | 82 | #endif //DEBUG 83 | -------------------------------------------------------------------------------- /Concurrency/code/X-Circular-Buffer/circbuf.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | struct circbuf { 7 | void* buffer; 8 | size_t el_size; 9 | size_t capacity; 10 | 11 | void* head; 12 | void* tail; 13 | size_t size; 14 | }; 15 | 16 | 17 | void cb_init(struct circbuf* cb, 18 | void* buf, 19 | size_t el_size, 20 | size_t capacity); 21 | void cb_unset(struct circbuf* cb); 22 | size_t cb_size(const struct 
circbuf* cb); 23 | size_t cb_capacity(const struct circbuf* cb); 24 | bool cb_push(struct circbuf* cb, const void* el); 25 | bool cb_pop(struct circbuf* cb, void* el); 26 | 27 | #ifdef DEBUG 28 | ptrdiff_t cb_head_offset(const struct circbuf* cb); 29 | ptrdiff_t cb_tail_offset(const struct circbuf* cb); 30 | #endif //DEBUG 31 | -------------------------------------------------------------------------------- /Concurrency/code/X-Circular-Buffer/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "circbuf.h" 5 | 6 | #define DEFAULT_LEN 10 7 | #define MULTIPLICITY_FACTOR 10 8 | 9 | #ifdef DEBUG 10 | #define OFFSET_ENABLED 1 11 | #else 12 | #define OFFSET_ENABLED 0 13 | #endif //DEBUG 14 | 15 | void print_test_result(size_t n, const char* name, bool passed) 16 | { 17 | printf("TEST %2lu: %-50s [%.6s]\n", 18 | n, 19 | name, 20 | passed ? "PASSED" : "FAILED"); 21 | } 22 | 23 | 24 | bool test_push_until_full(struct circbuf* cb, 25 | size_t* written, 26 | double* checkbuf) 27 | { 28 | const size_t capacity = cb_capacity(cb); 29 | const size_t size = cb_size(cb); 30 | const size_t empty = size < capacity ? 
capacity - size : 0; 31 | *written = 0; 32 | 33 | double el; 34 | for (size_t i = 0; i < empty; ++i) { 35 | el = i; 36 | if (!cb_push(cb, &el)) { 37 | return false; 38 | } else { 39 | checkbuf[*written] = el; 40 | ++*written; 41 | } 42 | } 43 | 44 | ++el; 45 | if (cb_push(cb, &el)) 46 | return false; 47 | 48 | return empty == *written; 49 | } 50 | 51 | 52 | bool test_pop_until_empty(struct circbuf* cb, 53 | const size_t* written, 54 | const double* checkbuf) 55 | { 56 | const size_t size = cb_size(cb); 57 | 58 | if (*written != size) 59 | return false; 60 | 61 | double el; 62 | for (size_t i = 0; i < size; ++i) { 63 | if (!cb_pop(cb, &el)) 64 | return false; 65 | 66 | if (el != checkbuf[i]) 67 | return false; 68 | } 69 | 70 | if (cb_pop(cb, &el)) 71 | return false; 72 | 73 | return !cb_size(cb); 74 | } 75 | 76 | 77 | bool test_push_then_pop(struct circbuf* cb) 78 | { 79 | const size_t capacity = cb_capacity(cb); 80 | const size_t steps = capacity * MULTIPLICITY_FACTOR; 81 | 82 | double el; 83 | while (cb_size(cb)) 84 | if (!cb_pop(cb, &el)) 85 | return false; 86 | 87 | for (size_t i = 0; i < steps; ++i) { 88 | el = i; 89 | if (!cb_push(cb, &el)) 90 | return false; 91 | 92 | if (!cb_pop(cb, &el)) 93 | return false; 94 | 95 | if (el != i) 96 | return false; 97 | 98 | #ifdef DEBUG 99 | if (cb_head_offset(cb) != cb_tail_offset(cb)) 100 | return false; 101 | #endif //DEBUG 102 | } 103 | 104 | return true; 105 | } 106 | 107 | 108 | bool test_multiple_push_then_pop(struct circbuf* cb, 109 | size_t batches, 110 | const size_t* push_n, 111 | const double* push_v, 112 | const size_t* pop_n, 113 | bool* inserted) 114 | { 115 | size_t push_idx = 0; 116 | size_t pop_idx = 0; 117 | 118 | for (size_t batch = 0; batch < batches; ++batch) 119 | { 120 | // push 121 | for (size_t i = 0; i < push_n[batch]; ++i) { 122 | if (cb_push(cb, &push_v[push_idx])) 123 | inserted[push_idx] = true; 124 | else 125 | inserted[push_idx] = false; 126 | 127 | ++push_idx; 128 | } 129 | 130 | // pop 131 
| for (size_t i = 0; i < pop_n[batch]; ++i) { 132 | double el; 133 | if (!cb_pop(cb, &el)) 134 | continue; 135 | 136 | if (inserted[pop_idx]) { 137 | if (el != push_v[pop_idx]) 138 | return false; 139 | } 140 | 141 | ++pop_idx; 142 | } 143 | } 144 | 145 | return true; 146 | } 147 | 148 | 149 | 150 | int main(int argc, char* argv[]) 151 | { 152 | int retcode = EXIT_FAILURE; 153 | 154 | int len = DEFAULT_LEN; 155 | if (argc == 2) 156 | len = atoi(argv[1]); 157 | 158 | double* buffer = (double*)malloc(len * sizeof(double)); 159 | if (!buffer) { 160 | fprintf(stderr, "Impossible to allocate buffer.\n"); 161 | return retcode; 162 | } 163 | 164 | double* checkbuf = (double*)malloc(len * sizeof(double)); 165 | if (!checkbuf) { 166 | fprintf(stderr, "Impossible to allocate checkbuf.\n"); 167 | goto free_buffer; 168 | } 169 | 170 | size_t written; 171 | 172 | retcode = EXIT_SUCCESS; 173 | 174 | struct circbuf cb; 175 | cb_init(&cb, buffer, sizeof(double), len); 176 | 177 | printf("circbuf test suite (offset tests %s)\n\n", 178 | OFFSET_ENABLED ? 
"ENABLED" : "DISABLED"); 179 | 180 | { 181 | // TEST 0: empty at the beginning 182 | bool passed = cb_size(&cb) == 0; 183 | #ifdef DEBUG 184 | passed &= cb_head_offset(&cb) == 0; 185 | passed &= cb_tail_offset(&cb) == 0; 186 | #endif //DEBUG 187 | print_test_result(0, "empty at the beginning", passed); 188 | } 189 | 190 | { 191 | // TEST 1: full circbuf from empty 192 | bool ret_test = test_push_until_full(&cb, &written, checkbuf); 193 | bool ok_written = written == (size_t)len; 194 | bool ok_inserted = true; 195 | for (size_t i = 0; i < written; ++i) { 196 | if (checkbuf[i] != i) { 197 | ok_inserted = false; 198 | break; 199 | } 200 | } 201 | 202 | bool passed = ret_test && ok_written && ok_inserted; 203 | #ifdef DEBUG 204 | passed &= cb_head_offset(&cb) == 0; 205 | passed &= cb_tail_offset(&cb) == 0; 206 | #endif //DEBUG 207 | print_test_result(1, "full circbuf from empty", passed); 208 | } 209 | 210 | { 211 | // TEST 2: empty circbuf from full 212 | bool passed = test_pop_until_empty(&cb, &written, checkbuf); 213 | #ifdef DEBUG 214 | passed &= cb_head_offset(&cb) == 0; 215 | passed &= cb_head_offset(&cb) == 0; 216 | #endif //DEBUG 217 | print_test_result(2, "empty circbuf from full", passed); 218 | } 219 | 220 | { 221 | // TEST 3: push then pop 222 | bool passed = test_push_then_pop(&cb); 223 | print_test_result(3, "push then pop", passed); 224 | } 225 | 226 | { 227 | // TEST 4: multiple push then pop 228 | size_t batches = 5; 229 | size_t push_n[] = {3, 4, 5, 0, 2}; 230 | size_t pop_n[] = {3, 3, 3, 3, 3}; 231 | double push_v[] = { 0.0, 1.0, 2.0, 3.0, 4.0, 232 | 5.0, 6.0, 7.0, 8.0, 9.0, 233 | 10.0, 11.0, 12.0, 13.0, 14.0}; 234 | bool inserted[15]; 235 | 236 | bool passed = test_multiple_push_then_pop(&cb, 237 | batches, 238 | push_n, 239 | push_v, 240 | pop_n, 241 | inserted); 242 | print_test_result(4, "multiple push then pop", passed); 243 | } 244 | 245 | 246 | cb_unset(&cb); 247 | free(checkbuf); 248 | free_buffer: 249 | free(buffer); 250 | 251 | return 
retcode; 252 | } 253 | -------------------------------------------------------------------------------- /Concurrency/code/Y-Thread-Safe-CB/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Wall -Wextra -pedantic -g 3 | 4 | .SUFFIXES : 5 | 6 | .PHONY : all 7 | all: libtscircbuf.so test.x 8 | 9 | .PHONY : clean 10 | clean : 11 | rm -f *.x *.o *.so 12 | 13 | circbuf.o : circbuf.c circbuf.h 14 | $(CC) -c -fPIC $(CFLAGS) -o $@ $< 15 | 16 | tscircbuf.o : tscircbuf.c tscircbuf.h 17 | $(CC) -c -fPIC $(CFLAGS) -pthread -o $@ $< 18 | 19 | libtscircbuf.so : circbuf.o tscircbuf.o 20 | $(CC) -pthread -shared -o $@ $^ 21 | 22 | test.x: test.c libtscircbuf.so 23 | $(CC) $(CFLAGS) -pthread -I. -L. '-Wl,-rpath=$$ORIGIN' -ltscircbuf -o $@ $< 24 | -------------------------------------------------------------------------------- /Concurrency/code/Y-Thread-Safe-CB/circbuf.c: -------------------------------------------------------------------------------- 1 | #include "circbuf.h" 2 | 3 | #include 4 | 5 | 6 | inline static void ptr_advance(const struct circbuf* cb, 7 | void** ptr) 8 | { 9 | char* ptr_c = (char*)(*ptr); 10 | char* const end_buffer = (char*)cb->buffer + cb->capacity * cb->el_size; 11 | ptr_c += cb->el_size; 12 | *ptr = ptr_c < end_buffer ? 
ptr_c : cb->buffer; 13 | } 14 | 15 | 16 | void cb_init(struct circbuf* cb, 17 | void* buf, 18 | size_t el_size, 19 | size_t capacity) 20 | { 21 | cb->buffer = buf; 22 | cb->el_size = el_size; 23 | cb->capacity = capacity; 24 | 25 | cb->head = buf; 26 | cb->tail = buf; 27 | cb->size = 0; 28 | } 29 | 30 | void cb_unset(struct circbuf* cb) 31 | { 32 | cb_init(cb, NULL, 0, 0); 33 | } 34 | 35 | size_t cb_size(const struct circbuf* cb) 36 | { 37 | return cb->size; 38 | } 39 | 40 | size_t cb_capacity(const struct circbuf* cb) 41 | { 42 | return cb->capacity; 43 | } 44 | 45 | bool cb_push(struct circbuf* cb, const void* el) 46 | { 47 | if (cb->size == cb->capacity) 48 | return false; 49 | 50 | memcpy(cb->head, el, cb->el_size); 51 | ptr_advance(cb, &cb->head); 52 | ++cb->size; 53 | return true; 54 | } 55 | 56 | bool cb_pop(struct circbuf* cb, void* el) 57 | { 58 | if (!cb->size) 59 | return 0; 60 | 61 | memcpy(el, cb->tail, cb->el_size); 62 | ptr_advance(cb, &cb->tail); 63 | --cb->size; 64 | return true; 65 | } 66 | 67 | 68 | #ifdef DEBUG 69 | 70 | ptrdiff_t cb_head_offset(const struct circbuf* cb) 71 | { 72 | ptrdiff_t dptr = (char*)cb->head - (char*)cb->buffer; 73 | return dptr / cb->el_size; 74 | } 75 | 76 | ptrdiff_t cb_tail_offset(const struct circbuf* cb) 77 | { 78 | ptrdiff_t dptr = (char*)cb->tail - (char*)cb->buffer; 79 | return dptr / cb->el_size; 80 | } 81 | 82 | #endif //DEBUG 83 | -------------------------------------------------------------------------------- /Concurrency/code/Y-Thread-Safe-CB/circbuf.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | struct circbuf { 7 | void* buffer; 8 | size_t el_size; 9 | size_t capacity; 10 | 11 | void* head; 12 | void* tail; 13 | size_t size; 14 | }; 15 | 16 | 17 | void cb_init(struct circbuf* cb, 18 | void* buf, 19 | size_t el_size, 20 | size_t capacity); 21 | void cb_unset(struct circbuf* cb); 22 | size_t cb_size(const struct 
circbuf* cb); 23 | size_t cb_capacity(const struct circbuf* cb); 24 | bool cb_push(struct circbuf* cb, const void* el); 25 | bool cb_pop(struct circbuf* cb, void* el); 26 | 27 | #ifdef DEBUG 28 | ptrdiff_t cb_head_offset(const struct circbuf* cb); 29 | ptrdiff_t cb_tail_offset(const struct circbuf* cb); 30 | #endif //DEBUG 31 | -------------------------------------------------------------------------------- /Concurrency/code/Y-Thread-Safe-CB/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "tscircbuf.h" 7 | 8 | #define DEFAULT_LEN 1000 9 | #define DEFAULT_PRODUCERS 4 10 | #define DEFAULT_CONSUMERS 4 11 | 12 | #define TRUE (void*)true 13 | #define FALSE (void*)false 14 | 15 | 16 | void print_test_result(size_t n, const char* name, bool passed) 17 | { 18 | printf("TEST %2lu: %-50s [%.6s]\n", 19 | n, 20 | name, 21 | passed ? "PASSED" : "FAILED"); 22 | } 23 | 24 | 25 | void* producer_1(void* vtscb) 26 | { 27 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 28 | double el = 7.0; 29 | 30 | sleep(1); 31 | if (!tscb_try_push(tscb, &el)) 32 | return FALSE; 33 | 34 | return TRUE; 35 | } 36 | 37 | 38 | void* consumer_1(void* vtscb) 39 | { 40 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 41 | double el; 42 | 43 | if (tscb_size(tscb)) // BEWARE: fragile! sleep-safe! 
44 | return FALSE; 45 | 46 | if (!tscb_wait_and_pop(tscb, &el)) 47 | return FALSE; 48 | 49 | if (el != 7.0) 50 | return FALSE; 51 | 52 | if (tscb_size(tscb)) 53 | return FALSE; 54 | 55 | return TRUE; 56 | } 57 | 58 | 59 | void* producer_2(void* vtscb) 60 | { 61 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 62 | double el = 0.0; 63 | 64 | for (size_t i = 0; i < DEFAULT_LEN + 1; ++i) { 65 | if (!tscb_wait_and_push(tscb, &el)) { 66 | return FALSE; 67 | } 68 | ++el; 69 | } 70 | 71 | return TRUE; 72 | } 73 | 74 | 75 | void* consumer_2(void* vtscb) 76 | { 77 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 78 | double el; 79 | 80 | sleep(1); 81 | 82 | for (size_t i = 0; i < DEFAULT_LEN; ++i) { 83 | if (!tscb_try_pop(tscb, &el)) 84 | return FALSE; 85 | 86 | if (el != (double)i) 87 | return FALSE; 88 | } 89 | 90 | sleep(1); 91 | 92 | if (!tscb_try_pop(tscb, &el)) 93 | return FALSE; 94 | 95 | if (el != (double)DEFAULT_LEN) 96 | return FALSE; 97 | 98 | if (tscb_size(tscb)) 99 | return FALSE; 100 | 101 | return TRUE; 102 | } 103 | 104 | 105 | void* producer_3(void* vtscb) 106 | { 107 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 108 | double el = 0.0; 109 | 110 | for (size_t i = 0; i < DEFAULT_LEN / DEFAULT_PRODUCERS; ++i) { 111 | if (!tscb_try_push(tscb, &el)) { 112 | printf("producer_3 fail: %ld\n", i); 113 | return FALSE; 114 | } 115 | } 116 | 117 | return TRUE; 118 | } 119 | 120 | 121 | void* consumer_3(void* vtscb) 122 | { 123 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 124 | double el; 125 | 126 | for (size_t i = 0; i < DEFAULT_LEN / DEFAULT_CONSUMERS; ++i) { 127 | if (!tscb_wait_and_pop(tscb, &el)) { 128 | printf("consumer_3 fail: %ld\n", i); 129 | return FALSE; 130 | } 131 | } 132 | 133 | return TRUE; 134 | } 135 | 136 | 137 | void* producer_4(void* vtscb) 138 | { 139 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 140 | 141 | sleep(5); 142 | tscb_abort_wait(tscb); 143 | 144 | return TRUE; 145 | } 146 | 147 | 148 | void* 
consumer_4(void* vtscb) 149 | { 150 | struct tscircbuf* tscb = (struct tscircbuf*)vtscb; 151 | double el; 152 | 153 | tscb_wait_and_pop(tscb, &el); 154 | 155 | return TRUE; 156 | } 157 | 158 | 159 | int main(int argc, char* argv[]) 160 | { 161 | int retcode = EXIT_FAILURE; 162 | 163 | int len = DEFAULT_LEN; 164 | if (argc == 2) 165 | len = atoi(argv[1]); 166 | 167 | 168 | double* buffer = (double*)malloc(len * sizeof(double)); 169 | if (!buffer) { 170 | fprintf(stderr, "Impossible to allocate buffer.\n"); 171 | return retcode; 172 | } 173 | 174 | struct tscircbuf tscb; 175 | if (!tscb_init(&tscb, buffer, sizeof(double), len)) 176 | goto free_buffer; 177 | 178 | 179 | pthread_t producers[DEFAULT_PRODUCERS], consumers[DEFAULT_CONSUMERS]; 180 | 181 | pthread_create(&producers[0], NULL, producer_1, (void*)&tscb); 182 | pthread_create(&consumers[0], NULL, consumer_1, (void*)&tscb); 183 | 184 | void *ret_consumer, *ret_producer; 185 | pthread_join(consumers[0], &ret_consumer); 186 | pthread_join(producers[0], &ret_producer); 187 | 188 | bool passed = (bool)ret_producer & (bool)ret_consumer; 189 | print_test_result(1, "producer: try, consumer: wait", passed); 190 | 191 | 192 | pthread_create(&producers[0], NULL, producer_2, (void*)&tscb); 193 | pthread_create(&consumers[0], NULL, consumer_2, (void*)&tscb); 194 | 195 | pthread_join(consumers[0], &ret_consumer); 196 | pthread_join(producers[0], &ret_producer); 197 | 198 | passed = (bool)ret_producer & (bool)ret_consumer; 199 | print_test_result(2, "producer: wait, consumer: try", passed); 200 | 201 | 202 | for (size_t i = 0; i < DEFAULT_PRODUCERS; ++i) 203 | pthread_create(&producers[i], NULL, producer_3, (void*)&tscb); 204 | 205 | passed = true; 206 | for (size_t i = 0; i < DEFAULT_PRODUCERS; ++i) { 207 | pthread_join(producers[i], &ret_producer); 208 | passed &= (bool)ret_producer; 209 | } 210 | 211 | size_t expected_size = DEFAULT_LEN / DEFAULT_PRODUCERS * DEFAULT_PRODUCERS; 212 | passed &= (tscb_size(&tscb) == 
expected_size); 213 | 214 | for (size_t i = 0; i < DEFAULT_CONSUMERS; ++i) { 215 | pthread_create(&consumers[i], NULL, consumer_3, (void*)&tscb); 216 | } 217 | 218 | for (size_t i = 0; i < DEFAULT_CONSUMERS; ++i) { 219 | pthread_join(consumers[i], &ret_consumer); 220 | passed &= (bool)ret_consumer; 221 | } 222 | 223 | expected_size -= DEFAULT_LEN / DEFAULT_CONSUMERS * DEFAULT_CONSUMERS; 224 | size_t actual_size = tscb_size(&tscb); 225 | passed &= (actual_size == expected_size); 226 | 227 | print_test_result(3, "multiple producers: try, multiple consumers: wait", passed); 228 | 229 | for (size_t i = 0; i < actual_size; ++i) { 230 | double el; 231 | if (!tscb_try_pop(&tscb, &el)) 232 | break; 233 | } 234 | 235 | passed = tscb_size(&tscb) == 0; 236 | 237 | print_test_result(4, "empty buffer", passed); 238 | 239 | 240 | pthread_create(&producers[0], NULL, producer_4, (void*)&tscb); 241 | pthread_create(&consumers[0], NULL, consumer_4, (void*)&tscb); 242 | pthread_create(&consumers[1], NULL, consumer_4, (void*)&tscb); 243 | 244 | pthread_join(consumers[1], &ret_consumer); 245 | passed = (bool)ret_consumer; 246 | pthread_join(consumers[0], &ret_consumer); 247 | passed &= (bool)ret_consumer; 248 | pthread_join(producers[0], &ret_producer); 249 | passed &= (bool)ret_producer; 250 | 251 | print_test_result(5, "abort wait", passed); 252 | 253 | 254 | tscb_unset(&tscb); 255 | 256 | free_buffer: 257 | free(buffer); 258 | 259 | return retcode; 260 | } 261 | -------------------------------------------------------------------------------- /Concurrency/code/Y-Thread-Safe-CB/tscircbuf.c: -------------------------------------------------------------------------------- 1 | #include "tscircbuf.h" 2 | 3 | 4 | bool tscb_init(struct tscircbuf* tscb, 5 | void* buf, 6 | size_t el_size, 7 | size_t capacity) 8 | { 9 | cb_init(&tscb->cb, buf, el_size, capacity); 10 | 11 | if (pthread_mutex_init(&tscb->mtx, NULL)) 12 | return false; 13 | 14 | if (pthread_cond_init(&tscb->cv, NULL)) 15 | return 
false; 16 | 17 | tscb->abort = false; 18 | 19 | return true; 20 | } 21 | 22 | 23 | bool tscb_unset(struct tscircbuf* tscb) 24 | { 25 | if (pthread_cond_destroy(&tscb->cv)) 26 | return false; 27 | 28 | if (pthread_mutex_destroy(&tscb->mtx)) 29 | return false; 30 | 31 | tscb->abort = true; 32 | 33 | cb_unset(&tscb->cb); 34 | 35 | return true; 36 | } 37 | 38 | 39 | size_t tscb_size(struct tscircbuf* tscb) 40 | { 41 | pthread_mutex_lock(&tscb->mtx); 42 | size_t size = cb_size(&tscb->cb); 43 | pthread_mutex_unlock(&tscb->mtx); 44 | 45 | return size; 46 | } 47 | 48 | 49 | size_t tscb_capacity(const struct tscircbuf* tscb) 50 | { 51 | return cb_capacity(&tscb->cb); 52 | } 53 | 54 | 55 | bool tscb_try_push(struct tscircbuf* tscb, const void* el) 56 | { 57 | pthread_mutex_lock(&tscb->mtx); 58 | 59 | if (cb_size(&tscb->cb) == cb_capacity(&tscb->cb)) { 60 | pthread_mutex_unlock(&tscb->mtx); 61 | return false; 62 | } 63 | 64 | bool retval = cb_push(&tscb->cb, el); 65 | pthread_mutex_unlock(&tscb->mtx); 66 | pthread_cond_broadcast(&tscb->cv); 67 | 68 | return retval; 69 | } 70 | 71 | 72 | bool tscb_wait_and_push(struct tscircbuf* tscb, const void* el) 73 | { 74 | pthread_mutex_lock(&tscb->mtx); 75 | while (cb_size(&tscb->cb) == cb_capacity(&tscb->cb) && !tscb->abort) 76 | pthread_cond_wait(&tscb->cv, &tscb->mtx); 77 | 78 | if (tscb->abort) { 79 | pthread_mutex_unlock(&tscb->mtx); 80 | return false; 81 | } 82 | 83 | bool retval = cb_push(&tscb->cb, el); 84 | pthread_mutex_unlock(&tscb->mtx); 85 | pthread_cond_broadcast(&tscb->cv); 86 | 87 | return retval; 88 | } 89 | 90 | 91 | bool tscb_try_pop(struct tscircbuf* tscb, void* el) 92 | { 93 | pthread_mutex_lock(&tscb->mtx); 94 | 95 | if (cb_size(&tscb->cb) == 0) { 96 | pthread_mutex_unlock(&tscb->mtx); 97 | return false; 98 | } 99 | 100 | bool retval = cb_pop(&tscb->cb, el); 101 | pthread_mutex_unlock(&tscb->mtx); 102 | pthread_cond_broadcast(&tscb->cv); 103 | 104 | return retval; 105 | } 106 | 107 | 108 | bool 
tscb_wait_and_pop(struct tscircbuf* tscb, void* el) 109 | { 110 | pthread_mutex_lock(&tscb->mtx); 111 | while (cb_size(&tscb->cb) == 0 && !tscb->abort) 112 | pthread_cond_wait(&tscb->cv, &tscb->mtx); 113 | 114 | if (tscb->abort) { 115 | pthread_mutex_unlock(&tscb->mtx); 116 | return false; 117 | } 118 | 119 | bool retval = cb_pop(&tscb->cb, el); 120 | pthread_mutex_unlock(&tscb->mtx); 121 | pthread_cond_broadcast(&tscb->cv); 122 | 123 | return retval; 124 | } 125 | 126 | 127 | void tscb_abort_wait(struct tscircbuf* tscb) 128 | { 129 | pthread_mutex_lock(&tscb->mtx); 130 | tscb->abort = true; 131 | pthread_mutex_unlock(&tscb->mtx); 132 | 133 | pthread_cond_broadcast(&tscb->cv); 134 | } 135 | 136 | 137 | void tscb_reset_abort(struct tscircbuf* tscb) 138 | { 139 | pthread_mutex_lock(&tscb->mtx); 140 | tscb->abort = false; 141 | pthread_mutex_unlock(&tscb->mtx); 142 | 143 | pthread_cond_broadcast(&tscb->cv); 144 | } 145 | 146 | -------------------------------------------------------------------------------- /Concurrency/code/Y-Thread-Safe-CB/tscircbuf.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "circbuf.h" 4 | #include 5 | 6 | 7 | struct tscircbuf { 8 | struct circbuf cb; 9 | pthread_mutex_t mtx; 10 | pthread_cond_t cv; 11 | bool abort; 12 | }; 13 | 14 | bool tscb_init(struct tscircbuf* tscb, 15 | void* buf, 16 | size_t el_size, 17 | size_t capacity); 18 | bool tscb_unset(struct tscircbuf* tscb); 19 | size_t tscb_size(struct tscircbuf* tscb); 20 | size_t tscb_capacity(const struct tscircbuf* tscb); 21 | bool tscb_try_push(struct tscircbuf* tscb, const void* el); 22 | bool tscb_wait_and_push(struct tscircbuf* tscb, const void* el); 23 | bool tscb_try_pop(struct tscircbuf* tscb, void* el); 24 | bool tscb_wait_and_pop(struct tscircbuf* tscb, void* el); 25 | void tscb_abort_wait(struct tscircbuf* tscb); 26 | void tscb_reset_abort(struct tscircbuf* tscb); 27 | 
-------------------------------------------------------------------------------- /Concurrency/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /GPU/DSSC-EXAME-README.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/DSSC-EXAME-README.pdf -------------------------------------------------------------------------------- /GPU/Jacobi-project/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/Jacobi-project/.DS_Store -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/background.html: -------------------------------------------------------------------------------- 1 |

Background

2 |

The algorithm used in this program solves Laplace’s equation on an evenly spaced grid through the use of a simple Jacobi iteration technique. The equation is a partial differential equation named after Pierre-Simon Laplace and is important in many fields of science: namely the fields of electromagnetism, astronomy and fluid dynamics. The physical interpretation of the equation is that it describes the behavior of potentials.

3 |

The equation has the form: Equation 1

4 |

A practical solution to this equation is the use of a Jacobi iteration that employs numerical second derivatives. Let's assume that we would like to know the state of heat flow across a metal surface where the source is coming from one of the corners of the square surface.

5 |

To tackle this, we would set up a two dimensional grid to represent the surface, and we will divide it evenly into square regions. We can simulate the heat source by defining the boundary conditions along the sides of the grid. In this case, we will be setting the bottom left corner to 100.0, with a gradient decreasing toward the other corners until it is zero. Once these conditions are set, the algorithm will use numerical solutions to the second derivatives in each direction to update the current matrix elements. And although we won’t check for convergence, the flow of the surface will eventually hit a steady-state.

6 |

The Algorithm

7 |

Following is a high-level description of the algorithm you will be implementing. Figure 1 shows a diagram of the grid that will result from the algorithm.

8 |
    9 |
  1. Allocate and specify a 2D array defining an evenly spaced grid of square dimension. You will need to leave space for the boundaries, as they do not belong to the main grid (i.e. a 1024 x 1024 matrix would need to be allocated as 1026x1026 to leave room for the borders).

  2. 10 |
  3. Set up the initial constant boundary conditions. The value at the lower left hand corner of the grid will be fixed at 100.00, and the value ascending and to the right will be set to a linear gradient reaching zero at the opposite corners (see Figure 1). The rest of the borders will be fixed at zero. Please note, these boundaries will remain constant throughout the simulation.

  4. 11 |
  5. Setup the initial condition of the inner grid elements as 0.5.

  6. 12 |
  7. Begin the iterative process and continue it for a fixed number of cycles. At each iteration, the value of each inner matrix element needs to be recomputed from elements of the current iteration. The updating formula, based on numerical computation of second derivatives, is:

    13 |
    14 | Equation 2
    Equation 2
    15 |
  8. 16 |
  9. After updating, copy the new matrix into the old’s memory and continue iterations until completion.

  10. 17 |
18 |
19 | Figure1
Figure1
20 |
21 | -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/background.md: -------------------------------------------------------------------------------- 1 | ## Background 2 | 3 | The algorithm used in this program solves Laplace’s equation on an 4 | evenly spaced grid through the use of a simple Jacobi iteration 5 | technique. The equation is a partial differential equation named after 6 | Pierre-Simon Laplace and are important in many fields of science: namely 7 | the fields of electromagnetism, astronomy and fluid dynamics. The 8 | physical interpretation of the equations is they describe the behavior 9 | of potentials. 10 | 11 | The equation has the form: 12 | ![Equation 1](./jacobiEq1.jpg) 13 | 14 | A practical solution to this equation is the use of a Jacobi iteration 15 | that employs numerical second derivatives. Lets assume that we would 16 | like to know the state of heat flow across a metal surface where the 17 | source is coming from one of the corners of the square surface. 18 | 19 | To tackle this, we would set up a two dimensional grid to represent the 20 | surface, and we will divide it evenly into square regions. We can 21 | simulate the heat source by defining the boundary conditions along the 22 | sides of the grid. In this case, we will be setting the bottom left 23 | corner to 100.0 and with an increasing gradient toward the other corners 24 | until it is zero. Once these conditions are set, the algorithm will use 25 | numerical solutions to the second derivatives in each direction to 26 | update the current matrix elements. And although we won’t check for 27 | convergence, the flow of the surface will eventually hit a 28 | steady-state. 29 | 30 | 31 | ### The Algorithm 32 | 33 | Following is a high-level description of the algorithm you will be 34 | implementing. Figure 1 shows a diagram of the grid that will result from 35 | the algorithm. 36 | 37 | 1. 
Allocate and specify a 2D array defining an evenly spaced grid of 38 | square dimension. You will need to leave space for the boundaries, 39 | as they do not belong to the main grid (i.e. a 1024 x 1024 matrix 40 | would need to be allocated as 1026x1026 to leave room for the 41 | borders. 42 | 43 | 2. Setup the initial constant boundary conditions. The value at the 44 | lower left hand corner of the of the grid will be fixed at 100.00, 45 | and the value ascending and to the right will be set to a linear 46 | gradient reaching zero at the opposite corners (see Figure 1). The 47 | rest of the borders will be fixed at zero. Please note, these 48 | boundaries will remain constant throughout the simulation. 49 | 50 | 3. Setup the initial condition of the inner grid elements as 0.5. 51 | 52 | 4. Begin and continue for a fixed number of cycles the iterative 53 | process. At each iteration, the value of each inner matrix element 54 | needs to be recomputed from elements of the current iteration. The 55 | updating formula, based on numerical computation of second 56 | derivatives, is: 57 | 58 | ![Equation 2](eqn.PNG) 59 | 60 | 5. After updating, copy the new matrix into the old's memory and 61 | continue iterations until completion. 62 | 63 | ![Figure1](jacobiFigure1.jpg) 64 | 65 | 66 | -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/eqn.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/Jacobi-project/aux/eqn.PNG -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/hints.html: -------------------------------------------------------------------------------- 1 |

MPI - 1D Decomposition

2 |

Assignments

3 |

The parameters of the algorithm are such:

4 |
    5 |
  1. The grid matrix must be completely distributed, no replicating the matrix on all processors. In this exercise, only use a 1 dimensional decomposition (see Figure 2).

    6 |
    7 | Figure 2
    Figure 2
    8 |
  2. 9 |
  3. The whole process must be parallel, that includes initialization of the grid and boundary conditions, the iterative evolution and the final dump on file of the resulting grid.

  4. 10 |
  5. Implement an efficient data exchange between processes.

  6. 11 |
  7. Handle dimensions even if not multiple of the number of processes (optional for merit).

  8. 12 |
13 |

Here is a guideline for the process that parallel programmers use to do this:

14 |
    15 |
  1. Study the serial algorithm and see where parallelism can be exploited. Also think about how the data can be divided. Best way to do this is on a piece of paper, drawing out the layout conceptually before you even touch the code.

  2. 16 |
  3. Still on paper, figure out how this conceptualization moves to being expressed in the parallel programming language you want to use. What MPI calls do you need to use? Which processors will be doing what work? STILL ON PAPER.

  4. 17 |
  5. Now begin programming the algorithm up in MPI.

  6. 18 |
  7. Test the program on a small matrix and processor count to make sure it is doing what you expect it to do.

  8. 19 |
  9. Once you are satisfied it works, scale it up.

  10. 20 |
21 |

With this in mind, go through this process to implement a 1-D decomposition of the Jacobi iteration algorithm.

22 |

Tips

23 |
    24 |
  • To set up the initial matrix, you will need to figure out which values go in what chunk of the distributed matrix. Think carefully about the data that each parallel chunk of work needs to have on it.

  • 25 |
  • Notice the value of each matrix element depends on the adjacent elements from the previous matrix. In the distributed matrix, this has consequences for the boundary elements, in that if you straightforwardly divide the matrix up by rows, elements that are needed to compute a matrix element will reside on a different processor. Think carefully about how to allocate the piece of the matrix on the current processor, and what communication needs to be performed before computing the matrix elements. Figure 2 is an illustration of one communication pattern that can be used.

  • 26 |
  • You are asked to write a function that prints the distributed matrix, so that you can check whether things are going the way you want.

  • 27 |
  • To perform a data exchange with a “dummy” process you can use MPI_PROC_NULL

  • 28 |
  • A reference of MPI routines can be found at: http://mpi-forum.org/docs/mpi-1.1/mpi-11-html/node182.html

  • 29 |
30 | -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/hints.md: -------------------------------------------------------------------------------- 1 | ## MPI - 1D Decomposition 2 | 3 | ### Assignments 4 | 5 | The parameters of the algorithm are such: 6 | 7 | 8 | 1. The grid matrix must be completely distributed, no replicating the 9 | matrix on all processors. In this exercise, only use a 1 dimensional decomposition (see 10 | [Figure 2](#Figure_2)). 11 | 12 | ![Figure 2](jacobiFigure2.jpg) 13 | 14 | 2. The whole process must be parallel, that includes initialization of 15 | the grid and boundary conditions, the iterative evolution and the final dump on file of the resulting grid. 16 | 17 | 3. Implement an efficient data exchange between processes. 18 | 19 | 4. Handle dimensions even if not multiple of the number of processes. 20 | 21 | Here is a guideline for the process that parallel programmers use to do 22 | this: 23 | 24 | 1. Study the serial algorithm and see where parallelism can be 25 | exploited. Also think about how the data can be divided. Best way 26 | to do this is on a piece of paper, drawing out the layout 27 | conceptually before you even touch the code. 28 | 29 | 2. Still on paper, figure out how this conceptualization moves to being 30 | expressed in the parallel programming language you want to use. 31 | What MPI calls do you need to use? Which processors will be doing 32 | what work? STILL ON PAPER. 33 | 34 | 3. Now begin programming the algorithm up in MPI. 35 | 36 | 4. Test the program on a small matrix and processor count to make sure 37 | it is doing what you expect it to do. 38 | 39 | 5. Once you are satisfied it works, scale it up. 40 | 41 | With this in mind, go through this process to implement a 1-D 42 | decomposition of the Jacobi iteration algorithm. 
43 | 44 | 45 | ### Tips 46 | 47 | - To set up the initial matrix, you will need to figure out which 48 | values go in what chunk of the distributed matrix. Think carefully 49 | about the data that each parallel chunk of work needs to have on 50 | it. 51 | 52 | - Notice the value of each matrix element depends on the adjacent 53 | elements from the previous matrix. In the distributed matrix, this 54 | has consequences for the boundary elements, in that if you 55 | straightforwardly divide the matrix up by rows, elements that are 56 | needed to compute a matrix element will reside on a different 57 | processor. Think carefully about how to allocate the piece of the 58 | matrix on the current processor, and what communication needs to be 59 | performed before computing the matrix elements. [Figure 60 | 2](#Figure_2). is an illustration of one communication patter that 61 | can be used. 62 | 63 | 64 | - It is requested to write a function that will print the 65 | distributed matrix, so that you have the ability to check to see 66 | if things are going the way you want. 
67 | 68 | - To perform a data exchange with a “dummy” process you can use 69 | [MPI_PROC_NULL](http://mpi-forum.org/docs/mpi-1.1/mpi-11-html/node53.html) 70 | 71 | - A reference of MPI routines can be found at: 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/jacobiEq1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/Jacobi-project/aux/jacobiEq1.jpg -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/jacobiFigure1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/Jacobi-project/aux/jacobiFigure1.jpg -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/jacobiFigure2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/Jacobi-project/aux/jacobiFigure2.jpg -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/ref2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/Jacobi-project/aux/ref2.png -------------------------------------------------------------------------------- /GPU/Jacobi-project/aux/ref_Init.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/GPU/Jacobi-project/aux/ref_Init.png -------------------------------------------------------------------------------- /GPU/Jacobi-project/code/Makefile: -------------------------------------------------------------------------------- 1 | # inputs for the executables 2 | 3 | dim = 10 4 | it = 100 5 | r = 3 6 | c = 3 7 | 8 | CC=cc 9 | CFLAGS=-O3 10 | 11 | SRCS=$(wildcard *.c) 12 | EXE=$(SRCS:.c=.x) 13 | 14 | all: $(EXE) 15 | 16 | run: clean all 17 | ./$(EXE) $(dim) $(it) $(r) $(c) 18 | 19 | %.x: %.c 20 | $(CC) $< $(CFLAGS) -o $@ 21 | 22 | clean: 23 | @rm -f *~ $(EXE) solution.dat 24 | 25 | plot: 26 | @gnuplot -p plot.plt 27 | 28 | .PHONY: clean plot all 29 | 30 | -------------------------------------------------------------------------------- /GPU/Jacobi-project/code/jacobi.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | /*** function declarations ***/ 8 | 9 | // save matrix to file 10 | void save_gnuplot( double *M, size_t dim ); 11 | 12 | // evolve Jacobi 13 | void evolve( double * matrix, double *matrix_new, size_t dimension ); 14 | 15 | // return the elapsed time 16 | double seconds( void ); 17 | 18 | /*** end function declaration ***/ 19 | 20 | int main(int argc, char* argv[]){ 21 | 22 | // timing variables 23 | double t_start, t_end, increment; 24 | 25 | // indexes for loops 26 | size_t i, j, it; 27 | 28 | // initialize matrix 29 | double *matrix, *matrix_new, *tmp_matrix; 30 | 31 | size_t dimension = 0, iterations = 0, row_peek = 0, col_peek = 0; 32 | size_t byte_dimension = 0; 33 | 34 | // check on input parameters 35 | if(argc != 5) { 36 | fprintf(stderr,"\nwrong number of arguments. 
Usage: ./a.out dim it n m\n"); 37 | return 1; 38 | } 39 | 40 | dimension = atoi(argv[1]); 41 | iterations = atoi(argv[2]); 42 | row_peek = atoi(argv[3]); 43 | col_peek = atoi(argv[4]); 44 | 45 | printf("matrix size = %zu\n", dimension); 46 | printf("number of iterations = %zu\n", iterations); 47 | printf("element for checking = Mat[%zu,%zu]\n",row_peek, col_peek); 48 | 49 | if((row_peek > dimension) || (col_peek > dimension)){ 50 | fprintf(stderr, "Cannot Peek a matrix element outside of the matrix dimension\n"); 51 | fprintf(stderr, "Arguments n and m must be smaller than %zu\n", dimension); 52 | return 1; 53 | } 54 | 55 | 56 | byte_dimension = sizeof(double) * ( dimension + 2 ) * ( dimension + 2 ); 57 | matrix = ( double* )malloc( byte_dimension ); 58 | matrix_new = ( double* )malloc( byte_dimension ); 59 | 60 | memset( matrix, 0, byte_dimension ); 61 | memset( matrix_new, 0, byte_dimension ); 62 | 63 | //fill initial values 64 | for( i = 1; i <= dimension; ++i ) 65 | for( j = 1; j <= dimension; ++j ) 66 | matrix[ ( i * ( dimension + 2 ) ) + j ] = 0.5; 67 | 68 | // set up borders 69 | increment = 100.0 / ( dimension + 1 ); 70 | 71 | for( i=1; i <= dimension+1; ++i ){ 72 | matrix[ i * ( dimension + 2 ) ] = i * increment; 73 | matrix[ ( ( dimension + 1 ) * ( dimension + 2 ) ) + ( dimension + 1 - i ) ] = i * increment; 74 | matrix_new[ i * ( dimension + 2 ) ] = i * increment; 75 | matrix_new[ ( ( dimension + 1 ) * ( dimension + 2 ) ) + ( dimension + 1 - i ) ] = i * increment; 76 | } 77 | 78 | // start algorithm 79 | t_start = seconds(); 80 | for( it = 0; it < iterations; ++it ){ 81 | 82 | evolve( matrix, matrix_new, dimension ); 83 | 84 | // swap the pointers 85 | tmp_matrix = matrix; 86 | matrix = matrix_new; 87 | matrix_new = tmp_matrix; 88 | 89 | } 90 | t_end = seconds(); 91 | 92 | printf( "\nelapsed time = %f seconds\n", t_end - t_start ); 93 | printf( "\nmatrix[%zu,%zu] = %f\n", row_peek, col_peek, matrix[ ( row_peek + 1 ) * ( dimension + 2 ) + ( col_peek + 1 ) 
] ); 94 | 95 | save_gnuplot( matrix, dimension ); 96 | 97 | free( matrix ); 98 | free( matrix_new ); 99 | 100 | return 0; 101 | } 102 | 103 | void evolve( double * matrix, double *matrix_new, size_t dimension ){ 104 | 105 | size_t i , j; 106 | 107 | //This will be a row dominant program. 108 | for( i = 1 ; i <= dimension; ++i ) 109 | for( j = 1; j <= dimension; ++j ) 110 | matrix_new[ ( i * ( dimension + 2 ) ) + j ] = ( 0.25 ) * 111 | ( matrix[ ( ( i - 1 ) * ( dimension + 2 ) ) + j ] + 112 | matrix[ ( i * ( dimension + 2 ) ) + ( j + 1 ) ] + 113 | matrix[ ( ( i + 1 ) * ( dimension + 2 ) ) + j ] + 114 | matrix[ ( i * ( dimension + 2 ) ) + ( j - 1 ) ] ); 115 | } 116 | 117 | void save_gnuplot( double *M, size_t dimension ){ 118 | 119 | size_t i , j; 120 | const double h = 0.1; 121 | FILE *file; 122 | 123 | file = fopen( "solution.dat", "w" ); 124 | 125 | for( i = 0; i < dimension + 2; ++i ) 126 | for( j = 0; j < dimension + 2; ++j ) 127 | fprintf(file, "%f\t%f\t%f\n", h * j, -h * i, M[ ( i * ( dimension + 2 ) ) + j ] ); 128 | 129 | fclose( file ); 130 | 131 | } 132 | 133 | // A Simple timer for measuring the walltime 134 | double seconds(){ 135 | 136 | struct timeval tmp; 137 | double sec; 138 | gettimeofday( &tmp, (struct timezone *)0 ); 139 | sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0; 140 | return sec; 141 | } 142 | 143 | -------------------------------------------------------------------------------- /GPU/Jacobi-project/code/plot.plt: -------------------------------------------------------------------------------- 1 | unset colorbox 2 | set palette rgb 33,13,10 3 | set size square 4 | plot 'solution.dat' with image -------------------------------------------------------------------------------- /GPU/Jacobi-project/readme.md: -------------------------------------------------------------------------------- 1 | # Laplace Equation by Jacobi method 2 | ## Background 3 | 4 | Please refer to [background](./aux/background.md) 5 | 6 | ## Exercises 7 | 1. 
Parallelize and optimize your C++ Jacobi code version following 8 | [**these assignments**](./aux/hints.md) 9 | 10 | 2. Perform a performance analysis of the code scaling, provide 11 | scalability charts and a brief explanation (matrix size 1200 and 12 | 12000, 10 iterations) 13 | 14 | 15 | ## Reference result (matrix size 60, 2000 iterations) 16 | ### Initial distribution of temperature 17 | 18 | Drawing 19 | 20 | ### Final distribution of temperature (after 2000 iterations) 21 | 22 | Drawing 23 | -------------------------------------------------------------------------------- /GPU/par_transp/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #ifdef DEBUG 7 | #define SIZE 8 8 | #else 9 | #define SIZE 32000 10 | #endif 11 | 12 | void print_loc( double * mat, int loc_size ){ 13 | 14 | int i, j; 15 | for( i = 0; i < loc_size; i++ ){ 16 | for( j = 0; j < SIZE; j++ ){ 17 | fprintf( stdout, "%.3g ", mat[ j + ( i * SIZE ) ] ); 18 | } 19 | fprintf( stdout, "\n" ); 20 | 21 | } 22 | } 23 | 24 | void print_par( double * mat, int loc_size, int rank, int npes ){ 25 | 26 | int count; 27 | 28 | if( rank ) MPI_Send( mat, loc_size * SIZE, MPI_DOUBLE, 0, rank, MPI_COMM_WORLD ); 29 | else{ 30 | 31 | double * buf = (double *) calloc( loc_size * SIZE, sizeof(double) ); 32 | print_loc( mat, loc_size ); 33 | for( count = 1; count < npes; count ++){ 34 | MPI_Recv( buf, loc_size * SIZE, MPI_DOUBLE, count, count, MPI_COMM_WORLD, MPI_STATUS_IGNORE ); 35 | print_loc( buf, loc_size ); 36 | } 37 | free( buf ); 38 | } 39 | } 40 | 41 | int main( int argc, char * argv[] ){ 42 | 43 | int npes, rank; 44 | int i, j, count; 45 | double * mat, * buf; 46 | 47 | MPI_Init( &argc, & argv ); 48 | MPI_Comm_size( MPI_COMM_WORLD, &npes ); 49 | MPI_Comm_rank( MPI_COMM_WORLD, &rank ); 50 | 51 | #ifdef OPENACC 52 | int ngpu = acc_get_num_devices(acc_device_nvidia); 53 | int igpu = rank % ngpu; 54 | 
acc_set_device_num(igpu, acc_device_nvidia); 55 | acc_init(acc_device_nvidia); 56 | if( !rank ) fprintf(stdout, "NUM GPU: %d\n", ngpu); 57 | fprintf(stdout, "GPU ID: %d, PID: %d\n", igpu, rank); 58 | fflush( stdout ); 59 | #endif 60 | 61 | MPI_Barrier( MPI_COMM_WORLD ); 62 | 63 | int loc_size = SIZE / npes; 64 | 65 | mat = (double *) calloc( loc_size * SIZE, sizeof(double) ); 66 | buf = (double *) calloc( SIZE * loc_size, sizeof(double) ); 67 | 68 | #pragma acc enter data create ( mat[ 0 : loc_size * SIZE ], buf[ 0 : SIZE * loc_size ] ) 69 | 70 | #pragma acc parallel loop collapse(2) present( mat ) 71 | for( i = 0; i < loc_size; i++ ){ 72 | for( j = 0; j < SIZE; j++ ){ 73 | mat[ j + ( i * SIZE ) ] = j + ( ( ( rank * loc_size ) + i ) * SIZE ) ; 74 | } 75 | } 76 | 77 | #ifdef DEBUG 78 | #pragma acc update self ( mat[ 0 : loc_size * SIZE ] ) 79 | print_par( mat, loc_size, rank, npes ); 80 | #endif 81 | 82 | //1) prepare the contigous block of data for the All2all 83 | #pragma acc parallel loop collapse(3) present( mat, buf ) 84 | for( count = 0; count < npes; count ++ ){ 85 | for( i = 0; i < loc_size; i++ ){ 86 | for( j = 0; j < loc_size; j++ ){ 87 | int i_g = i + ( count * loc_size ); 88 | int j_g = j + ( count * loc_size ); 89 | buf[ j + ( i_g * loc_size ) ] = mat[ j_g + ( i * SIZE ) ]; 90 | } 91 | } 92 | } 93 | 94 | //2) perform all2all in place 95 | #pragma acc host_data use_device( buf ) 96 | MPI_Alltoall( MPI_IN_PLACE, loc_size * loc_size, MPI_DOUBLE, buf, loc_size * loc_size, MPI_DOUBLE, MPI_COMM_WORLD); 97 | 98 | //3) local_tranposition of data into blocks 99 | #pragma acc parallel loop collapse(3) present( mat, buf ) 100 | for( count = 0; count < npes; count ++ ){ 101 | for( i = 0; i < loc_size; i++ ){ 102 | for( j = 0; j < loc_size; j++ ){ 103 | int i_g = i + ( count * loc_size ); 104 | mat[ i_g + ( j * SIZE ) ] = buf[ j + ( i_g * loc_size ) ]; 105 | } 106 | } 107 | } 108 | 109 | #ifdef DEBUG 110 | #pragma acc update self ( mat[ 0 : loc_size * SIZE ] ) 111 | 
print_par( mat, loc_size, rank, npes ); 112 | #endif 113 | 114 | MPI_Finalize(); 115 | 116 | return 0; 117 | } 118 | 119 | 120 | -------------------------------------------------------------------------------- /GPU/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /HPC/codes/00_simple.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. 
If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | int main( int argc, char **argv ) 42 | { 43 | #pragma omp parallel 44 | { 45 | 46 | #pragma omp single 47 | { 48 | printf( " »Yuk yuk, here is thread %d from " 49 | "within the single region\n", omp_get_thread_num() ); 50 | 51 | #pragma omp task 52 | { 53 | printf( "\tHi, here is thread %d " 54 | "running task A\n", omp_get_thread_num() ); 55 | } 56 | 57 | #pragma omp task 58 | { 59 | printf( "\tHi, here is thread %d " 60 | "running task B\n", omp_get_thread_num() ); 61 | } 62 | 63 | } 64 | 65 | printf(" :Hi, here is thread %d after the end " 66 | "of the single region, I was stuck waiting " 67 | "all the others\n", omp_get_thread_num() ); 68 | } 69 | 70 | return 0; 71 | 72 | } 73 | -------------------------------------------------------------------------------- /HPC/codes/00_simple_nowait.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. 
│ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | 42 | int main( int argc, char **argv ) 43 | { 44 | 45 | #pragma omp parallel 46 | { 47 | 48 | #pragma omp single nowait 49 | { 50 | printf( " »Yuk yuk, here is thread %d from " 51 | "within the single region\n", omp_get_thread_num() ); 52 | 53 | #pragma omp task 54 | { 55 | printf( "\tHi, here is thread %d " 56 | "running task A\n", omp_get_thread_num() ); 57 | } 58 | 59 | #pragma omp task 60 | { 61 | printf( "\tHi, here is thread %d " 62 | "running task B\n", omp_get_thread_num() ); 63 | } 64 | 65 | } 66 | 67 | printf(" :Hi, here is thread %d after the end " 68 | "of the single region, I'm stuck waiting " 69 | "all the others\n", omp_get_thread_num() ); 70 | } 71 | 72 | return 0; 73 | 74 | } 75 | -------------------------------------------------------------------------------- /HPC/codes/00_simple_taskwait.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | 
│ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | 42 | int main( int argc, char **argv ) 43 | { 44 | 45 | #pragma omp parallel 46 | { 47 | int me = omp_get_thread_num(); 48 | 49 | #pragma omp single nowait 50 | { 51 | printf( " »Yuk yuk, here is thread %d from " 52 | "within the single region\n", omp_get_thread_num() ); 53 | 54 | #pragma omp task 55 | { 56 | printf( "\tHi, here is thread %d " 57 | "running task A\n", omp_get_thread_num() ); 58 | } 59 | 60 | #pragma omp task 61 | { 62 | printf( "\tHi, here is thread %d " 63 | "running task B\n", omp_get_thread_num() ); 64 | } 65 | 66 | #pragma omp taskwait 67 | printf(" «Yuk yuk, it is still me, thread %d " 68 | "inside single region after all tasks ended\n", me); 69 | 70 | } 71 | 72 | printf(" :Hi, here is thread %d after the end " 73 | "of the single region, I'm stuck waiting " 74 | "all the others\n", omp_get_thread_num() ); 75 | } 76 | 77 | return 0; 78 | 79 | } 80 | 
-------------------------------------------------------------------------------- /HPC/codes/02_tasks.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | #define N_default 2000 // how long is the main array 42 | #define max_default 2000 // the maximum argument to heavy_work_? 
functions 43 | 44 | #if defined(_OPENMP) 45 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 46 | (double)ts.tv_nsec * 1e-9) 47 | 48 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 49 | (double)myts.tv_nsec * 1e-9) 50 | 51 | #else 52 | 53 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 54 | (double)ts.tv_nsec * 1e-9) 55 | #endif 56 | 57 | #if !defined(NTHREADS) // threads for the first level of parallelism 58 | // this value should be equal to the number_of_sockets-1 59 | #define NTHREADS 3 60 | #endif 61 | 62 | 63 | 64 | #if defined(DEBUG) 65 | #define PRINTF(...) printf(__VA_ARGS__) 66 | #else 67 | #define PRINTF(...) 68 | #endif 69 | 70 | typedef unsigned int uint; 71 | double heavy_work_0( uint ); 72 | double heavy_work_1( uint ); 73 | double heavy_work_2( uint ); 74 | 75 | 76 | 77 | int main( int argc, char **argv ) 78 | { 79 | 80 | int N = N_default; 81 | int max_value = max_default; 82 | int nthreads = 1; 83 | 84 | struct timespec ts; 85 | 86 | /* ----------------------------------------------------------------------------- 87 | * initialize 88 | * ----------------------------------------------------------------------------- 89 | */ 90 | 91 | // check whether some arg has been passed on 92 | if ( argc > 1 ) 93 | { 94 | N = atoi( *(argv+1) ); 95 | if( argc > 2 ) 96 | max_value = atoi( *(argv+2) ); 97 | } 98 | 99 | srand48(1234321); 100 | double result = 0; 101 | 102 | int *array = (int*)malloc( N*sizeof(double) ); 103 | 104 | 105 | // this mimic a stream of number you can not 106 | // initialize in parallel 107 | // note: that also means that data resides 108 | // in one's thread DRAM 109 | // 110 | for( int ii = 0; ii < N; ii++ ) 111 | array[ii] = 100 + (lrand48() % max_value); 112 | 113 | 114 | printf("serial summation\n" ); 115 | 116 | double tstart = CPU_TIME; 117 | 118 | for( int ii = 0; ii < N; ii++ ) 119 | result += 
heavy_work_0(array[ii]) + 120 | heavy_work_1(array[ii]) + 121 | heavy_work_2(array[ii]) ; 122 | 123 | double tstop = CPU_TIME; 124 | printf("serial summation results to be %g and took %g sec\n", result, tstop-tstart); 125 | 126 | printf("omp summation\n" ); 127 | 128 | result = 0; 129 | tstart = CPU_TIME; 130 | 131 | #pragma omp parallel shared(result) 132 | { 133 | 134 | #pragma omp single // having or not a taskwait here is irrelevant 135 | // since there are no instructions after the 136 | // single region 137 | { 138 | 139 | #pragma omp task // result is shared, no need for "shared(result)" clause 140 | { 141 | double myresult = 0; 142 | for( int jj = 0; jj < N; jj++ ) 143 | myresult += heavy_work_0( array[jj] ); 144 | #pragma omp atomic update 145 | result += myresult; 146 | } 147 | 148 | #pragma omp task // result is shared 149 | { 150 | double myresult = 0; 151 | for( int jj = 0; jj < N; jj++ ) 152 | myresult += heavy_work_1( array[jj] ); 153 | #pragma omp atomic update 154 | result += myresult; 155 | } 156 | 157 | #pragma omp task // result is shared 158 | { 159 | double myresult = 0; 160 | for( int jj = 0; jj < N; jj++ ) 161 | myresult += heavy_work_2(array[jj] ); 162 | #pragma omp atomic update 163 | result += myresult; 164 | } 165 | 166 | } 167 | 168 | // all the threads will pile-up here, waiting for all 169 | // of them to arrive here. 
170 | } 171 | 172 | 173 | 174 | double tend = CPU_TIME; 175 | 176 | 177 | /* ----------------------------------------------------------------------------- 178 | * finalize 179 | * ----------------------------------------------------------------------------- 180 | */ 181 | 182 | free(array); 183 | 184 | printf("The result is %g\nrun took %g of wall-clock time\n\n", 185 | result, tend - tstart ); 186 | 187 | 188 | return 0; 189 | } 190 | 191 | 192 | 193 | double heavy_work_0( uint N ) 194 | { 195 | double guess = 3.141572 / 3; 196 | 197 | for( int i = 0; i < N; i++ ) 198 | { 199 | guess = exp( guess ); 200 | guess = sin( guess ); 201 | 202 | } 203 | 204 | return guess; 205 | } 206 | 207 | double heavy_work_1( uint N ) 208 | { 209 | double guess = 3.141572 / 3; 210 | 211 | for( int i = 0; i < N; i++ ) 212 | { 213 | guess = log( guess ); 214 | guess = exp( sqrt(guess)/guess ); 215 | } 216 | 217 | return guess; 218 | } 219 | 220 | double heavy_work_2( uint N ) 221 | { 222 | double guess = 3.141572 / 3; 223 | 224 | for( int i = 0; i < N; i++ ) 225 | { 226 | guess = sqrt( guess ); 227 | guess = exp( 1+1.0/guess ); 228 | } 229 | 230 | return guess; 231 | } 232 | -------------------------------------------------------------------------------- /HPC/codes/02_tasks_wrong.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, 
or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | #define N_default 2000 // how long is the main array 42 | #define max_default 2000 // the maximum argument to heavy_work_? functions 43 | 44 | #if defined(_OPENMP) 45 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 46 | (double)ts.tv_nsec * 1e-9) 47 | 48 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 49 | (double)myts.tv_nsec * 1e-9) 50 | 51 | #else 52 | 53 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 54 | (double)ts.tv_nsec * 1e-9) 55 | #endif 56 | 57 | #if !defined(NTHREADS) // threads for the first level of parallelism 58 | // this value should be equal to the number_of_sockets-1 59 | #define NTHREADS 3 60 | #endif 61 | 62 | 63 | 64 | #if defined(DEBUG) 65 | #define PRINTF(...) printf(__VA_ARGS__); 66 | #else 67 | #define PRINTF(...) 
68 | #endif 69 | 70 | typedef unsigned int uint; 71 | double heavy_work_0( uint ); 72 | double heavy_work_1( uint ); 73 | double heavy_work_2( uint ); 74 | 75 | 76 | 77 | int main( int argc, char **argv ) 78 | { 79 | 80 | int N = N_default; 81 | int max_value = max_default; 82 | int nthreads = 1; 83 | 84 | struct timespec ts, myts; 85 | 86 | /* ----------------------------------------------------------------------------- 87 | * initialize 88 | * ----------------------------------------------------------------------------- 89 | */ 90 | 91 | // check whether some arg has been passed on 92 | if ( argc > 1 ) 93 | { 94 | N = atoi( *(argv+1) ); 95 | if( argc > 2 ) 96 | max_value = atoi( *(argv+2) ); 97 | } 98 | 99 | srand48(1234321); 100 | double result = 0; 101 | 102 | int *array = (int*)malloc( N*sizeof(double) ); 103 | 104 | 105 | // this mimic a stream of number you can not 106 | // initialize in parallel 107 | // note: that also means that data resides 108 | // in one's thread DRAM 109 | // 110 | for( int ii = 0; ii < N; ii++ ) 111 | array[ii] = 100 + (lrand48() % max_value); 112 | 113 | 114 | printf("serial summation\n" ); 115 | 116 | double tstart = CPU_TIME; 117 | 118 | for( int ii = 0; ii < N; ii++ ) 119 | result += heavy_work_0(array[ii]) + 120 | heavy_work_1(array[ii]) + 121 | heavy_work_2(array[ii]) ; 122 | 123 | double tstop = CPU_TIME; 124 | printf("serial summation results to be %g and took %g sec\n", result, tstop-tstart); 125 | 126 | printf("omp summation\n" ); 127 | 128 | result = 0; 129 | tstart = CPU_TIME; 130 | 131 | #pragma omp parallel shared(result) 132 | { 133 | #if defined(DEBUG) 134 | int me = omp_get_thread_num(); 135 | #endif 136 | double result1, result2, result3; 137 | 138 | #pragma omp single 139 | { 140 | PRINTF(" : Thread %d is generating the tasks\n", me); 141 | 142 | #pragma omp task 143 | { 144 | PRINTF(" + Thread %d is executing T1\n", omp_get_thread_num()); 145 | for( int jj = 0; jj < N; jj++ ) 146 | result1 += heavy_work_0( 
array[jj] ); 147 | } 148 | 149 | #pragma omp task 150 | { 151 | PRINTF(" + Thread %d is executing T2\n", omp_get_thread_num()); 152 | for( int jj = 0; jj < N; jj++ ) 153 | result2 += heavy_work_1( array[jj] ); 154 | } 155 | 156 | #pragma omp task 157 | { 158 | PRINTF(" + Thread %d is executing T3\n", omp_get_thread_num()); 159 | for( int jj = 0; jj < N; jj++ ) 160 | result3 += heavy_work_2(array[jj] ); 161 | } 162 | 163 | } 164 | 165 | #pragma omp barrier 166 | PRINTF("\tThread %d is here (%g %g %g)\n", me, result1, result2, result3 ); 167 | 168 | #pragma omp atomic update 169 | result += result1; 170 | #pragma omp atomic update 171 | result += result2; 172 | #pragma omp atomic update 173 | result += result3; 174 | 175 | PRINTF("\tThread %d is here (%g)\n", me, result ); 176 | } 177 | 178 | 179 | 180 | double tend = CPU_TIME; 181 | 182 | 183 | /* ----------------------------------------------------------------------------- 184 | * finalize 185 | * ----------------------------------------------------------------------------- 186 | */ 187 | 188 | free(array); 189 | 190 | printf("The result is %g\nrun took %g of wall-clock time\n\n", 191 | result, tend - tstart ); 192 | 193 | 194 | return 0; 195 | } 196 | 197 | 198 | 199 | double heavy_work_0( uint N ) 200 | { 201 | double guess = 3.141572 / 3; 202 | 203 | for( int i = 0; i < N; i++ ) 204 | { 205 | guess = exp( guess ); 206 | guess = sin( guess ); 207 | 208 | } 209 | 210 | return guess; 211 | } 212 | 213 | double heavy_work_1( uint N ) 214 | { 215 | double guess = 3.141572 / 3; 216 | 217 | for( int i = 0; i < N; i++ ) 218 | { 219 | guess = log( guess ); 220 | guess = exp( sqrt(guess)/guess ); 221 | } 222 | 223 | return guess; 224 | } 225 | 226 | double heavy_work_2( uint N ) 227 | { 228 | double guess = 3.141572 / 3; 229 | 230 | for( int i = 0; i < N; i++ ) 231 | { 232 | guess = sqrt( guess ); 233 | guess = exp( 1+1.0/guess ); 234 | } 235 | 236 | return guess; 237 | } 238 | 
-------------------------------------------------------------------------------- /HPC/codes/03_variable_workload.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. 
If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | #if defined(_OPENMP) 42 | #define CPU_TIME ({struct timespec ts; clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 43 | (double)ts.tv_nsec * 1e-9};) 44 | 45 | #define CPU_TIME_th ({struct timespec ts; clock_gettime( CLOCK_THREAD_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 46 | (double)ts.tv_nsec * 1e-9;}) 47 | 48 | #else 49 | 50 | #define CPU_TIME ({struct timespec ts; clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 51 | (double)ts.tv_nsec * 1e-9;}) 52 | #endif 53 | 54 | // ------------------------------------------------------------- 55 | 56 | // using the compile-time parameter TASKS_GRANULARITY we 57 | // can regulate how many tasks we are crating and, consequently, 58 | // how much workload will be assigne to each task 59 | // see below, around lines 215, 245 60 | // 61 | 62 | #if !defined(TASKS_GRANULARITY ) 63 | 64 | 65 | // when compiling with this option, a single task will execute TASK_GRANULARITY 66 | // iterations od the for loop. 
67 | // To activate that, compile adding -DTASK_GRANULARITY=#INT_VALUE 68 | // 69 | 70 | #define ROUND_N_TO_GRANULARITY {N += (N%TASKS_GRANULARITY); printf("tasks will be created with granularity: %d\n", TASKS_GRANULARITY);} 71 | #define CREATE_TASKS for ( int i = 0; i < N; i+= TASKS_GRANULARITY ) 72 | #define TASK_FOR for ( int JJ = i; JJ < i+TASKS_GRANULARITY; JJ++ ) 73 | #define TASKS_SIZE TASKS_GRANULARITY 74 | 75 | #else 76 | 77 | #define ROUND_N_TO_GRANULARITY 78 | #define CREATE_TASKS for ( int i = 0; i < N; i++ ) 79 | #define TASK_FOR 80 | #define JJ i 81 | #define TASKS_SIZE 1 82 | 83 | #endif 84 | 85 | // ------------------------------------------------------------- 86 | 87 | // if RANDOMLY_DECREASING is defined, the decreasing work case 88 | // will be randomized with a work that will be decreasing but with 89 | // some randomization added 90 | // 91 | 92 | #if defined(RANDOMLY_DECREASING) 93 | #define DECREASING_WORK( I ) workload - ((I) + rand_r(&seeds[me]) % (10+((I)/10))) 94 | #else 95 | #define DECREASING_WORK( I ) workload - (I) 96 | #endif 97 | 98 | // ------------------------------------------------------------- 99 | 100 | 101 | #define REPETITIONS 10 102 | 103 | #define NSTRATEGIES 2 104 | #define FOR 0 105 | #define TASKS 1 106 | char *STRATEGIES_NAMES[] = {"FORloop", "TASKS"}; 107 | 108 | #define NTIMINGS 2 109 | #define RND_WORK 0 110 | #define DECR_WORK 1 111 | char *TIMINGS_NAMES[] = {"RANDOM work", "DECREASING work"}; 112 | 113 | 114 | // ------------------------------------------------------------- 115 | 116 | double heavy_work( int N ); // a nonsense routine that just crunches floating point ops 117 | 118 | 119 | 120 | int main( int argc, char **argv ) 121 | { 122 | 123 | int nthreads; 124 | int N = 10000; 125 | int workload = 40000; 126 | double wtstart, wtend; 127 | 128 | 129 | 130 | if ( argc > 1 ) 131 | { 132 | N = atoi( *(argv+1) ); 133 | if ( argc > 2 ) 134 | workload = atoi(*(argv+2)); 135 | } 136 | 137 | #pragma omp parallel 138 
| #pragma omp master 139 | nthreads = omp_get_num_threads(); 140 | 141 | printf("using %d threads with N = %d\n\n", nthreads, N); 142 | 143 | double timings[NTIMINGS][NSTRATEGIES][nthreads]; 144 | double wtimings[NTIMINGS][NSTRATEGIES] = {0.0}; 145 | double min_timings[NTIMINGS][NSTRATEGIES] = {0.0}; 146 | double max_timings[NTIMINGS][NSTRATEGIES] = {0.0}; 147 | 148 | #if defined(DEBUG) 149 | unsigned int howmanytasks[nthreads]; 150 | #endif 151 | 152 | memset( timings, 0, NTIMINGS*NSTRATEGIES*nthreads*sizeof(double)); 153 | #if defined(DEBUG) 154 | memset( howmanytasks, 0, nthreads*sizeof(int)); 155 | #endif 156 | 157 | ROUND_N_TO_GRANULARITY ; 158 | 159 | for ( int R = 0; R < REPETITIONS; R++ ) 160 | { 161 | printf("shot %d/%d.. ", R+1, REPETITIONS); 162 | fflush(stdout); 163 | 164 | /* ······················································ * 165 | * * 166 | * First, we run the random work and the randomly * 167 | * decreasing work with standard for loops * 168 | * * 169 | * ······················································ */ 170 | 171 | 172 | // ----------------------------------------------------- random work, FOR 173 | wtstart = CPU_TIME; 174 | #pragma omp parallel shared(N, workload) 175 | { 176 | struct timespec myts; 177 | int myid = omp_get_thread_num(); 178 | int status = myid; 179 | 180 | srand( myid*123 ); 181 | double tstart = CPU_TIME_th; 182 | #pragma omp for schedule(dynamic, TASKS_SIZE) 183 | for( int i = 0; i < N; i++ ) 184 | heavy_work( 10 + rand_r(&status) % workload ); 185 | 186 | double tend = CPU_TIME_th; 187 | timings[RND_WORK][FOR][myid] += tend - tstart; 188 | } 189 | wtend = CPU_TIME; 190 | wtimings[RND_WORK][FOR] += wtend - wtstart; 191 | 192 | 193 | // ----------------------------------------------------- randomly decreasing work, FOR 194 | wtstart = CPU_TIME; 195 | #pragma omp parallel shared(N, workload) 196 | { 197 | struct timespec myts; 198 | int myid = omp_get_thread_num(); 199 | int status = myid; 200 | 201 | srand( 
myid*123 ); 202 | double tstart = CPU_TIME_th; 203 | 204 | #pragma omp for schedule(dynamic, TASKS_SIZE) 205 | for( int i = 0; i < N; i++ ) 206 | heavy_work( DECREASING_WORK(i) ); 207 | 208 | double tend = CPU_TIME_th; 209 | timings[DECR_WORK][FOR][myid] += tend - tstart; 210 | } 211 | wtend = CPU_TIME; 212 | wtimings[DECR_WORK][FOR] += wtend - wtstart; 213 | 214 | 215 | /* ······················································ * 216 | * * 217 | * Now, we run the random work and the randomly * 218 | * decreasing work using TASKS * 219 | * * 220 | * ······················································ */ 221 | 222 | unsigned int seeds[nthreads]; 223 | 224 | // ----------------------------------------------------- random work, TASKS 225 | 226 | wtstart = CPU_TIME; 227 | #pragma omp parallel shared(N, workload) 228 | { 229 | struct timespec myts; 230 | int myid = omp_get_thread_num(); 231 | 232 | srand( myid*123 ); 233 | double tstart = CPU_TIME_th; 234 | #pragma omp single nowait 235 | { 236 | CREATE_TASKS 237 | #pragma omp task 238 | { 239 | int me = omp_get_thread_num(); 240 | TASK_FOR 241 | heavy_work( 10 + rand_r(&seeds[me]) % workload ); 242 | } 243 | } 244 | #pragma omp barrier 245 | 246 | double tend = CPU_TIME_th; 247 | timings[RND_WORK][TASKS][myid] += tend - tstart; 248 | } 249 | wtend = CPU_TIME; 250 | wtimings[RND_WORK][TASKS] += wtend - wtstart; 251 | 252 | 253 | // ----------------------------------------------------- randomly decreasing work, TASKS 254 | 255 | wtstart = CPU_TIME; 256 | #pragma omp parallel shared(N, workload) 257 | { 258 | struct timespec myts; 259 | int myid = omp_get_thread_num(); 260 | 261 | srand( myid*123 ); 262 | double tstart = CPU_TIME_th; 263 | 264 | #pragma omp single nowait 265 | { 266 | CREATE_TASKS 267 | #pragma omp task 268 | { 269 | int me = omp_get_thread_num(); 270 | #if defined(DEBUG) 271 | howmanytasks[me]++; 272 | #endif 273 | TASK_FOR 274 | heavy_work( DECREASING_WORK(JJ) ); 275 | } 276 | 277 | } 278 | #pragma 
omp barrier 279 | double tend = CPU_TIME_th; 280 | timings[DECR_WORK][TASKS][myid] += tend - tstart; 281 | } 282 | wtend = CPU_TIME; 283 | wtimings[DECR_WORK][TASKS] += wtend - wtstart; 284 | } 285 | 286 | 287 | /* ······················································ * 288 | * * 289 | * Here below we collect the data on timings * 290 | * * 291 | * ······················································ */ 292 | 293 | 294 | double INV_REP = 1.0 / REPETITIONS; 295 | for ( int k = 0; k < NTIMINGS; k++ ) 296 | { 297 | printf("\ntimings %s:\n", TIMINGS_NAMES[k] ); 298 | double std_dev = 0; 299 | for ( int j = 0; j < NSTRATEGIES; j++ ) 300 | { 301 | min_timings[k][j] = timings[k][j][0]; 302 | max_timings[k][j] = timings[k][j][0]; 303 | std_dev = timings[k][j][0]*timings[k][j][0]; 304 | for( int i = 1; i < nthreads; i++) 305 | { 306 | timings[k][j][0] += timings[k][j][i]; 307 | std_dev += timings[k][j][i] * timings[k][j][i]; 308 | min_timings[k][j] = (min_timings[k][j] < timings[k][j][i]) ? min_timings[k][j] : timings[k][j][i]; 309 | max_timings[k][j] = (max_timings[k][j] > timings[k][j][i]) ? 
max_timings[k][j] : timings[k][j][i]; 310 | } 311 | timings[k][j][0] /= nthreads; 312 | std_dev = sqrt( std_dev/(nthreads-1) - nthreads/(nthreads-1)*timings[k][j][0]*timings[k][j][0] ); 313 | 314 | printf("\t%16s : w-clock %9.7g, avg %9.7g +- %9.7g, min: %9.7g, max: %9.7g\n", 315 | STRATEGIES_NAMES[j], 316 | wtimings[k][j]*INV_REP, timings[k][j][0]*INV_REP, std_dev*INV_REP, min_timings[k][j]*INV_REP, max_timings[k][j]*INV_REP ); 317 | } 318 | } 319 | 320 | 321 | #if defined(DEBUG) 322 | for ( int t = 0; t < nthreads; t++ ) 323 | printf("thread %d has processed %u tasks\n", t, howmanytasks[t] ); 324 | #endif 325 | 326 | 327 | return 0; 328 | } 329 | 330 | 331 | // 332 | // ---------------------------------------------- 333 | // 334 | // crunching numbers without having either 335 | // overflows or underflows 336 | // 337 | 338 | double heavy_work( int N ) 339 | { 340 | double guess = 3.141572 / 5 * N; 341 | guess = ( guess > 200 ? 111 : guess); 342 | 343 | for( int i = 0; i < N; i++ ) 344 | { 345 | guess = exp( guess ); 346 | guess = sin( guess ); 347 | 348 | } 349 | return guess; 350 | } 351 | -------------------------------------------------------------------------------- /HPC/codes/03_variable_workload.v2.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. 
│ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | #define N_default 10000 // how long is the main array 42 | #define min_default 100 // the minimum argument to heavy_work_? functions 43 | #define max_default 20000 // the maximum argument to heavy_work_? functions 44 | #define chunk_default 10 // the size of the small work chunks 45 | 46 | #define NANO_PAUSE 100 // the sleeping time when checking for initialization 47 | #define uSEC 1000 // a microsecond 48 | 49 | 50 | #if defined(_OPENMP) 51 | #define CPU_TIME ({struct timespec ts; clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 52 | (double)ts.tv_nsec * 1e-9};) 53 | 54 | #define CPU_TIME_th ({struct timespec ts; clock_gettime( CLOCK_THREAD_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 55 | (double)ts.tv_nsec * 1e-9;}) 56 | 57 | #else 58 | 59 | #define CPU_TIME ({struct timespec ts; clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 60 | (double)ts.tv_nsec * 1e-9;}) 61 | #endif 62 | 63 | 64 | #if defined(DEBUG) 65 | #define PRINTF(...) printf(__VA_ARGS__); 66 | #define PRINTFS(...) _Pragma("omp single") \ 67 | printf(__VA_ARGS__); 68 | #else 69 | #define PRINTF(...) 70 | #define PRINTFS(...) 
71 | #endif 72 | 73 | typedef unsigned int uint; 74 | double heavy_work_0( uint ); 75 | double heavy_work_1( uint ); 76 | double heavy_work_2( uint ); 77 | 78 | 79 | 80 | int main( int argc, char **argv ) 81 | { 82 | 83 | int N = N_default; 84 | int min_value = min_default; 85 | int max_value = max_default; 86 | int chunk = N / chunk_default; 87 | 88 | 89 | /* ----------------------------------------------------------------------------- 90 | * initialize 91 | * ----------------------------------------------------------------------------- 92 | */ 93 | 94 | // check whether some arg has been passed on 95 | if ( argc > 1 ) 96 | { 97 | N = atoi( *(argv+1) ); 98 | if( argc > 2 ) 99 | { 100 | max_value = atoi( *(argv+2) ); 101 | if( argc > 3 ) 102 | chunk = atoi( *(argv+3) ); 103 | } 104 | } 105 | 106 | srand48(1234321); 107 | double result = 0; 108 | 109 | int *array = (int*)malloc( N*sizeof(double) ); 110 | 111 | 112 | 113 | #if !defined(_OPENMP) 114 | 115 | /* ----------------------------------------------------------------------------- 116 | * SERIAL RUN 117 | * ----------------------------------------------------------------------------- 118 | */ 119 | 120 | 121 | printf("serial summation\n" ); 122 | 123 | double tstart = CPU_TIME; 124 | 125 | // this mimic a stream of number you can not 126 | // initialize in parallel 127 | // note: that also means that data resides 128 | // in one's thread DRAM 129 | // 130 | for( int ii = 0; ii < N; ii++ ) 131 | array[ii] = min_value + lrand48() % max_value; 132 | 133 | #if defined(DEBUG) 134 | double partial_result1, partial_result2, partial_result3 = 0; 135 | for( int ii = 0; ii < N; ii++ ) 136 | partial_result1 += heavy_work_0(array[ii]); 137 | 138 | for( int ii = 0; ii < N; ii++ ) 139 | partial_result2 += heavy_work_1(array[ii]); 140 | 141 | for( int ii = 0; ii < N; ii++ ) 142 | partial_result3 += heavy_work_2(array[ii]); 143 | 144 | result = partial_result1 + partial_result2 + partial_result3; 145 | 146 | double tend = 
CPU_TIME; 147 | 148 | printf("partial results are: %g %g %g\n", partial_result1, partial_result2, partial_result3 ); 149 | 150 | #else 151 | 152 | for( int ii = 0; ii < N; ii++ ) 153 | result += heavy_work_0(array[ii]) + 154 | heavy_work_1(array[ii]) + heavy_work_2(array[ii]); 155 | 156 | double tend = CPU_TIME; 157 | #endif 158 | 159 | #else 160 | 161 | /* ----------------------------------------------------------------------------- 162 | * PARALLEL RUN 163 | * ----------------------------------------------------------------------------- 164 | */ 165 | 166 | 167 | double tstart = CPU_TIME; 168 | 169 | #pragma omp parallel proc_bind(close) reduction(+:result) 170 | { 171 | 172 | #pragma omp single 173 | { 174 | int idx = 0; 175 | int first = 0; 176 | int last = chunk 177 | ; 178 | #if defined (MIMIC_SLOWER_INITIALIZATION) 179 | // 180 | // when compiling with this option, data are 181 | // initialized in random chunks with a random 182 | // pause within one chunk and the subsequeunt 183 | // 184 | 185 | struct timespec nanot = {0, 200*uSEC}; 186 | nanosleep( &nanot, NULL ); 187 | #endif 188 | 189 | #if defined(DEBUG) 190 | struct timespec myts; 191 | double tstart = CPU_TIME_th; 192 | int me = omp_get_thread_num(); 193 | #endif 194 | 195 | while( first < N ) 196 | { 197 | last = (last >= N)? 
N : last; 198 | for( int kk = first; kk < last; kk++, idx++ ) 199 | array[idx] = min_value + lrand48() % max_value; 200 | 201 | PRINTF("* initializer (thread %d) : %g sec, initialized chunk from %d to %d\n", 202 | me, CPU_TIME_th - tstart, first, last); 203 | 204 | #pragma omp task firstprivate(first, last) shared(result) untied 205 | // note: by default rules on the scope, variables "first" and "last" 206 | // would have been automatically firstprivate since they 207 | // are local private variables in the enclosing single 208 | // region 209 | { 210 | 211 | double myresult = 0; 212 | for( int ii = first; ii < last; ii++) 213 | myresult += heavy_work_0(array[ii]); 214 | #if defined(DEBUG) 215 | double current_result; 216 | #pragma omp atomic read 217 | current_result = result; 218 | PRINTF("\t thread %d runs chunk %d >> %d for work0 (result is %g, adding %g)\n", 219 | me, first, last, current_result, myresult); 220 | #endif 221 | #pragma omp atomic update 222 | result += myresult; 223 | } 224 | 225 | #pragma omp task firstprivate(first, last) shared(result) untied 226 | // note: by default rules on the scope, variables "first" and "last" 227 | // would have been automatically firstprivate since they 228 | // are local private variables in the enclosing single 229 | // region 230 | { 231 | double myresult = 0; 232 | for( int ii = first; ii < last; ii++) 233 | myresult += heavy_work_1(array[ii]); 234 | #if defined(DEBUG) 235 | double current_result; 236 | #pragma omp atomic read 237 | current_result = result; 238 | PRINTF("\t thread %d runs chunk %d >> %d for work1 (result is %g, adding %g)\n", 239 | me, first, last, current_result, myresult); 240 | #endif 241 | #pragma omp atomic update 242 | result += myresult; 243 | } 244 | 245 | #pragma omp task firstprivate(first, last) shared(result) untied 246 | // note: by default rules on the scope, variables "first" and "last" 247 | // would have been automatically firstprivate since they 248 | // are local private 
variables in the enclosing single 249 | // region 250 | { 251 | double myresult = 0; 252 | for( int ii = first; ii < last; ii++) 253 | myresult += heavy_work_2(array[ii]); 254 | #if defined(DEBUG) 255 | double current_result; 256 | #pragma omp atomic read 257 | current_result = result; 258 | PRINTF("\t thread %d runs chunk %d >> %d for work2 (result is %g, adding %g)\n", 259 | me, first, last, current_result, myresult); 260 | #endif 261 | #pragma omp atomic update 262 | result += myresult; 263 | } 264 | 265 | first += chunk; 266 | last += chunk; 267 | 268 | 269 | #if defined (MIMIC_SLOWER_INITIALIZATION) 270 | nanot.tv_nsec = 200*uSEC + lrand48() % 100*uSEC; 271 | nanosleep( &nanot, NULL ); 272 | #endif 273 | 274 | } 275 | PRINTF("* initializer thread: initialization lasted %g seconds\n", CPU_TIME_th - tstart ); 276 | } 277 | 278 | printf("thread waiting..\n"); 279 | 280 | // threads will wait here to receive the tasks 281 | } // close parallel region 282 | 283 | 284 | double tend = CPU_TIME; 285 | #endif 286 | 287 | 288 | 289 | /* ----------------------------------------------------------------------------- 290 | * finalize 291 | * ----------------------------------------------------------------------------- 292 | */ 293 | 294 | free(array); 295 | 296 | printf("The result is %g\nrun took %g of wall-clock time\n\n", 297 | result, tend - tstart ); 298 | 299 | 300 | return 0; 301 | } 302 | 303 | 304 | 305 | double heavy_work_0( uint N ) 306 | { 307 | double guess = 3.141572 / 3; 308 | 309 | for( int i = 0; i < N; i++ ) 310 | { 311 | guess = exp( guess ); 312 | guess = sin( guess ); 313 | 314 | } 315 | 316 | return guess; 317 | } 318 | 319 | double heavy_work_1( uint N ) 320 | { 321 | double guess = 3.141572 / 3; 322 | 323 | for( int i = 0; i < N; i++ ) 324 | { 325 | guess = log( guess ); 326 | guess = exp( sqrt(guess)/guess ); 327 | } 328 | 329 | return guess; 330 | } 331 | 332 | double heavy_work_2( uint N ) 333 | { 334 | double guess = 3.141572 / 3; 335 | 336 | 
for( int i = 0; i < N; i++ ) 337 | { 338 | guess = sqrt( guess ); 339 | guess = exp( 1+1.0/guess ); 340 | } 341 | 342 | return guess; 343 | } 344 | -------------------------------------------------------------------------------- /HPC/codes/04_tasks_reduction.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | #define N_default 20000 // how long is the main array 42 | #define min_default 100 // the minimum argument to heavy_work_? functions 43 | #define max_default 20000 // the maximum argument to heavy_work_? 
functions 44 | #define chunkf_default 10 // the size of the small work chunks 45 | 46 | 47 | #define NANO_PAUSE 100 // the sleeping time when checking for initialization 48 | #define uSEC 1000 // a microsecond 49 | 50 | #if defined(_OPENMP) 51 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 52 | (double)ts.tv_nsec * 1e-9) 53 | 54 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 55 | (double)myts.tv_nsec * 1e-9) 56 | 57 | #else 58 | 59 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 60 | (double)ts.tv_nsec * 1e-9) 61 | #endif 62 | 63 | 64 | #if defined(DEBUG) 65 | #define PRINTF(...) printf(__VA_ARGS__); 66 | #define PRINTFS(...) _Pragma("omp single") \ 67 | printf(__VA_ARGS__); 68 | #else 69 | #define PRINTF(...) 70 | #define PRINTFS(...) 71 | #endif 72 | 73 | typedef unsigned int uint; 74 | double heavy_work_0( uint ); 75 | double heavy_work_1( uint ); 76 | double heavy_work_2( uint ); 77 | 78 | 79 | 80 | int main( int argc, char **argv ) 81 | { 82 | 83 | int N = N_default; 84 | int min_value = min_default; 85 | int max_value = max_default; 86 | int chunkf = chunkf_default; 87 | int chunk = N / chunkf_default; 88 | 89 | struct timespec ts; 90 | 91 | /* ----------------------------------------------------------------------------- 92 | * initialize 93 | * ----------------------------------------------------------------------------- 94 | */ 95 | 96 | // check whether some arg has been passed on 97 | if ( argc > 1 ) 98 | { 99 | N = atoi( *(argv+1) ); 100 | if( argc > 2 ) 101 | { 102 | max_value = atoi( *(argv+2) ); 103 | if( argc > 3 ) 104 | chunkf = atoi( *(argv+3) ); 105 | } 106 | } 107 | 108 | srand48(1234321); 109 | double result = 0; 110 | 111 | int *array = (int*)malloc( N*sizeof(double) ); 112 | 113 | #if !defined(_OPENMP) 114 | 115 | printf("serial summation\n" ); 116 | 117 | double tstart = CPU_TIME; 118 | 119 | // this mimic a stream of 
number you can not 120 | // initialize in parallel 121 | // note: that also means that data resides 122 | // in one's thread DRAM 123 | // 124 | for( int ii = 0; ii < N; ii++ ) 125 | array[ii] = min_value + lrand48() % max_value; 126 | 127 | #if defined(DEBUG) 128 | double partial_result1, partial_result2, partial_result3 = 0; 129 | for( int ii = 0; ii < N; ii++ ) 130 | partial_result1 += heavy_work_0(array[ii]); 131 | 132 | for( int ii = 0; ii < N; ii++ ) 133 | partial_result2 += heavy_work_1(array[ii]); 134 | 135 | for( int ii = 0; ii < N; ii++ ) 136 | partial_result3 += heavy_work_2(array[ii]); 137 | 138 | result = partial_result1 + partial_result2 + partial_result3; 139 | 140 | double tend = CPU_TIME; 141 | 142 | printf("partial results are: %g %g %g\n", partial_result1, partial_result2, partial_result3 ); 143 | 144 | #else 145 | 146 | for( int ii = 0; ii < N; ii++ ) 147 | result += heavy_work_0(array[ii]) + 148 | heavy_work_1(array[ii]) + heavy_work_2(array[ii]); 149 | 150 | double tend = CPU_TIME; 151 | #endif 152 | 153 | #else 154 | 155 | 156 | 157 | double tstart = CPU_TIME; 158 | 159 | #pragma omp parallel proc_bind(close) reduction(+:result) 160 | { 161 | 162 | #pragma omp single nowait 163 | { 164 | int idx = 0; 165 | int first = 0; 166 | int last = chunk; 167 | #if defined (MIMIC_SLOWER_INITIALIZATION) 168 | struct timespec nanot = {0, 200*uSEC}; 169 | nanosleep( &nanot, NULL ); 170 | #endif 171 | #if defined(DEBUG) 172 | struct timespec myts; 173 | double tstart = CPU_TIME_th; 174 | int me = omp_get_thread_num(); 175 | #endif 176 | 177 | while( first < N ) 178 | { 179 | last = (last >= N)?N:last; 180 | for( int kk = first; kk < last; kk++, idx++ ) 181 | array[idx] = min_value + lrand48() % max_value; 182 | 183 | PRINTF("* initializer (thread %d) : %g sec, initialized chunk from %d to %d\n", 184 | me, CPU_TIME_th - tstart, first, last); 185 | 186 | #pragma omp task firstprivate(first, last) shared(result) untied 187 | { 188 | double myresult = 0; 189 | 
for( int ii = first; ii < last; ii++) 190 | myresult += heavy_work_0(array[ii]); 191 | #pragma omp atomic update 192 | result += myresult; 193 | } 194 | #pragma omp task firstprivate(first, last) shared(result) untied 195 | { 196 | double myresult = 0; 197 | for( int ii = first; ii < last; ii++) 198 | myresult += heavy_work_1(array[ii]); 199 | #pragma omp atomic update 200 | result += myresult; 201 | } 202 | #pragma omp task firstprivate(first, last) shared(result) untied 203 | { 204 | double myresult = 0; 205 | for( int ii = first; ii < last; ii++) 206 | myresult += heavy_work_2(array[ii]); 207 | #pragma omp atomic update 208 | result += myresult; 209 | } 210 | 211 | first += chunk; 212 | last += chunk; 213 | 214 | 215 | #if defined (MIMIC_SLOWER_INITIALIZATION) 216 | nanot.tv_nsec = 200*uSEC + lrand48() % 100*uSEC; 217 | nanosleep( &nanot, NULL ); 218 | #endif 219 | 220 | } 221 | PRINTF("* initializer thread: initialization lasted %g seconds\n", CPU_TIME_th - tstart ); 222 | } 223 | 224 | #pragma omp taskwait 225 | } // close parallel region 226 | 227 | 228 | double tend = CPU_TIME; 229 | #endif 230 | 231 | 232 | 233 | /* ----------------------------------------------------------------------------- 234 | * finalize 235 | * ----------------------------------------------------------------------------- 236 | */ 237 | 238 | free(array); 239 | 240 | printf("The result is %g\nrun took %g of wall-clock time\n\n", 241 | result, tend - tstart ); 242 | 243 | 244 | return 0; 245 | } 246 | 247 | 248 | 249 | double heavy_work_0( uint N ) 250 | { 251 | double guess = 3.141572 / 3; 252 | 253 | for( int i = 0; i < N; i++ ) 254 | { 255 | guess = exp( guess ); 256 | guess = sin( guess ); 257 | 258 | } 259 | 260 | return guess; 261 | } 262 | 263 | double heavy_work_1( uint N ) 264 | { 265 | double guess = 3.141572 / 3; 266 | 267 | for( int i = 0; i < N; i++ ) 268 | { 269 | guess = log( guess ); 270 | guess = exp( sqrt(guess)/guess ); 271 | } 272 | 273 | return guess; 274 | } 275 | 
276 | double heavy_work_2( uint N ) 277 | { 278 | double guess = 3.141572 / 3; 279 | 280 | for( int i = 0; i < N; i++ ) 281 | { 282 | guess = sqrt( guess ); 283 | guess = exp( 1+1.0/guess ); 284 | } 285 | 286 | return guess; 287 | } 288 | -------------------------------------------------------------------------------- /HPC/codes/04_unpredictable_pattern.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. 
If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | #if defined(_OPENMP) 42 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 43 | (double)ts.tv_nsec * 1e-9) 44 | 45 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 46 | (double)myts.tv_nsec * 1e-9) 47 | 48 | #else 49 | 50 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 51 | (double)ts.tv_nsec * 1e-9) 52 | #endif 53 | 54 | // ------------------------------------------------------------- 55 | 56 | // using the compile-time parameter TASKS_GRANULARITY we 57 | // can regulate how many tasks we are creating and, consequently, 58 | // how much workload will be assigned to each task 59 | // see below, around lines 215, 245 60 | // 61 | #if defined(TASKS_GRANULARITY ) 62 | 63 | #define ROUND_N_TO_GRANULARITY {N += (N%TASKS_GRANULARITY); printf("tasks will be created with granularity: %d\n", TASKS_GRANULARITY);} 64 | #define CREATE_TASKS for ( int i = 0; i < N; i+= TASKS_GRANULARITY ) 65 | #define TASK_FOR for ( int JJ = i; JJ < i+TASKS_GRANULARITY; JJ++ ) 66 | #define TASKS_SIZE TASKS_GRANULARITY 67 | 68 | #else 69 | 70 | #define ROUND_N_TO_GRANULARITY 71 | #define CREATE_TASKS for ( int i = 0; i < N; i++ ) 72 | #define TASK_FOR 73 | #define JJ i 74 | #define TASKS_SIZE 1 75 | 76 | #endif 77 | 78 | // ------------------------------------------------------------- 79 | 80 | // if RANDOMLY_DECREASING is defined, the decreasing work case 81 | // will be randomized with a work that will be decreasing but with 82 | // some randomization added 83 | // 84 | 85 | #if defined(RANDOMLY_DECREASING) 86 | #define DECREASING_WORK( I ) 
workload - ((I) + rand_r(&seeds[me]) % (10+((I)/10))) 87 | #else 88 | #define DECREASING_WORK( I ) workload - (I) 89 | #endif 90 | 91 | // ------------------------------------------------------------- 92 | 93 | 94 | #define REPETITIONS 10 95 | 96 | #define NSTRATEGIES 2 97 | #define FOR 0 98 | #define TASKS 1 99 | char *STRATEGIES_NAMES[] = {"FORloop", "TASKS"}; 100 | 101 | #define NTIMINGS 1 102 | #define RND_WORK 0 103 | char *TIMINGS_NAMES[] = {"RANDOM work"}; 104 | 105 | 106 | // ------------------------------------------------------------- 107 | 108 | double heavy_work( int N ); 109 | 110 | 111 | 112 | int main( int argc, char **argv ) 113 | { 114 | 115 | int nthreads; 116 | int N = 10000; 117 | int workload = 40000; 118 | double wtstart, wtend; 119 | struct timespec ts; 120 | 121 | 122 | if ( argc > 1 ) 123 | { 124 | N = atoi( *(argv+1) ); 125 | if ( argc > 2 ) 126 | workload = atoi(*(argv+2)); 127 | } 128 | 129 | #pragma omp parallel 130 | #pragma omp master 131 | nthreads = omp_get_num_threads(); 132 | 133 | printf("using %d threads with N = %d\n\n", nthreads, N); 134 | 135 | double timings[NTIMINGS][NSTRATEGIES][nthreads]; 136 | double wtimings[NTIMINGS][NSTRATEGIES] = {0.0}; 137 | double min_timings[NTIMINGS][NSTRATEGIES] = {0.0}; 138 | double max_timings[NTIMINGS][NSTRATEGIES] = {0.0}; 139 | 140 | #if defined(DEBUG) 141 | unsigned int howmanytasks[nthreads]; 142 | #endif 143 | 144 | memset( timings, 0, NTIMINGS*NSTRATEGIES*nthreads*sizeof(double)); 145 | #if defined(DEBUG) 146 | memset( howmanytasks, 0, nthreads*sizeof(int)); 147 | #endif 148 | 149 | ROUND_N_TO_GRANULARITY ; 150 | 151 | for ( int R = 0; R < REPETITIONS; R++ ) 152 | { 153 | printf("shot %d/%d.. 
", R+1, REPETITIONS); 154 | fflush(stdout); 155 | 156 | // ----------------------------------------------------- random work, FOR 157 | wtstart = CPU_TIME; 158 | #pragma omp parallel shared(N, workload) 159 | { 160 | struct timespec myts; 161 | int myid = omp_get_thread_num(); 162 | int status = myid; 163 | unsigned int half_workload = workload/2; 164 | 165 | srand( myid*123 + time(NULL) ); 166 | double tstart = CPU_TIME_th; 167 | #pragma omp for schedule(dynamic, TASKS_SIZE) 168 | for( int i = 0; i < N; i++ ) 169 | { 170 | unsigned int work = 10 + rand_r(&status) % workload; 171 | if ( work > half_workload ) 172 | heavy_work( work ); 173 | } 174 | double tend = CPU_TIME_th; 175 | timings[RND_WORK][FOR][myid] += tend - tstart; 176 | } 177 | wtend = CPU_TIME; 178 | wtimings[RND_WORK][FOR] += wtend - wtstart; 179 | 180 | 181 | // ----------------------------------------------------- TASKS 182 | 183 | unsigned int seeds[nthreads]; 184 | 185 | // ----------------------------------------------------- random work, TASKS 186 | 187 | wtstart = CPU_TIME; 188 | #pragma omp parallel shared(N, workload) 189 | { 190 | struct timespec myts; 191 | int myid = omp_get_thread_num(); 192 | 193 | srand( myid*123 + time(NULL) ); 194 | double tstart = CPU_TIME_th; 195 | #pragma omp single nowait 196 | { 197 | unsigned int half_workload = workload/2; 198 | 199 | CREATE_TASKS 200 | #pragma omp task 201 | { 202 | int me = omp_get_thread_num(); 203 | TASK_FOR 204 | { 205 | unsigned int work = 10 + rand_r(&seeds[me]) % workload; 206 | if ( work > half_workload ) 207 | heavy_work( work ); 208 | } 209 | } 210 | } 211 | #pragma omp barrier 212 | 213 | double tend = CPU_TIME_th; 214 | timings[RND_WORK][TASKS][myid] += tend - tstart; 215 | } 216 | wtend = CPU_TIME; 217 | wtimings[RND_WORK][TASKS] += wtend - wtstart; 218 | 219 | } 220 | 221 | double INV_REP = 1.0 / REPETITIONS; 222 | for ( int k = 0; k < NTIMINGS; k++ ) 223 | { 224 | printf("\ntimings %s:\n", TIMINGS_NAMES[k] ); 225 | double 
std_dev = 0; 226 | for ( int j = 0; j < NSTRATEGIES; j++ ) 227 | { 228 | min_timings[k][j] = timings[k][j][0]; 229 | max_timings[k][j] = timings[k][j][0]; 230 | std_dev = timings[k][j][0]*timings[k][j][0]; 231 | for( int i = 1; i < nthreads; i++) 232 | { 233 | timings[k][j][0] += timings[k][j][i]; 234 | std_dev += timings[k][j][i] * timings[k][j][i]; 235 | min_timings[k][j] = (min_timings[k][j] < timings[k][j][i]) ? min_timings[k][j] : timings[k][j][i]; 236 | max_timings[k][j] = (max_timings[k][j] > timings[k][j][i]) ? max_timings[k][j] : timings[k][j][i]; 237 | } 238 | timings[k][j][0] /= nthreads; 239 | std_dev = sqrt( std_dev/(nthreads-1) - nthreads/(nthreads-1)*timings[k][j][0]*timings[k][j][0] ); 240 | 241 | printf("\t%16s : w-clock %9.7g, avg %9.7g +- %9.7g, min: %9.7g, max: %9.7g\n", 242 | STRATEGIES_NAMES[j], 243 | wtimings[k][j]*INV_REP, timings[k][j][0]*INV_REP, std_dev*INV_REP, min_timings[k][j]*INV_REP, max_timings[k][j]*INV_REP ); 244 | } 245 | } 246 | 247 | #if defined(DEBUG) 248 | for ( int t = 0; t < nthreads; t++ ) 249 | printf("thread %d has processed %u tasks\n", t, howmanytasks[t] ); 250 | #endif 251 | return 0; 252 | } 253 | 254 | 255 | double heavy_work( int N ) 256 | { 257 | double guess = 3.141572 / 5 * N; 258 | guess = ( guess > 200 ? 
111 : guess); 259 | 260 | for( int i = 0; i < N; i++ ) 261 | { 262 | guess = exp( guess ); 263 | guess = sin( guess ); 264 | 265 | } 266 | return guess; 267 | } 268 | -------------------------------------------------------------------------------- /HPC/codes/05_taskgroup_reduction.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | 41 | #define N_default 20000 // how long is the main array 42 | #define min_default 100 // the minimum argument to heavy_work_? functions 43 | #define max_default 20000 // the maximum argument to heavy_work_? 
functions 44 | #define chunkf_default 10 // the size of the small work chunks 45 | 46 | 47 | #define NANO_PAUSE 100 // the sleeping time when checking for initialization 48 | #define uSEC 1000 // a microsecond 49 | 50 | #if defined(_OPENMP) 51 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 52 | (double)ts.tv_nsec * 1e-9) 53 | 54 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 55 | (double)myts.tv_nsec * 1e-9) 56 | 57 | #else 58 | 59 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 60 | (double)ts.tv_nsec * 1e-9) 61 | #endif 62 | 63 | 64 | #if defined(DEBUG) 65 | #define PRINTF(...) printf(__VA_ARGS__); 66 | #define PRINTFS(...) _Pragma("omp single") \ 67 | printf(__VA_ARGS__); 68 | #else 69 | #define PRINTF(...) 70 | #define PRINTFS(...) 71 | #endif 72 | 73 | typedef unsigned int uint; 74 | double heavy_work_0( uint ); 75 | double heavy_work_1( uint ); 76 | double heavy_work_2( uint ); 77 | 78 | 79 | 80 | int main( int argc, char **argv ) 81 | { 82 | 83 | int N = N_default; 84 | int min_value = min_default; 85 | int max_value = max_default; 86 | int chunkf = chunkf_default; 87 | int chunk = N / chunkf_default; 88 | 89 | struct timespec ts; 90 | 91 | /* ----------------------------------------------------------------------------- 92 | * initialize 93 | * ----------------------------------------------------------------------------- 94 | */ 95 | 96 | // check whether some arg has been passed on 97 | if ( argc > 1 ) 98 | { 99 | N = atoi( *(argv+1) ); 100 | if( argc > 2 ) 101 | { 102 | max_value = atoi( *(argv+2) ); 103 | if( argc > 3 ) 104 | chunkf = atoi( *(argv+3) ); 105 | } 106 | } 107 | 108 | srand48(1234321); 109 | double result = 0; 110 | 111 | int *array = (int*)malloc( N*sizeof(double) ); 112 | 113 | #if !defined(_OPENMP) 114 | 115 | printf("serial summation\n" ); 116 | 117 | double tstart = CPU_TIME; 118 | 119 | // this mimic a stream of 
number you can not 120 | // initialize in parallel 121 | // note: that also means that data resides 122 | // in one's thread DRAM 123 | // 124 | for( int ii = 0; ii < N; ii++ ) 125 | array[ii] = min_value + lrand48() % max_value; 126 | 127 | #if defined(DEBUG) 128 | double partial_result1, partial_result2, partial_result3 = 0; 129 | for( int ii = 0; ii < N; ii++ ) 130 | partial_result1 += heavy_work_0(array[ii]); 131 | 132 | for( int ii = 0; ii < N; ii++ ) 133 | partial_result2 += heavy_work_1(array[ii]); 134 | 135 | for( int ii = 0; ii < N; ii++ ) 136 | partial_result3 += heavy_work_2(array[ii]); 137 | 138 | result = partial_result1 + partial_result2 + partial_result3; 139 | 140 | double tend = CPU_TIME; 141 | 142 | printf("partial results are: %g %g %g\n", partial_result1, partial_result2, partial_result3 ); 143 | 144 | #else 145 | 146 | for( int ii = 0; ii < N; ii++ ) 147 | result += heavy_work_0(array[ii]) + 148 | heavy_work_1(array[ii]) + heavy_work_2(array[ii]); 149 | 150 | double tend = CPU_TIME; 151 | #endif 152 | 153 | #else 154 | 155 | 156 | 157 | double tstart = CPU_TIME; 158 | 159 | #pragma omp parallel proc_bind(close) 160 | { 161 | 162 | #pragma omp single nowait 163 | { 164 | #pragma omp taskgroup task_reduction(+:result) 165 | { 166 | int idx = 0; 167 | int first = 0; 168 | int last = chunk; 169 | #if defined (MIMIC_SLOWER_INITIALIZATION) 170 | struct timespec nanot = {0, 200*uSEC}; 171 | nanosleep( &nanot, NULL ); 172 | #endif 173 | #if defined(DEBUG) 174 | struct timespec myts; 175 | double tstart = CPU_TIME_th; 176 | int me = omp_get_thread_num(); 177 | #endif 178 | 179 | while( first < N ) 180 | { 181 | last = (last >= N)?N:last; 182 | for( int kk = first; kk < last; kk++, idx++ ) 183 | array[idx] = min_value + lrand48() % max_value; 184 | 185 | PRINTF("* initializer (thread %d) : %g sec, initialized chunk from %d to %d\n", 186 | me, CPU_TIME_th - tstart, first, last); 187 | 188 | #pragma omp task in_reduction(+:result) firstprivate(first, last) 
untied 189 | { 190 | double myresult = 0; 191 | for( int ii = first; ii < last; ii++) 192 | myresult += heavy_work_0(array[ii]); 193 | result += myresult; 194 | } 195 | #pragma omp task in_reduction(+:result) firstprivate(first, last) untied 196 | { 197 | double myresult = 0; 198 | for( int ii = first; ii < last; ii++) 199 | myresult += heavy_work_1(array[ii]); 200 | result += myresult; 201 | } 202 | #pragma omp task in_reduction(+:result) firstprivate(first, last) untied 203 | { 204 | double myresult = 0; 205 | for( int ii = first; ii < last; ii++) 206 | myresult += heavy_work_2(array[ii]); 207 | result += myresult; 208 | } 209 | 210 | first += chunk; 211 | last += chunk; 212 | 213 | 214 | #if defined (MIMIC_SLOWER_INITIALIZATION) 215 | nanot.tv_nsec = 200*uSEC + lrand48() % 100*uSEC; 216 | nanosleep( &nanot, NULL ); 217 | #endif 218 | 219 | } 220 | } 221 | PRINTF("* initializer thread: initialization lasted %g seconds\n", CPU_TIME_th - tstart ); 222 | } 223 | 224 | #pragma omp taskwait 225 | } // close parallel region 226 | 227 | 228 | double tend = CPU_TIME; 229 | #endif 230 | 231 | 232 | 233 | /* ----------------------------------------------------------------------------- 234 | * finalize 235 | * ----------------------------------------------------------------------------- 236 | */ 237 | 238 | free(array); 239 | 240 | printf("The result is %g\nrun took %g of wall-clock time\n\n", 241 | result, tend - tstart ); 242 | 243 | 244 | return 0; 245 | } 246 | 247 | 248 | 249 | double heavy_work_0( uint N ) 250 | { 251 | double guess = 3.141572 / 3; 252 | 253 | for( int i = 0; i < N; i++ ) 254 | { 255 | guess = exp( guess ); 256 | guess = sin( guess ); 257 | 258 | } 259 | 260 | return guess; 261 | } 262 | 263 | double heavy_work_1( uint N ) 264 | { 265 | double guess = 3.141572 / 3; 266 | 267 | for( int i = 0; i < N; i++ ) 268 | { 269 | guess = log( guess ); 270 | guess = exp( sqrt(guess)/guess ); 271 | } 272 | 273 | return guess; 274 | } 275 | 276 | double 
heavy_work_2( uint N ) 277 | { 278 | double guess = 3.141572 / 3; 279 | 280 | for( int i = 0; i < N; i++ ) 281 | { 282 | guess = sqrt( guess ); 283 | guess = exp( 1+1.0/guess ); 284 | } 285 | 286 | return guess; 287 | } 288 | -------------------------------------------------------------------------------- /HPC/codes/linked_list.c: -------------------------------------------------------------------------------- 1 | 2 | #if defined(__STDC__) 3 | # if (__STDC_VERSION__ >= 199901L) 4 | # define _XOPEN_SOURCE 700 5 | # endif 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | // ========================================================================= 17 | // 18 | // define useful quantities 19 | // 20 | 21 | typedef unsigned long long ull; 22 | #define TIME_CUT 1000000009 23 | 24 | 25 | #if defined(_OPENMP) 26 | 27 | int me; 28 | #pragma omp threadprivate(me) 29 | 30 | #define CPU_TIME ({ struct timespec ts; (clock_gettime( CLOCK_REALTIME, &ts ), \ 31 | (ull)ts.tv_sec * 1000000000 + \ 32 | (ull)ts.tv_nsec); }) 33 | 34 | #define CPU_TIME_th ({ struct timespec ts; (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), \ 35 | (ull)myts.tv_sec*1000000000 + \ 36 | (ull)myts.tv_nsec); }) 37 | 38 | #else 39 | 40 | #define CPU_TIME ({ struct timespec ts; (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), \ 41 | (ull)ts.tv_sec * 1000000000 + \ 42 | (ull)ts.tv_nsec); }) 43 | #endif 44 | 45 | 46 | #if defined(DEBUG) 47 | #define TIMESTAP (CPU_TIME % TIME_CUT) 48 | #define dbgout(...) printf( __VA_ARGS__ ); 49 | #else 50 | #define TIMESTAP 51 | #define dbgout(...) 
52 | #endif 53 | 54 | 55 | // 56 | // ========================================================================= 57 | // 58 | // define data structures 59 | // 60 | 61 | #define DONT_USE_TASKYIELD 0 62 | #define USE_TASKYIELD 1 63 | 64 | typedef struct llnode 65 | { 66 | int data; 67 | #if defined(_OPENMP) 68 | omp_lock_t lock; 69 | #endif 70 | 71 | struct llnode *next; 72 | struct llnode *prev; 73 | } llnode_t; 74 | 75 | // 76 | // ========================================================================= 77 | // 78 | // declare data structures 79 | // 80 | 81 | int clashes; 82 | 83 | // 84 | // ========================================================================= 85 | // 86 | // prototypes 87 | // 88 | 89 | llnode_t* get_head ( llnode_t *); 90 | int walk ( llnode_t *); 91 | int delete ( llnode_t * ); 92 | int find ( llnode_t *, int, llnode_t **, llnode_t ** ); 93 | int find_and_insert ( llnode_t *, int ); 94 | 95 | #if defined(_OPENMP) 96 | int find_and_insert_parallel ( llnode_t *, int, int ); 97 | #endif 98 | 99 | // 100 | // ========================================================================= 101 | // ========================================================================= 102 | 103 | 104 | // ······················································ 105 | 106 | llnode_t *get_head ( llnode_t *start ) 107 | /* 108 | * walk the list basck to find the list head 109 | * returns the head 110 | */ 111 | { 112 | while( start->prev != NULL ) 113 | start = start->prev; 114 | 115 | return start; 116 | } 117 | 118 | // ······················································ 119 | 120 | int walk ( llnode_t *start ) 121 | /* 122 | * walk the list starting from the node start 123 | * as first, the list is walked back until the list head 124 | * if mode == 1, the list is then walked ahed printing 125 | * the first 100 nodes. 
126 | */ 127 | { 128 | int n = 0; 129 | if ( start != NULL ) 130 | { 131 | n = 1; 132 | int prev_value = start->data; 133 | printf("%9d [-]", start->data ); 134 | start = start->next; 135 | while( start != NULL) 136 | { 137 | if (++n < 100 ) 138 | printf( "%9d %s ", 139 | start->data, 140 | (start->data < prev_value? "[!]":"[ok]") ); 141 | else if ( n == 100) 142 | printf( "..." ); 143 | prev_value = start->data; 144 | start = start->next; 145 | } 146 | } 147 | printf("\n"); 148 | return n; 149 | } 150 | 151 | 152 | // ······················································ 153 | 154 | int delete ( llnode_t *head ) 155 | /* 156 | * delete all the nodes 157 | * destroy every lock 158 | */ 159 | { 160 | while ( head != NULL ) 161 | { 162 | llnode_t *prev = head; 163 | head = head->next; 164 | #if defined(_OPENMP) 165 | omp_destroy_lock( &(prev->lock) ); 166 | #endif 167 | free( prev ); 168 | } 169 | return 0; 170 | } 171 | 172 | 173 | // ······················································ 174 | 175 | int find ( llnode_t *head, int value, llnode_t **prev, llnode_t **next ) 176 | { 177 | *prev = NULL, *next = NULL; 178 | 179 | if ( head == NULL ) 180 | // The first node must exist in this simple 181 | // implementation. 
182 | // To improve that, pass **head instead 183 | // of *head 184 | return -1; 185 | 186 | int nsteps = 0; 187 | llnode_t *ptr = NULL; 188 | 189 | if ( head-> data > value ) 190 | { 191 | // we need to walk back 192 | // 193 | ptr = head->prev; 194 | *next = head; 195 | while ( (ptr != NULL) && (ptr->data > value) ) 196 | { 197 | *next = ptr; 198 | ptr = ptr->prev; 199 | nsteps++; 200 | } 201 | *prev = ptr; 202 | } 203 | else 204 | { 205 | // we need to walk ahead 206 | // 207 | ptr = head->next; 208 | *prev = head; 209 | while ( (ptr != NULL) && (ptr->data < value) ) 210 | { 211 | *prev = ptr; 212 | ptr = ptr->next; 213 | nsteps++; 214 | } 215 | *next = ptr; 216 | } 217 | 218 | return nsteps; 219 | } 220 | 221 | 222 | int find_and_insert( llnode_t *head, int value ) 223 | { 224 | if ( head == NULL ) 225 | // The first node must exist in this simple 226 | // implementation. 227 | // To improve that, pass **head instead 228 | // of *head 229 | return -1; 230 | 231 | llnode_t *prev = NULL, *next = NULL; 232 | 233 | find ( head, value, &prev, &next ); 234 | 235 | llnode_t *new = (llnode_t*)malloc( sizeof(llnode_t) ); 236 | if ( new == NULL ) 237 | // signals a problem in mem alloc 238 | return -2; 239 | 240 | new->data = value; 241 | new->prev = prev; 242 | new->next = next; 243 | if( prev != NULL ) 244 | prev->next = new; 245 | if( next != NULL ) 246 | next->prev = new; 247 | 248 | return 0; 249 | } 250 | 251 | 252 | 253 | #if defined(_OPENMP) 254 | 255 | 256 | // ······················································ 257 | 258 | 259 | int find_and_insert_parallel( llnode_t *head, int value, int use_taskyield ) 260 | { 261 | if ( head == NULL ) 262 | return -1; 263 | 264 | llnode_t *prev = NULL, *next = NULL; 265 | 266 | dbgout("[ %llu ] > T %d process value %d\n", TIMESTAMP, me, value ); 267 | 268 | find ( head, value, &prev, &next ); 269 | 270 | dbgout("[ %llu ] T %d V %d found p: %d and n: %d\n", TIMESTAMP, me, value, 271 | prev!=NULL?prev->data:-1, 
next!=NULL?next->data:-1); 272 | 273 | // to our best knowledge, ptr is the first node with data > value 274 | // and prev is the last node with data < value 275 | // then, we should create a new node between prev and ptr 276 | 277 | // acquire the lock of prev and next 278 | // 279 | 280 | int locks_acquired = 0; 281 | while( !locks_acquired ) 282 | { 283 | if( prev != NULL ) 284 | { 285 | if ( use_taskyield ) { 286 | while ( omp_test_lock(&(prev->lock)) == 0 ) { 287 | #pragma omp taskyield 288 | } } 289 | else 290 | omp_set_lock(&(prev->lock)); 291 | 292 | locks_acquired = 1; 293 | } 294 | 295 | if ( next != NULL ) 296 | { 297 | locks_acquired = omp_test_lock(&(next->lock)); 298 | if( !locks_acquired && (prev!=NULL) ) 299 | omp_unset_lock(&(prev->lock)); 300 | if ( use_taskyield ) { 301 | #pragma omp taskyield 302 | } 303 | } 304 | } 305 | 306 | 307 | dbgout("[ %llu ] T %d V %d locked: (p: %d p>n: %d) (n: %d ndata:-1),((prev!=NULL)&&(prev->next!=NULL)?(prev->next)->data:-1), 310 | (next!=NULL?next->data:-1),((next!=NULL)&&(next->prev!=NULL)?(next->prev)->data:-1) ); 311 | 312 | // meanwhile, did somebody already insert a node between prev and next? 
313 | if( ( (prev != NULL) && (prev-> next != next) ) || 314 | ( (next != NULL) && (next-> prev != prev) ) ) 315 | { 316 | // yes, that happened 317 | // let's keep track of how many clashes 318 | // 319 | #pragma omp atomic update 320 | clashes++; 321 | 322 | if( (prev != NULL) && (prev-> next != next) ) 323 | { 324 | // the next pointer has changed 325 | // prev is not null, so that is our still valid point 326 | // we'll walk ahead from there 327 | // 328 | dbgout("[ %llu ]\t>> T %d V %d next has changed: from %d to %d\n", 329 | TIMESTAMP, me, value, 330 | (next!=NULL?next->data:-1),(prev->next!=NULL?(prev->next)->data:-1) ); 331 | 332 | if (next != NULL) 333 | // free the lock on the old next 334 | omp_unset_lock(&(next->lock)); 335 | 336 | dbgout("[ %llu ]\t\t>>> T %d V %d restart from %d to walk ahead\n", 337 | TIMESTAMP, me, value, prev->data); 338 | 339 | // search again, while always keeping prev locked 340 | next = prev->next; 341 | while(next) 342 | { 343 | dbgout("[ %llu ]\t\t\t>>> T %d V %d stepping into %d\n", 344 | TIMESTAMP, me, value, next->data ); 345 | omp_set_lock(&(next->lock)); 346 | 347 | if( next->data >= value ) 348 | break; 349 | omp_unset_lock(&(prev->lock)); 350 | prev = next; 351 | next = next->next; 352 | } 353 | } 354 | 355 | else if ( next->prev != prev ) 356 | // note that next can not be NULL 357 | { 358 | // the prev pointer has changed 359 | // next is not null, so that is our still valid point 360 | // we walk back from there 361 | // 362 | dbgout("[ %llu ]\t>> T %d V %d prev has changed: from %d to %d\n", 363 | TIMESTAMP, me, value, 364 | (prev!=NULL?prev->data:-1),(next->prev!=NULL?(next->prev)->data:-1) ); 365 | 366 | if (prev != NULL) 367 | // free the lock on the old next 368 | omp_unset_lock(&(prev->lock)); 369 | 370 | dbgout("[ %llu ]\t\t>> T %d V %d restart from %d to walk back\n", 371 | TIMESTAMP, me, value, next->data); 372 | 373 | // search again, while always keeping prev locked 374 | prev = next->prev; 375 | 
while(prev) 376 | { 377 | dbgout("[ %llu ]\t\t\t>>> T %d V %d stepping into %d\n", 378 | TIMESTAMP, me, value, prev->data); 379 | omp_set_lock(&(prev->lock)); 380 | if( prev->data <= value ) 381 | break; 382 | omp_unset_lock(&(next->lock)); 383 | next = prev; 384 | prev = prev->prev; 385 | } 386 | } 387 | else if ( next == NULL ) 388 | { 389 | printf("Some serious error occurred, a prev = next = NULL situation arose!\n"); 390 | return -3; 391 | } 392 | } 393 | 394 | // 395 | // insertion code 396 | // 397 | 398 | llnode_t *new = (llnode_t*)malloc( sizeof(llnode_t) ); 399 | if ( new == NULL ) 400 | return -2; 401 | 402 | new->data = value; 403 | new->prev = prev; 404 | new->next = next; 405 | omp_init_lock( &(new->lock) ); 406 | if ( prev != NULL ) 407 | prev->next = new; 408 | if ( next != NULL) 409 | next->prev = new; 410 | 411 | // release locks 412 | // 413 | if ( prev != NULL ) { 414 | omp_unset_lock(&(prev->lock)); 415 | dbgout("[ %llu ]\tthread %d processing %d releases lock for %d\n", 416 | TIMESTAMP, me, value, prev->data);} 417 | 418 | if( next != NULL ) { 419 | omp_unset_lock(&(next->lock)); 420 | dbgout("[ %llu ]\tthread %d processing %d releases lock for %d\n", 421 | TIMESTAMP, me, value, next->data);} 422 | 423 | dbgout("T %d V %d has done\n", me, value); 424 | return 0; 425 | } 426 | 427 | #endif 428 | 429 | 430 | // ······················································ 431 | 432 | int main ( int argc, char **argv ) 433 | { 434 | int N, mode; 435 | 436 | { 437 | int a = 1; 438 | N = ( argc > 1 ? atoi(*(argv+a++)) : 1000000 ); 439 | #if defined(_OPENMP) 440 | mode = ( argc > a ? atoi(*(argv+a++)) : DONT_USE_TASKYIELD ); 441 | #endif 442 | int seed = ( argc > a ? 
atoi(*(argv+a++)) : 98765 ); 443 | 444 | srand( seed ); 445 | } 446 | 447 | 448 | llnode_t *head = (llnode_t*)malloc(sizeof(llnode_t)); 449 | head->data = rand(); 450 | head->prev = NULL; 451 | head->next = NULL; 452 | #if defined(_OPENMP) 453 | omp_init_lock( &(head->lock) ); 454 | #endif 455 | 456 | ull timing = CPU_TIME; 457 | 458 | #if !defined(_OPENMP) 459 | 460 | int n = 1; 461 | while ( n < N ) 462 | { 463 | int new_value = rand(); 464 | int ret = find_and_insert( head, new_value ); 465 | if ( ret < 0 ) 466 | { 467 | printf("I've got a problem inserting node %d\n", n); 468 | delete( head ); 469 | } 470 | n++; 471 | } 472 | 473 | #else 474 | 475 | #pragma omp parallel 476 | { 477 | me = omp_get_thread_num(); 478 | #pragma omp single 479 | { 480 | printf("running with %d threads\n", omp_get_num_threads()); 481 | int n = 1; 482 | 483 | while ( n < N ) 484 | { 485 | int new_value = rand(); 486 | 487 | #pragma omp task 488 | find_and_insert_parallel( head, new_value, mode ); 489 | 490 | n++; 491 | } 492 | } 493 | } 494 | 495 | #endif 496 | 497 | timing = CPU_TIME - timing; 498 | 499 | head = get_head( head ); 500 | 501 | int actual_nodes = walk( head); 502 | if ( actual_nodes != N ) 503 | printf("shame on me! 
%d nodes instaed of %d have been found!", 504 | actual_nodes, N); 505 | 506 | delete ( head ); 507 | 508 | char string[23] = {0}; 509 | #if defined(_OPENMP) 510 | sprintf( string, " with %d clashes", clashes); 511 | #endif 512 | printf("generation took %g seconds (wtime) %s\n", ((double)timing/1e9), string); 513 | 514 | 515 | return 0; 516 | } 517 | -------------------------------------------------------------------------------- /HPC/codes/linked_list.deadlock.c: -------------------------------------------------------------------------------- 1 | 2 | #if defined(__STDC__) 3 | # if (__STDC_VERSION__ >= 199901L) 4 | # define _XOPEN_SOURCE 700 5 | # endif 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | // ========================================================================= 17 | // 18 | // define useful quantities 19 | // 20 | 21 | typedef unsigned long long ull; 22 | #define TIME_CUT 1000000009 23 | 24 | 25 | #if defined(_OPENMP) 26 | 27 | int me; 28 | #pragma omp threadprivate(me) 29 | 30 | #define CPU_TIME ({ struct timespec ts; (clock_gettime( CLOCK_REALTIME, &ts ), \ 31 | (ull)ts.tv_sec * 1000000000 + \ 32 | (ull)ts.tv_nsec); }) 33 | 34 | #define CPU_TIME_th ({ struct timespec ts; (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), \ 35 | (ull)myts.tv_sec*1000000000 + \ 36 | (ull)myts.tv_nsec); }) 37 | 38 | #else 39 | 40 | #define CPU_TIME ({ struct timespec ts; (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), \ 41 | (ull)ts.tv_sec * 1000000000 + \ 42 | (ull)ts.tv_nsec); }) 43 | #endif 44 | 45 | 46 | #if defined(DEBUG) 47 | #define TIMESTAP (CPU_TIME % TIME_CUT) 48 | #define dbgout(...) printf( __VA_ARGS__ ); 49 | #else 50 | #define TIMESTAP 51 | #define dbgout(...) 
52 | #endif 53 | 54 | 55 | // 56 | // ========================================================================= 57 | // 58 | // define data structures 59 | // 60 | 61 | #define DONT_USE_TASKYIELD 0 62 | #define USE_TASKYIELD 1 63 | 64 | typedef struct llnode 65 | { 66 | int data; 67 | #if defined(_OPENMP) 68 | omp_lock_t lock; 69 | #endif 70 | 71 | struct llnode *next; 72 | struct llnode *prev; 73 | } llnode_t; 74 | 75 | // 76 | // ========================================================================= 77 | // 78 | // declare data structures 79 | // 80 | 81 | int clashes; 82 | 83 | // 84 | // ========================================================================= 85 | // 86 | // prototypes 87 | // 88 | 89 | llnode_t* get_head ( llnode_t *); 90 | int walk ( llnode_t *); 91 | int delete ( llnode_t * ); 92 | int find ( llnode_t *, int, llnode_t **, llnode_t ** ); 93 | int find_and_insert ( llnode_t *, int ); 94 | 95 | #if defined(_OPENMP) 96 | int find_and_insert_parallel ( llnode_t *, int, int ); 97 | #endif 98 | 99 | // 100 | // ========================================================================= 101 | // ========================================================================= 102 | 103 | 104 | // ······················································ 105 | 106 | llnode_t *get_head ( llnode_t *start ) 107 | /* 108 | * walk the list basck to find the list head 109 | * returns the head 110 | */ 111 | { 112 | while( start->prev != NULL ) 113 | start = start->prev; 114 | 115 | return start; 116 | } 117 | 118 | // ······················································ 119 | 120 | int walk ( llnode_t *start ) 121 | /* 122 | * walk the list starting from the node start 123 | * as first, the list is walked back until the list head 124 | * if mode == 1, the list is then walked ahed printing 125 | * the first 100 nodes. 
126 | */ 127 | { 128 | int n = 0; 129 | if ( start != NULL ) 130 | { 131 | n = 1; 132 | int prev_value = start->data; 133 | printf("%9d [-]", start->data ); 134 | start = start->next; 135 | while( start != NULL) 136 | { 137 | if (++n < 100 ) 138 | printf( "%9d %s ", 139 | start->data, 140 | (start->data < prev_value? "[!]":"[ok]") ); 141 | else if ( n == 100) 142 | printf( "..." ); 143 | prev_value = start->data; 144 | start = start->next; 145 | } 146 | } 147 | printf("\n"); 148 | return n; 149 | } 150 | 151 | 152 | // ······················································ 153 | 154 | int delete ( llnode_t *head ) 155 | /* 156 | * delete all the nodes 157 | * destroy every lock 158 | */ 159 | { 160 | while ( head != NULL ) 161 | { 162 | llnode_t *prev = head; 163 | head = head->next; 164 | #if defined(_OPENMP) 165 | omp_destroy_lock( &(prev->lock) ); 166 | #endif 167 | free( prev ); 168 | } 169 | return 0; 170 | } 171 | 172 | 173 | // ······················································ 174 | 175 | int find ( llnode_t *head, int value, llnode_t **prev, llnode_t **next ) 176 | { 177 | *prev = NULL, *next = NULL; 178 | 179 | if ( head == NULL ) 180 | // The first node must exist in this simple 181 | // implementation. 
182 | // To improve that, pass **head instead 183 | // of *head 184 | return -1; 185 | 186 | int nsteps = 0; 187 | llnode_t *ptr = NULL; 188 | 189 | if ( head-> data > value ) 190 | { 191 | // we need to walk back 192 | // 193 | ptr = head->prev; 194 | *next = head; 195 | while ( (ptr != NULL) && (ptr->data > value) ) 196 | { 197 | *next = ptr; 198 | ptr = ptr->prev; 199 | nsteps++; 200 | } 201 | *prev = ptr; 202 | } 203 | else 204 | { 205 | // we need to walk ahead 206 | // 207 | ptr = head->next; 208 | *prev = head; 209 | while ( (ptr != NULL) && (ptr->data < value) ) 210 | { 211 | *prev = ptr; 212 | ptr = ptr->next; 213 | nsteps++; 214 | } 215 | *next = ptr; 216 | } 217 | 218 | return nsteps; 219 | } 220 | 221 | 222 | int find_and_insert( llnode_t *head, int value ) 223 | { 224 | if ( head == NULL ) 225 | // The first node must exist in this simple 226 | // implementation. 227 | // To improve that, pass **head instead 228 | // of *head 229 | return -1; 230 | 231 | llnode_t *prev = NULL, *next = NULL; 232 | 233 | find ( head, value, &prev, &next ); 234 | 235 | llnode_t *new = (llnode_t*)malloc( sizeof(llnode_t) ); 236 | if ( new == NULL ) 237 | // signals a problem in mem alloc 238 | return -2; 239 | 240 | new->data = value; 241 | new->prev = prev; 242 | new->next = next; 243 | if( prev != NULL ) 244 | prev->next = new; 245 | if( next != NULL ) 246 | next->prev = new; 247 | 248 | return 0; 249 | } 250 | 251 | 252 | 253 | #if defined(_OPENMP) 254 | 255 | 256 | // ······················································ 257 | 258 | 259 | int find_and_insert_parallel( llnode_t *head, int value, int use_taskyield ) 260 | { 261 | if ( head == NULL ) 262 | return -1; 263 | 264 | llnode_t *prev = NULL, *next = NULL; 265 | 266 | dbgout("[ %llu ] > T %d process value %d\n", TIMESTAMP, me, value ); 267 | 268 | find ( head, value, &prev, &next ); 269 | 270 | dbgout("[ %llu ] T %d V %d found p: %d and n: %d\n", TIMESTAMP, me, value, 271 | prev!=NULL?prev->data:-1, 
next!=NULL?next->data:-1); 272 | 273 | // to our best knowledge, ptr is the first node with data > value 274 | // and prev is the last node with data < value 275 | // then, we should create a new node between prev and ptr 276 | 277 | // acquire the lock of prev and next 278 | // 279 | if ( use_taskyield ) 280 | { 281 | if ( prev != NULL ) 282 | while ( omp_test_lock(&(prev->lock)) == 0 ) { 283 | #pragma omp taskyield 284 | } 285 | prev->owner=me; 286 | if ( next != NULL ) 287 | while ( omp_test_lock(&(next->lock)) == 0 ) { 288 | #pragma omp taskyield 289 | } 290 | } 291 | else 292 | { 293 | if( prev != NULL ) 294 | omp_set_lock(&(prev->lock)); 295 | 296 | if( next != NULL ) 297 | omp_set_lock(&(next->lock)); 298 | } 299 | 300 | 301 | dbgout("[ %llu ] T %d V %d locked: (p: %d p>n: %d) (n: %d ndata:-1),((prev!=NULL)&&(prev->next!=NULL)?(prev->next)->data:-1), 304 | (next!=NULL?next->data:-1),((next!=NULL)&&(next->prev!=NULL)?(next->prev)->data:-1) ); 305 | 306 | // meanwhile, did somebody already insert a node between prev and next? 
307 | if( ( (prev != NULL) && (prev-> next != next) ) || 308 | ( (next != NULL) && (next-> prev != prev) ) ) 309 | { 310 | // yes, that happened 311 | // let's keep track of how many clashes 312 | // 313 | #pragma omp atomic update 314 | clashes++; 315 | 316 | if( (prev != NULL) && (prev-> next != next) ) 317 | { 318 | // the next pointer has changed 319 | // prev is not null, so that is our still valid point 320 | // we'll walk ahead from there 321 | // 322 | dbgout("[ %llu ]\t>> T %d V %d next has changed: from %d to %d\n", 323 | TIMESTAMP, me, value, 324 | (next!=NULL?next->data:-1),(prev->next!=NULL?(prev->next)->data:-1) ); 325 | 326 | if (next != NULL) 327 | // free the lock on the old next 328 | omp_unset_lock(&(next->lock)); 329 | 330 | dbgout("[ %llu ]\t\t>>> T %d V %d restart from %d to walk ahead\n", 331 | TIMESTAMP, me, value, prev->data); 332 | 333 | // search again, while always keeping prev locked 334 | next = prev->next; 335 | while(next) 336 | { 337 | dbgout("[ %llu ]\t\t\t>>> T %d V %d stepping into %d\n", 338 | TIMESTAMP, me, value, next->data ); 339 | omp_set_lock(&(next->lock)); 340 | 341 | if( next->data >= value ) 342 | break; 343 | omp_unset_lock(&(prev->lock)); 344 | prev = next; 345 | next = next->next; 346 | } 347 | } 348 | 349 | else if ( next->prev != prev ) 350 | // note that next can not be NULL 351 | { 352 | // the prev pointer has changed 353 | // next is not null, so that is our still valid point 354 | // we walk back from there 355 | // 356 | dbgout("[ %llu ]\t>> T %d V %d prev has changed: from %d to %d\n", 357 | TIMESTAMP, me, value, 358 | (prev!=NULL?prev->data:-1),(next->prev!=NULL?(next->prev)->data:-1) ); 359 | 360 | if (prev != NULL) 361 | // free the lock on the old next 362 | omp_unset_lock(&(prev->lock)); 363 | 364 | dbgout("[ %llu ]\t\t>> T %d V %d restart from %d to walk back\n", 365 | TIMESTAMP, me, value, next->data); 366 | 367 | // search again, while always keeping prev locked 368 | prev = next->prev; 369 | 
while(prev) 370 | { 371 | dbgout("[ %llu ]\t\t\t>>> T %d V %d stepping into %d\n", 372 | TIMESTAMP, me, value, prev->data); 373 | omp_set_lock(&(prev->lock)); 374 | if( prev->data <= value ) 375 | break; 376 | omp_unset_lock(&(next->lock)); 377 | next = prev; 378 | prev = prev->prev; 379 | } 380 | } 381 | else if ( next == NULL ) 382 | { 383 | printf("Some serious error occurred, a prev = next = NULL situation arose!\n"); 384 | return -3; 385 | } 386 | } 387 | 388 | // 389 | // insertion code 390 | // 391 | 392 | llnode_t *new = (llnode_t*)malloc( sizeof(llnode_t) ); 393 | if ( new == NULL ) 394 | return -2; 395 | 396 | new->data = value; 397 | new->prev = prev; 398 | new->next = next; 399 | omp_init_lock( &(new->lock) ); 400 | if ( prev != NULL ) 401 | prev->next = new; 402 | if ( next != NULL) 403 | next->prev = new; 404 | 405 | // release locks 406 | // 407 | if ( prev != NULL ) { 408 | omp_unset_lock(&(prev->lock)); 409 | dbgout("[ %llu ]\tthread %d processing %d releases lock for %d\n", 410 | TIMESTAMP, me, value, prev->data);} 411 | 412 | if( next != NULL ) { 413 | omp_unset_lock(&(next->lock)); 414 | dbgout("[ %llu ]\tthread %d processing %d releases lock for %d\n", 415 | TIMESTAMP, me, value, next->data);} 416 | 417 | dbgout("T %d V %d has done\n", me, value); 418 | return 0; 419 | } 420 | 421 | #endif 422 | 423 | 424 | // ······················································ 425 | 426 | int main ( int argc, char **argv ) 427 | { 428 | int N, mode; 429 | 430 | { 431 | int a = 1; 432 | N = ( argc > 1 ? atoi(*(argv+a++)) : 1000000 ); 433 | #if defined(_OPENMP) 434 | mode = ( argc > a ? atoi(*(argv+a++)) : DONT_USE_TASKYIELD ); 435 | #endif 436 | int seed = ( argc > a ? 
atoi(*(argv+a++)) : 98765 ); 437 | 438 | srand( seed ); 439 | } 440 | 441 | 442 | llnode_t *head = (llnode_t*)malloc(sizeof(llnode_t)); 443 | head->data = rand(); 444 | head->prev = NULL; 445 | head->next = NULL; 446 | #if defined(_OPENMP) 447 | omp_init_lock( &(head->lock) ); 448 | #endif 449 | 450 | ull timing = CPU_TIME; 451 | 452 | #if !defined(_OPENMP) 453 | 454 | int n = 1; 455 | while ( n < N ) 456 | { 457 | int new_value = rand(); 458 | int ret = find_and_insert( head, new_value ); 459 | if ( ret < 0 ) 460 | { 461 | printf("I've got a problem inserting node %d\n", n); 462 | delete( head ); 463 | } 464 | n++; 465 | } 466 | 467 | #else 468 | 469 | #pragma omp parallel 470 | { 471 | me = omp_get_thread_num(); 472 | #pragma omp single 473 | { 474 | printf("running with %d threads\n", omp_get_num_threads()); 475 | int n = 1; 476 | 477 | while ( n < N ) 478 | { 479 | int new_value = rand(); 480 | 481 | #pragma omp task 482 | find_and_insert_parallel( head, new_value, mode ); 483 | 484 | n++; 485 | } 486 | } 487 | } 488 | 489 | #endif 490 | 491 | timing = CPU_TIME - timing; 492 | 493 | head = get_head( head ); 494 | 495 | int actual_nodes = walk( head); 496 | if ( actual_nodes != N ) 497 | printf("shame on me! 
%d nodes instaed of %d have been found!", 498 | actual_nodes, N); 499 | 500 | delete ( head ); 501 | 502 | char string[23] = {0}; 503 | #if defined(_OPENMP) 504 | sprintf( string, " with %d clashes", clashes); 505 | #endif 506 | printf("generation took %g seconds (wtime) %s\n", ((double)timing/1e9), string); 507 | 508 | 509 | return 0; 510 | } 511 | -------------------------------------------------------------------------------- /HPC/codes/quicksort.v0.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. 
If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | 42 | 43 | #if defined(_OPENMP) 44 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 45 | (double)ts.tv_nsec * 1e-9) 46 | 47 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 48 | (double)myts.tv_nsec * 1e-9) 49 | 50 | #else 51 | 52 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 53 | (double)ts.tv_nsec * 1e-9) 54 | #endif 55 | 56 | #if defined(DEBUG) 57 | #define VERBOSE 58 | #endif 59 | 60 | #if defined(VERBOSE) 61 | #define PRINTF(...) printf(__VA_ARGS__) 62 | #else 63 | #define PRINTF(...) 64 | #endif 65 | 66 | 67 | 68 | 69 | #define MAX( a, b ) ( (a)->data[HOT] >(b)->data[HOT]? (a) : (b) ); 70 | #define MIN( a, b ) ( (a)->data[HOT] <(b)->data[HOT]? 
(a) : (b) ); 71 | 72 | #if !defined(DATA_SIZE) 73 | #define DATA_SIZE 8 74 | #endif 75 | #define HOT 0 76 | 77 | #if (!defined(DEBUG) || defined(_OPENMP)) 78 | #define N_dflt 100000 79 | #else 80 | #define N_dflt 10000 81 | #endif 82 | 83 | typedef struct 84 | { 85 | double data[DATA_SIZE]; 86 | } data_t; 87 | 88 | typedef int (compare_t)(const void*, const void*); 89 | typedef int (verify_t)(data_t *, int, int, int); 90 | 91 | extern inline compare_t compare; 92 | extern inline compare_t compare_ge; 93 | verify_t verify_partitioning; 94 | verify_t verify_sorting; 95 | verify_t show_array; 96 | 97 | extern inline int partitioning( data_t *, int, int, compare_t ); 98 | void pqsort( data_t *, int, int, compare_t ); 99 | 100 | 101 | 102 | int main ( int argc, char **argv ) 103 | { 104 | 105 | 106 | // --------------------------------------------- 107 | // get the arguments 108 | // 109 | 110 | 111 | int N = N_dflt; 112 | 113 | /* check command-line arguments */ 114 | { 115 | int a = 0; 116 | 117 | if ( argc > ++a ) N = atoi(*(argv+a)); 118 | } 119 | 120 | // --------------------------------------------- 121 | // generate the array 122 | // 123 | 124 | data_t *data = (data_t*)malloc(N*sizeof(data_t)); 125 | long int seed; 126 | #if defined(_OPENMP) 127 | #pragma omp parallel 128 | { 129 | int me = omp_get_thread_num(); 130 | short int seed = time(NULL) % ( (1 << sizeof(short int))-1 ); 131 | short int seeds[3] = {seed-me, seed+me, seed+me*2}; 132 | 133 | #pragma omp for 134 | for ( int i = 0; i < N; i++ ) 135 | data[i].data[HOT] = erand48( seeds ); 136 | } 137 | #else 138 | { 139 | seed = time(NULL); 140 | srand48(seed); 141 | 142 | PRINTF("ssed is % ld\n", seed); 143 | 144 | for ( int i = 0; i < N; i++ ) 145 | data[i].data[HOT] = drand48(); 146 | } 147 | #endif 148 | 149 | 150 | // --------------------------------------------- 151 | // process 152 | // 153 | struct timespec ts; 154 | int nthreads = 1; 155 | double tstart = CPU_TIME; 156 | 157 | #if defined(_OPENMP) 
158 | 159 | #pragma omp parallel 160 | { 161 | #pragma omp single 162 | { 163 | nthreads = omp_get_num_threads(); 164 | pqsort( data, 0, N, compare_ge ); 165 | } 166 | } 167 | 168 | #else 169 | 170 | pqsort( data, 0, N, compare_ge ); 171 | #endif 172 | 173 | double tend = CPU_TIME; 174 | 175 | // --------------------------------------------- 176 | // release the memory and stop 177 | // 178 | 179 | if ( verify_sorting( data, 0, N, 0) ) 180 | printf("%d\t%g sec\n", nthreads, tend-tstart); 181 | else 182 | printf("the array is not sorted correctly\n"); 183 | 184 | free( data ); 185 | 186 | return 0; 187 | } 188 | 189 | 190 | #define SWAP(A,B,SIZE) do {int sz = (SIZE); char *a = (A); char *b = (B); \ 191 | do { char _temp = *a;*a++ = *b;*b++ = _temp;} while (--sz);} while (0) 192 | 193 | inline int partitioning( data_t *data, int start, int end, compare_t cmp_ge ) 194 | { 195 | 196 | // pick up the meadian of [0], [mid] and [end] as pivot 197 | // 198 | /* to be done */ 199 | 200 | // pick up the last element as pivot 201 | // 202 | --end; 203 | void *pivot = (void*)&data[end]; 204 | 205 | int pointbreak = end-1; 206 | for ( int i = start; i <= pointbreak; i++ ) 207 | if( cmp_ge( (void*)&data[i], pivot ) ) 208 | { 209 | while( (pointbreak > i) && cmp_ge( (void*)&data[pointbreak], pivot ) ) pointbreak--; 210 | if (pointbreak > i ) 211 | SWAP( (void*)&data[i], (void*)&data[pointbreak--], sizeof(data_t) ); 212 | } 213 | pointbreak += !cmp_ge( (void*)&data[pointbreak], pivot ) ; 214 | SWAP( (void*)&data[pointbreak], pivot, sizeof(data_t) ); 215 | 216 | return pointbreak; 217 | } 218 | 219 | 220 | void pqsort( data_t *data, int start, int end, compare_t cmp_ge ) 221 | { 222 | 223 | #if defined(DEBUG) 224 | #define CHECK { \ 225 | if ( verify_partitioning( data, start, end, mid ) ) { \ 226 | printf( "partitioning is wrong\n"); \ 227 | printf("%4d, %4d (%4d, %g) -> %4d, %4d + %4d, %4d\n", \ 228 | start, end, mid, data[mid].data[HOT],start, mid, mid+1, end); \ 229 | 
show_array( data, start, end, 0 ); }} 230 | #else 231 | #define CHECK 232 | #endif 233 | 234 | int size = end-start; 235 | if ( size > 2 ) 236 | { 237 | int mid = partitioning( data, start, end, cmp_ge ); 238 | 239 | CHECK; 240 | 241 | #pragma omp task shared(data) firstprivate(start, mid) 242 | pqsort( data, start, mid, cmp_ge ); 243 | #pragma omp task shared(data) firstprivate(mid, end) // note: this may not be a task 244 | pqsort( data, mid+1, end , cmp_ge ); 245 | } 246 | else 247 | { 248 | if ( (size == 2) && cmp_ge ( (void*)&data[start], (void*)&data[end-1] ) ) 249 | SWAP( (void*)&data[start], (void*)&data[end-1], sizeof(data_t) ); 250 | } 251 | } 252 | 253 | 254 | 255 | 256 | 257 | int verify_sorting( data_t *data, int start, int end, int not_used ) 258 | { 259 | int i = start; 260 | while( (++i < end) && (data[i].data[HOT] >= data[i-1].data[HOT]) ); 261 | return ( i == end ); 262 | } 263 | 264 | int verify_partitioning( data_t *data, int start, int end, int mid ) 265 | { 266 | int failure = 0; 267 | int fail = 0; 268 | 269 | for( int i = start; i < mid; i++ ) 270 | if ( compare( (void*)&data[i], (void*)&data[mid] ) >= 0 ) 271 | fail++; 272 | 273 | failure += fail; 274 | if ( fail ) 275 | { 276 | printf("failure in first half\n"); 277 | fail = 0; 278 | } 279 | 280 | for( int i = mid+1; i < end; i++ ) 281 | if ( compare( (void*)&data[i], (void*)&data[mid] ) < 0 ) 282 | fail++; 283 | 284 | failure += fail; 285 | if ( fail ) 286 | printf("failure in second half\n"); 287 | 288 | return failure; 289 | } 290 | 291 | 292 | int show_array( data_t *data, int start, int end, int not_used ) 293 | { 294 | for ( int i = start; i < end; i++ ) 295 | printf( "%f ", data[i].data[HOT] ); 296 | printf("\n"); 297 | return 0; 298 | } 299 | 300 | 301 | inline int compare( const void *A, const void *B ) 302 | { 303 | data_t *a = (data_t*)A; 304 | data_t *b = (data_t*)B; 305 | 306 | double diff = a->data[HOT] - b->data[HOT]; 307 | return ( (diff > 0) - (diff < 0) ); 308 | } 309 | 
310 | inline int compare_ge( const void *A, const void *B ) 311 | { 312 | data_t *a = (data_t*)A; 313 | data_t *b = (data_t*)B; 314 | 315 | return (a->data[HOT] >= b->data[HOT]); 316 | } 317 | -------------------------------------------------------------------------------- /HPC/codes/quicksort.v1.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | │ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. 
If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | 42 | 43 | #if defined(_OPENMP) 44 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 45 | (double)ts.tv_nsec * 1e-9) 46 | 47 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 48 | (double)myts.tv_nsec * 1e-9) 49 | 50 | #else 51 | 52 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 53 | (double)ts.tv_nsec * 1e-9) 54 | #endif 55 | 56 | #if defined(DEBUG) 57 | #define VERBOSE 58 | #endif 59 | 60 | #if defined(VERBOSE) 61 | #define PRINTF(...) printf(__VA_ARGS__) 62 | #else 63 | #define PRINTF(...) 64 | #endif 65 | 66 | 67 | 68 | 69 | #define MAX( a, b ) ( (a)->data[HOT] >(b)->data[HOT]? (a) : (b) ); 70 | #define MIN( a, b ) ( (a)->data[HOT] <(b)->data[HOT]? 
(a) : (b) ); 71 | 72 | #if !defined(DATA_SIZE) 73 | #define DATA_SIZE 8 74 | #endif 75 | #define HOT 0 76 | 77 | #if (!defined(DEBUG) || defined(_OPENMP)) 78 | #define N_dflt 100000 79 | #else 80 | #define N_dflt 10000 81 | #endif 82 | 83 | typedef struct 84 | { 85 | double data[DATA_SIZE]; 86 | } data_t; 87 | 88 | typedef int (compare_t)(const void*, const void*); 89 | typedef int (verify_t)(data_t *, int, int, int); 90 | 91 | extern inline compare_t compare; 92 | extern inline compare_t compare_ge; 93 | verify_t verify_partitioning; 94 | verify_t verify_sorting; 95 | verify_t show_array; 96 | 97 | extern inline int partitioning( data_t *, int, int, compare_t ); 98 | void pqsort( data_t *, int, int, compare_t ); 99 | 100 | 101 | 102 | int main ( int argc, char **argv ) 103 | { 104 | 105 | 106 | // --------------------------------------------- 107 | // get the arguments 108 | // 109 | 110 | 111 | int N = N_dflt; 112 | 113 | /* check command-line arguments */ 114 | { 115 | int a = 0; 116 | 117 | if ( argc > ++a ) N = atoi(*(argv+a)); 118 | } 119 | 120 | // --------------------------------------------- 121 | // generate the array 122 | // 123 | 124 | data_t *data = (data_t*)malloc(N*sizeof(data_t)); 125 | long int seed; 126 | #if defined(_OPENMP) 127 | #pragma omp parallel 128 | { 129 | int me = omp_get_thread_num(); 130 | short int seed = time(NULL) % ( (1 << sizeof(short int))-1 ); 131 | short int seeds[3] = {seed-me, seed+me, seed+me*2}; 132 | 133 | #pragma omp for 134 | for ( int i = 0; i < N; i++ ) 135 | data[i].data[HOT] = erand48( seeds ); 136 | } 137 | #else 138 | { 139 | seed = time(NULL); 140 | srand48(seed); 141 | 142 | PRINTF("ssed is % ld\n", seed); 143 | 144 | for ( int i = 0; i < N; i++ ) 145 | data[i].data[HOT] = drand48(); 146 | } 147 | #endif 148 | 149 | 150 | // --------------------------------------------- 151 | // process 152 | // 153 | struct timespec ts; 154 | int nthreads = 1; 155 | double tstart = CPU_TIME; 156 | 157 | #if defined(_OPENMP) 
158 | 159 | #pragma omp parallel 160 | { 161 | #pragma omp single 162 | { 163 | nthreads = omp_get_num_threads(); 164 | pqsort( data, 0, N, compare_ge ); 165 | } 166 | } 167 | 168 | #else 169 | 170 | pqsort( data, 0, N, compare_ge ); 171 | #endif 172 | 173 | double tend = CPU_TIME; 174 | 175 | // --------------------------------------------- 176 | // release the memory and stop 177 | // 178 | 179 | if ( verify_sorting( data, 0, N, 0) ) 180 | printf("%d\t%g sec\n", nthreads, tend-tstart); 181 | else 182 | printf("the array is not sorted correctly\n"); 183 | 184 | free( data ); 185 | 186 | return 0; 187 | } 188 | 189 | 190 | #define SWAP(A,B,SIZE) do {int sz = (SIZE); char *a = (A); char *b = (B); \ 191 | do { char _temp = *a;*a++ = *b;*b++ = _temp;} while (--sz);} while (0) 192 | 193 | inline int partitioning( data_t *data, int start, int end, compare_t cmp_ge ) 194 | { 195 | 196 | // pick up the meadian of [0], [mid] and [end] as pivot 197 | // 198 | /* to be done */ 199 | 200 | // pick up the last element as pivot 201 | // 202 | --end; 203 | void *pivot = (void*)&data[end]; 204 | 205 | int pointbreak = end-1; 206 | for ( int i = start; i <= pointbreak; i++ ) 207 | if( cmp_ge( (void*)&data[i], pivot ) ) 208 | { 209 | while( (pointbreak > i) && cmp_ge( (void*)&data[pointbreak], pivot ) ) pointbreak--; 210 | if (pointbreak > i ) 211 | SWAP( (void*)&data[i], (void*)&data[pointbreak--], sizeof(data_t) ); 212 | } 213 | pointbreak += !cmp_ge( (void*)&data[pointbreak], pivot ) ; 214 | SWAP( (void*)&data[pointbreak], pivot, sizeof(data_t) ); 215 | 216 | return pointbreak; 217 | } 218 | 219 | 220 | void pqsort( data_t *data, int start, int end, compare_t cmp_ge ) 221 | { 222 | 223 | #if defined(DEBUG) 224 | #define CHECK { \ 225 | if ( verify_partitioning( data, start, end, mid ) ) { \ 226 | printf( "partitioning is wrong\n"); \ 227 | printf("%4d, %4d (%4d, %g) -> %4d, %4d + %4d, %4d\n", \ 228 | start, end, mid, data[mid].data[HOT],start, mid, mid+1, end); \ 229 | 
show_array( data, start, end, 0 ); }} 230 | #else 231 | #define CHECK 232 | #endif 233 | 234 | #define CHECKSWAP( a, b) { if ( cmp_ge ( (void*)&data[start+(a)], (void*)&data[start+(b)] ) )\ 235 | SWAP( (void*)&data[start+(a)], (void*)&data[start+(b)], sizeof(data_t) );} 236 | 237 | int size = end-start; 238 | 239 | switch ( size ) 240 | { 241 | case 1: break; 242 | 243 | case 2: 244 | if ( cmp_ge ( (void*)&data[start], (void*)&data[end-1] ) ) 245 | SWAP( (void*)&data[start], (void*)&data[end-1], sizeof(data_t) ); 246 | break; 247 | 248 | case 3: 249 | CHECKSWAP( 1, 2 ); 250 | CHECKSWAP( 0, 2 ); 251 | CHECKSWAP( 0, 1 ); 252 | break; 253 | 254 | case 4: 255 | CHECKSWAP( 0, 1 ); 256 | CHECKSWAP( 2, 3 ); 257 | CHECKSWAP( 0, 2 ); 258 | CHECKSWAP( 1, 3 ); 259 | CHECKSWAP( 1, 2 ); 260 | break; 261 | 262 | case 5: 263 | CHECKSWAP( 0, 1 ); 264 | CHECKSWAP( 3, 4 ); 265 | CHECKSWAP( 2, 4 ); 266 | CHECKSWAP( 2, 3 ); 267 | CHECKSWAP( 0, 3 ); 268 | CHECKSWAP( 0, 2 ); 269 | CHECKSWAP( 1, 4 ); 270 | CHECKSWAP( 1, 3 ); 271 | CHECKSWAP( 1, 2 ); 272 | break; 273 | 274 | case 6: 275 | CHECKSWAP( 1, 2 ); 276 | CHECKSWAP( 0, 2 ); 277 | CHECKSWAP( 0, 1 ); 278 | CHECKSWAP( 4, 5 ); 279 | CHECKSWAP( 3, 5 ); 280 | CHECKSWAP( 3, 4 ); 281 | CHECKSWAP( 0, 3 ); 282 | CHECKSWAP( 1, 4 ); 283 | CHECKSWAP( 2, 5 ); 284 | CHECKSWAP( 2, 4 ); 285 | CHECKSWAP( 1, 3 ); 286 | CHECKSWAP( 2, 3 ); 287 | break; 288 | 289 | default: { 290 | int mid = partitioning( data, start, end, cmp_ge ); 291 | 292 | CHECK; 293 | 294 | if ( mid > start ) 295 | #pragma omp task default(none) shared(data, cmp_ge) firstprivate(start, mid) untied 296 | pqsort( data, start, mid, cmp_ge ); 297 | 298 | if ( end > mid+1 ) 299 | #pragma omp task default(none) shared(data, cmp_ge) firstprivate(mid, end) untied 300 | pqsort( data, mid+1, end , cmp_ge );} 301 | 302 | break; 303 | } 304 | 305 | } 306 | 307 | 308 | 309 | 310 | 311 | int verify_sorting( data_t *data, int start, int end, int not_used ) 312 | { 313 | int i = start; 314 | 
while( (++i < end) && (data[i].data[HOT] >= data[i-1].data[HOT]) ); 315 | return ( i == end ); 316 | } 317 | 318 | int verify_partitioning( data_t *data, int start, int end, int mid ) 319 | { 320 | int failure = 0; 321 | int fail = 0; 322 | 323 | for( int i = start; i < mid; i++ ) 324 | if ( compare( (void*)&data[i], (void*)&data[mid] ) >= 0 ) 325 | fail++; 326 | 327 | failure += fail; 328 | if ( fail ) 329 | { 330 | printf("failure in first half\n"); 331 | fail = 0; 332 | } 333 | 334 | for( int i = mid+1; i < end; i++ ) 335 | if ( compare( (void*)&data[i], (void*)&data[mid] ) < 0 ) 336 | fail++; 337 | 338 | failure += fail; 339 | if ( fail ) 340 | printf("failure in second half\n"); 341 | 342 | return failure; 343 | } 344 | 345 | 346 | int show_array( data_t *data, int start, int end, int not_used ) 347 | { 348 | for ( int i = start; i < end; i++ ) 349 | printf( "%f ", data[i].data[HOT] ); 350 | printf("\n"); 351 | return 0; 352 | } 353 | 354 | 355 | inline int compare( const void *A, const void *B ) 356 | { 357 | data_t *a = (data_t*)A; 358 | data_t *b = (data_t*)B; 359 | 360 | double diff = a->data[HOT] - b->data[HOT]; 361 | return ( (diff > 0) - (diff < 0) ); 362 | } 363 | 364 | inline int compare_ge( const void *A, const void *B ) 365 | { 366 | data_t *a = (data_t*)A; 367 | data_t *b = (data_t*)B; 368 | 369 | return (a->data[HOT] >= b->data[HOT]); 370 | } 371 | -------------------------------------------------------------------------------- /HPC/codes/quicksort.v2.c: -------------------------------------------------------------------------------- 1 | 2 | /* ────────────────────────────────────────────────────────────────────────── * 3 | │ │ 4 | │ This file is part of the exercises for the Lectures on │ 5 | │ "Foundations of High Performance Computing" │ 6 | │ given at │ 7 | │ Master in HPC and │ 8 | │ Master in Data Science and Scientific Computing │ 9 | │ @ SISSA, ICTP and University of Trieste │ 10 | │ │ 11 | │ contact: luca.tornatore@inaf.it │ 12 | │ │ 13 | 
│ This is free software; you can redistribute it and/or modify │ 14 | │ it under the terms of the GNU General Public License as published by │ 15 | │ the Free Software Foundation; either version 3 of the License, or │ 16 | │ (at your option) any later version. │ 17 | │ This code is distributed in the hope that it will be useful, │ 18 | │ but WITHOUT ANY WARRANTY; without even the implied warranty of │ 19 | │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the │ 20 | │ GNU General Public License for more details. │ 21 | │ │ 22 | │ You should have received a copy of the GNU General Public License │ 23 | │ along with this program. If not, see │ 24 | │ │ 25 | * ────────────────────────────────────────────────────────────────────────── */ 26 | 27 | 28 | #if defined(__STDC__) 29 | # if (__STDC_VERSION__ >= 199901L) 30 | # define _XOPEN_SOURCE 700 31 | # endif 32 | #endif 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | 42 | 43 | #if defined(_OPENMP) 44 | #define CPU_TIME (clock_gettime( CLOCK_REALTIME, &ts ), (double)ts.tv_sec + \ 45 | (double)ts.tv_nsec * 1e-9) 46 | 47 | #define CPU_TIME_th (clock_gettime( CLOCK_THREAD_CPUTIME_ID, &myts ), (double)myts.tv_sec + \ 48 | (double)myts.tv_nsec * 1e-9) 49 | 50 | #else 51 | 52 | #define CPU_TIME (clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ), (double)ts.tv_sec + \ 53 | (double)ts.tv_nsec * 1e-9) 54 | #endif 55 | 56 | #if defined(DEBUG) 57 | #define VERBOSE 58 | #endif 59 | 60 | #if defined(VERBOSE) 61 | #define PRINTF(...) printf(__VA_ARGS__) 62 | #else 63 | #define PRINTF(...) 64 | #endif 65 | 66 | 67 | 68 | 69 | #define MAX( a, b ) ( (a)->data[HOT] >(b)->data[HOT]? (a) : (b) ); 70 | #define MIN( a, b ) ( (a)->data[HOT] <(b)->data[HOT]? 
(a) : (b) ); 71 | 72 | #if !defined(DATA_SIZE) 73 | #define DATA_SIZE 8 74 | #endif 75 | #define HOT 0 76 | 77 | #if (!defined(DEBUG) || defined(_OPENMP)) 78 | #define N_dflt 100000 79 | #else 80 | #define N_dflt 10000 81 | #endif 82 | 83 | #define TaskTh_dflt 64 84 | 85 | typedef struct 86 | { 87 | double data[DATA_SIZE]; 88 | } data_t; 89 | 90 | typedef int (compare_t)(const void*, const void*); 91 | typedef int (verify_t)(data_t *, int, int, int); 92 | 93 | extern inline compare_t compare; 94 | extern inline compare_t compare_ge; 95 | extern inline compare_t compare_g; 96 | verify_t verify_partitioning; 97 | verify_t verify_sorting; 98 | verify_t show_array; 99 | 100 | extern inline int partitioning( data_t *, int, int ); 101 | void pqsort( data_t *, int, int ); 102 | void insertion_sort( data_t *, int, int ); 103 | 104 | int task_cutoff = TaskTh_dflt; 105 | int insertion_cutoff = TaskTh_dflt / 2; 106 | 107 | #pragma omp threadprivate( task_cutoff, insertion_cutoff ) 108 | 109 | int main ( int argc, char **argv ) 110 | { 111 | 112 | 113 | // --------------------------------------------- 114 | // get the arguments 115 | // 116 | 117 | 118 | int N = N_dflt; 119 | 120 | /* check command-line arguments */ 121 | { 122 | int a = 0; 123 | 124 | if ( argc > ++a ){ 125 | N = atoi(*(argv+a)); 126 | if ( argc > ++a ) { 127 | task_cutoff = atoi(*(argv+a)); 128 | if ( argc > ++a ) { 129 | insertion_cutoff = atoi(*(argv+a)); } 130 | else insertion_cutoff = task_cutoff/2;}} 131 | } 132 | 133 | // --------------------------------------------- 134 | // generate the array 135 | // 136 | 137 | data_t *data = (data_t*)malloc(N*sizeof(data_t)); 138 | long int seed; 139 | #if defined(_OPENMP) 140 | #pragma omp parallel 141 | { 142 | int me = omp_get_thread_num(); 143 | short int seed = time(NULL) % ( (1 << sizeof(short int))-1 ); 144 | short int seeds[3] = {seed-me, seed+me, seed+me*2}; 145 | 146 | #pragma omp for 147 | for ( int i = 0; i < N; i++ ) 148 | data[i].data[HOT] = erand48( 
seeds ); 149 | } 150 | #else 151 | { 152 | seed = time(NULL); 153 | srand48(seed); 154 | 155 | PRINTF("ssed is % ld\n", seed); 156 | 157 | for ( int i = 0; i < N; i++ ) 158 | data[i].data[HOT] = drand48(); 159 | } 160 | #endif 161 | 162 | 163 | // --------------------------------------------- 164 | // process 165 | // 166 | struct timespec ts; 167 | int nthreads = 1; 168 | double tstart = CPU_TIME; 169 | 170 | #if defined(_OPENMP) 171 | 172 | #pragma omp parallel copyin( task_cutoff, insertion_cutoff ) 173 | { 174 | #pragma omp single 175 | { 176 | nthreads = omp_get_num_threads(); 177 | pqsort( data, 0, N ); 178 | } 179 | } 180 | 181 | #else 182 | 183 | // uncomment the following call to use 184 | // exactly the same routine than the omp version 185 | pqsort( data, 0, N ); 186 | 187 | // uncomment the following call to test 188 | // the insertion sort routine 189 | /* insertion_sort( data, 0, N); */ 190 | 191 | // uncomment the following call to use 192 | // the library qsort routine 193 | /* qsort( data, N, sizeof(data_t), compare); */ 194 | 195 | #endif 196 | 197 | double tend = CPU_TIME; 198 | 199 | // --------------------------------------------- 200 | // release the memory and stop 201 | // 202 | 203 | if ( verify_sorting( data, 0, N, 0) ) 204 | printf("%d\t%g sec\n", nthreads, tend-tstart); 205 | else 206 | printf("the array is not sorted correctly\n"); 207 | 208 | free( data ); 209 | 210 | return 0; 211 | } 212 | 213 | 214 | #define SWAP(A,B,SIZE) do {int sz = (SIZE); char *a = (A); char *b = (B); \ 215 | do { char _temp = *a;*a++ = *b;*b++ = _temp;} while (--sz);} while (0) 216 | 217 | inline int partitioning( data_t *data, int start, int end ) 218 | { 219 | 220 | // pick up the meadian of [0], [mid] and [end] as pivot 221 | // 222 | /* to be done */ 223 | 224 | // pick up the last element as pivot 225 | // 226 | --end; 227 | void *pivot = (void*)&data[end]; 228 | 229 | int pointbreak = end-1; 230 | for ( int i = start; i <= pointbreak; i++ ) 231 | if( 
compare_ge( (void*)&data[i], pivot ) ) 232 | { 233 | while( (pointbreak > i) && compare_ge( (void*)&data[pointbreak], pivot ) ) pointbreak--; 234 | if (pointbreak > i ) 235 | SWAP( (void*)&data[i], (void*)&data[pointbreak--], sizeof(data_t) ); 236 | } 237 | pointbreak += !compare_ge( (void*)&data[pointbreak], pivot ) ; 238 | SWAP( (void*)&data[pointbreak], pivot, sizeof(data_t) ); 239 | 240 | return pointbreak; 241 | } 242 | 243 | 244 | void pqsort( data_t *data, int start, int end ) 245 | { 246 | 247 | #if defined(DEBUG) 248 | #define CHECK { \ 249 | if ( verify_partitioning( data, start, end, mid ) ) { \ 250 | printf( "partitioning is wrong\n"); \ 251 | printf("%4d, %4d (%4d, %g) -> %4d, %4d + %4d, %4d\n", \ 252 | start, end, mid, data[mid].data[HOT],start, mid, mid+1, end); \ 253 | show_array( data, start, end, 0 ); }} 254 | #define CHECK_S { \ 255 | if ( !verify_sorting( data, start, end, 0 ) ) \ 256 | printf("error between %d and %d\n", start, end ); } 257 | #else 258 | #define CHECK 259 | #define CHECK_S 260 | #endif 261 | 262 | #define CHECKSWAP( a, b) { if ( compare_ge ( (void*)&data[start+(a)], (void*)&data[start+(b)] ) )\ 263 | SWAP( (void*)&data[start+(a)], (void*)&data[start+(b)], sizeof(data_t) );} 264 | 265 | int size = end-start; 266 | 267 | switch ( size ) 268 | { 269 | case 1: break; 270 | case 2: { if ( compare_ge ( (void*)&data[start], (void*)&data[end-1] ) ) 271 | SWAP( (void*)&data[start], (void*)&data[end-1], sizeof(data_t) ); } break; 272 | case 3: { CHECKSWAP( 1, 2 ); 273 | CHECKSWAP( 0, 2 ); 274 | CHECKSWAP( 0, 1 ); } break; 275 | default: { if ( size < insertion_cutoff ) { 276 | insertion_sort( data, start, end ); 277 | CHECK_S; } 278 | else { 279 | 280 | int mid = partitioning( data, start, end ); 281 | 282 | CHECK; 283 | 284 | int mid_start = mid-start; 285 | if ( mid_start > 0 ) 286 | #pragma omp task default(none) final( mid_start < task_cutoff ) mergeable \ 287 | shared(data) firstprivate(start, mid) untied 288 | pqsort( data, start, 
mid ); 289 | 290 | int end_mid = end -(mid+1); 291 | if ( end_mid ) 292 | #pragma omp task default(none) final( end_mid < task_cutoff ) mergeable \ 293 | shared(data) firstprivate(mid, end) untied 294 | pqsort( data, mid+1, end ); 295 | } } break; 296 | } 297 | 298 | } 299 | 300 | 301 | 302 | void insertion_sort( data_t *data, int start, int end ) 303 | { 304 | { 305 | int min_idx = start; 306 | for ( int i = start+1; i < end; i++ ) 307 | if ( compare_g( (void*)&data[min_idx], (void*)&data[i] ) ) 308 | min_idx = i; 309 | 310 | SWAP( (void*)&data[start], (void*)&data[min_idx], sizeof(data_t) ); 311 | } 312 | 313 | for ( int head = start+1, run = start+1; (run = ++head) < end; ) 314 | { 315 | while ( (run > 0) && compare_g( (void*)&data[run-1], (void*)&data[run] ) ) { 316 | SWAP( (void*)&data[run-1], (void*)&data[run], sizeof(data_t) ); --run;} 317 | } 318 | 319 | } 320 | 321 | 322 | 323 | 324 | 325 | int verify_sorting( data_t *data, int start, int end, int mid ) 326 | { 327 | int i = start; 328 | while( (++i < end) && (data[i].data[HOT] >= data[i-1].data[HOT]) ); 329 | return ( i == end ); 330 | } 331 | 332 | int verify_partitioning( data_t *data, int start, int end, int mid ) 333 | { 334 | int failure = 0; 335 | int fail = 0; 336 | 337 | for( int i = start; i < mid; i++ ) 338 | if ( compare( (void*)&data[i], (void*)&data[mid] ) >= 0 ) 339 | fail++; 340 | 341 | failure += fail; 342 | if ( fail ) 343 | { 344 | printf("failure in first half\n"); 345 | fail = 0; 346 | } 347 | 348 | for( int i = mid+1; i < end; i++ ) 349 | if ( compare( (void*)&data[i], (void*)&data[mid] ) < 0 ) 350 | fail++; 351 | 352 | failure += fail; 353 | if ( fail ) 354 | printf("failure in second half\n"); 355 | 356 | return failure; 357 | } 358 | 359 | 360 | int show_array( data_t *data, int start, int end, int not_used ) 361 | { 362 | for ( int i = start; i < end; i++ ) 363 | printf( "%f ", data[i].data[HOT] ); 364 | printf("\n"); 365 | return 0; 366 | } 367 | 368 | 369 | inline int compare( 
const void *A, const void *B ) 370 | { 371 | data_t *a = (data_t*)A; 372 | data_t *b = (data_t*)B; 373 | 374 | double diff = a->data[HOT] - b->data[HOT]; 375 | return ( (diff > 0) - (diff < 0) ); 376 | } 377 | 378 | inline int compare_ge( const void *A, const void *B ) 379 | { 380 | data_t *a = (data_t*)A; 381 | data_t *b = (data_t*)B; 382 | 383 | return (a->data[HOT] >= b->data[HOT]); 384 | } 385 | 386 | inline int compare_g( const void *A, const void *B ) 387 | { 388 | data_t *a = (data_t*)A; 389 | data_t *b = (data_t*)B; 390 | 391 | return (a->data[HOT] > b->data[HOT]); 392 | } 393 | -------------------------------------------------------------------------------- /HPC/codes/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /HPC/mpi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/HPC/mpi.pdf -------------------------------------------------------------------------------- /HPC/openmp_outline.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/HPC/openmp_outline.pdf -------------------------------------------------------------------------------- /HPC/readme.md: -------------------------------------------------------------------------------- 1 | TBD 2 | -------------------------------------------------------------------------------- /HPC/tasks.new.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/HPC/tasks.new.pdf 
-------------------------------------------------------------------------------- /HPC/tasks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/HPC/tasks.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Advanced High Performance Computing 2023 2 | -------------------------------------------------------------------------------- /access_Leonardo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/access_Leonardo.pdf -------------------------------------------------------------------------------- /intro_to_course.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Foundations-of-HPC/Advanced-High-Performance-Computing-2023/c9cb4161b6424dbd33867275cc9263b0275d0d8d/intro_to_course.pdf --------------------------------------------------------------------------------