├── .github
    └── workflows
    │   └── sphinx.yml
├── .gitignore
├── LICENSE
├── LICENSE.code
├── Makefile
├── README.md
├── content
    ├── _static
    │   └── overrides.css
    ├── conf.py
    ├── data.rst
    ├── diagrams
    │   ├── stencil-fixed-boundaries
    │   └── stencil.drawio
    ├── examples
    │   └── v4.5.0
    │   │   ├── Example_target.1.c
    │   │   ├── Example_target.1.f90
    │   │   ├── Example_target_data.2.c
    │   │   ├── Example_target_data.2.f90
    │   │   ├── Example_target_unstructured_data.1.c
    │   │   ├── Example_target_unstructured_data.1.f90
    │   │   ├── Example_teams.6.c
    │   │   └── Example_teams.6.f90
    ├── exercise
    │   ├── common
    │   │   ├── pngwriter.c
    │   │   └── pngwriter.h
    │   ├── data_mapping
    │   │   ├── LICENSE-MIT
    │   │   ├── Makefile
    │   │   ├── core.cpp
    │   │   ├── fortran
    │   │   │   ├── Makefile
    │   │   │   ├── core.F90
    │   │   │   ├── heat_mod.F90
    │   │   │   ├── io.F90
    │   │   │   ├── main.F90
    │   │   │   ├── pngwriter_mod.F90
    │   │   │   ├── setup.F90
    │   │   │   └── utilities.F90
    │   │   ├── heat.h
    │   │   ├── heat_serial
    │   │   ├── io.cpp
    │   │   ├── main.cpp
    │   │   ├── setup.cpp
    │   │   └── utilities.cpp
    │   ├── ex00
    │   │   ├── LICENSE-MIT
    │   │   ├── ex00.F90
    │   │   └── ex00.c
    │   ├── ex01
    │   │   ├── LICENSE-MIT
    │   │   ├── ex01.F90
    │   │   ├── ex01.c
    │   │   └── solution
    │   │   │   ├── ex01.F90
    │   │   │   └── ex01.c
    │   ├── ex02
    │   │   ├── LICENSE-MIT
    │   │   ├── ex02.F90
    │   │   ├── ex02.c
    │   │   └── solution
    │   │   │   ├── ex02.F90
    │   │   │   └── ex02.c
    │   ├── ex03
    │   │   ├── LICENSE-MIT
    │   │   ├── ex03.F90
    │   │   ├── ex03.c
    │   │   └── solution
    │   │   │   ├── ex03.F90
    │   │   │   └── ex03.c
    │   ├── ex04
    │   │   ├── LICENSE-MIT
    │   │   ├── ex04.F90
    │   │   ├── ex04.c
    │   │   └── solution
    │   │   │   ├── ex04.F90
    │   │   │   └── ex04.c
    │   ├── ex05
    │   │   ├── LICENSE-MIT
    │   │   ├── ex05.F90
    │   │   ├── ex05.c
    │   │   └── solution
    │   │   │   ├── ex05.F90
    │   │   │   └── ex05.c
    │   ├── ex06
    │   │   ├── ex06.F90
    │   │   └── ex06.c
    │   ├── offloading
    │   │   ├── LICENSE-MIT
    │   │   ├── Makefile
    │   │   ├── core.cpp
    │   │   ├── fortran
    │   │   │   ├── Makefile
    │   │   │   ├── core.F90
    │   │   │   ├── heat_mod.F90
    │   │   │   ├── io.F90
    │   │   │   ├── main.F90
    │   │   │   ├── pngwriter_mod.F90
    │   │   │   ├── setup.F90
    │   │   │   └── utilities.F90
    │   │   ├── heat.h
    │   │   ├── io.cpp
    │   │   ├── main.cpp
    │   │   ├── setup.cpp
    │   │   └── utilities.cpp
    │   ├── serial
    │   │   ├── LICENSE-MIT
    │   │   ├── Makefile
    │   │   ├── core.cpp
    │   │   ├── fortran
    │   │   │   ├── Makefile
    │   │   │   ├── core.F90
    │   │   │   ├── heat_mod.F90
    │   │   │   ├── io.F90
    │   │   │   ├── main.F90
    │   │   │   ├── pngwriter_mod.F90
    │   │   │   ├── setup.F90
    │   │   │   └── utilities.F90
    │   │   ├── heat.h
    │   │   ├── heat_0000.png
    │   │   ├── heat_0010.png
    │   │   ├── io.cpp
    │   │   ├── main.cpp
    │   │   ├── setup.cpp
    │   │   └── utilities.cpp
    │   └── solution
    │   │   ├── common
    │   │       ├── pngwriter.c
    │   │       └── pngwriter.h
    │   │   ├── data_mapping
    │   │       ├── LICENSE-MIT
    │   │       ├── Makefile
    │   │       ├── core.cpp
    │   │       ├── fortran
    │   │       │   ├── Makefile
    │   │       │   ├── core.F90
    │   │       │   ├── heat_mod.F90
    │   │       │   ├── io.F90
    │   │       │   ├── main.F90
    │   │       │   ├── pngwriter_mod.F90
    │   │       │   ├── setup.F90
    │   │       │   └── utilities.F90
    │   │       ├── heat.h
    │   │       ├── io.cpp
    │   │       ├── main.cpp
    │   │       ├── setup.cpp
    │   │       └── utilities.cpp
    │   │   └── offloading
    │   │       ├── LICENSE-MIT
    │   │       ├── Makefile
    │   │       ├── core.cpp
    │   │       ├── fortran
    │   │           ├── Makefile
    │   │           ├── core.F90
    │   │           ├── heat_mod.F90
    │   │           ├── io.F90
    │   │           ├── main.F90
    │   │           ├── pngwriter_mod.F90
    │   │           ├── setup.F90
    │   │           └── utilities.F90
    │   │       ├── heat.h
    │   │       ├── io.cpp
    │   │       ├── main.cpp
    │   │       ├── setup.cpp
    │   │       └── utilities.cpp
    ├── gpu-architecture.rst
    ├── guide.rst
    ├── img
    │   ├── Automatic-Scalability-of-Cuda-via-scaling-the-number-of-Streaming-Multiprocessors-and.png
    │   ├── C2050Timeline.png
    │   ├── ENCCS.jpg
    │   ├── ENCCS_CSC_logos.jpg
    │   ├── HardwareReview.png
    │   ├── Loom.jpeg
    │   ├── ThreadExecution.jpeg
    │   ├── coalesced.png
    │   ├── comparison.png
    │   ├── compp.png
    │   ├── distributed_vs_shared.png
    │   ├── favicon.ico
    │   ├── gpu_vs_cpu.png
    │   ├── heat_0000.png
    │   ├── heat_montage.png
    │   ├── heteprogra.jpeg
    │   ├── memsch.png
    │   ├── microprocessor-trend-data.png
    │   ├── nvidia_block_diagram.jpeg
    │   ├── omp-parallel.png
    │   ├── processes-threads.png
    │   ├── processes-threads.svg
    │   ├── shared_mem.png
    │   ├── stencil-fixed-boundaries.svg
    │   ├── stencil.svg
    │   ├── threads.png
    │   ├── volta-architecture.png
    │   └── volta-sm-architecture.png
    ├── index.rst
    ├── interoperability.rst
    ├── introduction.rst
    ├── miniapp.rst
    ├── multi-gpu.rst
    ├── optimization.rst
    ├── porting.rst
    ├── profiling.rst
    ├── quick-reference.rst
    ├── syntax
    │   └── v4.5.0
    │   │   ├── composite.c
    │   │   ├── composite.f90
    │   │   ├── distribute.c
    │   │   ├── distribute.clause
    │   │   ├── distribute.f90
    │   │   ├── target.c
    │   │   ├── target.clause
    │   │   ├── target.f90
    │   │   ├── target_data.c
    │   │   ├── target_data.clause
    │   │   ├── target_data.f90
    │   │   ├── target_enter_data.c
    │   │   ├── target_enter_data.f90
    │   │   ├── target_enter_exit_data.clause
    │   │   ├── target_exit_data.c
    │   │   ├── target_exit_data.f90
    │   │   ├── teams.c
    │   │   ├── teams.clause
    │   │   └── teams.f90
    ├── target.rst
    └── volta-sm-architecture.png
├── make.bat
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
 1 | /_build
 2 | /venv
 3 | .ipynb_checkpoints
 4 | venv*
 5 | jupyter_execute
 6 | /content/.auctex-auto/
 7 | /content/__pycache__/
 8 | /.ccls-cache/
 9 | a.out
10 | *~
11 | \#*
12 | *.o
13 | 


--------------------------------------------------------------------------------
/LICENSE.code:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021, Thor Wikfeldt and individual contributors from ENCCS and CSC Training.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = content
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 
22 | # Pack the exercise sources under content/ into a tarball, leaving out PNG
23 | # files, heat_serial, object files and editor backup files.
24 | .PHONY: code-samples-tarball
25 | code-samples-tarball:
26 | 	tar cfz openmp-gpu-code-samples.tgz --exclude \*.png --exclude heat_serial --exclude \*.o --exclude \*~ -C content exercise
27 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # OpenMP for GPU offloading
2 | 
3 | Lesson material on OpenMP for GPU offloading, built with Sphinx from the sources in `content/`.
4 | 
5 | ## Credit and license
6 | 
7 | - https://enccs.github.io/lesson/#credits
8 | 


--------------------------------------------------------------------------------
/content/_static/overrides.css:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * colors = ['#0271AE', '#DC2830', '#FFC438',  # blue, red, light orange
 3 |  *           '#6E3B87', '#008D5D', '#FA902D',  # purple, green, orange
 4 |  *           '#0095B7', '#CB0C7B', '#F7E43C',  # cyan, magenta, yellow
 5 |  *           '#88B93B', '#444F95', '#F16232']  # pea green, dark blue, dark orange
 6 |  *
 7 |  * To use them in rST, you need to define a command in the epilog, see conf.py
 8 |  */
 9 | .blue {color: #0271AE;}
10 | .red {color: #DC2830;}
11 | .orange {color: #FFC438;}
12 | .purple {color: #6E3B87;}
13 | .green {color: #008D5D;}
14 | .dkorange {color: #FA902D;}
15 | .cyan {color: #0095B7;}
16 | .magenta {color: #CB0C7B;}
17 | .yellow {color: #F7E43C;}
18 | .peagreen {color: #88B93B;}
19 | .darkblue {color: #444F95;}
20 | .darkorange {color: #F16232;}
21 | 
22 | /* override colors in sphinx_lesson.css with the schemes here: https://personal.sron.nl/~pault/#sec:qualitative */
23 | 
24 | /* instructor-note */
25 | .rst-content .instructor-note {
26 |     background: #DDDDDD;
27 | }
28 | .rst-content .instructor-note > .admonition-title {
29 |     background: #BBBBBB;
30 | }
31 | .rst-content .instructor-note > .admonition-title::before {
32 |     content: "";
33 | }
34 | 
35 | /* callout */
36 | .rst-content .callout {
37 |     background: #EEEEBB;
38 | }
39 | .rst-content .callout > .admonition-title {
40 |     background: #BBCC33;
41 | }
42 | 
43 | /* questions */
44 | .rst-content .questions {
45 |     background: rgba(253, 219, 199, 0.3);
46 | }
47 | .rst-content .questions > .admonition-title {
48 |     background: rgba(204, 51, 17, 0.5);
49 | }
50 | 
51 | /* discussion */
52 | .rst-content .discussion {
53 |     background: rgba(231, 212, 232, 0.3);
54 | }
55 | .rst-content .discussion > .admonition-title {
56 |     background: rgba(194, 165, 207, 0.5);
57 | }
58 | 
59 | /* signature */
60 | .rst-content .signature {
61 |     background: rgba(217, 240, 211, 0.3);
62 | }
63 | .rst-content .signature > .admonition-title {
64 |     background: rgba(172, 211, 158, 0.5);
65 | }
66 | .rst-content .signature > .admonition-title::before {
67 |     content: "\01F527";
68 | }
69 | 
70 | /* parameters */
71 | .rst-content .parameters {
72 |     background: rgba(217, 240, 211, 0.0);
73 | }
74 | .rst-content .parameters > .admonition-title {
75 |     background: rgba(172, 211, 158, 0.5);
76 | }
77 | .rst-content .parameters > .admonition-title::before {
78 |     content: "\01F4BB";
79 | }
80 | 
81 | /* typealong */
82 | .rst-content .typealong {
83 |     background: rgba(221, 221, 221, 0.3);
84 | }
85 | .rst-content .typealong > .admonition-title {
86 |     background: rgba(187, 187, 187, 1.0);
87 | }
88 | .rst-content .typealong > .admonition-title::before {
89 |     content: "\02328";
90 | }
91 | 
92 | /* Equation numbers to the right */
93 | .math {
94 |     text-align: left;
95 | }
96 | .eqno {
97 |     float: right;
98 | }
99 | 


--------------------------------------------------------------------------------
/content/diagrams/stencil-fixed-boundaries:
--------------------------------------------------------------------------------
1 | <mxfile host="Electron" modified="2021-08-20T13:49:41.115Z" agent="5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/14.9.6 Chrome/89.0.4389.128 Electron/12.0.16 Safari/537.36" etag="pSQNY6_o_PlEHUN3cp1t" version="14.9.6" type="device"><diagram id="HZfbI1szdyYGrwidaAlg" name="Page-1">3Z1Lb9pAFIV/DctIfgHOMk1asmikRKnUKjtjD9iNYZAxgeTX1xSbx4wttQvPmZwV+I4N+MM6c8+d8Xjg3y52kyJapQ8yEfnAc5LdwL8beJ7rj93qZR95ryOO4xwi8yJL6tgp8Jx9iGbHOrrJErG+2LGUMi+z1WUwlsuliMuLWFQUcnu520zml9+6iuZCCzzHUa5Hf2ZJmR6i4dA5xe9FNk+bb3ab81tEzc51YJ1GidyehfyvA/+2kLI8vFvsbkW+p9dwORz3raP1+MMKsSz/5YDwMZXh8PdNtvrx8f0tde4fwvKq/pS3KN/UJ1z/2PK9IVDIzTIR+w9xBv6XbZqV4nkVxfvWbfWnV7G0XOTVllu9nWV5fitzWfw91k8iEc7iKr4uC/kqzlpGcSims6pFP43mN4miFLuzUH1aEyEXoizeq12a1gZxc5GN6u3t6R8L6lB69mc1sai+RubHTz5hrN7UJP+DqsdI1XPAVH1KqgGYakBBdWSZAgwZqcIVYERJFa0AYwaqx2vTFgUIGanCFeCakipaARqz9cmxBpZJgEvhrlSscA1wKeyVhhUuAhT+ygttEwEKg6VixYsAhcPSsMJFgCLBUossvgfG6lEkWBpWtLZ6FAmWijVAa2tD8ZNjHdkmAhQJloYVLgIUCZaKFS8CHAmWY5sIUNSwNaxwEaAsYuNFgKOKHdgmAhQuS8OKFgGfwmWpWOEi4FO4LLXUAhcBn8JlaVjhIkDhslSscBFwHQ6bpU6/CuFcKXyWNqkFz5XSaFnAlcNpqaOveK4cVksd0MJzpfBa2hgBfKCQY86QVnbFc+VwW2olC8+Vw26pxQE8V06/ZZDrTRHduR/3TzdT99GfrF+eHE9w3JjlA2/MaqVKoQEqVZNzsVqpUiiARhWtABT1Fh94Y1YrVYpqi0oVrgAUtRaNKloBKCotAfDGrFaqFHUWlSpcASiqLBpVtAJwFFnU1Mrk0Gs7Vkp3ZXLotR0rpb0yOfTajpXDXwGnY7djpTRYeBGgdFh4EaCwWGqChRcBSo+FFwFKk4UXAQqXpVVaDc4RaB8V4HBZwClY7VgpXJZWE4Bj5XRZ6FILx92ZWt4Kx0rhsrRMwCDWzfDly694eh1N4mD8epU+TaL5lev3nGHNwljErVyn4TAYOkeuGsQW1J1c1cVaDNqBdqxBz6mAGazagrh4rj13Woa4qqPZeK4991pmuGqLN+K59txtGeIaWMe15+qgIa729Vs9lwfNcNXmC+K59lwfNMTVvn6LIn3V5rbgufZcITTE1bp+q3nex+fmqk5yN1jK6sDac4kQY7cs4EpptyzgSmm3LOBKabcs4EpptyzgSmm3LOBKabcs4EpptyzgSmm38FxHFPkr8J7iDqwU6StwaawOrBTZK3Ahtw6sFMkrcNnBDqwUuStwnaEOrBSpK/B5JB1YKTJX4NNzOrByJq7wy3VMMU6gcYVfr81z5Lm44nOsMYXR0rjCk6wxhdNSueKzrDGF1dK44vstCq+lccX3WxRmS5uPge+3GN1Wj1irzULK8qxtUiFIH2Qi9nv8AQ==</diagram></mxfile>


--------------------------------------------------------------------------------
/content/diagrams/stencil.drawio:
--------------------------------------------------------------------------------
1 | <mxfile host="Electron" modified="2021-08-20T13:25:56.952Z" agent="5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/14.9.6 Chrome/89.0.4389.128 Electron/12.0.16 Safari/537.36" etag="t_2YhYU0XIVWTze_4KM_" version="14.9.6" type="device"><diagram id="HZfbI1szdyYGrwidaAlg" name="Page-1">3Z1fc9o4FMU/DY+Z8R9szGOSbpOHZiaZ7Mzu9M2xBbg1iDEmJPn0awc7gCSm6Rb5mPNUcm1s9JN60Lm6FgP/ev5yU8TL2Z1MRT7wnPRl4H8ZeJ7rj9zqnzry2kQcx9lGpkWWNrFd4DF7E+2JTXSdpWJ1cGIpZV5my8NgIhcLkZQHsbgo5ObwtInMD++6jKdCCzwmca5H/8nScraNRoGzi9+KbDpr7+y27ZvH7clNYDWLU7nZC/l/DfzrQspy+2r+ci3yml7LZfu+r0eOfnywQizKz7whup/JKPhxmS3/fvv2PHNu76Lywg+2l3mO83XT4ubTlq8tgkKuF6mor+IM/KvNLCvF4zJO6qObqter2Kyc59VfbvVykuX5tcxl8f5efzKZeElSxVdlIX+KvSNp+BQGYXVEb0fTtGdRlOJlL9S060bIuSiL1+qU5mjQMm5Gmd92wmbXZcMmNNvrrTYWN4Nk+nHlHcfqRYPyN7C6dqmmsYgmRqphEomnyWmougpVNwRT9Ripeuix6lNSHYKpDimohj1TAMvfVhiqcAUIKamiFWDEQPVjbPZFASJGqnAFGFNSRStA62HPHOuwZxLgUrgrFStcA1wKe6VhhYsAhb/yor6JAIXBUrHiRYDCYWlY4SJAMcFSkyy+B8bqUUywNKxobfUoJlgq1iFaW1uKZ4417JsIUEywNKxwEaCYYKlY8SLAMcFy+iYCFDlsDStcBCiT2HgR4MhiD/smAhQuS8OKFgGfwmWpWOEi4FO4LDXVAhcBn8JlaVjhIkDhslSseBGw7LKO1F8u5EKchmig+lY4UctOoKOKVhUrOoPtW3YC3WANVYMFH62WncAnsE7konxs7mYoKv/joevBv7p0WxBcLGVWtbDmLBZJlmvQq8aWh2SNCrqPuwnFeTZdVH8mFUJRxa9qdFkS55fNgXmWpvVtjF152Nmn6A61itvXu+MjVb7fH56t/nAdjtSiWnIcgce561DkFrVCTjxXyuRiD7hyZBfViiM8V470olrEgedKkV/U1sXR1sLlqJPVlhrxXDkyjOrqDZ6rR8FVTYjjuXLmGDvkelnEX9y324fLJ/fev1l9f3A8wfEwsg98GNlIlUIDVKpd1h8bqVIogEYVrQAU+RYf+DCykSpFtkWlClcAilyLRhWtABSZliHwYWQjVYo8i0oVrgAUWRaNKloBOJIs6tSqy3IjM1ZKd9VluZEZK6W96rLcyIyVw18BH0EyY6U0WHgRoHRYeBGgsFjqBAsvApQeCy8ClCYLLwIULkvLtHZYI2BeFeBwWcASLDNWCpel5QTgWDldFjrVwrEjgTZvhWOlcFnaTACOVXdZ64EX5nX5+2q9rF5O65fzNlbdZC+s9QBN4by29ampcD409NQpCufXwferf5OncXyTDEc/L2YPN/H0IrRt3AIRpUPTf4DIe/LD3TM4GkQD6qNcR8BKDjNW28YNgrXLZRwzVtvGDYO1Q7k2Y7Vt3LrBCizmMGId2TZuEKxwERjZNm4YrGgRGNk2bp1gjYD1HGasto0bBCteBGwbNwxWuAjYXh7rBuuwbyJA4bJUrHgRoHBZGla4CFC4rAi4vbwZK4XLUrHCRaD9PGRY0SIQcbgsYG2nGSuFy9KworU1onBZKtYuKzrMWClclppqwYsAhcvSsMJFgMJlqVjxIsDhsoC1nWasHC4LWN
tpxsrhsoC1nUasYw6XNeyZCIwpXJaGFS0CYwqXpWLFiwCFy1JTLXgRoHBZGla4CFC4LBUrXgQ4XBbwKQ8zVgqXpVW1wLFSuiw8Vg6Xpa68orG6bT+fOVfgNrtHuFL4LG19AL1I6DoURktLueK5cjgt4HNJR7hyWC01MYDnyum18FwpzNa4b49n2f+ZCAhXeC2W/Z+JwHDF6wCF3xr37Qkt+z8TAeGK1wHrWxhiuMJ1wPoehp1wdZ2+PaVl/3ciMGB7oAQUjksHi5cCCsulTrXgi7Gu9a0MMVzxCktpueDLsa71zQwhU60e6ACl5cLrgPXtDCFc8TpgfT9DzEQLLwQtRzaweCXg9Fw9kAIKz6VlYeFVBIZtDc+Ra99qtFyPwnLpOQI8WE7PBc+9eJSeqwdcKTyXPiOAg233mf31hrxX7v/elPf0m+2edis4+Ga7rv8JjyYW6WVRyE0NMo9Xqyw5pLx9g0in4ne57LU7MDS7jRUij8vs+fDyJhbNHe5lVt14txn1sUxOe4mVXBeJaN61I6pdKDjmBNsLlXExFaV2ofeu+Wj2n/TWJ4zfufdWeKreGsF7S3eT2cC7/rGVtHMTr0CdpI8+9xViUbt0U/nO94KArh/A6erWMtuO3HoEnxvfUBUDF85Xt5hZPXKryzlnCDhQl/fwgHWrmZ3n4NXEwR7b6s9CynL/m7Bq1OxOpqI+4z8=</diagram></mxfile>


--------------------------------------------------------------------------------
/content/examples/v4.5.0/Example_target.1.c:
--------------------------------------------------------------------------------
 1 | extern void init(float*, float*, int);
 2 | extern void output(float*, int);
 3 | void vec_mult(int N)
 4 | {
 5 |    int i;
 6 |    float p[N], v1[N], v2[N];
 7 |    init(v1, v2, N);
 8 |    #pragma omp target
 9 |    #pragma omp parallel for private(i)
10 |    for (i=0; i<N; i++)
11 |      p[i] = v1[i] * v2[i];
12 |    output(p, N);
13 | }
14 | 


--------------------------------------------------------------------------------
/content/examples/v4.5.0/Example_target.1.f90:
--------------------------------------------------------------------------------
 1 | subroutine vec_mult(N)
 2 |    integer ::  i,N
 3 |    real    ::  p(N), v1(N), v2(N)
 4 |    call init(v1, v2, N)
 5 |    !$omp target
 6 |    !$omp parallel do
 7 |    do i=1,N
 8 |       p(i) = v1(i) * v2(i)
 9 |    end do
10 |    !$omp end target
11 |    call output(p, N)
12 | end subroutine
13 | 


--------------------------------------------------------------------------------
/content/examples/v4.5.0/Example_target_data.2.c:
--------------------------------------------------------------------------------
 1 | extern void init(float*, float*, int);
 2 | extern void init_again(float*, float*, int);
 3 | extern void output(float*, int);
 4 | void vec_mult(float *p, float *v1, float *v2, int N)
 5 | {
 6 |    int i;
 7 |    init(v1, v2, N);
 8 |    #pragma omp target data map(from: p[0:N])
 9 |    {
10 |       #pragma omp target map(to: v1[:N], v2[:N])
11 |       #pragma omp parallel for
12 |       for (i=0; i<N; i++)
13 | 	p[i] = v1[i] * v2[i];
14 |       init_again(v1, v2, N);
15 |       #pragma omp target map(to: v1[:N], v2[:N])
16 |       #pragma omp parallel for
17 |       for (i=0; i<N; i++)
18 | 	p[i] = p[i] + (v1[i] * v2[i]);
19 |    }
20 |    output(p, N);
21 | }
22 | 


--------------------------------------------------------------------------------
/content/examples/v4.5.0/Example_target_data.2.f90:
--------------------------------------------------------------------------------
 1 | subroutine vec_mult(p, v1, v2, N)
 2 |    real    ::  p(N), v1(N), v2(N)
 3 |    integer ::  i
 4 |    call init(v1, v2, N)
 5 |    !$omp target data map(from: p)
 6 |       !$omp target map(to: v1, v2 )
 7 |          !$omp parallel do
 8 |          do i=1,N
 9 |             p(i) = v1(i) * v2(i)
10 |          end do
11 |       !$omp end target
12 |       call init_again(v1, v2, N)
13 |       !$omp target map(to: v1, v2 )
14 |          !$omp parallel do
15 |          do i=1,N
16 |             p(i) = p(i) + v1(i) * v2(i)
17 |          end do
18 |       !$omp end target
19 |    !$omp end target data
20 |    call output(p, N)
21 | end subroutine
22 | 


--------------------------------------------------------------------------------
/content/examples/v4.5.0/Example_target_unstructured_data.1.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | typedef struct {
 3 |   double *A;
 4 |   int N;
 5 | } Matrix;
 6 | 
 7 | void init_matrix(Matrix *mat, int n)
 8 | {
 9 |   mat->A = (double *)malloc(n*sizeof(double));
10 |   mat->N = n;
11 |   #pragma omp target enter data map(alloc:mat->A[:n])
12 | }
13 | 
14 | void free_matrix(Matrix *mat)
15 | {
16 |   #pragma omp target exit data map(delete:mat->A[:mat->N])
17 |   mat->N = 0;
18 |   free(mat->A);
19 |   mat->A = NULL;
20 | }
21 | 


--------------------------------------------------------------------------------
/content/examples/v4.5.0/Example_target_unstructured_data.1.f90:
--------------------------------------------------------------------------------
 1 | module example
 2 |   real(8), allocatable :: A(:)
 3 | 
 4 |   contains
 5 |     subroutine initialize(N)
 6 |       integer :: N
 7 | 
 8 |       allocate(A(N))
 9 |       !$omp target enter data map(alloc:A)
10 | 
11 |     end subroutine initialize
12 | 
13 |     subroutine finalize()
14 | 
15 |       !$omp target exit data map(delete:A)
16 |       deallocate(A)
17 | 
18 |     end subroutine finalize
19 | end module example
20 | 


--------------------------------------------------------------------------------
/content/examples/v4.5.0/Example_teams.6.c:
--------------------------------------------------------------------------------
 1 | extern void init(float *, float *, int);
 2 | extern void output(float *, int);
 3 | void vec_mult(float *p, float *v1, float *v2, int N)
 4 | {
 5 |    int i;
 6 |    init(v1, v2, N);
 7 |    #pragma omp target teams map(to: v1[0:N], v2[:N]) map(from: p[0:N])
 8 |    #pragma omp distribute parallel for simd
 9 |    for (i=0; i<N; i++)
10 |      p[i] = v1[i] * v2[i];
11 |    output(p, N);
12 | }
13 | 


--------------------------------------------------------------------------------
/content/examples/v4.5.0/Example_teams.6.f90:
--------------------------------------------------------------------------------
 1 | subroutine vec_mult(p, v1, v2, N)
 2 |    real    ::  p(N), v1(N), v2(N)
 3 |    integer ::  i
 4 |    call init(v1, v2, N)
 5 |    !$omp target teams map(to: v1, v2) map(from: p)
 6 |    !$omp distribute parallel do simd
 7 |          do i=1,N
 8 |             p(i) = v1(i) * v2(i)
 9 |          end do
10 |    !$omp end target teams
11 |    call output(p, N)
12 | end subroutine
13 | 


--------------------------------------------------------------------------------
/content/exercise/common/pngwriter.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019 CSC Training */
 2 | /* Copyright (c) 2021 ENCCS */
 3 | #ifndef PNGWRITER_H_
 4 | #define PNGWRITER_H_
 5 | 
 6 | /* Give the declaration C linkage when this header is included from C++. */
 7 | #ifdef __cplusplus
 8 |   extern "C" {
 9 | #endif
10 | 
11 | /*
12 |  * Declaration only; the definition is not part of this header.
13 |  * NOTE(review): presumably writes `data` as a PNG image named `fname`,
14 |  * with `nx` and `ny` giving the array extents - confirm in pngwriter.c.
15 |  * NOTE(review): `lang` is a single-character selector whose accepted
16 |  * values, and the meaning of the int return value, are not visible here.
17 |  */
18 | int save_png(double *data, const int nx, const int ny, const char *fname,
19 |              const char lang);
20 | 
21 | #ifdef __cplusplus
22 |   }
23 | #endif
24 | #endif /* PNGWRITER_H_ */
25 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 CSC Training
 4 | Copyright (c) 2021 ENCCS
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 CSC Training
 2 | # Copyright (c) 2021 ENCCS
 3 | ifeq ($(COMP),)
 4 | COMP=nv
 5 | endif
 6 | 
 7 | HAVE_PNG=0
 8 | ifeq ($(HAVE_PNG),1)
 9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib
11 | endif
12 | 
13 | COMMONDIR=../common
14 | 
15 | ifeq ($(COMP),gnu)
16 | CXX=g++
17 | CC=gcc
18 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR)
19 | LDFLAGS=
20 | LIBS=
21 | endif
22 | 
23 | ifeq ($(COMP),nv)
24 | CXX=nvc++
25 | CC=nvc
26 | CCFLAGS=-O3 -mp=gpu -Minfo=mp -I$(COMMONDIR)
27 | LDFLAGS=
28 | LIBS=
29 | endif
30 | 
31 | ifeq ($(COMP),intel)
32 | CXX=icpx
33 | CC=icx
34 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR)
35 | LDFLAGS=
36 | LIBS=
37 | endif
38 | 
39 | EXE=heat_serial
40 | OBJS=main.o core.o setup.o utilities.o io.o
41 | OBJS_PNG=$(COMMONDIR)/pngwriter.o
42 | 
43 | 
44 | all: $(EXE)
45 | 
46 | 
47 | core.o: core.cpp heat.h
48 | utilities.o: utilities.cpp heat.h
49 | setup.o: setup.cpp heat.h
50 | io.o: io.cpp heat.h
51 | main.o: main.cpp heat.h
52 | 
53 | $(OBJS_PNG): C_COMPILER := $(CC)
54 | $(OBJS_PNG): CCFLAGS += -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include
55 | $(OBJS): C_COMPILER := $(CXX)
56 | 
57 | $(EXE): $(OBJS) $(OBJS_PNG)
58 | 	$(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
59 | 
60 | %.o: %.cpp
61 | 	$(CXX) $(CCFLAGS) -c $< -o $@
62 | 
63 | %.o: %.c
64 | 	$(CC) $(CCFLAGS) -c $< -o $@
65 | 
66 | .PHONY: clean
67 | clean:
68 | 	-/bin/rm -f $(EXE) a.out *.o *.png *~ ../common/*.o
69 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/core.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Main solver routines for heat equation solver
 4 | 
 5 | #include "heat.h"
 6 | 
 7 | // Update the temperature values using a five-point stencil
 8 | // Arguments:
 9 | //   curr: current temperature values (written by this routine)
10 | //   prev: temperature values from previous time step (only read by this routine)
11 | //   a: diffusivity
12 | //   dt: time step
13 | void evolve(field *curr, field *prev, double a, double dt)
14 | {
15 |   // Pull raw pointers and sizes out of the structs so the target region below only uses plain locals
16 |   double *currdata = curr->data.data();
17 |   double *prevdata = prev->data.data();
18 |   int nx = curr->nx;
19 |   int ny = curr->ny;
20 | 
21 |   // Determine the temperature field at next time step inside an OpenMP target region.
22 |   // As we have fixed boundary conditions, the outermost gridpoints are not updated;
23 |   // both (nx+2)*(ny+2) arrays are listed in a map clause that names no explicit map type.
24 |   double dx2 = prev->dx * prev->dx;
25 |   double dy2 = prev->dy * prev->dy;
26 |   #pragma omp target teams distribute parallel for \
27 |   map(currdata[0:(nx+2)*(ny+2)],prevdata[0:(nx+2)*(ny+2)])
28 |   for (int i = 1; i < nx + 1; i++) {
29 |     for (int j = 1; j < ny + 1; j++) {
30 |       int ind = i * (ny + 2) + j;        // point (i, j); each row holds ny + 2 values
31 |       int ip = (i + 1) * (ny + 2) + j;   // neighbour at i + 1
32 |       int im = (i - 1) * (ny + 2) + j;   // neighbour at i - 1
33 |       int jp = i * (ny + 2) + j + 1;     // neighbour at j + 1
34 |       int jm = i * (ny + 2) + j - 1;     // neighbour at j - 1
35 |       currdata[ind] = prevdata[ind] + a*dt*
36 | 	    ((prevdata[ip] - 2.0*prevdata[ind] + prevdata[im]) / dx2 +
37 | 	     (prevdata[jp] - 2.0*prevdata[ind] + prevdata[jm]) / dy2);
38 |     }
39 |   }
40 | }
41 | 
42 | // Exercise stub: meant to start a data region and copy the temperature fields to the device
43 | void enter_data(field *curr, field *prev)
44 | {
45 |     int nx, ny;
46 |     double *currdata, *prevdata;
47 | 
48 |     currdata = curr->data.data();
49 |     prevdata = prev->data.data();
50 |     nx = curr->nx;
51 |     ny = curr->ny;
52 | 
53 | // TODO(exercise): add the data mapping here; the locals set above are not used until it is added
54 | 
55 | }
56 | 
57 | // Exercise stub: meant to end a data region and copy the temperature fields back to the host
58 | void exit_data(field *curr, field *prev)
59 | {
60 |     int nx, ny;
61 |     double *currdata, *prevdata;
62 | 
63 |     currdata = curr->data.data();
64 |     prevdata = prev->data.data();
65 |     nx = curr->nx;
66 |     ny = curr->ny;
67 | 
68 | // TODO(exercise): add the data mapping here; the locals set above are not used until it is added
69 | 
70 | }
71 | 
72 | // Exercise stub: meant to copy a temperature field from the device to the host
73 | void update_host(field *temperature)
74 | {
75 |     int nx, ny;
76 |     double *data;
77 | 
78 |     data = temperature->data.data();
79 |     nx = temperature->nx;
80 |     ny = temperature->ny;
81 | 
82 | // TODO(exercise): add the data mapping here; the locals set above are not used until it is added
83 | 
84 | }
85 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 CSC Training
 2 | # Copyright (c) 2021 ENCCS
 3 | ifeq ($(COMP),)
 4 | COMP=nv
 5 | endif
 6 | 
 7 | HAVE_PNG=0
 8 | ifeq ($(HAVE_PNG),1)
 9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 
11 | endif
12 | 
13 | 
14 | COMMONDIR=../../common
15 | 
16 | ifeq ($(COMP),nv)
17 | FC=nvfortran
18 | CC=nvc
19 | FCFLAGS=-O3 -mp=gpu -Minfo=mp
20 | CCFLAGS=-O3 -I$(COMMONDIR)
21 | LDFLAGS=
22 | LIBS= 
23 | endif
24 | 
25 | ifeq ($(COMP),gnu)
26 | FC=gfortran
27 | CC=gcc
28 | FCFLAGS=-O3 -Wall -fopenmp
29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR)
30 | LDFLAGS=-fopenmp
31 | LIBS=
32 | endif
33 | 
34 | 
35 | EXE=heat_serial
36 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o
37 | OBJS_PNG= $(COMMONDIR)/pngwriter.o
38 | 
39 | all: $(EXE)
40 | 
41 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
42 | heat_mod.o: heat_mod.F90
43 | core.o: core.F90 heat_mod.o
44 | utilities.o: utilities.F90 heat_mod.o
45 | io.o: io.F90 heat_mod.o pngwriter_mod.o 
46 | setup.o: setup.F90 heat_mod.o utilities.o io.o
47 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o
48 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o
49 | 
50 | $(EXE): $(OBJS) $(OBJS_PNG)
51 | 	$(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
52 | 
53 | %.o: %.F90
54 | 	$(FC) $(FCFLAGS) -c $< -o $@
55 | 
56 | %.o: %.c
57 | 	$(CC) -DHAVE_PNG=$(HAVE_PNG) $(CCFLAGS) -c $< -o $@
58 | 
59 | .PHONY: clean
60 | clean:
61 | 	-/bin/rm -f $(EXE) a.out *.o *.mod *.png *~  ../../common/*.o
62 | 
63 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/core.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Main solver routines for heat equation solver
 4 | module core
 5 |   use heat
 6 | 
 7 | contains
 8 | 
 9 |   ! Update the temperature values using five-point stencil
10 |   ! Arguments:
11 |   !   curr (type(field)): current temperature values
12 |   !   prev (type(field)): temperature values from previous time step
13 |   !   a (real(dp)): diffusivity
14 |   !   dt (real(dp)): time step
15 |   subroutine evolve(curr, prev, a, dt)
16 | 
17 |     implicit none
18 | 
19 |     type(field),target, intent(inout) :: curr, prev
20 |     real(dp), intent(in) :: a, dt
21 |     integer :: i, j, nx, ny
22 |     real(dp) :: dx, dy
23 |     real(dp), pointer, contiguous, dimension(:,:) :: currdata, prevdata
24 | 
25 |     ! Local scalars and plain array pointers keep derived-type components out of the loop
26 |     nx = curr%nx
27 |     ny = curr%ny
28 |     dx = curr%dx
29 |     dy = curr%dy
30 |     currdata => curr%data
31 |     prevdata => prev%data
32 | 
33 |     ! Determine the temperature field at the next time step. As we have
34 |     ! fixed boundary conditions, the outermost gridpoints are not
35 |     ! updated.
36 |     !$omp target teams distribute parallel do  
37 |     do j = 1, ny
38 |        do i = 1, nx
39 |           currdata(i, j) = prevdata(i, j) + a * dt * &
40 |                & ((prevdata(i-1, j) - 2.0_dp * prevdata(i, j) + &
41 |                &   prevdata(i+1, j)) / dx**2 + &
42 |                &  (prevdata(i, j-1) - 2.0_dp * prevdata(i, j) + &
43 |                &   prevdata(i, j+1)) / dy**2)
44 |        end do
45 |     end do
46 |     !$omp end target teams distribute parallel do 
47 |   end subroutine evolve
48 | 
49 |   ! Start a data region and copy temperature fields to the device
50 |   !   curr (type(field)): current temperature values
51 |   !   prev (type(field)): values from previous time step
52 |   subroutine enter_data(curr, prev)
53 |     implicit none
54 |     type(field), target, intent(in) :: curr, prev
55 |     real(kind=dp), pointer, contiguous :: currdata(:,:), prevdata(:,:)
56 |     ! Plain array pointers to the field data, presumably for use in the mapping directive
57 |     currdata => curr%data
58 |     prevdata => prev%data
59 | 
60 |   ! TODO (exercise): add the data mapping for currdata and prevdata here
61 | 
62 |   end subroutine enter_data
63 | 
64 |   ! End a data region and copy temperature fields back to the host
65 |   !   curr (type(field)): current temperature values
66 |   !   prev (type(field)): values from previous time step
67 |   subroutine exit_data(curr, prev)
68 |     implicit none
69 |     type(field), target :: curr, prev
70 |     real(kind=dp), pointer, contiguous :: currdata(:,:), prevdata(:,:)
71 |     ! Plain array pointers to the field data, presumably for use in the mapping directive
72 |     currdata => curr%data
73 |     prevdata => prev%data
74 | 
75 |   ! TODO (exercise): add the data mapping for currdata and prevdata here
76 | 
77 |   end subroutine exit_data
78 | 
79 |   ! Copy a temperature field from the device to the host
80 |   !   temperature (type(field)): temperature field
81 |   subroutine update_host(temperature)
82 |     implicit none
83 |     type(field), target :: temperature
84 |     real(kind=dp), pointer, contiguous :: tempdata(:,:)
85 |     ! Plain array pointer to the field data, presumably for use in the update directive
86 |     tempdata => temperature%data
87 | 
88 |   ! TODO (exercise): add the device-to-host update of tempdata here
89 | 
90 |   end subroutine update_host
91 | 
92 | end module core
93 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/heat_mod.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Field metadata for heat equation solver
 4 | module heat
 5 |   use iso_fortran_env, only : REAL64
 6 |   implicit none
 7 | 
 8 |   integer, parameter :: dp = REAL64
 9 |   real(dp), parameter :: DX = 0.01_dp, DY = 0.01_dp  ! Fixed grid spacing
10 |   ! (the _dp suffix matters: a bare 0.01 is single precision and is widened inexactly)
11 |   type :: field
12 |      integer :: nx          ! dimensions of the field
13 |      integer :: ny
14 |      real(dp) :: dx         ! grid spacing
15 |      real(dp) :: dy
16 |      real(dp), dimension(:,:), allocatable :: data
17 |   end type field
18 | 
19 | contains
20 |   ! Initialize the field type metadata
21 |   ! Arguments:
22 |   !   field0 (type(field)): field whose metadata is set; being intent(out), its data starts deallocated
23 |   !   nx, ny: field dimensions; the grid spacing is taken from the constants DX and DY
24 |   subroutine set_field_dimensions(field0, nx, ny)
25 |     implicit none
26 | 
27 |     type(field), intent(out) :: field0
28 |     integer, intent(in) :: nx, ny
29 | 
30 |     field0%dx = DX
31 |     field0%dy = DY
32 |     field0%nx = nx
33 |     field0%ny = ny
34 | 
35 |   end subroutine set_field_dimensions
36 | 
37 | end module heat
38 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/io.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! I/O routines for heat equation solver
 4 | module io
 5 |   use heat
 6 | 
 7 | contains
 8 | 
 9 |   ! Output routine, saves the temperature distribution as a png image
10 |   ! Arguments:
11 |   !   curr (type(field)): variable with the temperature data
12 |   !   iter (integer): index of the time step
13 |   subroutine write_field(curr, iter)
14 | 
15 |     use pngwriter
16 |     implicit none
17 |     type(field), intent(in) :: curr
18 |     integer, intent(in) :: iter
19 | 
20 |     character(len=85) :: filename
21 | 
22 |     integer :: stat
23 |     real(dp), dimension(:,:), allocatable, target :: full_data
24 | 
25 |     allocate(full_data(curr%nx, curr%ny))
26 |     ! Copy the grid points 1:nx, 1:ny into a separate nx x ny array for the writer
27 |     full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny)
28 |     ! File name is heat_NNNN.png, where NNNN is the zero-padded time step index
29 |     write(filename,'(A5,I4.4,A4,A)')  'heat_', iter, '.png'
30 |     stat = save_png(full_data, curr%nx, curr%ny, filename)
31 |     deallocate(full_data)
32 | 
33 |   end subroutine write_field
34 | 
35 | 
36 |   ! Reads the temperature distribution from an input file
37 |   ! Arguments:
38 |   !   field0 (type(field)): field variable that will store the
39 |   !                         read data
40 |   !   filename (char): name of the input file
41 |   ! Note that this version assumes the input data to be in C memory layout
42 |   subroutine read_field(field0, filename)
43 |     ! Stops with an error message if the file cannot be opened or its header cannot be read.
44 |     implicit none
45 |     type(field), intent(out) :: field0
46 |     character(len=85), intent(in) :: filename
47 | 
48 |     integer :: nx, ny, i, ios
49 |     character(len=2) :: dummy
50 | 
51 |     real(dp), dimension(:,:), allocatable :: full_data
52 |     ! status='old' prevents a missing input file from being silently created
53 |     open(10, file=filename, status='old', action='read', iostat=ios)
54 |     if (ios /= 0) error stop 'Error: cannot open the input file'
55 |     read(10, *, iostat=ios) dummy, nx, ny   ! header: marker token, then nx and ny
56 |     if (ios /= 0) error stop 'Error while reading the input file header'
57 |     call set_field_dimensions(field0, nx, ny)
58 | 
59 |     ! The arrays for temperature field contain also a halo region
60 |     allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
61 | 
62 |     allocate(full_data(nx, ny))
63 |     ! Read the data, one record of ny values per row
64 |     do i = 1, nx
65 |        read(10, *) full_data(i, 1:ny)
66 |     end do
67 | 
68 |     ! Copy to full array containing also boundaries
69 |     field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:)
70 | 
71 |     ! Set the boundary values by replicating the adjacent inner rows and columns
72 |     field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1)
73 |     field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny)
74 |     field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1)
75 |     field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1)
76 | 
77 |     close(10)
78 |     deallocate(full_data)
79 | 
80 |   end subroutine read_field
81 | 
82 | end module io
83 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/main.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Heat equation solver in 2D.
 4 | 
 5 | program heat_solve
 6 |   use heat
 7 |   use core
 8 |   use io
 9 |   use setup
10 |   use utilities
11 |   use omp_lib
12 | 
13 |   implicit none
14 | 
15 |   real(dp), parameter :: a = 0.5 ! Diffusion constant
16 |   type(field) :: current, previous    ! Current and previus temperature fields
17 | 
18 |   real(dp) :: dt     ! Time step
19 |   integer :: nsteps       ! Number of time steps
20 |   integer, parameter :: image_interval = 1500 ! Image output interval
21 | 
22 |   integer :: iter
23 | 
24 |   real(dp) :: average_temp   !  Average temperature
25 | 
26 |   real(kind=dp) :: start, stop ! Timers
27 | 
28 |   call initialize(current, previous, nsteps)
29 | 
30 |   ! Draw the picture of the initial state
31 |   call write_field(current, 0)
32 | 
33 |   average_temp = average(current)
34 |   write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp
35 | 
36 |   ! Largest stable time step
37 |   dt = current%dx**2 * current%dy**2 / &
38 |        & (2.0 * a * (current%dx**2 + current%dy**2))
39 | 
40 |   ! Main iteration loop
41 | 
42 |   start =  omp_get_wtime()
43 | 
44 |   ! copy data to device
45 |   call enter_data(current, previous)
46 | 
47 |   do iter = 1, nsteps
48 |      call evolve(current, previous, a, dt)
49 |      if (mod(iter, image_interval) == 0) then
50 |         ! update data on host for output
51 |         call update_host(current)
52 |         call write_field(current, iter)
53 |      end if
54 |      call swap_fields(current, previous)
55 |   end do
56 | 
57 |   ! copy data back to host
58 |   call exit_data(current, previous)
59 | 
60 |   stop = omp_get_wtime()
61 | 
62 |   ! Average temperature for reference
63 |   average_temp = average(previous)
64 | 
65 |   write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.'
66 |   write(*,'(A,F9.6)') 'Average temperature: ',  average_temp
67 |   if (command_argument_count() == 0) then
68 |       write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239
69 |   end if
70 | 
71 |   call finalize(current, previous)
72 | 
73 | end program heat_solve
74 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/pngwriter_mod.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! PNG writer for heat equation solver
 4 | module pngwriter
 5 |   use heat
 6 | 
 7 | contains
 8 | 
 9 |   function save_png(data, nx, ny, fname) result(stat)
10 |     ! Pass data to the C routine save_png and return its status; a non-zero status is also reported
11 |     use, intrinsic :: ISO_C_BINDING
12 |     implicit none
13 | 
14 |     real(dp), dimension(:,:), intent(in) :: data
15 |     integer, intent(in) :: nx, ny
16 |     character(len=*), intent(in) :: fname
17 |     integer :: stat
18 | 
19 |     ! Interface for save_png C-function
20 |     interface
21 |        ! NOTE(review): the C prototype is not visible here; judging by this interface it is
22 |        !   int save_png(double *data, const int nx, const int ny,
23 |        !                const char *fname, const char order)   -- confirm in pngwriter.h
24 |        function save_png_c(data, nx, ny, fname, order) &
25 |             & bind(C,name="save_png") result(stat)
26 |          use, intrinsic :: ISO_C_BINDING
27 |          implicit none
28 |          real(kind=C_DOUBLE) :: data(*)
29 |          integer(kind=C_INT), value, intent(IN) :: nx, ny
30 |          character(kind=C_CHAR), intent(IN) :: fname(*)
31 |          character(kind=C_CHAR), value, intent(IN) :: order
32 |          integer(kind=C_INT) :: stat
33 |        end function save_png_c
34 |     end interface
35 |     ! C_NULL_CHAR terminates the name for C; 'f' presumably selects Fortran array order -- confirm
36 |     stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
37 |     if (stat /= 0) then
38 |        write(*,*) 'save_png returned error!'
39 |     end if
40 | 
41 |   end function save_png
42 | 
43 | end module pngwriter
44 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/setup.F90:
--------------------------------------------------------------------------------
  1 | ! Copyright (c) 2019 CSC Training
  2 | ! Copyright (c) 2021 ENCCS
  3 | ! Setup routines for heat equation solver
  4 | module setup
  5 |   use heat
  6 | 
  7 | contains
  8 | 
  9 |   subroutine initialize(previous, current, nsteps)
 10 |     use utilities
 11 |     use io
 12 | 
 13 |     implicit none
 14 | 
 15 |     type(field), intent(out) :: previous, current
 16 |     integer, intent(out) :: nsteps
 17 | 
 18 |     integer :: rows, cols
 19 |     logical :: using_input_file
 20 |     character(len=85) :: input_file, arg  ! Input file name and command line arguments
 21 | 
 22 | 
 23 |     ! Default values for grid size and time steps
 24 |     rows = 2000
 25 |     cols = 2000
 26 |     nsteps = 500
 27 |     using_input_file = .false.
 28 | 
 29 |     ! Read in the command line arguments and
 30 |     ! set up the needed variables
 31 |     select case(command_argument_count())
 32 |     case(0) ! No arguments -> default values
 33 |     case(1) ! One argument -> input file name
 34 |        using_input_file = .true.
 35 |        call get_command_argument(1, input_file)
 36 |     case(2) ! Two arguments -> input file name and number of steps
 37 |        using_input_file = .true.
 38 |        call get_command_argument(1, input_file)
 39 |        call get_command_argument(2, arg)
 40 |        read(arg, *) nsteps
 41 |     case(3) ! Three arguments -> rows, cols and nsteps
 42 |        call get_command_argument(1, arg)
 43 |        read(arg, *) rows
 44 |        call get_command_argument(2, arg)
 45 |        read(arg, *) cols
 46 |        call get_command_argument(3, arg)
 47 |        read(arg, *) nsteps
 48 |     case default
 49 |        call usage()
 50 |        stop
 51 |     end select
 52 | 
 53 |     ! Initialize the fields according the command line arguments
 54 |     if (using_input_file) then
 55 |        call read_field(previous, input_file)
 56 |        call copy_fields(previous, current)
 57 |     else
 58 |        call set_field_dimensions(previous, rows, cols)
 59 |        call set_field_dimensions(current, rows, cols)
 60 |        call generate_field(previous)
 61 |        call copy_fields(previous, current)
 62 |     end if
 63 | 
 64 |   end subroutine initialize
 65 | 
 66 |   ! Generate the initial temperature field.  Pattern is a disc with a radius
 67 |   ! of nx / 6 in the center of the grid.
 68 |   ! Boundary conditions are (different) constant temperatures outside the grid
 69 |   subroutine generate_field(field0)
 70 |     use heat
 71 | 
 72 |     implicit none
 73 | 
 74 |     type(field), intent(inout) :: field0
 75 | 
 76 |     real(dp) :: radius2
 77 |     integer :: i, j, ds2
 78 | 
 79 |     ! The arrays for field contain also a halo region
 80 |     allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
 81 | 
 82 |     ! Square of the disk radius
 83 |     radius2 = (field0%nx / 6.0_dp)**2
 84 | 
 85 |     do j = 0, field0%ny + 1
 86 |        do i = 0, field0%nx + 1
 87 |           ds2 = int((i - field0%nx / 2.0_dp + 1)**2 + &
 88 |                & (j - field0%ny / 2.0_dp + 1)**2)
 89 |           if (ds2 < radius2) then
 90 |              field0%data(i,j) = 5.0_dp
 91 |           else
 92 |              field0%data(i,j) = 65.0_dp
 93 |           end if
 94 |        end do
 95 |     end do
 96 | 
 97 |     ! Boundary conditions
 98 |     field0%data(:,0) = 20.0_dp
 99 |     field0%data(:,field0%ny+1) = 70.0_dp
100 |     field0%data(0,:) = 85.0_dp
101 |     field0%data(field0%nx+1,:) = 5.0_dp
102 | 
103 |   end subroutine generate_field
104 | 
105 | 
106 |   ! Clean up routine for field type
107 |   ! Arguments:
108 |   !   field0, field1 (type(field)): field variables whose data arrays are deallocated
109 |   subroutine finalize(field0, field1)
110 |     use heat
111 | 
112 |     implicit none
113 | 
114 |     type(field), intent(inout) :: field0, field1
115 | 
116 |     deallocate(field0%data)
117 |     deallocate(field1%data)
118 | 
119 |   end subroutine finalize
120 | 
121 |   ! Helper routine that prints out a simple usage if
122 |   ! user gives more than three arguments
123 |   subroutine usage()
124 |     implicit none
125 |     character(len=256) :: buf
126 | 
127 |     call get_command_argument(0, buf)
128 |     write (*,'(A)') 'Usage:'
129 |     write (*,'(A, " (default values will be used)")') trim(buf)
130 |     write (*,'(A, " <filename>")') trim(buf)
131 |     write (*,'(A, " <filename> <nsteps>")') trim(buf)
132 |     write (*,'(A, " <rows> <cols> <nsteps>")') trim(buf)
133 |   end subroutine usage
134 | 
135 | end module setup
136 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/utilities.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Utility routines for heat equation solver
 4 | !   NOTE: This file does not need to be edited!
 5 | module utilities
 6 |   use heat
 7 | 
 8 | contains
 9 | 
10 |   ! Swap the data fields of two variables of type field
11 |   ! Arguments:
12 |   !   curr, prev (type(field)): the two variables that are swapped
13 |   subroutine swap_fields(curr, prev)
14 | 
15 |     implicit none
16 | 
17 |     type(field), intent(inout) :: curr, prev
18 |     real(dp), allocatable, dimension(:,:) :: tmp
19 | 
20 |     call move_alloc(curr%data, tmp)
21 |     call move_alloc(prev%data, curr%data)
22 |     call move_alloc(tmp, prev%data)
23 |   end subroutine swap_fields
24 | 
25 |   ! Copy the data from one field to another
26 |   ! Arguments:
27 |   !   from_field (type(field)): variable to copy from
28 |   !   to_field (type(field)): variable to copy to
29 |   subroutine copy_fields(from_field, to_field)
30 | 
31 |     implicit none
32 |     ! NOTE(review): to_field is intent(out), so its data arrives deallocated and the size check below looks unreachable -- confirm
33 |     type(field), intent(in) :: from_field
34 |     type(field), intent(out) :: to_field
35 | 
36 |     ! Consistency checks
37 |     if (.not.allocated(from_field%data)) then
38 |        write (*,*) "Can not copy from a field without allocated data"
39 |        stop
40 |     end if
41 |     if (.not.allocated(to_field%data)) then
42 |        ! Target is not allocated yet: allocate it with the same bounds as the source
43 |        allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
44 |             & lbound(from_field%data, 2):ubound(from_field%data, 2)))
45 |     else if (any(shape(from_field%data) /= shape(to_field%data))) then
46 |        write (*,*) "Wrong field data sizes in copy routine"
47 |        print *, shape(from_field%data), shape(to_field%data)
48 |        stop
49 |     end if
50 | 
51 |     to_field%data = from_field%data
52 |     ! Copy the metadata as well
53 |     to_field%nx = from_field%nx
54 |     to_field%ny = from_field%ny
55 |     to_field%dx = from_field%dx
56 |     to_field%dy = from_field%dy
57 |   end subroutine copy_fields
58 | 
59 |   function average(field0)
60 |     ! Mean of field0%data over the grid points 1:nx, 1:ny.
61 |     implicit none
62 | 
63 |     real(dp) :: average
64 |     type(field), intent(in) :: field0
65 | 
66 |     ! The point count is formed in double precision: the default-integer
67 |     ! product nx * ny would overflow for large grids (e.g. 50000 x 50000).
68 | 
69 |     average = sum(field0%data(1:field0%nx, 1:field0%ny))
70 |     average = average / (real(field0%nx, dp) * real(field0%ny, dp))
71 | 
72 |   end function average
73 | 
74 | end module utilities
75 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/heat.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | #ifndef __HEAT_H__
 4 | #define __HEAT_H__
 5 | 
 6 | #include <vector>
 7 | 
 8 | // Datatype for temperature field
 9 | struct field {
10 |     // nx and ny are the dimensions of the field. The array data
11 |     // contains also ghost layers, so it will have dimensions nx+2 x ny+2
12 |     int nx;
13 |     int ny;
14 |     // Size of the grid cells
15 |     double dx;
16 |     double dy;
17 |     // The temperature values in the 2D grid
18 |     std::vector<double> data;
19 | };
20 | 
21 | // We use here fixed grid spacing
22 | const double DX = 0.01;
23 | const double DY = 0.01;
24 | 
25 | #if __cplusplus
26 |   extern "C" {
27 | #endif
28 | // Function prototypes
29 | void set_field_dimensions(field *temperature, int nx, int ny);
30 | 
31 | void initialize(int argc, char *argv[], field *temperature1,
32 |                 field *temperature2, int *nsteps);
33 | 
34 | void generate_field(field *temperature);
35 | 
36 | double average(field *temperature);
37 | 
38 | void evolve(field *curr, field *prev, double a, double dt);
39 | 
40 | void write_field(field *temperature, int iter);
41 | 
42 | void read_field(field *temperature1, field *temperature2,
43 |                 char *filename);
44 | 
45 | void copy_field(field *temperature1, field *temperature2);
46 | 
47 | void swap_fields(field *temperature1, field *temperature2);
48 | 
49 | void allocate_field(field *temperature);
50 | 
51 | void enter_data(field *temperature1, field *temperature2);
52 | 
53 | void exit_data(field *temperature1, field *temperature2);
54 | 
55 | void update_host(field *temperature);
56 | 
57 | #if __cplusplus
58 |   }
59 | #endif
60 | #endif  // __HEAT_H__
61 | 
62 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/heat_serial:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/exercise/data_mapping/heat_serial


--------------------------------------------------------------------------------
/content/exercise/data_mapping/io.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2019 CSC Training
  2 | // Copyright (c) 2021 ENCCS
  3 | // I/O related functions for heat equation solver
  4 | 
  5 | #include <stdio.h>
  6 | #include <stdlib.h>
  7 | #include <string.h>
  8 | #include <assert.h>
  9 | 
 10 | #include "heat.h"
 11 | #include "pngwriter.h"
 12 | 
 13 | // Output routine that prints out a picture of the temperature
 14 | // distribution.
 15 | void write_field(field *temperature, int iter)
 16 | {
 17 |     const int nx = temperature->nx, ny = temperature->ny;
 18 |     char filename[64];
 19 |     // save_png takes only the inner grid points, so copy them row by row
 20 |     // (skipping the boundary layers) into a contiguous nx * ny array.
 21 |     std::vector<double> inner_data(static_cast<size_t>(nx) * ny);
 22 |     auto inner_data_iterator = inner_data.begin();
 23 |     auto beginning_of_row = temperature->data.begin() + (ny + 2) + 1;
 24 |     for (int i = 0; i < nx; i++) {
 25 |         auto end_of_row = beginning_of_row + ny;
 26 |         std::copy(beginning_of_row, end_of_row, inner_data_iterator);
 27 |         inner_data_iterator += ny;
 28 |         beginning_of_row = end_of_row + 2;  // step over the two boundary cells
 29 |     }
 30 | 
 31 |     // Bounded formatting: never writes past the end of filename
 32 |     snprintf(filename, sizeof(filename), "%s_%04d.png", "heat", iter);
 33 |     save_png(inner_data.data(), nx, ny, filename, 'c');
 34 | }
 35 | 
 36 | // Read the initial temperature distribution from a file and
 37 | // initialize the temperature fields temperature1 and
 38 | // temperature2 to the same initial state.
 39 | void read_field(field *temperature1, field *temperature2, char *filename)
 40 | {   // Exits with an error message if the file cannot be opened or parsed.
 41 |     int nx, ny, ind, nx_local, ny_local, count;
 42 |     FILE *fp = fopen(filename, "r");
 43 |     if (fp == NULL) {  // a missing file must not reach fscanf as a null stream
 44 |         fprintf(stderr, "Cannot open the input file %s!\n", filename);
 45 |         exit(-1);
 46 |     }
 47 |     // Read the header
 48 |     count = fscanf(fp, "# %d %d \n", &nx, &ny);
 49 |     if (count < 2) {
 50 |         fprintf(stderr, "Error while reading the input file!\n");
 51 |         exit(-1);
 52 |     }
 53 | 
 54 |     set_field_dimensions(temperature1, nx, ny);
 55 |     set_field_dimensions(temperature2, nx, ny);
 56 | 
 57 |     // Allocate arrays (including boundary layers)
 58 |     int newSize = (temperature1->nx + 2) * (temperature1->ny + 2);
 59 |     temperature1->data.resize(newSize, 0.0);
 60 |     temperature2->data.resize(newSize, 0.0);
 61 | 
 62 |     // Array from file
 63 |     std::vector<double> file_data(nx * ny, 0.0);
 64 |     // Read the actual data; a short or malformed file is an error
 65 |     for (int i = 0; i < nx; i++) {
 66 |         for (int j = 0; j < ny; j++) {
 67 |             ind = i * ny + j;
 68 |             if (fscanf(fp, "%lf", &file_data[ind]) != 1) {
 69 |                 fprintf(stderr, "Error while reading the input file!\n");
 70 |                 exit(-1);
 71 |             }
 72 |         }
 73 |     }
 74 |     nx_local = temperature1->nx;
 75 |     ny_local = temperature1->ny;
 76 |     // Copy to the inner part of the full temperature field
 77 |     auto temperature_data_iterator = temperature1->data.begin() + (ny_local + 2) + 1;
 78 |     auto beginning_of_row = file_data.begin();
 79 |     for (int i = 0; i < nx_local; i++) {
 80 |         auto end_of_row = beginning_of_row + ny_local;
 81 |         std::copy(beginning_of_row, end_of_row, temperature_data_iterator);
 82 |         temperature_data_iterator += ny_local + 2;
 83 |         beginning_of_row = end_of_row;
 84 |     }
 85 | 
 86 |     // Set the boundary values: left/right columns first, then top/bottom rows
 87 |     for (int i = 1; i < nx_local + 1; i++) {
 88 |         temperature1->data[i * (ny_local + 2)] = temperature1->data[i * (ny_local + 2) + 1];
 89 |         temperature1->data[i * (ny_local + 2) + ny_local + 1] = temperature1->data[i * (ny_local + 2) + ny_local];
 90 |     }
 91 |     for (int j = 0; j < ny_local + 2; j++) {
 92 |         temperature1->data[j] = temperature1->data[(ny_local + 2) + j];  // row 0 <- row 1 (full row stride)
 93 |         temperature1->data[(nx_local + 1) * (ny_local + 2) + j] =
 94 |             temperature1->data[nx_local * (ny_local + 2) + j];
 95 |     }
 96 | 
 97 |     copy_field(temperature1, temperature2);
 98 | 
 99 |     fclose(fp);
100 | }
101 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/main.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Main routine for heat equation solver in 2D.
 4 | 
 5 | #include <stdio.h>
 6 | #include <omp.h>
 7 | 
 8 | #include "heat.h"
 9 | 
10 | int main(int argc, char **argv)
11 | {
12 |     // Image output interval
13 |     int image_interval = 1500;
14 | 
15 |     // Number of time steps
16 |     int nsteps;
17 |     // Current and previous temperature fields
18 |     field current, previous;
19 |     initialize(argc, argv, &current, &previous, &nsteps);
20 | 
21 |     // Output the initial field 
22 |     write_field(&current, 0);
23 | 
24 |     double average_temp = average(&current);
25 |     printf("Average temperature at start: %f\n", average_temp);
26 | 
27 |     // Diffusion constant
28 |     double a = 0.5;
29 | 
30 |     // Compute the largest stable time step
31 |     double dx2 = current.dx * current.dx;
32 |     double dy2 = current.dy * current.dy;
33 |     // Time step
34 |     double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));
35 | 
36 |     // Get the start time stamp
37 |     double start_clock = omp_get_wtime();
38 | 
39 |     // Copy fields to device 
40 |     enter_data(&current, &previous);
41 | 
42 |     // Time evolution
43 |     for (int iter = 1; iter <= nsteps; iter++) {
44 |         evolve(&current, &previous, a, dt);
45 |         if (iter % image_interval == 0) {
46 | 	  // update data on host for output
47 |             update_host(&current);
48 |             write_field(&current, iter);
49 |         }
50 |         // Swap current field so that it will be used
51 |         // as previous for next iteration step
52 |         swap_fields(&current, &previous);
53 |     }
54 |   
55 |     // copy data back to host
56 |     exit_data(&current, &previous);
57 | 
58 |     double stop_clock = omp_get_wtime();
59 | 
60 |     // Average temperature for reference
61 |     average_temp = average(&previous);
62 | 
63 |     // Determine the CPU time used for all the iterations
64 |     printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock));
65 |     printf("Average temperature: %f\n", average_temp);
66 |     if (argc == 1) {
67 |         printf("Reference value with default arguments: 59.281239\n");
68 |     }
69 | 
70 |     // Output the final field
71 |     write_field(&previous, nsteps);
72 | 
73 |     return 0;
74 | }
75 | 


--------------------------------------------------------------------------------
/content/exercise/data_mapping/utilities.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Utility functions for heat equation solver
 4 | 
 5 | #include <stdlib.h>
 6 | #include <assert.h>
 7 | 
 8 | #include "heat.h"
 9 | 
10 | 
11 | // Copy data on temperature1 into temperature2
12 | void copy_field(field *temperature1, field *temperature2)
13 | {
14 |     assert(temperature1->nx == temperature2->nx);
15 |     assert(temperature1->ny == temperature2->ny);
16 |     assert(temperature1->data.size() == temperature2->data.size());
17 |     std::copy(temperature1->data.begin(), temperature1->data.end(),
18 |               temperature2->data.begin());
19 | }
20 | 
21 | // Swap the field data for temperature1 and temperature2
22 | void swap_fields(field *temperature1, field *temperature2)
23 | {
24 |     std::swap(temperature1->data, temperature2->data);
25 | }
26 | 
27 | // Allocate memory for a temperature field and initialise it to zero
28 | void allocate_field(field *temperature)
29 | {
30 |     // Include also boundary layers
31 |     int newSize = (temperature->nx + 2) * (temperature->ny + 2);
32 |     temperature->data.resize(newSize, 0.0);
33 | }
34 | 
35 | // Calculate average temperature over the non-boundary grid cells
36 | double average(field *temperature)
37 | {
38 |     const int nx = temperature->nx, ny = temperature->ny;
39 |     double sum = 0.0;
40 |     for (int i = 1; i <= nx; i++) {
41 |         // Row i starts at i * (ny + 2); size_t keeps the index from overflowing int
42 |         const size_t row = static_cast<size_t>(i) * (ny + 2);
43 |         for (int j = 1; j <= ny; j++) {
44 |             sum += temperature->data[row + j];
45 |         }
46 |     }
47 |     // Divide in floating point: the int product nx * ny overflows for large grids
48 |     return sum / (static_cast<double>(nx) * ny);
49 | }
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/content/exercise/ex00/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 CSC Training
 4 | Copyright (c) 2021 ENCCS
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/content/exercise/ex00/ex00.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | program hello
 4 | 
 5 | #ifdef _OPENMP
 6 |   use omp_lib
 7 | #endif
 8 |   implicit none
 9 | 
10 |   integer :: num_devices,nteams,nthreads
11 |   logical :: initial_device
12 | 
13 |   num_devices = omp_get_num_devices()
14 |   print *, "Number of available devices", num_devices
15 |   ! Unmapped scalars are firstprivate in a target region, so map every result back explicitly
16 |   !$omp target map(from: initial_device, nteams, nthreads)
17 |     initial_device = omp_is_initial_device()
18 |     nteams= omp_get_num_teams()
19 |     nthreads= omp_get_num_threads()
20 |   !$omp end target 
21 |     if (initial_device) then
22 |       write(*,*) "Running on host"
23 |     else 
24 |       write(*,'(A,I4,A,I4,A)') "Running on device with ",nteams, " teams in total and ", nthreads, " threads in each team"
25 |     end if
26 | 
27 | end program
28 | 
29 | 


--------------------------------------------------------------------------------
/content/exercise/ex00/ex00.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019 CSC Training */
 2 | /* Copyright (c) 2021 ENCCS */
 3 | #include <stdio.h>
 4 | 
 5 | #ifdef _OPENMP
 6 | #include <omp.h>
 7 | #endif
 8 | 
 9 | /* Report the number of offload devices, then say from inside a target
10 |    region whether it ran on the host or on a device. */
11 | int main()
12 | {
13 |   const int device_count = omp_get_num_devices();
14 |   printf("Number of available devices %d\n", device_count);
15 | 
16 |   #pragma omp target
17 |   {
18 |     if (!omp_is_initial_device()) {
19 |       /* On a device: report the number of teams and threads per team */
20 |       const int team_count = omp_get_num_teams();
21 |       const int thread_count = omp_get_num_threads();
22 |       printf("Running on device with %d teams in total and %d threads in each team\n",
23 |              team_count, thread_count);
24 |     } else {
25 |       printf("Running on host\n");
26 |     }
27 |   }
28 | 
29 |   return 0;
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/content/exercise/ex01/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 CSC Training
 4 | Copyright (c) 2021 ENCCS
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/content/exercise/ex01/ex01.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | program dotproduct
 4 |   ! Serial version: element-wise product of two nx-element vectors followed
 5 |   ! by a sum over the products.
 6 |   implicit none
 7 | 
 8 |   integer, parameter :: nx = 102400
 9 |   real, parameter :: r = 0.2
10 |   real :: vecA(nx), vecB(nx), vecC(nx)
11 |   real :: total
12 |   integer :: k
13 | 
14 |   ! vecA holds successive powers of r, vecB is all ones
15 |   do k = 1, nx
16 |     vecB(k) = 1.0
17 |     vecA(k) = r**(k-1)
18 |   end do
19 | 
20 |   ! Element-wise product, stored in vecC
21 |   do k = 1, nx
22 |     vecC(k) = vecB(k) * vecA(k)
23 |   end do
24 | 
25 |   ! Accumulate the products into a single value
26 |   total = 0.0
27 |   do k = 1, nx
28 |     total = total + vecC(k)
29 |   end do
30 | 
31 |   print *, 'The sum is: ', total
32 | 
33 | end program dotproduct
34 | 


--------------------------------------------------------------------------------
/content/exercise/ex01/ex01.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019 CSC Training */
 2 | /* Copyright (c) 2021 ENCCS */
 3 | #include <stdio.h>
 4 | #include <math.h>
 5 | #define NX 102400
 6 | 
 7 | /* Serial version: element-wise product of two NX-long vectors, followed by
 8 |    a sum over the products. */
 9 | int main(void)
10 | {
11 |   double vecA[NX], vecB[NX], vecC[NX];
12 |   double ratio = 0.2;
13 | 
14 |   /* vecA holds successive powers of ratio, vecB is all ones */
15 |   for (int k = 0; k < NX; ++k) {
16 |     vecB[k] = 1.0;
17 |     vecA[k] = pow(ratio, k);
18 |   }
19 | 
20 |   /* Element-wise product, stored in vecC */
21 |   for (int k = 0; k < NX; ++k) {
22 |     vecC[k] = vecB[k] * vecA[k];
23 |   }
24 | 
25 |   /* Accumulate the products into a single value */
26 |   double total = 0.0;
27 |   for (int k = 0; k < NX; ++k) {
28 |     total = total + vecC[k];
29 |   }
30 | 
31 |   printf("The sum is: %8.6f \n", total);
32 |   return 0;
33 | }
34 | 


--------------------------------------------------------------------------------
/content/exercise/ex01/solution/ex01.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | program dotproduct
 4 |   ! Element-wise product of two nx-element vectors, computed inside a bare
 5 |   ! target region, followed by a sum over the products on the host.
 6 |   implicit none
 7 | 
 8 |   integer, parameter :: nx = 102400
 9 |   real, parameter :: r = 0.2
10 |   real :: vecA(nx), vecB(nx), vecC(nx)
11 |   real :: total
12 |   integer :: k
13 | 
14 |   ! vecA holds successive powers of r, vecB is all ones
15 |   do k = 1, nx
16 |     vecB(k) = 1.0
17 |     vecA(k) = r**(k-1)
18 |   end do
19 | 
20 |   ! Element-wise product, stored in vecC
21 |   !$omp target
22 |   do k = 1, nx
23 |     vecC(k) = vecB(k) * vecA(k)
24 |   end do
25 |   !$omp end target
26 | 
27 |   ! Accumulate the products into a single value
28 |   total = 0.0
29 |   do k = 1, nx
30 |     total = total + vecC(k)
31 |   end do
32 | 
33 |   print *, 'The sum is: ', total
34 | 
35 | end program dotproduct
36 | 


--------------------------------------------------------------------------------
/content/exercise/ex01/solution/ex01.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019 CSC Training */
 2 | /* Copyright (c) 2021 ENCCS */
 3 | #include <stdio.h>
 4 | #include <math.h>
 5 | #define NX 102400
 6 | 
 7 | /* Element-wise product of two NX-long vectors inside a bare target region,
 8 |    followed by a sum over the products on the host. */
 9 | int main(void)
10 | {
11 |   double vecA[NX], vecB[NX], vecC[NX];
12 |   double ratio = 0.2;
13 | 
14 |   /* vecA holds successive powers of ratio, vecB is all ones */
15 |   for (int k = 0; k < NX; ++k) {
16 |     vecB[k] = 1.0;
17 |     vecA[k] = pow(ratio, k);
18 |   }
19 | 
20 |   /* Element-wise product, stored in vecC */
21 |   #pragma omp target
22 |   for (int k = 0; k < NX; ++k) {
23 |     vecC[k] = vecB[k] * vecA[k];
24 |   }
25 | 
26 |   /* Accumulate the products into a single value */
27 |   double total = 0.0;
28 |   for (int k = 0; k < NX; ++k) {
29 |     total = total + vecC[k];
30 |   }
31 | 
32 |   printf("The sum is: %8.6f \n", total);
33 |   return 0;
34 | }
35 | 


--------------------------------------------------------------------------------
/content/exercise/ex02/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 CSC Training
 4 | Copyright (c) 2021 ENCCS
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/content/exercise/ex02/ex02.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | program dotproduct
 4 |   ! Element-wise product of two nx-element vectors, computed inside a bare
 5 |   ! target region, followed by a sum over the products on the host.
 6 |   implicit none
 7 | 
 8 |   integer, parameter :: nx = 102400
 9 |   real, parameter :: r = 0.2
10 |   real :: vecA(nx), vecB(nx), vecC(nx)
11 |   real :: total
12 |   integer :: k
13 | 
14 |   ! vecA holds successive powers of r, vecB is all ones
15 |   do k = 1, nx
16 |     vecB(k) = 1.0
17 |     vecA(k) = r**(k-1)
18 |   end do
19 | 
20 |   ! Element-wise product, stored in vecC
21 |   !$omp target
22 |   do k = 1, nx
23 |     vecC(k) = vecB(k) * vecA(k)
24 |   end do
25 |   !$omp end target
26 | 
27 |   ! Accumulate the products into a single value
28 |   total = 0.0
29 |   do k = 1, nx
30 |     total = total + vecC(k)
31 |   end do
32 | 
33 |   print *, 'The sum is: ', total
34 | 
35 | end program dotproduct
36 | 


--------------------------------------------------------------------------------
/content/exercise/ex02/ex02.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019 CSC Training */
 2 | // Copyright (c) 2021 ENCCS
 3 | #include <stdio.h>
 4 | #include <math.h>
 5 | #define NX 102400
 6 | 
 7 | /* Element-wise product of two NX-long vectors inside a bare target region,
 8 |    followed by a sum over the products on the host. */
 9 | int main(void)
10 | {
11 |   double vecA[NX], vecB[NX], vecC[NX];
12 |   double ratio = 0.2;
13 | 
14 |   /* vecA holds successive powers of ratio, vecB is all ones */
15 |   for (int k = 0; k < NX; ++k) {
16 |     vecB[k] = 1.0;
17 |     vecA[k] = pow(ratio, k);
18 |   }
19 | 
20 |   /* Element-wise product, stored in vecC */
21 |   #pragma omp target
22 |   for (int k = 0; k < NX; ++k) {
23 |     vecC[k] = vecB[k] * vecA[k];
24 |   }
25 | 
26 |   /* Accumulate the products into a single value */
27 |   double total = 0.0;
28 |   for (int k = 0; k < NX; ++k) {
29 |     total = total + vecC[k];
30 |   }
31 | 
32 |   printf("The sum is: %8.6f \n", total);
33 |   return 0;
34 | }
35 | 


--------------------------------------------------------------------------------
/content/exercise/ex02/solution/ex02.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | program dotproduct
 4 |   ! Element-wise product of two nx-element vectors, offloaded with the
 5 |   ! combined target teams distribute parallel do construct, followed by a
 6 |   ! sum over the products on the host.
 7 |   implicit none
 8 | 
 9 |   integer, parameter :: nx = 102400
10 |   real, parameter :: r = 0.2
11 |   real :: vecA(nx), vecB(nx), vecC(nx)
12 |   real :: total
13 |   integer :: k
14 | 
15 |   ! vecA holds successive powers of r, vecB is all ones
16 |   do k = 1, nx
17 |     vecB(k) = 1.0
18 |     vecA(k) = r**(k-1)
19 |   end do
20 | 
21 |   ! Element-wise product, stored in vecC
22 |   !$omp target teams distribute parallel do
23 |   do k = 1, nx
24 |     vecC(k) = vecB(k) * vecA(k)
25 |   end do
26 |   !$omp end target teams distribute parallel do
27 | 
28 |   ! Accumulate the products into a single value
29 |   total = 0.0
30 |   do k = 1, nx
31 |     total = total + vecC(k)
32 |   end do
33 | 
34 |   print *, 'The sum is: ', total
35 | 
36 | end program dotproduct
37 | 


--------------------------------------------------------------------------------
/content/exercise/ex02/solution/ex02.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019 CSC Training */
 2 | /* Copyright (c) 2021 ENCCS */
 3 | #include <stdio.h>
 4 | #include <math.h>
 5 | #define NX 102400
 6 | 
 7 | /* Element-wise product of two NX-long vectors, offloaded with the combined
 8 |    target teams distribute parallel for construct, followed by a sum over
 9 |    the products on the host. */
10 | int main(void)
11 | {
12 |   double vecA[NX], vecB[NX], vecC[NX];
13 |   double ratio = 0.2;
14 | 
15 |   /* vecA holds successive powers of ratio, vecB is all ones */
16 |   for (int k = 0; k < NX; ++k) {
17 |     vecB[k] = 1.0;
18 |     vecA[k] = pow(ratio, k);
19 |   }
20 | 
21 |   /* Element-wise product, stored in vecC */
22 |   #pragma omp target teams distribute parallel for
23 |   for (int k = 0; k < NX; ++k) {
24 |     vecC[k] = vecB[k] * vecA[k];
25 |   }
26 | 
27 |   /* Accumulate the products into a single value */
28 |   double total = 0.0;
29 |   for (int k = 0; k < NX; ++k) {
30 |     total = total + vecC[k];
31 |   }
32 | 
33 |   printf("The sum is: %8.6f \n", total);
34 |   return 0;
35 | }
36 | 


--------------------------------------------------------------------------------
/content/exercise/ex03/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 CSC Training
 4 | Copyright (c) 2021 ENCCS
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/content/exercise/ex03/ex03.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | program hello
 4 |   ! Print the number of offload devices, run a target region, and report
 5 |   ! whether that region executed on the host or on a device.
 6 |   !
 7 |   ! NOTE: omp_lib is only used when _OPENMP is defined, but the omp_* calls
 8 |   ! below are unconditional, so the program needs an OpenMP-enabled build.
 9 | 
10 | #ifdef _OPENMP
11 |   use omp_lib
12 | #endif
13 |   implicit none
14 | 
15 |   integer :: num_devices,nteams,nthreads
16 |   logical :: initial_device
17 | 
18 |   num_devices = omp_get_num_devices()
19 |   print *, "Number of available devices", num_devices
20 | 
21 |   ! initial_device, nteams and nthreads are all assigned inside the target
22 |   ! region and read on the host afterwards, so all three are mapped back.
23 |   ! An unmapped scalar is implicitly firstprivate in a target region
24 |   ! (OpenMP 4.5 and later); the host copy of initial_device would then
25 |   ! still be undefined when it is tested below.
26 |   !$omp target map(from:initial_device,nteams,nthreads)
27 |     initial_device = omp_is_initial_device()
28 |     nteams= omp_get_num_teams()
29 |     nthreads= omp_get_num_threads()
30 |   !$omp end target
31 |     if (initial_device) then
32 |       write(*,*) "Running on host"
33 |     else
34 |       write(*,'(A,I4,A,I4,A)') "Running on device with ",nteams, " teams in total and ", nthreads, " threads in each team"
35 |     end if
36 | 
37 | end program
38 | 
39 | 


--------------------------------------------------------------------------------
/content/exercise/ex03/ex03.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019 CSC Training */
 2 | /* Copyright (c) 2021 ENCCS */
 3 | #include <stdio.h>
 4 | 
 5 | #ifdef _OPENMP
 6 | #include <omp.h>
 7 | #endif
 8 | 
 9 | /* Report the number of offload devices, then say from inside a target
10 |    region whether it ran on the host or on a device. */
11 | int main()
12 | {
13 |   const int device_count = omp_get_num_devices();
14 |   printf("Number of available devices %d\n", device_count);
15 | 
16 |   #pragma omp target
17 |   {
18 |     if (!omp_is_initial_device()) {
19 |       /* On a device: report the number of teams and threads per team */
20 |       const int team_count = omp_get_num_teams();
21 |       const int thread_count = omp_get_num_threads();
22 |       printf("Running on device with %d teams in total and %d threads in each team\n",
23 |              team_count, thread_count);
24 |     } else {
25 |       printf("Running on host\n");
26 |     }
27 |   }
28 | 
29 |   return 0;
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/content/exercise/ex03/solution/ex03.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | program hello
 4 |   ! Print the number of offload devices, then run a target region that
 5 |   ! requests two teams with at most three threads each, and report where the
 6 |   ! region executed together with the team and thread counts seen inside it.
 7 |   !
 8 |   ! NOTE: omp_lib is only used when _OPENMP is defined, but the omp_* calls
 9 |   ! below are unconditional, so the program needs an OpenMP-enabled build.
10 | 
11 | #ifdef _OPENMP
12 |   use omp_lib
13 | #endif
14 |   implicit none
15 | 
16 |   integer :: num_devices,nteams,nthreads
17 |   logical :: initial_device
18 | 
19 |   num_devices = omp_get_num_devices()
20 |   print *, "Number of available devices", num_devices
21 | 
22 |   ! All three results are read on the host afterwards, so all three are
23 |   ! mapped back. An unmapped scalar is implicitly firstprivate in a target
24 |   ! region (OpenMP 4.5 and later), which would leave the host copy of
25 |   ! initial_device undefined.
26 |   !$omp target map(from:initial_device,nteams,nthreads)
27 |   !$omp teams num_teams(2) thread_limit(3)
28 |   !$omp parallel
29 |     ! The variables are shared by every thread of every team. Only one thread
30 |     ! stores the values, so there are no concurrent writes to them.
31 |     if (omp_get_team_num() == 0 .and. omp_get_thread_num() == 0) then
32 |       initial_device = omp_is_initial_device()
33 |       nteams= omp_get_num_teams()
34 |       nthreads= omp_get_num_threads()
35 |     end if
36 |   !$omp end parallel
37 |   !$omp end teams
38 |   !$omp end target
39 |     if (initial_device) then
40 |       write(*,*) "Running on host"
41 |     else
42 |       write(*,'(A,I4,A,I4,A)') "Running on device with ",nteams, " teams in total and ", nthreads, " threads in each team"
43 |     end if
44 | 
45 | end program
46 | 
47 | 


--------------------------------------------------------------------------------
/content/exercise/ex03/solution/ex03.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019 CSC Training */
 2 | /* Copyright (c) 2021 ENCCS */
 3 | #include <stdio.h>
 4 | 
 5 | #ifdef _OPENMP
 6 | #include <omp.h>
 7 | #endif
 8 | 
 9 | /* Report the number of offload devices, then let every thread of a target
10 |    region that requests two teams (at most three threads each) say where it
11 |    runs. */
12 | int main()
13 | {
14 |   const int device_count = omp_get_num_devices();
15 |   printf("Number of available devices %d\n", device_count);
16 | 
17 |   #pragma omp target
18 |   #pragma omp teams num_teams(2) thread_limit(3)
19 |   #pragma omp parallel
20 |   {
21 |     if (!omp_is_initial_device()) {
22 |       /* On a device: report the number of teams and threads per team */
23 |       const int team_count = omp_get_num_teams();
24 |       const int thread_count = omp_get_num_threads();
25 |       printf("Running on device with %d teams in total and %d threads in each team\n",
26 |              team_count, thread_count);
27 |     } else {
28 |       printf("Running on host\n");
29 |     }
30 |   }
31 | 
32 |   return 0;
33 | }
34 | 
35 | 


--------------------------------------------------------------------------------
/content/exercise/ex04/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 CSC Training
 4 | Copyright (c) 2021 ENCCS
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/content/exercise/ex04/ex04.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | program dotproduct
 4 |   ! Element-wise product of two nx-element vectors, offloaded with the
 5 |   ! combined target teams distribute construct (no map clauses), followed by
 6 |   ! a sum over the products on the host.
 7 |   implicit none
 8 | 
 9 |   integer, parameter :: nx = 102400
10 |   real, parameter :: r = 0.2
11 |   real :: vecA(nx), vecB(nx), vecC(nx)
12 |   real :: total
13 |   integer :: k
14 | 
15 |   ! vecA holds successive powers of r, vecB is all ones
16 |   do k = 1, nx
17 |     vecB(k) = 1.0
18 |     vecA(k) = r**(k-1)
19 |   end do
20 | 
21 |   ! Element-wise product, stored in vecC
22 |   !$omp target teams distribute
23 |   do k = 1, nx
24 |     vecC(k) = vecB(k) * vecA(k)
25 |   end do
26 |   !$omp end target teams distribute
27 | 
28 |   ! Accumulate the products into a single value
29 |   total = 0.0
30 |   do k = 1, nx
31 |     total = total + vecC(k)
32 |   end do
33 | 
34 |   print *, 'The sum is: ', total
35 | 
36 | end program dotproduct
37 | 


--------------------------------------------------------------------------------
/content/exercise/ex04/ex04.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019 CSC Training */
 2 | /* Copyright (c) 2021 ENCCS */
 3 | #include <stdio.h>
 4 | #include <math.h>
 5 | #define NX 102400
 6 | 
 7 | /* Element-wise product of two NX-long vectors, offloaded with the combined
 8 |    target teams distribute construct (no map clauses), followed by a sum
 9 |    over the products on the host. */
10 | int main(void)
11 | {
12 |   double vecA[NX], vecB[NX], vecC[NX];
13 |   double ratio = 0.2;
14 | 
15 |   /* vecA holds successive powers of ratio, vecB is all ones */
16 |   for (int k = 0; k < NX; ++k) {
17 |     vecB[k] = 1.0;
18 |     vecA[k] = pow(ratio, k);
19 |   }
20 | 
21 |   /* Element-wise product, stored in vecC */
22 |   #pragma omp target teams distribute
23 |   for (int k = 0; k < NX; ++k) {
24 |     vecC[k] = vecB[k] * vecA[k];
25 |   }
26 | 
27 |   /* Accumulate the products into a single value */
28 |   double total = 0.0;
29 |   for (int k = 0; k < NX; ++k) {
30 |     total = total + vecC[k];
31 |   }
32 | 
33 |   printf("The sum is: %8.6f \n", total);
34 |   return 0;
35 | }
36 | 


--------------------------------------------------------------------------------
/content/exercise/ex04/solution/ex04.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | program dotproduct
 4 |   ! Element-wise product of two nx-element vectors in one target region with
 5 |   ! explicit map clauses, then a sum over the products in a second target
 6 |   ! region that maps the accumulator both ways.
 7 |   implicit none
 8 | 
 9 |   integer, parameter :: nx = 102400
10 |   real, parameter :: r = 0.2
11 |   real :: vecA(nx), vecB(nx), vecC(nx)
12 |   real :: total
13 |   integer :: k
14 | 
15 |   ! vecA holds successive powers of r, vecB is all ones
16 |   do k = 1, nx
17 |     vecB(k) = 1.0
18 |     vecA(k) = r**(k-1)
19 |   end do
20 | 
21 |   ! Element-wise product: inputs are mapped to, vecC is mapped from
22 |   !$omp target teams distribute map(from:vecC) map(to:vecA,vecB)
23 |   do k = 1, nx
24 |     vecC(k) = vecB(k) * vecA(k)
25 |   end do
26 |   !$omp end target teams distribute
27 | 
28 |   ! Accumulate the products into a single value inside a target region
29 |   total = 0.0
30 |   !$omp target map(tofrom:total)
31 |   do k = 1, nx
32 |     total = total + vecC(k)
33 |   end do
34 |   !$omp end target
35 | 
36 |   print *, 'The sum is: ', total
37 | 
38 | end program dotproduct
39 | 


--------------------------------------------------------------------------------
/content/exercise/ex04/solution/ex04.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019 CSC Training */
 2 | /* Copyright (c) 2021 ENCCS */
 3 | #include <stdio.h>
 4 | #include <math.h>
 5 | #define NX 102400
 6 | 
 7 | /* Element-wise product of two NX-long vectors in one target region with
 8 |    explicit map clauses, then a sum over the products in a second target
 9 |    region that maps the accumulator both ways. */
10 | int main(void)
11 | {
12 |   double vecA[NX], vecB[NX], vecC[NX];
13 |   double ratio = 0.2;
14 | 
15 |   /* vecA holds successive powers of ratio, vecB is all ones */
16 |   for (int k = 0; k < NX; ++k) {
17 |     vecB[k] = 1.0;
18 |     vecA[k] = pow(ratio, k);
19 |   }
20 | 
21 |   /* Element-wise product: inputs are mapped to, vecC is mapped from */
22 |   #pragma omp target teams distribute map(from:vecC[0:NX]) map(to:vecA[0:NX],vecB[0:NX])
23 |   for (int k = 0; k < NX; ++k) {
24 |     vecC[k] = vecB[k] * vecA[k];
25 |   }
26 | 
27 |   /* Accumulate the products into a single value inside a target region */
28 |   double total = 0.0;
29 |   #pragma omp target map(tofrom:total)
30 |   for (int k = 0; k < NX; ++k) {
31 |     total = total + vecC[k];
32 |   }
33 | 
34 |   printf("The sum is: %8.6f \n", total);
35 |   return 0;
36 | }
37 | 


--------------------------------------------------------------------------------
/content/exercise/ex05/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 CSC Training
 4 | Copyright (c) 2021 ENCCS
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/content/exercise/ex05/ex05.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | program dotproduct
 4 |   ! Two offloaded passes over the same vectors: the first stores the
 5 |   ! element-wise product in vecC, the second adds a further product onto it
 6 |   ! after the inputs have been refilled on the host.
 7 |   implicit none
 8 | 
 9 |   integer, parameter :: nx = 102400
10 |   real, parameter :: r=0.2
11 | 
12 |   real, dimension(nx) :: vecA,vecB,vecC
13 |   real    :: sum
14 |   integer :: i
15 | 
16 |   ! Initialization of vectors
17 |   do i = 1, nx
18 |      vecA(i) = r**(i-1)
19 |      vecB(i) = 1.0
20 |   end do
21 | 
22 |   ! Dot product of two vectors
23 |   !$omp target
24 |   do i = 1, nx
25 |      vecC(i) =  vecA(i) * vecB(i)
26 |   end do
27 |   !$omp end target
28 | 
29 |   ! Initialization of vectors again, with the same values as the reference
30 |   ! solution so that the exercise and its solution print the same sum
31 |   do i = 1, nx
32 |      vecA(i) = 0.5
33 |      vecB(i) = 2.0
34 |   end do
35 | 
36 |   ! Second pass: add the new products onto the values already in vecC
37 |   !$omp target
38 |   do i = 1, nx
39 |      vecC(i) =  vecC(i) + vecA(i) * vecB(i)
40 |   end do
41 |   !$omp end target
42 | 
43 |   sum = 0.0
44 |   ! Calculate the sum
45 |   do i = 1, nx
46 |      sum =  vecC(i) + sum
47 |   end do
48 |   write(*,'(A,F18.6)') 'The sum is: ', sum
49 | 
50 | end program dotproduct
51 | 


--------------------------------------------------------------------------------
/content/exercise/ex05/ex05.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019 CSC Training */
 2 | /* Copyright (c) 2021 ENCCS */
 3 | #include <stdio.h>
 4 | #include <math.h>
 5 | #define NX 102400
 6 | 
 7 | /* Two offloaded passes over the same vectors: the first stores the
 8 |    element-wise product in vecC, the second adds a further product onto it
 9 |    after the inputs have been refilled on the host. */
10 | int main(void)
11 | {
12 |   double vecA[NX], vecB[NX], vecC[NX];
13 |   double ratio = 0.2;
14 | 
15 |   /* vecA holds successive powers of ratio, vecB is all ones */
16 |   for (int k = 0; k < NX; ++k) {
17 |     vecB[k] = 1.0;
18 |     vecA[k] = pow(ratio, k);
19 |   }
20 | 
21 |   /* First pass: vecC = vecA * vecB */
22 |   #pragma omp target
23 |   for (int k = 0; k < NX; ++k) {
24 |     vecC[k] = vecB[k] * vecA[k];
25 |   }
26 | 
27 |   /* Refill both inputs on the host */
28 |   for (int k = 0; k < NX; ++k) {
29 |     vecB[k] = 1.0;
30 |     vecA[k] = 1.0;
31 |   }
32 | 
33 |   /* Second pass: add the new products onto vecC */
34 |   #pragma omp target
35 |   for (int k = 0; k < NX; ++k) {
36 |     vecC[k] = vecC[k] + vecB[k] * vecA[k];
37 |   }
38 | 
39 |   /* Accumulate the results into a single value */
40 |   double total = 0.0;
41 |   for (int k = 0; k < NX; ++k) {
42 |     total = total + vecC[k];
43 |   }
44 | 
45 |   printf("The sum is: %8.6f \n", total);
46 |   return 0;
47 | }
48 | 


--------------------------------------------------------------------------------
/content/exercise/ex05/solution/ex05.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | program dotproduct
 4 |   ! Two offloaded passes accumulate into vecC, which is mapped once by the
 5 |   ! enclosing target data region; the inputs are refilled on the host in
 6 |   ! between and mapped to the device again for the second pass.
 7 |   implicit none
 8 | 
 9 |   integer, parameter :: nx = 102400
10 |   real, parameter :: r = 0.2
11 |   real :: vecA(nx), vecB(nx), vecC(nx)
12 |   real :: total
13 |   integer :: k
14 | 
15 |   ! vecA holds successive powers of r, vecB is all ones
16 |   do k = 1, nx
17 |     vecB(k) = 1.0
18 |     vecA(k) = r**(k-1)
19 |   end do
20 | 
21 |   !$omp target data map(from:vecC)
22 | 
23 |   ! First pass: vecC = vecA * vecB
24 |   !$omp target map(to:vecA,vecB)
25 |   do k = 1, nx
26 |     vecC(k) = vecB(k) * vecA(k)
27 |   end do
28 |   !$omp end target
29 | 
30 |   ! Refill both inputs on the host
31 |   do k = 1, nx
32 |     vecB(k) = 2.0
33 |     vecA(k) = 0.5
34 |   end do
35 | 
36 |   ! Second pass: add the new products onto vecC
37 |   !$omp target map(to:vecA,vecB)
38 |   do k = 1, nx
39 |     vecC(k) = vecC(k) + vecB(k) * vecA(k)
40 |   end do
41 |   !$omp end target
42 | 
43 |   !$omp end target data
44 | 
45 |   ! Accumulate the results into a single value
46 |   total = 0.0
47 |   do k = 1, nx
48 |     total = total + vecC(k)
49 |   end do
50 |   print '(A,F18.6)', 'The sum is: ', total
51 | 
52 | end program dotproduct
53 | 


--------------------------------------------------------------------------------
/content/exercise/ex05/solution/ex05.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019 CSC Training */
 2 | /* Copyright (c) 2021 ENCCS */
 3 | #include <stdio.h>
 4 | #include <math.h>
 5 | #define NX 102400
 6 | 
 7 | /* Two offloaded passes accumulate into vecC, which is mapped once by the
 8 |    enclosing target data region; the inputs are refilled on the host in
 9 |    between and mapped to the device again for the second pass. */
10 | int main(void)
11 | {
12 |   double vecA[NX], vecB[NX], vecC[NX];
13 |   double ratio = 0.2;
14 | 
15 |   /* vecA holds successive powers of ratio, vecB is all ones */
16 |   for (int k = 0; k < NX; ++k) {
17 |     vecB[k] = 1.0;
18 |     vecA[k] = pow(ratio, k);
19 |   }
20 | 
21 |   #pragma omp target data map(from:vecC[0:NX])
22 |   {
23 |     /* First pass: vecC = vecA * vecB */
24 |     #pragma omp target map(to:vecA[0:NX],vecB[0:NX])
25 |     for (int k = 0; k < NX; ++k) {
26 |       vecC[k] = vecB[k] * vecA[k];
27 |     }
28 | 
29 |     /* Refill both inputs on the host */
30 |     for (int k = 0; k < NX; ++k) {
31 |       vecB[k] = 2.0;
32 |       vecA[k] = 0.5;
33 |     }
34 | 
35 |     /* Second pass: add the new products onto vecC */
36 |     #pragma omp target map(to:vecA[0:NX],vecB[0:NX])
37 |     for (int k = 0; k < NX; ++k) {
38 |       vecC[k] = vecC[k] + vecB[k] * vecA[k];
39 |     }
40 |   }
41 | 
42 |   /* Accumulate the results into a single value */
43 |   double total = 0.0;
44 |   for (int k = 0; k < NX; ++k) {
45 |     total = total + vecC[k];
46 |   }
47 | 
48 |   printf("The sum is: %8.6f \n", total);
49 |   return 0;
50 | }
51 | 


--------------------------------------------------------------------------------
/content/exercise/ex06/ex06.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2021 ENCCS
 2 | program dotproduct  ! NOTE(review): name looks carried over from the dot-product exercises
 3 |   implicit none
 4 | 
 5 |   integer :: x  ! scalar mapped by the target data region below
 6 | 
 7 |   x = 0
 8 |   !$omp target data map(tofrom:x) 
 9 |   ! check point 1: inside the data region, before the host assignment to x
10 |   x = 10                        
11 |   ! check point 2: after the host assignment, before the target update
12 |   !$omp target update to(x)       
13 |   ! check point 3: after the target update, still inside the data region
14 |   !$omp end target data
15 | 
16 | end program dotproduct
17 | 


--------------------------------------------------------------------------------
/content/exercise/ex06/ex06.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2021 ENCCS */
 2 | #include <stdio.h>
 3 | int main(void)
 4 | {
 5 |   int x = 0;  /* scalar mapped by the target data region below */
 6 | 
 7 |   #pragma omp target data map(tofrom:x)
 8 |   {
 9 | /* check point 1: inside the data region, before the host assignment to x */
10 |     x = 10;                        
11 | /* check point 2: after the host assignment, before the target update */
12 |   #pragma omp target update to(x)       
13 | /* check point 3: after the target update, still inside the data region */
14 |   }
15 | 
16 | return 0;
17 | }
18 | 


--------------------------------------------------------------------------------
/content/exercise/offloading/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 CSC Training
 4 | Copyright (c) 2021 ENCCS
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/content/exercise/offloading/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 CSC Training
 2 | # Copyright (c) 2021 ENCCS
 3 | ifeq ($(COMP),)
 4 | COMP=nv
 5 | endif
 6 | 
 7 | HAVE_PNG=0
 8 | ifeq ($(HAVE_PNG),1)
 9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 
11 | endif
12 | 
13 | COMMONDIR=../common
14 | 
15 | ifeq ($(COMP),gnu)
16 | CXX=g++
17 | CC=gcc
18 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR)
19 | LDFLAGS=
20 | LIBS=
21 | endif
22 | 
23 | ifeq ($(COMP),nv)
24 | CXX=nvc++
25 | CC=nvc
26 | CCFLAGS=-O3 -mp=gpu -Minfo=mp -I$(COMMONDIR)
27 | LDFLAGS=
28 | LIBS=
29 | endif
30 | 
31 | ifeq ($(COMP),intel)
32 | CXX=icpx
33 | CC=icx
34 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR)
35 | LDFLAGS=
36 | LIBS=
37 | endif
38 | 
39 | EXE=heat_serial
40 | OBJS=main.o core.o setup.o utilities.o io.o
41 | OBJS_PNG=$(COMMONDIR)/pngwriter.o
42 | 
43 | 
44 | all: $(EXE)
45 | 
46 | # Header dependencies: rebuild an object when a header it includes changes
47 | core.o: core.cpp heat.h
48 | utilities.o: utilities.cpp heat.h
49 | setup.o: setup.cpp heat.h
50 | io.o: io.cpp heat.h $(COMMONDIR)/pngwriter.h
51 | main.o: main.cpp heat.h
52 | 
53 | $(OBJS_PNG): C_COMPILER := $(CC)
54 | $(OBJS_PNG): CCFLAGS += -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include
55 | $(OBJS): C_COMPILER := $(CXX)
56 | 
57 | $(EXE): $(OBJS) $(OBJS_PNG)
58 | 	$(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
59 | 
60 | %.o: %.cpp
61 | 	$(CXX) $(CCFLAGS) -c $< -o $@
62 | 
63 | %.o: %.c
64 | 	$(CC) $(CCFLAGS) -c $< -o $@
65 | 
66 | .PHONY: clean
67 | clean:
68 | 	-/bin/rm -f $(EXE) a.out *.o *.png *~ ../common/*.o
69 | 


--------------------------------------------------------------------------------
/content/exercise/offloading/core.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Main solver routines for heat equation solver
 4 | 
 5 | #include "heat.h"
 6 | 
 7 | // Update the temperature values using five-point stencil
 8 | // Arguments:
 9 | //   curr: current temperature values
10 | //   prev: temperature values from previous time step
11 | //   a: diffusivity
12 | //   dt: time step
13 | void evolve(field *curr, field *prev, double a, double dt)
14 | {
15 |   // Help the compiler avoid being confused by the structs
16 |   double *currdata = curr->data.data();
17 |   double *prevdata = prev->data.data();
18 |   int nx = curr->nx;
19 |   int ny = curr->ny;
20 | 
21 |   // Determine the temperature field at next time step
22 |   // As we have fixed boundary conditions, the outermost gridpoints
23 |   // are not updated.
24 |   double dx2 = prev->dx * prev->dx;
25 |   double dy2 = prev->dy * prev->dy;
26 |   // add the directives below for offloading
27 |   for (int i = 1; i < nx + 1; i++) {
28 |     for (int j = 1; j < ny + 1; j++) {
29 |       int ind = i * (ny + 2) + j;
30 |       int ip = (i + 1) * (ny + 2) + j;
31 |       int im = (i - 1) * (ny + 2) + j;
32 |       int jp = i * (ny + 2) + j + 1;
33 |       int jm = i * (ny + 2) + j - 1;
34 |       currdata[ind] = prevdata[ind] + a*dt*
35 | 	    ((prevdata[ip] - 2.0*prevdata[ind] + prevdata[im]) / dx2 +
36 | 	     (prevdata[jp] - 2.0*prevdata[ind] + prevdata[jm]) / dy2);
37 |     }
38 |   }
39 | }
40 | 


--------------------------------------------------------------------------------
/content/exercise/offloading/fortran/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 CSC Training
 2 | # Copyright (c) 2021 ENCCS
 3 | ifeq ($(COMP),)
 4 | COMP=nv
 5 | endif
 6 | 
 7 | HAVE_PNG=0
 8 | ifeq ($(HAVE_PNG),1)
 9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 
11 | endif
12 | 
13 | 
14 | COMMONDIR=../../common
15 | 
16 | ifeq ($(COMP),nv)
17 | FC=nvfortran
18 | CC=nvc
19 | FCFLAGS=-O3 -mp=gpu -Minfo=mp
20 | CCFLAGS=-O3 -I$(COMMONDIR)
21 | LDFLAGS=
22 | LIBS= 
23 | endif
24 | 
25 | ifeq ($(COMP),gnu)
26 | FC=gfortran
27 | CC=gcc
28 | FCFLAGS=-O3 -Wall -fopenmp
29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR)
30 | LDFLAGS=-fopenmp
31 | LIBS=
32 | endif
33 | 
34 | EXE=heat_serial
35 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o
36 | OBJS_PNG= $(COMMONDIR)/pngwriter.o
37 | 
38 | all: $(EXE)
39 | 
40 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
41 | heat_mod.o: heat_mod.F90
42 | core.o: core.F90 heat_mod.o
43 | utilities.o: utilities.F90 heat_mod.o
44 | io.o: io.F90 heat_mod.o pngwriter_mod.o 
45 | setup.o: setup.F90 heat_mod.o utilities.o io.o
46 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o
47 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o
48 | 
49 | $(EXE): $(OBJS) $(OBJS_PNG)
50 | 	$(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
51 | 
52 | %.o: %.F90
53 | 	$(FC) $(FCFLAGS) -c $< -o $@
54 | 
55 | %.o: %.c
56 | 	$(CC) -DHAVE_PNG=$(HAVE_PNG) $(CCFLAGS) -c $< -o $@
57 | 
58 | .PHONY: clean
59 | clean:
60 | 	-/bin/rm -f $(EXE) a.out *.o *.mod *.png *~  ../../common/*.o
61 | 
62 | 


--------------------------------------------------------------------------------
/content/exercise/offloading/fortran/core.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Main solver routines for heat equation solver
 4 | module core
 5 |   use heat
 6 | 
 7 | contains
 8 | 
 9 |   ! Update the temperature values using five-point stencil
10 |   ! Arguments:
11 |   !   curr (type(field)): current temperature values
12 |   !   prev (type(field)): temperature values from previous time step
13 |   !   a (real(dp)): diffusivity
14 |   !   dt (real(dp)): time step
15 |   subroutine evolve(curr, prev, a, dt)
16 | 
17 |     implicit none
18 | 
19 |     type(field),target, intent(inout) :: curr, prev
20 |     real(dp) :: a, dt
21 |     integer :: i, j, nx, ny
22 |     real(dp) :: dx, dy
23 |     real(dp), pointer, contiguous, dimension(:,:) :: currdata, prevdata
24 | 
25 |     ! Help the compiler avoid being confused
26 |     nx = curr%nx
27 |     ny = curr%ny
28 |     dx = curr%dx
29 |     dy = curr%dy
30 |     currdata => curr%data
31 |     prevdata => prev%data
32 | 
33 |     ! Determine the temperature field at next time step As we have
34 |     ! fixed boundary conditions, the outermost gridpoints are not
35 |     ! updated.
36 | 
37 | ! add the directives below for offloading
38 | 
39 |     do j = 1, ny
40 |        do i = 1, nx
41 |           currdata(i, j) = prevdata(i, j) + a * dt * &
42 |                & ((prevdata(i-1, j) - 2.0 * prevdata(i, j) + &
43 |                &   prevdata(i+1, j)) / dx**2 + &
44 |                &  (prevdata(i, j-1) - 2.0 * prevdata(i, j) + &
45 |                &   prevdata(i, j+1)) / dy**2)
46 |        end do
47 |     end do
48 |   end subroutine evolve
49 | 
50 | end module core
51 | 


--------------------------------------------------------------------------------
/content/exercise/offloading/fortran/heat_mod.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Field metadata for heat equation solver
 4 | module heat
 5 |   use iso_fortran_env, only : REAL64
 6 |   implicit none
 7 |   ! NOTE: literals need the _dp suffix; a bare 0.01 is a default-kind real
 8 |   integer, parameter :: dp = REAL64  ! Working precision (64-bit real)
 9 |   real(dp), parameter :: DX = 0.01_dp, DY = 0.01_dp  ! Fixed grid spacing
10 | 
11 |   type :: field
12 |      integer :: nx          ! dimensions of the field
13 |      integer :: ny
14 |      real(dp) :: dx         ! grid spacing
15 |      real(dp) :: dy
16 |      real(dp), dimension(:,:), allocatable :: data  ! temperature values
17 |   end type field
18 | 
19 | contains
20 |   ! Initialize the field type metadata (the data array is not allocated here)
21 |   ! Arguments:
22 |   !   field0 (type(field)): field whose nx, ny, dx and dy are set
23 |   !   nx, ny (integer): field dimensions; dx, dy are taken from DX, DY
24 |   subroutine set_field_dimensions(field0, nx, ny)
25 |     implicit none
26 | 
27 |     type(field), intent(out) :: field0
28 |     integer, intent(in) :: nx, ny
29 | 
30 |     field0%dx = DX
31 |     field0%dy = DY
32 |     field0%nx = nx
33 |     field0%ny = ny
34 | 
35 |   end subroutine set_field_dimensions
36 | 
37 | end module heat
38 | 


--------------------------------------------------------------------------------
/content/exercise/offloading/fortran/io.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! I/O routines for heat equation solver
 4 | module io
 5 |   use heat
 6 | 
 7 | contains
 8 | 
 9 |   ! Output routine, saves the temperature distribution as a png image
10 |   ! Arguments:
11 |   !   curr (type(field)): variable with the temperature data
12 |   !   iter (integer): index of the time step
13 |   subroutine write_field(curr, iter)
14 | 
15 |     use pngwriter
16 |     implicit none
17 |     type(field), intent(in) :: curr
18 |     integer, intent(in) :: iter
19 | 
20 |     character(len=85) :: filename
21 | 
22 |     integer :: stat
23 |     real(dp), dimension(:,:), allocatable, target :: full_data
24 | 
25 |     allocate(full_data(curr%nx, curr%ny))
26 |     ! Copy the interior of the field (without the halo) to a contiguous array
27 |     full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny)
28 | 
29 |     write(filename,'(A5,I4.4,A4,A)')  'heat_', iter, '.png'
30 |     stat = save_png(full_data, curr%nx, curr%ny, filename)
31 |     deallocate(full_data)
32 | 
33 |   end subroutine write_field
34 | 
35 | 
36 |   ! Reads the temperature distribution from an input file
37 |   ! Arguments:
38 |   !   field0 (type(field)): field variable that will store the
39 |   !                         read data
40 |   !   filename (char): name of the input file
41 |   ! Note that this version assumes the input data to be in C memory layout
42 |   subroutine read_field(field0, filename)
43 | 
44 |     implicit none
45 |     type(field), intent(out) :: field0
46 |     character(len=85), intent(in) :: filename
47 | 
48 |     integer :: nx, ny, i
49 |     character(len=2) :: dummy
50 | 
51 |     real(dp), dimension(:,:), allocatable :: full_data
52 | 
53 |     open(10, file=filename)
54 |     ! Read the header
55 |     read(10, *) dummy, nx, ny
56 | 
57 |     call set_field_dimensions(field0, nx, ny)
58 | 
59 |     ! The arrays for temperature field contain also a halo region
60 |     allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
61 | 
62 |     allocate(full_data(nx, ny))
63 |     ! Read the data
64 |     do i = 1, nx
65 |        read(10, *) full_data(i, 1:ny)
66 |     end do
67 | 
68 |     ! Copy to full array containing also boundaries
69 |     field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:)
70 | 
71 |     ! Set the boundary values
72 |     field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1)
73 |     field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny)
74 |     field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1)
75 |     field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1)
76 | 
77 |     close(10)
78 |     deallocate(full_data)
79 | 
80 |   end subroutine read_field
81 | 
82 | end module io
83 | 


--------------------------------------------------------------------------------
/content/exercise/offloading/fortran/main.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Heat equation solver in 2D.
 4 | 
 5 | program heat_solve
 6 |   use heat
 7 |   use core
 8 |   use io
 9 |   use setup
10 |   use utilities
11 |   use omp_lib
12 | 
13 |   implicit none
14 | 
15 |   real(dp), parameter :: a = 0.5 ! Diffusion constant
16 |   type(field) :: current, previous    ! Current and previus temperature fields
17 | 
18 |   real(dp) :: dt     ! Time step
19 |   integer :: nsteps       ! Number of time steps
20 |   integer, parameter :: image_interval = 1500 ! Image output interval
21 | 
22 |   integer :: iter
23 | 
24 |   real(dp) :: average_temp   !  Average temperature
25 | 
26 |   real(kind=dp) :: start, stop ! Timers
27 | 
28 |   call initialize(current, previous, nsteps)
29 | 
30 |   ! Draw the picture of the initial state
31 |   call write_field(current, 0)
32 | 
33 |   average_temp = average(current)
34 |   write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp
35 | 
36 |   ! Largest stable time step
37 |   dt = current%dx**2 * current%dy**2 / &
38 |        & (2.0 * a * (current%dx**2 + current%dy**2))
39 | 
40 |   ! Main iteration loop
41 | 
42 |   start =  omp_get_wtime()
43 | 
44 |   do iter = 1, nsteps
45 |      call evolve(current, previous, a, dt)
46 |      if (mod(iter, image_interval) == 0) then
47 |         call write_field(current, iter)
48 |      end if
49 |      call swap_fields(current, previous)
50 |   end do
51 | 
52 |   stop = omp_get_wtime()
53 | 
54 |   ! Average temperature for reference
55 |   average_temp = average(previous)
56 | 
57 |   write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.'
58 |   write(*,'(A,F9.6)') 'Average temperature: ',  average_temp
59 |   if (command_argument_count() == 0) then
60 |       write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239
61 |   end if
62 | 
63 |   call finalize(current, previous)
64 | 
65 | end program heat_solve
66 | 


--------------------------------------------------------------------------------
/content/exercise/offloading/fortran/pngwriter_mod.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! PNG writer for heat equation solver
 4 | module pngwriter
 5 |   use heat
 6 | 
 7 | contains
 8 | 
 9 |   function save_png(data, nx, ny, fname) result(stat)
10 | 
11 |     use, intrinsic :: ISO_C_BINDING
12 |     implicit none
13 | 
14 |     real(dp), dimension(:,:), intent(in) :: data
15 |     integer, intent(in) :: nx, ny
16 |     character(len=*), intent(in) :: fname
17 |     integer :: stat
18 | 
19 |     ! Interface for save_png C-function
20 |     interface
21 |        ! The C-function definition is expected to be (confirm in pngwriter.h)
22 |        !   int save_png(double *data, const int nx, const int ny,
23 |        !                const char *fname, const char order)
24 |        function save_png_c(data, nx, ny, fname, order) &
25 |             & bind(C,name="save_png") result(stat)
26 |          use, intrinsic :: ISO_C_BINDING
27 |          implicit none
28 |          real(kind=C_DOUBLE) :: data(*)
29 |          integer(kind=C_INT), value, intent(IN) :: nx, ny
30 |          character(kind=C_CHAR), intent(IN) :: fname(*)
31 |          character(kind=C_CHAR), value, intent(IN) :: order
32 |          integer(kind=C_INT) :: stat
33 |        end function save_png_c
34 |     end interface
35 | 
36 |     stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
37 |     if (stat /= 0) then
38 |        write(*,*) 'save_png returned error!'
39 |     end if
40 | 
41 |   end function save_png
42 | 
43 | end module pngwriter
44 | 


--------------------------------------------------------------------------------
/content/exercise/offloading/fortran/utilities.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Utility routines for heat equation solver
 4 | !   NOTE: This file does not need to be edited!
 5 | module utilities
 6 |   use heat
 7 | 
 8 | contains
 9 | 
10 |   ! Swap the data fields of two variables of type field
11 |   ! Arguments:
12 |   !   curr, prev (type(field)): the two variables that are swapped
13 |   subroutine swap_fields(curr, prev)
14 | 
15 |     implicit none
16 | 
17 |     type(field), intent(inout) :: curr, prev
18 |     real(dp), allocatable, dimension(:,:) :: tmp
19 | 
20 |     call move_alloc(curr%data, tmp)
21 |     call move_alloc(prev%data, curr%data)
22 |     call move_alloc(tmp, prev%data)
23 |   end subroutine swap_fields
24 | 
25 |   ! Copy the data from one field to another
26 |   ! Arguments:
27 |   !   from_field (type(field)): variable to copy from
28 |   !   to_field (type(field)): variable to copy to
29 |   subroutine copy_fields(from_field, to_field)
30 | 
31 |     implicit none
32 | 
33 |     type(field), intent(in) :: from_field
34 |     type(field), intent(out) :: to_field
35 | 
36 |     ! Consistency checks
37 |     if (.not.allocated(from_field%data)) then
38 |        write (*,*) "Can not copy from a field without allocated data"
39 |        stop
40 |     end if
41 |     if (.not.allocated(to_field%data)) then
42 |        ! Target is not initialized; allocate memory
43 |        allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
44 |             & lbound(from_field%data, 2):ubound(from_field%data, 2)))
45 |     else if (any(shape(from_field%data) /= shape(to_field%data))) then
46 |        write (*,*) "Wrong field data sizes in copy routine"
47 |        print *, shape(from_field%data), shape(to_field%data)
48 |        stop
49 |     end if
50 | 
51 |     to_field%data = from_field%data
52 | 
53 |     to_field%nx = from_field%nx
54 |     to_field%ny = from_field%ny
55 |     to_field%dx = from_field%dx
56 |     to_field%dy = from_field%dy
57 |   end subroutine copy_fields
58 | 
59 |   ! Compute the average temperature over the interior cells of a field
60 |   ! Arguments:
61 |   !   field0 (type(field)): field whose data(1:nx, 1:ny) is averaged
62 |   function average(field0)
63 | 
64 |     implicit none
65 | 
66 |     real(dp) :: average
67 |     type(field), intent(in) :: field0
68 | 
69 |     average = sum(field0%data(1:field0%nx, 1:field0%ny))
70 |     average = average / (field0%nx * field0%ny)
71 | 
72 |   end function average
73 | 
74 | end module utilities
75 | 


--------------------------------------------------------------------------------
/content/exercise/offloading/heat.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | #ifndef __HEAT_H__
 4 | #define __HEAT_H__
 5 | 
 6 | #include <vector>
 7 | 
 8 | // Datatype for temperature field
 9 | struct field {
10 |     // nx and ny are the dimensions of the field. The array data
11 |     // contains also ghost layers, so it will have dimensions nx+2 x ny+2
12 |     int nx;
13 |     int ny;
14 |     // Size of the grid cells
15 |     double dx;
16 |     double dy;
17 |     // The temperature values in the 2D grid
18 |     std::vector<double> data;
19 | };
20 | 
21 | // We use here fixed grid spacing
22 | const double DX = 0.01;
23 | const double DY = 0.01;
24 | 
25 | #if __cplusplus
26 |   extern "C" {
27 | #endif
28 | // Function prototypes
29 | void set_field_dimensions(field *temperature, int nx, int ny);
30 | 
31 | void initialize(int argc, char *argv[], field *temperature1,
32 |                 field *temperature2, int *nsteps);
33 | 
34 | void generate_field(field *temperature);
35 | 
36 | double average(field *temperature);
37 | 
38 | void evolve(field *curr, field *prev, double a, double dt);
39 | 
40 | void write_field(field *temperature, int iter);
41 | 
42 | void read_field(field *temperature1, field *temperature2,
43 |                 char *filename);
44 | 
45 | void copy_field(field *temperature1, field *temperature2);
46 | 
47 | void swap_fields(field *temperature1, field *temperature2);
48 | 
49 | void allocate_field(field *temperature);
50 | 
51 | #if __cplusplus
52 |   }
53 | #endif
54 | #endif  // __HEAT_H__
55 | 
56 | 


--------------------------------------------------------------------------------
/content/exercise/offloading/io.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2019 CSC Training
  2 | // Copyright (c) 2021 ENCCS
  3 | // I/O related functions for heat equation solver
  4 | 
  5 | #include <stdio.h>
  6 | #include <stdlib.h>
  7 | #include <string.h>
  8 | #include <assert.h>
  9 | 
 10 | #include "heat.h"
 11 | #include "pngwriter.h"
 12 | 
 13 | // Output routine that prints out a picture of the temperature
 14 | // distribution.
 15 | void write_field(field *temperature, int iter)
 16 | {
 17 |     char filename[64];
 18 | 
 19 |     // The actual write routine takes only the actual data
 20 |     // (without boundary layers) so we need to copy an array with that.
 21 |     std::vector<double> inner_data(temperature->nx * temperature->ny);
 22 |     auto inner_data_iterator = inner_data.begin();
 23 |     auto beginning_of_row = temperature->data.begin() + (temperature->ny + 2) + 1;
 24 |     for (int i = 0; i < temperature->nx; i++) {
 25 |         auto end_of_row = beginning_of_row + temperature->ny;
 26 |         std::copy(beginning_of_row, end_of_row, inner_data_iterator);
 27 |         inner_data_iterator += temperature->ny;
 28 |         beginning_of_row = end_of_row + 2;
 29 |     }
 30 | 
 31 |     // Write out the data to a png file
 32 |     sprintf(filename, "%s_%04d.png", "heat", iter);
 33 |     save_png(inner_data.data(), temperature->nx, temperature->ny, filename, 'c');
 34 | }
 35 | 
 36 | // Read the initial temperature distribution from a file and
 37 | // initialize temperature1 and temperature2 to the same initial state.
 38 | // Exits with an error message if the file cannot be opened or parsed.
 39 | void read_field(field *temperature1, field *temperature2, char *filename)
 40 | {
 41 |     FILE *fp = fopen(filename, "r");
 42 |     if (fp == NULL) {
 43 |         fprintf(stderr, "Could not open the input file %s!\n", filename);
 44 |         exit(-1);
 45 |     }
 46 | 
 47 |     // Read the header: "# nx ny"
 48 |     int nx, ny;
 49 |     if (fscanf(fp, "# %d %d \n", &nx, &ny) < 2) {
 50 |         fprintf(stderr, "Error while reading the input file!\n");
 51 |         exit(-1);
 52 |     }
 53 | 
 54 |     set_field_dimensions(temperature1, nx, ny);
 55 |     set_field_dimensions(temperature2, nx, ny);
 56 | 
 57 |     // Allocate arrays (including boundary layers)
 58 |     int newSize = (temperature1->nx + 2) * (temperature1->ny + 2);
 59 |     temperature1->data.resize(newSize, 0.0);
 60 |     temperature2->data.resize(newSize, 0.0);
 61 | 
 62 |     // Array from file
 63 |     std::vector<double> file_data(nx * ny, 0.0);
 64 | 
 65 |     // Read the actual data (row by row); stop on a short or malformed file
 66 |     for (int ind = 0; ind < nx * ny; ind++) {
 67 |         if (fscanf(fp, "%lf", &file_data[ind]) != 1) {
 68 |             fprintf(stderr, "Error while reading the input file!\n");
 69 |             exit(-1);
 70 |         }
 71 |     }
 72 | 
 73 |     int nx_local = temperature1->nx;
 74 |     int ny_local = temperature1->ny;
 75 | 
 76 |     // Copy to the inner part of the full temperature field
 77 |     auto temperature_data_iterator = temperature1->data.begin() + (ny_local + 2) + 1;
 78 |     auto beginning_of_row = file_data.begin();
 79 |     for (int i = 0; i < nx_local; i++) {
 80 |         auto end_of_row = beginning_of_row + ny_local;
 81 |         std::copy(beginning_of_row, end_of_row, temperature_data_iterator);
 82 |         temperature_data_iterator += ny_local + 2;
 83 |         beginning_of_row = end_of_row;
 84 |     }
 85 | 
 86 |     // Set the boundary values by copying the neighbouring inner row/column
 87 |     for (int i = 1; i < nx_local + 1; i++) {
 88 |         temperature1->data[i * (ny_local + 2)] = temperature1->data[i * (ny_local + 2) + 1];
 89 |         temperature1->data[i * (ny_local + 2) + ny_local + 1] = temperature1->data[i * (ny_local + 2) + ny_local];
 90 |     }
 91 |     for (int j = 0; j < ny_local + 2; j++) {
 92 |         temperature1->data[j] = temperature1->data[(ny_local + 2) + j];  // row 0 <- row 1
 93 |         temperature1->data[(nx_local + 1) * (ny_local + 2) + j] =
 94 |             temperature1->data[nx_local * (ny_local + 2) + j];
 95 |     }
 96 | 
 97 |     copy_field(temperature1, temperature2);
 98 | 
 99 |     fclose(fp);
100 | }
101 | 


--------------------------------------------------------------------------------
/content/exercise/offloading/main.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Main routine for heat equation solver in 2D.
 4 | 
 5 | #include <stdio.h>
 6 | #include <omp.h>
 7 | 
 8 | #include "heat.h"
 9 | 
10 | int main(int argc, char **argv)
11 | {
12 |     // Image output interval
13 |     int image_interval = 1500;
14 | 
15 |     // Number of time steps
16 |     int nsteps;
17 |     // Current and previous temperature fields
18 |     field current, previous;
19 |     initialize(argc, argv, &current, &previous, &nsteps);
20 | 
21 |     // Output the initial field 
22 |     write_field(&current, 0);
23 | 
24 |     double average_temp = average(&current);
25 |     printf("Average temperature at start: %f\n", average_temp);
26 | 
27 |     // Diffusion constant
28 |     double a = 0.5;
29 | 
30 |     // Compute the largest stable time step
31 |     double dx2 = current.dx * current.dx;
32 |     double dy2 = current.dy * current.dy;
33 |     // Time step
34 |     double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));
35 | 
36 |     // Get the start time stamp
37 |     double start_clock = omp_get_wtime();
38 | 
39 |     // Time evolution
40 |     for (int iter = 1; iter <= nsteps; iter++) {
41 |         evolve(&current, &previous, a, dt);
42 |         if (iter % image_interval == 0) {
43 |             write_field(&current, iter);
44 |         }
45 |         // Swap current field so that it will be used
46 |         // as previous for next iteration step
47 |         swap_fields(&current, &previous);
48 |     }
49 | 
50 |     double stop_clock = omp_get_wtime();
51 | 
52 |     // Average temperature for reference
53 |     average_temp = average(&previous);
54 | 
55 |     // Determine the CPU time used for all the iterations
56 |     printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock));
57 |     printf("Average temperature: %f\n", average_temp);
58 |     if (argc == 1) {
59 |         printf("Reference value with default arguments: 59.281239\n");
60 |     }
61 | 
62 |     // Output the final field
63 |     write_field(&previous, nsteps);
64 | 
65 |     return 0;
66 | }
67 | 


--------------------------------------------------------------------------------
/content/exercise/offloading/utilities.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Utility functions for heat equation solver
 4 | 
 5 | #include <stdlib.h>
 6 | #include <assert.h>
 7 | 
 8 | #include "heat.h"
 9 | 
10 | 
11 | // Copy data on temperature1 into temperature2
12 | void copy_field(field *temperature1, field *temperature2)
13 | {
14 |     assert(temperature1->nx == temperature2->nx);
15 |     assert(temperature1->ny == temperature2->ny);
16 |     assert(temperature1->data.size() == temperature2->data.size());
17 |     std::copy(temperature1->data.begin(), temperature1->data.end(),
18 |               temperature2->data.begin());
19 | }
20 | 
21 | // Swap the field data for temperature1 and temperature2
22 | void swap_fields(field *temperature1, field *temperature2)
23 | {
24 |     std::swap(temperature1->data, temperature2->data);
25 | }
26 | 
27 | // Allocate memory for a temperature field and initialise it to zero
28 | void allocate_field(field *temperature)
29 | {
30 |     // Include also boundary layers
31 |     int newSize = (temperature->nx + 2) * (temperature->ny + 2);
32 |     temperature->data.resize(newSize, 0.0);
33 | }
34 | 
35 | // Calculate average temperature over the non-boundary grid cells
36 | double average(field *temperature)
37 | {
38 |      double average = 0.0;
39 | 
40 |      for (int i = 1; i < temperature->nx + 1; i++) {
41 |        for (int j = 1; j < temperature->ny + 1; j++) {
42 |          int ind = i * (temperature->ny + 2) + j;
43 |          average += temperature->data[ind];
44 |        }
45 |      }
46 | 
47 |      average /= (temperature->nx * temperature->ny);
48 |      return average;
49 | }
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/content/exercise/serial/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 CSC Training
 4 | Copyright (c) 2021 ENCCS
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/content/exercise/serial/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 CSC Training
 2 | # Copyright (c) 2021 ENCCS
 3 | ifeq ($(COMP),)
 4 | COMP=nv
 5 | endif
 6 | 
 7 | HAVE_PNG=0
 8 | ifeq ($(HAVE_PNG),1)
 9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib
11 | endif
12 | 
13 | COMMONDIR=../common
14 | 
15 | ifeq ($(COMP),pgi)
16 | CXX=pgCC
17 | CC=pgcc
18 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR)
19 | LDFLAGS=
20 | LIBS=
21 | endif
22 | 
23 | ifeq ($(COMP),gnu)
24 | CXX=g++
25 | CC=gcc
26 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR)
27 | LDFLAGS=
28 | LIBS=
29 | endif
30 | 
31 | ifeq ($(COMP),nv)
32 | CXX=nvc++
33 | CC=nvc
34 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR)
35 | LDFLAGS=
36 | LIBS=
37 | endif
38 | 
39 | ifeq ($(COMP),intel)
40 | CXX=icpx
41 | CC=icx
42 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR)
43 | LDFLAGS=
44 | LIBS=
45 | endif
46 | 
47 | EXE=heat_serial
48 | OBJS=main.o core.o setup.o utilities.o io.o
49 | OBJS_PNG=$(COMMONDIR)/pngwriter.o
50 | 
51 | 
52 | all: $(EXE)
53 | 
54 | 
55 | core.o: core.cpp heat.h
56 | utilities.o: utilities.cpp heat.h
57 | setup.o: setup.cpp heat.h
58 | io.o: io.cpp heat.h
59 | main.o: main.cpp heat.h
60 | 
61 | $(OBJS_PNG): C_COMPILER := $(CC)
62 | $(OBJS_PNG): CCFLAGS += -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include
63 | $(OBJS): C_COMPILER := $(CXX)
64 | 
65 | $(EXE): $(OBJS) $(OBJS_PNG)
66 | 	$(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
67 | 
68 | %.o: %.cpp
69 | 	$(CXX) $(CCFLAGS) -c $< -o $@
70 | 
71 | %.o: %.c
72 | 	$(CC) $(CCFLAGS) -c $< -o $@
73 | 
74 | .PHONY: clean
75 | clean:
76 | 	-/bin/rm -f $(EXE) a.out *.o *.png *~ ../common/*.o
77 | 


--------------------------------------------------------------------------------
/content/exercise/serial/core.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Main solver routines for heat equation solver
 4 | 
 5 | #include "heat.h"
 6 | 
 7 | // Advance the temperature field one time step with a five-point stencil
 8 | // Arguments:
 9 | //   curr: field that receives the updated temperature values
10 | //   prev: field holding the temperature values of the previous time step
11 | //   a: diffusivity
12 | //   dt: time step
13 | void evolve(field *curr, field *prev, double a, double dt)
14 | {
15 |   // Raw pointers and local copies of the sizes keep the loop body simple
16 |   double *next = curr->data.data();
17 |   const double *old = prev->data.data();
18 |   const int nx = curr->nx;
19 |   const int ny = curr->ny;
20 |   const int stride = ny + 2;  // row length including both ghost columns
21 |   // Only the interior points are written: with fixed boundary conditions
22 |   // the outermost gridpoints keep their values.
23 |   const double dx2 = prev->dx * prev->dx;
24 |   const double dy2 = prev->dy * prev->dy;
25 |   const double adt = a * dt;
26 |   for (int row = 1; row <= nx; row++) {
27 |     for (int col = 1; col <= ny; col++) {
28 |       const int here = row * stride + col;
29 |       const int i_plus = here + stride;
30 |       const int i_minus = here - stride;
31 |       const int j_plus = here + 1;
32 |       const int j_minus = here - 1;
33 |       next[here] = old[here] + adt *
34 |         ((old[i_plus] - 2.0 * old[here] + old[i_minus]) / dx2 +
35 |          (old[j_plus] - 2.0 * old[here] + old[j_minus]) / dy2);
36 |     }
37 |   }
38 | }
39 | 


--------------------------------------------------------------------------------
/content/exercise/serial/fortran/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 CSC Training
 2 | # Copyright (c) 2021 ENCCS
 3 | ifeq ($(COMP),)
 4 | COMP=nv
 5 | endif
 6 | 
 7 | HAVE_PNG=0
 8 | ifeq ($(HAVE_PNG),1)
 9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 
11 | endif
12 | 
13 | 
14 | COMMONDIR=../../common
15 | 
16 | ifeq ($(COMP),nv)
17 | FC=nvfortran
18 | CC=nvc
19 | FCFLAGS=-O3 -mp=multicore -Minfo=mp
20 | CCFLAGS=-O3 -I$(COMMONDIR)
21 | LDFLAGS=
22 | LIBS= 
23 | endif
24 | 
25 | ifeq ($(COMP),gnu)
26 | FC=gfortran
27 | CC=gcc
28 | FCFLAGS=-O3 -Wall -fopenmp
29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR)
30 | LDFLAGS=-fopenmp
31 | LIBS=
32 | endif
33 | 
34 | 
35 | EXE=heat_serial
36 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o
37 | OBJS_PNG= $(COMMONDIR)/pngwriter.o
38 | 
39 | all: $(EXE)
40 | # Object dependencies: a source that "use"s a module lists that module's object file
41 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
42 | heat_mod.o: heat_mod.F90
43 | core.o: core.F90 heat_mod.o
44 | utilities.o: utilities.F90 heat_mod.o
45 | io.o: io.F90 heat_mod.o pngwriter_mod.o 
46 | setup.o: setup.F90 heat_mod.o utilities.o io.o
47 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o
48 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o
49 | 
50 | $(EXE): $(OBJS) $(OBJS_PNG)
51 | 	$(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
52 | 
53 | %.o: %.F90
54 | 	$(FC) $(FCFLAGS) -c $< -o $@
55 | 
56 | %.o: %.c
57 | 	$(CC) -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include $(CCFLAGS) -c $< -o $@
58 | 
59 | .PHONY: clean
60 | clean:
61 | 	-/bin/rm -f $(EXE) a.out *.o *.mod *.png *~  ../../common/*.o
62 | 
63 | 


--------------------------------------------------------------------------------
/content/exercise/serial/fortran/core.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Main solver routines for heat equation solver
 4 | module core
 5 |   use heat
 6 | 
 7 | contains
 8 | 
 9 |   ! Update the temperature values using a five-point stencil
10 |   ! Arguments:
11 |   !   curr (type(field)): field whose data receives the updated values
12 |   !   prev (type(field)): temperature values from the previous time step
13 |   !   a (real(dp)): diffusivity
14 |   !   dt (real(dp)): time step
15 |   subroutine evolve(curr, prev, a, dt)
16 | 
17 |     implicit none
18 | 
19 |     type(field), target, intent(inout) :: curr, prev
20 |     real(dp) :: a, dt
21 |     integer :: i, j, nx, ny
22 |     real(dp) :: dx, dy
23 |     real(dp), pointer, contiguous, dimension(:,:) :: currdata, prevdata
24 | 
25 |     ! Copy sizes and spacings to locals and point at the two data arrays
26 |     nx = curr%nx
27 |     ny = curr%ny
28 |     dx = curr%dx
29 |     dy = curr%dy
30 |     currdata => curr%data
31 |     prevdata => prev%data
32 | 
33 |     ! Determine the temperature field at the next time step. As we have
34 |     ! fixed boundary conditions, the outermost gridpoints are not
35 |     ! updated; the loops only visit indices 1..nx and 1..ny.
36 |     do j = 1, ny
37 |        do i = 1, nx
38 |           currdata(i, j) = prevdata(i, j) + a * dt * &
39 |                & ((prevdata(i-1, j) - 2.0 * prevdata(i, j) + &
40 |                &   prevdata(i+1, j)) / dx**2 + &
41 |                &  (prevdata(i, j-1) - 2.0 * prevdata(i, j) + &
42 |                &   prevdata(i, j+1)) / dy**2)
43 |        end do
44 |     end do
45 |   end subroutine evolve
46 | 
47 | end module core
48 | 


--------------------------------------------------------------------------------
/content/exercise/serial/fortran/heat_mod.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Field metadata for heat equation solver
 4 | module heat
 5 |   use iso_fortran_env, only : REAL64
 6 |   implicit none
 7 | 
 8 |   integer, parameter :: dp = REAL64
 9 |   real(dp), parameter :: DX = 0.01_dp, DY = 0.01_dp  ! Fixed grid spacing (dp literals keep full precision)
10 | 
11 |   type :: field
12 |      integer :: nx          ! dimensions of the field (the data array adds a halo)
13 |      integer :: ny
14 |      real(dp) :: dx         ! grid spacing
15 |      real(dp) :: dy
16 |      real(dp), dimension(:,:), allocatable :: data
17 |   end type field
18 | 
19 | contains
20 |   ! Initialize the field type metadata
21 |   ! Arguments:
22 |   !   field0 (type(field)): field whose metadata is set
23 |   !   nx, ny: field dimensions; the spacing is taken from DX and DY
24 |   subroutine set_field_dimensions(field0, nx, ny)
25 |     implicit none
26 | 
27 |     type(field), intent(out) :: field0
28 |     integer, intent(in) :: nx, ny
29 | 
30 |     field0%dx = DX
31 |     field0%dy = DY
32 |     field0%nx = nx
33 |     field0%ny = ny
34 | 
35 |   end subroutine set_field_dimensions
36 | 
37 | end module heat
38 | 


--------------------------------------------------------------------------------
/content/exercise/serial/fortran/io.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! I/O routines for heat equation solver
 4 | module io
 5 |   use heat
 6 | 
 7 | contains
 8 | 
 9 |   ! Output routine, saves the temperature distribution as a png image
10 |   ! Arguments:
11 |   !   curr (type(field)): variable with the temperature data
12 |   !   iter (integer): index of the time step, used in the file name
13 |   subroutine write_field(curr, iter)
14 | 
15 |     use pngwriter
16 |     implicit none
17 |     type(field), intent(in) :: curr
18 |     integer, intent(in) :: iter
19 | 
20 |     character(len=85) :: filename
21 | 
22 |     integer :: stat
23 |     real(dp), dimension(:,:), allocatable, target :: full_data
24 | 
25 |     allocate(full_data(curr%nx, curr%ny))
26 |     ! Copy the points 1..nx, 1..ny (everything except the halo) to the output array
27 |     full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny)
28 |     ! File name: heat_ + step index zero-padded to four digits + .png
29 |     write(filename,'(A5,I4.4,A4,A)')  'heat_', iter, '.png'
30 |     stat = save_png(full_data, curr%nx, curr%ny, filename)
31 |     deallocate(full_data)
32 | 
33 |   end subroutine write_field
34 | 
35 | 
36 |   ! Reads the temperature distribution from an input file
37 |   ! Arguments:
38 |   !   field0 (type(field)): field variable that will store the
39 |   !                         read data
40 |   !   filename (char): name of the input file
41 |   ! Note that this version assumes the input data to be in C memory layout
42 |   subroutine read_field(field0, filename)
43 | 
44 |     implicit none
45 |     type(field), intent(out) :: field0
46 |     character(len=85), intent(in) :: filename
47 | 
48 |     integer :: nx, ny, i
49 |     character(len=2) :: dummy
50 | 
51 |     real(dp), dimension(:,:), allocatable :: full_data
52 |     ! status='old': a missing input file is an error here, it must not be created
53 |     open(10, file=filename, status='old', action='read')
54 |     ! Read the header: a marker followed by the two dimensions
55 |     read(10, *) dummy, nx, ny
56 | 
57 |     call set_field_dimensions(field0, nx, ny)
58 | 
59 |     ! The arrays for temperature field contain also a halo region
60 |     allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
61 | 
62 |     allocate(full_data(nx, ny))
63 |     ! Read the data, one record per value of the first index
64 |     do i = 1, nx
65 |        read(10, *) full_data(i, 1:ny)
66 |     end do
67 | 
68 |     ! Copy to full array containing also boundaries
69 |     field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:)
70 | 
71 |     ! Set the boundary values by replicating the neighbouring layer
72 |     field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1)
73 |     field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny)
74 |     field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1)
75 |     field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1)
76 | 
77 |     close(10)
78 |     deallocate(full_data)
79 | 
80 |   end subroutine read_field
81 | 
82 | end module io
83 | 


--------------------------------------------------------------------------------
/content/exercise/serial/fortran/main.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Heat equation solver in 2D.
 4 | 
 5 | program heat_solve
 6 |   use heat
 7 |   use core
 8 |   use io
 9 |   use setup
10 |   use utilities
11 |   use omp_lib
12 | 
13 |   implicit none
14 | 
15 |   real(dp), parameter :: a = 0.5 ! Diffusion constant
16 |   type(field) :: current, previous    ! Current and previous temperature fields
17 | 
18 |   real(dp) :: dt     ! Time step
19 |   integer :: nsteps       ! Number of time steps
20 |   integer, parameter :: image_interval = 1500 ! Image output interval
21 | 
22 |   integer :: iter
23 | 
24 |   real(dp) :: average_temp   !  Average temperature
25 | 
26 |   real(kind=dp) :: start, stop ! Timers
27 |   ! NOTE(review): initialize names its dummies (previous, current); both get the same data
28 |   call initialize(current, previous, nsteps)
29 | 
30 |   ! Draw the picture of the initial state
31 |   call write_field(current, 0)
32 | 
33 |   average_temp = average(current)
34 |   write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp
35 | 
36 |   ! Largest stable time step
37 |   dt = current%dx**2 * current%dy**2 / &
38 |        & (2.0 * a * (current%dx**2 + current%dy**2))
39 | 
40 |   ! Main iteration loop
41 |   ! Only the loop itself is timed, using the OpenMP wall clock
42 |   start =  omp_get_wtime()
43 | 
44 |   do iter = 1, nsteps
45 |      call evolve(current, previous, a, dt)
46 |      if (mod(iter, image_interval) == 0) then
47 |         call write_field(current, iter)
48 |      end if
49 |      call swap_fields(current, previous)
50 |   end do
51 | 
52 |   stop = omp_get_wtime()
53 | 
54 |   ! Average temperature for reference; after the final swap the newest data is in previous
55 |   average_temp = average(previous)
56 | 
57 |   write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.'
58 |   write(*,'(A,F9.6)') 'Average temperature: ',  average_temp
59 |   if (command_argument_count() == 0) then
60 |       write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239
61 |   end if
62 | 
63 |   call finalize(current, previous)
64 | 
65 | end program heat_solve
66 | 


--------------------------------------------------------------------------------
/content/exercise/serial/fortran/pngwriter_mod.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! PNG writer for heat equation solver
 4 | module pngwriter
 5 |   use heat
 6 | 
 7 | contains
 8 |   ! Save a 2D array as a PNG file through the C routine save_png; returns its status
 9 |   function save_png(data, nx, ny, fname) result(stat)
10 | 
11 |     use, intrinsic :: ISO_C_BINDING
12 |     implicit none
13 | 
14 |     real(dp), dimension(:,:), intent(in) :: data
15 |     integer, intent(in) :: nx, ny
16 |     character(len=*), intent(in) :: fname
17 |     integer :: stat
18 | 
19 |     ! Interface for save_png C-function
20 |     interface
21 |        ! The C-function definition is
22 |        !   int save_png(double *data, const int nx, const int ny,
23 |        !                const char *fname, const char lang)
24 |        function save_png_c(data, nx, ny, fname, order) &
25 |             & bind(C,name="save_png") result(stat)
26 |          use, intrinsic :: ISO_C_BINDING
27 |          implicit none
28 |          real(kind=C_DOUBLE) :: data(*)
29 |          integer(kind=C_INT), value, intent(IN) :: nx, ny
30 |          character(kind=C_CHAR), intent(IN) :: fname(*)
31 |          character(kind=C_CHAR), value, intent(IN) :: order
32 |          integer(kind=C_INT) :: stat
33 |        end function save_png_c
34 |     end interface
35 |     ! The name is NUL-terminated for C; 'f' presumably selects Fortran ordering - confirm in pngwriter.c
36 |     stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
37 |     if (stat /= 0) then
38 |        write(*,*) 'save_png returned error!'
39 |     end if
40 | 
41 |   end function save_png
42 | 
43 | end module pngwriter
44 | 


--------------------------------------------------------------------------------
/content/exercise/serial/fortran/setup.F90:
--------------------------------------------------------------------------------
  1 | ! Copyright (c) 2019 CSC Training
  2 | ! Copyright (c) 2021 ENCCS
  3 | ! Setup routines for heat equation solver
  4 | module setup
  5 |   use heat
  6 | 
  7 | contains
  8 | 
  9 |   subroutine initialize(previous, current, nsteps)
 10 |     use utilities
 11 |     use io
 12 | 
 13 |     implicit none
 14 |     ! Sets up both fields with the same initial data and returns the step count
 15 |     type(field), intent(out) :: previous, current
 16 |     integer, intent(out) :: nsteps
 17 | 
 18 |     integer :: rows, cols
 19 |     logical :: using_input_file
 20 |     character(len=85) :: input_file, arg  ! Input file name and command line arguments
 21 | 
 22 | 
 23 |     ! Default values for grid size and time steps
 24 |     rows = 2000
 25 |     cols = 2000
 26 |     nsteps = 500
 27 |     using_input_file = .false.
 28 | 
 29 |     ! Read in the command line arguments and
 30 |     ! set up the needed variables
 31 |     select case(command_argument_count())
 32 |     case(0) ! No arguments -> default values
 33 |     case(1) ! One argument -> input file name
 34 |        using_input_file = .true.
 35 |        call get_command_argument(1, input_file)
 36 |     case(2) ! Two arguments -> input file name and number of steps
 37 |        using_input_file = .true.
 38 |        call get_command_argument(1, input_file)
 39 |        call get_command_argument(2, arg)
 40 |        read(arg, *) nsteps
 41 |     case(3) ! Three arguments -> rows, cols and nsteps
 42 |        call get_command_argument(1, arg)
 43 |        read(arg, *) rows
 44 |        call get_command_argument(2, arg)
 45 |        read(arg, *) cols
 46 |        call get_command_argument(3, arg)
 47 |        read(arg, *) nsteps
 48 |     case default
 49 |        call usage()
 50 |        stop
 51 |     end select
 52 | 
 53 |     ! Initialize the fields according to the command line arguments
 54 |     if (using_input_file) then
 55 |        call read_field(previous, input_file)
 56 |        call copy_fields(previous, current)
 57 |     else
 58 |        call set_field_dimensions(previous, rows, cols)
 59 |        call set_field_dimensions(current, rows, cols)
 60 |        call generate_field(previous)
 61 |        call copy_fields(previous, current)
 62 |     end if
 63 | 
 64 |   end subroutine initialize
 65 | 
 66 |   ! Generate the initial temperature field.  The pattern is a disc with a
 67 |   ! radius of nx / 6 in the center of the grid.
 68 |   ! Boundary conditions are (different) constant temperatures outside the grid
 69 |   subroutine generate_field(field0)
 70 |     use heat
 71 | 
 72 |     implicit none
 73 | 
 74 |     type(field), intent(inout) :: field0
 75 | 
 76 |     real(dp) :: radius2
 77 |     integer :: i, j, ds2
 78 | 
 79 |     ! The arrays for field contain also a halo region
 80 |     allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
 81 | 
 82 |     ! Square of the disk radius
 83 |     radius2 = (field0%nx / 6.0_dp)**2
 84 |     ! Points inside the disc get 5.0, all other points 65.0
 85 |     do j = 0, field0%ny + 1
 86 |        do i = 0, field0%nx + 1
 87 |           ds2 = int((i - field0%nx / 2.0_dp + 1)**2 + &
 88 |                & (j - field0%ny / 2.0_dp + 1)**2)
 89 |           if (ds2 < radius2) then
 90 |              field0%data(i,j) = 5.0_dp
 91 |           else
 92 |              field0%data(i,j) = 65.0_dp
 93 |           end if
 94 |        end do
 95 |     end do
 96 | 
 97 |     ! Boundary conditions: overwrite the four halo edges with constants
 98 |     field0%data(:,0) = 20.0_dp
 99 |     field0%data(:,field0%ny+1) = 70.0_dp
100 |     field0%data(0,:) = 85.0_dp
101 |     field0%data(field0%nx+1,:) = 5.0_dp
102 | 
103 |   end subroutine generate_field
104 | 
105 | 
106 |   ! Clean up routine for the field type
107 |   ! Arguments:
108 |   !   field0, field1 (type(field)): fields whose data arrays are deallocated
109 |   subroutine finalize(field0, field1)
110 |     use heat
111 | 
112 |     implicit none
113 | 
114 |     type(field), intent(inout) :: field0, field1
115 | 
116 |     deallocate(field0%data)
117 |     deallocate(field1%data)
118 | 
119 |   end subroutine finalize
120 | 
121 |   ! Helper routine that prints the accepted command lines; initialize
122 |   ! calls it when the user gives more than three arguments
123 |   subroutine usage()
124 |     implicit none
125 |     character(len=256) :: buf
126 |     ! Argument 0 is the name the program was invoked with
127 |     call get_command_argument(0, buf)
128 |     write (*,'(A)') 'Usage:'
129 |     write (*,'(A, " (default values will be used)")') trim(buf)
130 |     write (*,'(A, " <filename>")') trim(buf)
131 |     write (*,'(A, " <filename> <nsteps>")') trim(buf)
132 |     write (*,'(A, " <rows> <cols> <nsteps>")') trim(buf)
133 |   end subroutine usage
134 | 
135 | end module setup
136 | 


--------------------------------------------------------------------------------
/content/exercise/serial/fortran/utilities.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Utility routines for heat equation solver
 4 | !   NOTE: This file does not need to be edited!
 5 | module utilities
 6 |   use heat
 7 | 
 8 | contains
 9 | 
10 |   ! Swap the data arrays of two variables of type field
11 |   ! Arguments:
12 |   !   curr, prev (type(field)): the two variables whose data is exchanged
13 |   subroutine swap_fields(curr, prev)
14 | 
15 |     implicit none
16 | 
17 |     type(field), intent(inout) :: curr, prev
18 |     real(dp), allocatable, dimension(:,:) :: tmp
19 |     ! Three move_alloc calls hand the allocations over via tmp; nx, ny, dx, dy stay put
20 |     call move_alloc(curr%data, tmp)
21 |     call move_alloc(prev%data, curr%data)
22 |     call move_alloc(tmp, prev%data)
23 |   end subroutine swap_fields
24 | 
25 |   ! Copy the data and the metadata from one field to another
26 |   ! Arguments:
27 |   !   from_field (type(field)): variable to copy from
28 |   !   to_field (type(field)): variable to copy to
29 |   subroutine copy_fields(from_field, to_field)
30 | 
31 |     implicit none
32 | 
33 |     type(field), intent(in) :: from_field
34 |     type(field), intent(out) :: to_field
35 |     ! Consistency checks. NOTE(review): to_field is intent(out), so its data is
36 |     ! deallocated on entry and the size-mismatch branch looks unreachable - confirm
37 |     if (.not.allocated(from_field%data)) then
38 |        write (*,*) "Can not copy from a field without allocated data"
39 |        stop
40 |     end if
41 |     if (.not.allocated(to_field%data)) then
42 |        ! Target is not initialized, allocate memory with the source bounds
43 |        allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
44 |             & lbound(from_field%data, 2):ubound(from_field%data, 2)))
45 |     else if (any(shape(from_field%data) /= shape(to_field%data))) then
46 |        write (*,*) "Wrong field data sizes in copy routine"
47 |        print *, shape(from_field%data), shape(to_field%data)
48 |        stop
49 |     end if
50 | 
51 |     to_field%data = from_field%data
52 | 
53 |     to_field%nx = from_field%nx
54 |     to_field%ny = from_field%ny
55 |     to_field%dx = from_field%dx
56 |     to_field%dy = from_field%dy
57 |   end subroutine copy_fields
58 | 
59 |   function average(field0)
60 |     ! Mean temperature over the points 1..nx, 1..ny of field0 (halo excluded)
61 |     implicit none
62 | 
63 |     real(dp) :: average
64 |     type(field), intent(in) :: field0
65 | 
66 |     ! The point count is formed in real(dp): the integer product nx * ny
67 |     ! could overflow the default integer kind on very large grids
68 | 
69 |     average = sum(field0%data(1:field0%nx, 1:field0%ny))
70 |     average = average / (real(field0%nx, dp) * real(field0%ny, dp))
71 | 
72 |   end function average
73 | 
74 | end module utilities
75 | 


--------------------------------------------------------------------------------
/content/exercise/serial/heat.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | #ifndef HEAT_H_
 4 | #define HEAT_H_
 5 | // Guard name avoids double underscores, which are reserved identifiers
 6 | #include <vector>
 7 | 
 8 | // Datatype for temperature field
 9 | struct field {
10 |     // nx and ny are the dimensions of the field. The array data
11 |     // contains also ghost layers, so it will have dimensions nx+2 x ny+2
12 |     int nx;
13 |     int ny;
14 |     // Size of the grid cells
15 |     double dx;
16 |     double dy;
17 |     // The temperature values in the 2D grid
18 |     std::vector<double> data;
19 | };
20 | 
21 | // We use here fixed grid spacing
22 | const double DX = 0.01;
23 | const double DY = 0.01;
24 | 
25 | #if __cplusplus
26 |   extern "C" {
27 | #endif
28 | // Function prototypes
29 | void set_field_dimensions(field *temperature, int nx, int ny);
30 | 
31 | void initialize(int argc, char *argv[], field *temperature1,
32 |                 field *temperature2, int *nsteps);
33 | 
34 | void generate_field(field *temperature);
35 | 
36 | double average(field *temperature);
37 | 
38 | void evolve(field *curr, field *prev, double a, double dt);
39 | 
40 | void write_field(field *temperature, int iter);
41 | 
42 | void read_field(field *temperature1, field *temperature2,
43 |                 char *filename);
44 | 
45 | void copy_field(field *temperature1, field *temperature2);
46 | 
47 | void swap_fields(field *temperature1, field *temperature2);
48 | 
49 | void allocate_field(field *temperature);
50 | 
51 | #if __cplusplus
52 |   }
53 | #endif
54 | #endif  // HEAT_H_
55 | 
56 | 


--------------------------------------------------------------------------------
/content/exercise/serial/heat_0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/exercise/serial/heat_0000.png


--------------------------------------------------------------------------------
/content/exercise/serial/heat_0010.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/exercise/serial/heat_0010.png


--------------------------------------------------------------------------------
/content/exercise/serial/io.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2019 CSC Training
  2 | // Copyright (c) 2021 ENCCS
  3 | // I/O related functions for heat equation solver
  4 | 
  5 | #include <stdio.h>
  6 | #include <stdlib.h>
  7 | #include <string.h>
  8 | #include <assert.h>
  9 | 
 10 | #include "heat.h"
 11 | #include "pngwriter.h"
 12 | 
 13 | // Save a picture of the temperature distribution as a png file named
 14 | // heat_<iter>.png, with iter zero-padded to four digits.
 15 | void write_field(field *temperature, int iter)
 16 | {
 17 |     const int rows = temperature->nx;
 18 |     const int cols = temperature->ny;
 19 | 
 20 |     // save_png is handed only the actual data, so pack it into a
 21 |     // contiguous buffer that leaves out the boundary layers.
 22 |     std::vector<double> interior(rows * cols);
 23 |     // First interior value: skip one boundary row and one boundary column
 24 |     const double *src = temperature->data.data() + (cols + 2) + 1;
 25 |     double *dst = interior.data();
 26 |     for (int r = 0; r < rows; r++, src += cols + 2, dst += cols) {
 27 |         std::copy(src, src + cols, dst);
 28 |     }
 29 | 
 30 |     // Write out the data to a png file
 31 |     char filename[64];
 32 |     sprintf(filename, "%s_%04d.png", "heat", iter);
 33 |     save_png(interior.data(), rows, cols, filename, 'c');
 34 | }
 35 | 
 36 | // Read the initial temperature distribution from a file and
 37 | // initialize the temperature fields temperature1 and
 38 | // temperature2 to the same initial state. Exits if the file is unusable.
 39 | void read_field(field *temperature1, field *temperature2, char *filename)
 40 | {
 41 |     int nx, ny, ind;
 42 |     int nx_local, ny_local, count;
 43 | 
 44 |     // Open the file and read the header. A NULL stream must never reach
 45 |     // fscanf, so an open failure is reported like an unreadable header.
 46 |     FILE *fp = fopen(filename, "r");
 47 |     count = (fp != NULL) ? fscanf(fp, "# %d %d \n", &nx, &ny) : 0;
 48 |     if (count < 2) {
 49 |         fprintf(stderr, "Error while reading the input file!\n");
 50 |         if (fp != NULL) fclose(fp);
 51 |         exit(-1);
 52 |     }
 53 | 
 54 |     set_field_dimensions(temperature1, nx, ny);
 55 |     set_field_dimensions(temperature2, nx, ny);
 56 | 
 57 |     // Allocate arrays (including boundary layers)
 58 |     int newSize = (temperature1->nx + 2) * (temperature1->ny + 2);
 59 |     temperature1->data.resize(newSize, 0.0);
 60 |     temperature2->data.resize(newSize, 0.0);
 61 | 
 62 |     // Array from file
 63 |     std::vector<double> file_data(nx * ny, 0.0);
 64 | 
 65 |     // Read the actual data
 66 |     for (int i = 0; i < nx; i++) {
 67 |         for (int j = 0; j < ny; j++) {
 68 |             ind = i * ny + j;
 69 |             count = fscanf(fp, "%lf", &file_data[ind]);
 70 |         }
 71 |     }
 72 | 
 73 |     nx_local = temperature1->nx;
 74 |     ny_local = temperature1->ny;
 75 | 
 76 |     // Copy to the inner part of the full temperature field
 77 |     auto temperature_data_iterator = temperature1->data.begin() + (ny_local + 2) + 1;
 78 |     auto beginning_of_row = file_data.begin();
 79 |     for (int i = 0; i < nx_local; i++) {
 80 |         auto end_of_row = beginning_of_row + ny_local;
 81 |         std::copy(beginning_of_row, end_of_row, temperature_data_iterator);
 82 |         temperature_data_iterator += ny_local + 2;
 83 |         beginning_of_row = end_of_row;
 84 |     }
 85 | 
 86 |     // Set the boundary values: first the left and right columns ...
 87 |     for (int i = 1; i < nx_local + 1; i++) {
 88 |         temperature1->data[i * (ny_local + 2)] = temperature1->data[i * (ny_local + 2) + 1];
 89 |         temperature1->data[i * (ny_local + 2) + ny + 1] = temperature1->data[i * (ny_local + 2) + ny];
 90 |     }
 91 |     for (int j = 0; j < ny + 2; j++) {  // ... then rows 0 and nx+1; a row is ny_local + 2 wide
 92 |         temperature1->data[j] = temperature1->data[(ny_local + 2) + j];
 93 |         temperature1->data[(nx_local + 1) * (ny_local + 2) + j] =
 94 |             temperature1->data[nx_local * (ny_local + 2) + j];
 95 |     }
 96 | 
 97 |     copy_field(temperature1, temperature2);
 98 | 
 99 |     fclose(fp);
100 | }
101 | 


--------------------------------------------------------------------------------
/content/exercise/serial/main.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Main routine for heat equation solver in 2D.
 4 | 
 5 | #include <stdio.h>
 6 | #include <omp.h>
 7 | 
 8 | #include "heat.h"
 9 | 
10 | int main(int argc, char **argv)
11 | {
12 |     // Image output interval (in time steps)
13 |     int image_interval = 1500;
14 | 
15 |     // Number of time steps
16 |     int nsteps;
17 |     // Current and previous temperature fields
18 |     field current, previous;
19 |     initialize(argc, argv, &current, &previous, &nsteps);
20 | 
21 |     // Output the initial field
22 |     write_field(&current, 0);
23 | 
24 |     double average_temp = average(&current);
25 |     printf("Average temperature at start: %f\n", average_temp);
26 | 
27 |     // Diffusion constant
28 |     double a = 0.5;
29 | 
30 |     // Compute the largest stable time step
31 |     double dx2 = current.dx * current.dx;
32 |     double dy2 = current.dy * current.dy;
33 |     // Time step
34 |     double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));
35 | 
36 |     // Get the start time stamp; only the time evolution loop is timed
37 |     double start_clock = omp_get_wtime();
38 | 
39 |     // Time evolution
40 |     for (int iter = 1; iter <= nsteps; iter++) {
41 |         evolve(&current, &previous, a, dt);
42 |         if (iter % image_interval == 0) {
43 |             write_field(&current, iter);
44 |         }
45 |         // Swap current field so that it will be used
46 |         // as previous for next iteration step
47 |         swap_fields(&current, &previous);
48 |     }
49 | 
50 |     double stop_clock = omp_get_wtime();
51 | 
52 |     // Average temperature for reference; after the last swap the newest data is in previous
53 |     average_temp = average(&previous);
54 | 
55 |     // Report the wall-clock time used for all the iterations
56 |     printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock));
57 |     printf("Average temperature: %f\n", average_temp);
58 |     if (argc == 1) {
59 |         printf("Reference value with default arguments: 59.281239\n");
60 |     }
61 | 
62 |     // Output the final field
63 |     write_field(&previous, nsteps);
64 | 
65 |     return 0;
66 | }
67 | 


--------------------------------------------------------------------------------
/content/exercise/serial/utilities.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Utility functions for heat equation solver
 4 | 
 5 | #include <stdlib.h>
 6 | #include <assert.h>
 7 | 
 8 | #include "heat.h"
 9 | 
10 | 
11 | // Copy the data of temperature1 into temperature2; the asserts require matching nx, ny and data size
12 | void copy_field(field *temperature1, field *temperature2)
13 | {
14 |     assert(temperature1->nx == temperature2->nx);
15 |     assert(temperature1->ny == temperature2->ny);
16 |     assert(temperature1->data.size() == temperature2->data.size());
17 |     std::copy(temperature1->data.begin(), temperature1->data.end(),
18 |               temperature2->data.begin());
19 | }
20 | 
21 | // Exchange the data arrays of temperature1 and temperature2
22 | void swap_fields(field *temperature1, field *temperature2)
23 | {
24 |     temperature1->data.swap(temperature2->data);
25 | }
26 | 
27 | // Size the data array of a temperature field; new elements are set to zero
28 | void allocate_field(field *temperature)
29 | {
30 |     // Two extra rows and two extra columns hold the boundary layers
31 |     const int total = (temperature->nx + 2) * (temperature->ny + 2);
32 |     temperature->data.resize(total, 0.0);
33 | }
34 | 
35 | // Mean temperature over the non-boundary grid cells
36 | double average(field *temperature)
37 | {
38 |     const int nx = temperature->nx;
39 |     const int ny = temperature->ny;
40 |     const double *values = temperature->data.data();
41 |     double total = 0.0;
42 |     for (int row = 1; row <= nx; row++) {
43 |         const double *line = values + row * (ny + 2);
44 |         for (int col = 1; col <= ny; col++) {
45 |             total += line[col];
46 |         }
47 |     }
48 |     return total / (nx * ny);
49 | }
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/content/exercise/solution/common/pngwriter.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2019 CSC Training */
 2 | /* Copyright (c) 2021 ENCCS */
 3 | #ifndef PNGWRITER_H_
 4 | #define PNGWRITER_H_
 5 | 
 6 | #if __cplusplus
 7 |   extern "C" {
 8 | #endif
 9 | /* Writes data (nx by ny values) to the PNG file fname; callers here pass 'c' or 'f' as lang */
10 | int save_png(double *data, const int nx, const int ny, const char *fname,
11 |              const char lang);
12 | 
13 | #if __cplusplus
14 |   }
15 | #endif
16 | #endif
17 | 


--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 CSC Training
 4 | Copyright (c) 2021 ENCCS
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 CSC Training
 2 | # Copyright (c) 2021 ENCCS
 3 | ifeq ($(COMP),)
 4 | COMP=nv
 5 | endif
 6 | # PNG output is off by default; pass HAVE_PNG=1 on the make command line to enable it
 7 | HAVE_PNG=0
 8 | ifeq ($(HAVE_PNG),1)
 9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib
11 | endif
12 | 
13 | COMMONDIR=../common
14 | # Per-toolchain compilers and flags, selected with COMP=gnu|nv|intel
15 | ifeq ($(COMP),gnu)
16 | CXX=g++
17 | CC=gcc
18 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR)
19 | LDFLAGS=
20 | LIBS=
21 | endif
22 | 
23 | ifeq ($(COMP),nv)
24 | CXX=nvc++
25 | CC=nvc
26 | CCFLAGS=-O3 -mp=gpu -Minfo=mp -I$(COMMONDIR)
27 | LDFLAGS=
28 | LIBS=
29 | endif
30 | 
31 | ifeq ($(COMP),intel)
32 | CXX=icpx
33 | CC=icx
34 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR)
35 | LDFLAGS=
36 | LIBS=
37 | endif
38 | 
39 | EXE=heat_serial
40 | OBJS=main.o core.o setup.o utilities.o io.o
41 | OBJS_PNG=$(COMMONDIR)/pngwriter.o
42 | 
43 | 
44 | all: $(EXE)
45 | 
46 | # Extra prerequisites only; the recipes come from the pattern rules below
47 | core.o: core.cpp heat.h
48 | utilities.o: utilities.cpp heat.h
49 | setup.o: setup.cpp heat.h
50 | io.o: io.cpp heat.h $(COMMONDIR)/pngwriter.h
51 | main.o: main.cpp heat.h
52 | $(OBJS_PNG): $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
53 | # Only pngwriter.c needs the PNG switch and the extra include paths
54 | $(OBJS_PNG): CCFLAGS += -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include
55 | 
56 | # Link with the C++ driver; the PNG link flags are empty unless HAVE_PNG=1
57 | $(EXE): $(OBJS) $(OBJS_PNG)
58 | 	$(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
59 | 
60 | %.o: %.cpp
61 | 	$(CXX) $(CCFLAGS) -c $< -o $@
62 | 
63 | %.o: %.c
64 | 	$(CC) $(CCFLAGS) -c $< -o $@
65 | 
66 | .PHONY: all clean
67 | clean:
68 | 	-/bin/rm -f $(EXE) a.out *.o *.png *~ $(COMMONDIR)/*.o
69 | 


--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/core.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Main solver routines for heat equation solver
 4 | 
 5 | #include "heat.h"
 6 | 
 7 | // Advance the temperature field by one time step with a five-point stencil
 8 | // Arguments:
 9 | //   curr: field that receives the updated temperature values
10 | //   prev: temperature values from the previous time step
11 | //   a: diffusivity
12 | //   dt: time step
13 | void evolve(field *curr, field *prev, double a, double dt)
14 | {
15 |   // Plain scalars and raw pointers are used in the offloaded loop instead of the structs
16 |   int nx = curr->nx;
17 |   int ny = curr->ny;
18 |   int stride = ny + 2;  // row length including the two boundary cells
19 |   double *unew = curr->data.data();
20 |   double *uold = prev->data.data();
21 |   double dx2 = prev->dx * prev->dx;
22 |   double dy2 = prev->dy * prev->dy;
23 | 
24 |   // Only the interior points are written: with fixed boundary conditions
25 |   // the outermost grid points keep their values.
26 |   #pragma omp target teams distribute parallel for
27 |   for (int i = 1; i <= nx; i++) {
28 |     for (int j = 1; j <= ny; j++) {
29 |       int c = i * stride + j;  // centre point (i, j)
30 |       int up = c + stride;     // (i + 1, j)
31 |       int down = c - stride;   // (i - 1, j)
32 |       int right = c + 1;       // (i, j + 1)
33 |       int left = c - 1;        // (i, j - 1)
34 |       unew[c] = uold[c] + a*dt*
35 |         ((uold[up] - 2.0*uold[c] + uold[down]) / dx2 +
36 |          (uold[right] - 2.0*uold[c] + uold[left]) / dy2);
37 |     }
38 |   }
39 | }
40 | 
41 | // Start a data region and copy both temperature fields to the device
42 | void enter_data(field *curr, field *prev)
43 | {
44 |     // Raw pointers are needed for the array sections in the map clause
45 |     double *currdata = curr->data.data();
46 |     double *prevdata = prev->data.data();
47 | 
48 |     // Number of elements in a field, boundary layers included
49 |     int count = (curr->nx + 2) * (curr->ny + 2);
50 | 
51 |     // Unstructured mapping: the region is closed by the
52 |     // "target exit data" directive in exit_data
53 |     #pragma omp target enter data \
54 |     map(to: currdata[0:count], prevdata[0:count])
55 | }
56 | 
57 | // End the data region and copy both temperature fields back to the host
58 | void exit_data(field *curr, field *prev)
59 | {
60 |     // Raw pointers are needed for the array sections in the map clause
61 |     double *currdata = curr->data.data();
62 |     double *prevdata = prev->data.data();
63 | 
64 |     // Number of elements in a field, boundary layers included
65 |     int count = (curr->nx + 2) * (curr->ny + 2);
66 | 
67 |     // Counterpart of the "target enter data" directive in enter_data:
68 |     // the device values are copied to the host arrays
69 |     #pragma omp target exit data \
70 |     map(from: currdata[0:count], prevdata[0:count])
71 | }
72 | 
73 | // Copy a temperature field from the device to the host
74 | void update_host(field *temperature)
75 | {
76 |     // Raw pointer is needed for the array section in the update clause
77 |     double *data = temperature->data.data();
78 | 
79 |     // Number of elements in the field, boundary layers included
80 |     int count = (temperature->nx + 2) * (temperature->ny + 2);
81 | 
82 |     // Refresh the host copy only; the field is expected to be mapped
83 |     // on the device already (see enter_data)
84 |     #pragma omp target update from(data[0:count])
85 | }
86 | 


--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/fortran/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 CSC Training
 2 | # Copyright (c) 2021 ENCCS
 3 | ifeq ($(COMP),)
 4 | COMP=nv
 5 | endif
 6 | # PNG output is off by default; pass HAVE_PNG=1 on the make command line to enable it
 7 | HAVE_PNG=0
 8 | ifeq ($(HAVE_PNG),1)
 9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 
11 | endif
12 | 
13 | # Directory with the shared C pngwriter sources
14 | COMMONDIR=../../common
15 | 
16 | ifeq ($(COMP),nv)
17 | FC=nvfortran
18 | CC=nvc
19 | FCFLAGS=-O3 -mp=gpu -Minfo=mp
20 | CCFLAGS=-O3 -I$(COMMONDIR)
21 | LDFLAGS=
22 | LIBS= 
23 | endif
24 | 
25 | ifeq ($(COMP),gnu)
26 | FC=gfortran
27 | CC=gcc
28 | FCFLAGS=-O3 -Wall -fopenmp
29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR)
30 | LDFLAGS=-fopenmp
31 | LIBS=
32 | endif
33 | 
34 | 
35 | EXE=heat_serial
36 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o
37 | OBJS_PNG= $(COMMONDIR)/pngwriter.o
38 | 
39 | all: $(EXE)
40 | 
41 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
42 | heat_mod.o: heat_mod.F90
43 | core.o: core.F90 heat_mod.o
44 | utilities.o: utilities.F90 heat_mod.o
45 | io.o: io.F90 heat_mod.o pngwriter_mod.o 
46 | setup.o: setup.F90 heat_mod.o utilities.o io.o
47 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o
48 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o
49 | 
50 | $(EXE): $(OBJS) $(OBJS_PNG)
51 | 	$(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
52 | 
53 | %.o: %.F90
54 | 	$(FC) $(FCFLAGS) -c $< -o $@
55 | 
56 | %.o: %.c
57 | 	$(CC) -DHAVE_PNG=$(HAVE_PNG) $(CCFLAGS) -c $< -o $@
58 | 
59 | .PHONY: all clean
60 | clean:
61 | 	-/bin/rm -f $(EXE) a.out *.o *.mod *.png *~  $(COMMONDIR)/*.o
62 | 
63 | 


--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/fortran/core.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Main solver routines for heat equation solver
 4 | module core
 5 |   use heat
 6 | 
 7 | contains
 8 | 
 9 |   ! Update the temperature values using five-point stencil
10 |   ! Arguments:
11 |   !   curr (type(field)): current temperature values (interior is overwritten)
12 |   !   prev (type(field)): temperature values from previous time step
13 |   !   a (real(dp)): diffusivity
14 |   !   dt (real(dp)): time step
15 |   subroutine evolve(curr, prev, a, dt)
16 | 
17 |     implicit none
18 | 
19 |     type(field),target, intent(inout) :: curr, prev
20 |     real(dp), intent(in) :: a, dt
21 |     integer :: i, j, nx, ny
22 |     real(dp) :: dx, dy
23 |     real(dp), pointer, contiguous, dimension(:,:) :: currdata, prevdata
24 | 
25 |     ! Use plain scalars and array pointers in the offloaded loop
26 |     nx = curr%nx
27 |     ny = curr%ny
28 |     dx = curr%dx
29 |     dy = curr%dy
30 |     currdata => curr%data
31 |     prevdata => prev%data
32 | 
33 |     ! Determine the temperature field at next time step. As we have
34 |     ! fixed boundary conditions, the outermost gridpoints are not
35 |     ! updated.
36 |     !$omp target teams distribute parallel do  
37 |     do j = 1, ny
38 |        do i = 1, nx
39 |           currdata(i, j) = prevdata(i, j) + a * dt * &
40 |                & ((prevdata(i-1, j) - 2.0_dp * prevdata(i, j) + &
41 |                &   prevdata(i+1, j)) / dx**2 + &
42 |                &  (prevdata(i, j-1) - 2.0_dp * prevdata(i, j) + &
43 |                &   prevdata(i, j+1)) / dy**2)
44 |        end do
45 |     end do
46 |     !$omp end target teams distribute parallel do 
47 |   end subroutine evolve
48 | 
49 |   ! Start a data region and copy temperature fields to the device
50 |   !   curr (type(field)): current temperature values
51 |   !   prev (type(field)): values from previous time step
52 |   subroutine enter_data(curr, prev)
53 |     implicit none
54 |     type(field), target, intent(in) :: curr, prev
55 |     real(kind=dp), pointer, contiguous :: currdata(:,:), prevdata(:,:)
56 | 
57 |     currdata => curr%data
58 |     prevdata => prev%data
59 | 
60 |   ! Unstructured mapping; the region is closed in exit_data
61 |     !$omp target enter data map(to: currdata, prevdata)
62 | 
63 |   end subroutine enter_data
64 | 
65 |   ! End a data region and copy temperature fields back to the host
66 |   !   curr (type(field)): current temperature values
67 |   !   prev (type(field)): values from previous time step
68 |   subroutine exit_data(curr, prev)
69 |     implicit none
70 |     type(field), target :: curr, prev
71 |     real(kind=dp), pointer, contiguous :: currdata(:,:), prevdata(:,:)
72 | 
73 |     currdata => curr%data
74 |     prevdata => prev%data
75 | 
76 |   ! Counterpart of the "target enter data" directive in enter_data
77 |     !$omp target exit data map(from: currdata, prevdata)
78 | 
79 |   end subroutine exit_data
80 | 
81 |   ! Copy a temperature field from the device to the host
82 |   !   temperature (type(field)): temperature field
83 |   subroutine update_host(temperature)
84 |     implicit none
85 |     type(field), target :: temperature
86 |     real(kind=dp), pointer, contiguous :: tempdata(:,:)
87 | 
88 |     tempdata => temperature%data
89 | 
90 |   ! Refresh the host copy; the field is expected to be mapped already
91 |     !$omp target update from(tempdata)
92 | 
93 |   end subroutine update_host
94 | 
95 | end module core
96 | 


--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/fortran/heat_mod.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Field metadata for heat equation solver
 4 | module heat
 5 |   use iso_fortran_env, only : REAL64
 6 |   implicit none
 7 | 
 8 |   integer, parameter :: dp = REAL64
 9 |   real(dp), parameter :: DX = 0.01_dp, DY = 0.01_dp  ! Fixed grid spacing (dp literals)
10 | 
11 |   type :: field
12 |      integer :: nx          ! dimensions of the field
13 |      integer :: ny
14 |      real(dp) :: dx         ! grid spacing
15 |      real(dp) :: dy
16 |      real(dp), dimension(:,:), allocatable :: data
17 |   end type field
18 | 
19 | contains
20 |   ! Initialize the field type metadata
21 |   ! Arguments:
22 |   !   field0 (type(field)): field whose metadata is set
23 |   !   nx, ny: field dimensions; the spatial step sizes are set to DX and DY
24 |   subroutine set_field_dimensions(field0, nx, ny)
25 |     implicit none
26 | 
27 |     type(field), intent(out) :: field0
28 |     integer, intent(in) :: nx, ny
29 | 
30 |     field0%dx = DX
31 |     field0%dy = DY
32 |     field0%nx = nx
33 |     field0%ny = ny
34 | 
35 |   end subroutine set_field_dimensions
36 | 
37 | end module heat
38 | 


--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/fortran/io.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! I/O routines for heat equation solver
 4 | module io
 5 |   use heat
 6 | 
 7 | contains
 8 | 
 9 |   ! Output routine, saves the temperature distribution as a png image
10 |   ! Arguments:
11 |   !   curr (type(field)): variable with the temperature data
12 |   !   iter (integer): index of the time step
13 |   subroutine write_field(curr, iter)
14 | 
15 |     use pngwriter
16 |     implicit none
17 |     type(field), intent(in) :: curr
18 |     integer, intent(in) :: iter
19 | 
20 |     character(len=85) :: filename
21 | 
22 |     integer :: stat
23 |     real(dp), dimension(:,:), allocatable, target :: full_data
24 | 
25 |     allocate(full_data(curr%nx, curr%ny))
26 |     ! Copy the interior points (without the halo) to the output array
27 |     full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny)
28 | 
29 |     write(filename,'(A5,I4.4,A4,A)')  'heat_', iter, '.png'
30 |     stat = save_png(full_data, curr%nx, curr%ny, filename)
31 |     deallocate(full_data)
32 | 
33 |   end subroutine write_field
34 | 
35 | 
36 |   ! Reads the temperature distribution from an input file
37 |   ! Arguments:
38 |   !   field0 (type(field)): field variable that will store the
39 |   !                         read data
40 |   !   filename (char): name of the input file
41 |   ! Note that this version assumes the input data to be in C memory layout
42 |   ! The program is stopped with a message if the file cannot be opened
43 |   subroutine read_field(field0, filename)
44 | 
45 |     implicit none
46 |     type(field), intent(out) :: field0
47 |     character(len=85), intent(in) :: filename
48 | 
49 |     integer :: nx, ny, i, ios
50 |     character(len=2) :: dummy
51 |     real(dp), dimension(:,:), allocatable :: full_data
52 | 
53 |     open(10, file=filename, status='old', action='read', iostat=ios)
54 |     if (ios /= 0) then
55 |        write (*,*) "Can not open input file ", trim(filename)
56 |        stop
57 |     end if
58 |     ! Read the header
59 |     read(10, *) dummy, nx, ny
60 |     call set_field_dimensions(field0, nx, ny)
61 | 
62 |     ! The arrays for temperature field contain also a halo region
63 |     allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
64 |     allocate(full_data(nx, ny))
65 |     ! Read the data
66 |     do i = 1, nx
67 |        read(10, *) full_data(i, 1:ny)
68 |     end do
69 |     ! Copy to full array containing also boundaries
70 |     field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:)
71 | 
72 |     ! Set the boundary values
73 |     field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1)
74 |     field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny)
75 |     field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1)
76 |     field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1)
77 | 
78 |     close(10)
79 |     deallocate(full_data)
80 |   end subroutine read_field
81 | 
82 | end module io
83 | 


--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/fortran/main.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Heat equation solver in 2D.
 4 | 
 5 | program heat_solve
 6 |   use heat
 7 |   use core
 8 |   use io
 9 |   use setup
10 |   use utilities
11 |   use omp_lib
12 | 
13 |   implicit none
14 | 
15 |   real(dp), parameter :: a = 0.5 ! Diffusion constant
16 |   type(field) :: current, previous    ! Current and previous temperature fields
17 | 
18 |   real(dp) :: dt     ! Time step
19 |   integer :: nsteps       ! Number of time steps
20 |   integer, parameter :: image_interval = 1500 ! Image output interval
21 | 
22 |   integer :: iter
23 | 
24 |   real(dp) :: average_temp   !  Average temperature
25 | 
26 |   real(kind=dp) :: start, stop ! Timers
27 | 
28 |   call initialize(current, previous, nsteps)
29 | 
30 |   ! Draw the picture of the initial state
31 |   call write_field(current, 0)
32 | 
33 |   average_temp = average(current)
34 |   write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp
35 | 
36 |   ! Largest stable time step
37 |   dt = current%dx**2 * current%dy**2 / &
38 |        & (2.0 * a * (current%dx**2 + current%dy**2))
39 | 
40 |   ! Main iteration loop; the timed section includes the data transfers
41 | 
42 |   start =  omp_get_wtime()
43 | 
44 |   ! copy data to device
45 |   call enter_data(current, previous)
46 | 
47 |   do iter = 1, nsteps
48 |      call evolve(current, previous, a, dt)
49 |      if (mod(iter, image_interval) == 0) then
50 |         ! update data on host for output
51 |         call update_host(current)
52 |         call write_field(current, iter)
53 |      end if
54 |      call swap_fields(current, previous)
55 |   end do
56 | 
57 |   ! copy data back to host
58 |   call exit_data(current, previous)
59 | 
60 |   stop = omp_get_wtime()
61 | 
62 |   ! After the final swap the latest values are in previous
63 |   average_temp = average(previous)
64 | 
65 |   write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.'
66 |   write(*,'(A,F9.6)') 'Average temperature: ',  average_temp
67 |   if (command_argument_count() == 0) then
68 |       write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239
69 |   end if
70 | 
71 |   call finalize(current, previous)
72 | 
73 | end program heat_solve
74 | 


--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/fortran/pngwriter_mod.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! PNG writer for heat equation solver
 4 | module pngwriter
 5 |   use heat
 6 | 
 7 | contains
 8 |   ! Fortran wrapper for the C routine save_png; returns its status code
 9 |   function save_png(data, nx, ny, fname) result(stat)
10 | 
11 |     use, intrinsic :: ISO_C_BINDING
12 |     implicit none
13 | 
14 |     real(dp), dimension(:,:), intent(in) :: data
15 |     integer, intent(in) :: nx, ny
16 |     character(len=*), intent(in) :: fname
17 |     integer :: stat
18 | 
19 |     ! Interface for save_png C-function
20 |     interface
21 |        ! The C-function declaration in pngwriter.h is
22 |        !   int save_png(double *data, const int nx, const int ny,
23 |        !                const char *fname, const char lang)
24 |        function save_png_c(data, nx, ny, fname, order) &
25 |             & bind(C,name="save_png") result(stat)
26 |          use, intrinsic :: ISO_C_BINDING
27 |          implicit none
28 |          real(kind=C_DOUBLE) :: data(*)
29 |          integer(kind=C_INT), value, intent(IN) :: nx, ny
30 |          character(kind=C_CHAR), intent(IN) :: fname(*)
31 |          character(kind=C_CHAR), value, intent(IN) :: order
32 |          integer(kind=C_INT) :: stat
33 |        end function save_png_c
34 |     end interface
35 |     ! The file name is passed null-terminated; 'f' is the order flag used from Fortran
36 |     stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
37 |     if (stat /= 0) then
38 |        write(*,*) 'save_png returned error!'
39 |     end if
40 | 
41 |   end function save_png
42 | 
43 | end module pngwriter
44 | 


--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/fortran/utilities.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Utility routines for heat equation solver
 4 | !   NOTE: This file does not need to be edited!
 5 | module utilities
 6 |   use heat
 7 | 
 8 | contains
 9 | 
10 |   ! Swap the data fields of two variables of type field
11 |   ! Arguments:
12 |   !   curr, prev (type(field)): the two variables that are swapped
13 |   subroutine swap_fields(curr, prev)
14 | 
15 |     implicit none
16 | 
17 |     type(field), intent(inout) :: curr, prev
18 |     real(dp), allocatable, dimension(:,:) :: tmp
19 | 
20 |     call move_alloc(curr%data, tmp)
21 |     call move_alloc(prev%data, curr%data)
22 |     call move_alloc(tmp, prev%data)
23 |   end subroutine swap_fields
24 | 
25 |   ! Copy the data from one field to another
26 |   ! Arguments:
27 |   !   from_field (type(field)): variable to copy from
28 |   !   to_field (type(field)): variable to copy to
29 |   subroutine copy_fields(from_field, to_field)
30 | 
31 |     implicit none
32 | 
33 |     type(field), intent(in) :: from_field
34 |     type(field), intent(out) :: to_field
35 |     ! NOTE(review): intent(out) deallocates to_field%data on entry, so the size check looks unreachable - confirm
36 |     ! Consistency checks
37 |     if (.not.allocated(from_field%data)) then
38 |        write (*,*) "Can not copy from a field without allocated data"
39 |        stop
40 |     end if
41 |     if (.not.allocated(to_field%data)) then
42 |        ! Target is not initialized, allocate memory
43 |        allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
44 |             & lbound(from_field%data, 2):ubound(from_field%data, 2)))
45 |     else if (any(shape(from_field%data) /= shape(to_field%data))) then
46 |        write (*,*) "Wrong field data sizes in copy routine"
47 |        print *, shape(from_field%data), shape(to_field%data)
48 |        stop
49 |     end if
50 | 
51 |     to_field%data = from_field%data
52 | 
53 |     to_field%nx = from_field%nx
54 |     to_field%ny = from_field%ny
55 |     to_field%dx = from_field%dx
56 |     to_field%dy = from_field%dy
57 |   end subroutine copy_fields
58 | 
59 |   ! Average of the field values over the index range 1:nx, 1:ny
60 |   ! Arguments:
61 |   !   field0 (type(field)): field to be averaged
62 |   function average(field0)
63 | 
64 |     implicit none
65 | 
66 |     real(dp) :: average
67 |     type(field), intent(in) :: field0
68 | 
69 |     average = sum(field0%data(1:field0%nx, 1:field0%ny))
70 |     average = average / (field0%nx * field0%ny)
71 | 
72 |   end function average
73 | 
74 | end module utilities
75 | 


--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/heat.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | #ifndef HEAT_H_
 4 | #define HEAT_H_
 5 | 
 6 | #include <vector>
 7 | 
 8 | // Datatype for temperature field
 9 | struct field {
10 |     // nx and ny are the dimensions of the field. The array data
11 |     // contains also ghost layers, so it will have dimensions nx+2 x ny+2
12 |     int nx;
13 |     int ny;
14 |     // Size of the grid cells
15 |     double dx;
16 |     double dy;
17 |     // The temperature values in the 2D grid
18 |     std::vector<double> data;
19 | };
20 | 
21 | // We use here fixed grid spacing
22 | const double DX = 0.01;
23 | const double DY = 0.01;
24 | 
25 | #ifdef __cplusplus
26 |   extern "C" {
27 | #endif
28 | // Function prototypes
29 | void set_field_dimensions(field *temperature, int nx, int ny);
30 | 
31 | void initialize(int argc, char *argv[], field *temperature1,
32 |                 field *temperature2, int *nsteps);
33 | 
34 | void generate_field(field *temperature);
35 | 
36 | double average(field *temperature);
37 | 
38 | void evolve(field *curr, field *prev, double a, double dt);
39 | 
40 | void write_field(field *temperature, int iter);
41 | 
42 | void read_field(field *temperature1, field *temperature2,
43 |                 char *filename);
44 | 
45 | void copy_field(field *temperature1, field *temperature2);
46 | 
47 | void swap_fields(field *temperature1, field *temperature2);
48 | 
49 | void allocate_field(field *temperature);
50 | // Device data management (OpenMP target data mapping)
51 | void enter_data(field *temperature1, field *temperature2);
52 | 
53 | void exit_data(field *temperature1, field *temperature2);
54 | 
55 | void update_host(field *temperature);
56 | 
57 | #ifdef __cplusplus
58 |   }
59 | #endif
60 | #endif  // HEAT_H_
61 | 
62 | 


--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/io.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2019 CSC Training
  2 | // Copyright (c) 2021 ENCCS
  3 | // I/O related functions for heat equation solver
  4 | 
  5 | #include <stdio.h>
  6 | #include <stdlib.h>
  7 | #include <string.h>
  8 | #include <assert.h>
  9 | 
 10 | #include "heat.h"
 11 | #include "pngwriter.h"
 12 | 
 13 | // Write the temperature field of time step iter to the image file
 14 | // heat_<iter>.png (iter is zero-padded to four digits).
 15 | void write_field(field *temperature, int iter)
 16 | {
 17 |     const int nx = temperature->nx;
 18 |     const int ny = temperature->ny;
 19 |     const int stride = ny + 2;  // row length including boundary cells
 20 | 
 21 |     // save_png gets only the interior points, so pack them row by row
 22 |     // into a contiguous nx x ny array without the boundary layers.
 23 |     std::vector<double> inner_data(nx * ny);
 24 |     for (int i = 0; i < nx; i++) {
 25 |         auto row_begin = temperature->data.begin() + (i + 1) * stride + 1;
 26 |         std::copy(row_begin, row_begin + ny, inner_data.begin() + i * ny);
 27 |     }
 28 | 
 29 |     // Build the file name and hand the packed data over to the PNG writer;
 30 |     // 'c' is the language flag this C++ code passes to save_png
 31 |     char filename[64];
 32 |     sprintf(filename, "%s_%04d.png", "heat", iter);
 33 |     save_png(inner_data.data(), nx, ny, filename, 'c');
 34 | }
 35 | 
 36 | // Read the initial temperature distribution from a file and
 37 | // initialize the temperature fields temperature1 and
 38 | // temperature2 to the same initial state.
 39 | // The file is expected to start with a header "# nx ny" followed by
 40 | // nx * ny values. The program exits with an error message if the file
 41 | // cannot be opened or its contents cannot be parsed.
 42 | void read_field(field *temperature1, field *temperature2, char *filename)
 43 | {
 44 |     int nx, ny;
 45 | 
 46 |     FILE *fp = fopen(filename, "r");
 47 |     if (fp == NULL) {
 48 |         fprintf(stderr, "Error while opening the input file!\n");
 49 |         exit(-1);
 50 |     }
 51 | 
 52 |     // Read the header; non-positive dimensions are rejected as well
 53 |     if (fscanf(fp, "# %d %d \n", &nx, &ny) < 2 || nx < 1 || ny < 1) {
 54 |         fprintf(stderr, "Error while reading the input file!\n");
 55 |         fclose(fp);
 56 |         exit(-1);
 57 |     }
 58 | 
 59 |     set_field_dimensions(temperature1, nx, ny);
 60 |     set_field_dimensions(temperature2, nx, ny);
 61 | 
 62 |     // Allocate arrays (including boundary layers)
 63 |     int newSize = (temperature1->nx + 2) * (temperature1->ny + 2);
 64 |     temperature1->data.resize(newSize, 0.0);
 65 |     temperature2->data.resize(newSize, 0.0);
 66 | 
 67 |     // Array from file
 68 |     std::vector<double> file_data(nx * ny, 0.0);
 69 | 
 70 |     // Read the actual data; a short or malformed file is an error
 71 |     for (int ind = 0; ind < nx * ny; ind++) {
 72 |         if (fscanf(fp, "%lf", &file_data[ind]) != 1) {
 73 |             fprintf(stderr, "Error while reading the input file!\n");
 74 |             fclose(fp);
 75 |             exit(-1);
 76 |         }
 77 |     }
 78 |     fclose(fp);
 79 | 
 80 |     const int nx_local = temperature1->nx;
 81 |     const int ny_local = temperature1->ny;
 82 |     const int stride = ny_local + 2;  // row length including boundary cells
 83 |     std::vector<double> &data = temperature1->data;
 84 | 
 85 |     // Copy to the inner part of the full temperature field
 86 |     for (int i = 0; i < nx_local; i++) {
 87 |         std::copy(file_data.begin() + i * ny_local,
 88 |                   file_data.begin() + (i + 1) * ny_local,
 89 |                   data.begin() + (i + 1) * stride + 1);
 90 |     }
 91 | 
 92 |     // Set the boundary values: first the left and right columns ...
 93 |     for (int i = 1; i < nx_local + 1; i++) {
 94 |         data[i * stride] = data[i * stride + 1];
 95 |         data[i * stride + ny_local + 1] = data[i * stride + ny_local];
 96 |     }
 97 |     // ... then the top and bottom rows, corners included. Row 1 starts at
 98 |     // index stride, so row 0 is copied from data[stride + j].
 99 |     for (int j = 0; j < stride; j++) {
100 |         data[j] = data[stride + j];
101 |         data[(nx_local + 1) * stride + j] = data[nx_local * stride + j];
102 |     }
103 | 
104 |     copy_field(temperature1, temperature2);
105 | }
106 | 


--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/main.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Main routine for heat equation solver in 2D.
 4 | 
 5 | #include <stdio.h>
 6 | #include <omp.h>
 7 | 
 8 | #include "heat.h"
 9 | 
10 | int main(int argc, char **argv)
11 | {
12 |     // Image output interval
13 |     int image_interval = 1500;
14 | 
15 |     // Number of time steps
16 |     int nsteps;
17 |     // Current and previous temperature fields
18 |     field current, previous;
19 |     initialize(argc, argv, &current, &previous, &nsteps);
20 | 
21 |     // Output the initial field 
22 |     write_field(&current, 0);
23 | 
24 |     double average_temp = average(&current);
25 |     printf("Average temperature at start: %f\n", average_temp);
26 | 
27 |     // Diffusion constant
28 |     double a = 0.5;
29 | 
30 |     // Compute the largest stable time step
31 |     double dx2 = current.dx * current.dx;
32 |     double dy2 = current.dy * current.dy;
33 |     // Time step
34 |     double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));
35 | 
36 |     // Get the start time stamp (the timed section includes the data transfers)
37 |     double start_clock = omp_get_wtime();
38 | 
39 |     // Copy fields to device 
40 |     enter_data(&current, &previous);
41 | 
42 |     // Time evolution
43 |     for (int iter = 1; iter <= nsteps; iter++) {
44 |         evolve(&current, &previous, a, dt);
45 |         if (iter % image_interval == 0) {
46 |             // Refresh the host copy of the field before writing the image
47 |             update_host(&current);
48 |             write_field(&current, iter);
49 |         }
50 |         // Swap current field so that it will be used
51 |         // as previous for next iteration step
52 |         swap_fields(&current, &previous);
53 |     }
54 |   
55 |     // copy data back to host
56 |     exit_data(&current, &previous);
57 | 
58 |     double stop_clock = omp_get_wtime();
59 | 
60 |     // After the final swap the latest values are in previous
61 |     average_temp = average(&previous);
62 | 
63 |     // Report the wall-clock time (omp_get_wtime) spent in the timed section
64 |     printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock));
65 |     printf("Average temperature: %f\n", average_temp);
66 |     if (argc == 1) {
67 |         printf("Reference value with default arguments: 59.281239\n");
68 |     }
69 | 
70 |     // Output the final field
71 |     write_field(&previous, nsteps);
72 | 
73 |     return 0;
74 | }
75 | 


--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/utilities.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Utility functions for heat equation solver
 4 | 
 5 | #include <stdlib.h>
 6 | #include <assert.h>
 7 | 
 8 | #include "heat.h"
 9 | 
10 | 
11 | // Copy the temperature values of temperature1 into temperature2 (sizes must match)
12 | void copy_field(field *temperature1, field *temperature2)
13 | {
14 |     assert(temperature1->nx == temperature2->nx);
15 |     assert(temperature1->ny == temperature2->ny);
16 |     assert(temperature1->data.size() == temperature2->data.size());
17 |     const std::vector<double> &src = temperature1->data;
18 |     std::copy(src.begin(), src.end(), temperature2->data.begin());
19 | }
20 | 
21 | // Swap the field data for temperature1 and temperature2
22 | void swap_fields(field *temperature1, field *temperature2)
23 | {
24 |     std::swap(temperature1->data, temperature2->data);
25 | }
26 | 
27 | // Allocate memory for a temperature field and initialise it to zero
28 | void allocate_field(field *temperature)
29 | {
30 |     // Include also boundary layers
31 |     int newSize = (temperature->nx + 2) * (temperature->ny + 2);
32 |     temperature->data.resize(newSize, 0.0);
33 | }
34 | 
35 | // Calculate average temperature over the non-boundary grid cells
36 | double average(field *temperature)
37 | {
38 |      double average = 0.0;
39 | 
40 |      for (int i = 1; i < temperature->nx + 1; i++) {
41 |        for (int j = 1; j < temperature->ny + 1; j++) {
42 |          int ind = i * (temperature->ny + 2) + j;
43 |          average += temperature->data[ind];
44 |        }
45 |      }
46 | 
47 |      average /= (temperature->nx * temperature->ny);
48 |      return average;
49 | }
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/content/exercise/solution/offloading/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 CSC Training
 4 | Copyright (c) 2021 ENCCS
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/content/exercise/solution/offloading/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 CSC Training
 2 | # Copyright (c) 2021 ENCCS
 3 | ifeq ($(COMP),)
 4 | COMP=nv
 5 | endif
 6 | 
 7 | HAVE_PNG=0
 8 | ifeq ($(HAVE_PNG),1)
 9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib
11 | endif
12 | 
13 | COMMONDIR=../common
14 | 
15 | ifeq ($(COMP),gnu)
16 | CXX=g++
17 | CC=gcc
18 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR)
19 | LDFLAGS=
20 | LIBS=
21 | endif
22 | 
23 | ifeq ($(COMP),nv)
24 | CXX=nvc++
25 | CC=nvc
26 | CCFLAGS=-O3 -mp=gpu -Minfo=mp -I$(COMMONDIR)
27 | LDFLAGS=
28 | LIBS=
29 | endif
30 | 
31 | ifeq ($(COMP),intel)
32 | CXX=icpx
33 | CC=icx
34 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR)
35 | LDFLAGS=
36 | LIBS=
37 | endif
38 | 
39 | EXE=heat_serial
40 | OBJS=main.o core.o setup.o utilities.o io.o
41 | OBJS_PNG=$(COMMONDIR)/pngwriter.o
42 | 
43 | 
44 | all: $(EXE)
45 | 
46 | 
47 | core.o: core.cpp heat.h
48 | utilities.o: utilities.cpp heat.h
49 | setup.o: setup.cpp heat.h
50 | io.o: io.cpp heat.h
51 | main.o: main.cpp heat.h
52 | 
53 | $(OBJS_PNG): C_COMPILER := $(CC)
54 | $(OBJS_PNG): CCFLAGS += -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include
55 | $(OBJS): C_COMPILER := $(CXX)
56 | 
57 | $(EXE): $(OBJS) $(OBJS_PNG)
58 | 	$(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
59 | 
60 | %.o: %.cpp
61 | 	$(CXX) $(CCFLAGS) -c $< -o $@
62 | 
63 | %.o: %.c
64 | 	$(CC) $(CCFLAGS) -c $< -o $@
65 | 
66 | .PHONY: clean
67 | clean:
68 | 	-/bin/rm -f $(EXE) a.out *.o *.png *~ ../common/*.o
69 | 


--------------------------------------------------------------------------------
/content/exercise/solution/offloading/core.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Main solver routines for heat equation solver
 4 | 
 5 | #include "heat.h"
 6 | 
 7 | // Update the temperature values using five-point stencil
 8 | // Arguments:
 9 | //   curr: current temperature values
10 | //   prev: temperature values from previous time step
11 | //   a: diffusivity
12 | //   dt: time step
13 | void evolve(field *curr, field *prev, double a, double dt)
14 | {
15 |   // Help the compiler avoid being confused by the structs
16 |   double *currdata = curr->data.data();
17 |   double *prevdata = prev->data.data();
18 |   int nx = curr->nx;
19 |   int ny = curr->ny;
20 | 
21 |   // Determine the temperature field at next time step
22 |   // As we have fixed boundary conditions, the outermost gridpoints
23 |   // are not updated.
24 |   double dx2 = prev->dx * prev->dx;
25 |   double dy2 = prev->dy * prev->dy;
26 |   #pragma omp target teams distribute parallel for \
27 |   map(currdata[0:(nx+2)*(ny+2)],prevdata[0:(nx+2)*(ny+2)])
28 |   for (int i = 1; i < nx + 1; i++) {
29 |     for (int j = 1; j < ny + 1; j++) {
30 |       int ind = i * (ny + 2) + j;
31 |       int ip = (i + 1) * (ny + 2) + j;
32 |       int im = (i - 1) * (ny + 2) + j;
33 |       int jp = i * (ny + 2) + j + 1;
34 |       int jm = i * (ny + 2) + j - 1;
35 |       currdata[ind] = prevdata[ind] + a*dt*
36 | 	    ((prevdata[ip] - 2.0*prevdata[ind] + prevdata[im]) / dx2 +
37 | 	     (prevdata[jp] - 2.0*prevdata[ind] + prevdata[jm]) / dy2);
38 |     }
39 |   }
40 | }
41 | 


--------------------------------------------------------------------------------
/content/exercise/solution/offloading/fortran/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 CSC Training
 2 | # Copyright (c) 2021 ENCCS
 3 | ifeq ($(COMP),)
 4 | COMP=nv
 5 | endif
 6 | 
 7 | HAVE_PNG=0
 8 | ifeq ($(HAVE_PNG),1)
 9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 
11 | endif
12 | 
13 | 
14 | COMMONDIR=../../common
15 | 
16 | ifeq ($(COMP),nv)
17 | FC=nvfortran
18 | CC=nvc
19 | FCFLAGS=-O3 -mp=gpu -Minfo=mp
20 | CCFLAGS=-O3 -I$(COMMONDIR)
21 | LDFLAGS=
22 | LIBS= 
23 | endif
24 | 
25 | ifeq ($(COMP),gnu)
26 | FC=gfortran
27 | CC=gcc
28 | FCFLAGS=-O3 -Wall -fopenmp
29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR)
30 | LDFLAGS=-fopenmp
31 | LIBS=
32 | endif
33 | 
34 | 
35 | EXE=heat_serial
36 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o
37 | OBJS_PNG= $(COMMONDIR)/pngwriter.o
38 | 
39 | all: $(EXE)
40 | 
41 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
42 | heat_mod.o: heat_mod.F90
43 | core.o: core.F90 heat_mod.o
44 | utilities.o: utilities.F90 heat_mod.o
45 | io.o: io.F90 heat_mod.o pngwriter_mod.o 
46 | setup.o: setup.F90 heat_mod.o utilities.o io.o
47 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o
48 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o
49 | 
50 | $(EXE): $(OBJS) $(OBJS_PNG)
51 | 	$(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
52 | 
53 | %.o: %.F90
54 | 	$(FC) $(FCFLAGS) -c $< -o $@
55 | 
56 | %.o: %.c
57 | 	$(CC) -DHAVE_PNG=$(HAVE_PNG) $(CCFLAGS) -c $< -o $@
58 | 
59 | .PHONY: clean
60 | clean:
61 | 	-/bin/rm -f $(EXE) a.out *.o *.mod *.png *~  ../../common/*.o
62 | 
63 | 


--------------------------------------------------------------------------------
/content/exercise/solution/offloading/fortran/core.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Main solver routines for heat equation solver
 4 | module core
 5 |   use heat
 6 | 
 7 | contains
 8 | 
 9 |   ! Update the temperature values using five-point stencil
10 |   ! Arguments:
11 |   !   curr (type(field)): current temperature values
12 |   !   prev (type(field)): temperature values from previous time step
13 |   !   a (real(dp)): diffusivity
14 |   !   dt (real(dp)): time step
15 |   subroutine evolve(curr, prev, a, dt)
16 | 
17 |     implicit none
18 | 
19 |     type(field),target, intent(inout) :: curr, prev
20 |     real(dp) :: a, dt
21 |     integer :: i, j, nx, ny
22 |     real(dp) :: dx, dy
23 |     real(dp), pointer, contiguous, dimension(:,:) :: currdata, prevdata
24 | 
25 |     ! Help the compiler avoid being confused
26 |     nx = curr%nx
27 |     ny = curr%ny
28 |     dx = curr%dx
29 |     dy = curr%dy
30 |     currdata => curr%data
31 |     prevdata => prev%data
32 | 
33 |     ! Determine the temperature field at next time step As we have
34 |     ! fixed boundary conditions, the outermost gridpoints are not
35 |     ! updated.
36 | 
37 |     !$omp target teams distribute parallel do
38 |     do j = 1, ny
39 |        do i = 1, nx
40 |           currdata(i, j) = prevdata(i, j) + a * dt * &
41 |                & ((prevdata(i-1, j) - 2.0 * prevdata(i, j) + &
42 |                &   prevdata(i+1, j)) / dx**2 + &
43 |                &  (prevdata(i, j-1) - 2.0 * prevdata(i, j) + &
44 |                &   prevdata(i, j+1)) / dy**2)
45 |        end do
46 |     end do
47 |     !$omp end target teams distribute parallel do
48 |   end subroutine evolve
49 | 
50 | end module core
51 | 


--------------------------------------------------------------------------------
/content/exercise/solution/offloading/fortran/heat_mod.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Field metadata for heat equation solver
 4 | module heat
 5 |   use iso_fortran_env, only : REAL64
 6 |   implicit none
 7 | 
 8 |   integer, parameter :: dp = REAL64
 9 |   real(dp), parameter :: DX = 0.01, DY = 0.01  ! Fixed grid spacing
10 | 
11 |   type :: field
12 |      integer :: nx          ! ldimension of the field
13 |      integer :: ny
14 |      real(dp) :: dx
15 |      real(dp) :: dy
16 |      real(dp), dimension(:,:), allocatable :: data
17 |   end type field
18 | 
19 | contains
20 |   ! Initialize the field type metadata
21 |   ! Arguments:
22 |   !   field0 (type(field)): input field
23 |   !   nx, ny, dx, dy: field dimensions and spatial step size
24 |   subroutine set_field_dimensions(field0, nx, ny)
25 |     implicit none
26 | 
27 |     type(field), intent(out) :: field0
28 |     integer, intent(in) :: nx, ny
29 | 
30 |     field0%dx = DX
31 |     field0%dy = DY
32 |     field0%nx = nx
33 |     field0%ny = ny
34 | 
35 |   end subroutine set_field_dimensions
36 | 
37 | end module heat
38 | 


--------------------------------------------------------------------------------
/content/exercise/solution/offloading/fortran/io.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! I/O routines for heat equation solver
 4 | module io
 5 |   use heat
 6 | 
 7 | contains
 8 | 
 9 |   ! Output routine, saves the temperature distribution as a png image
10 |   ! Arguments:
11 |   !   curr (type(field)): variable with the temperature data
12 |   !   iter (integer): index of the time step
13 |   subroutine write_field(curr, iter)
14 | 
15 |     use pngwriter
16 |     implicit none
17 |     type(field), intent(in) :: curr
18 |     integer, intent(in) :: iter
19 | 
20 |     character(len=85) :: filename
21 | 
22 |     integer :: stat
23 |     real(dp), dimension(:,:), allocatable, target :: full_data
24 | 
25 |     allocate(full_data(curr%nx, curr%ny))
26 |     ! Copy rand #0 data to the global array
27 |     full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny)
28 |     write(filename,'(A5,I4.4,A4,A)')  'heat_', iter, '.png'
29 |     stat = save_png(full_data, curr%nx, curr%ny, filename)
30 |     deallocate(full_data)
31 | 
32 |   end subroutine write_field
33 | 
34 | 
35 |   ! Reads the temperature distribution from an input file
36 |   ! Arguments:
37 |   !   field0 (type(field)): field variable that will store the
38 |   !                         read data
39 |   !   filename (char): name of the input file
40 |   ! Note that this version assumes the input data to be in C memory layout
41 |   subroutine read_field(field0, filename)
42 | 
43 |     implicit none
44 |     type(field), intent(out) :: field0
45 |     character(len=85), intent(in) :: filename
46 | 
47 |     integer :: nx, ny, i
48 |     character(len=2) :: dummy
49 | 
50 |     real(dp), dimension(:,:), allocatable :: full_data
51 | 
52 |     open(10, file=filename)
53 |     ! Read the header
54 |     read(10, *) dummy, nx, ny
55 | 
56 |     call set_field_dimensions(field0, nx, ny)
57 | 
58 |     ! The arrays for temperature field contain also a halo region
59 |     allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
60 | 
61 |     allocate(full_data(nx, ny))
62 |     ! Read the data
63 |     do i = 1, nx
64 |        read(10, *) full_data(i, 1:ny)
65 |     end do
66 | 
67 |     ! Copy to full array containing also boundaries
68 |     field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:)
69 | 
70 |     ! Set the boundary values
71 |     field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1)
72 |     field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny)
73 |     field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1)
74 |     field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1)
75 | 
76 |     close(10)
77 |     deallocate(full_data)
78 | 
79 |   end subroutine read_field
80 | 
81 | end module io
82 | 


--------------------------------------------------------------------------------
/content/exercise/solution/offloading/fortran/main.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Heat equation solver in 2D.
 4 | 
 5 | program heat_solve
 6 |   use heat
 7 |   use core
 8 |   use io
 9 |   use setup
10 |   use utilities
11 |   use omp_lib
12 | 
13 |   implicit none
14 | 
15 |   real(dp), parameter :: a = 0.5 ! Diffusion constant
16 |   type(field) :: current, previous    ! Current and previus temperature fields
17 | 
18 |   real(dp) :: dt     ! Time step
19 |   integer :: nsteps       ! Number of time steps
20 |   integer, parameter :: image_interval = 1500 ! Image output interval
21 | 
22 |   integer :: iter
23 | 
24 |   real(dp) :: average_temp   !  Average temperature
25 | 
26 |   real(kind=dp) :: start, stop ! Timers
27 | 
28 |   call initialize(current, previous, nsteps)
29 | 
30 |   ! Draw the picture of the initial state
31 |   call write_field(current, 0)
32 | 
33 |   average_temp = average(current)
34 |   write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp
35 | 
36 |   ! Largest stable time step
37 |   dt = current%dx**2 * current%dy**2 / &
38 |        & (2.0 * a * (current%dx**2 + current%dy**2))
39 | 
40 |   ! Main iteration loop
41 | 
42 |   start =  omp_get_wtime()
43 | 
44 | 
45 |   do iter = 1, nsteps
46 |      call evolve(current, previous, a, dt)
47 |      if (mod(iter, image_interval) == 0) then
48 |         call write_field(current, iter)
49 |      end if
50 |      call swap_fields(current, previous)
51 |   end do
52 | 
53 |   stop = omp_get_wtime()
54 | 
55 |   ! Average temperature for reference
56 |   average_temp = average(previous)
57 | 
58 |   write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.'
59 |   write(*,'(A,F9.6)') 'Average temperature: ',  average_temp
60 |   if (command_argument_count() == 0) then
61 |       write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239
62 |   end if
63 | 
64 |   call finalize(current, previous)
65 | 
66 | end program heat_solve
67 | 


--------------------------------------------------------------------------------
/content/exercise/solution/offloading/fortran/pngwriter_mod.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! PNG writer for heat equation solver
 4 | module pngwriter
 5 |   use heat
 6 | 
 7 | contains
 8 | 
 9 |   function save_png(data, nx, ny, fname) result(stat)
10 | 
11 |     use, intrinsic :: ISO_C_BINDING
12 |     implicit none
13 | 
14 |     real(dp), dimension(:,:), intent(in) :: data
15 |     integer, intent(in) :: nx, ny
16 |     character(len=*), intent(in) :: fname
17 |     integer :: stat
18 | 
19 |     ! Interface for save_png C-function
20 |     interface
21 |        ! The C-function definition is
22 |        !   int save_png(double *data, const int nx, const int ny,
23 |        !                const char *fname)
24 |        function save_png_c(data, nx, ny, fname, order) &
25 |             & bind(C,name="save_png") result(stat)
26 |          use, intrinsic :: ISO_C_BINDING
27 |          implicit none
28 |          real(kind=C_DOUBLE) :: data(*)
29 |          integer(kind=C_INT), value, intent(IN) :: nx, ny
30 |          character(kind=C_CHAR), intent(IN) :: fname(*)
31 |          character(kind=C_CHAR), value, intent(IN) :: order
32 |          integer(kind=C_INT) :: stat
33 |        end function save_png_c
34 |     end interface
35 | 
36 |     stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
37 |     if (stat /= 0) then
38 |        write(*,*) 'save_png returned error!'
39 |     end if
40 | 
41 |   end function save_png
42 | 
43 | end module pngwriter
44 | 


--------------------------------------------------------------------------------
/content/exercise/solution/offloading/fortran/utilities.F90:
--------------------------------------------------------------------------------
 1 | ! Copyright (c) 2019 CSC Training
 2 | ! Copyright (c) 2021 ENCCS
 3 | ! Utility routines for heat equation solver
 4 | !   NOTE: This file does not need to be edited!
 5 | module utilities
 6 |   use heat
 7 | 
 8 | contains
 9 | 
10 |   ! Swap the data fields of two variables of type field
11 |   ! Arguments:
12 |   !   curr, prev (type(field)): the two variables that are swapped
13 |   subroutine swap_fields(curr, prev)
14 | 
15 |     implicit none
16 | 
17 |     type(field), intent(inout) :: curr, prev
18 |     real(dp), allocatable, dimension(:,:) :: tmp
19 | 
20 |     call move_alloc(curr%data, tmp)
21 |     call move_alloc(prev%data, curr%data)
22 |     call move_alloc(tmp, prev%data)
23 |   end subroutine swap_fields
24 | 
25 |   ! Copy the data from one field to another
26 |   ! Arguments:
27 |   !   from_field (type(field)): variable to copy from
28 |   !   to_field (type(field)): variable to copy to
29 |   subroutine copy_fields(from_field, to_field)
30 | 
31 |     implicit none
32 | 
33 |     type(field), intent(in) :: from_field
34 |     type(field), intent(out) :: to_field
35 | 
36 |     ! Consistency checks
37 |     if (.not.allocated(from_field%data)) then
38 |        write (*,*) "Can not copy from a field without allocated data"
39 |        stop
40 |     end if
41 |     if (.not.allocated(to_field%data)) then
42 |        ! Target is not initialize, allocate memory
43 |        allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
44 |             & lbound(from_field%data, 2):ubound(from_field%data, 2)))
45 |     else if (any(shape(from_field%data) /= shape(to_field%data))) then
46 |        write (*,*) "Wrong field data sizes in copy routine"
47 |        print *, shape(from_field%data), shape(to_field%data)
48 |        stop
49 |     end if
50 | 
51 |     to_field%data = from_field%data
52 | 
53 |     to_field%nx = from_field%nx
54 |     to_field%ny = from_field%ny
55 |     to_field%dx = from_field%dx
56 |     to_field%dy = from_field%dy
57 |   end subroutine copy_fields
58 | 
59 |   function average(field0)
60 | 
61 |     implicit none
62 | 
63 |     real(dp) :: average
64 |     type(field) :: field0
65 | 
66 |     real(dp) :: local_average
67 |     integer :: rc
68 | 
69 |     average = sum(field0%data(1:field0%nx, 1:field0%ny))
70 |     average = average / (field0%nx * field0%ny)
71 | 
72 |   end function average
73 | 
74 | end module utilities
75 | 


--------------------------------------------------------------------------------
/content/exercise/solution/offloading/heat.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | #ifndef __HEAT_H__
 4 | #define __HEAT_H__
 5 | 
 6 | #include <vector>
 7 | 
 8 | // Datatype for temperature field
 9 | struct field {
10 |     // nx and ny are the dimensions of the field. The array data
11 |     // contains also ghost layers, so it will have dimensions nx+2 x ny+2
12 |     int nx;
13 |     int ny;
14 |     // Size of the grid cells
15 |     double dx;
16 |     double dy;
17 |     // The temperature values in the 2D grid
18 |     std::vector<double> data;
19 | };
20 | 
21 | // We use here fixed grid spacing
22 | const double DX = 0.01;
23 | const double DY = 0.01;
24 | 
25 | #if __cplusplus
26 |   extern "C" {
27 | #endif
28 | // Function prototypes
29 | void set_field_dimensions(field *temperature, int nx, int ny);
30 | 
31 | void initialize(int argc, char *argv[], field *temperature1,
32 |                 field *temperature2, int *nsteps);
33 | 
34 | void generate_field(field *temperature);
35 | 
36 | double average(field *temperature);
37 | 
38 | void evolve(field *curr, field *prev, double a, double dt);
39 | 
40 | void write_field(field *temperature, int iter);
41 | 
42 | void read_field(field *temperature1, field *temperature2,
43 |                 char *filename);
44 | 
45 | void copy_field(field *temperature1, field *temperature2);
46 | 
47 | void swap_fields(field *temperature1, field *temperature2);
48 | 
49 | void allocate_field(field *temperature);
50 | 
51 | #if __cplusplus
52 |   }
53 | #endif
54 | #endif  // __HEAT_H__
55 | 
56 | 


--------------------------------------------------------------------------------
/content/exercise/solution/offloading/io.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2019 CSC Training
  2 | // Copyright (c) 2021 ENCCS
  3 | // I/O related functions for heat equation solver
  4 | 
  5 | #include <stdio.h>
  6 | #include <stdlib.h>
  7 | #include <string.h>
  8 | #include <assert.h>
  9 | 
 10 | #include "heat.h"
 11 | #include "pngwriter.h"
 12 | 
 13 | // Output routine that prints out a picture of the temperature
 14 | // distribution.
 15 | void write_field(field *temperature, int iter)
 16 | {
 17 |     char filename[64];
 18 | 
 19 |     // The actual write routine takes only the actual data
 20 |     // (without boundary layers) so we need to copy an array with that.
 21 |     std::vector<double> inner_data(temperature->nx * temperature->ny);
 22 |     auto inner_data_iterator = inner_data.begin();
 23 |     auto beginning_of_row = temperature->data.begin() + (temperature->ny + 2) + 1;
 24 |     for (int i = 0; i < temperature->nx; i++) {
 25 |         auto end_of_row = beginning_of_row + temperature->ny;
 26 |         std::copy(beginning_of_row, end_of_row, inner_data_iterator);
 27 |         inner_data_iterator += temperature->ny;
 28 |         beginning_of_row = end_of_row + 2;
 29 |     }
 30 | 
 31 |     // Write out the data to a png file
 32 |     sprintf(filename, "%s_%04d.png", "heat", iter);
 33 |     save_png(inner_data.data(), temperature->nx, temperature->ny, filename, 'c');
 34 | }
 35 | 
 36 | // Read the initial temperature distribution from a file and
 37 | // initialize the temperature fields temperature1 and
 38 | // temperature2 to the same initial state.
 39 | void read_field(field *temperature1, field *temperature2, char *filename)
 40 | {
 41 |     FILE *fp;
 42 |     int nx, ny, ind;
 43 | 
 44 |     int nx_local, ny_local, count;
 45 | 
 46 |     fp = fopen(filename, "r");
 47 |     // Read the header
 48 |     count = fscanf(fp, "# %d %d \n", &nx, &ny);
 49 |     if (count < 2) {
 50 |         fprintf(stderr, "Error while reading the input file!\n");
 51 | 	exit(-1);
 52 |     }
 53 | 
 54 |     set_field_dimensions(temperature1, nx, ny);
 55 |     set_field_dimensions(temperature2, nx, ny);
 56 | 
 57 |     // Allocate arrays (including boundary layers)
 58 |     int newSize = (temperature1->nx + 2) * (temperature1->ny + 2);
 59 |     temperature1->data.resize(newSize, 0.0);
 60 |     temperature2->data.resize(newSize, 0.0);
 61 | 
 62 |     // Array from file
 63 |     std::vector<double> file_data(nx * ny, 0.0);
 64 | 
 65 |     // Read the actual data
 66 |     for (int i = 0; i < nx; i++) {
 67 |         for (int j = 0; j < ny; j++) {
 68 |             ind = i * ny + j;
 69 |             count = fscanf(fp, "%lf", &file_data[ind]);
 70 |         }
 71 |     }
 72 | 
 73 |     nx_local = temperature1->nx;
 74 |     ny_local = temperature1->ny;
 75 | 
 76 |     // Copy to the inner part of the full temperature field
 77 |     auto temperature_data_iterator = temperature1->data.begin() + (ny_local + 2) + 1;
 78 |     auto beginning_of_row = file_data.begin();
 79 |     for (int i = 0; i < nx_local; i++) {
 80 |         auto end_of_row = beginning_of_row + ny_local;
 81 |         std::copy(beginning_of_row, end_of_row, temperature_data_iterator);
 82 |         temperature_data_iterator += ny_local + 2;
 83 |         beginning_of_row = end_of_row;
 84 |     }
 85 | 
 86 |     // Set the boundary values
 87 |     for (int i = 1; i < nx_local + 1; i++) {
 88 |         temperature1->data[i * (ny_local + 2)] = temperature1->data[i * (ny_local + 2) + 1];
 89 |         temperature1->data[i * (ny_local + 2) + ny + 1] = temperature1->data[i * (ny_local + 2) + ny];
 90 |     }
 91 |     for (int j = 0; j < ny + 2; j++) {
 92 |         temperature1->data[j] = temperature1->data[ny_local + j];
 93 |         temperature1->data[(nx_local + 1) * (ny_local + 2) + j] =
 94 |             temperature1->data[nx_local * (ny_local + 2) + j];
 95 |     }
 96 | 
 97 |     copy_field(temperature1, temperature2);
 98 | 
 99 |     fclose(fp);
100 | }
101 | 


--------------------------------------------------------------------------------
/content/exercise/solution/offloading/main.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Main routine for heat equation solver in 2D.
 4 | 
 5 | #include <stdio.h>
 6 | #include <omp.h>
 7 | 
 8 | #include "heat.h"
 9 | 
10 | int main(int argc, char **argv)
11 | {
12 |     // Image output interval
13 |     int image_interval = 1500;
14 | 
15 |     // Number of time steps
16 |     int nsteps;
17 |     // Current and previous temperature fields
18 |     field current, previous;
19 |     initialize(argc, argv, &current, &previous, &nsteps);
20 | 
21 |     // Output the initial field 
22 |     write_field(&current, 0);
23 | 
24 |     double average_temp = average(&current);
25 |     printf("Average temperature at start: %f\n", average_temp);
26 | 
27 |     // Diffusion constant
28 |     double a = 0.5;
29 | 
30 |     // Compute the largest stable time step
31 |     double dx2 = current.dx * current.dx;
32 |     double dy2 = current.dy * current.dy;
33 |     // Time step
34 |     double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));
35 | 
36 |     // Get the start time stamp
37 |     double start_clock = omp_get_wtime();
38 | 
39 |     // Time evolution
40 |     for (int iter = 1; iter <= nsteps; iter++) {
41 |         evolve(&current, &previous, a, dt);
42 |         if (iter % image_interval == 0) {
43 |             write_field(&current, iter);
44 |         }
45 |         // Swap current field so that it will be used
46 |         // as previous for next iteration step
47 |         swap_fields(&current, &previous);
48 |     }
49 | 
50 |     double stop_clock = omp_get_wtime();
51 |     // Average temperature for reference
52 |     average_temp = average(&previous);
53 | 
54 |     // Determine the CPU time used for all the iterations
55 |     printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock));
56 |     printf("Average temperature: %f\n", average_temp);
57 |     if (argc == 1) {
58 |         printf("Reference value with default arguments: 59.281239\n");
59 |     }
60 | 
61 |     // Output the final field
62 |     write_field(&previous, nsteps);
63 | 
64 |     return 0;
65 | }
66 | 


--------------------------------------------------------------------------------
/content/exercise/solution/offloading/utilities.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 CSC Training
 2 | // Copyright (c) 2021 ENCCS
 3 | // Utility functions for heat equation solver
 4 | 
 5 | #include <stdlib.h>
 6 | #include <assert.h>
 7 | 
 8 | #include "heat.h"
 9 | 
10 | 
11 | // Copy data on temperature1 into temperature2
12 | void copy_field(field *temperature1, field *temperature2)
13 | {
14 |     assert(temperature1->nx == temperature2->nx);
15 |     assert(temperature1->ny == temperature2->ny);
16 |     assert(temperature1->data.size() == temperature2->data.size());
17 |     std::copy(temperature1->data.begin(), temperature1->data.end(),
18 |               temperature2->data.begin());
19 | }
20 | 
21 | // Swap the field data for temperature1 and temperature2
22 | void swap_fields(field *temperature1, field *temperature2)
23 | {
24 |     std::swap(temperature1->data, temperature2->data);
25 | }
26 | 
27 | // Allocate memory for a temperature field and initialise it to zero
28 | void allocate_field(field *temperature)
29 | {
30 |     // Include also boundary layers
31 |     int newSize = (temperature->nx + 2) * (temperature->ny + 2);
32 |     temperature->data.resize(newSize, 0.0);
33 | }
34 | 
35 | // Calculate average temperature over the non-boundary grid cells
36 | double average(field *temperature)
37 | {
38 |      double average = 0.0;
39 | 
40 |      for (int i = 1; i < temperature->nx + 1; i++) {
41 |        for (int j = 1; j < temperature->ny + 1; j++) {
42 |          int ind = i * (temperature->ny + 2) + j;
43 |          average += temperature->data[ind];
44 |        }
45 |      }
46 | 
47 |      average /= (temperature->nx * temperature->ny);
48 |      return average;
49 | }
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/content/guide.rst:
--------------------------------------------------------------------------------
 1 | Instructor's guide
 2 | ------------------
 3 | 
 4 | 
 5 | 
 6 | Preparing to Teach
 7 | ------------------
 8 | 
 9 |     - Make sure that all the compilers are installed correctly on the system
10 |     - Run all the examples beforehand at least once
11 |     - Be aware which parts could be skipped in case needed
12 |     - Give enough time for the exercises
13 |     - Do not open too many tabs and switch among them
14 |     - Emphasize the differences between C/C++ and Fortran for certain directives
15 |     - Briefly introduce the exercises before and make a short summary afterwards on the most important take-home messages
16 | 


--------------------------------------------------------------------------------
/content/img/Automatic-Scalability-of-Cuda-via-scaling-the-number-of-Streaming-Multiprocessors-and.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/Automatic-Scalability-of-Cuda-via-scaling-the-number-of-Streaming-Multiprocessors-and.png


--------------------------------------------------------------------------------
/content/img/C2050Timeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/C2050Timeline.png


--------------------------------------------------------------------------------
/content/img/ENCCS.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/ENCCS.jpg


--------------------------------------------------------------------------------
/content/img/ENCCS_CSC_logos.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/ENCCS_CSC_logos.jpg


--------------------------------------------------------------------------------
/content/img/HardwareReview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/HardwareReview.png


--------------------------------------------------------------------------------
/content/img/Loom.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/Loom.jpeg


--------------------------------------------------------------------------------
/content/img/ThreadExecution.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/ThreadExecution.jpeg


--------------------------------------------------------------------------------
/content/img/coalesced.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/coalesced.png


--------------------------------------------------------------------------------
/content/img/comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/comparison.png


--------------------------------------------------------------------------------
/content/img/compp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/compp.png


--------------------------------------------------------------------------------
/content/img/distributed_vs_shared.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/distributed_vs_shared.png


--------------------------------------------------------------------------------
/content/img/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/favicon.ico


--------------------------------------------------------------------------------
/content/img/gpu_vs_cpu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/gpu_vs_cpu.png


--------------------------------------------------------------------------------
/content/img/heat_0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/heat_0000.png


--------------------------------------------------------------------------------
/content/img/heat_montage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/heat_montage.png


--------------------------------------------------------------------------------
/content/img/heteprogra.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/heteprogra.jpeg


--------------------------------------------------------------------------------
/content/img/memsch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/memsch.png


--------------------------------------------------------------------------------
/content/img/microprocessor-trend-data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/microprocessor-trend-data.png


--------------------------------------------------------------------------------
/content/img/nvidia_block_diagram.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/nvidia_block_diagram.jpeg


--------------------------------------------------------------------------------
/content/img/omp-parallel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/omp-parallel.png


--------------------------------------------------------------------------------
/content/img/processes-threads.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/processes-threads.png


--------------------------------------------------------------------------------
/content/img/shared_mem.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/shared_mem.png


--------------------------------------------------------------------------------
/content/img/threads.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/threads.png


--------------------------------------------------------------------------------
/content/img/volta-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/volta-architecture.png


--------------------------------------------------------------------------------
/content/img/volta-sm-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/volta-sm-architecture.png


--------------------------------------------------------------------------------
/content/interoperability.rst:
--------------------------------------------------------------------------------
 1 | Working alongside GPU libraries
 2 | ===============================
 3 | 
 4 | .. questions::
 5 | 
 6 |    - My code needs to use a library, how should they work together?
 7 |    - How to use OpenMP mapped variables? 
 8 |    - How to use CUDA or HIP device variables in OpenMP?
 9 | 
10 | .. objectives::
11 | 
12 |    - Understand TODO
13 |    - Understand TODO
14 |    - Understand 
15 |    - Understand 
16 | 
17 | .. prereq::
18 | 
19 |    1. TODO
20 |    2. TODO
21 | 
22 | 
23 | First heading
24 | -------------
25 | 
26 | OpenMP interoperability with CUDA C/C++ and CUDA Fortran.
27 | 
28 |  - You can call kernels written in CUDA C/C++ or CUDA Fortran in your OpenMP programs from the host.
29 |  - You can use the OpenMP **USE_DEVICE_PTR** clause to pass OpenMP mapped variables to CUDA kernels that are launched from the host.
30 |  - You can use the OpenMP **IS_DEVICE_PTR** clause to access CUDA device attribute variables or to pass device addresses directly to target regions.
31 | 
32 | Second heading
33 | --------------
34 | 
35 | Some more text, with a figure
36 | 
37 | .. figure:: img/stencil.svg
38 |    :align: center
39 | 
40 |    This is a sample image
41 | 
42 | .. exercise::
43 | 
44 |    TODO get the students to think about the content and answer a Zoom quiz
45 | 
46 | .. solution::
47 | 
48 |    Hide the answer and reasoning in here
49 | 
50 | Some source code
51 | ----------------
52 | 
53 | Sometimes we need to look at code, which can be in the webpage and optionally
54 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work.
55 | 
56 | .. typealong:: The field data structure
57 | 
58 |    .. tabs::
59 | 
60 |       .. tab:: C++
61 | 
62 |          .. literalinclude:: exercise/serial/heat.h
63 |                         :language: cpp
64 |                         :lines: 9-19
65 |                                 
66 |       .. tab:: Fortran
67 | 
68 |          .. literalinclude:: exercise/serial/fortran/heat_mod.F90
69 |                         :language: fortran
70 |                         :lines: 11-17
71 | 
72 | Building the code
73 | -----------------
74 | 
75 | If there's terminal output to discuss, show something like::
76 | 
77 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o
78 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o
79 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o
80 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o
81 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o
82 |   nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial  -lpng
83 | 
84 | 
85 | Running the code
86 | ----------------
87 | 
88 | To show a sample command line, use this approach
89 | 
90 | .. code-block:: bash
91 | 
92 |    ./heat_serial 800 800 1000
93 | 
94 | 
95 | .. keypoints::
96 | 
97 |    - TODO summarize the learning outcome
98 |    - TODO
99 | 


--------------------------------------------------------------------------------
/content/multi-gpu.rst:
--------------------------------------------------------------------------------
 1 | Multiple GPUs
 2 | =============
 3 | 
 4 | .. questions::
 5 | 
 6 |    - How do I run on more than one GPU?
 7 |    - TODO
 8 | 
 9 | .. objectives::
10 | 
11 |    - Understand TODO
12 |    - Understand TODO
13 |    - Understand 
14 |    - Understand 
15 | 
16 | .. prereq::
17 | 
18 |    1. TODO
19 |    2. TODO
20 | 
21 | 
22 | First heading
23 | -------------
24 | 
25 | Some text
26 | 
27 | Second heading
28 | --------------
29 | 
30 | Some more text, with a figure
31 | 
32 | .. figure:: img/stencil.svg
33 |    :align: center
34 | 
35 |    This is a sample image
36 | 
37 | .. exercise::
38 | 
39 |    TODO get the students to think about the content and answer a Zoom quiz
40 | 
41 | .. solution::
42 | 
43 |    Hide the answer and reasoning in here
44 | 
45 | Some source code
46 | ----------------
47 | 
48 | Sometimes we need to look at code, which can be in the webpage and optionally
49 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work.
50 | 
51 | .. typealong:: The field data structure
52 | 
53 |    .. tabs::
54 | 
55 |       .. tab:: C++
56 | 
57 |          .. literalinclude:: exercise/serial/heat.h
58 |                         :language: cpp
59 |                         :lines: 9-19
60 |                                 
61 |       .. tab:: Fortran
62 | 
63 |          .. literalinclude:: exercise/serial/fortran/heat_mod.F90
64 |                         :language: fortran
65 |                         :lines: 11-17
66 | 
67 | Building the code
68 | -----------------
69 | 
70 | If there's terminal output to discuss, show something like::
71 | 
72 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o
73 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o
74 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o
75 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o
76 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o
77 |   nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial  -lpng
78 | 
79 | 
80 | Running the code
81 | ----------------
82 | 
83 | To show a sample command line, use this approach
84 | 
85 | .. code-block:: bash
86 | 
87 |    ./heat_serial 800 800 1000
88 | 
89 | 
90 | .. keypoints::
91 | 
92 |    - TODO summarize the learning outcome
93 |    - TODO
94 | 


--------------------------------------------------------------------------------
/content/optimization.rst:
--------------------------------------------------------------------------------
 1 | Optimizing OpenMP offloaded code
 2 | ================================
 3 | 
 4 | .. questions::
 5 | 
 6 |    - What tools are available to run faster?
 7 |    - TODO
 8 | 
 9 | .. objectives::
10 | 
11 |    - Understand TODO
12 |    - Understand TODO
13 |    - Understand 
14 |    - Understand 
15 | 
16 | .. prereq::
17 | 
18 |    1. TODO
19 |    2. TODO
20 | 
21 | 
22 | First heading
23 | -------------
24 | 
25 | Some text
26 | 
27 | Second heading
28 | --------------
29 | 
30 | Some more text, with a figure
31 | 
32 | .. figure:: img/stencil.svg
33 |    :align: center
34 | 
35 |    This is a sample image
36 | 
37 | .. exercise::
38 | 
39 |    TODO get the students to think about the content and answer a Zoom quiz
40 | 
41 | .. solution::
42 | 
43 |    Hide the answer and reasoning in here
44 | 
45 | Some source code
46 | ----------------
47 | 
48 | Sometimes we need to look at code, which can be in the webpage and optionally
49 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work.
50 | 
51 | .. typealong:: The field data structure
52 | 
53 |    .. tabs::
54 | 
55 |       .. tab:: C++
56 | 
57 |          .. literalinclude:: exercise/serial/heat.h
58 |                         :language: cpp
59 |                         :lines: 9-19
60 |                                 
61 |       .. tab:: Fortran
62 | 
63 |          .. literalinclude:: exercise/serial/fortran/heat_mod.F90
64 |                         :language: fortran
65 |                         :lines: 11-17
66 | 
67 | Building the code
68 | -----------------
69 | 
70 | If there's terminal output to discuss, show something like::
71 | 
72 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o
73 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o
74 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o
75 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o
76 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o
77 |   nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial  -lpng
78 | 
79 | 
80 | Running the code
81 | ----------------
82 | 
83 | To show a sample command line, use this approach
84 | 
85 | .. code-block:: bash
86 | 
87 |    ./heat_serial 800 800 1000
88 | 
89 | 
90 | .. keypoints::
91 | 
92 |    - TODO summarize the learning outcome
93 |    - TODO
94 | 


--------------------------------------------------------------------------------
/content/porting.rst:
--------------------------------------------------------------------------------
 1 | Porting code to OpenMP offloading
 2 | =================================
 3 | 
 4 | .. questions::
 5 | 
 6 |    - When and why should I use OpenMP offloading in my code?
 7 |    - TODO
 8 | 
 9 | .. objectives::
10 | 
11 |    - Understand TODO
12 |    - Understand TODO
13 |    - Understand 
14 |    - Understand 
15 | 
16 | .. prereq::
17 | 
18 |    1. TODO
19 |    2. TODO
20 | 
21 | 
22 | First heading
23 | -------------
24 | 
25 | Some text
26 | 
27 | Second heading
28 | --------------
29 | 
30 | Some more text, with a figure
31 | 
32 | .. figure:: img/stencil.svg
33 |    :align: center
34 | 
35 |    This is a sample image
36 | 
37 | .. exercise::
38 | 
39 |    TODO get the students to think about the content and answer a Zoom quiz
40 | 
41 | .. solution::
42 | 
43 |    Hide the answer and reasoning in here
44 | 
45 | Some source code
46 | ----------------
47 | 
48 | Sometimes we need to look at code, which can be in the webpage and optionally
49 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work.
50 | 
51 | .. typealong:: The field data structure
52 | 
53 |    .. tabs::
54 | 
55 |       .. tab:: C++
56 | 
57 |          .. literalinclude:: exercise/serial/heat.h
58 |                         :language: cpp
59 |                         :lines: 9-19
60 |                                 
61 |       .. tab:: Fortran
62 | 
63 |          .. literalinclude:: exercise/serial/fortran/heat_mod.F90
64 |                         :language: fortran
65 |                         :lines: 11-17
66 | 
67 | Building the code
68 | -----------------
69 | 
70 | If there's terminal output to discuss, show something like::
71 | 
72 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o
73 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o
74 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o
75 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o
76 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o
77 |   nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial  -lpng
78 | 
79 | 
80 | Running the code
81 | ----------------
82 | 
83 | To show a sample command line, use this approach
84 | 
85 | .. code-block:: bash
86 | 
87 |    ./heat_serial 800 800 1000
88 | 
89 | 
90 | .. keypoints::
91 | 
92 |    - TODO summarize the learning outcome
93 |    - TODO
94 | 


--------------------------------------------------------------------------------
/content/profiling.rst:
--------------------------------------------------------------------------------
 1 | Profiling code for GPUs
 2 | =======================
 3 | 
 4 | .. questions::
 5 | 
 6 |    - What tools can help me reason about the performance of my code?
 7 |    - TODO
 8 | 
 9 | .. objectives::
10 | 
11 |    - Understand TODO
12 |    - Understand TODO
13 |    - Understand 
14 |    - Understand 
15 | 
16 | .. prereq::
17 | 
18 |    1. TODO
19 |    2. TODO
20 | 
21 | 
22 | First heading
23 | -------------
24 | 
25 | Some text
26 | 
27 | Second heading
28 | --------------
29 | 
30 | Some more text, with a figure
31 | 
32 | .. figure:: img/stencil.svg
33 |    :align: center
34 | 
35 |    This is a sample image
36 | 
37 | .. exercise::
38 | 
39 |    TODO get the students to think about the content and answer a Zoom quiz
40 | 
41 | .. solution::
42 | 
43 |    Hide the answer and reasoning in here
44 | 
45 | Some source code
46 | ----------------
47 | 
48 | Sometimes we need to look at code, which can be in the webpage and optionally
49 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work.
50 | 
51 | .. typealong:: The field data structure
52 | 
53 |    .. tabs::
54 | 
55 |       .. tab:: C++
56 | 
57 |          .. literalinclude:: exercise/serial/heat.h
58 |                         :language: cpp
59 |                         :lines: 9-19
60 |                                 
61 |       .. tab:: Fortran
62 | 
63 |          .. literalinclude:: exercise/serial/fortran/heat_mod.F90
64 |                         :language: fortran
65 |                         :lines: 11-17
66 | 
67 | Building the code
68 | -----------------
69 | 
70 | If there's terminal output to discuss, show something like::
71 | 
72 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o
73 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o
74 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o
75 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o
76 |   nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o
77 |   nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial  -lpng
78 | 
79 | 
80 | Running the code
81 | ----------------
82 | 
83 | To show a sample command line, use this approach
84 | 
85 | .. code-block:: bash
86 | 
87 |    ./heat_serial 800 800 1000
88 | 
89 | 
90 | .. keypoints::
91 | 
92 |    - TODO summarize the learning outcome
93 |    - TODO
94 | 


--------------------------------------------------------------------------------
/content/quick-reference.rst:
--------------------------------------------------------------------------------
1 | Quick Reference
2 | ---------------
3 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/composite.c:
--------------------------------------------------------------------------------
1 | #pragma omp target teams distribute parallel for simd [clauses] 
2 | 	for-loops
3 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/composite.f90:
--------------------------------------------------------------------------------
1 | !$omp target teams distribute parallel do simd [clauses]  
2 |         do-loops
3 | !$omp end target teams distribute parallel do simd
4 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/distribute.c:
--------------------------------------------------------------------------------
1 | #pragma omp distribute [clauses]  
2 | 	for-loops
3 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/distribute.clause:
--------------------------------------------------------------------------------
1 | clause:
2 | private(list)
3 | firstprivate(list)
4 | lastprivate(list)
5 | collapse(n)
6 | dist_schedule(kind[, chunk_size])
7 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/distribute.f90:
--------------------------------------------------------------------------------
1 | !$omp distribute [clauses]  
2 |         do-loops
3 | !$omp end distribute
4 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target.c:
--------------------------------------------------------------------------------
1 | #pragma omp target [clauses]
2 | 	structured-block
3 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target.clause:
--------------------------------------------------------------------------------
 1 | clause:
 2 | if([ target:] scalar-expression)
 3 | device(integer-expression) 
 4 | private(list)
 5 | firstprivate(list)
 6 | map([map-type:] list)
 7 | is_device_ptr(list)
 8 | defaultmap(tofrom:scalar) 
 9 | nowait
10 | depend(dependence-type : list)
11 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target.f90:
--------------------------------------------------------------------------------
1 | !$omp target [clauses]
2 |         structured-block
3 | !$omp end target
4 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_data.c:
--------------------------------------------------------------------------------
1 | #pragma omp target data clause [clauses] 
2 | 	structured-block
3 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_data.clause:
--------------------------------------------------------------------------------
1 | clause:
2 | if( [target data:]scalar-logical-expression)
3 | device(scalar-integer-expression) 
4 | map([map-type :] list)
5 | use_device_ptr(list)
6 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_data.f90:
--------------------------------------------------------------------------------
1 | !$omp target data clause [clauses]
2 |         structured-block
3 | !$omp end target data
4 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_enter_data.c:
--------------------------------------------------------------------------------
1 | #pragma omp target enter data [clauses]
2 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_enter_data.f90:
--------------------------------------------------------------------------------
1 | !$omp target enter data [clauses] 
2 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_enter_exit_data.clause:
--------------------------------------------------------------------------------
1 | clause: 
2 | if(scalar-logical-expression)
3 | device(scalar-integer-expression) 
4 | map( [map-type:] list)
5 | depend(dependence-type:list)
6 | nowait
7 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_exit_data.c:
--------------------------------------------------------------------------------
1 | #pragma omp target exit data [clauses]
2 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_exit_data.f90:
--------------------------------------------------------------------------------
1 | !$omp target exit data [clauses]
2 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/teams.c:
--------------------------------------------------------------------------------
1 | #pragma omp teams [clauses] 
2 | 	structured-block
3 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/teams.clause:
--------------------------------------------------------------------------------
1 | clause:
2 | num_teams(integer-expression)
3 | thread_limit(integer-expression)
4 | default(shared | none)
5 | private(list)
6 | firstprivate(list)
7 | shared(list)
8 | reduction(reduction-identifier : list) 
9 | 


--------------------------------------------------------------------------------
/content/syntax/v4.5.0/teams.f90:
--------------------------------------------------------------------------------
1 | !$omp teams [clauses]  
2 |         structured-block
3 | !$omp end teams
4 | 


--------------------------------------------------------------------------------
/content/volta-sm-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/volta-sm-architecture.png


--------------------------------------------------------------------------------
/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Sphinx
2 | sphinx_rtd_theme
3 | sphinx_rtd_theme_ext_color_contrast
4 | myst_nb
5 | sphinx-lesson
6 | 


--------------------------------------------------------------------------------