├── .github
└── workflows
│ └── sphinx.yml
├── .gitignore
├── LICENSE
├── LICENSE.code
├── Makefile
├── README.md
├── content
├── _static
│ └── overrides.css
├── conf.py
├── data.rst
├── diagrams
│ ├── stencil-fixed-boundaries
│ └── stencil.drawio
├── examples
│ └── v4.5.0
│ │ ├── Example_target.1.c
│ │ ├── Example_target.1.f90
│ │ ├── Example_target_data.2.c
│ │ ├── Example_target_data.2.f90
│ │ ├── Example_target_unstructured_data.1.c
│ │ ├── Example_target_unstructured_data.1.f90
│ │ ├── Example_teams.6.c
│ │ └── Example_teams.6.f90
├── exercise
│ ├── common
│ │ ├── pngwriter.c
│ │ └── pngwriter.h
│ ├── data_mapping
│ │ ├── LICENSE-MIT
│ │ ├── Makefile
│ │ ├── core.cpp
│ │ ├── fortran
│ │ │ ├── Makefile
│ │ │ ├── core.F90
│ │ │ ├── heat_mod.F90
│ │ │ ├── io.F90
│ │ │ ├── main.F90
│ │ │ ├── pngwriter_mod.F90
│ │ │ ├── setup.F90
│ │ │ └── utilities.F90
│ │ ├── heat.h
│ │ ├── heat_serial
│ │ ├── io.cpp
│ │ ├── main.cpp
│ │ ├── setup.cpp
│ │ └── utilities.cpp
│ ├── ex00
│ │ ├── LICENSE-MIT
│ │ ├── ex00.F90
│ │ └── ex00.c
│ ├── ex01
│ │ ├── LICENSE-MIT
│ │ ├── ex01.F90
│ │ ├── ex01.c
│ │ └── solution
│ │ │ ├── ex01.F90
│ │ │ └── ex01.c
│ ├── ex02
│ │ ├── LICENSE-MIT
│ │ ├── ex02.F90
│ │ ├── ex02.c
│ │ └── solution
│ │ │ ├── ex02.F90
│ │ │ └── ex02.c
│ ├── ex03
│ │ ├── LICENSE-MIT
│ │ ├── ex03.F90
│ │ ├── ex03.c
│ │ └── solution
│ │ │ ├── ex03.F90
│ │ │ └── ex03.c
│ ├── ex04
│ │ ├── LICENSE-MIT
│ │ ├── ex04.F90
│ │ ├── ex04.c
│ │ └── solution
│ │ │ ├── ex04.F90
│ │ │ └── ex04.c
│ ├── ex05
│ │ ├── LICENSE-MIT
│ │ ├── ex05.F90
│ │ ├── ex05.c
│ │ └── solution
│ │ │ ├── ex05.F90
│ │ │ └── ex05.c
│ ├── ex06
│ │ ├── ex06.F90
│ │ └── ex06.c
│ ├── offloading
│ │ ├── LICENSE-MIT
│ │ ├── Makefile
│ │ ├── core.cpp
│ │ ├── fortran
│ │ │ ├── Makefile
│ │ │ ├── core.F90
│ │ │ ├── heat_mod.F90
│ │ │ ├── io.F90
│ │ │ ├── main.F90
│ │ │ ├── pngwriter_mod.F90
│ │ │ ├── setup.F90
│ │ │ └── utilities.F90
│ │ ├── heat.h
│ │ ├── io.cpp
│ │ ├── main.cpp
│ │ ├── setup.cpp
│ │ └── utilities.cpp
│ ├── serial
│ │ ├── LICENSE-MIT
│ │ ├── Makefile
│ │ ├── core.cpp
│ │ ├── fortran
│ │ │ ├── Makefile
│ │ │ ├── core.F90
│ │ │ ├── heat_mod.F90
│ │ │ ├── io.F90
│ │ │ ├── main.F90
│ │ │ ├── pngwriter_mod.F90
│ │ │ ├── setup.F90
│ │ │ └── utilities.F90
│ │ ├── heat.h
│ │ ├── heat_0000.png
│ │ ├── heat_0010.png
│ │ ├── io.cpp
│ │ ├── main.cpp
│ │ ├── setup.cpp
│ │ └── utilities.cpp
│ └── solution
│ │ ├── common
│ │ ├── pngwriter.c
│ │ └── pngwriter.h
│ │ ├── data_mapping
│ │ ├── LICENSE-MIT
│ │ ├── Makefile
│ │ ├── core.cpp
│ │ ├── fortran
│ │ │ ├── Makefile
│ │ │ ├── core.F90
│ │ │ ├── heat_mod.F90
│ │ │ ├── io.F90
│ │ │ ├── main.F90
│ │ │ ├── pngwriter_mod.F90
│ │ │ ├── setup.F90
│ │ │ └── utilities.F90
│ │ ├── heat.h
│ │ ├── io.cpp
│ │ ├── main.cpp
│ │ ├── setup.cpp
│ │ └── utilities.cpp
│ │ └── offloading
│ │ ├── LICENSE-MIT
│ │ ├── Makefile
│ │ ├── core.cpp
│ │ ├── fortran
│ │ ├── Makefile
│ │ ├── core.F90
│ │ ├── heat_mod.F90
│ │ ├── io.F90
│ │ ├── main.F90
│ │ ├── pngwriter_mod.F90
│ │ ├── setup.F90
│ │ └── utilities.F90
│ │ ├── heat.h
│ │ ├── io.cpp
│ │ ├── main.cpp
│ │ ├── setup.cpp
│ │ └── utilities.cpp
├── gpu-architecture.rst
├── guide.rst
├── img
│ ├── Automatic-Scalability-of-Cuda-via-scaling-the-number-of-Streaming-Multiprocessors-and.png
│ ├── C2050Timeline.png
│ ├── ENCCS.jpg
│ ├── ENCCS_CSC_logos.jpg
│ ├── HardwareReview.png
│ ├── Loom.jpeg
│ ├── ThreadExecution.jpeg
│ ├── coalesced.png
│ ├── comparison.png
│ ├── compp.png
│ ├── distributed_vs_shared.png
│ ├── favicon.ico
│ ├── gpu_vs_cpu.png
│ ├── heat_0000.png
│ ├── heat_montage.png
│ ├── heteprogra.jpeg
│ ├── memsch.png
│ ├── microprocessor-trend-data.png
│ ├── nvidia_block_diagram.jpeg
│ ├── omp-parallel.png
│ ├── processes-threads.png
│ ├── processes-threads.svg
│ ├── shared_mem.png
│ ├── stencil-fixed-boundaries.svg
│ ├── stencil.svg
│ ├── threads.png
│ ├── volta-architecture.png
│ └── volta-sm-architecture.png
├── index.rst
├── interoperability.rst
├── introduction.rst
├── miniapp.rst
├── multi-gpu.rst
├── optimization.rst
├── porting.rst
├── profiling.rst
├── quick-reference.rst
├── syntax
│ └── v4.5.0
│ │ ├── composite.c
│ │ ├── composite.f90
│ │ ├── distribute.c
│ │ ├── distribute.clause
│ │ ├── distribute.f90
│ │ ├── target.c
│ │ ├── target.clause
│ │ ├── target.f90
│ │ ├── target_data.c
│ │ ├── target_data.clause
│ │ ├── target_data.f90
│ │ ├── target_enter_data.c
│ │ ├── target_enter_data.f90
│ │ ├── target_enter_exit_data.clause
│ │ ├── target_exit_data.c
│ │ ├── target_exit_data.f90
│ │ ├── teams.c
│ │ ├── teams.clause
│ │ └── teams.f90
├── target.rst
└── volta-sm-architecture.png
├── make.bat
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | /_build
2 | /venv
3 | .ipynb_checkpoints
4 | venv*
5 | jupyter_execute
6 | /content/.auctex-auto/
7 | /content/__pycache__/
8 | /.ccls-cache/
9 | a.out
10 | *~
11 | \#*
12 | *.o
13 |
--------------------------------------------------------------------------------
/LICENSE.code:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021, Thor Wikfeldt and individual contributors from ENCCS and CSC Training.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = content
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | # Package the exercise sources from content/exercise, excluding images, the heat_serial binary, objects and backups.
22 | code-samples-tarball:
23 | tar cfz openmp-gpu-code-samples.tgz --exclude \*.png --exclude heat_serial --exclude \*.o --exclude \*~ -C content exercise
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # OpenMP for GPU offloading
2 |
3 | OpenMP for GPU offloading
4 |
5 | ## Credit and license
6 |
7 | - https://enccs.github.io/lesson/#credits
8 |
--------------------------------------------------------------------------------
/content/_static/overrides.css:
--------------------------------------------------------------------------------
1 | /*
2 | * colors = ['#0271AE', '#DC2830', '#FFC438', # blue, red, light orange
3 | * '#6E3B87', '#008D5D', '#FA902D', # purple, green, orange
4 | * '#0095B7', '#CB0C7B', '#F7E43C', # cyan, magenta, yellow
5 | * '#88B93B', '#444F95', '#F16232'] # pea green, dark blue, dark orange
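6 | * NOTE(review): .purple (#633B87) and .magenta (#CB0C8B) below differ from the values listed above (#6E3B87, #CB0C7B) - confirm which are intended.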
7 | * To use them in rST, you need to define a command in the epilog, see conf.py
8 | */
9 | .blue {color: #0271AE;}
10 | .red {color: #DC2830;}
11 | .orange {color: #FFC438;}
12 | .purple {color: #633B87;}
13 | .green {color: #008D5D;}
14 | .dkorange {color: #FA902D;}
15 | .cyan {color: #0095B7;}
16 | .magenta {color: #CB0C8B;}
17 | .yellow {color: #F7E43C;}
18 | .peagreen {color: #88B93B;}
19 | .darkblue {color: #444F95;}
20 | .darkorange {color: #F16232;}
21 |
22 | /* override colors in sphinx_lesson.css with the schemes here: https://personal.sron.nl/~pault/#sec:qualitative */
23 |
24 | /* instructor-note */
25 | .rst-content .instructor-note {
26 | background: #DDDDDD;
27 | }
28 | .rst-content .instructor-note > .admonition-title {
29 | background: #BBBBBB;
30 | }
31 | .rst-content .instructor-note > .admonition-title::before {
32 | content: "";
33 | }
34 |
35 | /* callout */
36 | .rst-content .callout {
37 | background: #EEEEBB;
38 | }
39 | .rst-content .callout > .admonition-title {
40 | background: #BBCC33;
41 | }
42 |
43 | /* questions */
44 | .rst-content .questions {
45 | background: rgba(253, 219, 199, 0.3);
46 | }
47 | .rst-content .questions > .admonition-title {
48 | background: rgba(204, 51, 17, 0.5);
49 | }
50 |
51 | /* discussion: translucent panel background (alpha 0.3) */
52 | .rst-content .discussion {
53 | background: rgba(231, 212, 232, 0.3);
54 | }
55 | .rst-content .discussion > .admonition-title {
56 | background: rgba(194, 165, 207, 0.5);
57 | }
58 |
59 | /* signature */
60 | .rst-content .signature {
61 | background: rgba(217, 240, 211, 0.3);
62 | }
63 | .rst-content .signature > .admonition-title {
64 | background: rgba(172, 211, 158, 0.5);
65 | }
66 | .rst-content .signature > .admonition-title::before {
67 | content: "\01F527";
68 | }
69 |
70 | /* parameters */
71 | .rst-content .parameters {
72 | background: rgba(217, 240, 211, 0.0);
73 | }
74 | .rst-content .parameters > .admonition-title {
75 | background: rgba(172, 211, 158, 0.5);
76 | }
77 | .rst-content .parameters > .admonition-title::before {
78 | content: "\01F4BB";
79 | }
80 |
81 | /* typealong */
82 | .rst-content .typealong {
83 | background: rgba(221, 221, 221, 0.3);
84 | }
85 | .rst-content .typealong > .admonition-title {
86 | background: rgba(187, 187, 187, 1.0);
87 | }
88 | .rst-content .typealong > .admonition-title::before {
89 | content: "\02328";
90 | }
91 |
92 | /* Equation numbers to the right */
93 | .math {
94 | text-align: left;
95 | }
96 | .eqno {
97 | float: right;
98 | }
99 |
--------------------------------------------------------------------------------
/content/diagrams/stencil-fixed-boundaries:
--------------------------------------------------------------------------------
1 | 3Z1Lb9pAFIV/DctIfgHOMk1asmikRKnUKjtjD9iNYZAxgeTX1xSbx4wttQvPmZwV+I4N+MM6c8+d8Xjg3y52kyJapQ8yEfnAc5LdwL8beJ7rj93qZR95ryOO4xwi8yJL6tgp8Jx9iGbHOrrJErG+2LGUMi+z1WUwlsuliMuLWFQUcnu520zml9+6iuZCCzzHUa5Hf2ZJmR6i4dA5xe9FNk+bb3ab81tEzc51YJ1GidyehfyvA/+2kLI8vFvsbkW+p9dwORz3raP1+MMKsSz/5YDwMZXh8PdNtvrx8f0tde4fwvKq/pS3KN/UJ1z/2PK9IVDIzTIR+w9xBv6XbZqV4nkVxfvWbfWnV7G0XOTVllu9nWV5fitzWfw91k8iEc7iKr4uC/kqzlpGcSims6pFP43mN4miFLuzUH1aEyEXoizeq12a1gZxc5GN6u3t6R8L6lB69mc1sai+RubHTz5hrN7UJP+DqsdI1XPAVH1KqgGYakBBdWSZAgwZqcIVYERJFa0AYwaqx2vTFgUIGanCFeCakipaARqz9cmxBpZJgEvhrlSscA1wKeyVhhUuAhT+ygttEwEKg6VixYsAhcPSsMJFgCLBUossvgfG6lEkWBpWtLZ6FAmWijVAa2tD8ZNjHdkmAhQJloYVLgIUCZaKFS8CHAmWY5sIUNSwNaxwEaAsYuNFgKOKHdgmAhQuS8OKFgGfwmWpWOEi4FO4LLXUAhcBn8JlaVjhIkDhslSscBFwHQ6bpU6/CuFcKXyWNqkFz5XSaFnAlcNpqaOveK4cVksd0MJzpfBa2hgBfKCQY86QVnbFc+VwW2olC8+Vw26pxQE8V06/ZZDrTRHduR/3TzdT99GfrF+eHE9w3JjlA2/MaqVKoQEqVZNzsVqpUiiARhWtABT1Fh94Y1YrVYpqi0oVrgAUtRaNKloBKCotAfDGrFaqFHUWlSpcASiqLBpVtAJwFFnU1Mrk0Gs7Vkp3ZXLotR0rpb0yOfTajpXDXwGnY7djpTRYeBGgdFh4EaCwWGqChRcBSo+FFwFKk4UXAQqXpVVaDc4RaB8V4HBZwClY7VgpXJZWE4Bj5XRZ6FILx92ZWt4Kx0rhsrRMwCDWzfDly694eh1N4mD8epU+TaL5lev3nGHNwljErVyn4TAYOkeuGsQW1J1c1cVaDNqBdqxBz6mAGazagrh4rj13Woa4qqPZeK4991pmuGqLN+K59txtGeIaWMe15+qgIa729Vs9lwfNcNXmC+K59lwfNMTVvn6LIn3V5rbgufZcITTE1bp+q3nex+fmqk5yN1jK6sDac4kQY7cs4EpptyzgSmm3LOBKabcs4EpptyzgSmm3LOBKabcs4EpptyzgSmm38FxHFPkr8J7iDqwU6StwaawOrBTZK3Ahtw6sFMkrcNnBDqwUuStwnaEOrBSpK/B5JB1YKTJX4NNzOrByJq7wy3VMMU6gcYVfr81z5Lm44nOsMYXR0rjCk6wxhdNSueKzrDGF1dK44vstCq+lccX3WxRmS5uPge+3GN1Wj1irzULK8qxtUiFIH2Qi9nv8AQ==
--------------------------------------------------------------------------------
/content/diagrams/stencil.drawio:
--------------------------------------------------------------------------------
1 | 3Z1fc9o4FMU/DY+Z8R9szGOSbpOHZiaZ7Mzu9M2xBbg1iDEmJPn0awc7gCSm6Rb5mPNUcm1s9JN60Lm6FgP/ev5yU8TL2Z1MRT7wnPRl4H8ZeJ7rj9zqnzry2kQcx9lGpkWWNrFd4DF7E+2JTXSdpWJ1cGIpZV5my8NgIhcLkZQHsbgo5ObwtInMD++6jKdCCzwmca5H/8nScraNRoGzi9+KbDpr7+y27ZvH7clNYDWLU7nZC/l/DfzrQspy+2r+ci3yml7LZfu+r0eOfnywQizKz7whup/JKPhxmS3/fvv2PHNu76Lywg+2l3mO83XT4ubTlq8tgkKuF6mor+IM/KvNLCvF4zJO6qObqter2Kyc59VfbvVykuX5tcxl8f5efzKZeElSxVdlIX+KvSNp+BQGYXVEb0fTtGdRlOJlL9S060bIuSiL1+qU5mjQMm5Gmd92wmbXZcMmNNvrrTYWN4Nk+nHlHcfqRYPyN7C6dqmmsYgmRqphEomnyWmougpVNwRT9Ripeuix6lNSHYKpDimohj1TAMvfVhiqcAUIKamiFWDEQPVjbPZFASJGqnAFGFNSRStA62HPHOuwZxLgUrgrFStcA1wKe6VhhYsAhb/yor6JAIXBUrHiRYDCYWlY4SJAMcFSkyy+B8bqUUywNKxobfUoJlgq1iFaW1uKZ4417JsIUEywNKxwEaCYYKlY8SLAMcFy+iYCFDlsDStcBCiT2HgR4MhiD/smAhQuS8OKFgGfwmWpWOEi4FO4LDXVAhcBn8JlaVjhIkDhslSseBGw7LKO1F8u5EKchmig+lY4UctOoKOKVhUrOoPtW3YC3WANVYMFH62WncAnsE7konxs7mYoKv/joevBv7p0WxBcLGVWtbDmLBZJlmvQq8aWh2SNCrqPuwnFeTZdVH8mFUJRxa9qdFkS55fNgXmWpvVtjF152Nmn6A61itvXu+MjVb7fH56t/nAdjtSiWnIcgce561DkFrVCTjxXyuRiD7hyZBfViiM8V470olrEgedKkV/U1sXR1sLlqJPVlhrxXDkyjOrqDZ6rR8FVTYjjuXLmGDvkelnEX9y324fLJ/fev1l9f3A8wfEwsg98GNlIlUIDVKpd1h8bqVIogEYVrQAU+RYf+DCykSpFtkWlClcAilyLRhWtABSZliHwYWQjVYo8i0oVrgAUWRaNKloBOJIs6tSqy3IjM1ZKd9VluZEZK6W96rLcyIyVw18BH0EyY6U0WHgRoHRYeBGgsFjqBAsvApQeCy8ClCYLLwIULkvLtHZYI2BeFeBwWcASLDNWCpel5QTgWDldFjrVwrEjgTZvhWOlcFnaTACOVXdZ64EX5nX5+2q9rF5O65fzNlbdZC+s9QBN4by29ampcD409NQpCufXwferf5OncXyTDEc/L2YPN/H0IrRt3AIRpUPTf4DIe/LD3TM4GkQD6qNcR8BKDjNW28YNgrXLZRwzVtvGDYO1Q7k2Y7Vt3LrBCizmMGId2TZuEKxwERjZNm4YrGgRGNk2bp1gjYD1HGasto0bBCteBGwbNwxWuAjYXh7rBuuwbyJA4bJUrHgRoHBZGla4CFC4rAi4vbwZK4XLUrHCRaD9PGRY0SIQcbgsYG2nGSuFy9KworU1onBZKtYuKzrMWClclppqwYsAhcvSsMJFgMJlqVjxIsDhsoC1nWasHC4LWNtpxsrhsoC1nUasYw6XNeyZCIwpXJaGFS0CYwqXpWLFiwCFy1JTLXgRoHBZGla4CFC4LBUrXgQ4XBbwKQ8zVgqXpVW1wLFSuiw8Vg6Xpa68orG6bT+fOVfgNrtHuFL4LG19AL1I6DoURktLueK5cjgt4HNJR7hyWC01MYDnyum18FwpzNa4b49n2f+ZCAhXeC2W/Z+JwHDF6wCF3xr37Qkt+z8TAeGK1wHrWxhiuMJ1wPoehp1wdZ2+PaVl/3ciMGB7oAQUjksHi5cCCsulTrXgi7Gu9a0MMVzxCktpueDLsa71zQwh
U60e6ACl5cLrgPXtDCFc8TpgfT9DzEQLLwQtRzaweCXg9Fw9kAIKz6VlYeFVBIZtDc+Ra99qtFyPwnLpOQI8WE7PBc+9eJSeqwdcKTyXPiOAg233mf31hrxX7v/elPf0m+2edis4+Ga7rv8JjyYW6WVRyE0NMo9Xqyw5pLx9g0in4ne57LU7MDS7jRUij8vs+fDyJhbNHe5lVt14txn1sUxOe4mVXBeJaN61I6pdKDjmBNsLlXExFaV2ofeu+Wj2n/TWJ4zfufdWeKreGsF7S3eT2cC7/rGVtHMTr0CdpI8+9xViUbt0U/nO94KArh/A6erWMtuO3HoEnxvfUBUDF85Xt5hZPXKryzlnCDhQl/fwgHWrmZ3n4NXEwR7b6s9CynL/m7Bq1OxOpqI+4z8=
--------------------------------------------------------------------------------
/content/examples/v4.5.0/Example_target.1.c:
--------------------------------------------------------------------------------
1 | extern void init(float*, float*, int);
2 | extern void output(float*, int);
3 | void vec_mult(int N)
4 | {
5 | int i;
6 | float p[N], v1[N], v2[N];
7 | init(v1, v2, N);
8 | #pragma omp target
9 | #pragma omp parallel for private(i)
10 | for (i=0; i
2 | typedef struct {
3 | double *A;
4 | int N;
5 | } Matrix;
6 | /* Allocate n doubles on the host for mat->A, record n in mat->N, then apply map(alloc:) to mat->A[0:n]. */
7 | void init_matrix(Matrix *mat, int n)
8 | {
9 | mat->A = (double *)malloc(n*sizeof(double)); /* result is not checked for NULL */
10 | mat->N = n;
11 | #pragma omp target enter data map(alloc:mat->A[:n])
12 | }
13 | /* Apply map(delete:) to mat->A[0:mat->N], then reset N, free the host buffer and null the pointer. */
14 | void free_matrix(Matrix *mat)
15 | {
16 | #pragma omp target exit data map(delete:mat->A[:mat->N])
17 | mat->N = 0; /* reset only after the directive above, which reads mat->N */
18 | free(mat->A);
19 | mat->A = NULL;
20 | }
21 |
--------------------------------------------------------------------------------
/content/examples/v4.5.0/Example_target_unstructured_data.1.f90:
--------------------------------------------------------------------------------
1 | module example
2 | real(8), allocatable :: A(:) ! module-level array used by both initialize and finalize
3 | ! The enter data and exit data directives for A are issued from two different routines.
4 | contains
5 | subroutine initialize(N)
6 | integer :: N
7 | ! Allocate A with N elements on the host, then apply map(alloc:) to it.
8 | allocate(A(N))
9 | !$omp target enter data map(alloc:A)
10 |
11 | end subroutine initialize
12 |
13 | subroutine finalize()
14 | ! Apply map(delete:) to A before the host array itself is deallocated.
15 | !$omp target exit data map(delete:A)
16 | deallocate(A)
17 |
18 | end subroutine finalize
19 | end module example
20 |
--------------------------------------------------------------------------------
/content/examples/v4.5.0/Example_teams.6.c:
--------------------------------------------------------------------------------
1 | extern void init(float *, float *, int);
2 | extern void output(float *, int);
3 | void vec_mult(float *p, float *v1, float *v2, int N)
4 | {
5 | int i;
6 | init(v1, v2, N);
7 | #pragma omp target teams map(to: v1[0:N], v2[:N]) map(from: p[0:N])
8 | #pragma omp distribute parallel for simd
9 | for (i=0; idata.data();
17 | double *prevdata = prev->data.data();
18 | int nx = curr->nx;
19 | int ny = curr->ny;
20 |
21 | // Determine the temperature field at next time step
22 | // As we have fixed boundary conditions, the outermost gridpoints
23 | // are not updated.
24 | double dx2 = prev->dx * prev->dx;
25 | double dy2 = prev->dy * prev->dy;
26 | #pragma omp target teams distribute parallel for \
27 | map(currdata[0:(nx+2)*(ny+2)],prevdata[0:(nx+2)*(ny+2)])
28 | for (int i = 1; i < nx + 1; i++) {
29 | for (int j = 1; j < ny + 1; j++) {
30 | int ind = i * (ny + 2) + j;
31 | int ip = (i + 1) * (ny + 2) + j;
32 | int im = (i - 1) * (ny + 2) + j;
33 | int jp = i * (ny + 2) + j + 1;
34 | int jm = i * (ny + 2) + j - 1;
35 | currdata[ind] = prevdata[ind] + a*dt*
36 | ((prevdata[ip] - 2.0*prevdata[ind] + prevdata[im]) / dx2 +
37 | (prevdata[jp] - 2.0*prevdata[ind] + prevdata[jm]) / dy2);
38 | }
39 | }
40 | }
41 |
42 | // Start a data region and copy temperature fields to the device.
43 | void enter_data(field *curr, field *prev)
44 | {
45 | int nx, ny;
46 | double *currdata, *prevdata;
47 | // Raw pointers and grid sizes are extracted so the mapping directive added below can use them.
48 | currdata = curr->data.data();
49 | prevdata = prev->data.data();
50 | nx = curr->nx;
51 | ny = curr->ny;
52 |
53 | // Exercise placeholder: add the data mapping for currdata and prevdata here.
54 | // evolve() maps each array as [0:(nx+2)*(ny+2)]; no directive is present yet, so this is a no-op.
55 | }
56 |
57 | // End a data region and copy temperature fields back to the host.
58 | void exit_data(field *curr, field *prev)
59 | {
60 | int nx, ny;
61 | double *currdata, *prevdata;
62 | // Raw pointers and grid sizes are extracted so the mapping directive added below can use them.
63 | currdata = curr->data.data();
64 | prevdata = prev->data.data();
65 | nx = curr->nx;
66 | ny = curr->ny;
67 |
68 | // Exercise placeholder: add the data mapping that ends the region for currdata and prevdata here.
69 | // evolve() maps each array as [0:(nx+2)*(ny+2)]; no directive is present yet, so this is a no-op.
70 | }
71 |
72 | // Copy a temperature field from the device to the host.
73 | void update_host(field *temperature)
74 | {
75 | int nx, ny;
76 | double *data;
77 | // Raw pointer and grid sizes are extracted so the directive added below can use them.
78 | data = temperature->data.data();
79 | nx = temperature->nx;
80 | ny = temperature->ny;
81 |
82 | // Exercise placeholder: add the device-to-host update of data here.
83 | // No directive is present yet, so this function currently has no effect.
84 | }
85 |
--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 CSC Training
2 | # Copyright (c) 2021 ENCCS
3 | ifeq ($(COMP),)
4 | COMP=nv
5 | endif
6 |
7 | HAVE_PNG=0
8 | ifeq ($(HAVE_PNG),1)
9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib
11 | endif
12 |
13 |
14 | COMMONDIR=../../common
15 |
16 | ifeq ($(COMP),nv)
17 | FC=nvfortran
18 | CC=nvc
19 | FCFLAGS=-O3 -mp=gpu -Minfo=mp
20 | CCFLAGS=-O3 -I$(COMMONDIR)
21 | LDFLAGS=
22 | LIBS=
23 | endif
24 |
25 | ifeq ($(COMP),gnu)
26 | FC=gfortran
27 | CC=gcc
28 | FCFLAGS=-O3 -Wall -fopenmp
29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR)
30 | LDFLAGS=-fopenmp
31 | LIBS=
32 | endif
33 |
34 |
35 | EXE=heat_serial
36 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o
37 | OBJS_PNG= $(COMMONDIR)/pngwriter.o
38 |
39 | all: $(EXE)
40 |
41 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
42 | heat_mod.o: heat_mod.F90
43 | core.o: core.F90 heat_mod.o
44 | utilities.o: utilities.F90 heat_mod.o
45 | io.o: io.F90 heat_mod.o pngwriter_mod.o
46 | setup.o: setup.F90 heat_mod.o utilities.o io.o
47 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o
48 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o
49 |
50 | $(EXE): $(OBJS) $(OBJS_PNG)
51 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
52 |
53 | %.o: %.F90
54 | $(FC) $(FCFLAGS) -c $< -o $@
55 |
56 | %.o: %.c
57 | $(CC) -DHAVE_PNG=$(HAVE_PNG) $(CCFLAGS) -c $< -o $@
58 |
59 | .PHONY: clean
60 | clean:
61 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ ../../common/*.o
62 |
63 |
--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/core.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Main solver routines for heat equation solver
4 | module core
5 | use heat
6 |
7 | contains
8 |
9 | ! Update the temperature values using a five-point stencil
10 | ! Arguments:
11 | ! curr (type(field)): current temperature values; points 1..nx, 1..ny are overwritten
12 | ! prev (type(field)): temperature values from previous time step
13 | ! a (real(dp)): diffusivity (read only)
14 | ! dt (real(dp)): time step (read only)
15 | subroutine evolve(curr, prev, a, dt)
16 |
17 | implicit none
18 |
19 | type(field),target, intent(inout) :: curr, prev
20 | real(dp), intent(in) :: a, dt
21 | integer :: i, j, nx, ny
22 | real(dp) :: dx, dy
23 | real(dp), pointer, contiguous, dimension(:,:) :: currdata, prevdata
24 | ! Copy the derived-type components into plain locals and pointer aliases so
25 | ! that the offloaded loop refers to simple variables instead of curr%... / prev%...
26 | nx = curr%nx
27 | ny = curr%ny
28 | dx = curr%dx
29 | dy = curr%dy
30 | currdata => curr%data
31 | prevdata => prev%data
32 |
33 | ! Determine the temperature field at the next time step. As we have
34 | ! fixed boundary conditions, the outermost gridpoints are not
35 | ! updated.
36 | !$omp target teams distribute parallel do
37 | do j = 1, ny
38 | do i = 1, nx
39 | currdata(i, j) = prevdata(i, j) + a * dt * &
40 | & ((prevdata(i-1, j) - 2.0 * prevdata(i, j) + &
41 | & prevdata(i+1, j)) / dx**2 + &
42 | & (prevdata(i, j-1) - 2.0 * prevdata(i, j) + &
43 | & prevdata(i, j+1)) / dy**2)
44 | end do
45 | end do
46 | !$omp end target teams distribute parallel do
47 | end subroutine evolve
48 |
49 | ! Start a data region and copy temperature fields to the device
50 | ! curr (type(field)): current temperature values
51 | ! prev (type(field)): values from previous time step
52 | subroutine enter_data(curr, prev)
53 | implicit none
54 | type(field), target, intent(in) :: curr, prev
55 | real(kind=dp), pointer, contiguous :: currdata(:,:), prevdata(:,:)
56 | ! Pointer aliases to the data components, available to the mapping directive added below.
57 | currdata => curr%data
58 | prevdata => prev%data
59 |
60 | ! Exercise placeholder: add the data mapping for currdata and prevdata here.
61 | ! No directive is present yet, so this routine currently has no effect.
62 | end subroutine enter_data
63 |
64 | ! End a data region and copy temperature fields back to the host
65 | ! curr (type(field)): current temperature values
66 | ! prev (type(field)): values from previous time step
67 | subroutine exit_data(curr, prev)
68 | implicit none
69 | type(field), target :: curr, prev
70 | real(kind=dp), pointer, contiguous :: currdata(:,:), prevdata(:,:)
71 | ! Pointer aliases to the data components, available to the mapping directive added below.
72 | currdata => curr%data
73 | prevdata => prev%data
74 |
75 | ! Exercise placeholder: add the data mapping that ends the region for currdata and prevdata here.
76 | ! No directive is present yet, so this routine currently has no effect.
77 | end subroutine exit_data
78 |
79 | ! Copy a temperature field from the device to the host
80 | ! temperature (type(field)): temperature field
81 | subroutine update_host(temperature)
82 | implicit none
83 | type(field), target :: temperature
84 | real(kind=dp), pointer, contiguous :: tempdata(:,:)
85 | ! Pointer alias to the data component, available to the directive added below.
86 | tempdata => temperature%data
87 |
88 | ! Exercise placeholder: add the device-to-host update of tempdata here.
89 | ! No directive is present yet, so this routine currently has no effect.
90 | end subroutine update_host
91 |
92 | end module core
93 |
--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/heat_mod.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Field metadata for heat equation solver
4 | module heat
5 | use iso_fortran_env, only : REAL64
6 | implicit none
7 |
8 | integer, parameter :: dp = REAL64
9 | real(dp), parameter :: DX = 0.01_dp, DY = 0.01_dp ! Fixed grid spacing; _dp keeps the literals in double precision
10 |
11 | type :: field
12 | integer :: nx ! dimensions of the field (nx by ny points)
13 | integer :: ny
14 | real(dp) :: dx ! grid spacing in each direction
15 | real(dp) :: dy
16 | real(dp), dimension(:,:), allocatable :: data
17 | end type field
18 |
19 | contains
20 | ! Initialize the field type metadata
21 | ! Arguments:
22 | ! field0 (type(field)): field to initialize; intent(out), so an allocated data component is deallocated on entry
23 | ! nx, ny: field dimensions (the spacings dx, dy are taken from the module constants DX, DY)
24 | subroutine set_field_dimensions(field0, nx, ny)
25 | implicit none
26 |
27 | type(field), intent(out) :: field0
28 | integer, intent(in) :: nx, ny
29 |
30 | field0%dx = DX
31 | field0%dy = DY
32 | field0%nx = nx
33 | field0%ny = ny
34 |
35 | end subroutine set_field_dimensions
36 |
37 | end module heat
38 |
--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/io.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! I/O routines for heat equation solver
4 | module io
5 | use heat
6 |
7 | contains
8 |
9 | ! Output routine, saves the temperature distribution as a png image
10 | ! Arguments:
11 | ! curr (type(field)): variable with the temperature data
12 | ! iter (integer): index of the time step
13 | subroutine write_field(curr, iter)
14 |
15 | use pngwriter
16 | implicit none
17 | type(field), intent(in) :: curr
18 | integer, intent(in) :: iter
19 |
20 | character(len=85) :: filename
21 | ! stat receives the status returned by save_png; it is not examined in this routine
22 | integer :: stat
23 | real(dp), dimension(:,:), allocatable, target :: full_data
24 |
25 | allocate(full_data(curr%nx, curr%ny))
26 | ! Copy points 1..nx, 1..ny of curr%data into a separate nx-by-ny buffer
27 | full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny)
28 | ! File name is 'heat_' + the step index zero-padded to four digits + '.png'
29 | write(filename,'(A5,I4.4,A4,A)') 'heat_', iter, '.png'
30 | stat = save_png(full_data, curr%nx, curr%ny, filename)
31 | deallocate(full_data)
32 |
33 | end subroutine write_field
34 |
35 |
36 | ! Reads the temperature distribution from an input file
37 | ! Arguments:
38 | ! field0 (type(field)): field variable that will store the
39 | ! read data
40 | ! filename (char): name of the input file
41 | ! Note that this version assumes the input data to be in C memory layout
42 | subroutine read_field(field0, filename)
43 |
44 | implicit none
45 | type(field), intent(out) :: field0
46 | character(len=85), intent(in) :: filename
47 |
48 | integer :: nx, ny, i
49 | character(len=2) :: dummy
50 |
51 | real(dp), dimension(:,:), allocatable :: full_data
52 | ! NOTE(review): open/read carry no iostat= or status=, so a missing or malformed file is not handled here
53 | open(10, file=filename)
54 | ! Read the header: one leading token (discarded into dummy) followed by nx and ny
55 | read(10, *) dummy, nx, ny
56 |
57 | call set_field_dimensions(field0, nx, ny)
58 |
59 | ! The temperature array also contains a one-point halo on each side (indices 0 and n+1)
60 | allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
61 |
62 | allocate(full_data(nx, ny))
63 | ! Read the data, one record per value of the first index
64 | do i = 1, nx
65 | read(10, *) full_data(i, 1:ny)
66 | end do
67 |
68 | ! Copy to full array containing also boundaries
69 | field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:)
70 |
71 | ! Set the boundary (halo) values by copying the adjacent row/column
72 | field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1)
73 | field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny)
74 | field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1)
75 | field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1)
76 |
77 | close(10)
78 | deallocate(full_data)
79 |
80 | end subroutine read_field
81 |
82 | end module io
83 |
--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/main.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Heat equation solver in 2D.
4 |
5 | program heat_solve
6 | use heat
7 | use core
8 | use io
9 | use setup
10 | use utilities
11 | use omp_lib
12 |
13 | implicit none
14 |
15 | real(dp), parameter :: a = 0.5 ! Diffusion constant
16 | type(field) :: current, previous ! Current and previous temperature fields
17 |
18 | real(dp) :: dt ! Time step
19 | integer :: nsteps ! Number of time steps
20 | integer, parameter :: image_interval = 1500 ! Image output interval (in time steps)
21 |
22 | integer :: iter
23 |
24 | real(dp) :: average_temp ! Average temperature
25 |
26 | real(kind=dp) :: start, stop ! Timers
27 | ! NOTE(review): setup's initialize is declared as (previous, current, nsteps) but is called here with (current, previous, nsteps) - confirm this is intended
28 | call initialize(current, previous, nsteps)
29 |
30 | ! Draw the picture of the initial state
31 | call write_field(current, 0)
32 |
33 | average_temp = average(current)
34 | write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp
35 |
36 | ! Largest stable time step
37 | dt = current%dx**2 * current%dy**2 / &
38 | & (2.0 * a * (current%dx**2 + current%dy**2))
39 |
40 | ! Main iteration loop; the timed section covers the data transfers as well as the time stepping
41 |
42 | start = omp_get_wtime()
43 |
44 | ! copy data to device
45 | call enter_data(current, previous)
46 |
47 | do iter = 1, nsteps
48 | call evolve(current, previous, a, dt)
49 | if (mod(iter, image_interval) == 0) then
50 | ! update data on host for output
51 | call update_host(current)
52 | call write_field(current, iter)
53 | end if
54 | call swap_fields(current, previous)
55 | end do
56 |
57 | ! copy data back to host
58 | call exit_data(current, previous)
59 |
60 | stop = omp_get_wtime()
61 |
62 | ! Average temperature for reference; taken from previous because of the swap_fields call that ends each iteration
63 | average_temp = average(previous)
64 |
65 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.'
66 | write(*,'(A,F9.6)') 'Average temperature: ', average_temp
67 | if (command_argument_count() == 0) then
68 | write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239
69 | end if
70 |
71 | call finalize(current, previous)
72 |
73 | end program heat_solve
74 |
--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/pngwriter_mod.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! PNG writer for heat equation solver
4 | module pngwriter
5 | use heat
6 |
7 | contains
8 |
9 | function save_png(data, nx, ny, fname) result(stat)
10 | ! Fortran wrapper that forwards data, nx, ny and fname to the C function bound as "save_png" and returns its status
11 | use, intrinsic :: ISO_C_BINDING
12 | implicit none
13 |
14 | real(dp), dimension(:,:), intent(in) :: data
15 | integer, intent(in) :: nx, ny
16 | character(len=*), intent(in) :: fname
17 | integer :: stat
18 |
19 | ! Interface for save_png C-function
20 | interface
21 | ! The C-function definition is given in the original comment as
22 | ! int save_png(double *data, const int nx, const int ny, const char *fname)
23 | ! NOTE(review): the interface below also passes a fifth argument, order - confirm the prototype in pngwriter.h
24 | function save_png_c(data, nx, ny, fname, order) &
25 | & bind(C,name="save_png") result(stat)
26 | use, intrinsic :: ISO_C_BINDING
27 | implicit none
28 | real(kind=C_DOUBLE) :: data(*)
29 | integer(kind=C_INT), value, intent(IN) :: nx, ny
30 | character(kind=C_CHAR), intent(IN) :: fname(*)
31 | character(kind=C_CHAR), value, intent(IN) :: order
32 | integer(kind=C_INT) :: stat
33 | end function save_png_c
34 | end interface
35 | ! The file name is trimmed and NUL-terminated for C; 'f' is passed as order (presumably Fortran ordering - confirm in pngwriter.c)
36 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
37 | if (stat /= 0) then
38 | write(*,*) 'save_png returned error!'
39 | end if
40 |
41 | end function save_png
42 |
43 | end module pngwriter
44 |
--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/setup.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Setup routines for heat equation solver
4 | module setup
5 | use heat
6 |
7 | contains
8 |
9 | subroutine initialize(previous, current, nsteps)
10 | use utilities
11 | use io
12 | ! Sets up both fields and the step count from the command line: defaults, an input file, or explicit grid sizes
13 | implicit none
14 |
15 | type(field), intent(out) :: previous, current
16 | integer, intent(out) :: nsteps
17 |
18 | integer :: rows, cols
19 | logical :: using_input_file
20 | character(len=85) :: input_file, arg ! Input file name and command line arguments
21 |
22 |
23 | ! Default values for grid size and time steps
24 | rows = 2000
25 | cols = 2000
26 | nsteps = 500
27 | using_input_file = .false.
28 |
29 | ! Read in the command line arguments and
30 | ! set up the needed variables
31 | select case(command_argument_count())
32 | case(0) ! No arguments -> default values
33 | case(1) ! One argument -> input file name
34 | using_input_file = .true.
35 | call get_command_argument(1, input_file)
36 | case(2) ! Two arguments -> input file name and number of steps
37 | using_input_file = .true.
38 | call get_command_argument(1, input_file)
39 | call get_command_argument(2, arg)
40 | read(arg, *) nsteps
41 | case(3) ! Three arguments -> rows, cols and nsteps
42 | call get_command_argument(1, arg)
43 | read(arg, *) rows
44 | call get_command_argument(2, arg)
45 | read(arg, *) cols
46 | call get_command_argument(3, arg)
47 | read(arg, *) nsteps
48 | case default ! Any other argument count -> call usage and stop
49 | call usage()
50 | stop
51 | end select
52 |
53 | ! Initialize the fields according to the command line arguments
54 | if (using_input_file) then
55 | call read_field(previous, input_file)
56 | call copy_fields(previous, current)
57 | else
58 | call set_field_dimensions(previous, rows, cols)
59 | call set_field_dimensions(current, rows, cols)
60 | call generate_field(previous)
61 | call copy_fields(previous, current)
62 | end if
63 |
64 | end subroutine initialize
65 |
66 | ! Generate the initial temperature field. The pattern is a disc with a radius
67 | ! of nx / 6 in the center of the grid.
68 | ! Boundary conditions are (different) constant temperatures outside the grid
69 | subroutine generate_field(field0)
70 | use heat
71 | ! field0: its nx and ny are read here, so they must be set before the call
72 | implicit none
73 |
74 | type(field), intent(inout) :: field0
75 |
76 | real(dp) :: radius2
77 | integer :: i, j, ds2
78 |
79 | ! The arrays for field contain also a halo region
80 | allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
81 |
82 | ! Square of the disk radius
83 | radius2 = (field0%nx / 6.0_dp)**2
84 | ! Cells inside the disc get 5, all other cells get 65
85 | do j = 0, field0%ny + 1
86 | do i = 0, field0%nx + 1
87 | ds2 = int((i - field0%nx / 2.0_dp + 1)**2 + &
88 | & (j - field0%ny / 2.0_dp + 1)**2)
89 | if (ds2 < radius2) then
90 | field0%data(i,j) = 5.0_dp
91 | else
92 | field0%data(i,j) = 65.0_dp
93 | end if
94 | end do
95 | end do
96 | ! The halo values written by the loop above are overwritten here
97 | ! Boundary conditions
98 | field0%data(:,0) = 20.0_dp
99 | field0%data(:,field0%ny+1) = 70.0_dp
100 | field0%data(0,:) = 85.0_dp
101 | field0%data(field0%nx+1,:) = 5.0_dp
102 |
103 | end subroutine generate_field
104 |
105 |
106 | ! Clean up routine for field type: releases the data arrays of both fields
107 | ! Arguments:
108 | ! field0, field1 (type(field)): field variables to be cleared
109 | subroutine finalize(field0, field1)
110 | use heat
111 |
112 | implicit none
113 |
114 | type(field), intent(inout) :: field0, field1
115 | ! Deallocating an unallocated array is an error, so guard each release
116 | if (allocated(field0%data)) deallocate(field0%data)
117 | if (allocated(field1%data)) deallocate(field1%data)
118 |
119 | end subroutine finalize
120 |
121 | ! Helper routine that prints out a simple usage if
122 | ! user gives more than three arguments
123 | subroutine usage()
124 | implicit none
125 | character(len=256) :: buf
126 | ! Argument 0 is the command name; one line per accepted argument form
127 | call get_command_argument(0, buf)
128 | write (*,'(A)') 'Usage:'
129 | write (*,'(A, " (default values will be used)")') trim(buf)
130 | write (*,'(A, " <filename>")') trim(buf)
131 | write (*,'(A, " <filename> <nsteps>")') trim(buf)
132 | write (*,'(A, " <rows> <cols> <nsteps>")') trim(buf)
133 | end subroutine usage
134 |
135 | end module setup
136 |
--------------------------------------------------------------------------------
/content/exercise/data_mapping/fortran/utilities.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Utility routines for heat equation solver
4 | ! NOTE: This file does not need to be edited!
5 | module utilities
6 | use heat
7 |
8 | contains
9 |
10 | ! Swap the data fields of two variables of type field
11 | ! Arguments:
12 | ! curr, prev (type(field)): the two variables that are swapped
13 | subroutine swap_fields(curr, prev)
14 | ! Only the data components are exchanged, via move_alloc
15 | implicit none
16 |
17 | type(field), intent(inout) :: curr, prev
18 | real(dp), allocatable, dimension(:,:) :: tmp
19 | ! Rotate the allocations: curr -> tmp, prev -> curr, tmp -> prev
20 | call move_alloc(curr%data, tmp)
21 | call move_alloc(prev%data, curr%data)
22 | call move_alloc(tmp, prev%data)
23 | end subroutine swap_fields
24 |
25 | ! Copy the data from one field to another
26 | ! Arguments:
27 | ! from_field (type(field)): variable to copy from
28 | ! to_field (type(field)): variable to copy to; its data is allocated if needed
29 | subroutine copy_fields(from_field, to_field)
30 | ! An already allocated target must have the same shape as the source
31 | implicit none
32 |
33 | type(field), intent(in) :: from_field
34 | type(field), intent(inout) :: to_field
35 | ! inout, not out: intent(out) would deallocate to_field%data on entry
36 | ! Consistency checks
37 | if (.not.allocated(from_field%data)) then
38 | write (*,*) "Can not copy from a field without allocated data"
39 | stop
40 | end if
41 | if (.not.allocated(to_field%data)) then
42 | ! Target is not initialized, allocate memory with the source bounds
43 | allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
44 | & lbound(from_field%data, 2):ubound(from_field%data, 2)))
45 | else if (any(shape(from_field%data) /= shape(to_field%data))) then
46 | write (*,*) "Wrong field data sizes in copy routine"
47 | print *, shape(from_field%data), shape(to_field%data)
48 | stop
49 | end if
50 |
51 | to_field%data = from_field%data
52 | ! Copy the grid description along with the data
53 | to_field%nx = from_field%nx
54 | to_field%ny = from_field%ny
55 | to_field%dx = from_field%dx
56 | to_field%dy = from_field%dy
57 | end subroutine copy_fields
58 |
59 | function average(field0)
60 | ! Mean of field0%data over the index range 1:nx by 1:ny
61 | implicit none
62 |
63 | real(dp) :: average
64 | type(field), intent(in) :: field0
65 |
66 | ! Indices 0 and nx+1 / ny+1 are left out of the sum; generate_field
67 | ! allocates them as the halo region
68 |
69 | average = sum(field0%data(1:field0%nx, 1:field0%ny))
70 | average = average / (field0%nx * field0%ny)
71 |
72 | end function average
73 |
74 | end module utilities
75 |
--------------------------------------------------------------------------------
/content/exercise/data_mapping/heat.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | #ifndef __HEAT_H__
4 | #define __HEAT_H__
5 |
6 | #include <vector>
7 |
8 | // Datatype for temperature field
9 | struct field {
10 | // nx and ny are the dimensions of the field. The array data
11 | // contains also ghost layers, so it will have dimensions nx+2 x ny+2
12 | int nx;
13 | int ny;
14 | // Size of the grid cells
15 | double dx;
16 | double dy;
17 | // The temperature values in the 2D grid, flattened as data[i * (ny + 2) + j]
18 | std::vector<double> data;
19 | };
20 |
21 | // We use here fixed grid spacing
22 | const double DX = 0.01;
23 | const double DY = 0.01;
24 |
25 | #ifdef __cplusplus
26 | extern "C" {
27 | #endif
28 | // Function prototypes
29 | void set_field_dimensions(field *temperature, int nx, int ny);
30 |
31 | void initialize(int argc, char *argv[], field *temperature1,
32 | field *temperature2, int *nsteps);
33 |
34 | void generate_field(field *temperature);
35 | // Mean over the interior (non-boundary) cells
36 | double average(field *temperature);
37 |
38 | void evolve(field *curr, field *prev, double a, double dt);
39 | // Writes the interior of the field to a PNG file named after iter
40 | void write_field(field *temperature, int iter);
41 | // Reads a field from filename and leaves both fields in the same state
42 | void read_field(field *temperature1, field *temperature2,
43 | char *filename);
44 | // Copies the data of temperature1 into temperature2
45 | void copy_field(field *temperature1, field *temperature2);
46 | // Exchanges the data arrays of the two fields
47 | void swap_fields(field *temperature1, field *temperature2);
48 | // Resizes data to (nx + 2) * (ny + 2) entries
49 | void allocate_field(field *temperature);
50 |
51 | void enter_data(field *temperature1, field *temperature2);
52 |
53 | void exit_data(field *temperature1, field *temperature2);
54 |
55 | void update_host(field *temperature);
56 |
57 | #ifdef __cplusplus
58 | }
59 | #endif
60 | #endif // __HEAT_H__
61 |
62 |
--------------------------------------------------------------------------------
/content/exercise/data_mapping/heat_serial:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/exercise/data_mapping/heat_serial
--------------------------------------------------------------------------------
/content/exercise/data_mapping/io.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // I/O related functions for heat equation solver
4 |
5 | #include <algorithm>
6 | #include <cstdio>
7 | #include <cstdlib>
8 | #include <vector>
9 |
10 | #include "heat.h"
11 | #include "pngwriter.h"
12 |
13 | // Output routine that prints out a picture of the temperature
14 | // distribution. The file name is built from the iteration number iter.
15 | void write_field(field *temperature, int iter)
16 | {
17 | char filename[64];
18 |
19 | // The actual write routine takes only the actual data
20 | // (without boundary layers) so we need to copy an array with that.
21 | std::vector<double> inner_data(temperature->nx * temperature->ny);
22 | auto inner_data_iterator = inner_data.begin();
23 | auto beginning_of_row = temperature->data.begin() + (temperature->ny + 2) + 1;
24 | for (int i = 0; i < temperature->nx; i++) {
25 | auto end_of_row = beginning_of_row + temperature->ny;
26 | std::copy(beginning_of_row, end_of_row, inner_data_iterator);
27 | inner_data_iterator += temperature->ny;
28 | beginning_of_row = end_of_row + 2;
29 | }
30 |
31 | // Write out the data to a png file; snprintf keeps the name within the buffer
32 | snprintf(filename, sizeof(filename), "%s_%04d.png", "heat", iter);
33 | save_png(inner_data.data(), temperature->nx, temperature->ny, filename, 'c');
34 | }
35 |
36 | // Read the initial temperature distribution from a file and
37 | // initialize the temperature fields temperature1 and
38 | // temperature2 to the same initial state.
39 | void read_field(field *temperature1, field *temperature2, char *filename)
40 | {
41 | FILE *fp;
42 | int nx, ny, ind;
43 |
44 | int nx_local, ny_local, count;
45 | // A file that cannot be opened is reported like a malformed header
46 | fp = fopen(filename, "r");
47 | // Read the header
48 | count = fp ? fscanf(fp, "# %d %d \n", &nx, &ny) : 0;
49 | if (count < 2 || nx < 1 || ny < 1) {
50 | fprintf(stderr, "Error while reading the input file!\n");
51 | exit(-1);
52 | }
53 |
54 | set_field_dimensions(temperature1, nx, ny);
55 | set_field_dimensions(temperature2, nx, ny);
56 |
57 | // Allocate arrays (including boundary layers)
58 | int newSize = (temperature1->nx + 2) * (temperature1->ny + 2);
59 | temperature1->data.resize(newSize, 0.0);
60 | temperature2->data.resize(newSize, 0.0);
61 |
62 | // Array from file
63 | std::vector<double> file_data(nx * ny, 0.0);
64 |
65 | // Read the actual data, stopping at the first missing or malformed value
66 | for (ind = 0; ind < nx * ny; ind++) {
67 | if (fscanf(fp, "%lf", &file_data[ind]) != 1) {
68 | fprintf(stderr, "Error while reading the input file!\n");
69 | exit(-1);
70 | }
71 | }
72 |
73 | nx_local = temperature1->nx;
74 | ny_local = temperature1->ny;
75 |
76 | // Copy to the inner part of the full temperature field
77 | auto temperature_data_iterator = temperature1->data.begin() + (ny_local + 2) + 1;
78 | auto beginning_of_row = file_data.begin();
79 | for (int i = 0; i < nx_local; i++) {
80 | auto end_of_row = beginning_of_row + ny_local;
81 | std::copy(beginning_of_row, end_of_row, temperature_data_iterator);
82 | temperature_data_iterator += ny_local + 2;
83 | beginning_of_row = end_of_row;
84 | }
85 | // Row i starts at offset i * (ny_local + 2) in the flat array
86 | // Set the boundary values: each ghost cell copies its interior neighbour
87 | for (int i = 1; i < nx_local + 1; i++) {
88 | temperature1->data[i * (ny_local + 2)] = temperature1->data[i * (ny_local + 2) + 1];
89 | temperature1->data[i * (ny_local + 2) + ny + 1] = temperature1->data[i * (ny_local + 2) + ny];
90 | }
91 | for (int j = 0; j < ny + 2; j++) {
92 | temperature1->data[j] = temperature1->data[(ny_local + 2) + j];
93 | temperature1->data[(nx_local + 1) * (ny_local + 2) + j] =
94 | temperature1->data[nx_local * (ny_local + 2) + j];
95 | }
96 |
97 | copy_field(temperature1, temperature2);
98 |
99 | fclose(fp);
100 | }
101 |
--------------------------------------------------------------------------------
/content/exercise/data_mapping/main.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // Main routine for heat equation solver in 2D.
4 |
5 | #include <cstdio>
6 | #include <omp.h>
7 |
8 | #include "heat.h"
9 |
10 | int main(int argc, char **argv)
11 | {
12 | // Image output interval
13 | int image_interval = 1500;
14 |
15 | // Number of time steps
16 | int nsteps;
17 | // Current and previous temperature fields
18 | field current, previous;
19 | initialize(argc, argv, &current, &previous, &nsteps);
20 |
21 | // Output the initial field
22 | write_field(&current, 0);
23 |
24 | double average_temp = average(&current);
25 | printf("Average temperature at start: %f\n", average_temp);
26 |
27 | // Diffusion constant
28 | double a = 0.5;
29 |
30 | // Compute the largest stable time step
31 | double dx2 = current.dx * current.dx;
32 | double dy2 = current.dy * current.dy;
33 | // Time step
34 | double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));
35 |
36 | // Get the start time stamp
37 | double start_clock = omp_get_wtime();
38 |
39 | // Copy fields to device
40 | enter_data(&current, &previous);
41 |
42 | // Time evolution
43 | for (int iter = 1; iter <= nsteps; iter++) {
44 | evolve(&current, &previous, a, dt);
45 | if (iter % image_interval == 0) {
46 | // update data on host for output
47 | update_host(&current);
48 | write_field(&current, iter);
49 | }
50 | // Swap current field so that it will be used
51 | // as previous for next iteration step
52 | swap_fields(&current, &previous);
53 | }
54 |
55 | // copy data back to host
56 | exit_data(&current, &previous);
57 |
58 | double stop_clock = omp_get_wtime();
59 | // Because of the swap closing each step, the newest field is in previous
60 | // Average temperature for reference
61 | average_temp = average(&previous);
62 |
63 | // Report the wall-clock time used for all the iterations
64 | printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock));
65 | printf("Average temperature: %f\n", average_temp);
66 | if (argc == 1) {
67 | printf("Reference value with default arguments: 59.281239\n");
68 | }
69 |
70 | // Output the final field
71 | write_field(&previous, nsteps);
72 |
73 | return 0;
74 | }
75 |
--------------------------------------------------------------------------------
/content/exercise/data_mapping/utilities.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // Utility functions for heat equation solver
4 |
5 | #include <algorithm>
6 | #include <cassert>
7 |
8 | #include "heat.h"
9 |
10 |
11 | // Duplicate the values of temperature1 in temperature2 (same shape required)
12 | void copy_field(field *temperature1, field *temperature2)
13 | {
14 | const field &source = *temperature1;
15 | assert(source.nx == temperature2->nx);
16 | assert(source.ny == temperature2->ny);
17 | assert(source.data.size() == temperature2->data.size());
18 | std::copy(source.data.begin(), source.data.end(), temperature2->data.begin());
19 | }
20 |
21 | // Exchange the data arrays held by temperature1 and temperature2
22 | void swap_fields(field *temperature1, field *temperature2)
23 | {
24 | temperature1->data.swap(temperature2->data);
25 | }
26 |
27 | // Size the data array of a temperature field; newly added cells are zero
28 | void allocate_field(field *temperature)
29 | {
30 | // One boundary layer is added on every side of the nx x ny grid
31 | const int cells = (temperature->nx + 2) * (temperature->ny + 2);
32 | temperature->data.resize(cells, 0.0);
33 | }
34 |
35 | // Mean temperature of the interior cells; the boundary layers are skipped
36 | double average(field *temperature)
37 | {
38 | const int rows = temperature->nx;
39 | const int cols = temperature->ny;
40 | double total = 0.0;
41 |
42 | for (int row = 1; row <= rows; row++) {
43 | const int offset = row * (cols + 2);
44 | for (int col = 1; col <= cols; col++) {
45 | total += temperature->data[offset + col];
46 | }
47 | }
48 | return total / (rows * cols);
49 | }
50 |
51 |
52 |
--------------------------------------------------------------------------------
/content/exercise/ex00/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 CSC Training
4 | Copyright (c) 2021 ENCCS
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/content/exercise/ex00/ex00.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | program hello
4 | ! Report whether a target region runs on the host or on a device
5 | #ifdef _OPENMP
6 | use omp_lib
7 | #endif
8 | implicit none
9 |
10 | integer :: num_devices,nteams,nthreads
11 | logical :: initial_device
12 |
13 | num_devices = omp_get_num_devices()
14 | print *, "Number of available devices", num_devices
15 | ! Unmapped scalars are firstprivate in a target region: map every result back
16 | !$omp target map(from:initial_device,nteams,nthreads)
17 | initial_device = omp_is_initial_device()
18 | nteams= omp_get_num_teams()
19 | nthreads= omp_get_num_threads()
20 | !$omp end target
21 | if (initial_device) then
22 | write(*,*) "Running on host"
23 | else
24 | write(*,'(A,I4,A,I4,A)') "Running on device with ",nteams, " teams in total and ", nthreads, " threads in each team"
25 | end if
26 |
27 | end program
28 |
29 |
--------------------------------------------------------------------------------
/content/exercise/ex00/ex00.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2019 CSC Training */
2 | /* Copyright (c) 2021 ENCCS */
3 | #include <stdio.h>
4 | /* omp.h is only included when the compiler defines _OPENMP */
5 | #ifdef _OPENMP
6 | #include <omp.h>
7 | #endif
8 |
9 | int main()
10 | {
11 | int num_devices = omp_get_num_devices();
12 | printf("Number of available devices %d\n", num_devices);
13 | /* The block below is the target region; it reports where it executes */
14 | #pragma omp target
15 | {
16 | if (omp_is_initial_device()) {
17 | printf("Running on host\n");
18 | } else {
19 | int nteams= omp_get_num_teams();
20 | int nthreads= omp_get_num_threads();
21 | printf("Running on device with %d teams in total and %d threads in each team\n",nteams,nthreads);
22 | }
23 | }
24 | return 0;
25 | }
26 |
27 |
--------------------------------------------------------------------------------
/content/exercise/ex01/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 CSC Training
4 | Copyright (c) 2021 ENCCS
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/content/exercise/ex01/ex01.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | program dotproduct
4 | implicit none
5 | ! Dot product of vecA and vecB: element-wise products, then their sum
6 | integer, parameter :: nx = 102400
7 | real, parameter :: r=0.2
8 |
9 | real, dimension(nx) :: vecA,vecB,vecC
10 | real :: sum
11 | integer :: i
12 |
13 | ! Initialization of vectors: vecA(i) = r**(i-1), vecB(i) = 1
14 | do i = 1, nx
15 | vecA(i) = r**(i-1)
16 | vecB(i) = 1.0
17 | end do
18 |
19 | ! Element-wise products of the two vectors
20 | do i = 1, nx
21 | vecC(i) = vecA(i) * vecB(i)
22 | end do
23 |
24 | sum = 0.0
25 | ! Sum the products to obtain the dot product
26 | do i = 1, nx
27 | sum = vecC(i) + sum
28 | end do
29 |
30 | write(*,*) 'The sum is: ', sum
31 |
32 | end program dotproduct
33 |
--------------------------------------------------------------------------------
/content/exercise/ex01/ex01.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2019 CSC Training */
2 | /* Copyright (c) 2021 ENCCS */
3 | #include <stdio.h>
4 | #include <math.h>
5 | #define NX 102400
6 |
7 | int main(void)
8 | {
9 | double vecA[NX],vecB[NX],vecC[NX];
10 | double r=0.2;
11 |
12 | /* Initialization of vectors: vecA[i] = r^i, vecB[i] = 1 */
13 | for (int i = 0; i < NX; i++) {
14 | vecA[i] = pow(r, i);
15 | vecB[i] = 1.0;
16 | }
17 |
18 | /* Element-wise products of the two vectors */
19 | for (int i = 0; i < NX; i++) {
20 | vecC[i] = vecA[i] * vecB[i];
21 | }
22 |
23 | double sum = 0.0;
24 | /* Sum the products to obtain the dot product */
25 | for (int i = 0; i < NX; i++) {
26 | sum += vecC[i];
27 | }
28 | printf("The sum is: %8.6f \n", sum);
29 | return 0;
30 | }
31 |
--------------------------------------------------------------------------------
/content/exercise/ex01/solution/ex01.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | program dotproduct
4 | implicit none
5 | ! Dot product of vecA and vecB: element-wise products, then their sum
6 | integer, parameter :: nx = 102400
7 | real, parameter :: r=0.2
8 |
9 | real, dimension(nx) :: vecA,vecB,vecC
10 | real :: sum
11 | integer :: i
12 |
13 | ! Initialization of vectors: vecA(i) = r**(i-1), vecB(i) = 1
14 | do i = 1, nx
15 | vecA(i) = r**(i-1)
16 | vecB(i) = 1.0
17 | end do
18 |
19 | ! Element-wise products of the two vectors, computed in a target region
20 | !$omp target
21 | do i = 1, nx
22 | vecC(i) = vecA(i) * vecB(i)
23 | end do
24 | !$omp end target
25 |
26 | sum = 0.0
27 | ! Sum the products to obtain the dot product
28 | do i = 1, nx
29 | sum = vecC(i) + sum
30 | end do
31 |
32 | write(*,*) 'The sum is: ', sum
33 |
34 | end program dotproduct
35 |
--------------------------------------------------------------------------------
/content/exercise/ex01/solution/ex01.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2019 CSC Training */
2 | /* Copyright (c) 2021 ENCCS */
3 | #include <stdio.h>
4 | #include <math.h>
5 | #define NX 102400
6 |
7 | int main(void)
8 | {
9 | double vecA[NX],vecB[NX],vecC[NX];
10 | double r=0.2;
11 |
12 | /* Initialization of vectors: vecA[i] = r^i, vecB[i] = 1 */
13 | for (int i = 0; i < NX; i++) {
14 | vecA[i] = pow(r, i);
15 | vecB[i] = 1.0;
16 | }
17 |
18 | /* element-wise products of the two vectors, computed in a target region */
19 | #pragma omp target
20 | for (int i = 0; i < NX; i++) {
21 | vecC[i] = vecA[i] * vecB[i];
22 | }
23 |
24 | double sum = 0.0;
25 | /* sum the products to obtain the dot product */
26 | for (int i = 0; i < NX; i++) {
27 | sum += vecC[i];
28 | }
29 | printf("The sum is: %8.6f \n", sum);
30 | return 0;
31 | }
32 |
--------------------------------------------------------------------------------
/content/exercise/ex02/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 CSC Training
4 | Copyright (c) 2021 ENCCS
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/content/exercise/ex02/ex02.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | program dotproduct
4 | implicit none
5 | ! Dot product of vecA and vecB: element-wise products, then their sum
6 | integer, parameter :: nx = 102400
7 | real, parameter :: r=0.2
8 |
9 | real, dimension(nx) :: vecA,vecB,vecC
10 | real :: sum
11 | integer :: i
12 |
13 | ! Initialization of vectors: vecA(i) = r**(i-1), vecB(i) = 1
14 | do i = 1, nx
15 | vecA(i) = r**(i-1)
16 | vecB(i) = 1.0
17 | end do
18 |
19 | ! Element-wise products of the two vectors, computed in a target region
20 | !$omp target
21 | do i = 1, nx
22 | vecC(i) = vecA(i) * vecB(i)
23 | end do
24 | !$omp end target
25 |
26 | sum = 0.0
27 | ! Sum the products to obtain the dot product
28 | do i = 1, nx
29 | sum = vecC(i) + sum
30 | end do
31 |
32 | write(*,*) 'The sum is: ', sum
33 |
34 | end program dotproduct
35 |
--------------------------------------------------------------------------------
/content/exercise/ex02/ex02.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2019 CSC Training */
2 | // Copyright (c) 2021 ENCCS
3 | #include <stdio.h>
4 | #include <math.h>
5 | #define NX 102400
6 |
7 | int main(void)
8 | {
9 | double vecA[NX],vecB[NX],vecC[NX];
10 | double r=0.2;
11 |
12 | /* Initialization of vectors: vecA[i] = r^i, vecB[i] = 1 */
13 | for (int i = 0; i < NX; i++) {
14 | vecA[i] = pow(r, i);
15 | vecB[i] = 1.0;
16 | }
17 |
18 | /* element-wise products of the two vectors, computed in a target region */
19 | #pragma omp target
20 | for (int i = 0; i < NX; i++) {
21 | vecC[i] = vecA[i] * vecB[i];
22 | }
23 |
24 | double sum = 0.0;
25 | /* sum the products to obtain the dot product */
26 | for (int i = 0; i < NX; i++) {
27 | sum += vecC[i];
28 | }
29 | printf("The sum is: %8.6f \n", sum);
30 | return 0;
31 | }
32 |
--------------------------------------------------------------------------------
/content/exercise/ex02/solution/ex02.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | program dotproduct
4 | implicit none
5 | ! Dot product of vecA and vecB: element-wise products, then their sum
6 | integer, parameter :: nx = 102400
7 | real, parameter :: r=0.2
8 |
9 | real, dimension(nx) :: vecA,vecB,vecC
10 | real :: sum
11 | integer :: i
12 |
13 | ! Initialization of vectors: vecA(i) = r**(i-1), vecB(i) = 1
14 | do i = 1, nx
15 | vecA(i) = r**(i-1)
16 | vecB(i) = 1.0
17 | end do
18 |
19 | ! Element-wise products; the combined directive applies to this loop
20 | !$omp target teams distribute parallel do
21 | do i = 1, nx
22 | vecC(i) = vecA(i) * vecB(i)
23 | end do
24 | !$omp end target teams distribute parallel do
25 |
26 | sum = 0.0
27 | ! Sum the products to obtain the dot product
28 | do i = 1, nx
29 | sum = vecC(i) + sum
30 | end do
31 |
32 | write(*,*) 'The sum is: ', sum
33 |
34 | end program dotproduct
35 |
--------------------------------------------------------------------------------
/content/exercise/ex02/solution/ex02.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2019 CSC Training */
2 | /* Copyright (c) 2021 ENCCS */
3 | #include <stdio.h>
4 | #include <math.h>
5 | #define NX 102400
6 |
7 | int main(void)
8 | {
9 | double vecA[NX],vecB[NX],vecC[NX];
10 | double r=0.2;
11 |
12 | /* Initialization of vectors: vecA[i] = r^i, vecB[i] = 1 */
13 | for (int i = 0; i < NX; i++) {
14 | vecA[i] = pow(r, i);
15 | vecB[i] = 1.0;
16 | }
17 |
18 | /* element-wise products; the combined directive applies to this loop */
19 | #pragma omp target teams distribute parallel for
20 | for (int i = 0; i < NX; i++) {
21 | vecC[i] = vecA[i] * vecB[i];
22 | }
23 |
24 | double sum = 0.0;
25 | /* sum the products to obtain the dot product */
26 | for (int i = 0; i < NX; i++) {
27 | sum += vecC[i];
28 | }
29 | printf("The sum is: %8.6f \n", sum);
30 | return 0;
31 | }
32 |
--------------------------------------------------------------------------------
/content/exercise/ex03/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 CSC Training
4 | Copyright (c) 2021 ENCCS
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/content/exercise/ex03/ex03.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | program hello
4 | ! Report whether a target region runs on the host or on a device
5 | #ifdef _OPENMP
6 | use omp_lib
7 | #endif
8 | implicit none
9 |
10 | integer :: num_devices,nteams,nthreads
11 | logical :: initial_device
12 |
13 | num_devices = omp_get_num_devices()
14 | print *, "Number of available devices", num_devices
15 | ! Unmapped scalars are firstprivate in a target region: map every result back
16 | !$omp target map(from:initial_device,nteams,nthreads)
17 | initial_device = omp_is_initial_device()
18 | nteams= omp_get_num_teams()
19 | nthreads= omp_get_num_threads()
20 | !$omp end target
21 | if (initial_device) then
22 | write(*,*) "Running on host"
23 | else
24 | write(*,'(A,I4,A,I4,A)') "Running on device with ",nteams, " teams in total and ", nthreads, " threads in each team"
25 | end if
26 |
27 | end program
28 |
29 |
--------------------------------------------------------------------------------
/content/exercise/ex03/ex03.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2019 CSC Training */
2 | /* Copyright (c) 2021 ENCCS */
3 | #include <stdio.h>
4 | /* omp.h is only included when the compiler defines _OPENMP */
5 | #ifdef _OPENMP
6 | #include <omp.h>
7 | #endif
8 |
9 | int main()
10 | {
11 | int num_devices = omp_get_num_devices();
12 | printf("Number of available devices %d\n", num_devices);
13 | /* The block below is the target region; it reports where it executes */
14 | #pragma omp target
15 | {
16 | if (omp_is_initial_device()) {
17 | printf("Running on host\n");
18 | } else {
19 | int nteams= omp_get_num_teams();
20 | int nthreads= omp_get_num_threads();
21 | printf("Running on device with %d teams in total and %d threads in each team\n",nteams,nthreads);
22 | }
23 | }
24 | return 0;
25 | }
26 |
27 |
--------------------------------------------------------------------------------
/content/exercise/ex03/solution/ex03.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Solution to exercise 3: start a league of teams on the target device,
4 | ! open a parallel region in each team, and report the team and thread
5 | ! counts seen inside it.
6 | program hello
7 |
8 | #ifdef _OPENMP
9 |   use omp_lib
10 | #endif
11 |   implicit none
12 |
13 |   integer :: num_devices, nteams, nthreads
14 |   logical :: initial_device
15 |
16 |   num_devices = omp_get_num_devices()
17 |   print *, "Number of available devices", num_devices
18 |
19 |   ! initial_device, nteams and nthreads are set in the target region and
20 |   ! read on the host, so all three are mapped back. A scalar left out of
21 |   ! the map clause would be firstprivate in the region (OpenMP 4.5 and
22 |   ! later) and the host copy would stay undefined.
23 |   !$omp target map(from:initial_device,nteams,nthreads)
24 |   !$omp teams num_teams(2) thread_limit(3)
25 |   !$omp parallel
26 |   ! Executed by every thread of every team; all of them store into the
27 |   ! same mapped variables.
28 |   initial_device = omp_is_initial_device()
29 |   nteams = omp_get_num_teams()
30 |   nthreads = omp_get_num_threads()
31 |   !$omp end parallel
32 |   !$omp end teams
33 |   !$omp end target
34 |
35 |   if (initial_device) then
36 |     write(*,*) "Running on host"
37 |   else
38 |     write(*,'(A,I4,A,I4,A)') "Running on device with ", nteams, &
39 |       & " teams in total and ", nthreads, " threads in each team"
40 |   end if
41 |
42 | end program
43 |
44 |
--------------------------------------------------------------------------------
/content/exercise/ex03/solution/ex03.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2019 CSC Training */
2 | /* Copyright (c) 2021 ENCCS */
3 | #include <stdio.h>
4 |
5 | #ifdef _OPENMP
6 | #include <omp.h>
7 | #endif
8 |
9 | /*
10 |  * Solution to exercise 3: the target region starts a league of at most
11 |  * two teams with a thread limit of three, and every thread of the
12 |  * parallel region prints where it runs and the counts it observes.
13 |  */
14 | int main()
15 | {
16 |     int num_devices = omp_get_num_devices();
17 |     printf("Number of available devices %d\n", num_devices);
18 |
19 |     #pragma omp target
20 |     #pragma omp teams num_teams(2) thread_limit(3)
21 |     #pragma omp parallel
22 |     {
23 |         if (omp_is_initial_device()) {
24 |             printf("Running on host\n");
25 |         } else {
26 |             int nteams= omp_get_num_teams();
27 |             int nthreads= omp_get_num_threads();
28 |             printf("Running on device with %d teams in total and %d threads in each team\n",nteams,nthreads);
29 |         }
30 |     }
31 |
32 | }
33 |
34 |
--------------------------------------------------------------------------------
/content/exercise/ex04/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 CSC Training
4 | Copyright (c) 2021 ENCCS
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/content/exercise/ex04/ex04.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Exercise 4: element-wise product of two vectors inside a target region,
4 | ! followed by a host-side accumulation of the products.
5 | program dotproduct
6 |   implicit none
7 |
8 |   integer, parameter :: nx = 102400
9 |   real, parameter :: r = 0.2
10 |
11 |   real, dimension(nx) :: vecA, vecB, vecC
12 |   real :: total
13 |   integer :: k
14 |
15 |   ! vecA holds the geometric sequence r**0, r**1, ...; vecB is all ones
16 |   do k = 1, nx
17 |     vecB(k) = 1.0
18 |     vecA(k) = r**(k-1)
19 |   end do
20 |
21 |   ! Element-wise product, distributed over the teams of the target device
22 |   !$omp target teams distribute
23 |   do k = 1, nx
24 |     vecC(k) = vecA(k) * vecB(k)
25 |   end do
26 |   !$omp end target teams distribute
27 |
28 |   ! Accumulate the products on the host
29 |   total = 0.0
30 |   do k = 1, nx
31 |     total = total + vecC(k)
32 |   end do
33 |
34 |   write(*,*) 'The sum is: ', total
35 |
36 | end program dotproduct
37 |
--------------------------------------------------------------------------------
/content/exercise/ex04/ex04.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2019 CSC Training */
2 | /* Copyright (c) 2021 ENCCS */
3 | #include <stdio.h>
4 | #include <math.h>
5 | #define NX 102400
6 |
7 | /*
8 |  * Exercise 4: element-wise product of two vectors inside a target region,
9 |  * followed by a host-side sum of the products. stdio.h provides printf
10 |  * and math.h provides pow.
11 |  */
12 | int main(void)
13 | {
14 |     double vecA[NX],vecB[NX],vecC[NX];
15 |     double r=0.2;
16 |
17 |     /* Initialization of vectors: vecA[i] = r^i, vecB[i] = 1 */
18 |     for (int i = 0; i < NX; i++) {
19 |         vecA[i] = pow(r, i);
20 |         vecB[i] = 1.0;
21 |     }
22 |
23 |     /* element-wise product of the two vectors */
24 |     #pragma omp target teams distribute
25 |     for (int i = 0; i < NX; i++) {
26 |         vecC[i] = vecA[i] * vecB[i];
27 |     }
28 |
29 |     double sum = 0.0;
30 |     /* calculate the sum */
31 |     for (int i = 0; i < NX; i++) {
32 |         sum += vecC[i];
33 |     }
34 |     printf("The sum is: %8.6f \n", sum);
35 |     return 0;
36 | }
37 |
--------------------------------------------------------------------------------
/content/exercise/ex04/solution/ex04.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | program dotproduct
4 | implicit none
5 | ! Solution to exercise 4: offloaded element-wise product followed by an offloaded sum
6 | integer, parameter :: nx = 102400
7 | real, parameter :: r=0.2
8 |
9 | real, dimension(nx) :: vecA,vecB,vecC
10 | real :: sum
11 | integer :: i
12 |
13 | ! Initialization of vectors: vecA(i) = r**(i-1), vecB(i) = 1.0
14 | do i = 1, nx
15 | vecA(i) = r**(i-1)
16 | vecB(i) = 1.0
17 | end do
18 |
19 | ! Element-wise product: vecA and vecB are mapped to the device, vecC is mapped back from it
20 | !$omp target teams distribute map(from:vecC) map(to:vecA,vecB)
21 | do i = 1, nx
22 | vecC(i) = vecA(i) * vecB(i)
23 | end do
24 | !$omp end target teams distribute
25 |
26 | sum = 0.0
27 | ! Calculate the sum in a second target region; sum is mapped tofrom so the host sees the result
28 | !$omp target map(tofrom:sum)
29 | do i = 1, nx
30 | sum = vecC(i) + sum
31 | end do
32 | !$omp end target
33 | write(*,*) 'The sum is: ', sum
34 |
35 | end program dotproduct
36 |
--------------------------------------------------------------------------------
/content/exercise/ex04/solution/ex04.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2019 CSC Training */
2 | /* Copyright (c) 2021 ENCCS */
3 | #include <stdio.h>
4 | #include <math.h>
5 | #define NX 102400
6 |
7 | /*
8 |  * Solution to exercise 4: the product loop maps vecA and vecB to the
9 |  * device and vecC back from it; the sum is then accumulated in a second
10 |  * target region with sum mapped tofrom.
11 |  */
12 | int main(void)
13 | {
14 |     double vecA[NX],vecB[NX],vecC[NX];
15 |     double r=0.2;
16 |
17 |     /* Initialization of vectors: vecA[i] = r^i, vecB[i] = 1 */
18 |     for (int i = 0; i < NX; i++) {
19 |         vecA[i] = pow(r, i);
20 |         vecB[i] = 1.0;
21 |     }
22 |
23 |     /* element-wise product of the two vectors */
24 |     #pragma omp target teams distribute map(from:vecC[0:NX]) map(to:vecA[0:NX],vecB[0:NX])
25 |     for (int i = 0; i < NX; i++) {
26 |         vecC[i] = vecA[i] * vecB[i];
27 |     }
28 |
29 |     double sum = 0.0;
30 |     /* calculate the sum */
31 |     #pragma omp target map(tofrom:sum)
32 |     for (int i = 0; i < NX; i++) {
33 |         sum += vecC[i];
34 |     }
35 |     printf("The sum is: %8.6f \n", sum);
36 |     return 0;
37 | }
38 |
--------------------------------------------------------------------------------
/content/exercise/ex05/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 CSC Training
4 | Copyright (c) 2021 ENCCS
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/content/exercise/ex05/ex05.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Exercise 5: two target regions work on the same vectors, with the
4 | ! inputs re-initialised on the host in between. The second set of input
5 | ! values matches the solution program so that the printed sums can be
6 | ! compared directly.
7 | program dotproduct
8 |   implicit none
9 |
10 |   integer, parameter :: nx = 102400
11 |   real, parameter :: r=0.2
12 |
13 |   real, dimension(nx) :: vecA,vecB,vecC
14 |   real :: sum
15 |   integer :: i
16 |
17 |   ! Initialization of vectors: vecA(i) = r**(i-1), vecB(i) = 1.0
18 |   do i = 1, nx
19 |     vecA(i) = r**(i-1)
20 |     vecB(i) = 1.0
21 |   end do
22 |
23 |   ! Element-wise product of the two vectors
24 |   !$omp target
25 |   do i = 1, nx
26 |     vecC(i) = vecA(i) * vecB(i)
27 |   end do
28 |   !$omp end target
29 |
30 |   ! Initialization of vectors again, with new values on the host
31 |   do i = 1, nx
32 |     vecA(i) = 0.5
33 |     vecB(i) = 2.0
34 |   end do
35 |
36 |   ! Add the product of the new inputs to the previous result
37 |   !$omp target
38 |   do i = 1, nx
39 |     vecC(i) = vecC(i) + vecA(i) * vecB(i)
40 |   end do
41 |   !$omp end target
42 |
43 |   sum = 0.0
44 |   ! Calculate the sum
45 |   do i = 1, nx
46 |     sum = vecC(i) + sum
47 |   end do
48 |   write(*,'(A,F18.6)') 'The sum is: ', sum
49 |
50 | end program dotproduct
51 |
--------------------------------------------------------------------------------
/content/exercise/ex05/ex05.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2019 CSC Training */
2 | /* Copyright (c) 2021 ENCCS */
3 | #include <stdio.h>
4 | #include <math.h>
5 | #define NX 102400
6 |
7 | /*
8 |  * Exercise 5: two target regions work on the same vectors, with the
9 |  * inputs re-initialised on the host in between. stdio.h provides printf
10 |  * and math.h provides pow.
11 |  */
12 | int main(void)
13 | {
14 |     double vecA[NX],vecB[NX],vecC[NX];
15 |     double r=0.2;
16 |
17 |     /* Initialization of vectors: vecA[i] = r^i, vecB[i] = 1 */
18 |     for (int i = 0; i < NX; i++) {
19 |         vecA[i] = pow(r, i);
20 |         vecB[i] = 1.0;
21 |     }
22 |
23 |     /* element-wise product of the two vectors */
24 |     #pragma omp target
25 |     for (int i = 0; i < NX; i++) {
26 |         vecC[i] = vecA[i] * vecB[i];
27 |     }
28 |
29 |     /* Initialization of vectors again */
30 |     for (int i = 0; i < NX; i++) {
31 |         vecA[i] = 1.0;
32 |         vecB[i] = 1.0;
33 |     }
34 |
35 |     /* add the product of the new inputs to the previous result */
36 |     #pragma omp target
37 |     for (int i = 0; i < NX; i++) {
38 |         vecC[i] = vecC[i] + vecA[i] * vecB[i];
39 |     }
40 |     double sum = 0.0;
41 |     /* calculate the sum */
42 |     for (int i = 0; i < NX; i++) {
43 |         sum += vecC[i];
44 |     }
45 |     printf("The sum is: %8.6f \n", sum);
46 |     return 0;
47 | }
48 |
--------------------------------------------------------------------------------
/content/exercise/ex05/solution/ex05.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | program dotproduct
4 | implicit none
5 | ! Solution to exercise 5: both kernels run inside one target data region that owns the mapping of vecC
6 | integer, parameter :: nx = 102400
7 | real, parameter :: r=0.2
8 |
9 | real, dimension(nx) :: vecA,vecB,vecC
10 | real :: sum
11 | integer :: i
12 |
13 | ! Initialization of vectors: vecA(i) = r**(i-1), vecB(i) = 1.0
14 | do i = 1, nx
15 | vecA(i) = r**(i-1)
16 | vecB(i) = 1.0
17 | end do
18 |
19 | ! Element-wise product; vecC is mapped with map(from:) on the enclosing data region, the inputs on the kernel
20 | !$omp target data map(from:vecC)
21 | !$omp target map(to:vecA,vecB)
22 | do i = 1, nx
23 | vecC(i) = vecA(i) * vecB(i)
24 | end do
25 | !$omp end target
26 |
27 | ! Initialization of vectors again (host side, new values)
28 | do i = 1, nx
29 | vecA(i) = 0.5
30 | vecB(i) = 2.0
31 | end do
32 | ! Second kernel: the new vecA and vecB are mapped to the device again; vecC is not listed in this map clause
33 | !$omp target map(to:vecA,vecB)
34 | do i = 1, nx
35 | vecC(i) = vecC(i) + vecA(i) * vecB(i)
36 | end do
37 | !$omp end target
38 | !$omp end target data
39 |
40 | sum = 0.0
41 | ! Calculate the sum on the host, after the data region has ended
42 | do i = 1, nx
43 | sum = vecC(i) + sum
44 | end do
45 | write(*,'(A,F18.6)') 'The sum is: ', sum
46 |
47 | end program dotproduct
48 |
--------------------------------------------------------------------------------
/content/exercise/ex05/solution/ex05.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2019 CSC Training */
2 | /* Copyright (c) 2021 ENCCS */
3 | #include <stdio.h>
4 | #include <math.h>
5 | #define NX 102400
6 |
7 | /*
8 |  * Solution to exercise 5: both kernels run inside one target data region
9 |  * that maps vecC with map(from:); the inputs are mapped to the device
10 |  * separately for each kernel because they change in between.
11 |  */
12 | int main(void)
13 | {
14 |     double vecA[NX],vecB[NX],vecC[NX];
15 |     double r=0.2;
16 |
17 |     /* Initialization of vectors: vecA[i] = r^i, vecB[i] = 1 */
18 |     for (int i = 0; i < NX; i++) {
19 |         vecA[i] = pow(r, i);
20 |         vecB[i] = 1.0;
21 |     }
22 |
23 |     /* element-wise product of the two vectors */
24 |     #pragma omp target data map(from:vecC[0:NX])
25 |     {
26 |         #pragma omp target map(to:vecA[0:NX],vecB[0:NX])
27 |         for (int i = 0; i < NX; i++) {
28 |             vecC[i] = vecA[i] * vecB[i];
29 |         }
30 |
31 |         /* Initialization of vectors again (host side, new values) */
32 |         for (int i = 0; i < NX; i++) {
33 |             vecA[i] = 0.5;
34 |             vecB[i] = 2.0;
35 |         }
36 |
37 |         /* add the product of the new inputs to the previous result */
38 |         #pragma omp target map(to:vecA[0:NX],vecB[0:NX])
39 |         for (int i = 0; i < NX; i++) {
40 |             vecC[i] = vecC[i] + vecA[i] * vecB[i];
41 |         }
42 |     }
43 |     double sum = 0.0;
44 |     /* calculate the sum */
45 |     for (int i = 0; i < NX; i++) {
46 |         sum += vecC[i];
47 |     }
48 |     printf("The sum is: %8.6f \n", sum);
49 |     return 0;
50 | }
51 |
--------------------------------------------------------------------------------
/content/exercise/ex06/ex06.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2021 ENCCS
2 | program dotproduct
3 | implicit none
4 | ! Exercise 6: follow the host and device copies of x through a target data region
5 | integer :: x
6 |
7 | x = 0
8 | !$omp target data map(tofrom:x)
9 | ! check point 1: the data region has just been entered with x mapped tofrom
10 | x = 10
11 | ! check point 2: x has been assigned on the host, before any update directive
12 | !$omp target update to(x)
13 | ! check point 3: the update directive with to(x) has been executed
14 | !$omp end target data
15 |
16 | end program dotproduct
17 |
--------------------------------------------------------------------------------
/content/exercise/ex06/ex06.c:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2021 ENCCS */
2 | #include <stdio.h>
3 |
4 | /*
5 |  * Exercise 6: follow the host and device copies of x through a target
6 |  * data region. The three check points mark the places where the two
7 |  * copies should be compared.
8 |  * NOTE(review): the header name was missing from the include directive;
9 |  * stdio.h is assumed - confirm against the upstream file.
10 |  */
11 | int main(void)
12 | {
13 |     int x = 0;
14 |
15 |     #pragma omp target data map(tofrom:x)
16 |     {
17 |         /* check point 1: data region entered with x mapped tofrom */
18 |         x = 10;
19 |         /* check point 2: x assigned on the host, before any update */
20 |         #pragma omp target update to(x)
21 |         /* check point 3: update directive with to(x) executed */
22 |     }
23 |
24 |     return 0;
25 | }
26 |
--------------------------------------------------------------------------------
/content/exercise/offloading/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 CSC Training
4 | Copyright (c) 2021 ENCCS
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/content/exercise/offloading/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 CSC Training
2 | # Copyright (c) 2021 ENCCS
3 | ifeq ($(COMP),)
4 | COMP=nv
5 | endif
6 | # PNG support: HAVE_PNG is 0 here, so the libpng link flags below stay unset unless HAVE_PNG=1 is given on the make command line
7 | HAVE_PNG=0
8 | ifeq ($(HAVE_PNG),1)
9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib
11 | endif
12 |
13 | COMMONDIR=../common
14 | # Compiler selection through COMP: gnu, nv or intel (nv when COMP is empty)
15 | ifeq ($(COMP),gnu)
16 | CXX=g++
17 | CC=gcc
18 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR)
19 | LDFLAGS=
20 | LIBS=
21 | endif
22 |
23 | ifeq ($(COMP),nv)
24 | CXX=nvc++
25 | CC=nvc
26 | CCFLAGS=-O3 -mp=gpu -Minfo=mp -I$(COMMONDIR)
27 | LDFLAGS=
28 | LIBS=
29 | endif
30 |
31 | ifeq ($(COMP),intel)
32 | CXX=icpx
33 | CC=icx
34 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR)
35 | LDFLAGS=
36 | LIBS=
37 | endif
38 | # Build products: the executable, the C++ objects and the C object of the PNG writer in COMMONDIR
39 | EXE=heat_serial
40 | OBJS=main.o core.o setup.o utilities.o io.o
41 | OBJS_PNG=$(COMMONDIR)/pngwriter.o
42 |
43 |
44 | all: $(EXE)
45 |
46 | # Every C++ object also depends on the shared header
47 | core.o: core.cpp heat.h
48 | utilities.o: utilities.cpp heat.h
49 | setup.o: setup.cpp heat.h
50 | io.o: io.cpp heat.h
51 | main.o: main.cpp heat.h
52 | # NOTE(review): C_COMPILER is assigned per target below, but no rule in this Makefile reads it
53 | $(OBJS_PNG): C_COMPILER := $(CC)
54 | $(OBJS_PNG): CCFLAGS += -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include
55 | $(OBJS): C_COMPILER := $(CXX)
56 |
57 | $(EXE): $(OBJS) $(OBJS_PNG)
58 | $(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
59 |
60 | %.o: %.cpp
61 | $(CXX) $(CCFLAGS) -c $< -o $@
62 |
63 | %.o: %.c
64 | $(CC) $(CCFLAGS) -c $< -o $@
65 |
66 | .PHONY: clean
67 | clean:
68 | -/bin/rm -f $(EXE) a.out *.o *.png *~ ../common/*.o
69 |
--------------------------------------------------------------------------------
/content/exercise/offloading/core.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // Main solver routines for heat equation solver
4 |
5 | #include "heat.h"
6 |
7 | // Advance the temperature field by one time step with a five-point stencil.
8 | //
9 | // Arguments:
10 | //   curr: field that receives the new temperature values
11 | //   prev: field holding the values of the previous time step
12 | //   a:    diffusivity
13 | //   dt:   time step
14 | void evolve(field *curr, field *prev, double a, double dt)
15 | {
16 |     // Plain pointers and scalars keep the structs out of the loop nest
17 |     double *next = curr->data.data();
18 |     double *old = prev->data.data();
19 |     const int nx = curr->nx;
20 |     const int ny = curr->ny;
21 |     const int stride = ny + 2;  // distance between (i, j) and (i + 1, j)
22 |
23 |     const double dx2 = prev->dx * prev->dx;
24 |     const double dy2 = prev->dy * prev->dy;
25 |
26 |     // Only the points 1..nx, 1..ny are written: with fixed boundary
27 |     // conditions the outermost grid points are never updated.
28 |     // add the directives below for offloading
29 |     for (int i = 1; i <= nx; i++) {
30 |         for (int j = 1; j <= ny; j++) {
31 |             const int c = i * stride + j;   // centre point
32 |             const int ip = c + stride;      // (i + 1, j)
33 |             const int im = c - stride;      // (i - 1, j)
34 |             const int jp = c + 1;           // (i, j + 1)
35 |             const int jm = c - 1;           // (i, j - 1)
36 |             next[c] = old[c] + a*dt*
37 |                 ((old[ip] - 2.0*old[c] + old[im]) / dx2 +
38 |                  (old[jp] - 2.0*old[c] + old[jm]) / dy2);
39 |         }
40 |     }
41 | }
42 |
--------------------------------------------------------------------------------
/content/exercise/offloading/fortran/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 CSC Training
2 | # Copyright (c) 2021 ENCCS
3 | ifeq ($(COMP),)
4 | COMP=nv
5 | endif
6 | # PNG support: HAVE_PNG is 0 here, so the libpng link flags below stay unset unless HAVE_PNG=1 is given on the make command line
7 | HAVE_PNG=0
8 | ifeq ($(HAVE_PNG),1)
9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib
11 | endif
12 |
13 |
14 | COMMONDIR=../../common
15 | # Compiler selection through COMP: nv or gnu (nv when COMP is empty)
16 | ifeq ($(COMP),nv)
17 | FC=nvfortran
18 | CC=nvc
19 | FCFLAGS=-O3 -mp=gpu -Minfo=mp
20 | CCFLAGS=-O3 -I$(COMMONDIR)
21 | LDFLAGS=
22 | LIBS=
23 | endif
24 |
25 | ifeq ($(COMP),gnu)
26 | FC=gfortran
27 | CC=gcc
28 | FCFLAGS=-O3 -Wall -fopenmp
29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR)
30 | LDFLAGS=-fopenmp
31 | LIBS=
32 | endif
33 |
34 | EXE=heat_serial
35 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o
36 | OBJS_PNG= $(COMMONDIR)/pngwriter.o
37 |
38 | all: $(EXE)
39 | # Object prerequisites below list, next to each source, the objects of other Fortran sources that must be built first
40 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
41 | heat_mod.o: heat_mod.F90
42 | core.o: core.F90 heat_mod.o
43 | utilities.o: utilities.F90 heat_mod.o
44 | io.o: io.F90 heat_mod.o pngwriter_mod.o
45 | setup.o: setup.F90 heat_mod.o utilities.o io.o
46 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o
47 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o
48 |
49 | $(EXE): $(OBJS) $(OBJS_PNG)
50 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
51 |
52 | %.o: %.F90
53 | $(FC) $(FCFLAGS) -c $< -o $@
54 |
55 | %.o: %.c
56 | $(CC) -DHAVE_PNG=$(HAVE_PNG) $(CCFLAGS) -c $< -o $@
57 |
58 | .PHONY: clean
59 | clean:
60 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ ../../common/*.o
61 |
62 |
--------------------------------------------------------------------------------
/content/exercise/offloading/fortran/core.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Main solver routines for heat equation solver
4 | module core
5 | use heat
6 |
7 | contains
8 |
9 | ! Update the temperature values using five-point stencil
10 | ! Arguments:
11 | ! curr (type(field)): current temperature values; points (1:nx, 1:ny) are written
12 | ! prev (type(field)): temperature values from previous time step; only read here
13 | ! a (real(dp)): diffusivity
14 | ! dt (real(dp)): time step
15 | subroutine evolve(curr, prev, a, dt)
16 |
17 | implicit none
18 |
19 | type(field),target, intent(inout) :: curr, prev
20 | real(dp) :: a, dt
21 | integer :: i, j, nx, ny
22 | real(dp) :: dx, dy
23 | real(dp), pointer, contiguous, dimension(:,:) :: currdata, prevdata
24 |
25 | ! Help the compiler avoid being confused: use local scalars and array pointers instead of the derived type
26 | nx = curr%nx
27 | ny = curr%ny
28 | dx = curr%dx
29 | dy = curr%dy
30 | currdata => curr%data
31 | prevdata => prev%data
32 |
33 | ! Determine the temperature field at next time step As we have
34 | ! fixed boundary conditions, the outermost gridpoints are not
35 | ! updated.
36 | ! The loops read the neighbours i-1, i+1, j-1 and j+1 of every updated point.
37 | ! add the directives below for offloading
38 |
39 | do j = 1, ny
40 | do i = 1, nx
41 | currdata(i, j) = prevdata(i, j) + a * dt * &
42 | & ((prevdata(i-1, j) - 2.0 * prevdata(i, j) + &
43 | & prevdata(i+1, j)) / dx**2 + &
44 | & (prevdata(i, j-1) - 2.0 * prevdata(i, j) + &
45 | & prevdata(i, j+1)) / dy**2)
46 | end do
47 | end do
48 | end subroutine evolve
49 |
50 | end module core
51 |
--------------------------------------------------------------------------------
/content/exercise/offloading/fortran/heat_mod.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Field metadata for heat equation solver
4 | module heat
5 |   use iso_fortran_env, only : REAL64
6 |   implicit none
7 |
8 |   integer, parameter :: dp = REAL64
9 |   ! Fixed grid spacing. The literals carry the dp kind so that the
10 |   ! constants hold the double precision value of 0.01 instead of a
11 |   ! default-real literal widened to double precision.
12 |   real(dp), parameter :: DX = 0.01_dp, DY = 0.01_dp
13 |
14 |   ! Temperature field together with its grid metadata
15 |   type :: field
16 |     integer :: nx   ! dimensions of the field
17 |     integer :: ny
18 |     real(dp) :: dx  ! grid spacing in each dimension
19 |     real(dp) :: dy
20 |     real(dp), dimension(:,:), allocatable :: data  ! temperature values
21 |   end type field
22 |
23 | contains
24 |   ! Initialize the field type metadata. The grid spacing is always taken
25 |   ! from the module constants DX and DY.
26 |   ! Arguments:
27 |   !   field0 (type(field)): field whose metadata is set
28 |   !   nx, ny (integer): field dimensions
29 |   subroutine set_field_dimensions(field0, nx, ny)
30 |     implicit none
31 |
32 |     type(field), intent(out) :: field0
33 |     integer, intent(in) :: nx, ny
34 |
35 |     field0%dx = DX
36 |     field0%dy = DY
37 |     field0%nx = nx
38 |     field0%ny = ny
39 |
40 |   end subroutine set_field_dimensions
41 |
42 | end module heat
43 |
--------------------------------------------------------------------------------
/content/exercise/offloading/fortran/io.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! I/O routines for heat equation solver
4 | module io
5 |   use heat
6 |
7 | contains
8 |
9 |   ! Output routine, saves the temperature distribution as a png image
10 |   ! named heat_NNNN.png, where NNNN is the zero-padded time step index.
11 |   ! Arguments:
12 |   !   curr (type(field)): variable with the temperature data
13 |   !   iter (integer): index of the time step
14 |   subroutine write_field(curr, iter)
15 |
16 |     use pngwriter
17 |     implicit none
18 |     type(field), intent(in) :: curr
19 |     integer, intent(in) :: iter
20 |
21 |     character(len=85) :: filename
22 |
23 |     integer :: stat
24 |     real(dp), dimension(:,:), allocatable, target :: full_data
25 |
26 |     allocate(full_data(curr%nx, curr%ny))
27 |     ! Copy the points 1..nx, 1..ny (without the boundary layer) to a
28 |     ! separate array for output
29 |     full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny)
30 |
31 |     ! One edit descriptor per output item
32 |     write(filename,'(A5,I4.4,A4)') 'heat_', iter, '.png'
33 |     stat = save_png(full_data, curr%nx, curr%ny, filename)
34 |     deallocate(full_data)
35 |
36 |   end subroutine write_field
37 |
38 |
39 |   ! Reads the temperature distribution from an input file. The program
40 |   ! stops with a message when the file cannot be opened.
41 |   ! Arguments:
42 |   !   field0 (type(field)): field variable that will store the
43 |   !                         read data
44 |   !   filename (char): name of the input file
45 |   ! Note that this version assumes the input data to be in C memory layout
46 |   subroutine read_field(field0, filename)
47 |
48 |     implicit none
49 |     type(field), intent(out) :: field0
50 |     character(len=85), intent(in) :: filename
51 |
52 |     integer :: nx, ny, i
53 |     integer :: funit, ios
54 |     character(len=2) :: dummy
55 |
56 |     real(dp), dimension(:,:), allocatable :: full_data
57 |
58 |     ! status='old' prevents an empty file from being created when the
59 |     ! name is wrong; newunit avoids clashing with a unit used elsewhere
60 |     open(newunit=funit, file=filename, status='old', action='read', iostat=ios)
61 |     if (ios /= 0) then
62 |       write (*,*) "Can not open input file ", trim(filename)
63 |       stop
64 |     end if
65 |     ! Read the header
66 |     read(funit, *) dummy, nx, ny
67 |
68 |     call set_field_dimensions(field0, nx, ny)
69 |
70 |     ! The arrays for temperature field contain also a halo region
71 |     allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
72 |
73 |     allocate(full_data(nx, ny))
74 |     ! Read the data, one record per value of the first index
75 |     do i = 1, nx
76 |       read(funit, *) full_data(i, 1:ny)
77 |     end do
78 |
79 |     ! Copy to full array containing also boundaries
80 |     field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:)
81 |
82 |     ! Set the boundary values by copying the adjacent row or column
83 |     field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1)
84 |     field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny)
85 |     field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1)
86 |     field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1)
87 |
88 |     close(funit)
89 |     deallocate(full_data)
90 |
91 |   end subroutine read_field
92 |
93 | end module io
94 |
--------------------------------------------------------------------------------
/content/exercise/offloading/fortran/main.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Heat equation solver in 2D.
4 |
5 | program heat_solve
6 | use heat
7 | use core
8 | use io
9 | use setup
10 | use utilities
11 | use omp_lib
12 |
13 | implicit none
14 |
15 | real(dp), parameter :: a = 0.5 ! Diffusion constant
16 | type(field) :: current, previous ! Current and previous temperature fields
17 |
18 | real(dp) :: dt ! Time step
19 | integer :: nsteps ! Number of time steps
20 | integer, parameter :: image_interval = 1500 ! Image output interval
21 |
22 | integer :: iter
23 |
24 | real(dp) :: average_temp ! Average temperature
25 |
26 | real(kind=dp) :: start, stop ! Timers
27 |
28 | call initialize(current, previous, nsteps)
29 |
30 | ! Draw the picture of the initial state
31 | call write_field(current, 0)
32 |
33 | average_temp = average(current)
34 | write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp
35 |
36 | ! Largest stable time step
37 | dt = current%dx**2 * current%dy**2 / &
38 | & (2.0 * a * (current%dx**2 + current%dy**2))
39 |
40 | ! Main iteration loop
41 | ! Each step writes into current and then swaps the fields, so the newest result is in previous afterwards
42 | start = omp_get_wtime()
43 |
44 | do iter = 1, nsteps
45 | call evolve(current, previous, a, dt)
46 | if (mod(iter, image_interval) == 0) then
47 | call write_field(current, iter)
48 | end if
49 | call swap_fields(current, previous)
50 | end do
51 |
52 | stop = omp_get_wtime()
53 |
54 | ! Average temperature for reference
55 | average_temp = average(previous)
56 |
57 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.'
58 | write(*,'(A,F9.6)') 'Average temperature: ', average_temp
59 | if (command_argument_count() == 0) then
60 | write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239
61 | end if
62 |
63 | call finalize(current, previous)
64 |
65 | end program heat_solve
66 |
--------------------------------------------------------------------------------
/content/exercise/offloading/fortran/pngwriter_mod.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! PNG writer for heat equation solver
4 | module pngwriter
5 | use heat
6 |
7 | contains
8 | ! Fortran wrapper around the C function save_png; returns its status and prints a message when it is non-zero
9 | function save_png(data, nx, ny, fname) result(stat)
10 |
11 | use, intrinsic :: ISO_C_BINDING
12 | implicit none
13 |
14 | real(dp), dimension(:,:), intent(in) :: data
15 | integer, intent(in) :: nx, ny
16 | character(len=*), intent(in) :: fname
17 | integer :: stat
18 |
19 | ! Interface for save_png C-function
20 | interface
21 | ! The C-function definition is
22 | ! int save_png(double *data, const int nx, const int ny,
23 | ! const char *fname)
24 | function save_png_c(data, nx, ny, fname, order) &
25 | & bind(C,name="save_png") result(stat)
26 | use, intrinsic :: ISO_C_BINDING
27 | implicit none
28 | real(kind=C_DOUBLE) :: data(*)
29 | integer(kind=C_INT), value, intent(IN) :: nx, ny
30 | character(kind=C_CHAR), intent(IN) :: fname(*)
31 | character(kind=C_CHAR), value, intent(IN) :: order
32 | integer(kind=C_INT) :: stat
33 | end function save_png_c
34 | end interface
35 | ! NOTE(review): the interface passes a fifth argument 'order' that the prototype quoted above omits; confirm against pngwriter.h
36 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
37 | if (stat /= 0) then
38 | write(*,*) 'save_png returned error!'
39 | end if
40 |
41 | end function save_png
42 |
43 | end module pngwriter
44 |
--------------------------------------------------------------------------------
/content/exercise/offloading/fortran/utilities.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Utility routines for heat equation solver
4 | ! NOTE: This file does not need to be edited!
5 | module utilities
6 |   use heat
7 |
8 | contains
9 |
10 |   ! Swap the data fields of two variables of type field. Only the
11 |   ! allocations change owner; no array contents are copied.
12 |   ! Arguments:
13 |   !   curr, prev (type(field)): the two variables that are swapped
14 |   subroutine swap_fields(curr, prev)
15 |
16 |     implicit none
17 |
18 |     type(field), intent(inout) :: curr, prev
19 |     real(dp), allocatable, dimension(:,:) :: tmp
20 |
21 |     call move_alloc(curr%data, tmp)
22 |     call move_alloc(prev%data, curr%data)
23 |     call move_alloc(tmp, prev%data)
24 |   end subroutine swap_fields
25 |
26 |   ! Copy the data and the metadata from one field to another
27 |   ! Arguments:
28 |   !   from_field (type(field)): variable to copy from
29 |   !   to_field (type(field)): variable to copy to
30 |   subroutine copy_fields(from_field, to_field)
31 |
32 |     implicit none
33 |
34 |     type(field), intent(in) :: from_field
35 |     type(field), intent(out) :: to_field
36 |
37 |     ! Consistency checks
38 |     if (.not.allocated(from_field%data)) then
39 |       write (*,*) "Can not copy from a field without allocated data"
40 |       stop
41 |     end if
42 |     ! NOTE(review): to_field is intent(out), so its allocatable component
43 |     ! is expected to be deallocated on entry; the shape check in the
44 |     ! else-if branch then looks unreachable. Confirm before relying on it.
45 |     if (.not.allocated(to_field%data)) then
46 |       ! Target is not initialized, allocate memory with the same bounds
47 |       allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
48 |            & lbound(from_field%data, 2):ubound(from_field%data, 2)))
49 |     else if (any(shape(from_field%data) /= shape(to_field%data))) then
50 |       write (*,*) "Wrong field data sizes in copy routine"
51 |       print *, shape(from_field%data), shape(to_field%data)
52 |       stop
53 |     end if
54 |
55 |     to_field%data = from_field%data
56 |
57 |     to_field%nx = from_field%nx
58 |     to_field%ny = from_field%ny
59 |     to_field%dx = from_field%dx
60 |     to_field%dy = from_field%dy
61 |   end subroutine copy_fields
62 |
63 |   ! Average of the field values over the points 1..nx, 1..ny
64 |   ! Arguments:
65 |   !   field0 (type(field)): field to average
66 |   ! Returns:
67 |   !   average (real(dp)): sum of the values divided by nx * ny
68 |   function average(field0)
69 |
70 |     implicit none
71 |
72 |     real(dp) :: average
73 |     type(field) :: field0
74 |
75 |     average = sum(field0%data(1:field0%nx, 1:field0%ny))
76 |     average = average / (field0%nx * field0%ny)
77 |
78 |   end function average
79 |
80 | end module utilities
81 |
--------------------------------------------------------------------------------
/content/exercise/offloading/heat.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | #ifndef __HEAT_H__
4 | #define __HEAT_H__
5 |
6 | #include <vector>
7 |
8 | // Datatype for temperature field
9 | struct field {
10 |     // nx and ny are the dimensions of the field. The array data
11 |     // contains also ghost layers, so it will have dimensions nx+2 x ny+2
12 |     int nx;
13 |     int ny;
14 |     // Size of the grid cells
15 |     double dx;
16 |     double dy;
17 |     // The temperature values in the 2D grid, row by row with ny+2 values per row
18 |     std::vector<double> data;
19 | };
20 |
21 | // We use here fixed grid spacing
22 | const double DX = 0.01;
23 | const double DY = 0.01;
24 |
25 | #if __cplusplus
26 | extern "C" {
27 | #endif
28 | // Function prototypes
29 | void set_field_dimensions(field *temperature, int nx, int ny);
30 |
31 | void initialize(int argc, char *argv[], field *temperature1,
32 |                 field *temperature2, int *nsteps);
33 |
34 | void generate_field(field *temperature);
35 | // Mean temperature over the interior (non-ghost) cells
36 | double average(field *temperature);
37 | // Compute curr from prev for one time step (a: diffusivity, dt: time step)
38 | void evolve(field *curr, field *prev, double a, double dt);
39 | // Save the interior of the field as heat_NNNN.png, NNNN being iter zero-padded
40 | void write_field(field *temperature, int iter);
41 | // Read a field from filename into temperature1 and copy it to temperature2
42 | void read_field(field *temperature1, field *temperature2,
43 |                 char *filename);
44 | // Copy the data of temperature1 into temperature2 (dimensions must match)
45 | void copy_field(field *temperature1, field *temperature2);
46 | // Exchange the data vectors of the two fields
47 | void swap_fields(field *temperature1, field *temperature2);
48 | // Resize data to (nx + 2) * (ny + 2) values
49 | void allocate_field(field *temperature);
50 |
51 | #if __cplusplus
52 | }
53 | #endif
54 | #endif // __HEAT_H__
55 |
56 |
--------------------------------------------------------------------------------
/content/exercise/offloading/io.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // I/O related functions for heat equation solver
4 |
5 | #include <algorithm>
6 | #include <cstdio>
7 | #include <cstdlib>
8 | #include <vector>
9 |
10 | #include "heat.h"
11 | #include "pngwriter.h"
12 |
13 | // Output routine that prints out a picture of the temperature
14 | // distribution. The file is named heat_NNNN.png, NNNN being iter zero-padded.
15 | void write_field(field *temperature, int iter)
16 | {
17 |     char filename[64];
18 |
19 |     // The actual write routine takes only the actual data
20 |     // (without boundary layers) so we need to copy an array with that.
21 |     std::vector<double> inner_data(temperature->nx * temperature->ny);
22 |     auto inner_data_iterator = inner_data.begin();
23 |     auto beginning_of_row = temperature->data.begin() + (temperature->ny + 2) + 1;
24 |     for (int i = 0; i < temperature->nx; i++) {
25 |         auto end_of_row = beginning_of_row + temperature->ny;
26 |         std::copy(beginning_of_row, end_of_row, inner_data_iterator);
27 |         inner_data_iterator += temperature->ny;
28 |         beginning_of_row = end_of_row + 2;  // skip the two ghost cells between rows
29 |     }
30 |
31 |     // Write out the data to a png file; snprintf keeps the name inside the buffer
32 |     snprintf(filename, sizeof(filename), "%s_%04d.png", "heat", iter);
33 |     save_png(inner_data.data(), temperature->nx, temperature->ny, filename, 'c');
34 | }
35 |
36 | // Read the initial temperature distribution from a file and
37 | // initialize the temperature fields temperature1 and
38 | // temperature2 to the same initial state. Exits with -1 on any read error.
39 | void read_field(field *temperature1, field *temperature2, char *filename)
40 | {
41 |     int nx, ny, ind, count;
42 |     FILE *fp = fopen(filename, "r");
43 |     if (fp == NULL) {  // fopen failed; fscanf must not be handed a null stream
44 |         fprintf(stderr, "Could not open the input file %s!\n", filename);
45 |         exit(-1);
46 |     }
47 |     // Read the header
48 |     count = fscanf(fp, "# %d %d \n", &nx, &ny);
49 |     if (count < 2) {
50 |         fprintf(stderr, "Error while reading the input file!\n");
51 |         exit(-1);
52 |     }
53 |
54 |     set_field_dimensions(temperature1, nx, ny);
55 |     set_field_dimensions(temperature2, nx, ny);
56 |     const int nx_local = temperature1->nx, ny_local = temperature1->ny;
57 |     // Allocate arrays (including boundary layers)
58 |     int newSize = (nx_local + 2) * (ny_local + 2);
59 |     temperature1->data.resize(newSize, 0.0);
60 |     temperature2->data.resize(newSize, 0.0);
61 |
62 |     // Array from file
63 |     std::vector<double> file_data(nx * ny, 0.0);
64 |
65 |     // Read the actual data, stopping on a short or malformed file
66 |     for (int i = 0; i < nx; i++) {
67 |         for (int j = 0; j < ny; j++) {
68 |             ind = i * ny + j;
69 |             if (fscanf(fp, "%lf", &file_data[ind]) != 1) {
70 |                 fprintf(stderr, "Error while reading the input file!\n");
71 |                 exit(-1);
72 |             }
73 |         }
74 |     }
75 |
76 |     // Copy to the inner part of the full temperature field
77 |     auto temperature_data_iterator = temperature1->data.begin() + (ny_local + 2) + 1;
78 |     auto beginning_of_row = file_data.begin();
79 |     for (int i = 0; i < nx_local; i++) {
80 |         auto end_of_row = beginning_of_row + ny_local;
81 |         std::copy(beginning_of_row, end_of_row, temperature_data_iterator);
82 |         temperature_data_iterator += ny_local + 2;
83 |         beginning_of_row = end_of_row;
84 |     }
85 |
86 |     // Set the boundary values: each ghost cell repeats its nearest inner neighbour
87 |     for (int i = 1; i < nx_local + 1; i++) {
88 |         temperature1->data[i * (ny_local + 2)] = temperature1->data[i * (ny_local + 2) + 1];
89 |         temperature1->data[i * (ny_local + 2) + ny_local + 1] = temperature1->data[i * (ny_local + 2) + ny_local];
90 |     }
91 |     for (int j = 0; j < ny_local + 2; j++) {
92 |         temperature1->data[j] = temperature1->data[(ny_local + 2) + j];  // row 1 starts at offset ny_local + 2
93 |         temperature1->data[(nx_local + 1) * (ny_local + 2) + j] =
94 |             temperature1->data[nx_local * (ny_local + 2) + j];
95 |     }
96 |
97 |     copy_field(temperature1, temperature2);
98 |
99 |     fclose(fp);
100 | }
101 |
--------------------------------------------------------------------------------
/content/exercise/offloading/main.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // Main routine for heat equation solver in 2D.
4 |
5 | #include <cstdio>
6 | #include <omp.h>
7 |
8 | #include "heat.h"
9 |
10 | int main(int argc, char **argv)
11 | {
12 |     // Image output interval
13 |     int image_interval = 1500;
14 |
15 |     // Number of time steps
16 |     int nsteps;
17 |     // Current and previous temperature fields
18 |     field current, previous;
19 |     initialize(argc, argv, &current, &previous, &nsteps);
20 |
21 |     // Output the initial field
22 |     write_field(&current, 0);
23 |
24 |     double average_temp = average(&current);
25 |     printf("Average temperature at start: %f\n", average_temp);
26 |
27 |     // Diffusion constant
28 |     double a = 0.5;
29 |
30 |     // Compute the largest stable time step
31 |     double dx2 = current.dx * current.dx;
32 |     double dy2 = current.dy * current.dy;
33 |     // Time step
34 |     double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));
35 |
36 |     // Get the start time stamp
37 |     double start_clock = omp_get_wtime();
38 |
39 |     // Time evolution
40 |     for (int iter = 1; iter <= nsteps; iter++) {
41 |         evolve(&current, &previous, a, dt);
42 |         if (iter % image_interval == 0) {
43 |             write_field(&current, iter);
44 |         }
45 |         // Swap current field so that it will be used
46 |         // as previous for next iteration step
47 |         swap_fields(&current, &previous);
48 |     }
49 |
50 |     double stop_clock = omp_get_wtime();
51 |
52 |     // Average temperature for reference; after the last swap the newest data is in previous
53 |     average_temp = average(&previous);
54 |
55 |     // Report the wall-clock time used for all the iterations
56 |     printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock));
57 |     printf("Average temperature: %f\n", average_temp);
58 |     if (argc == 1) {
59 |         printf("Reference value with default arguments: 59.281239\n");
60 |     }
61 |
62 |     // Output the final field
63 |     write_field(&previous, nsteps);
64 |
65 |     return 0;
66 | }
67 |
--------------------------------------------------------------------------------
/content/exercise/offloading/utilities.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // Utility functions for heat equation solver
4 |
5 | #include <algorithm>
6 | #include <cassert>
7 |
8 | #include "heat.h"
9 |
10 |
11 | // Duplicate the temperature values of temperature1 into temperature2; both must already agree in size
12 | void copy_field(field *temperature1, field *temperature2)
13 | {
14 |     assert(temperature1->nx == temperature2->nx);
15 |     assert(temperature1->ny == temperature2->ny);
16 |     assert(temperature1->data.size() == temperature2->data.size());
17 |     const std::vector<double> &source = temperature1->data;
18 |     std::copy_n(source.begin(), source.size(), temperature2->data.begin());
19 | }
20 |
21 | // Exchange the data vectors of temperature1 and temperature2 (nx, ny, dx, dy stay put)
22 | void swap_fields(field *temperature1, field *temperature2)
23 | {
24 |     temperature1->data.swap(temperature2->data);
25 | }
26 |
27 | // Size the storage of a temperature field; cells added by the resize start at zero
28 | void allocate_field(field *temperature)
29 | {
30 |     // Each dimension carries two extra boundary layers
31 |     const int paddedRows = temperature->nx + 2, paddedCols = temperature->ny + 2;
32 |     temperature->data.resize(paddedRows * paddedCols, 0.0);
33 | }
34 |
35 | // Mean temperature over the interior (non-boundary) grid cells
36 | double average(field *temperature)
37 | {
38 |     const int rows = temperature->nx, cols = temperature->ny;
39 |     double total = 0.0;
40 |
41 |     for (int i = 1; i <= rows; ++i) {
42 |         // Offset of row i in the storage, whose rows hold cols + 2 values
43 |         const int rowStart = i * (cols + 2);
44 |         for (int j = 1; j <= cols; ++j) {
45 |             total += temperature->data[rowStart + j];
46 |         }
47 |     }
48 |     return total / (rows * cols);
49 | }
50 |
51 |
52 |
--------------------------------------------------------------------------------
/content/exercise/serial/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 CSC Training
4 | Copyright (c) 2021 ENCCS
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/content/exercise/serial/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 CSC Training
2 | # Copyright (c) 2021 ENCCS
3 | ifeq ($(COMP),)
4 | COMP=nv
5 | endif
6 | # HAVE_PNG=1 adds the libpng link libraries and search paths defined below
7 | HAVE_PNG=0
8 | ifeq ($(HAVE_PNG),1)
9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib
11 | endif
12 | # Directory with the shared pngwriter sources; it is also put on the include path
13 | COMMONDIR=../common
14 | # Compiler and flags are chosen by COMP (pgi, gnu, nv or intel; nv when unset)
15 | ifeq ($(COMP),pgi)
16 | CXX=pgCC
17 | CC=pgcc
18 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR)
19 | LDFLAGS=
20 | LIBS=
21 | endif
22 |
23 | ifeq ($(COMP),gnu)
24 | CXX=g++
25 | CC=gcc
26 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR)
27 | LDFLAGS=
28 | LIBS=
29 | endif
30 |
31 | ifeq ($(COMP),nv)
32 | CXX=nvc++
33 | CC=nvc
34 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR)
35 | LDFLAGS=
36 | LIBS=
37 | endif
38 |
39 | ifeq ($(COMP),intel)
40 | CXX=icpx
41 | CC=icx
42 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR)
43 | LDFLAGS=
44 | LIBS=
45 | endif
46 | # Name of the executable and the objects it is linked from
47 | EXE=heat_serial
48 | OBJS=main.o core.o setup.o utilities.o io.o
49 | OBJS_PNG=$(COMMONDIR)/pngwriter.o
50 |
51 |
52 | all: $(EXE)
53 |
54 | # Every object is rebuilt when its own source or heat.h changes
55 | core.o: core.cpp heat.h
56 | utilities.o: utilities.cpp heat.h
57 | setup.o: setup.cpp heat.h
58 | io.o: io.cpp heat.h
59 | main.o: main.cpp heat.h
60 | # NOTE(review): C_COMPILER is assigned for these targets but no rule in this Makefile reads it
61 | $(OBJS_PNG): C_COMPILER := $(CC)
62 | $(OBJS_PNG): CCFLAGS += -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include
63 | $(OBJS): C_COMPILER := $(CXX)
64 |
65 | $(EXE): $(OBJS) $(OBJS_PNG)
66 | $(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
67 | # Pattern rules: .cpp sources are compiled with CXX and .c sources with CC
68 | %.o: %.cpp
69 | $(CXX) $(CCFLAGS) -c $< -o $@
70 |
71 | %.o: %.c
72 | $(CC) $(CCFLAGS) -c $< -o $@
73 | # Remove the executable, objects, images and editor backup files
74 | .PHONY: clean
75 | clean:
76 | -/bin/rm -f $(EXE) a.out *.o *.png *~ ../common/*.o
77 |
--------------------------------------------------------------------------------
/content/exercise/serial/core.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // Main solver routines for heat equation solver
4 |
5 | #include "heat.h"
6 |
7 | // Advance the temperature field by one time step with a five-point stencil
8 | // Arguments:
9 | // curr: receives the updated temperature values
10 | // prev: temperature values from the previous time step
11 | // a: diffusivity
12 | // dt: time step
13 | void evolve(field *curr, field *prev, double a, double dt)
14 | {
15 |     // Work on plain pointers and scalars rather than through the structs
16 |     double *next = curr->data.data();
17 |     const double *old = prev->data.data();
18 |     const int nx = curr->nx;
19 |     const int ny = curr->ny;
20 |     // Length of one stored row, boundary columns included
21 |     const int stride = ny + 2;
22 |     const double dx2 = prev->dx * prev->dx;
23 |     const double dy2 = prev->dy * prev->dy;
24 |
25 |     // Only interior points are written; the outermost grid points keep
26 |     // their values, which gives fixed boundary conditions.
27 |     for (int i = 1; i <= nx; ++i) {
28 |         for (int j = 1; j <= ny; ++j) {
29 |             const int ind = i * stride + j;
30 |             const double center = old[ind];
31 |             // Second differences along i (rows) and along j (columns)
32 |             const double d2i = (old[ind + stride] - 2.0 * center + old[ind - stride]) / dx2;
33 |             const double d2j = (old[ind + 1] - 2.0 * center + old[ind - 1]) / dy2;
34 |             next[ind] = center + a * dt * (d2i + d2j);
35 |         }
36 |     }
37 |     // The ghost cells of curr are not touched by this routine
38 | }
39 |
--------------------------------------------------------------------------------
/content/exercise/serial/fortran/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 CSC Training
2 | # Copyright (c) 2021 ENCCS
3 | ifeq ($(COMP),)
4 | COMP=nv
5 | endif
6 | # HAVE_PNG=1 adds the libpng link libraries and search paths defined below
7 | HAVE_PNG=0
8 | ifeq ($(HAVE_PNG),1)
9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib
11 | endif
12 |
13 | # Directory with the shared pngwriter sources
14 | COMMONDIR=../../common
15 | # Compiler and flags are chosen by COMP (nv or gnu; nv when unset)
16 | ifeq ($(COMP),nv)
17 | FC=nvfortran
18 | CC=nvc
19 | FCFLAGS=-O3 -mp=multicore -Minfo=mp
20 | CCFLAGS=-O3 -I$(COMMONDIR)
21 | LDFLAGS=
22 | LIBS=
23 | endif
24 |
25 | ifeq ($(COMP),gnu)
26 | FC=gfortran
27 | CC=gcc
28 | FCFLAGS=-O3 -Wall -fopenmp
29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR)
30 | LDFLAGS=-fopenmp
31 | LIBS=
32 | endif
33 |
34 | # Name of the executable and the objects it is linked from
35 | EXE=heat_serial
36 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o
37 | OBJS_PNG= $(COMMONDIR)/pngwriter.o
38 |
39 | all: $(EXE)
40 | # Explicit prerequisites; presumably they order the builds so that module files exist before use
41 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
42 | heat_mod.o: heat_mod.F90
43 | core.o: core.F90 heat_mod.o
44 | utilities.o: utilities.F90 heat_mod.o
45 | io.o: io.F90 heat_mod.o pngwriter_mod.o
46 | setup.o: setup.F90 heat_mod.o utilities.o io.o
47 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o
48 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o
49 |
50 | $(EXE): $(OBJS) $(OBJS_PNG)
51 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
52 |
53 | %.o: %.F90
54 | $(FC) $(FCFLAGS) -c $< -o $@
55 | # C sources receive HAVE_PNG as a preprocessor definition
56 | %.o: %.c
57 | $(CC) -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include $(CCFLAGS) -c $< -o $@
58 | # Remove the executable, objects, module files, images and editor backup files
59 | .PHONY: clean
60 | clean:
61 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ ../../common/*.o
62 |
63 |
--------------------------------------------------------------------------------
/content/exercise/serial/fortran/core.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Main solver routines for heat equation solver
4 | module core
5 |   use heat
6 |
7 | contains
8 |
9 |   ! Advance the temperature field one time step with a five-point stencil
10 |   ! Arguments:
11 |   !   curr (type(field)): receives the new temperature values
12 |   !   prev (type(field)): temperature values from previous time step
13 |   !   a (real(dp)): diffusivity
14 |   !   dt (real(dp)): time step
15 |   subroutine evolve(curr, prev, a, dt)
16 |
17 |     implicit none
18 |
19 |     type(field), target, intent(inout) :: curr, prev
20 |     real(dp) :: a, dt
21 |     integer :: ix, jy, nx, ny
22 |     real(dp) :: dx2, dy2, center
23 |     real(dp), pointer, contiguous, dimension(:,:) :: next, old
24 |
25 |     ! Plain pointers and scalars keep derived-type accesses out of the loop
26 |     next => curr%data
27 |     old => prev%data
28 |     nx = curr%nx
29 |     ny = curr%ny
30 |     dx2 = curr%dx**2
31 |     dy2 = curr%dy**2
32 |
33 |     ! Only the points 1..nx, 1..ny are written; the outermost
34 |     ! grid points keep their values, which gives fixed boundary
35 |     ! conditions.
36 |     do jy = 1, ny
37 |       do ix = 1, nx
38 |         ! Value at the stencil centre, used by both second differences
39 |         center = old(ix, jy)
40 |         next(ix, jy) = center + a * dt * &
41 |              & ((old(ix-1, jy) - 2.0_dp * center + old(ix+1, jy)) / dx2 + &
42 |              & (old(ix, jy-1) - 2.0_dp * center + old(ix, jy+1)) / dy2)
43 |       end do
44 |     end do
45 |   end subroutine evolve
46 |
47 | end module core
48 |
--------------------------------------------------------------------------------
/content/exercise/serial/fortran/heat_mod.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Field metadata for heat equation solver
4 | module heat
5 |   use iso_fortran_env, only : REAL64
6 |   implicit none
7 |
8 |   integer, parameter :: dp = REAL64
9 |   real(dp), parameter :: DX = 0.01_dp, DY = 0.01_dp ! Fixed grid spacing; _dp keeps full double precision
10 |
11 |   type :: field
12 |     integer :: nx ! number of interior grid points in the first dimension
13 |     integer :: ny ! number of interior grid points in the second dimension
14 |     real(dp) :: dx ! grid spacing in the first dimension
15 |     real(dp) :: dy ! grid spacing in the second dimension
16 |     real(dp), dimension(:,:), allocatable :: data ! temperature values
17 |   end type field
18 |
19 | contains
20 |   ! Initialize the field type metadata; the spacing is always DX, DY
21 |   ! Arguments:
22 |   !   field0 (type(field)): field whose metadata is set (intent(out))
23 |   !   nx, ny: field dimensions
24 |   subroutine set_field_dimensions(field0, nx, ny)
25 |     implicit none
26 |
27 |     type(field), intent(out) :: field0
28 |     integer, intent(in) :: nx, ny
29 |
30 |     field0%dx = DX
31 |     field0%dy = DY
32 |     field0%nx = nx
33 |     field0%ny = ny
34 |
35 |   end subroutine set_field_dimensions
36 |
37 | end module heat
38 |
--------------------------------------------------------------------------------
/content/exercise/serial/fortran/io.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! I/O routines for heat equation solver
4 | module io
5 | use heat
6 |
7 | contains
8 |
9 | ! Output routine, saves the temperature distribution as a png image
10 | ! Arguments:
11 | ! curr (type(field)): variable with the temperature data
12 | ! iter (integer): index of the time step
13 | subroutine write_field(curr, iter)
14 | ! The file name is 'heat_' followed by iter as four zero-padded digits and '.png'
15 | use pngwriter
16 | implicit none
17 | type(field), intent(in) :: curr
18 | integer, intent(in) :: iter
19 |
20 | character(len=85) :: filename
21 |
22 | integer :: stat
23 | real(dp), dimension(:,:), allocatable, target :: full_data
24 |
25 | allocate(full_data(curr%nx, curr%ny))
26 | ! Copy the cells 1:nx, 1:ny, i.e. the field without its boundary cells
27 | full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny)
28 |
29 | write(filename,'(A5,I4.4,A4,A)') 'heat_', iter, '.png'
30 | stat = save_png(full_data, curr%nx, curr%ny, filename)
31 | deallocate(full_data)
32 |
33 | end subroutine write_field
34 |
35 |
36 | ! Reads the temperature distribution from an input file
37 | ! Arguments:
38 | ! field0 (type(field)): field variable that will store the
39 | ! read data
40 | ! filename (char): name of the input file
41 | ! Note that this version assumes the input data to be in C memory layout
42 | subroutine read_field(field0, filename)
43 |
44 | implicit none
45 | type(field), intent(out) :: field0
46 | character(len=85), intent(in) :: filename
47 |
48 | integer :: nx, ny, i
49 | character(len=2) :: dummy
50 |
51 | real(dp), dimension(:,:), allocatable :: full_data
52 | ! NOTE(review): unit 10 is hard-coded and open has no iostat/status check - confirm this is intended
53 | open(10, file=filename)
54 | ! Read the header: a marker token followed by nx and ny
55 | read(10, *) dummy, nx, ny
56 |
57 | call set_field_dimensions(field0, nx, ny)
58 |
59 | ! The arrays for temperature field contain also a halo region
60 | allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
61 |
62 | allocate(full_data(nx, ny))
63 | ! Read the data: each read statement fills full_data(i, 1:ny)
64 | do i = 1, nx
65 | read(10, *) full_data(i, 1:ny)
66 | end do
67 |
68 | ! Copy to full array containing also boundaries
69 | field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:)
70 |
71 | ! Set the boundary values by repeating the adjacent inner row or column
72 | field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1)
73 | field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny)
74 | field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1)
75 | field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1)
76 |
77 | close(10)
78 | deallocate(full_data)
79 |
80 | end subroutine read_field
81 |
82 | end module io
83 |
--------------------------------------------------------------------------------
/content/exercise/serial/fortran/main.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Heat equation solver in 2D.
4 |
5 | program heat_solve
6 | use heat
7 | use core
8 | use io
9 | use setup
10 | use utilities
11 | use omp_lib
12 |
13 | implicit none
14 |
15 | real(dp), parameter :: a = 0.5 ! Diffusion constant
16 | type(field) :: current, previous ! Current and previous temperature fields
17 |
18 | real(dp) :: dt ! Time step
19 | integer :: nsteps ! Number of time steps
20 | integer, parameter :: image_interval = 1500 ! Image output interval
21 |
22 | integer :: iter
23 |
24 | real(dp) :: average_temp ! Average temperature
25 |
26 | real(kind=dp) :: start, stop ! Timers
27 | ! NOTE(review): initialize names its dummies (previous, current, nsteps); both get the same data, so the order here looks harmless - confirm
28 | call initialize(current, previous, nsteps)
29 |
30 | ! Draw the picture of the initial state
31 | call write_field(current, 0)
32 |
33 | average_temp = average(current)
34 | write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp
35 |
36 | ! Largest stable time step
37 | dt = current%dx**2 * current%dy**2 / &
38 | & (2.0 * a * (current%dx**2 + current%dy**2))
39 |
40 | ! Main iteration loop
41 | ! evolve fills current from previous; the swap then makes that result the input of the next step
42 | start = omp_get_wtime()
43 |
44 | do iter = 1, nsteps
45 | call evolve(current, previous, a, dt)
46 | if (mod(iter, image_interval) == 0) then
47 | call write_field(current, iter)
48 | end if
49 | call swap_fields(current, previous)
50 | end do
51 |
52 | stop = omp_get_wtime()
53 | ! After the final swap the newest values are held by previous
54 | ! Average temperature for reference
55 | average_temp = average(previous)
56 |
57 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.'
58 | write(*,'(A,F9.6)') 'Average temperature: ', average_temp
59 | if (command_argument_count() == 0) then
60 | write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239
61 | end if
62 |
63 | call finalize(current, previous)
64 |
65 | end program heat_solve
66 |
--------------------------------------------------------------------------------
/content/exercise/serial/fortran/pngwriter_mod.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! PNG writer for heat equation solver
4 | module pngwriter
5 | use heat
6 |
7 | contains
8 | ! Fortran wrapper around the C routine bound to the name "save_png"; returns its status and prints a message when it is nonzero
9 | function save_png(data, nx, ny, fname) result(stat)
10 |
11 | use, intrinsic :: ISO_C_BINDING
12 | implicit none
13 |
14 | real(dp), dimension(:,:), intent(in) :: data
15 | integer, intent(in) :: nx, ny
16 | character(len=*), intent(in) :: fname
17 | integer :: stat
18 | ! NOTE(review): the signature quoted below has four parameters, but the interface passes a fifth (order) by value - confirm against pngwriter.h
19 | ! Interface for save_png C-function
20 | interface
21 | ! The C-function definition is
22 | ! int save_png(double *data, const int nx, const int ny,
23 | ! const char *fname)
24 | function save_png_c(data, nx, ny, fname, order) &
25 | & bind(C,name="save_png") result(stat)
26 | use, intrinsic :: ISO_C_BINDING
27 | implicit none
28 | real(kind=C_DOUBLE) :: data(*)
29 | integer(kind=C_INT), value, intent(IN) :: nx, ny
30 | character(kind=C_CHAR), intent(IN) :: fname(*)
31 | character(kind=C_CHAR), value, intent(IN) :: order
32 | integer(kind=C_INT) :: stat
33 | end function save_png_c
34 | end interface
35 | ! The trimmed file name gets a terminating C_NULL_CHAR; 'f' is passed as the order argument
36 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
37 | if (stat /= 0) then
38 | write(*,*) 'save_png returned error!'
39 | end if
40 |
41 | end function save_png
42 |
43 | end module pngwriter
44 |
--------------------------------------------------------------------------------
/content/exercise/serial/fortran/setup.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Setup routines for heat equation solver
4 | module setup
5 |   use heat
6 |
7 | contains
8 |   ! Parse the command line and set up both fields with the same initial state
9 |   subroutine initialize(previous, current, nsteps)
10 |     use utilities
11 |     use io
12 |
13 |     implicit none
14 |
15 |     type(field), intent(out) :: previous, current
16 |     integer, intent(out) :: nsteps
17 |
18 |     integer :: rows, cols
19 |     logical :: using_input_file
20 |     character(len=85) :: input_file, arg ! Input file name and command line arguments
21 |
22 |
23 |     ! Default values for grid size and time steps
24 |     rows = 2000
25 |     cols = 2000
26 |     nsteps = 500
27 |     using_input_file = .false.
28 |
29 |     ! Read in the command line arguments and
30 |     ! set up the needed variables
31 |     select case(command_argument_count())
32 |     case(0) ! No arguments -> default values
33 |     case(1) ! One argument -> input file name
34 |       using_input_file = .true.
35 |       call get_command_argument(1, input_file)
36 |     case(2) ! Two arguments -> input file name and number of steps
37 |       using_input_file = .true.
38 |       call get_command_argument(1, input_file)
39 |       call get_command_argument(2, arg)
40 |       read(arg, *) nsteps
41 |     case(3) ! Three arguments -> rows, cols and nsteps
42 |       call get_command_argument(1, arg)
43 |       read(arg, *) rows
44 |       call get_command_argument(2, arg)
45 |       read(arg, *) cols
46 |       call get_command_argument(3, arg)
47 |       read(arg, *) nsteps
48 |     case default
49 |       call usage()
50 |       stop
51 |     end select
52 |
53 |     ! Initialize the fields according to the command line arguments
54 |     if (using_input_file) then
55 |       call read_field(previous, input_file)
56 |       call copy_fields(previous, current)
57 |     else
58 |       call set_field_dimensions(previous, rows, cols)
59 |       call set_field_dimensions(current, rows, cols)
60 |       call generate_field(previous)
61 |       call copy_fields(previous, current)
62 |     end if
63 |
64 |   end subroutine initialize
65 |
66 |   ! Generate the initial temperature field. Pattern is a disc with a radius
67 |   ! of nx / 6 in the center of the grid.
68 |   ! Boundary conditions are (different) constant temperatures outside the grid
69 |   subroutine generate_field(field0)
70 |     use heat
71 |
72 |     implicit none
73 |
74 |     type(field), intent(inout) :: field0
75 |
76 |     real(dp) :: radius2
77 |     integer :: i, j, ds2
78 |
79 |     ! The arrays for field contain also a halo region
80 |     allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
81 |
82 |     ! Square of the disk radius
83 |     radius2 = (field0%nx / 6.0_dp)**2
84 |
85 |     do j = 0, field0%ny + 1
86 |       do i = 0, field0%nx + 1
87 |         ds2 = int((i - field0%nx / 2.0_dp + 1)**2 + &
88 |              & (j - field0%ny / 2.0_dp + 1)**2)
89 |         if (ds2 < radius2) then
90 |           field0%data(i,j) = 5.0_dp
91 |         else
92 |           field0%data(i,j) = 65.0_dp
93 |         end if
94 |       end do
95 |     end do
96 |
97 |     ! Boundary conditions
98 |     field0%data(:,0) = 20.0_dp
99 |     field0%data(:,field0%ny+1) = 70.0_dp
100 |     field0%data(0,:) = 85.0_dp
101 |     field0%data(field0%nx+1,:) = 5.0_dp
102 |
103 |   end subroutine generate_field
104 |
105 |
106 |   ! Clean up routine for field type
107 |   ! Arguments:
108 |   !   field0, field1 (type(field)): field variables whose data is deallocated
109 |   subroutine finalize(field0, field1)
110 |     use heat
111 |
112 |     implicit none
113 |
114 |     type(field), intent(inout) :: field0, field1
115 |
116 |     deallocate(field0%data)
117 |     deallocate(field1%data)
118 |
119 |   end subroutine finalize
120 |
121 |   ! Helper routine that prints out a simple usage if
122 |   ! user gives more than three arguments
123 |   subroutine usage()
124 |     implicit none
125 |     character(len=256) :: buf
126 |     ! One line per accepted argument pattern, matching the cases handled in initialize
127 |     call get_command_argument(0, buf)
128 |     write (*,'(A)') 'Usage:'
129 |     write (*,'(A, " (default values will be used)")') trim(buf)
130 |     write (*,'(A, " <filename>")') trim(buf)
131 |     write (*,'(A, " <filename> <nsteps>")') trim(buf)
132 |     write (*,'(A, " <rows> <cols> <nsteps>")') trim(buf)
133 |   end subroutine usage
134 |
135 | end module setup
136 |
--------------------------------------------------------------------------------
/content/exercise/serial/fortran/utilities.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Utility routines for heat equation solver
4 | ! NOTE: This file does not need to be edited!
5 | module utilities
6 |   use heat
7 |
8 | contains
9 |
10 |   ! Exchange the data arrays of two variables of type field; the
11 |   ! metadata (nx, ny, dx, dy) of both variables is left untouched
12 |   ! Arguments: curr, prev (type(field)): the two variables that are swapped
13 |   subroutine swap_fields(curr, prev)
14 |
15 |     implicit none
16 |
17 |     type(field), intent(inout) :: curr, prev
18 |     real(dp), allocatable, dimension(:,:) :: tmp
19 |     ! move_alloc hands over each allocation as a whole, so no element is copied
20 |     call move_alloc(curr%data, tmp)
21 |     call move_alloc(prev%data, curr%data)
22 |     call move_alloc(tmp, prev%data)
23 |   end subroutine swap_fields
24 |
25 |   ! Copy the data and the metadata (nx, ny, dx, dy) from one field to another
26 |   ! Arguments:
27 |   !   from_field (type(field)): variable to copy from
28 |   !   to_field (type(field)): variable to copy to
29 |   subroutine copy_fields(from_field, to_field)
30 |
31 |     implicit none
32 |
33 |     type(field), intent(in) :: from_field
34 |     type(field), intent(out) :: to_field
35 |     ! NOTE(review): to_field is intent(out), so its data presumably arrives deallocated and the shape check below is never reached - confirm
36 |     ! Consistency checks
37 |     if (.not.allocated(from_field%data)) then
38 |       write (*,*) "Can not copy from a field without allocated data"
39 |       stop
40 |     end if
41 |     if (.not.allocated(to_field%data)) then
42 |       ! Target is not initialized: allocate it with the bounds of the source
43 |       allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
44 |            & lbound(from_field%data, 2):ubound(from_field%data, 2)))
45 |     else if (any(shape(from_field%data) /= shape(to_field%data))) then
46 |       write (*,*) "Wrong field data sizes in copy routine"
47 |       print *, shape(from_field%data), shape(to_field%data)
48 |       stop
49 |     end if
50 |
51 |     to_field%data = from_field%data
52 |
53 |     to_field%nx = from_field%nx
54 |     to_field%ny = from_field%ny
55 |     to_field%dx = from_field%dx
56 |     to_field%dy = from_field%dy
57 |   end subroutine copy_fields
58 |
59 |   function average(field0)
60 |     ! Mean of the interior cells data(1:nx, 1:ny) of field0
61 |     implicit none
62 |
63 |     real(dp) :: average
64 |     type(field), intent(in) :: field0
65 |
66 |     ! The cell count is formed in real(dp) so that nx * ny cannot
67 |     ! overflow default integer arithmetic on very large grids
68 |
69 |     average = sum(field0%data(1:field0%nx, 1:field0%ny))
70 |     average = average / (real(field0%nx, dp) * real(field0%ny, dp))
71 |
72 |   end function average
73 |
74 | end module utilities
75 |
--------------------------------------------------------------------------------
/content/exercise/serial/heat.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | #ifndef __HEAT_H__
4 | #define __HEAT_H__
5 |
6 | #include
7 |
8 | // Datatype for temperature field
9 | struct field {
10 |     // nx and ny are the dimensions of the field. The array data
11 |     // contains also ghost layers, so it will have dimensions nx+2 x ny+2
12 |     int nx;
13 |     int ny;
14 |     // Size of the grid cells
15 |     double dx;
16 |     double dy;
17 |     // The temperature values in the 2D grid, one row of ny+2 values after another
18 |     std::vector<double> data;
19 | };
20 |
21 | // We use here fixed grid spacing
22 | const double DX = 0.01;
23 | const double DY = 0.01;
24 |
25 | #if __cplusplus
26 | extern "C" {
27 | #endif
28 | // Function prototypes
29 | void set_field_dimensions(field *temperature, int nx, int ny);
30 |
31 | void initialize(int argc, char *argv[], field *temperature1,
32 | field *temperature2, int *nsteps);
33 |
34 | void generate_field(field *temperature);
35 |
36 | double average(field *temperature);
37 |
38 | void evolve(field *curr, field *prev, double a, double dt);
39 |
40 | void write_field(field *temperature, int iter);
41 |
42 | void read_field(field *temperature1, field *temperature2,
43 | char *filename);
44 |
45 | void copy_field(field *temperature1, field *temperature2);
46 |
47 | void swap_fields(field *temperature1, field *temperature2);
48 |
49 | void allocate_field(field *temperature);
50 |
51 | #if __cplusplus
52 | }
53 | #endif
54 | #endif // __HEAT_H__
55 |
56 |
--------------------------------------------------------------------------------
/content/exercise/serial/heat_0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/exercise/serial/heat_0000.png
--------------------------------------------------------------------------------
/content/exercise/serial/heat_0010.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/exercise/serial/heat_0010.png
--------------------------------------------------------------------------------
/content/exercise/serial/io.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // I/O related functions for heat equation solver
4 |
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | #include "heat.h"
11 | #include "pngwriter.h"
12 |
13 | // Output routine that prints out a picture of the temperature
14 | // distribution. The image is written to "heat_<iter>.png", with the
15 | // iteration number zero-padded to at least four digits.
16 | void write_field(field *temperature, int iter)
17 | {
18 |     char filename[64];
19 |     const int nx = temperature->nx;
20 |     const int ny = temperature->ny;
21 |
22 |     // The actual write routine takes only the actual data (without
23 |     // boundary layers), so gather the interior of every row into a
24 |     // contiguous buffer first.
25 |     std::vector<double> inner_data(nx * ny);
26 |     for (int i = 0; i < nx; i++) {
27 |         auto row_start = temperature->data.begin() + (i + 1) * (ny + 2) + 1;
28 |         std::copy(row_start, row_start + ny, inner_data.begin() + i * ny);
29 |     }
30 |
31 |     // Write out the data to a png file; snprintf cannot overrun filename
32 |     snprintf(filename, sizeof(filename), "%s_%04d.png", "heat", iter);
33 |     save_png(inner_data.data(), nx, ny, filename, 'c');
34 | }
35 |
36 | // Read the initial temperature distribution from a file ("# nx ny" header,
37 | // then nx * ny row-major values) and initialize temperature1 and temperature2
38 | // to the same initial state. Exits if the file is missing, malformed or truncated.
39 | void read_field(field *temperature1, field *temperature2, char *filename)
40 | {
41 |     int nx, ny;
42 |
43 |     // Fail early instead of handing a null stream to fscanf
44 |     FILE *fp = fopen(filename, "r");
45 |     if (fp == nullptr) {
46 |         fprintf(stderr, "Could not open the input file %s\n", filename);
47 |         exit(-1);
48 |     }
49 |
50 |     // Read the header; non-positive dimensions cannot describe a grid
51 |     if (fscanf(fp, "# %d %d \n", &nx, &ny) < 2 || nx < 1 || ny < 1) {
52 |         fprintf(stderr, "Error while reading the input file!\n");
53 |         exit(-1);
54 |     }
55 |
56 |     set_field_dimensions(temperature1, nx, ny);
57 |     set_field_dimensions(temperature2, nx, ny);
58 |
59 |     // Allocate arrays (including boundary layers)
60 |     int newSize = (temperature1->nx + 2) * (temperature1->ny + 2);
61 |     temperature1->data.resize(newSize, 0.0);
62 |     temperature2->data.resize(newSize, 0.0);
63 |
64 |     // Read the actual data; every one of the nx * ny values must be present
65 |     std::vector<double> file_data(nx * ny, 0.0);
66 |     for (int ind = 0; ind < nx * ny; ind++) {
67 |         if (fscanf(fp, "%lf", &file_data[ind]) != 1) {
68 |             fprintf(stderr, "Error while reading the input file!\n");
69 |             exit(-1);
70 |         }
71 |     }
72 |     fclose(fp);
73 |
74 |     int nx_local = temperature1->nx;
75 |     int ny_local = temperature1->ny;
76 |     int stride = ny_local + 2;   // length of one row including ghost cells
77 |
78 |     // Copy to the inner part of the full temperature field
79 |     auto temperature_data_iterator = temperature1->data.begin() + stride + 1;
80 |     auto beginning_of_row = file_data.begin();
81 |     for (int i = 0; i < nx_local; i++) {
82 |         auto end_of_row = beginning_of_row + ny_local;
83 |         std::copy(beginning_of_row, end_of_row, temperature_data_iterator);
84 |         temperature_data_iterator += stride;
85 |         beginning_of_row = end_of_row;
86 |     }
87 |
88 |     // Set the boundary values: left and right ghost columns first ...
89 |     for (int i = 1; i < nx_local + 1; i++) {
90 |         temperature1->data[i * stride] = temperature1->data[i * stride + 1];
91 |         temperature1->data[i * stride + ny_local + 1] = temperature1->data[i * stride + ny_local];
92 |     }
93 |     // ... then the top and bottom ghost rows, copied from rows 1 and nx
94 |     for (int j = 0; j < stride; j++) {
95 |         temperature1->data[j] = temperature1->data[stride + j];
96 |         temperature1->data[(nx_local + 1) * stride + j] = temperature1->data[nx_local * stride + j];
97 |     }
98 |
99 |     copy_field(temperature1, temperature2);
100 | }
101 |
--------------------------------------------------------------------------------
/content/exercise/serial/main.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // Main routine for heat equation solver in 2D.
4 |
5 | #include
6 | #include
7 |
8 | #include "heat.h"
9 |
10 | int main(int argc, char **argv)
11 | {
12 |     // Driver: set up the fields, run the time stepping, report timing and average temperature
13 |     int image_interval = 1500;   // Image output interval (in time steps)
14 |
15 |     // Number of time steps
16 |     int nsteps;
17 |     // Current and previous temperature fields
18 |     field current, previous;
19 |     initialize(argc, argv, &current, &previous, &nsteps);
20 |
21 |     // Output the initial field
22 |     write_field(&current, 0);
23 |
24 |     double average_temp = average(&current);
25 |     printf("Average temperature at start: %f\n", average_temp);
26 |
27 |     // Diffusion constant
28 |     double a = 0.5;
29 |
30 |     // Compute the largest stable time step
31 |     double dx2 = current.dx * current.dx;
32 |     double dy2 = current.dy * current.dy;
33 |     // Time step
34 |     double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));
35 |
36 |     // Get the start time stamp
37 |     double start_clock = omp_get_wtime();
38 |
39 |     // Time evolution
40 |     for (int iter = 1; iter <= nsteps; iter++) {
41 |         evolve(&current, &previous, a, dt);
42 |         if (iter % image_interval == 0) {
43 |             write_field(&current, iter);
44 |         }
45 |         // Swap current field so that it will be used
46 |         // as previous for next iteration step
47 |         swap_fields(&current, &previous);
48 |     }
49 |
50 |     double stop_clock = omp_get_wtime();
51 |
52 |     // Average temperature for reference; after the final swap the newest values are in previous
53 |     average_temp = average(&previous);
54 |
55 |     // Report the wall-clock time used for all the iterations
56 |     printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock));
57 |     printf("Average temperature: %f\n", average_temp);
58 |     if (argc == 1) {
59 |         printf("Reference value with default arguments: 59.281239\n");
60 |     }
61 |
62 |     // Output the final field
63 |     write_field(&previous, nsteps);
64 |
65 |     return 0;
66 | }
67 |
--------------------------------------------------------------------------------
/content/exercise/serial/utilities.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // Utility functions for heat equation solver
4 |
5 | #include
6 | #include
7 |
8 | #include "heat.h"
9 |
10 |
11 | // Copy the temperature values of temperature1 into temperature2; both fields must already have equal sizes
12 | void copy_field(field *temperature1, field *temperature2)
13 | {
14 |     assert(temperature1->nx == temperature2->nx);
15 |     assert(temperature1->ny == temperature2->ny);
16 |     assert(temperature1->data.size() == temperature2->data.size());
17 |     const std::vector<double> &source = temperature1->data;
18 |     std::copy(source.cbegin(), source.cend(), temperature2->data.begin());
19 | }
20 |
21 | // Exchange the data arrays of temperature1 and temperature2; nx, ny, dx and dy are left untouched
22 | void swap_fields(field *temperature1, field *temperature2)
23 | {
24 |     temperature1->data.swap(temperature2->data);
25 | }
26 |
27 | // Size the data array of a temperature field for its nx x ny grid plus
28 | // one boundary layer on every side; elements added by the resize are zero
29 | void allocate_field(field *temperature)
30 | {
31 |     const int padded_rows = temperature->nx + 2;
32 |     temperature->data.resize(padded_rows * (temperature->ny + 2), 0.0);
33 | }
34 |
35 | // Calculate average temperature over the non-boundary grid cells
36 | double average(field *temperature)
37 | {
38 |     const int nx = temperature->nx;
39 |     const int ny = temperature->ny;
40 |     const int stride = ny + 2;   // row length including boundary cells
41 |     double total = 0.0;
42 |
43 |     for (int i = 1; i <= nx; i++) {
44 |         for (int j = 1; j <= ny; j++) {
45 |             total += temperature->data[i * stride + j];
46 |         }
47 |     }
48 |     return total / (nx * ny);
49 | }
50 |
51 |
52 |
--------------------------------------------------------------------------------
/content/exercise/solution/common/pngwriter.h:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2019 CSC Training */
2 | /* Copyright (c) 2021 ENCCS */
3 | #ifndef PNGWRITER_H_
4 | #define PNGWRITER_H_
5 |
6 | #if __cplusplus
7 | extern "C" {
8 | #endif
9 |
10 | int save_png(double *data, const int nx, const int ny, const char *fname,
11 | const char lang);
12 |
13 | #if __cplusplus
14 | }
15 | #endif
16 | #endif
17 |
--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 CSC Training
4 | Copyright (c) 2021 ENCCS
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 CSC Training
2 | # Copyright (c) 2021 ENCCS
3 | ifeq ($(COMP),)
4 | COMP=nv
5 | endif
6 |
7 | HAVE_PNG=0
8 | ifeq ($(HAVE_PNG),1)
9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib
11 | endif
12 |
13 | COMMONDIR=../common
14 |
15 | ifeq ($(COMP),gnu)
16 | CXX=g++
17 | CC=gcc
18 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR)
19 | LDFLAGS=
20 | LIBS=
21 | endif
22 |
23 | ifeq ($(COMP),nv)
24 | CXX=nvc++
25 | CC=nvc
26 | CCFLAGS=-O3 -mp=gpu -Minfo=mp -I$(COMMONDIR)
27 | LDFLAGS=
28 | LIBS=
29 | endif
30 |
31 | ifeq ($(COMP),intel)
32 | CXX=icpx
33 | CC=icx
34 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR)
35 | LDFLAGS=
36 | LIBS=
37 | endif
38 |
39 | EXE=heat_serial
40 | OBJS=main.o core.o setup.o utilities.o io.o
41 | OBJS_PNG=$(COMMONDIR)/pngwriter.o
42 |
43 |
44 | all: $(EXE)
45 |
46 |
47 | core.o: core.cpp heat.h
48 | utilities.o: utilities.cpp heat.h
49 | setup.o: setup.cpp heat.h
50 | io.o: io.cpp heat.h
51 | main.o: main.cpp heat.h
52 |
53 | $(OBJS_PNG): C_COMPILER := $(CC)
54 | $(OBJS_PNG): CCFLAGS += -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include
55 | $(OBJS): C_COMPILER := $(CXX)
56 |
57 | $(EXE): $(OBJS) $(OBJS_PNG)
58 | $(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
59 |
60 | %.o: %.cpp
61 | $(CXX) $(CCFLAGS) -c $< -o $@
62 |
63 | %.o: %.c
64 | $(CC) $(CCFLAGS) -c $< -o $@
65 |
66 | .PHONY: clean
67 | clean:
68 | -/bin/rm -f $(EXE) a.out *.o *.png *~ ../common/*.o
69 |
--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/core.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // Main solver routines for heat equation solver
4 |
5 | #include "heat.h"
6 |
7 | // Update the temperature values using five-point stencil
8 | // Arguments:
9 | // curr: field that receives the temperature values of the new time step
10 | // prev: temperature values from previous time step (read only)
11 | // a: diffusivity
12 | // dt: time step
13 | void evolve(field *curr, field *prev, double a, double dt)
14 | {
15 | // Use raw pointers and plain ints inside the offloaded loop instead of the struct members
16 | double *currdata = curr->data.data();
17 | double *prevdata = prev->data.data();
18 | int nx = curr->nx;
19 | int ny = curr->ny;
20 | // NOTE(review): no map clauses below - presumably both arrays were mapped by enter_data() beforehand; confirm in the caller
21 | // Determine the temperature field at next time step
22 | // As we have fixed boundary conditions, the outermost gridpoints
23 | // are not updated.
24 | double dx2 = prev->dx * prev->dx;
25 | double dy2 = prev->dy * prev->dy;
26 | #pragma omp target teams distribute parallel for
27 | for (int i = 1; i < nx + 1; i++) {
28 | for (int j = 1; j < ny + 1; j++) {
29 | int ind = i * (ny + 2) + j;
30 | int ip = (i + 1) * (ny + 2) + j;
31 | int im = (i - 1) * (ny + 2) + j;
32 | int jp = i * (ny + 2) + j + 1;
33 | int jm = i * (ny + 2) + j - 1;
34 | currdata[ind] = prevdata[ind] + a*dt*
35 | ((prevdata[ip] - 2.0*prevdata[ind] + prevdata[im]) / dx2 +
36 | (prevdata[jp] - 2.0*prevdata[ind] + prevdata[jm]) / dy2);
37 | }
38 | }
39 | }
40 |
41 | // Start a data region and copy temperature fields to the device
42 | void enter_data(field *curr, field *prev)
43 | {
44 |     // Raw pointers and dimensions are taken out of the structs so the
45 |     // map clause only refers to plain local variables
46 |     double *currdata = curr->data.data();
47 |     double *prevdata = prev->data.data();
48 |     const int nx = curr->nx;
49 |     const int ny = curr->ny;
50 |
51 |     // Map both complete arrays (grid plus boundary layers) to the
52 |     // device; they stay mapped after this function returns
53 | #pragma omp target enter data \
54 |     map(to: currdata[0:(nx+2)*(ny+2)], prevdata[0:(nx+2)*(ny+2)])
55 | }
56 |
57 | // End a data region and copy temperature fields back to the host
58 | void exit_data(field *curr, field *prev)
59 | {
60 |     // Raw pointers and dimensions are taken out of the structs so the
61 |     // map clause only refers to plain local variables
62 |     double *currdata = curr->data.data();
63 |     double *prevdata = prev->data.data();
64 |     const int nx = curr->nx;
65 |     const int ny = curr->ny;
66 |
67 |     // Bring both complete arrays (grid plus boundary layers) back
68 |     // from the device
69 | #pragma omp target exit data \
70 |     map(from: currdata[0:(nx+2)*(ny+2)], prevdata[0:(nx+2)*(ny+2)])
71 | }
72 |
73 | // Copy a temperature field from the device to the host
74 | void update_host(field *temperature)
75 | {
76 |     // Pointer and dimensions are pulled out of the struct so the
77 |     // directive below only refers to plain local variables
78 |     double *data = temperature->data.data();
79 |     const int nx = temperature->nx;
80 |     const int ny = temperature->ny;
81 |
82 |     // Refresh the host copy of the whole array (grid plus boundary
83 |     // layers) from the device
84 | #pragma omp target update from(data[0:(nx+2)*(ny+2)])
85 | }
86 |
--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/fortran/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 CSC Training
2 | # Copyright (c) 2021 ENCCS
3 | ifeq ($(COMP),)
4 | COMP=nv
5 | endif
6 |
7 | HAVE_PNG=0
8 | ifeq ($(HAVE_PNG),1)
9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib
11 | endif
12 |
13 |
14 | COMMONDIR=../../common
15 |
16 | ifeq ($(COMP),nv)
17 | FC=nvfortran
18 | CC=nvc
19 | FCFLAGS=-O3 -mp=gpu -Minfo=mp
20 | CCFLAGS=-O3 -I$(COMMONDIR)
21 | LDFLAGS=
22 | LIBS=
23 | endif
24 |
25 | ifeq ($(COMP),gnu)
26 | FC=gfortran
27 | CC=gcc
28 | FCFLAGS=-O3 -Wall -fopenmp
29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR)
30 | LDFLAGS=-fopenmp
31 | LIBS=
32 | endif
33 |
34 |
35 | EXE=heat_serial
36 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o
37 | OBJS_PNG= $(COMMONDIR)/pngwriter.o
38 |
39 | all: $(EXE)
40 |
41 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
42 | heat_mod.o: heat_mod.F90
43 | core.o: core.F90 heat_mod.o
44 | utilities.o: utilities.F90 heat_mod.o
45 | io.o: io.F90 heat_mod.o pngwriter_mod.o
46 | setup.o: setup.F90 heat_mod.o utilities.o io.o
47 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o
48 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o
49 |
50 | $(EXE): $(OBJS) $(OBJS_PNG)
51 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
52 |
53 | %.o: %.F90
54 | $(FC) $(FCFLAGS) -c $< -o $@
55 |
56 | %.o: %.c
57 | $(CC) -DHAVE_PNG=$(HAVE_PNG) $(CCFLAGS) -c $< -o $@
58 |
59 | .PHONY: clean
60 | clean:
61 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ ../../common/*.o
62 |
63 |
--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/fortran/core.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Main solver routines for heat equation solver
4 | module core
5 | use heat
6 |
7 | contains
8 |
9 | ! Update the temperature values using five-point stencil
10 | ! Arguments:
11 | ! curr (type(field)): field that receives the values of the new time step
12 | ! prev (type(field)): temperature values from previous time step
13 | ! a (real(dp)): diffusivity
14 | ! dt (real(dp)): time step
15 | subroutine evolve(curr, prev, a, dt)
16 |
17 | implicit none
18 |
19 | type(field),target, intent(inout) :: curr, prev
20 | real(dp) :: a, dt
21 | integer :: i, j, nx, ny
22 | real(dp) :: dx, dy
23 | real(dp), pointer, contiguous, dimension(:,:) :: currdata, prevdata
24 |
25 | ! Work on plain scalars and array pointers instead of derived-type components in the offloaded loop
26 | nx = curr%nx
27 | ny = curr%ny
28 | dx = curr%dx
29 | dy = curr%dy
30 | currdata => curr%data
31 | prevdata => prev%data
32 | ! NOTE(review): no map clauses below - presumably the arrays were mapped by enter_data beforehand; confirm in the caller
33 | ! Determine the temperature field at next time step. As we have
34 | ! fixed boundary conditions, the outermost gridpoints are not
35 | ! updated.
36 | !$omp target teams distribute parallel do
37 | do j = 1, ny
38 | do i = 1, nx
39 | currdata(i, j) = prevdata(i, j) + a * dt * &
40 | & ((prevdata(i-1, j) - 2.0 * prevdata(i, j) + &
41 | & prevdata(i+1, j)) / dx**2 + &
42 | & (prevdata(i, j-1) - 2.0 * prevdata(i, j) + &
43 | & prevdata(i, j+1)) / dy**2)
44 | end do
45 | end do
46 | !$omp end target teams distribute parallel do
47 | end subroutine evolve
48 |
49 | ! Start a data region and copy temperature fields to the device
50 | ! curr (type(field)): current temperature values
51 | ! prev (type(field)): values from previous time step
52 | subroutine enter_data(curr, prev)
53 | implicit none
54 | type(field), target, intent(in) :: curr, prev
55 | real(kind=dp), pointer, contiguous :: currdata(:,:), prevdata(:,:)
56 |
57 | currdata => curr%data
58 | prevdata => prev%data
59 |
60 | ! Map both complete data arrays to the device; they stay mapped after this routine returns
61 | !$omp target enter data map(to: currdata, prevdata)
62 |
63 | end subroutine enter_data
64 |
65 | ! End a data region and copy temperature fields back to the host
66 | ! curr (type(field)): current temperature values
67 | ! prev (type(field)): values from previous time step
68 | subroutine exit_data(curr, prev)
69 | implicit none
70 | type(field), target :: curr, prev
71 | real(kind=dp), pointer, contiguous :: currdata(:,:), prevdata(:,:)
72 |
73 | currdata => curr%data
74 | prevdata => prev%data
75 |
76 | ! Bring both complete data arrays back from the device
77 | !$omp target exit data map(from: currdata, prevdata)
78 |
79 | end subroutine exit_data
80 |
81 | ! Copy a temperature field from the device to the host
82 | ! temperature (type(field)): temperature field
83 | subroutine update_host(temperature)
84 | implicit none
85 | type(field), target :: temperature
86 | real(kind=dp), pointer, contiguous :: tempdata(:,:)
87 |
88 | tempdata => temperature%data
89 |
90 | ! Refresh the host copy of the whole data array from the device
91 | !$omp target update from(tempdata)
92 |
93 | end subroutine update_host
94 |
95 | end module core
96 |
--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/fortran/heat_mod.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Field metadata for heat equation solver
4 | module heat
5 |   use iso_fortran_env, only : REAL64
6 |   implicit none
7 |
8 |   integer, parameter :: dp = REAL64
9 |   ! Fixed grid spacing; the _dp suffix keeps the literals in double precision
10 |   real(dp), parameter :: DX = 0.01_dp, DY = 0.01_dp
11 |
12 |   type :: field
13 |      integer :: nx   ! dimensions of the field
14 |      integer :: ny
15 |      real(dp) :: dx  ! size of the grid cells
16 |      real(dp) :: dy
17 |      real(dp), dimension(:,:), allocatable :: data  ! temperature values
18 |   end type field
19 |
20 | contains
21 |   ! Initialize the field type metadata
22 |   ! Arguments:
23 |   !   field0 (type(field)): field whose nx, ny, dx and dy are set
24 |   !   nx, ny: field dimensions; the spatial step sizes are always DX and DY
25 |   ! NOTE: field0 is intent(out), so an allocated field0%data is deallocated by this call
26 |   subroutine set_field_dimensions(field0, nx, ny)
27 |     implicit none
28 |     type(field), intent(out) :: field0
29 |     integer, intent(in) :: nx, ny
30 |
31 |     field0%dx = DX
32 |     field0%dy = DY
33 |     field0%nx = nx
34 |     field0%ny = ny
35 |   end subroutine set_field_dimensions
36 |
37 | end module heat
38 |
--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/fortran/io.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! I/O routines for heat equation solver
4 | module io
5 |   use heat
6 |
7 | contains
8 |
9 |   ! Output routine, saves the temperature distribution as a png image
10 |   ! named heat_<iter>.png, with iter written as four digits
11 |   ! Arguments:
12 |   !   curr (type(field)): variable with the temperature data
13 |   !   iter (integer): index of the time step
14 |   subroutine write_field(curr, iter)
15 |     use pngwriter
16 |     implicit none
17 |     type(field), intent(in) :: curr
18 |     integer, intent(in) :: iter
19 |
20 |     character(len=85) :: filename
21 |     integer :: stat
22 |     real(dp), dimension(:,:), allocatable, target :: full_data
23 |
24 |     ! Copy the cells 1..nx, 1..ny (without the halo) to a separate array
25 |     allocate(full_data(curr%nx, curr%ny))
26 |     full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny)
27 |
28 |     write(filename,'(A5,I4.4,A4,A)') 'heat_', iter, '.png'
29 |     stat = save_png(full_data, curr%nx, curr%ny, filename)
30 |     deallocate(full_data)
31 |
32 |   end subroutine write_field
33 |
34 |   ! Reads the temperature distribution from an input file
35 |   ! Arguments:
36 |   !   field0 (type(field)): field variable that will store the read data
37 |   !   filename (char): name of the input file
38 |   ! Stops the program if the file cannot be opened or its header cannot be read
39 |   ! Note that this version assumes the input data to be in C memory layout
40 |   subroutine read_field(field0, filename)
41 |     implicit none
42 |     type(field), intent(out) :: field0
43 |     character(len=85), intent(in) :: filename
44 |
45 |     integer :: nx, ny, i, funit, ios
46 |     character(len=2) :: dummy
47 |     real(dp), dimension(:,:), allocatable :: full_data
48 |
49 |     ! status='old' turns a missing file into an error instead of creating an empty one
50 |     open(newunit=funit, file=filename, status='old', action='read', iostat=ios)
51 |     if (ios /= 0) then
52 |        write (*,*) 'Could not open the input file ', trim(filename)
53 |        stop
54 |     end if
55 |
56 |     ! Read the header
57 |     read(funit, *, iostat=ios) dummy, nx, ny
58 |     if (ios /= 0) then
59 |        write (*,*) 'Error while reading the input file!'
60 |        stop
61 |     end if
62 |     call set_field_dimensions(field0, nx, ny)
63 |     ! The arrays for temperature field contain also a halo region
64 |     allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
65 |     allocate(full_data(nx, ny))
66 |
67 |     ! Read the data
68 |     do i = 1, nx
69 |        read(funit, *) full_data(i, 1:ny)
70 |     end do
71 |     close(funit)
72 |     ! Copy to full array containing also boundaries
73 |     field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:)
74 |     ! Set the boundary values
75 |     field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1)
76 |     field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny)
77 |     field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1)
78 |     field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1)
79 |     deallocate(full_data)
80 |   end subroutine read_field
81 |
82 | end module io
83 |
--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/fortran/main.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Heat equation solver in 2D.
4 |
5 | program heat_solve
6 | use heat
7 | use core
8 | use io
9 | use setup
10 | use utilities
11 | use omp_lib
12 |
13 | implicit none
14 |
15 | real(dp), parameter :: a = 0.5 ! Diffusion constant
16 | type(field) :: current, previous ! Current and previous temperature fields
17 |
18 | real(dp) :: dt ! Time step
19 | integer :: nsteps ! Number of time steps
20 | integer, parameter :: image_interval = 1500 ! Image output interval
21 |
22 | integer :: iter
23 |
24 | real(dp) :: average_temp ! Average temperature
25 |
26 | real(kind=dp) :: start, stop ! Timers
27 |
28 | call initialize(current, previous, nsteps)
29 |
30 | ! Draw the picture of the initial state
31 | call write_field(current, 0)
32 |
33 | average_temp = average(current)
34 | write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp
35 |
36 | ! Largest stable time step
37 | dt = current%dx**2 * current%dy**2 / &
38 | & (2.0 * a * (current%dx**2 + current%dy**2))
39 |
40 | ! Main iteration loop
41 | ! The timed section below includes the enter_data and exit_data transfers
42 | start = omp_get_wtime()
43 |
44 | ! copy data to device
45 | call enter_data(current, previous)
46 |
47 | do iter = 1, nsteps
48 | call evolve(current, previous, a, dt)
49 | if (mod(iter, image_interval) == 0) then
50 | ! update data on host for output
51 | call update_host(current)
52 | call write_field(current, iter)
53 | end if
54 | call swap_fields(current, previous)
55 | end do
56 |
57 | ! copy data back to host
58 | call exit_data(current, previous)
59 |
60 | stop = omp_get_wtime()
61 |
62 | ! Average temperature for reference; after the final swap the newest values are in previous
63 | average_temp = average(previous)
64 |
65 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.'
66 | write(*,'(A,F9.6)') 'Average temperature: ', average_temp
67 | if (command_argument_count() == 0) then
68 | write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239
69 | end if
70 |
71 | call finalize(current, previous)
72 |
73 | end program heat_solve
74 |
--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/fortran/pngwriter_mod.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! PNG writer for heat equation solver
4 | module pngwriter
5 | use heat
6 |
7 | contains
8 | ! Pass a 2D array to the C routine save_png; returns its status and prints a message when that is non-zero
9 | function save_png(data, nx, ny, fname) result(stat)
10 |
11 | use, intrinsic :: ISO_C_BINDING
12 | implicit none
13 |
14 | real(dp), dimension(:,:), intent(in) :: data
15 | integer, intent(in) :: nx, ny
16 | character(len=*), intent(in) :: fname
17 | integer :: stat
18 |
19 | ! Interface for save_png C-function
20 | interface
21 | ! The C-function definition is
22 | ! int save_png(double *data, const int nx, const int ny,
23 | ! const char *fname, const char lang)
24 | function save_png_c(data, nx, ny, fname, order) &
25 | & bind(C,name="save_png") result(stat)
26 | use, intrinsic :: ISO_C_BINDING
27 | implicit none
28 | real(kind=C_DOUBLE) :: data(*)
29 | integer(kind=C_INT), value, intent(IN) :: nx, ny
30 | character(kind=C_CHAR), intent(IN) :: fname(*)
31 | character(kind=C_CHAR), value, intent(IN) :: order
32 | integer(kind=C_INT) :: stat
33 | end function save_png_c
34 | end interface
35 | ! The file name is NUL-terminated for C; 'f' is passed as the last (order/lang) argument
36 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
37 | if (stat /= 0) then
38 | write(*,*) 'save_png returned error!'
39 | end if
40 |
41 | end function save_png
42 |
43 | end module pngwriter
44 |
--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/fortran/utilities.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Utility routines for heat equation solver
4 | ! NOTE: This file does not need to be edited!
5 | module utilities
6 |   use heat
7 |
8 | contains
9 |
10 |   ! Swap the data fields of two variables of type field
11 |   ! Arguments:
12 |   !   curr, prev (type(field)): the two variables whose data arrays are exchanged
13 |   subroutine swap_fields(curr, prev)
14 |
15 |     implicit none
16 |
17 |     type(field), intent(inout) :: curr, prev
18 |     real(dp), allocatable, dimension(:,:) :: tmp
19 |
20 |     call move_alloc(curr%data, tmp)
21 |     call move_alloc(prev%data, curr%data)
22 |     call move_alloc(tmp, prev%data)
23 |   end subroutine swap_fields
24 |
25 |   ! Copy the data array and the metadata (nx, ny, dx, dy) from one field to another
26 |   ! Arguments:
27 |   !   from_field (type(field)): variable to copy from; the program stops if its data is not allocated
28 |   !   to_field (type(field)): variable to copy to
29 |   subroutine copy_fields(from_field, to_field)
30 |
31 |     implicit none
32 |
33 |     type(field), intent(in) :: from_field
34 |     type(field), intent(out) :: to_field
35 |     ! NOTE(review): intent(out) deallocates to_field%data on entry, so the shape check below looks unreachable - confirm whether intent(inout) was meant
36 |     ! Consistency checks
37 |     if (.not.allocated(from_field%data)) then
38 |        write (*,*) "Can not copy from a field without allocated data"
39 |        stop
40 |     end if
41 |     if (.not.allocated(to_field%data)) then
42 |        ! Target is not initialized, allocate memory with the same bounds as the source
43 |        allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
44 |             & lbound(from_field%data, 2):ubound(from_field%data, 2)))
45 |     else if (any(shape(from_field%data) /= shape(to_field%data))) then
46 |        write (*,*) "Wrong field data sizes in copy routine"
47 |        print *, shape(from_field%data), shape(to_field%data)
48 |        stop
49 |     end if
50 |
51 |     to_field%data = from_field%data
52 |
53 |     to_field%nx = from_field%nx
54 |     to_field%ny = from_field%ny
55 |     to_field%dx = from_field%dx
56 |     to_field%dy = from_field%dy
57 |   end subroutine copy_fields
58 |
59 |   ! Average temperature over the cells 1..nx, 1..ny of field0%data
60 |   ! Arguments:
61 |   !   field0 (type(field)): field to average (read only)
62 |   function average(field0)
63 |
64 |     implicit none
65 |
66 |     real(dp) :: average
67 |     type(field), intent(in) :: field0
68 |
69 |     average = sum(field0%data(1:field0%nx, 1:field0%ny))
70 |     average = average / (field0%nx * field0%ny)
71 |
72 |   end function average
73 |
74 | end module utilities
75 |
--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/heat.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | #ifndef __HEAT_H__
4 | #define __HEAT_H__
5 |
6 | #include <vector>
7 |
8 | // Datatype for temperature field
9 | struct field {
10 |     // nx and ny are the dimensions of the field. The array data
11 |     // contains also ghost layers, so it will have dimensions nx+2 x ny+2
12 |     int nx;
13 |     int ny;
14 |     // Size of the grid cells
15 |     double dx;
16 |     double dy;
17 |     // The temperature values in the 2D grid, one row of ny+2 values after another
18 |     std::vector<double> data;
19 | };
20 |
21 | // We use here fixed grid spacing
22 | const double DX = 0.01;
23 | const double DY = 0.01;
24 |
25 | #if __cplusplus
26 | extern "C" {
27 | #endif
28 | // Function prototypes
29 | void set_field_dimensions(field *temperature, int nx, int ny);
30 |
31 | void initialize(int argc, char *argv[], field *temperature1,
32 | field *temperature2, int *nsteps);
33 |
34 | void generate_field(field *temperature);
35 |
36 | double average(field *temperature);
37 |
38 | void evolve(field *curr, field *prev, double a, double dt);
39 |
40 | void write_field(field *temperature, int iter);
41 |
42 | void read_field(field *temperature1, field *temperature2,
43 | char *filename);
44 |
45 | void copy_field(field *temperature1, field *temperature2);
46 |
47 | void swap_fields(field *temperature1, field *temperature2);
48 |
49 | void allocate_field(field *temperature);
50 |
51 | void enter_data(field *temperature1, field *temperature2);
52 |
53 | void exit_data(field *temperature1, field *temperature2);
54 |
55 | void update_host(field *temperature);
56 |
57 | #if __cplusplus
58 | }
59 | #endif
60 | #endif // __HEAT_H__
61 |
62 |
--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/io.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // I/O related functions for heat equation solver
4 |
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | #include "heat.h"
11 | #include "pngwriter.h"
12 |
13 | // Write the interior of the temperature field (ghost layers removed)
14 | // to the image file heat_<iter>.png, with iter zero-padded to 4 digits.
15 | void write_field(field *temperature, int iter)
16 | {
17 |     char filename[64];
18 |
19 |     // save_png() is handed only the nx x ny interior cells, so each
20 |     // row is copied without its two ghost cells into a packed buffer.
21 |     std::vector<double> inner_data(temperature->nx * temperature->ny);
22 |     auto inner_data_iterator = inner_data.begin();
23 |     auto beginning_of_row = temperature->data.begin() + (temperature->ny + 2) + 1;
24 |     for (int i = 0; i < temperature->nx; i++) {
25 |         auto end_of_row = beginning_of_row + temperature->ny;
26 |         std::copy(beginning_of_row, end_of_row, inner_data_iterator);
27 |         inner_data_iterator += temperature->ny;
28 |         beginning_of_row = end_of_row + 2;
29 |     }
30 |
31 |     // Write out the data to a png file (bounded formatting of the name)
32 |     snprintf(filename, sizeof(filename), "%s_%04d.png", "heat", iter);
33 |     save_png(inner_data.data(), temperature->nx, temperature->ny, filename, 'c');
34 | }
35 |
36 | // Read the initial temperature distribution from a file and
37 | // initialize the temperature fields temperature1 and
38 | // temperature2 to the same initial state. Exits on any read error.
39 | void read_field(field *temperature1, field *temperature2, char *filename)
40 | {
41 |     int nx, ny, count;
42 |
43 |     FILE *fp = fopen(filename, "r");
44 |     if (fp == NULL) {
45 |         fprintf(stderr, "Error while opening the input file!\n");
46 |         exit(-1);
47 |     }
48 |
49 |     // Read the header
50 |     count = fscanf(fp, "# %d %d \n", &nx, &ny);
51 |     if (count < 2) {
52 |         fprintf(stderr, "Error while reading the input file!\n");
53 |         exit(-1);
54 |     }
55 |
56 |     set_field_dimensions(temperature1, nx, ny);
57 |     set_field_dimensions(temperature2, nx, ny);
58 |     const int nx_local = temperature1->nx;
59 |     const int ny_local = temperature1->ny;
60 |
61 |     // Allocate arrays (including boundary layers)
62 |     int newSize = (nx_local + 2) * (ny_local + 2);
63 |     temperature1->data.resize(newSize, 0.0);
64 |     temperature2->data.resize(newSize, 0.0);
65 |
66 |     // Array from file
67 |     std::vector<double> file_data(nx * ny, 0.0);
68 |
69 |     // Read the actual data, stored row by row in the file
70 |     for (int ind = 0; ind < nx * ny; ind++) {
71 |         if (fscanf(fp, "%lf", &file_data[ind]) != 1) {
72 |             fprintf(stderr, "Error while reading the input file!\n");
73 |             exit(-1);
74 |         }
75 |     }
76 |
77 |     // Copy to the inner part of the full temperature field
78 |     auto temperature_data_iterator = temperature1->data.begin() + (ny_local + 2) + 1;
79 |     auto beginning_of_row = file_data.begin();
80 |     for (int i = 0; i < nx_local; i++) {
81 |         auto end_of_row = beginning_of_row + ny_local;
82 |         std::copy(beginning_of_row, end_of_row, temperature_data_iterator);
83 |         temperature_data_iterator += ny_local + 2;
84 |         beginning_of_row = end_of_row;
85 |     }
86 |
87 |     // Set the boundary values: first the left and right ghost columns ...
88 |     for (int i = 1; i < nx_local + 1; i++) {
89 |         temperature1->data[i * (ny_local + 2)] = temperature1->data[i * (ny_local + 2) + 1];
90 |         temperature1->data[i * (ny_local + 2) + ny_local + 1] = temperature1->data[i * (ny_local + 2) + ny_local];
91 |     }
92 |     // ... then the top and bottom ghost rows, copied from rows 1 and nx
93 |     for (int j = 0; j < ny_local + 2; j++) {
94 |         temperature1->data[j] = temperature1->data[(ny_local + 2) + j];
95 |         temperature1->data[(nx_local + 1) * (ny_local + 2) + j] =
96 |             temperature1->data[nx_local * (ny_local + 2) + j];
97 |     }
98 |     copy_field(temperature1, temperature2);
99 |     fclose(fp);
100 | }
101 |
--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/main.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // Main routine for heat equation solver in 2D.
4 |
5 | #include
6 | #include
7 |
8 | #include "heat.h"
9 |
10 | int main(int argc, char **argv)
11 | {
12 |     // Image output interval
13 |     int image_interval = 1500;
14 |
15 |     // Number of time steps
16 |     int nsteps;
17 |     // Current and previous temperature fields
18 |     field current, previous;
19 |     initialize(argc, argv, &current, &previous, &nsteps);
20 |
21 |     // Output the initial field
22 |     write_field(&current, 0);
23 |
24 |     double average_temp = average(&current);
25 |     printf("Average temperature at start: %f\n", average_temp);
26 |
27 |     // Diffusion constant
28 |     double a = 0.5;
29 |
30 |     // Compute the largest stable time step
31 |     double dx2 = current.dx * current.dx;
32 |     double dy2 = current.dy * current.dy;
33 |     // Time step
34 |     double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));
35 |
36 |     // Get the start time stamp
37 |     double start_clock = omp_get_wtime();
38 |
39 |     // Copy fields to device
40 |     enter_data(&current, &previous);
41 |
42 |     // Time evolution
43 |     for (int iter = 1; iter <= nsteps; iter++) {
44 |         evolve(&current, &previous, a, dt);
45 |         if (iter % image_interval == 0) {
46 |             // update data on host for output
47 |             update_host(&current);
48 |             write_field(&current, iter);
49 |         }
50 |         // Swap current field so that it will be used
51 |         // as previous for next iteration step
52 |         swap_fields(&current, &previous);
53 |     }
54 |
55 |     // copy data back to host
56 |     exit_data(&current, &previous);
57 |
58 |     double stop_clock = omp_get_wtime();
59 |
60 |     // Average temperature for reference; after the last swap the
61 |     // newest values are held in previous
62 |     average_temp = average(&previous);
63 |     // Report the wall-clock time spent in the iterations
64 |     printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock));
65 |     printf("Average temperature: %f\n", average_temp);
66 |     if (argc == 1) {
67 |         printf("Reference value with default arguments: 59.281239\n");
68 |     }
69 |
70 |     // Output the final field
71 |     write_field(&previous, nsteps);
72 |
73 |     return 0;
74 | }
75 |
--------------------------------------------------------------------------------
/content/exercise/solution/data_mapping/utilities.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // Utility functions for heat equation solver
4 |
5 | #include
6 | #include
7 |
8 | #include "heat.h"
9 |
10 |
11 | // Duplicate the grid values of temperature1 into temperature2
12 | void copy_field(field *temperature1, field *temperature2) {
13 |     assert(temperature1->nx == temperature2->nx);
14 |     assert(temperature1->ny == temperature2->ny);
15 |     assert(temperature1->data.size() == temperature2->data.size());
16 |
17 |     const auto &source = temperature1->data;
18 |     std::copy(source.begin(), source.end(), temperature2->data.begin());
19 | }
20 |
21 | // Exchange the data storage of temperature1 and temperature2
22 | void swap_fields(field *temperature1, field *temperature2)
23 | {
24 |     temperature1->data.swap(temperature2->data);
25 | }
26 |
27 | // Size the storage of a temperature field, zero-filling any added cells
28 | void allocate_field(field *temperature)
29 | {
30 |     // One ghost layer on every side: (nx + 2) rows of (ny + 2) values
31 |     const int rows = temperature->nx + 2, cols = temperature->ny + 2;
32 |     temperature->data.resize(rows * cols, 0.0);
33 | }
34 |
35 | // Mean temperature over the interior (non-boundary) grid cells
36 | double average(field *temperature)
37 | {
38 |     const int nx = temperature->nx;
39 |     const int ny = temperature->ny;
40 |     double sum = 0.0;
41 |     // Row i of the padded grid starts at offset i * (ny + 2)
42 |     for (int i = 1; i <= nx; i++) {
43 |         const int row_start = i * (ny + 2);
44 |         for (int j = 1; j <= ny; j++) {
45 |             sum += temperature->data[row_start + j];
46 |         }
47 |     }
48 |     return sum / (nx * ny);
49 | }
50 |
51 |
52 |
--------------------------------------------------------------------------------
/content/exercise/solution/offloading/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 CSC Training
4 | Copyright (c) 2021 ENCCS
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/content/exercise/solution/offloading/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 CSC Training
2 | # Copyright (c) 2021 ENCCS
3 | ifeq ($(COMP),)
4 | COMP=nv
5 | endif
6 |
7 | HAVE_PNG=0
8 | ifeq ($(HAVE_PNG),1)
9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib
11 | endif
12 |
13 | COMMONDIR=../common
14 |
15 | ifeq ($(COMP),gnu)
16 | CXX=g++
17 | CC=gcc
18 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR)
19 | LDFLAGS=
20 | LIBS=
21 | endif
22 |
23 | ifeq ($(COMP),nv)
24 | CXX=nvc++
25 | CC=nvc
26 | CCFLAGS=-O3 -mp=gpu -Minfo=mp -I$(COMMONDIR)
27 | LDFLAGS=
28 | LIBS=
29 | endif
30 |
31 | ifeq ($(COMP),intel)
32 | CXX=icpx
33 | CC=icx
34 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR)
35 | LDFLAGS=
36 | LIBS=
37 | endif
38 |
39 | EXE=heat_serial
40 | OBJS=main.o core.o setup.o utilities.o io.o
41 | OBJS_PNG=$(COMMONDIR)/pngwriter.o
42 |
43 | # Default target: build the solver executable
44 | all: $(EXE)
45 |
46 | # Header dependencies of the C++ objects
47 | core.o: core.cpp heat.h
48 | utilities.o: utilities.cpp heat.h
49 | setup.o: setup.cpp heat.h
50 | io.o: io.cpp heat.h
51 | main.o: main.cpp heat.h
52 |
53 | $(OBJS_PNG): C_COMPILER := $(CC)
54 | $(OBJS_PNG): CCFLAGS += -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include
55 | $(OBJS): C_COMPILER := $(CXX)
56 |
57 | $(EXE): $(OBJS) $(OBJS_PNG)
58 | 	$(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
59 |
60 | %.o: %.cpp
61 | 	$(CXX) $(CCFLAGS) -c $< -o $@
62 |
63 | %.o: %.c
64 | 	$(CC) $(CCFLAGS) -c $< -o $@
65 |
66 | .PHONY: all clean
67 | clean:
68 | 	-/bin/rm -f $(EXE) a.out *.o *.png *~ $(COMMONDIR)/*.o
69 |
--------------------------------------------------------------------------------
/content/exercise/solution/offloading/core.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // Main solver routines for heat equation solver
4 |
5 | #include "heat.h"
6 |
7 | // Advance the temperature field one time step with the five-point stencil
8 | // Arguments:
9 | //   curr: field that receives the new temperature values
10 | //   prev: temperature values from the previous time step
11 | //   a: diffusivity
12 | //   dt: time step
13 | void evolve(field *curr, field *prev, double a, double dt)
14 | {
15 |     // Raw pointers and plain ints, so the map clause does not name struct members
16 |     double *currdata = curr->data.data();
17 |     double *prevdata = prev->data.data();
18 |     int nx = curr->nx;
19 |     int ny = curr->ny;
20 |
21 |     // Only the interior points are recomputed below: the boundary
22 |     // conditions are fixed, so the outermost gridpoints keep the
23 |     // values they already hold.
24 |     double dx2 = prev->dx * prev->dx;
25 |     double dy2 = prev->dy * prev->dy;
26 |     #pragma omp target teams distribute parallel for \
27 |     map(currdata[0:(nx+2)*(ny+2)],prevdata[0:(nx+2)*(ny+2)])
28 |     for (int i = 1; i < nx + 1; i++) {
29 |         for (int j = 1; j < ny + 1; j++) {
30 |             const int c = i * (ny + 2) + j;        // cell (i, j)
31 |             const int up = (i - 1) * (ny + 2) + j;   // cell (i-1, j)
32 |             const int down = (i + 1) * (ny + 2) + j; // cell (i+1, j)
33 |             // Second differences along each direction
34 |             const double d2x = (prevdata[down] - 2.0*prevdata[c] + prevdata[up]) / dx2;
35 |             const double d2y = (prevdata[c + 1] - 2.0*prevdata[c] + prevdata[c - 1]) / dy2;
36 |
37 |             currdata[c] = prevdata[c] + a*dt*(d2x + d2y);
38 |         }
39 |     }
40 | }
41 |
--------------------------------------------------------------------------------
/content/exercise/solution/offloading/fortran/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 CSC Training
2 | # Copyright (c) 2021 ENCCS
3 | ifeq ($(COMP),)
4 | COMP=nv
5 | endif
6 |
7 | HAVE_PNG=0
8 | ifeq ($(HAVE_PNG),1)
9 | PNG_LIBS=-lpng -lz -lc -lrt
10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib
11 | endif
12 |
13 |
14 | COMMONDIR=../../common
15 |
16 | ifeq ($(COMP),nv)
17 | FC=nvfortran
18 | CC=nvc
19 | FCFLAGS=-O3 -mp=gpu -Minfo=mp
20 | CCFLAGS=-O3 -I$(COMMONDIR)
21 | LDFLAGS=
22 | LIBS=
23 | endif
24 |
25 | ifeq ($(COMP),gnu)
26 | FC=gfortran
27 | CC=gcc
28 | FCFLAGS=-O3 -Wall -fopenmp
29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR)
30 | LDFLAGS=-fopenmp
31 | LIBS=
32 | endif
33 |
34 |
35 | EXE=heat_serial
36 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o
37 | OBJS_PNG= $(COMMONDIR)/pngwriter.o
38 |
39 | all: $(EXE)
40 | # Module dependencies: an object is rebuilt after the modules it uses
41 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h
42 | heat_mod.o: heat_mod.F90
43 | core.o: core.F90 heat_mod.o
44 | utilities.o: utilities.F90 heat_mod.o
45 | io.o: io.F90 heat_mod.o pngwriter_mod.o
46 | setup.o: setup.F90 heat_mod.o utilities.o io.o
47 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o
48 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o
49 |
50 | $(EXE): $(OBJS) $(OBJS_PNG)
51 | 	$(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS)
52 |
53 | %.o: %.F90
54 | 	$(FC) $(FCFLAGS) -c $< -o $@
55 |
56 | %.o: %.c
57 | 	$(CC) -DHAVE_PNG=$(HAVE_PNG) $(CCFLAGS) -c $< -o $@
58 |
59 | .PHONY: all clean
60 | clean:
61 | 	-/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ $(COMMONDIR)/*.o
62 |
63 |
--------------------------------------------------------------------------------
/content/exercise/solution/offloading/fortran/core.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Main solver routines for heat equation solver
4 | module core
5 |   use heat
6 |
7 | contains
8 |
9 |   ! Update the temperature values using five-point stencil
10 |   ! Arguments:
11 |   !   curr (type(field)): current temperature values
12 |   !   prev (type(field)): temperature values from previous time step
13 |   !   a (real(dp)): diffusivity
14 |   !   dt (real(dp)): time step
15 |   subroutine evolve(curr, prev, a, dt)
16 |
17 |     implicit none
18 |
19 |     type(field), target, intent(inout) :: curr, prev
20 |     real(dp), intent(in) :: a, dt
21 |     integer :: i, j, nx, ny
22 |     real(dp) :: dx, dy
23 |     real(dp), pointer, contiguous, dimension(:,:) :: currdata, prevdata
24 |
25 |     ! Plain scalars and array pointers are used inside the target region
26 |     nx = curr%nx
27 |     ny = curr%ny
28 |     dx = curr%dx
29 |     dy = curr%dy
30 |     currdata => curr%data
31 |     prevdata => prev%data
32 |
33 |     ! Determine the temperature field at next time step. As we have
34 |     ! fixed boundary conditions, the outermost gridpoints are not
35 |     ! updated.
36 |
37 |     !$omp target teams distribute parallel do
38 |     do j = 1, ny
39 |        do i = 1, nx
40 |           currdata(i, j) = prevdata(i, j) + a * dt * &
41 |                & ((prevdata(i-1, j) - 2.0_dp * prevdata(i, j) + &
42 |                &   prevdata(i+1, j)) / dx**2 + &
43 |                &  (prevdata(i, j-1) - 2.0_dp * prevdata(i, j) + &
44 |                &   prevdata(i, j+1)) / dy**2)
45 |        end do
46 |     end do
47 |     !$omp end target teams distribute parallel do
48 |   end subroutine evolve
49 |
50 | end module core
51 |
--------------------------------------------------------------------------------
/content/exercise/solution/offloading/fortran/heat_mod.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Field metadata for heat equation solver
4 | module heat
5 |   use iso_fortran_env, only : REAL64
6 |   implicit none
7 |
8 |   integer, parameter :: dp = REAL64
9 |   real(dp), parameter :: DX = 0.01_dp, DY = 0.01_dp ! Fixed grid spacing
10 |
11 |   type :: field
12 |      integer :: nx          ! dimensions of the field
13 |      integer :: ny
14 |      real(dp) :: dx         ! grid spacing
15 |      real(dp) :: dy
16 |      real(dp), dimension(:,:), allocatable :: data
17 |   end type field
18 |
19 | contains
20 |   ! Initialize the field type metadata
21 |   ! Arguments:
22 |   !   field0 (type(field)): field whose metadata is set
23 |   !   nx, ny: field dimensions; the spacing is taken from DX and DY
24 |   subroutine set_field_dimensions(field0, nx, ny)
25 |     implicit none
26 |
27 |     type(field), intent(out) :: field0
28 |     integer, intent(in) :: nx, ny
29 |
30 |     field0%dx = DX
31 |     field0%dy = DY
32 |     field0%nx = nx
33 |     field0%ny = ny
34 |
35 |   end subroutine set_field_dimensions
36 |
37 | end module heat
38 |
--------------------------------------------------------------------------------
/content/exercise/solution/offloading/fortran/io.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! I/O routines for heat equation solver
4 | module io
5 |   use heat
6 |
7 | contains
8 |
9 |   ! Output routine, saves the temperature distribution as a png image
10 |   ! Arguments:
11 |   !   curr (type(field)): variable with the temperature data
12 |   !   iter (integer): index of the time step
13 |   subroutine write_field(curr, iter)
14 |
15 |     use pngwriter
16 |     implicit none
17 |     type(field), intent(in) :: curr
18 |     integer, intent(in) :: iter
19 |
20 |     character(len=85) :: filename
21 |
22 |     integer :: stat
23 |     real(dp), dimension(:,:), allocatable, target :: full_data
24 |
25 |     allocate(full_data(curr%nx, curr%ny))
26 |     ! Copy the interior cells (without the halo) into a packed array
27 |     full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny)
28 |     write(filename,'(A5,I4.4,A4)') 'heat_', iter, '.png'
29 |     stat = save_png(full_data, curr%nx, curr%ny, filename)
30 |     deallocate(full_data)
31 |
32 |   end subroutine write_field
33 |
34 |
35 |   ! Reads the temperature distribution from an input file
36 |   ! Arguments:
37 |   !   field0 (type(field)): field variable that will store the
38 |   !                         read data
39 |   !   filename (char): name of the input file
40 |   ! Note that this version assumes the input data to be in C memory layout
41 |   subroutine read_field(field0, filename)
42 |
43 |     implicit none
44 |     type(field), intent(out) :: field0
45 |     character(len=85), intent(in) :: filename
46 |
47 |     integer :: nx, ny, i, ios
48 |     character(len=2) :: dummy
49 |
50 |     real(dp), dimension(:,:), allocatable :: full_data
51 |
52 |     open(10, file=filename, status='old', action='read', iostat=ios)
53 |     if (ios /= 0) stop 'Error while opening the input file!'
54 |     read(10, *) dummy, nx, ny   ! header: marker, then nx and ny
55 |
56 |     call set_field_dimensions(field0, nx, ny)
57 |
58 |     ! The arrays for temperature field contain also a halo region
59 |     allocate(field0%data(0:field0%nx+1, 0:field0%ny+1))
60 |
61 |     allocate(full_data(nx, ny))
62 |     ! Read the data, one record per value of the first index
63 |     do i = 1, nx
64 |        read(10, *) full_data(i, 1:ny)
65 |     end do
66 |
67 |     ! Copy to full array containing also boundaries
68 |     field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:)
69 |
70 |     ! Set the boundary values
71 |     field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1)
72 |     field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny)
73 |     field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1)
74 |     field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1)
75 |
76 |     close(10)
77 |     deallocate(full_data)
78 |
79 |   end subroutine read_field
80 |
81 | end module io
82 |
--------------------------------------------------------------------------------
/content/exercise/solution/offloading/fortran/main.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Heat equation solver in 2D.
4 |
5 | program heat_solve
6 |   use heat
7 |   use core
8 |   use io
9 |   use setup
10 |   use utilities
11 |   use omp_lib
12 |
13 |   implicit none
14 |
15 |   real(dp), parameter :: a = 0.5_dp ! Diffusion constant
16 |   type(field) :: current, previous    ! Current and previous temperature fields
17 |
18 |   real(dp) :: dt     ! Time step
19 |   integer :: nsteps       ! Number of time steps
20 |   integer, parameter :: image_interval = 1500 ! Image output interval
21 |
22 |   integer :: iter
23 |
24 |   real(dp) :: average_temp   !  Average temperature
25 |
26 |   real(kind=dp) :: start, stop !  Timers
27 |
28 |   call initialize(current, previous, nsteps)
29 |
30 |   ! Draw the picture of the initial state
31 |   call write_field(current, 0)
32 |
33 |   average_temp = average(current)
34 |   write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp
35 |
36 |   ! Largest stable time step
37 |   dt = current%dx**2 * current%dy**2 / &
38 |        & (2.0_dp * a * (current%dx**2 + current%dy**2))
39 |
40 |   ! Main iteration loop
41 |
42 |   start = omp_get_wtime()
43 |
44 |
45 |   do iter = 1, nsteps
46 |      call evolve(current, previous, a, dt)
47 |      if (mod(iter, image_interval) == 0) then
48 |         call write_field(current, iter)
49 |      end if
50 |      call swap_fields(current, previous)
51 |   end do
52 |
53 |   stop = omp_get_wtime()
54 |
55 |   ! Average temperature for reference (newest values are in previous after the swap)
56 |   average_temp = average(previous)
57 |
58 |   write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.'
59 |   write(*,'(A,F9.6)') 'Average temperature: ',  average_temp
60 |   if (command_argument_count() == 0) then
61 |      write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239_dp
62 |   end if
63 |
64 |   call finalize(current, previous)
65 |
66 | end program heat_solve
67 |
--------------------------------------------------------------------------------
/content/exercise/solution/offloading/fortran/pngwriter_mod.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! PNG writer for heat equation solver
4 | module pngwriter
5 | use heat
6 |
7 | contains
8 | ! Fortran wrapper that forwards a 2D array to the C routine bound as "save_png"
9 | function save_png(data, nx, ny, fname) result(stat)
10 | ! Returns the status of the C call; a non-zero status is also reported on stdout
11 | use, intrinsic :: ISO_C_BINDING
12 | implicit none
13 |
14 | real(dp), dimension(:,:), intent(in) :: data
15 | integer, intent(in) :: nx, ny
16 | character(len=*), intent(in) :: fname
17 | integer :: stat
18 |
19 | ! Interface for save_png C-function
20 | interface
21 | ! The C function is bound here with five arguments:
22 | ! save_png(data, nx, ny, fname, order), where nx, ny and order are
23 | ! passed by value. NOTE(review): confirm against pngwriter.h
24 | function save_png_c(data, nx, ny, fname, order) &
25 | & bind(C,name="save_png") result(stat)
26 | use, intrinsic :: ISO_C_BINDING
27 | implicit none
28 | real(kind=C_DOUBLE) :: data(*)
29 | integer(kind=C_INT), value, intent(IN) :: nx, ny
30 | character(kind=C_CHAR), intent(IN) :: fname(*)
31 | character(kind=C_CHAR), value, intent(IN) :: order
32 | integer(kind=C_INT) :: stat
33 | end function save_png_c
34 | end interface
35 | ! The trimmed file name is null-terminated for C; 'f' is the order flag
36 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f')
37 | if (stat /= 0) then
38 | write(*,*) 'save_png returned error!'
39 | end if
40 |
41 | end function save_png
42 |
43 | end module pngwriter
44 |
--------------------------------------------------------------------------------
/content/exercise/solution/offloading/fortran/utilities.F90:
--------------------------------------------------------------------------------
1 | ! Copyright (c) 2019 CSC Training
2 | ! Copyright (c) 2021 ENCCS
3 | ! Utility routines for heat equation solver
4 | ! NOTE: This file does not need to be edited!
5 | module utilities
6 |   use heat
7 |
8 | contains
9 |
10 |   ! Swap the data fields of two variables of type field
11 |   ! Arguments:
12 |   !   curr, prev (type(field)): the two variables that are swapped
13 |   subroutine swap_fields(curr, prev)
14 |
15 |     implicit none
16 |
17 |     type(field), intent(inout) :: curr, prev
18 |     real(dp), allocatable, dimension(:,:) :: tmp
19 |
20 |     call move_alloc(curr%data, tmp)
21 |     call move_alloc(prev%data, curr%data)
22 |     call move_alloc(tmp, prev%data)
23 |   end subroutine swap_fields
24 |
25 |   ! Copy the data from one field to another
26 |   ! Arguments:
27 |   !   from_field (type(field)): variable to copy from
28 |   !   to_field (type(field)): variable to copy to
29 |   subroutine copy_fields(from_field, to_field)
30 |
31 |     implicit none
32 |
33 |     type(field), intent(in) :: from_field
34 |     type(field), intent(out) :: to_field
35 |     ! Consistency checks. NOTE: to_field is intent(out), so its data component
36 |     ! is deallocated on entry and the shape comparison below is never reached.
37 |     if (.not.allocated(from_field%data)) then
38 |        write (*,*) "Can not copy from a field without allocated data"
39 |        stop
40 |     end if
41 |     if (.not.allocated(to_field%data)) then
42 |        ! Target is not initialized, allocate memory
43 |        allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), &
44 |             & lbound(from_field%data, 2):ubound(from_field%data, 2)))
45 |     else if (any(shape(from_field%data) /= shape(to_field%data))) then
46 |        write (*,*) "Wrong field data sizes in copy routine"
47 |        print *, shape(from_field%data), shape(to_field%data)
48 |        stop
49 |     end if
50 |
51 |     to_field%data = from_field%data
52 |
53 |     to_field%nx = from_field%nx
54 |     to_field%ny = from_field%ny
55 |     to_field%dx = from_field%dx
56 |     to_field%dy = from_field%dy
57 |   end subroutine copy_fields
58 |
59 |   function average(field0)
60 |     ! Mean of field0%data over the interior cells (1:nx, 1:ny).
61 |     implicit none
62 |
63 |     real(dp) :: average
64 |     type(field), intent(in) :: field0
65 |
66 |     ! Each extent is converted to real(dp) before multiplying so that the
67 |     ! cell count cannot overflow default integer arithmetic on large grids.
68 |
69 |     average = sum(field0%data(1:field0%nx, 1:field0%ny))
70 |     average = average / (real(field0%nx, dp) * real(field0%ny, dp))
71 |
72 |   end function average
73 |
74 | end module utilities
75 |
--------------------------------------------------------------------------------
/content/exercise/solution/offloading/heat.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | #ifndef __HEAT_H__
4 | #define __HEAT_H__
5 |
6 | #include <vector>
7 |
8 | // Datatype for temperature field
9 | struct field {
10 |     // nx and ny are the dimensions of the field. The array data
11 |     // contains also ghost layers, so it will have dimensions nx+2 x ny+2
12 |     int nx;
13 |     int ny;
14 |     // Size of the grid cells
15 |     double dx;
16 |     double dy;
17 |     // The temperature values in the 2D grid, flattened into one array
18 |     std::vector<double> data;
19 | };
20 |
21 | // We use here fixed grid spacing
22 | const double DX = 0.01;
23 | const double DY = 0.01;
24 |
25 | #ifdef __cplusplus
26 | extern "C" {
27 | #endif
28 | // Function prototypes
29 | void set_field_dimensions(field *temperature, int nx, int ny);
30 |
31 | void initialize(int argc, char *argv[], field *temperature1,
32 |                 field *temperature2, int *nsteps);
33 |
34 | void generate_field(field *temperature);
35 |
36 | double average(field *temperature);
37 |
38 | void evolve(field *curr, field *prev, double a, double dt);
39 |
40 | void write_field(field *temperature, int iter);
41 |
42 | void read_field(field *temperature1, field *temperature2,
43 |                 char *filename);
44 |
45 | void copy_field(field *temperature1, field *temperature2);
46 |
47 | void swap_fields(field *temperature1, field *temperature2);
48 |
49 | void allocate_field(field *temperature);
50 |
51 | #ifdef __cplusplus
52 | }
53 | #endif
54 | #endif // __HEAT_H__
55 |
56 |
--------------------------------------------------------------------------------
/content/exercise/solution/offloading/io.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // I/O related functions for heat equation solver
4 |
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | #include "heat.h"
11 | #include "pngwriter.h"
12 |
13 | // Write the interior of the temperature field (ghost layers removed)
14 | // to the image file heat_<iter>.png, with iter zero-padded to 4 digits.
15 | void write_field(field *temperature, int iter)
16 | {
17 |     char filename[64];
18 |
19 |     // save_png() is handed only the nx x ny interior cells, so each
20 |     // row is copied without its two ghost cells into a packed buffer.
21 |     std::vector<double> inner_data(temperature->nx * temperature->ny);
22 |     auto inner_data_iterator = inner_data.begin();
23 |     auto beginning_of_row = temperature->data.begin() + (temperature->ny + 2) + 1;
24 |     for (int i = 0; i < temperature->nx; i++) {
25 |         auto end_of_row = beginning_of_row + temperature->ny;
26 |         std::copy(beginning_of_row, end_of_row, inner_data_iterator);
27 |         inner_data_iterator += temperature->ny;
28 |         beginning_of_row = end_of_row + 2;
29 |     }
30 |
31 |     // Write out the data to a png file (bounded formatting of the name)
32 |     snprintf(filename, sizeof(filename), "%s_%04d.png", "heat", iter);
33 |     save_png(inner_data.data(), temperature->nx, temperature->ny, filename, 'c');
34 | }
35 |
36 | // Read the initial temperature distribution from a file and
37 | // initialize the temperature fields temperature1 and
38 | // temperature2 to the same initial state. Exits on any read error.
39 | void read_field(field *temperature1, field *temperature2, char *filename)
40 | {
41 |     int nx, ny, count;
42 |
43 |     FILE *fp = fopen(filename, "r");
44 |     if (fp == NULL) {
45 |         fprintf(stderr, "Error while opening the input file!\n");
46 |         exit(-1);
47 |     }
48 |
49 |     // Read the header
50 |     count = fscanf(fp, "# %d %d \n", &nx, &ny);
51 |     if (count < 2) {
52 |         fprintf(stderr, "Error while reading the input file!\n");
53 |         exit(-1);
54 |     }
55 |
56 |     set_field_dimensions(temperature1, nx, ny);
57 |     set_field_dimensions(temperature2, nx, ny);
58 |     const int nx_local = temperature1->nx;
59 |     const int ny_local = temperature1->ny;
60 |
61 |     // Allocate arrays (including boundary layers)
62 |     int newSize = (nx_local + 2) * (ny_local + 2);
63 |     temperature1->data.resize(newSize, 0.0);
64 |     temperature2->data.resize(newSize, 0.0);
65 |
66 |     // Array from file
67 |     std::vector<double> file_data(nx * ny, 0.0);
68 |
69 |     // Read the actual data, stored row by row in the file
70 |     for (int ind = 0; ind < nx * ny; ind++) {
71 |         if (fscanf(fp, "%lf", &file_data[ind]) != 1) {
72 |             fprintf(stderr, "Error while reading the input file!\n");
73 |             exit(-1);
74 |         }
75 |     }
76 |
77 |     // Copy to the inner part of the full temperature field
78 |     auto temperature_data_iterator = temperature1->data.begin() + (ny_local + 2) + 1;
79 |     auto beginning_of_row = file_data.begin();
80 |     for (int i = 0; i < nx_local; i++) {
81 |         auto end_of_row = beginning_of_row + ny_local;
82 |         std::copy(beginning_of_row, end_of_row, temperature_data_iterator);
83 |         temperature_data_iterator += ny_local + 2;
84 |         beginning_of_row = end_of_row;
85 |     }
86 |
87 |     // Set the boundary values: first the left and right ghost columns ...
88 |     for (int i = 1; i < nx_local + 1; i++) {
89 |         temperature1->data[i * (ny_local + 2)] = temperature1->data[i * (ny_local + 2) + 1];
90 |         temperature1->data[i * (ny_local + 2) + ny_local + 1] = temperature1->data[i * (ny_local + 2) + ny_local];
91 |     }
92 |     // ... then the top and bottom ghost rows, copied from rows 1 and nx
93 |     for (int j = 0; j < ny_local + 2; j++) {
94 |         temperature1->data[j] = temperature1->data[(ny_local + 2) + j];
95 |         temperature1->data[(nx_local + 1) * (ny_local + 2) + j] =
96 |             temperature1->data[nx_local * (ny_local + 2) + j];
97 |     }
98 |     copy_field(temperature1, temperature2);
99 |     fclose(fp);
100 | }
101 |
--------------------------------------------------------------------------------
/content/exercise/solution/offloading/main.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // Main routine for heat equation solver in 2D.
4 |
5 | #include
6 | #include
7 |
8 | #include "heat.h"
9 |
10 | int main(int argc, char **argv)
11 | {
12 | // Image output interval
13 | int image_interval = 1500;
14 |
15 | // Number of time steps
16 | int nsteps;
17 | // Current and previous temperature fields
18 | field current, previous;
19 | initialize(argc, argv, ¤t, &previous, &nsteps);
20 |
21 | // Output the initial field
22 | write_field(¤t, 0);
23 |
24 | double average_temp = average(¤t);
25 | printf("Average temperature at start: %f\n", average_temp);
26 |
27 | // Diffusion constant
28 | double a = 0.5;
29 |
30 | // Compute the largest stable time step
31 | double dx2 = current.dx * current.dx;
32 | double dy2 = current.dy * current.dy;
33 | // Time step
34 | double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2));
35 |
36 | // Get the start time stamp
37 | double start_clock = omp_get_wtime();
38 |
39 | // Time evolution
40 | for (int iter = 1; iter <= nsteps; iter++) {
41 | evolve(¤t, &previous, a, dt);
42 | if (iter % image_interval == 0) {
43 | write_field(¤t, iter);
44 | }
45 | // Swap current field so that it will be used
46 | // as previous for next iteration step
47 | swap_fields(¤t, &previous);
48 | }
49 |
50 | double stop_clock = omp_get_wtime();
51 | // Average temperature for reference
52 | average_temp = average(&previous);
53 |
54 | // Determine the CPU time used for all the iterations
55 | printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock));
56 | printf("Average temperature: %f\n", average_temp);
57 | if (argc == 1) {
58 | printf("Reference value with default arguments: 59.281239\n");
59 | }
60 |
61 | // Output the final field
62 | write_field(&previous, nsteps);
63 |
64 | return 0;
65 | }
66 |
--------------------------------------------------------------------------------
/content/exercise/solution/offloading/utilities.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2019 CSC Training
2 | // Copyright (c) 2021 ENCCS
3 | // Utility functions for heat equation solver
4 |
5 | #include
6 | #include
7 |
8 | #include "heat.h"
9 |
10 |
11 | // Copy data on temperature1 into temperature2
12 | void copy_field(field *temperature1, field *temperature2)
13 | {
14 | assert(temperature1->nx == temperature2->nx);
15 | assert(temperature1->ny == temperature2->ny);
16 | assert(temperature1->data.size() == temperature2->data.size());
17 | std::copy(temperature1->data.begin(), temperature1->data.end(),
18 | temperature2->data.begin());
19 | }
20 |
21 | // Swap the field data for temperature1 and temperature2
22 | void swap_fields(field *temperature1, field *temperature2)
23 | {
24 | std::swap(temperature1->data, temperature2->data);
25 | }
26 |
27 | // Allocate memory for a temperature field and initialise it to zero
28 | void allocate_field(field *temperature)
29 | {
30 | // Include also boundary layers
31 | int newSize = (temperature->nx + 2) * (temperature->ny + 2);
32 | temperature->data.resize(newSize, 0.0);
33 | }
34 |
35 | // Calculate average temperature over the non-boundary grid cells
36 | double average(field *temperature)
37 | {
38 | double average = 0.0;
39 |
40 | for (int i = 1; i < temperature->nx + 1; i++) {
41 | for (int j = 1; j < temperature->ny + 1; j++) {
42 | int ind = i * (temperature->ny + 2) + j;
43 | average += temperature->data[ind];
44 | }
45 | }
46 |
47 | average /= (temperature->nx * temperature->ny);
48 | return average;
49 | }
50 |
51 |
52 |
--------------------------------------------------------------------------------
/content/guide.rst:
--------------------------------------------------------------------------------
1 | Instructor's guide
2 | ==================
3 |
4 |
5 |
6 | Preparing to Teach
7 | ------------------
8 |
9 | - Make sure that all the compilers are installed correctly on the system
10 | - Run all the examples beforehand at least once
11 | - Be aware which parts could be skipped in case needed
12 | - Give enough time for the exercises
13 | - Do not open too many tabs and switch among them
14 | - Emphasize the differences between C/C++ and Fortran for certain directives
15 | - Briefly introduce each exercise beforehand, and afterwards give a short summary of the most important take-home messages
16 |
--------------------------------------------------------------------------------
/content/img/Automatic-Scalability-of-Cuda-via-scaling-the-number-of-Streaming-Multiprocessors-and.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/Automatic-Scalability-of-Cuda-via-scaling-the-number-of-Streaming-Multiprocessors-and.png
--------------------------------------------------------------------------------
/content/img/C2050Timeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/C2050Timeline.png
--------------------------------------------------------------------------------
/content/img/ENCCS.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/ENCCS.jpg
--------------------------------------------------------------------------------
/content/img/ENCCS_CSC_logos.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/ENCCS_CSC_logos.jpg
--------------------------------------------------------------------------------
/content/img/HardwareReview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/HardwareReview.png
--------------------------------------------------------------------------------
/content/img/Loom.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/Loom.jpeg
--------------------------------------------------------------------------------
/content/img/ThreadExecution.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/ThreadExecution.jpeg
--------------------------------------------------------------------------------
/content/img/coalesced.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/coalesced.png
--------------------------------------------------------------------------------
/content/img/comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/comparison.png
--------------------------------------------------------------------------------
/content/img/compp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/compp.png
--------------------------------------------------------------------------------
/content/img/distributed_vs_shared.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/distributed_vs_shared.png
--------------------------------------------------------------------------------
/content/img/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/favicon.ico
--------------------------------------------------------------------------------
/content/img/gpu_vs_cpu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/gpu_vs_cpu.png
--------------------------------------------------------------------------------
/content/img/heat_0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/heat_0000.png
--------------------------------------------------------------------------------
/content/img/heat_montage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/heat_montage.png
--------------------------------------------------------------------------------
/content/img/heteprogra.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/heteprogra.jpeg
--------------------------------------------------------------------------------
/content/img/memsch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/memsch.png
--------------------------------------------------------------------------------
/content/img/microprocessor-trend-data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/microprocessor-trend-data.png
--------------------------------------------------------------------------------
/content/img/nvidia_block_diagram.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/nvidia_block_diagram.jpeg
--------------------------------------------------------------------------------
/content/img/omp-parallel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/omp-parallel.png
--------------------------------------------------------------------------------
/content/img/processes-threads.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/processes-threads.png
--------------------------------------------------------------------------------
/content/img/shared_mem.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/shared_mem.png
--------------------------------------------------------------------------------
/content/img/threads.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/threads.png
--------------------------------------------------------------------------------
/content/img/volta-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/volta-architecture.png
--------------------------------------------------------------------------------
/content/img/volta-sm-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/volta-sm-architecture.png
--------------------------------------------------------------------------------
/content/interoperability.rst:
--------------------------------------------------------------------------------
1 | Working alongside GPU libraries
2 | ===============================
3 |
4 | .. questions::
5 |
6 | - My code needs to use a library, how should they work together?
7 | - How to use OpenMP mapped variables?
8 | - How can CUDA or HIP device variables be used in OpenMP?
9 |
10 | .. objectives::
11 |
12 | - Understand TODO
13 | - Understand TODO
14 | - Understand
15 | - Understand
16 |
17 | .. prereq::
18 |
19 | 1. TODO
20 | 2. TODO
21 |
22 |
23 | First heading
24 | -------------
25 |
26 | OpenMP interoperability with CUDA C/C++ and CUDA Fortran.
27 |
28 | - You can call kernels written in CUDA C/C++ or CUDA Fortran in your OpenMP programs from the host.
29 | - You can use the OpenMP **USE_DEVICE_PTR** clause to pass OpenMP mapped variables to CUDA kernels that are launched from the host.
30 | - You can use the OpenMP **IS_DEVICE_PTR** clause to access CUDA device attribute variables or to pass device addresses directly to target regions.
31 |
32 | Second heading
33 | --------------
34 |
35 | Some more text, with a figure
36 |
37 | .. figure:: img/stencil.svg
38 | :align: center
39 |
40 | This is a sample image
41 |
42 | .. exercise::
43 |
44 | TODO get the students to think about the content and answer a Zoom quiz
45 |
46 | .. solution::
47 |
48 | Hide the answer and reasoning in here
49 |
50 | Some source code
51 | ----------------
52 |
53 | Sometimes we need to look at code, which can be in the webpage and optionally
54 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work.
55 |
56 | .. typealong:: The field data structure
57 |
58 | .. tabs::
59 |
60 | .. tab:: C++
61 |
62 | .. literalinclude:: exercise/serial/heat.h
63 | :language: cpp
64 | :lines: 9-19
65 |
66 | .. tab:: Fortran
67 |
68 | .. literalinclude:: exercise/serial/fortran/heat_mod.F90
69 | :language: fortran
70 | :lines: 11-17
71 |
72 | Building the code
73 | -----------------
74 |
75 | If there's terminal output to discuss, show something like::
76 |
77 | nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o
78 | nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o
79 | nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o
80 | nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o
81 | nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o
82 | nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial -lpng
83 |
84 |
85 | Running the code
86 | ----------------
87 |
88 | To show a sample command line, use this approach
89 |
90 | .. code-block:: bash
91 |
92 | ./heat_serial 800 800 1000
93 |
94 |
95 | .. keypoints::
96 |
97 | - TODO summarize the learning outcome
98 | - TODO
99 |
--------------------------------------------------------------------------------
/content/multi-gpu.rst:
--------------------------------------------------------------------------------
1 | Multiple GPUs
2 | =============
3 |
4 | .. questions::
5 |
6 | - How do I run on more than one GPU?
7 | - TODO
8 |
9 | .. objectives::
10 |
11 | - Understand TODO
12 | - Understand TODO
13 | - Understand
14 | - Understand
15 |
16 | .. prereq::
17 |
18 | 1. TODO
19 | 2. TODO
20 |
21 |
22 | First heading
23 | -------------
24 |
25 | Some text
26 |
27 | Second heading
28 | --------------
29 |
30 | Some more text, with a figure
31 |
32 | .. figure:: img/stencil.svg
33 | :align: center
34 |
35 | This is a sample image
36 |
37 | .. exercise::
38 |
39 | TODO get the students to think about the content and answer a Zoom quiz
40 |
41 | .. solution::
42 |
43 | Hide the answer and reasoning in here
44 |
45 | Some source code
46 | ----------------
47 |
48 | Sometimes we need to look at code, which can be in the webpage and optionally
49 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work.
50 |
51 | .. typealong:: The field data structure
52 |
53 | .. tabs::
54 |
55 | .. tab:: C++
56 |
57 | .. literalinclude:: exercise/serial/heat.h
58 | :language: cpp
59 | :lines: 9-19
60 |
61 | .. tab:: Fortran
62 |
63 | .. literalinclude:: exercise/serial/fortran/heat_mod.F90
64 | :language: fortran
65 | :lines: 11-17
66 |
67 | Building the code
68 | -----------------
69 |
70 | If there's terminal output to discuss, show something like::
71 |
72 | nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o
73 | nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o
74 | nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o
75 | nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o
76 | nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o
77 | nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial -lpng
78 |
79 |
80 | Running the code
81 | ----------------
82 |
83 | To show a sample command line, use this approach
84 |
85 | .. code-block:: bash
86 |
87 | ./heat_serial 800 800 1000
88 |
89 |
90 | .. keypoints::
91 |
92 | - TODO summarize the learning outcome
93 | - TODO
94 |
--------------------------------------------------------------------------------
/content/optimization.rst:
--------------------------------------------------------------------------------
1 | Optimizing OpenMP offloaded code
2 | ================================
3 |
4 | .. questions::
5 |
6 | - What tools are available to run faster?
7 | - TODO
8 |
9 | .. objectives::
10 |
11 | - Understand TODO
12 | - Understand TODO
13 | - Understand
14 | - Understand
15 |
16 | .. prereq::
17 |
18 | 1. TODO
19 | 2. TODO
20 |
21 |
22 | First heading
23 | -------------
24 |
25 | Some text
26 |
27 | Second heading
28 | --------------
29 |
30 | Some more text, with a figure
31 |
32 | .. figure:: img/stencil.svg
33 | :align: center
34 |
35 | This is a sample image
36 |
37 | .. exercise::
38 |
39 | TODO get the students to think about the content and answer a Zoom quiz
40 |
41 | .. solution::
42 |
43 | Hide the answer and reasoning in here
44 |
45 | Some source code
46 | ----------------
47 |
48 | Sometimes we need to look at code, which can be in the webpage and optionally
49 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work.
50 |
51 | .. typealong:: The field data structure
52 |
53 | .. tabs::
54 |
55 | .. tab:: C++
56 |
57 | .. literalinclude:: exercise/serial/heat.h
58 | :language: cpp
59 | :lines: 9-19
60 |
61 | .. tab:: Fortran
62 |
63 | .. literalinclude:: exercise/serial/fortran/heat_mod.F90
64 | :language: fortran
65 | :lines: 11-17
66 |
67 | Building the code
68 | -----------------
69 |
70 | If there's terminal output to discuss, show something like::
71 |
72 | nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o
73 | nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o
74 | nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o
75 | nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o
76 | nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o
77 | nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial -lpng
78 |
79 |
80 | Running the code
81 | ----------------
82 |
83 | To show a sample command line, use this approach
84 |
85 | .. code-block:: bash
86 |
87 | ./heat_serial 800 800 1000
88 |
89 |
90 | .. keypoints::
91 |
92 | - TODO summarize the learning outcome
93 | - TODO
94 |
--------------------------------------------------------------------------------
/content/porting.rst:
--------------------------------------------------------------------------------
1 | Porting code to OpenMP offloading
2 | =================================
3 |
4 | .. questions::
5 |
6 | - When and why should I use OpenMP offloading in my code?
7 | - TODO
8 |
9 | .. objectives::
10 |
11 | - Understand TODO
12 | - Understand TODO
13 | - Understand
14 | - Understand
15 |
16 | .. prereq::
17 |
18 | 1. TODO
19 | 2. TODO
20 |
21 |
22 | First heading
23 | -------------
24 |
25 | Some text
26 |
27 | Second heading
28 | --------------
29 |
30 | Some more text, with a figure
31 |
32 | .. figure:: img/stencil.svg
33 | :align: center
34 |
35 | This is a sample image
36 |
37 | .. exercise::
38 |
39 | TODO get the students to think about the content and answer a Zoom quiz
40 |
41 | .. solution::
42 |
43 | Hide the answer and reasoning in here
44 |
45 | Some source code
46 | ----------------
47 |
48 | Sometimes we need to look at code, which can be in the webpage and optionally
49 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work.
50 |
51 | .. typealong:: The field data structure
52 |
53 | .. tabs::
54 |
55 | .. tab:: C++
56 |
57 | .. literalinclude:: exercise/serial/heat.h
58 | :language: cpp
59 | :lines: 9-19
60 |
61 | .. tab:: Fortran
62 |
63 | .. literalinclude:: exercise/serial/fortran/heat_mod.F90
64 | :language: fortran
65 | :lines: 11-17
66 |
67 | Building the code
68 | -----------------
69 |
70 | If there's terminal output to discuss, show something like::
71 |
72 | nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o
73 | nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o
74 | nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o
75 | nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o
76 | nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o
77 | nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial -lpng
78 |
79 |
80 | Running the code
81 | ----------------
82 |
83 | To show a sample command line, use this approach
84 |
85 | .. code-block:: bash
86 |
87 | ./heat_serial 800 800 1000
88 |
89 |
90 | .. keypoints::
91 |
92 | - TODO summarize the learning outcome
93 | - TODO
94 |
--------------------------------------------------------------------------------
/content/profiling.rst:
--------------------------------------------------------------------------------
1 | Profiling code for GPUs
2 | =======================
3 |
4 | .. questions::
5 |
6 | - What tools can help me reason about the performance of my code?
7 | - TODO
8 |
9 | .. objectives::
10 |
11 | - Understand TODO
12 | - Understand TODO
13 | - Understand
14 | - Understand
15 |
16 | .. prereq::
17 |
18 | 1. TODO
19 | 2. TODO
20 |
21 |
22 | First heading
23 | -------------
24 |
25 | Some text
26 |
27 | Second heading
28 | --------------
29 |
30 | Some more text, with a figure
31 |
32 | .. figure:: img/stencil.svg
33 | :align: center
34 |
35 | This is a sample image
36 |
37 | .. exercise::
38 |
39 | TODO get the students to think about the content and answer a Zoom quiz
40 |
41 | .. solution::
42 |
43 | Hide the answer and reasoning in here
44 |
45 | Some source code
46 | ----------------
47 |
48 | Sometimes we need to look at code, which can be in the webpage and optionally
49 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work.
50 |
51 | .. typealong:: The field data structure
52 |
53 | .. tabs::
54 |
55 | .. tab:: C++
56 |
57 | .. literalinclude:: exercise/serial/heat.h
58 | :language: cpp
59 | :lines: 9-19
60 |
61 | .. tab:: Fortran
62 |
63 | .. literalinclude:: exercise/serial/fortran/heat_mod.F90
64 | :language: fortran
65 | :lines: 11-17
66 |
67 | Building the code
68 | -----------------
69 |
70 | If there's terminal output to discuss, show something like::
71 |
72 | nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o
73 | nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o
74 | nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o
75 | nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o
76 | nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o
77 | nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial -lpng
78 |
79 |
80 | Running the code
81 | ----------------
82 |
83 | To show a sample command line, use this approach
84 |
85 | .. code-block:: bash
86 |
87 | ./heat_serial 800 800 1000
88 |
89 |
90 | .. keypoints::
91 |
92 | - TODO summarize the learning outcome
93 | - TODO
94 |
--------------------------------------------------------------------------------
/content/quick-reference.rst:
--------------------------------------------------------------------------------
1 | Quick Reference
2 | ---------------
3 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/composite.c:
--------------------------------------------------------------------------------
1 | #pragma omp target teams distribute parallel for simd [clauses]
2 | for-loops
3 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/composite.f90:
--------------------------------------------------------------------------------
1 | !$omp target teams distribute parallel do simd [clauses]
2 | do-loops
3 | !$omp end target teams distribute parallel do simd
4 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/distribute.c:
--------------------------------------------------------------------------------
1 | #pragma omp distribute [clauses]
2 | for-loops
3 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/distribute.clause:
--------------------------------------------------------------------------------
1 | clause:
2 | private(list)
3 | firstprivate(list)
4 | lastprivate(list)
5 | collapse(n)
6 | dist_schedule(kind[, chunk_size])
7 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/distribute.f90:
--------------------------------------------------------------------------------
1 | !$omp distribute [clauses]
2 | do-loops
3 | !$omp end distribute
4 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target.c:
--------------------------------------------------------------------------------
1 | #pragma omp target [clauses]
2 | structured-block
3 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target.clause:
--------------------------------------------------------------------------------
1 | clause:
2 | if([ target:] scalar-expression)
3 | device(integer-expression)
4 | private(list)
5 | firstprivate(list)
6 | map([map-type:] list)
7 | is_device_ptr(list)
8 | defaultmap(tofrom:scalar)
9 | nowait
10 | depend(dependence-type : list)
11 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target.f90:
--------------------------------------------------------------------------------
1 | !$omp target [clauses]
2 | structured-block
3 | !$omp end target
4 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_data.c:
--------------------------------------------------------------------------------
1 | #pragma omp target data clause [clauses]
2 | structured-block
3 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_data.clause:
--------------------------------------------------------------------------------
1 | clause:
2 | if( [target data:]scalar-logical-expression)
3 | device(scalar-integer-expression)
4 | map([map-type :] list)
5 | use_device_ptr(list)
6 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_data.f90:
--------------------------------------------------------------------------------
1 | !$omp target data clause [clauses]
2 | structured-block
3 | !$omp end target data
4 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_enter_data.c:
--------------------------------------------------------------------------------
1 | #pragma omp target enter data [clauses]
2 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_enter_data.f90:
--------------------------------------------------------------------------------
1 | !$omp target enter data [clauses]
2 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_enter_exit_data.clause:
--------------------------------------------------------------------------------
1 | clause:
2 | if(scalar-logical-expression)
3 | device(scalar-integer-expression)
4 | map( [map-type:] list)
5 | depend(dependence-type:list)
6 | nowait
7 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_exit_data.c:
--------------------------------------------------------------------------------
1 | #pragma omp target exit data [clauses]
2 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/target_exit_data.f90:
--------------------------------------------------------------------------------
1 | !$omp target exit data [clauses]
2 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/teams.c:
--------------------------------------------------------------------------------
1 | #pragma omp teams [clauses]
2 | structured-block
3 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/teams.clause:
--------------------------------------------------------------------------------
1 | clause:
2 | num_teams(integer-expression)
3 | thread_limit(integer-expression)
4 | default(shared | none)
5 | private(list)
6 | firstprivate(list)
7 | shared(list)
8 | reduction(reduction-identifier : list)
9 |
--------------------------------------------------------------------------------
/content/syntax/v4.5.0/teams.f90:
--------------------------------------------------------------------------------
1 | !$omp teams [clauses]
2 | structured-block
3 | !$omp end teams
4 |
--------------------------------------------------------------------------------
/content/volta-sm-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/volta-sm-architecture.png
--------------------------------------------------------------------------------
/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

REM Command file for Sphinx documentation.
REM Usage: make.bat <target>   (for example "make.bat html").
REM With no target, the Sphinx help listing is printed.

REM Work from the directory that contains this script
pushd %~dp0

REM Allow the caller to override the sphinx-build executable
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
REM conf.py and the lesson sources live in the content directory,
REM not next to this script
set SOURCEDIR=content
set BUILDDIR=_build

if "%1" == "" goto help

REM Probe for sphinx-build; errorlevel 9009 means the command was not found
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	REM Restore the caller's working directory before bailing out
	popd
	exit /b 1
)

REM Delegate to Sphinx "make mode" with the requested target
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
36 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Sphinx
2 | sphinx_rtd_theme
3 | sphinx_rtd_theme_ext_color_contrast
4 | myst_nb
5 | sphinx-lesson
6 |
--------------------------------------------------------------------------------