├── .github └── workflows │ └── sphinx.yml ├── .gitignore ├── LICENSE ├── LICENSE.code ├── Makefile ├── README.md ├── content ├── _static │ └── overrides.css ├── conf.py ├── data.rst ├── diagrams │ ├── stencil-fixed-boundaries │ └── stencil.drawio ├── examples │ └── v4.5.0 │ │ ├── Example_target.1.c │ │ ├── Example_target.1.f90 │ │ ├── Example_target_data.2.c │ │ ├── Example_target_data.2.f90 │ │ ├── Example_target_unstructured_data.1.c │ │ ├── Example_target_unstructured_data.1.f90 │ │ ├── Example_teams.6.c │ │ └── Example_teams.6.f90 ├── exercise │ ├── common │ │ ├── pngwriter.c │ │ └── pngwriter.h │ ├── data_mapping │ │ ├── LICENSE-MIT │ │ ├── Makefile │ │ ├── core.cpp │ │ ├── fortran │ │ │ ├── Makefile │ │ │ ├── core.F90 │ │ │ ├── heat_mod.F90 │ │ │ ├── io.F90 │ │ │ ├── main.F90 │ │ │ ├── pngwriter_mod.F90 │ │ │ ├── setup.F90 │ │ │ └── utilities.F90 │ │ ├── heat.h │ │ ├── heat_serial │ │ ├── io.cpp │ │ ├── main.cpp │ │ ├── setup.cpp │ │ └── utilities.cpp │ ├── ex00 │ │ ├── LICENSE-MIT │ │ ├── ex00.F90 │ │ └── ex00.c │ ├── ex01 │ │ ├── LICENSE-MIT │ │ ├── ex01.F90 │ │ ├── ex01.c │ │ └── solution │ │ │ ├── ex01.F90 │ │ │ └── ex01.c │ ├── ex02 │ │ ├── LICENSE-MIT │ │ ├── ex02.F90 │ │ ├── ex02.c │ │ └── solution │ │ │ ├── ex02.F90 │ │ │ └── ex02.c │ ├── ex03 │ │ ├── LICENSE-MIT │ │ ├── ex03.F90 │ │ ├── ex03.c │ │ └── solution │ │ │ ├── ex03.F90 │ │ │ └── ex03.c │ ├── ex04 │ │ ├── LICENSE-MIT │ │ ├── ex04.F90 │ │ ├── ex04.c │ │ └── solution │ │ │ ├── ex04.F90 │ │ │ └── ex04.c │ ├── ex05 │ │ ├── LICENSE-MIT │ │ ├── ex05.F90 │ │ ├── ex05.c │ │ └── solution │ │ │ ├── ex05.F90 │ │ │ └── ex05.c │ ├── ex06 │ │ ├── ex06.F90 │ │ └── ex06.c │ ├── offloading │ │ ├── LICENSE-MIT │ │ ├── Makefile │ │ ├── core.cpp │ │ ├── fortran │ │ │ ├── Makefile │ │ │ ├── core.F90 │ │ │ ├── heat_mod.F90 │ │ │ ├── io.F90 │ │ │ ├── main.F90 │ │ │ ├── pngwriter_mod.F90 │ │ │ ├── setup.F90 │ │ │ └── utilities.F90 │ │ ├── heat.h │ │ ├── io.cpp │ │ ├── main.cpp │ │ ├── setup.cpp │ │ └── 
utilities.cpp │ ├── serial │ │ ├── LICENSE-MIT │ │ ├── Makefile │ │ ├── core.cpp │ │ ├── fortran │ │ │ ├── Makefile │ │ │ ├── core.F90 │ │ │ ├── heat_mod.F90 │ │ │ ├── io.F90 │ │ │ ├── main.F90 │ │ │ ├── pngwriter_mod.F90 │ │ │ ├── setup.F90 │ │ │ └── utilities.F90 │ │ ├── heat.h │ │ ├── heat_0000.png │ │ ├── heat_0010.png │ │ ├── io.cpp │ │ ├── main.cpp │ │ ├── setup.cpp │ │ └── utilities.cpp │ └── solution │ │ ├── common │ │ ├── pngwriter.c │ │ └── pngwriter.h │ │ ├── data_mapping │ │ ├── LICENSE-MIT │ │ ├── Makefile │ │ ├── core.cpp │ │ ├── fortran │ │ │ ├── Makefile │ │ │ ├── core.F90 │ │ │ ├── heat_mod.F90 │ │ │ ├── io.F90 │ │ │ ├── main.F90 │ │ │ ├── pngwriter_mod.F90 │ │ │ ├── setup.F90 │ │ │ └── utilities.F90 │ │ ├── heat.h │ │ ├── io.cpp │ │ ├── main.cpp │ │ ├── setup.cpp │ │ └── utilities.cpp │ │ └── offloading │ │ ├── LICENSE-MIT │ │ ├── Makefile │ │ ├── core.cpp │ │ ├── fortran │ │ ├── Makefile │ │ ├── core.F90 │ │ ├── heat_mod.F90 │ │ ├── io.F90 │ │ ├── main.F90 │ │ ├── pngwriter_mod.F90 │ │ ├── setup.F90 │ │ └── utilities.F90 │ │ ├── heat.h │ │ ├── io.cpp │ │ ├── main.cpp │ │ ├── setup.cpp │ │ └── utilities.cpp ├── gpu-architecture.rst ├── guide.rst ├── img │ ├── Automatic-Scalability-of-Cuda-via-scaling-the-number-of-Streaming-Multiprocessors-and.png │ ├── C2050Timeline.png │ ├── ENCCS.jpg │ ├── ENCCS_CSC_logos.jpg │ ├── HardwareReview.png │ ├── Loom.jpeg │ ├── ThreadExecution.jpeg │ ├── coalesced.png │ ├── comparison.png │ ├── compp.png │ ├── distributed_vs_shared.png │ ├── favicon.ico │ ├── gpu_vs_cpu.png │ ├── heat_0000.png │ ├── heat_montage.png │ ├── heteprogra.jpeg │ ├── memsch.png │ ├── microprocessor-trend-data.png │ ├── nvidia_block_diagram.jpeg │ ├── omp-parallel.png │ ├── processes-threads.png │ ├── processes-threads.svg │ ├── shared_mem.png │ ├── stencil-fixed-boundaries.svg │ ├── stencil.svg │ ├── threads.png │ ├── volta-architecture.png │ └── volta-sm-architecture.png ├── index.rst ├── interoperability.rst ├── introduction.rst ├── 
miniapp.rst ├── multi-gpu.rst ├── optimization.rst ├── porting.rst ├── profiling.rst ├── quick-reference.rst ├── syntax │ └── v4.5.0 │ │ ├── composite.c │ │ ├── composite.f90 │ │ ├── distribute.c │ │ ├── distribute.clause │ │ ├── distribute.f90 │ │ ├── target.c │ │ ├── target.clause │ │ ├── target.f90 │ │ ├── target_data.c │ │ ├── target_data.clause │ │ ├── target_data.f90 │ │ ├── target_enter_data.c │ │ ├── target_enter_data.f90 │ │ ├── target_enter_exit_data.clause │ │ ├── target_exit_data.c │ │ ├── target_exit_data.f90 │ │ ├── teams.c │ │ ├── teams.clause │ │ └── teams.f90 ├── target.rst └── volta-sm-architecture.png ├── make.bat └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | /_build 2 | /venv 3 | .ipynb_checkpoints 4 | venv* 5 | jupyter_execute 6 | /content/.auctex-auto/ 7 | /content/__pycache__/ 8 | /.ccls-cache/ 9 | a.out 10 | *~ 11 | \#* 12 | *.o 13 | -------------------------------------------------------------------------------- /LICENSE.code: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021, Thor Wikfeldt and individual contributors from ENCCS and CSC Training. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = content 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | code-samples-tarball: 23 | tar cfz openmp-gpu-code-samples.tgz --exclude \*.png --exclude heat_serial --exclude \*.o --exclude \*~ -C content code-samples 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenMP for GPU offloading 2 | 3 | OpenMP for GPU offloading 4 | 5 | ## Credit and license 6 | 7 | - https://enccs.github.io/lesson/#credits 8 | -------------------------------------------------------------------------------- /content/_static/overrides.css: -------------------------------------------------------------------------------- 1 | /* 2 | * colors = ['#0271AE', '#DC2830', '#FFC438', # blue, red, light orange 3 | * '#6E3B87', '#008D5D', '#FA902D', # purple, green, orange 4 | * '#0095B7', '#CB0C7B', '#F7E43C', # cyan, magenta, yellow 5 | * '#88B93B', '#444F95', '#F16232'] # pea green, dark blue, dark orange 6 | * 7 | * To use them in rST, you need to define a command in the epilog, see conf.py 8 | */ 9 | .blue {color: #0271AE;} 10 | .red {color: #DC2830;} 11 | .orange {color: #FFC438;} 12 | .purple {color: #633B87;} 13 | .green {color: #008D5D;} 14 | .dkorange {color: #FA902D;} 15 | .cyan {color: #0095B7;} 16 | .magenta {color: #CB0C8B;} 17 | .yellow {color: #F7E43C;} 18 | .peagreen {color: #88B93B;} 19 | .darkblue {color: #444F95;} 20 | .darkorange {color: #F16232;} 21 | 22 | /* override colors in sphinx_lesson.css with the schemes here: https://personal.sron.nl/~pault/#sec:qualitative */ 23 | 24 | /* instructor-note */ 25 | .rst-content .instructor-note { 26 | background: #DDDDDD; 27 | } 28 | .rst-content .instructor-note > .admonition-title { 29 | background: #BBBBBB; 30 | } 31 | .rst-content .instructor-note > .admonition-title::before { 32 | content: ""; 33 | } 34 | 35 | /* callout */ 36 | .rst-content .callout { 37 | 
background: #EEEEBB; 38 | } 39 | .rst-content .callout > .admonition-title { 40 | background: #BBCC33; 41 | } 42 | 43 | /* questions */ 44 | .rst-content .questions { 45 | background: rgba(253, 219, 199, 0.3); 46 | } 47 | .rst-content .questions > .admonition-title { 48 | background: rgba(204, 51, 17, 0.5); 49 | } 50 | 51 | /* discussion */ 52 | .rst-content .discussion { 53 | background: rgba(231, 212, 232, 0.3); 54 | } 55 | .rst-content .discussion > .admonition-title { 56 | background: rgba(194, 165, 207, 0.5); 57 | } 58 | 59 | /* signature */ 60 | .rst-content .signature { 61 | background: rgba(217, 240, 211, 0.3); 62 | } 63 | .rst-content .signature > .admonition-title { 64 | background: rgba(172, 211, 158, 0.5); 65 | } 66 | .rst-content .signature > .admonition-title::before { 67 | content: "\01F527"; 68 | } 69 | 70 | /* parameters */ 71 | .rst-content .parameters { 72 | background: rgba(217, 240, 211, 0.0); 73 | } 74 | .rst-content .parameters > .admonition-title { 75 | background: rgba(172, 211, 158, 0.5); 76 | } 77 | .rst-content .parameters > .admonition-title::before { 78 | content: "\01F4BB"; 79 | } 80 | 81 | /* typealong */ 82 | .rst-content .typealong { 83 | background: rgba(221, 221, 221, 0.3); 84 | } 85 | .rst-content .typealong > .admonition-title { 86 | background: rgba(187, 187, 187, 1.0); 87 | } 88 | .rst-content .typealong > .admonition-title::before { 89 | content: "\02328"; 90 | } 91 | 92 | /* Equation numbers to the right */ 93 | .math { 94 | text-align: left; 95 | } 96 | .eqno { 97 | float: right; 98 | } 99 | -------------------------------------------------------------------------------- /content/diagrams/stencil-fixed-boundaries: -------------------------------------------------------------------------------- 1 | 
3Z1Lb9pAFIV/DctIfgHOMk1asmikRKnUKjtjD9iNYZAxgeTX1xSbx4wttQvPmZwV+I4N+MM6c8+d8Xjg3y52kyJapQ8yEfnAc5LdwL8beJ7rj93qZR95ryOO4xwi8yJL6tgp8Jx9iGbHOrrJErG+2LGUMi+z1WUwlsuliMuLWFQUcnu520zml9+6iuZCCzzHUa5Hf2ZJmR6i4dA5xe9FNk+bb3ab81tEzc51YJ1GidyehfyvA/+2kLI8vFvsbkW+p9dwORz3raP1+MMKsSz/5YDwMZXh8PdNtvrx8f0tde4fwvKq/pS3KN/UJ1z/2PK9IVDIzTIR+w9xBv6XbZqV4nkVxfvWbfWnV7G0XOTVllu9nWV5fitzWfw91k8iEc7iKr4uC/kqzlpGcSims6pFP43mN4miFLuzUH1aEyEXoizeq12a1gZxc5GN6u3t6R8L6lB69mc1sai+RubHTz5hrN7UJP+DqsdI1XPAVH1KqgGYakBBdWSZAgwZqcIVYERJFa0AYwaqx2vTFgUIGanCFeCakipaARqz9cmxBpZJgEvhrlSscA1wKeyVhhUuAhT+ygttEwEKg6VixYsAhcPSsMJFgCLBUossvgfG6lEkWBpWtLZ6FAmWijVAa2tD8ZNjHdkmAhQJloYVLgIUCZaKFS8CHAmWY5sIUNSwNaxwEaAsYuNFgKOKHdgmAhQuS8OKFgGfwmWpWOEi4FO4LLXUAhcBn8JlaVjhIkDhslSscBFwHQ6bpU6/CuFcKXyWNqkFz5XSaFnAlcNpqaOveK4cVksd0MJzpfBa2hgBfKCQY86QVnbFc+VwW2olC8+Vw26pxQE8V06/ZZDrTRHduR/3TzdT99GfrF+eHE9w3JjlA2/MaqVKoQEqVZNzsVqpUiiARhWtABT1Fh94Y1YrVYpqi0oVrgAUtRaNKloBKCotAfDGrFaqFHUWlSpcASiqLBpVtAJwFFnU1Mrk0Gs7Vkp3ZXLotR0rpb0yOfTajpXDXwGnY7djpTRYeBGgdFh4EaCwWGqChRcBSo+FFwFKk4UXAQqXpVVaDc4RaB8V4HBZwClY7VgpXJZWE4Bj5XRZ6FILx92ZWt4Kx0rhsrRMwCDWzfDly694eh1N4mD8epU+TaL5lev3nGHNwljErVyn4TAYOkeuGsQW1J1c1cVaDNqBdqxBz6mAGazagrh4rj13Woa4qqPZeK4991pmuGqLN+K59txtGeIaWMe15+qgIa729Vs9lwfNcNXmC+K59lwfNMTVvn6LIn3V5rbgufZcITTE1bp+q3nex+fmqk5yN1jK6sDac4kQY7cs4EpptyzgSmm3LOBKabcs4EpptyzgSmm3LOBKabcs4EpptyzgSmm38FxHFPkr8J7iDqwU6StwaawOrBTZK3Ahtw6sFMkrcNnBDqwUuStwnaEOrBSpK/B5JB1YKTJX4NNzOrByJq7wy3VMMU6gcYVfr81z5Lm44nOsMYXR0rjCk6wxhdNSueKzrDGF1dK44vstCq+lccX3WxRmS5uPge+3GN1Wj1irzULK8qxtUiFIH2Qi9nv8AQ== -------------------------------------------------------------------------------- /content/diagrams/stencil.drawio: -------------------------------------------------------------------------------- 1 | 
3Z1fc9o4FMU/DY+Z8R9szGOSbpOHZiaZ7Mzu9M2xBbg1iDEmJPn0awc7gCSm6Rb5mPNUcm1s9JN60Lm6FgP/ev5yU8TL2Z1MRT7wnPRl4H8ZeJ7rj9zqnzry2kQcx9lGpkWWNrFd4DF7E+2JTXSdpWJ1cGIpZV5my8NgIhcLkZQHsbgo5ObwtInMD++6jKdCCzwmca5H/8nScraNRoGzi9+KbDpr7+y27ZvH7clNYDWLU7nZC/l/DfzrQspy+2r+ci3yml7LZfu+r0eOfnywQizKz7whup/JKPhxmS3/fvv2PHNu76Lywg+2l3mO83XT4ubTlq8tgkKuF6mor+IM/KvNLCvF4zJO6qObqter2Kyc59VfbvVykuX5tcxl8f5efzKZeElSxVdlIX+KvSNp+BQGYXVEb0fTtGdRlOJlL9S060bIuSiL1+qU5mjQMm5Gmd92wmbXZcMmNNvrrTYWN4Nk+nHlHcfqRYPyN7C6dqmmsYgmRqphEomnyWmougpVNwRT9Ripeuix6lNSHYKpDimohj1TAMvfVhiqcAUIKamiFWDEQPVjbPZFASJGqnAFGFNSRStA62HPHOuwZxLgUrgrFStcA1wKe6VhhYsAhb/yor6JAIXBUrHiRYDCYWlY4SJAMcFSkyy+B8bqUUywNKxobfUoJlgq1iFaW1uKZ4417JsIUEywNKxwEaCYYKlY8SLAMcFy+iYCFDlsDStcBCiT2HgR4MhiD/smAhQuS8OKFgGfwmWpWOEi4FO4LDXVAhcBn8JlaVjhIkDhslSseBGw7LKO1F8u5EKchmig+lY4UctOoKOKVhUrOoPtW3YC3WANVYMFH62WncAnsE7konxs7mYoKv/joevBv7p0WxBcLGVWtbDmLBZJlmvQq8aWh2SNCrqPuwnFeTZdVH8mFUJRxa9qdFkS55fNgXmWpvVtjF152Nmn6A61itvXu+MjVb7fH56t/nAdjtSiWnIcgce561DkFrVCTjxXyuRiD7hyZBfViiM8V470olrEgedKkV/U1sXR1sLlqJPVlhrxXDkyjOrqDZ6rR8FVTYjjuXLmGDvkelnEX9y324fLJ/fev1l9f3A8wfEwsg98GNlIlUIDVKpd1h8bqVIogEYVrQAU+RYf+DCykSpFtkWlClcAilyLRhWtABSZliHwYWQjVYo8i0oVrgAUWRaNKloBOJIs6tSqy3IjM1ZKd9VluZEZK6W96rLcyIyVw18BH0EyY6U0WHgRoHRYeBGgsFjqBAsvApQeCy8ClCYLLwIULkvLtHZYI2BeFeBwWcASLDNWCpel5QTgWDldFjrVwrEjgTZvhWOlcFnaTACOVXdZ64EX5nX5+2q9rF5O65fzNlbdZC+s9QBN4by29ampcD409NQpCufXwferf5OncXyTDEc/L2YPN/H0IrRt3AIRpUPTf4DIe/LD3TM4GkQD6qNcR8BKDjNW28YNgrXLZRwzVtvGDYO1Q7k2Y7Vt3LrBCizmMGId2TZuEKxwERjZNm4YrGgRGNk2bp1gjYD1HGasto0bBCteBGwbNwxWuAjYXh7rBuuwbyJA4bJUrHgRoHBZGla4CFC4rAi4vbwZK4XLUrHCRaD9PGRY0SIQcbgsYG2nGSuFy9KworU1onBZKtYuKzrMWClclppqwYsAhcvSsMJFgMJlqVjxIsDhsoC1nWasHC4LWNtpxsrhsoC1nUasYw6XNeyZCIwpXJaGFS0CYwqXpWLFiwCFy1JTLXgRoHBZGla4CFC4LBUrXgQ4XBbwKQ8zVgqXpVW1wLFSuiw8Vg6Xpa68orG6bT+fOVfgNrtHuFL4LG19AL1I6DoURktLueK5cjgt4HNJR7hyWC01MYDnyum18FwpzNa4b49n2f+ZCAhXeC2W/Z+JwHDF6wCF3xr37Qkt+z8TAeGK1wHrWxhiuMJ1wPoehp1wdZ2+PaVl/3ciMGB7oAQUjksHi5cCCsulTrXgi7Gu9a0MMVzxCktpueDLsa71zQwhU60e
6ACl5cLrgPXtDCFc8TpgfT9DzEQLLwQtRzaweCXg9Fw9kAIKz6VlYeFVBIZtDc+Ra99qtFyPwnLpOQI8WE7PBc+9eJSeqwdcKTyXPiOAg233mf31hrxX7v/elPf0m+2edis4+Ga7rv8JjyYW6WVRyE0NMo9Xqyw5pLx9g0in4ne57LU7MDS7jRUij8vs+fDyJhbNHe5lVt14txn1sUxOe4mVXBeJaN61I6pdKDjmBNsLlXExFaV2ofeu+Wj2n/TWJ4zfufdWeKreGsF7S3eT2cC7/rGVtHMTr0CdpI8+9xViUbt0U/nO94KArh/A6erWMtuO3HoEnxvfUBUDF85Xt5hZPXKryzlnCDhQl/fwgHWrmZ3n4NXEwR7b6s9CynL/m7Bq1OxOpqI+4z8= -------------------------------------------------------------------------------- /content/examples/v4.5.0/Example_target.1.c: -------------------------------------------------------------------------------- 1 | extern void init(float*, float*, int); 2 | extern void output(float*, int); 3 | void vec_mult(int N) 4 | { 5 | int i; 6 | float p[N], v1[N], v2[N]; 7 | init(v1, v2, N); 8 | #pragma omp target 9 | #pragma omp parallel for private(i) 10 | for (i=0; i 2 | typedef struct { 3 | double *A; 4 | int N; 5 | } Matrix; 6 | 7 | void init_matrix(Matrix *mat, int n) 8 | { 9 | mat->A = (double *)malloc(n*sizeof(double)); 10 | mat->N = n; 11 | #pragma omp target enter data map(alloc:mat->A[:n]) 12 | } 13 | 14 | void free_matrix(Matrix *mat) 15 | { 16 | #pragma omp target exit data map(delete:mat->A[:mat->N]) 17 | mat->N = 0; 18 | free(mat->A); 19 | mat->A = NULL; 20 | } 21 | -------------------------------------------------------------------------------- /content/examples/v4.5.0/Example_target_unstructured_data.1.f90: -------------------------------------------------------------------------------- 1 | module example 2 | real(8), allocatable :: A(:) 3 | 4 | contains 5 | subroutine initialize(N) 6 | integer :: N 7 | 8 | allocate(A(N)) 9 | !$omp target enter data map(alloc:A) 10 | 11 | end subroutine initialize 12 | 13 | subroutine finalize() 14 | 15 | !$omp target exit data map(delete:A) 16 | deallocate(A) 17 | 18 | end subroutine finalize 19 | end module example 20 | -------------------------------------------------------------------------------- 
/content/examples/v4.5.0/Example_teams.6.c: -------------------------------------------------------------------------------- 1 | extern void init(float *, float *, int); 2 | extern void output(float *, int); 3 | void vec_mult(float *p, float *v1, float *v2, int N) 4 | { 5 | int i; 6 | init(v1, v2, N); 7 | #pragma omp target teams map(to: v1[0:N], v2[:N]) map(from: p[0:N]) 8 | #pragma omp distribute parallel for simd 9 | for (i=0; idata.data(); 17 | double *prevdata = prev->data.data(); 18 | int nx = curr->nx; 19 | int ny = curr->ny; 20 | 21 | // Determine the temperature field at next time step 22 | // As we have fixed boundary conditions, the outermost gridpoints 23 | // are not updated. 24 | double dx2 = prev->dx * prev->dx; 25 | double dy2 = prev->dy * prev->dy; 26 | #pragma omp target teams distribute parallel for \ 27 | map(currdata[0:(nx+2)*(ny+2)],prevdata[0:(nx+2)*(ny+2)]) 28 | for (int i = 1; i < nx + 1; i++) { 29 | for (int j = 1; j < ny + 1; j++) { 30 | int ind = i * (ny + 2) + j; 31 | int ip = (i + 1) * (ny + 2) + j; 32 | int im = (i - 1) * (ny + 2) + j; 33 | int jp = i * (ny + 2) + j + 1; 34 | int jm = i * (ny + 2) + j - 1; 35 | currdata[ind] = prevdata[ind] + a*dt* 36 | ((prevdata[ip] - 2.0*prevdata[ind] + prevdata[im]) / dx2 + 37 | (prevdata[jp] - 2.0*prevdata[ind] + prevdata[jm]) / dy2); 38 | } 39 | } 40 | } 41 | 42 | // Start a data region and copy temperature fields to the device 43 | void enter_data(field *curr, field *prev) 44 | { 45 | int nx, ny; 46 | double *currdata, *prevdata; 47 | 48 | currdata = curr->data.data(); 49 | prevdata = prev->data.data(); 50 | nx = curr->nx; 51 | ny = curr->ny; 52 | 53 | // adding data mapping here 54 | 55 | } 56 | 57 | // End a data region and copy temperature fields back to the host 58 | void exit_data(field *curr, field *prev) 59 | { 60 | int nx, ny; 61 | double *currdata, *prevdata; 62 | 63 | currdata = curr->data.data(); 64 | prevdata = prev->data.data(); 65 | nx = curr->nx; 66 | ny = curr->ny; 67 | 68 | // 
adding data mapping here 69 | 70 | } 71 | 72 | // Copy a temperature field from the device to the host 73 | void update_host(field *temperature) 74 | { 75 | int nx, ny; 76 | double *data; 77 | 78 | data = temperature->data.data(); 79 | nx = temperature->nx; 80 | ny = temperature->ny; 81 | 82 | // adding data mapping here 83 | 84 | } 85 | -------------------------------------------------------------------------------- /content/exercise/data_mapping/fortran/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 CSC Training 2 | # Copyright (c) 2021 ENCCS 3 | ifeq ($(COMP),) 4 | COMP=nv 5 | endif 6 | 7 | HAVE_PNG=0 8 | ifeq ($(HAVE_PNG),1) 9 | PNG_LIBS=-lpng -lz -lc -lrt 10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 11 | endif 12 | 13 | 14 | COMMONDIR=../../common 15 | 16 | ifeq ($(COMP),nv) 17 | FC=nvfortran 18 | CC=nvc 19 | FCFLAGS=-O3 -mp=gpu -Minfo=mp 20 | CCFLAGS=-O3 -I$(COMMONDIR) 21 | LDFLAGS= 22 | LIBS= 23 | endif 24 | 25 | ifeq ($(COMP),gnu) 26 | FC=gfortran 27 | CC=gcc 28 | FCFLAGS=-O3 -Wall -fopenmp 29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR) 30 | LDFLAGS=-fopenmp 31 | LIBS= 32 | endif 33 | 34 | 35 | EXE=heat_serial 36 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o 37 | OBJS_PNG= $(COMMONDIR)/pngwriter.o 38 | 39 | all: $(EXE) 40 | 41 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 42 | heat_mod.o: heat_mod.F90 43 | core.o: core.F90 heat_mod.o 44 | utilities.o: utilities.F90 heat_mod.o 45 | io.o: io.F90 heat_mod.o pngwriter_mod.o 46 | setup.o: setup.F90 heat_mod.o utilities.o io.o 47 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o 48 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o 49 | 50 | $(EXE): $(OBJS) $(OBJS_PNG) 51 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS) 52 | 53 | %.o: %.F90 54 | $(FC) $(FCFLAGS) -c $< -o $@ 55 | 56 | %.o: %.c 57 | $(CC) 
-DHAVE_PNG=$(HAVE_PNG) $(CCFLAGS) -c $< -o $@ 58 | 59 | .PHONY: clean 60 | clean: 61 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ ../../common/*.o 62 | 63 | -------------------------------------------------------------------------------- /content/exercise/data_mapping/fortran/core.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Main solver routines for heat equation solver 4 | module core 5 | use heat 6 | 7 | contains 8 | 9 | ! Update the temperature values using five-point stencil 10 | ! Arguments: 11 | ! curr (type(field)): current temperature values 12 | ! prev (type(field)): temperature values from previous time step 13 | ! a (real(dp)): diffusivity 14 | ! dt (real(dp)): time step 15 | subroutine evolve(curr, prev, a, dt) 16 | 17 | implicit none 18 | 19 | type(field),target, intent(inout) :: curr, prev 20 | real(dp) :: a, dt 21 | integer :: i, j, nx, ny 22 | real(dp) :: dx, dy 23 | real(dp), pointer, contiguous, dimension(:,:) :: currdata, prevdata 24 | 25 | ! Help the compiler avoid being confused 26 | nx = curr%nx 27 | ny = curr%ny 28 | dx = curr%dx 29 | dy = curr%dy 30 | currdata => curr%data 31 | prevdata => prev%data 32 | 33 | ! Determine the temperature field at next time step As we have 34 | ! fixed boundary conditions, the outermost gridpoints are not 35 | ! updated. 36 | !$omp target teams distribute parallel do 37 | do j = 1, ny 38 | do i = 1, nx 39 | currdata(i, j) = prevdata(i, j) + a * dt * & 40 | & ((prevdata(i-1, j) - 2.0 * prevdata(i, j) + & 41 | & prevdata(i+1, j)) / dx**2 + & 42 | & (prevdata(i, j-1) - 2.0 * prevdata(i, j) + & 43 | & prevdata(i, j+1)) / dy**2) 44 | end do 45 | end do 46 | !$omp end target teams distribute parallel do 47 | end subroutine evolve 48 | 49 | ! Start a data region and copy temperature fields to the device 50 | ! curr (type(field)): current temperature values 51 | ! 
prev (type(field)): values from previous time step 52 | subroutine enter_data(curr, prev) 53 | implicit none 54 | type(field), target, intent(in) :: curr, prev 55 | real(kind=dp), pointer, contiguous :: currdata(:,:), prevdata(:,:) 56 | 57 | currdata => curr%data 58 | prevdata => prev%data 59 | 60 | ! adding data mapping here 61 | 62 | end subroutine enter_data 63 | 64 | ! End a data region and copy temperature fields back to the host 65 | ! curr (type(field)): current temperature values 66 | ! prev (type(field)): values from previous time step 67 | subroutine exit_data(curr, prev) 68 | implicit none 69 | type(field), target :: curr, prev 70 | real(kind=dp), pointer, contiguous :: currdata(:,:), prevdata(:,:) 71 | 72 | currdata => curr%data 73 | prevdata => prev%data 74 | 75 | ! adding data mapping here 76 | 77 | end subroutine exit_data 78 | 79 | ! Copy a temperature field from the device to the host 80 | ! temperature (type(field)): temperature field 81 | subroutine update_host(temperature) 82 | implicit none 83 | type(field), target :: temperature 84 | real(kind=dp), pointer, contiguous :: tempdata(:,:) 85 | 86 | tempdata => temperature%data 87 | 88 | ! adding data mapping here 89 | 90 | end subroutine update_host 91 | 92 | end module core 93 | -------------------------------------------------------------------------------- /content/exercise/data_mapping/fortran/heat_mod.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Field metadata for heat equation solver 4 | module heat 5 | use iso_fortran_env, only : REAL64 6 | implicit none 7 | 8 | integer, parameter :: dp = REAL64 9 | real(dp), parameter :: DX = 0.01, DY = 0.01 ! Fixed grid spacing 10 | 11 | type :: field 12 | integer :: nx ! 
dimension of the field 13 | integer :: ny 14 | real(dp) :: dx 15 | real(dp) :: dy 16 | real(dp), dimension(:,:), allocatable :: data 17 | end type field 18 | 19 | contains 20 | ! Initialize the field type metadata 21 | ! Arguments: 22 | ! field0 (type(field)): field whose metadata is set 23 | ! nx, ny: field dimensions (dx, dy are taken from the module constants DX, DY) 24 | subroutine set_field_dimensions(field0, nx, ny) 25 | implicit none 26 | 27 | type(field), intent(out) :: field0 28 | integer, intent(in) :: nx, ny 29 | 30 | field0%dx = DX 31 | field0%dy = DY 32 | field0%nx = nx 33 | field0%ny = ny 34 | 35 | end subroutine set_field_dimensions 36 | 37 | end module heat 38 | -------------------------------------------------------------------------------- /content/exercise/data_mapping/fortran/io.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! I/O routines for heat equation solver 4 | module io 5 | use heat 6 | 7 | contains 8 | 9 | ! Output routine, saves the temperature distribution as a png image 10 | ! Arguments: 11 | ! curr (type(field)): variable with the temperature data 12 | ! iter (integer): index of the time step 13 | subroutine write_field(curr, iter) 14 | 15 | use pngwriter 16 | implicit none 17 | type(field), intent(in) :: curr 18 | integer, intent(in) :: iter 19 | 20 | character(len=85) :: filename 21 | 22 | integer :: stat 23 | real(dp), dimension(:,:), allocatable, target :: full_data 24 | 25 | allocate(full_data(curr%nx, curr%ny)) 26 | ! Copy the interior of the field (without the halo) to the output array 27 | full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny) 28 | 29 | write(filename,'(A5,I4.4,A4,A)') 'heat_', iter, '.png' 30 | stat = save_png(full_data, curr%nx, curr%ny, filename) 31 | deallocate(full_data) 32 | 33 | end subroutine write_field 34 | 35 | 36 | ! Reads the temperature distribution from an input file 37 | ! Arguments: 38 | ! 
field0 (type(field)): field variable that will store the 39 | ! read data 40 | ! filename (char): name of the input file 41 | ! Note that this version assumes the input data to be in C memory layout 42 | subroutine read_field(field0, filename) 43 | 44 | implicit none 45 | type(field), intent(out) :: field0 46 | character(len=85), intent(in) :: filename 47 | 48 | integer :: nx, ny, i 49 | character(len=2) :: dummy 50 | 51 | real(dp), dimension(:,:), allocatable :: full_data 52 | 53 | open(10, file=filename) 54 | ! Read the header 55 | read(10, *) dummy, nx, ny 56 | 57 | call set_field_dimensions(field0, nx, ny) 58 | 59 | ! The arrays for temperature field contain also a halo region 60 | allocate(field0%data(0:field0%nx+1, 0:field0%ny+1)) 61 | 62 | allocate(full_data(nx, ny)) 63 | ! Read the data 64 | do i = 1, nx 65 | read(10, *) full_data(i, 1:ny) 66 | end do 67 | 68 | ! Copy to full array containing also boundaries 69 | field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:) 70 | 71 | ! Set the boundary values 72 | field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1) 73 | field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny) 74 | field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1) 75 | field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1) 76 | 77 | close(10) 78 | deallocate(full_data) 79 | 80 | end subroutine read_field 81 | 82 | end module io 83 | -------------------------------------------------------------------------------- /content/exercise/data_mapping/fortran/main.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Heat equation solver in 2D. 4 | 5 | program heat_solve 6 | use heat 7 | use core 8 | use io 9 | use setup 10 | use utilities 11 | use omp_lib 12 | 13 | implicit none 14 | 15 | real(dp), parameter :: a = 0.5 ! Diffusion constant 16 | type(field) :: current, previous ! 
Current and previous temperature fields 17 | 18 | real(dp) :: dt ! Time step 19 | integer :: nsteps ! Number of time steps 20 | integer, parameter :: image_interval = 1500 ! Image output interval 21 | 22 | integer :: iter 23 | 24 | real(dp) :: average_temp ! Average temperature 25 | 26 | real(kind=dp) :: start, stop ! Timers 27 | 28 | call initialize(current, previous, nsteps) 29 | 30 | ! Draw the picture of the initial state 31 | call write_field(current, 0) 32 | 33 | average_temp = average(current) 34 | write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp 35 | 36 | ! Largest stable time step 37 | dt = current%dx**2 * current%dy**2 / & 38 | & (2.0 * a * (current%dx**2 + current%dy**2)) 39 | 40 | ! Main iteration loop 41 | 42 | start = omp_get_wtime() 43 | 44 | ! copy data to device 45 | call enter_data(current, previous) 46 | 47 | do iter = 1, nsteps 48 | call evolve(current, previous, a, dt) 49 | if (mod(iter, image_interval) == 0) then 50 | ! update data on host for output 51 | call update_host(current) 52 | call write_field(current, iter) 53 | end if 54 | call swap_fields(current, previous) 55 | end do 56 | 57 | ! copy data back to host 58 | call exit_data(current, previous) 59 | 60 | stop = omp_get_wtime() 61 | 62 | ! Average temperature for reference (the final swap_fields call leaves the newest field in previous) 63 | average_temp = average(previous) 64 | 65 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.' 66 | write(*,'(A,F9.6)') 'Average temperature: ', average_temp 67 | if (command_argument_count() == 0) then 68 | write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239 69 | end if 70 | 71 | call finalize(current, previous) 72 | 73 | end program heat_solve 74 | -------------------------------------------------------------------------------- /content/exercise/data_mapping/fortran/pngwriter_mod.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! 
PNG writer for heat equation solver 4 | module pngwriter 5 | use heat 6 | 7 | contains 8 | 9 | function save_png(data, nx, ny, fname) result(stat) 10 | 11 | use, intrinsic :: ISO_C_BINDING 12 | implicit none 13 | 14 | real(dp), dimension(:,:), intent(in) :: data 15 | integer, intent(in) :: nx, ny 16 | character(len=*), intent(in) :: fname 17 | integer :: stat 18 | 19 | ! Interface for save_png C-function 20 | interface 21 | ! The C-function definition is 22 | ! int save_png(double *data, const int nx, const int ny, 23 | ! const char *fname) 24 | function save_png_c(data, nx, ny, fname, order) & 25 | & bind(C,name="save_png") result(stat) 26 | use, intrinsic :: ISO_C_BINDING 27 | implicit none 28 | real(kind=C_DOUBLE) :: data(*) 29 | integer(kind=C_INT), value, intent(IN) :: nx, ny 30 | character(kind=C_CHAR), intent(IN) :: fname(*) 31 | character(kind=C_CHAR), value, intent(IN) :: order 32 | integer(kind=C_INT) :: stat 33 | end function save_png_c 34 | end interface 35 | 36 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') 37 | if (stat /= 0) then 38 | write(*,*) 'save_png returned error!' 39 | end if 40 | 41 | end function save_png 42 | 43 | end module pngwriter 44 | -------------------------------------------------------------------------------- /content/exercise/data_mapping/fortran/setup.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Setup routines for heat equation solver 4 | module setup 5 | use heat 6 | 7 | contains 8 | 9 | subroutine initialize(previous, current, nsteps) 10 | use utilities 11 | use io 12 | 13 | implicit none 14 | 15 | type(field), intent(out) :: previous, current 16 | integer, intent(out) :: nsteps 17 | 18 | integer :: rows, cols 19 | logical :: using_input_file 20 | character(len=85) :: input_file, arg ! Input file name and command line arguments 21 | 22 | 23 | ! 
Default values for grid size and time steps 24 | rows = 2000 25 | cols = 2000 26 | nsteps = 500 27 | using_input_file = .false. 28 | 29 | ! Read in the command line arguments and 30 | ! set up the needed variables 31 | select case(command_argument_count()) 32 | case(0) ! No arguments -> default values 33 | case(1) ! One argument -> input file name 34 | using_input_file = .true. 35 | call get_command_argument(1, input_file) 36 | case(2) ! Two arguments -> input file name and number of steps 37 | using_input_file = .true. 38 | call get_command_argument(1, input_file) 39 | call get_command_argument(2, arg) 40 | read(arg, *) nsteps 41 | case(3) ! Three arguments -> rows, cols and nsteps 42 | call get_command_argument(1, arg) 43 | read(arg, *) rows 44 | call get_command_argument(2, arg) 45 | read(arg, *) cols 46 | call get_command_argument(3, arg) 47 | read(arg, *) nsteps 48 | case default 49 | call usage() 50 | stop 51 | end select 52 | 53 | ! Initialize the fields according to the command line arguments 54 | if (using_input_file) then 55 | call read_field(previous, input_file) 56 | call copy_fields(previous, current) 57 | else 58 | call set_field_dimensions(previous, rows, cols) 59 | call set_field_dimensions(current, rows, cols) 60 | call generate_field(previous) 61 | call copy_fields(previous, current) 62 | end if 63 | 64 | end subroutine initialize 65 | 66 | ! Generate the initial temperature field. The pattern is a disc with a radius 67 | ! of nx / 6 in the center of the grid. 68 | ! Boundary conditions are (different) constant temperatures outside the grid 69 | subroutine generate_field(field0) 70 | use heat 71 | 72 | implicit none 73 | 74 | type(field), intent(inout) :: field0 75 | 76 | real(dp) :: radius2 77 | integer :: i, j, ds2 78 | 79 | ! The arrays for field also contain a halo region 80 | allocate(field0%data(0:field0%nx+1, 0:field0%ny+1)) 81 | 82 | ! 
Square of the disk radius 83 | radius2 = (field0%nx / 6.0_dp)**2 84 | 85 | do j = 0, field0%ny + 1 86 | do i = 0, field0%nx + 1 87 | ds2 = int((i - field0%nx / 2.0_dp + 1)**2 + & 88 | & (j - field0%ny / 2.0_dp + 1)**2) 89 | if (ds2 < radius2) then 90 | field0%data(i,j) = 5.0_dp 91 | else 92 | field0%data(i,j) = 65.0_dp 93 | end if 94 | end do 95 | end do 96 | 97 | ! Boundary conditions 98 | field0%data(:,0) = 20.0_dp 99 | field0%data(:,field0%ny+1) = 70.0_dp 100 | field0%data(0,:) = 85.0_dp 101 | field0%data(field0%nx+1,:) = 5.0_dp 102 | 103 | end subroutine generate_field 104 | 105 | 106 | ! Clean up routine for field type 107 | ! Arguments: 108 | ! field0, field1 (type(field)): field variables whose data arrays are deallocated 109 | subroutine finalize(field0, field1) 110 | use heat 111 | 112 | implicit none 113 | 114 | type(field), intent(inout) :: field0, field1 115 | 116 | deallocate(field0%data) 117 | deallocate(field1%data) 118 | 119 | end subroutine finalize 120 | 121 | ! Helper routine that prints out a simple usage if 122 | ! user gives more than three arguments 123 | subroutine usage() 124 | implicit none 125 | character(len=256) :: buf 126 | 127 | call get_command_argument(0, buf) 128 | write (*,'(A)') 'Usage:' 129 | write (*,'(A, " (default values will be used)")') trim(buf) 130 | write (*,'(A, " <filename>")') trim(buf) 131 | write (*,'(A, " <filename> <nsteps>")') trim(buf) 132 | write (*,'(A, " <rows> <cols> <nsteps>")') trim(buf) 133 | end subroutine usage 134 | 135 | end module setup 136 | -------------------------------------------------------------------------------- /content/exercise/data_mapping/fortran/utilities.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Utility routines for heat equation solver 4 | ! NOTE: This file does not need to be edited! 5 | module utilities 6 | use heat 7 | 8 | contains 9 | 10 | ! Swap the data fields of two variables of type field 11 | ! Arguments: 12 | ! 
curr, prev (type(field)): the two variables that are swapped 13 | subroutine swap_fields(curr, prev) 14 | 15 | implicit none 16 | 17 | type(field), intent(inout) :: curr, prev 18 | real(dp), allocatable, dimension(:,:) :: tmp 19 | 20 | call move_alloc(curr%data, tmp) 21 | call move_alloc(prev%data, curr%data) 22 | call move_alloc(tmp, prev%data) 23 | end subroutine swap_fields 24 | 25 | ! Copy the data from one field to another 26 | ! Arguments: 27 | ! from_field (type(field)): variable to copy from 28 | ! to_field (type(field)): variable to copy to 29 | subroutine copy_fields(from_field, to_field) 30 | 31 | implicit none 32 | 33 | type(field), intent(in) :: from_field 34 | type(field), intent(out) :: to_field 35 | 36 | ! Consistency checks 37 | if (.not.allocated(from_field%data)) then 38 | write (*,*) "Can not copy from a field without allocated data" 39 | stop 40 | end if 41 | if (.not.allocated(to_field%data)) then 42 | ! Target is not initialized, allocate memory 43 | allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), & 44 | & lbound(from_field%data, 2):ubound(from_field%data, 2))) 45 | else if (any(shape(from_field%data) /= shape(to_field%data))) then 46 | write (*,*) "Wrong field data sizes in copy routine" 47 | print *, shape(from_field%data), shape(to_field%data) 48 | stop 49 | end if 50 | 51 | to_field%data = from_field%data 52 | 53 | to_field%nx = from_field%nx 54 | to_field%ny = from_field%ny 55 | to_field%dx = from_field%dx 56 | to_field%dy = from_field%dy 57 | end subroutine copy_fields 58 | 59 | function average(field0) 60 | 61 | implicit none 62 | 63 | real(dp) :: average 64 | type(field) :: field0 65 | 66 | real(dp) :: local_average 67 | integer :: rc 68 | 69 | average = sum(field0%data(1:field0%nx, 1:field0%ny)) 70 | average = average / (field0%nx * field0%ny) 71 | 72 | end function average 73 | 74 | end module utilities 75 | -------------------------------------------------------------------------------- 
/content/exercise/data_mapping/heat.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | #ifndef __HEAT_H__ 4 | #define __HEAT_H__ 5 | 6 | #include <vector> 7 | 8 | // Datatype for temperature field 9 | struct field { 10 | // nx and ny are the dimensions of the field. The array data 11 | // contains also ghost layers, so it will have dimensions nx+2 x ny+2 12 | int nx; 13 | int ny; 14 | // Size of the grid cells 15 | double dx; 16 | double dy; 17 | // The temperature values in the 2D grid 18 | std::vector<double> data; 19 | }; 20 | 21 | // We use here fixed grid spacing 22 | const double DX = 0.01; 23 | const double DY = 0.01; 24 | 25 | #if __cplusplus 26 | extern "C" { 27 | #endif 28 | // Function prototypes 29 | void set_field_dimensions(field *temperature, int nx, int ny); 30 | 31 | void initialize(int argc, char *argv[], field *temperature1, 32 | field *temperature2, int *nsteps); 33 | 34 | void generate_field(field *temperature); 35 | 36 | double average(field *temperature); 37 | 38 | void evolve(field *curr, field *prev, double a, double dt); 39 | 40 | void write_field(field *temperature, int iter); 41 | 42 | void read_field(field *temperature1, field *temperature2, 43 | char *filename); 44 | 45 | void copy_field(field *temperature1, field *temperature2); 46 | 47 | void swap_fields(field *temperature1, field *temperature2); 48 | 49 | void allocate_field(field *temperature); 50 | 51 | void enter_data(field *temperature1, field *temperature2); 52 | 53 | void exit_data(field *temperature1, field *temperature2); 54 | 55 | void update_host(field *temperature); 56 | 57 | #if __cplusplus 58 | } 59 | #endif 60 | #endif // __HEAT_H__ 61 | 62 | -------------------------------------------------------------------------------- /content/exercise/data_mapping/heat_serial: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/exercise/data_mapping/heat_serial -------------------------------------------------------------------------------- /content/exercise/data_mapping/io.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // I/O related functions for heat equation solver 4 | 5 | #include <stdio.h> 6 | #include <stdlib.h> 7 | #include <algorithm> 8 | #include <vector> 9 | 10 | #include "heat.h" 11 | #include "pngwriter.h" 12 | 13 | // Output routine that prints out a picture of the temperature 14 | // distribution. 15 | void write_field(field *temperature, int iter) 16 | { 17 | char filename[64]; 18 | 19 | // The actual write routine takes only the actual data 20 | // (without boundary layers) so we need to copy an array with that. 21 | std::vector<double> inner_data(temperature->nx * temperature->ny); 22 | auto inner_data_iterator = inner_data.begin(); 23 | auto beginning_of_row = temperature->data.begin() + (temperature->ny + 2) + 1; 24 | for (int i = 0; i < temperature->nx; i++) { 25 | auto end_of_row = beginning_of_row + temperature->ny; 26 | std::copy(beginning_of_row, end_of_row, inner_data_iterator); 27 | inner_data_iterator += temperature->ny; 28 | beginning_of_row = end_of_row + 2; 29 | } 30 | 31 | // Write out the data to a png file 32 | sprintf(filename, "%s_%04d.png", "heat", iter); 33 | save_png(inner_data.data(), temperature->nx, temperature->ny, filename, 'c'); 34 | } 35 | 36 | // Read the initial temperature distribution from a file and 37 | // initialize the temperature fields temperature1 and 38 | // temperature2 to the same initial state. 
39 | void read_field(field *temperature1, field *temperature2, char *filename) 40 | { 41 | FILE *fp; 42 | int nx, ny, ind; 43 | 44 | int nx_local, ny_local, count; 45 | 46 | fp = fopen(filename, "r"); 47 | // Read the header 48 | count = fscanf(fp, "# %d %d \n", &nx, &ny); 49 | if (count < 2) { 50 | fprintf(stderr, "Error while reading the input file!\n"); 51 | exit(-1); 52 | } 53 | 54 | set_field_dimensions(temperature1, nx, ny); 55 | set_field_dimensions(temperature2, nx, ny); 56 | 57 | // Allocate arrays (including boundary layers) 58 | int newSize = (temperature1->nx + 2) * (temperature1->ny + 2); 59 | temperature1->data.resize(newSize, 0.0); 60 | temperature2->data.resize(newSize, 0.0); 61 | 62 | // Array from file 63 | std::vector file_data(nx * ny, 0.0); 64 | 65 | // Read the actual data 66 | for (int i = 0; i < nx; i++) { 67 | for (int j = 0; j < ny; j++) { 68 | ind = i * ny + j; 69 | count = fscanf(fp, "%lf", &file_data[ind]); 70 | } 71 | } 72 | 73 | nx_local = temperature1->nx; 74 | ny_local = temperature1->ny; 75 | 76 | // Copy to the inner part of the full temperature field 77 | auto temperature_data_iterator = temperature1->data.begin() + (ny_local + 2) + 1; 78 | auto beginning_of_row = file_data.begin(); 79 | for (int i = 0; i < nx_local; i++) { 80 | auto end_of_row = beginning_of_row + ny_local; 81 | std::copy(beginning_of_row, end_of_row, temperature_data_iterator); 82 | temperature_data_iterator += ny_local + 2; 83 | beginning_of_row = end_of_row; 84 | } 85 | 86 | // Set the boundary values 87 | for (int i = 1; i < nx_local + 1; i++) { 88 | temperature1->data[i * (ny_local + 2)] = temperature1->data[i * (ny_local + 2) + 1]; 89 | temperature1->data[i * (ny_local + 2) + ny + 1] = temperature1->data[i * (ny_local + 2) + ny]; 90 | } 91 | for (int j = 0; j < ny + 2; j++) { 92 | temperature1->data[j] = temperature1->data[ny_local + j]; 93 | temperature1->data[(nx_local + 1) * (ny_local + 2) + j] = 94 | temperature1->data[nx_local * (ny_local + 2) + j]; 
95 | } 96 | 97 | copy_field(temperature1, temperature2); 98 | 99 | fclose(fp); 100 | } 101 | -------------------------------------------------------------------------------- /content/exercise/data_mapping/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // Main routine for heat equation solver in 2D. 4 | 5 | #include <stdio.h> 6 | #include <omp.h> 7 | 8 | #include "heat.h" 9 | 10 | int main(int argc, char **argv) 11 | { 12 | // Image output interval 13 | int image_interval = 1500; 14 | 15 | // Number of time steps 16 | int nsteps; 17 | // Current and previous temperature fields 18 | field current, previous; 19 | initialize(argc, argv, &current, &previous, &nsteps); 20 | 21 | // Output the initial field 22 | write_field(&current, 0); 23 | 24 | double average_temp = average(&current); 25 | printf("Average temperature at start: %f\n", average_temp); 26 | 27 | // Diffusion constant 28 | double a = 0.5; 29 | 30 | // Compute the largest stable time step 31 | double dx2 = current.dx * current.dx; 32 | double dy2 = current.dy * current.dy; 33 | // Time step 34 | double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2)); 35 | 36 | // Get the start time stamp 37 | double start_clock = omp_get_wtime(); 38 | 39 | // Copy fields to device 40 | enter_data(&current, &previous); 41 | 42 | // Time evolution 43 | for (int iter = 1; iter <= nsteps; iter++) { 44 | evolve(&current, &previous, a, dt); 45 | if (iter % image_interval == 0) { 46 | // update data on host for output 47 | update_host(&current); 48 | write_field(&current, iter); 49 | } 50 | // Swap current field so that it will be used 51 | // as previous for next iteration step 52 | swap_fields(&current, &previous); 53 | } 54 | 55 | // copy data back to host 56 | exit_data(&current, &previous); 57 | 58 | double stop_clock = omp_get_wtime(); 59 | 60 | // Average temperature for reference 61 | average_temp = average(&previous); 62 | 63 | // Determine the CPU time used for all the iterations 64 
| printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock)); 65 | printf("Average temperature: %f\n", average_temp); 66 | if (argc == 1) { 67 | printf("Reference value with default arguments: 59.281239\n"); 68 | } 69 | 70 | // Output the final field 71 | write_field(&previous, nsteps); 72 | 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /content/exercise/data_mapping/utilities.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // Utility functions for heat equation solver 4 | 5 | #include <algorithm> 6 | #include <cassert> 7 | 8 | #include "heat.h" 9 | 10 | 11 | // Copy data on temperature1 into temperature2 12 | void copy_field(field *temperature1, field *temperature2) 13 | { 14 | assert(temperature1->nx == temperature2->nx); 15 | assert(temperature1->ny == temperature2->ny); 16 | assert(temperature1->data.size() == temperature2->data.size()); 17 | std::copy(temperature1->data.begin(), temperature1->data.end(), 18 | temperature2->data.begin()); 19 | } 20 | 21 | // Swap the field data for temperature1 and temperature2 22 | void swap_fields(field *temperature1, field *temperature2) 23 | { 24 | std::swap(temperature1->data, temperature2->data); 25 | } 26 | 27 | // Allocate memory for a temperature field and initialise it to zero 28 | void allocate_field(field *temperature) 29 | { 30 | // Include also boundary layers 31 | int newSize = (temperature->nx + 2) * (temperature->ny + 2); 32 | temperature->data.resize(newSize, 0.0); 33 | } 34 | 35 | // Calculate average temperature over the non-boundary grid cells 36 | double average(field *temperature) 37 | { 38 | double average = 0.0; 39 | 40 | for (int i = 1; i < temperature->nx + 1; i++) { 41 | for (int j = 1; j < temperature->ny + 1; j++) { 42 | int ind = i * (temperature->ny + 2) + j; 43 | average += temperature->data[ind]; 44 | } 45 | } 46 | 47 | average /= 
(temperature->nx * temperature->ny); 48 | return average; 49 | } 50 | 51 | 52 | -------------------------------------------------------------------------------- /content/exercise/ex00/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 CSC Training 4 | Copyright (c) 2021 ENCCS 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /content/exercise/ex00/ex00.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! 
Copyright (c) 2021 ENCCS 3 | program hello 4 | 5 | #ifdef _OPENMP 6 | use omp_lib 7 | #endif 8 | implicit none 9 | 10 | integer :: num_devices,nteams,nthreads 11 | logical :: initial_device 12 | 13 | num_devices = omp_get_num_devices() 14 | print *, "Number of available devices", num_devices 15 | 16 | !$omp target map(initial_device,nteams,nthreads) 17 | initial_device = omp_is_initial_device() 18 | nteams= omp_get_num_teams() 19 | nthreads= omp_get_num_threads() 20 | !$omp end target 21 | if (initial_device) then 22 | write(*,*) "Running on host" 23 | else 24 | write(*,'(A,I4,A,I4,A)') "Running on device with ",nteams, " teams in total and ", nthreads, " threads in each team" 25 | end if 26 | 27 | end program 28 | 29 | -------------------------------------------------------------------------------- /content/exercise/ex00/ex00.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019 CSC Training */ 2 | /* Copyright (c) 2021 ENCCS */ 3 | #include <stdio.h> 4 | 5 | #ifdef _OPENMP 6 | #include <omp.h> 7 | #endif 8 | 9 | int main() 10 | { 11 | int num_devices = omp_get_num_devices(); 12 | printf("Number of available devices %d\n", num_devices); 13 | 14 | #pragma omp target 15 | { 16 | if (omp_is_initial_device()) { 17 | printf("Running on host\n"); 18 | } else { 19 | int nteams= omp_get_num_teams(); 20 | int nthreads= omp_get_num_threads(); 21 | printf("Running on device with %d teams in total and %d threads in each team\n",nteams,nthreads); 22 | } 23 | } 24 | 25 | } 26 | 27 | -------------------------------------------------------------------------------- /content/exercise/ex01/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 CSC Training 4 | Copyright (c) 2021 ENCCS 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without 
restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /content/exercise/ex01/ex01.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | program dotproduct 4 | implicit none 5 | 6 | integer, parameter :: nx = 102400 7 | real, parameter :: r=0.2 8 | 9 | real, dimension(nx) :: vecA,vecB,vecC 10 | real :: sum 11 | integer :: i 12 | 13 | ! Initialization of vectors 14 | do i = 1, nx 15 | vecA(i) = r**(i-1) 16 | vecB(i) = 1.0 17 | end do 18 | 19 | ! Dot product of two vectors 20 | do i = 1, nx 21 | vecC(i) = vecA(i) * vecB(i) 22 | end do 23 | 24 | sum = 0.0 25 | ! 
Calculate the sum 26 | do i = 1, nx 27 | sum = vecC(i) + sum 28 | end do 29 | 30 | write(*,*) 'The sum is: ', sum 31 | 32 | end program dotproduct 33 | -------------------------------------------------------------------------------- /content/exercise/ex01/ex01.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019 CSC Training */ 2 | /* Copyright (c) 2021 ENCCS */ 3 | #include 4 | #include 5 | #define NX 102400 6 | 7 | int main(void) 8 | { 9 | double vecA[NX],vecB[NX],vecC[NX]; 10 | double r=0.2; 11 | 12 | /* Initialization of vectors */ 13 | for (int i = 0; i < NX; i++) { 14 | vecA[i] = pow(r, i); 15 | vecB[i] = 1.0; 16 | } 17 | 18 | /* Dot product of two vectors */ 19 | for (int i = 0; i < NX; i++) { 20 | vecC[i] = vecA[i] * vecB[i]; 21 | } 22 | 23 | double sum = 0.0; 24 | /* Calculate the sum */ 25 | for (int i = 0; i < NX; i++) { 26 | sum += vecC[i]; 27 | } 28 | printf("The sum is: %8.6f \n", sum); 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /content/exercise/ex01/solution/ex01.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | program dotproduct 4 | implicit none 5 | 6 | integer, parameter :: nx = 102400 7 | real, parameter :: r=0.2 8 | 9 | real, dimension(nx) :: vecA,vecB,vecC 10 | real :: sum 11 | integer :: i 12 | 13 | ! Initialization of vectors 14 | do i = 1, nx 15 | vecA(i) = r**(i-1) 16 | vecB(i) = 1.0 17 | end do 18 | 19 | ! Dot product of two vectors 20 | !$omp target 21 | do i = 1, nx 22 | vecC(i) = vecA(i) * vecB(i) 23 | end do 24 | !$omp end target 25 | 26 | sum = 0.0 27 | ! 
Calculate the sum 28 | do i = 1, nx 29 | sum = vecC(i) + sum 30 | end do 31 | 32 | write(*,*) 'The sum is: ', sum 33 | 34 | end program dotproduct 35 | -------------------------------------------------------------------------------- /content/exercise/ex01/solution/ex01.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019 CSC Training */ 2 | /* Copyright (c) 2021 ENCCS */ 3 | #include <stdio.h> 4 | #include <math.h> 5 | #define NX 102400 6 | 7 | int main(void) 8 | { 9 | double vecA[NX],vecB[NX],vecC[NX]; 10 | double r=0.2; 11 | 12 | /* Initialization of vectors */ 13 | for (int i = 0; i < NX; i++) { 14 | vecA[i] = pow(r, i); 15 | vecB[i] = 1.0; 16 | } 17 | 18 | /* dot product of two vectors */ 19 | #pragma omp target 20 | for (int i = 0; i < NX; i++) { 21 | vecC[i] = vecA[i] * vecB[i]; 22 | } 23 | 24 | double sum = 0.0; 25 | /* calculate the sum */ 26 | for (int i = 0; i < NX; i++) { 27 | sum += vecC[i]; 28 | } 29 | printf("The sum is: %8.6f \n", sum); 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /content/exercise/ex02/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 CSC Training 4 | Copyright (c) 2021 ENCCS 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /content/exercise/ex02/ex02.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | program dotproduct 4 | implicit none 5 | 6 | integer, parameter :: nx = 102400 7 | real, parameter :: r=0.2 8 | 9 | real, dimension(nx) :: vecA,vecB,vecC 10 | real :: sum 11 | integer :: i 12 | 13 | ! Initialization of vectors 14 | do i = 1, nx 15 | vecA(i) = r**(i-1) 16 | vecB(i) = 1.0 17 | end do 18 | 19 | ! Dot product of two vectors 20 | !$omp target 21 | do i = 1, nx 22 | vecC(i) = vecA(i) * vecB(i) 23 | end do 24 | !$omp end target 25 | 26 | sum = 0.0 27 | ! 
Calculate the sum 28 | do i = 1, nx 29 | sum = vecC(i) + sum 30 | end do 31 | 32 | write(*,*) 'The sum is: ', sum 33 | 34 | end program dotproduct 35 | -------------------------------------------------------------------------------- /content/exercise/ex02/ex02.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019 CSC Training */ 2 | // Copyright (c) 2021 ENCCS 3 | #include <stdio.h> 4 | #include <math.h> 5 | #define NX 102400 6 | 7 | int main(void) 8 | { 9 | double vecA[NX],vecB[NX],vecC[NX]; 10 | double r=0.2; 11 | 12 | /* Initialization of vectors */ 13 | for (int i = 0; i < NX; i++) { 14 | vecA[i] = pow(r, i); 15 | vecB[i] = 1.0; 16 | } 17 | 18 | /* dot product of two vectors */ 19 | #pragma omp target 20 | for (int i = 0; i < NX; i++) { 21 | vecC[i] = vecA[i] * vecB[i]; 22 | } 23 | 24 | double sum = 0.0; 25 | /* calculate the sum */ 26 | for (int i = 0; i < NX; i++) { 27 | sum += vecC[i]; 28 | } 29 | printf("The sum is: %8.6f \n", sum); 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /content/exercise/ex02/solution/ex02.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | program dotproduct 4 | implicit none 5 | 6 | integer, parameter :: nx = 102400 7 | real, parameter :: r=0.2 8 | 9 | real, dimension(nx) :: vecA,vecB,vecC 10 | real :: sum 11 | integer :: i 12 | 13 | ! Initialization of vectors 14 | do i = 1, nx 15 | vecA(i) = r**(i-1) 16 | vecB(i) = 1.0 17 | end do 18 | 19 | ! Dot product of two vectors 20 | !$omp target teams distribute parallel do 21 | do i = 1, nx 22 | vecC(i) = vecA(i) * vecB(i) 23 | end do 24 | !$omp end target teams distribute parallel do 25 | 26 | sum = 0.0 27 | ! 
Calculate the sum 28 | do i = 1, nx 29 | sum = vecC(i) + sum 30 | end do 31 | 32 | write(*,*) 'The sum is: ', sum 33 | 34 | end program dotproduct 35 | -------------------------------------------------------------------------------- /content/exercise/ex02/solution/ex02.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019 CSC Training */ 2 | /* Copyright (c) 2021 ENCCS */ 3 | #include <stdio.h> 4 | #include <math.h> 5 | #define NX 102400 6 | 7 | int main(void) 8 | { 9 | double vecA[NX],vecB[NX],vecC[NX]; 10 | double r=0.2; 11 | 12 | /* Initialization of vectors */ 13 | for (int i = 0; i < NX; i++) { 14 | vecA[i] = pow(r, i); 15 | vecB[i] = 1.0; 16 | } 17 | 18 | /* dot product of two vectors */ 19 | #pragma omp target teams distribute parallel for 20 | for (int i = 0; i < NX; i++) { 21 | vecC[i] = vecA[i] * vecB[i]; 22 | } 23 | 24 | double sum = 0.0; 25 | /* calculate the sum */ 26 | for (int i = 0; i < NX; i++) { 27 | sum += vecC[i]; 28 | } 29 | printf("The sum is: %8.6f \n", sum); 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /content/exercise/ex03/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 CSC Training 4 | Copyright (c) 2021 ENCCS 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /content/exercise/ex03/ex03.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | program hello 4 | 5 | #ifdef _OPENMP 6 | use omp_lib 7 | #endif 8 | implicit none 9 | 10 | integer :: num_devices,nteams,nthreads 11 | logical :: initial_device 12 | 13 | num_devices = omp_get_num_devices() 14 | print *, "Number of available devices", num_devices 15 | 16 | !$omp target map(initial_device,nteams,nthreads) 17 | initial_device = omp_is_initial_device() 18 | nteams= omp_get_num_teams() 19 | nthreads= omp_get_num_threads() 20 | !$omp end target 21 | if (initial_device) then 22 | write(*,*) "Running on host" 23 | else 24 | write(*,'(A,I4,A,I4,A)') "Running on device with ",nteams, " teams in total and ", nthreads, " threads in each team" 25 | end if 26 | 27 | end program 28 | 29 | -------------------------------------------------------------------------------- /content/exercise/ex03/ex03.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019 CSC Training */ 2 | /* Copyright (c) 2021 ENCCS */ 3 | #include <stdio.h> 4 | 5 | #ifdef _OPENMP 6 | #include <omp.h> 7 | #endif 8 | 9 | int main() 10 | { 11 | int num_devices = omp_get_num_devices(); 12 | printf("Number of available devices %d\n", num_devices); 13 | 14 | #pragma omp target 15 | { 16 | if (omp_is_initial_device()) { 17 
| printf("Running on host\n"); 18 | } else { 19 | int nteams= omp_get_num_teams(); 20 | int nthreads= omp_get_num_threads(); 21 | printf("Running on device with %d teams in total and %d threads in each team\n",nteams,nthreads); 22 | } 23 | } 24 | 25 | } 26 | 27 | -------------------------------------------------------------------------------- /content/exercise/ex03/solution/ex03.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | program hello 4 | 5 | #ifdef _OPENMP 6 | use omp_lib 7 | #endif 8 | implicit none 9 | 10 | integer :: num_devices,nteams,nthreads 11 | logical :: initial_device 12 | 13 | num_devices = omp_get_num_devices() 14 | print *, "Number of available devices", num_devices 15 | 16 | !$omp target map(initial_device,nteams,nthreads) 17 | !$omp teams num_teams(2) thread_limit(3) 18 | !$omp parallel 19 | initial_device = omp_is_initial_device() 20 | nteams= omp_get_num_teams() 21 | nthreads= omp_get_num_threads() 22 | !$omp end parallel 23 | !$omp end teams 24 | !$omp end target 25 | if (initial_device) then 26 | write(*,*) "Running on host" 27 | else 28 | write(*,'(A,I4,A,I4,A)') "Running on device with ",nteams, " teams in total and ", nthreads, " threads in each team" 29 | end if 30 | 31 | end program 32 | 33 | -------------------------------------------------------------------------------- /content/exercise/ex03/solution/ex03.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019 CSC Training */ 2 | /* Copyright (c) 2021 ENCCS */ 3 | #include <stdio.h> 4 | 5 | #ifdef _OPENMP 6 | #include <omp.h> 7 | #endif 8 | 9 | int main() 10 | { 11 | int num_devices = omp_get_num_devices(); 12 | printf("Number of available devices %d\n", num_devices); 13 | 14 | #pragma omp target 15 | #pragma omp teams num_teams(2) thread_limit(3) 16 | #pragma omp parallel 17 | { 18 | if (omp_is_initial_device()) { 19 | printf("Running on host\n"); 20 | 
} else { 21 | int nteams= omp_get_num_teams(); 22 | int nthreads= omp_get_num_threads(); 23 | printf("Running on device with %d teams in total and %d threads in each team\n",nteams,nthreads); 24 | } 25 | } 26 | 27 | } 28 | 29 | -------------------------------------------------------------------------------- /content/exercise/ex04/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 CSC Training 4 | Copyright (c) 2021 ENCCS 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /content/exercise/ex04/ex04.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! 
Copyright (c) 2021 ENCCS 3 | program dotproduct 4 | implicit none 5 | 6 | integer, parameter :: nx = 102400 7 | real, parameter :: r=0.2 8 | 9 | real, dimension(nx) :: vecA,vecB,vecC 10 | real :: sum 11 | integer :: i 12 | 13 | ! Initialization of vectors 14 | do i = 1, nx 15 | vecA(i) = r**(i-1) 16 | vecB(i) = 1.0 17 | end do 18 | 19 | ! Dot product of two vectors 20 | !$omp target teams distribute 21 | do i = 1, nx 22 | vecC(i) = vecA(i) * vecB(i) 23 | end do 24 | !$omp end target teams distribute 25 | 26 | sum = 0.0 27 | ! Calculate the sum 28 | do i = 1, nx 29 | sum = vecC(i) + sum 30 | end do 31 | 32 | write(*,*) 'The sum is: ', sum 33 | 34 | end program dotproduct 35 | -------------------------------------------------------------------------------- /content/exercise/ex04/ex04.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019 CSC Training */ 2 | /* Copyright (c) 2021 ENCCS */ 3 | #include <stdio.h> 4 | #include <math.h> 5 | #define NX 102400 6 | 7 | int main(void) 8 | { 9 | double vecA[NX],vecB[NX],vecC[NX]; 10 | double r=0.2; 11 | 12 | /* Initialization of vectors */ 13 | for (int i = 0; i < NX; i++) { 14 | vecA[i] = pow(r, i); 15 | vecB[i] = 1.0; 16 | } 17 | 18 | /* dot product of two vectors */ 19 | #pragma omp target teams distribute 20 | for (int i = 0; i < NX; i++) { 21 | vecC[i] = vecA[i] * vecB[i]; 22 | } 23 | 24 | double sum = 0.0; 25 | /* calculate the sum */ 26 | for (int i = 0; i < NX; i++) { 27 | sum += vecC[i]; 28 | } 29 | printf("The sum is: %8.6f \n", sum); 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /content/exercise/ex04/solution/ex04.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! 
Copyright (c) 2021 ENCCS 3 | program dotproduct 4 | implicit none 5 | 6 | integer, parameter :: nx = 102400 7 | real, parameter :: r=0.2 8 | 9 | real, dimension(nx) :: vecA,vecB,vecC 10 | real :: sum 11 | integer :: i 12 | 13 | ! Initialization of vectors 14 | do i = 1, nx 15 | vecA(i) = r**(i-1) 16 | vecB(i) = 1.0 17 | end do 18 | 19 | ! Dot product of two vectors 20 | !$omp target teams distribute map(from:vecC) map(to:vecA,vecB) 21 | do i = 1, nx 22 | vecC(i) = vecA(i) * vecB(i) 23 | end do 24 | !$omp end target teams distribute 25 | 26 | sum = 0.0 27 | ! Calculate the sum 28 | !$omp target map(tofrom:sum) 29 | do i = 1, nx 30 | sum = vecC(i) + sum 31 | end do 32 | !$omp end target 33 | write(*,*) 'The sum is: ', sum 34 | 35 | end program dotproduct 36 | -------------------------------------------------------------------------------- /content/exercise/ex04/solution/ex04.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019 CSC Training */ 2 | /* Copyright (c) 2021 ENCCS */ 3 | #include <stdio.h> 4 | #include <math.h> 5 | #define NX 102400 6 | 7 | int main(void) 8 | { 9 | double vecA[NX],vecB[NX],vecC[NX]; 10 | double r=0.2; 11 | 12 | /* Initialization of vectors */ 13 | for (int i = 0; i < NX; i++) { 14 | vecA[i] = pow(r, i); 15 | vecB[i] = 1.0; 16 | } 17 | 18 | /* dot product of two vectors */ 19 | #pragma omp target teams distribute map(from:vecC[0:NX]) map(to:vecA[0:NX],vecB[0:NX]) 20 | for (int i = 0; i < NX; i++) { 21 | vecC[i] = vecA[i] * vecB[i]; 22 | } 23 | 24 | double sum = 0.0; 25 | /* calculate the sum */ 26 | #pragma omp target map(tofrom:sum) 27 | for (int i = 0; i < NX; i++) { 28 | sum += vecC[i]; 29 | } 30 | printf("The sum is: %8.6f \n", sum); 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /content/exercise/ex05/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 CSC 
Training 4 | Copyright (c) 2021 ENCCS 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /content/exercise/ex05/ex05.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | program dotproduct 4 | implicit none 5 | 6 | integer, parameter :: nx = 102400 7 | real, parameter :: r=0.2 8 | 9 | real, dimension(nx) :: vecA,vecB,vecC 10 | real :: sum 11 | integer :: i 12 | 13 | ! Initialization of vectors 14 | do i = 1, nx 15 | vecA(i) = r**(i-1) 16 | vecB(i) = 1.0 17 | end do 18 | 19 | ! Dot product of two vectors 20 | !$omp target 21 | do i = 1, nx 22 | vecC(i) = vecA(i) * vecB(i) 23 | end do 24 | !$omp end target 25 | 26 | ! 
Initialization of vectors again 27 | do i = 1, nx 28 | vecA(i) = 0.5 29 | vecB(i) = 2.0 30 | end do 31 | 32 | !$omp target 33 | do i = 1, nx 34 | vecC(i) = vecC(i) + vecA(i) * vecB(i) 35 | end do 36 | !$omp end target 37 | 38 | sum = 0.0 39 | ! Calculate the sum 40 | do i = 1, nx 41 | sum = vecC(i) + sum 42 | end do 43 | write(*,'(A,F18.6)') 'The sum is: ', sum 44 | 45 | end program dotproduct 46 | -------------------------------------------------------------------------------- /content/exercise/ex05/ex05.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019 CSC Training */ 2 | /* Copyright (c) 2021 ENCCS */ 3 | #include <stdio.h> 4 | #include <math.h> 5 | #define NX 102400 6 | 7 | int main(void) 8 | { 9 | double vecA[NX],vecB[NX],vecC[NX]; 10 | double r=0.2; 11 | 12 | /* Initialization of vectors */ 13 | for (int i = 0; i < NX; i++) { 14 | vecA[i] = pow(r, i); 15 | vecB[i] = 1.0; 16 | } 17 | 18 | /* dot product of two vectors */ 19 | #pragma omp target 20 | for (int i = 0; i < NX; i++) { 21 | vecC[i] = vecA[i] * vecB[i]; 22 | } 23 | 24 | /* Initialization of vectors again */ 25 | for (int i = 0; i < NX; i++) { 26 | vecA[i] = 1.0; 27 | vecB[i] = 1.0; 28 | } 29 | 30 | #pragma omp target 31 | for (int i = 0; i < NX; i++) { 32 | vecC[i] = vecC[i] + vecA[i] * vecB[i]; 33 | } 34 | double sum = 0.0; 35 | /* calculate the sum */ 36 | for (int i = 0; i < NX; i++) { 37 | sum += vecC[i]; 38 | } 39 | printf("The sum is: %8.6f \n", sum); 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /content/exercise/ex05/solution/ex05.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | program dotproduct 4 | implicit none 5 | 6 | integer, parameter :: nx = 102400 7 | real, parameter :: r=0.2 8 | 9 | real, dimension(nx) :: vecA,vecB,vecC 10 | real :: sum 11 | integer :: i 12 | 13 | ! 
Initialization of vectors 14 | do i = 1, nx 15 | vecA(i) = r**(i-1) 16 | vecB(i) = 1.0 17 | end do 18 | 19 | ! Dot product of two vectors 20 | !$omp target data map(from:vecC) 21 | !$omp target map(to:vecA,vecB) 22 | do i = 1, nx 23 | vecC(i) = vecA(i) * vecB(i) 24 | end do 25 | !$omp end target 26 | 27 | ! Initialization of vectors again 28 | do i = 1, nx 29 | vecA(i) = 0.5 30 | vecB(i) = 2.0 31 | end do 32 | 33 | !$omp target map(to:vecA,vecB) 34 | do i = 1, nx 35 | vecC(i) = vecC(i) + vecA(i) * vecB(i) 36 | end do 37 | !$omp end target 38 | !$omp end target data 39 | 40 | sum = 0.0 41 | ! Calculate the sum 42 | do i = 1, nx 43 | sum = vecC(i) + sum 44 | end do 45 | write(*,'(A,F18.6)') 'The sum is: ', sum 46 | 47 | end program dotproduct 48 | -------------------------------------------------------------------------------- /content/exercise/ex05/solution/ex05.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019 CSC Training */ 2 | /* Copyright (c) 2021 ENCCS */ 3 | #include <stdio.h> 4 | #include <math.h> 5 | #define NX 102400 6 | 7 | int main(void) 8 | { 9 | double vecA[NX],vecB[NX],vecC[NX]; 10 | double r=0.2; 11 | 12 | /* Initialization of vectors */ 13 | for (int i = 0; i < NX; i++) { 14 | vecA[i] = pow(r, i); 15 | vecB[i] = 1.0; 16 | } 17 | 18 | /* dot product of two vectors */ 19 | #pragma omp target data map(from:vecC[0:NX]) 20 | { 21 | #pragma omp target map(to:vecA[0:NX],vecB[0:NX]) 22 | for (int i = 0; i < NX; i++) { 23 | vecC[i] = vecA[i] * vecB[i]; 24 | } 25 | 26 | /* Initialization of vectors again */ 27 | for (int i = 0; i < NX; i++) { 28 | vecA[i] = 0.5; 29 | vecB[i] = 2.0; 30 | } 31 | 32 | #pragma omp target map(to:vecA[0:NX],vecB[0:NX]) 33 | for (int i = 0; i < NX; i++) { 34 | vecC[i] = vecC[i] + vecA[i] * vecB[i]; 35 | } 36 | } 37 | double sum = 0.0; 38 | /* calculate the sum */ 39 | for (int i = 0; i < NX; i++) { 40 | sum += vecC[i]; 41 | } 42 | printf("The sum is: %8.6f \n", sum); 43 | return 0; 44 | } 45 
| -------------------------------------------------------------------------------- /content/exercise/ex06/ex06.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2021 ENCCS 2 | program dotproduct 3 | implicit none 4 | 5 | integer :: x 6 | 7 | x = 0 8 | !$omp target data map(tofrom:x) 9 | ! check point 1 10 | x = 10 11 | ! check point 2 12 | !$omp target update to(x) 13 | ! check point 3 14 | !$omp end target data 15 | 16 | end program dotproduct 17 | -------------------------------------------------------------------------------- /content/exercise/ex06/ex06.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2021 ENCCS */ 2 | #include <stdio.h> 3 | int main(void) 4 | { 5 | int x = 0; 6 | 7 | #pragma omp target data map(tofrom:x) 8 | { 9 | /* check point 1 */ 10 | x = 10; 11 | /* check point 2 */ 12 | #pragma omp target update to(x) 13 | /* check point 3 */ 14 | } 15 | 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /content/exercise/offloading/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 CSC Training 4 | Copyright (c) 2021 ENCCS 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /content/exercise/offloading/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 CSC Training 2 | # Copyright (c) 2021 ENCCS 3 | ifeq ($(COMP),) 4 | COMP=nv 5 | endif 6 | 7 | HAVE_PNG=0 8 | ifeq ($(HAVE_PNG),1) 9 | PNG_LIBS=-lpng -lz -lc -lrt 10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 11 | endif 12 | 13 | COMMONDIR=../common 14 | 15 | ifeq ($(COMP),gnu) 16 | CXX=g++ 17 | CC=gcc 18 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR) 19 | LDFLAGS= 20 | LIBS= 21 | endif 22 | 23 | ifeq ($(COMP),nv) 24 | CXX=nvc++ 25 | CC=nvc 26 | CCFLAGS=-O3 -mp=gpu -Minfo=mp -I$(COMMONDIR) 27 | LDFLAGS= 28 | LIBS= 29 | endif 30 | 31 | ifeq ($(COMP),intel) 32 | CXX=icpx 33 | CC=icx 34 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR) 35 | LDFLAGS= 36 | LIBS= 37 | endif 38 | 39 | EXE=heat_serial 40 | OBJS=main.o core.o setup.o utilities.o io.o 41 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 42 | 43 | 44 | all: $(EXE) 45 | 46 | 47 | core.o: core.cpp heat.h 48 | utilities.o: utilities.cpp heat.h 49 | setup.o: setup.cpp heat.h 50 | io.o: io.cpp heat.h 51 | main.o: main.cpp heat.h 52 | 53 | $(OBJS_PNG): C_COMPILER := $(CC) 54 | $(OBJS_PNG): CCFLAGS += -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include 55 | $(OBJS): C_COMPILER := $(CXX) 56 | 57 | $(EXE): $(OBJS) $(OBJS_PNG) 58 | $(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) 
$(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS) 59 | 60 | %.o: %.cpp 61 | $(CXX) $(CCFLAGS) -c $< -o $@ 62 | 63 | %.o: %.c 64 | $(CC) $(CCFLAGS) -c $< -o $@ 65 | 66 | .PHONY: clean 67 | clean: 68 | -/bin/rm -f $(EXE) a.out *.o *.png *~ ../common/*.o 69 | -------------------------------------------------------------------------------- /content/exercise/offloading/core.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // Main solver routines for heat equation solver 4 | 5 | #include "heat.h" 6 | 7 | // Update the temperature values using five-point stencil 8 | // Arguments: 9 | // curr: current temperature values 10 | // prev: temperature values from previous time step 11 | // a: diffusivity 12 | // dt: time step 13 | void evolve(field *curr, field *prev, double a, double dt) 14 | { 15 | // Help the compiler avoid being confused by the structs 16 | double *currdata = curr->data.data(); 17 | double *prevdata = prev->data.data(); 18 | int nx = curr->nx; 19 | int ny = curr->ny; 20 | 21 | // Determine the temperature field at next time step 22 | // As we have fixed boundary conditions, the outermost gridpoints 23 | // are not updated. 
24 | double dx2 = prev->dx * prev->dx; 25 | double dy2 = prev->dy * prev->dy; 26 | // add the directives below for offloading 27 | for (int i = 1; i < nx + 1; i++) { 28 | for (int j = 1; j < ny + 1; j++) { 29 | int ind = i * (ny + 2) + j; 30 | int ip = (i + 1) * (ny + 2) + j; 31 | int im = (i - 1) * (ny + 2) + j; 32 | int jp = i * (ny + 2) + j + 1; 33 | int jm = i * (ny + 2) + j - 1; 34 | currdata[ind] = prevdata[ind] + a*dt* 35 | ((prevdata[ip] - 2.0*prevdata[ind] + prevdata[im]) / dx2 + 36 | (prevdata[jp] - 2.0*prevdata[ind] + prevdata[jm]) / dy2); 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /content/exercise/offloading/fortran/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 CSC Training 2 | # Copyright (c) 2021 ENCCS 3 | ifeq ($(COMP),) 4 | COMP=nv 5 | endif 6 | 7 | HAVE_PNG=0 8 | ifeq ($(HAVE_PNG),1) 9 | PNG_LIBS=-lpng -lz -lc -lrt 10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 11 | endif 12 | 13 | 14 | COMMONDIR=../../common 15 | 16 | ifeq ($(COMP),nv) 17 | FC=nvfortran 18 | CC=nvc 19 | FCFLAGS=-O3 -mp=gpu -Minfo=mp 20 | CCFLAGS=-O3 -I$(COMMONDIR) 21 | LDFLAGS= 22 | LIBS= 23 | endif 24 | 25 | ifeq ($(COMP),gnu) 26 | FC=gfortran 27 | CC=gcc 28 | FCFLAGS=-O3 -Wall -fopenmp 29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR) 30 | LDFLAGS=-fopenmp 31 | LIBS= 32 | endif 33 | 34 | EXE=heat_serial 35 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o 36 | OBJS_PNG= $(COMMONDIR)/pngwriter.o 37 | 38 | all: $(EXE) 39 | 40 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 41 | heat_mod.o: heat_mod.F90 42 | core.o: core.F90 heat_mod.o 43 | utilities.o: utilities.F90 heat_mod.o 44 | io.o: io.F90 heat_mod.o pngwriter_mod.o 45 | setup.o: setup.F90 heat_mod.o utilities.o io.o 46 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o 47 | main.o: main.F90 heat_mod.o core.o io.o setup.o 
utilities.o 48 | 49 | $(EXE): $(OBJS) $(OBJS_PNG) 50 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS) 51 | 52 | %.o: %.F90 53 | $(FC) $(FCFLAGS) -c $< -o $@ 54 | 55 | %.o: %.c 56 | $(CC) -DHAVE_PNG=$(HAVE_PNG) $(CCFLAGS) -c $< -o $@ 57 | 58 | .PHONY: clean 59 | clean: 60 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ ../../common/*.o 61 | 62 | -------------------------------------------------------------------------------- /content/exercise/offloading/fortran/core.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Main solver routines for heat equation solver 4 | module core 5 | use heat 6 | 7 | contains 8 | 9 | ! Update the temperature values using five-point stencil 10 | ! Arguments: 11 | ! curr (type(field)): current temperature values 12 | ! prev (type(field)): temperature values from previous time step 13 | ! a (real(dp)): diffusivity 14 | ! dt (real(dp)): time step 15 | subroutine evolve(curr, prev, a, dt) 16 | 17 | implicit none 18 | 19 | type(field),target, intent(inout) :: curr, prev 20 | real(dp) :: a, dt 21 | integer :: i, j, nx, ny 22 | real(dp) :: dx, dy 23 | real(dp), pointer, contiguous, dimension(:,:) :: currdata, prevdata 24 | 25 | ! Help the compiler avoid being confused 26 | nx = curr%nx 27 | ny = curr%ny 28 | dx = curr%dx 29 | dy = curr%dy 30 | currdata => curr%data 31 | prevdata => prev%data 32 | 33 | ! Determine the temperature field at next time step As we have 34 | ! fixed boundary conditions, the outermost gridpoints are not 35 | ! updated. 36 | 37 | ! 
add the directives below for offloading 38 | 39 | do j = 1, ny 40 | do i = 1, nx 41 | currdata(i, j) = prevdata(i, j) + a * dt * & 42 | & ((prevdata(i-1, j) - 2.0 * prevdata(i, j) + & 43 | & prevdata(i+1, j)) / dx**2 + & 44 | & (prevdata(i, j-1) - 2.0 * prevdata(i, j) + & 45 | & prevdata(i, j+1)) / dy**2) 46 | end do 47 | end do 48 | end subroutine evolve 49 | 50 | end module core 51 | -------------------------------------------------------------------------------- /content/exercise/offloading/fortran/heat_mod.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Field metadata for heat equation solver 4 | module heat 5 | use iso_fortran_env, only : REAL64 6 | implicit none 7 | 8 | integer, parameter :: dp = REAL64 9 | real(dp), parameter :: DX = 0.01, DY = 0.01 ! Fixed grid spacing 10 | 11 | type :: field 12 | integer :: nx ! dimensions of the field (nx, ny) 13 | integer :: ny 14 | real(dp) :: dx 15 | real(dp) :: dy 16 | real(dp), dimension(:,:), allocatable :: data 17 | end type field 18 | 19 | contains 20 | ! Initialize the field type metadata 21 | ! Arguments: 22 | ! field0 (type(field)): field whose metadata is set (output) 23 | ! nx, ny (integer): field dimensions; the grid spacing comes from the module constants DX, DY 24 | subroutine set_field_dimensions(field0, nx, ny) 25 | implicit none 26 | 27 | type(field), intent(out) :: field0 28 | integer, intent(in) :: nx, ny 29 | 30 | field0%dx = DX 31 | field0%dy = DY 32 | field0%nx = nx 33 | field0%ny = ny 34 | 35 | end subroutine set_field_dimensions 36 | 37 | end module heat 38 | -------------------------------------------------------------------------------- /content/exercise/offloading/fortran/io.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! I/O routines for heat equation solver 4 | module io 5 | use heat 6 | 7 | contains 8 | 9 | ! 
Output routine, saves the temperature distribution as a png image 10 | ! Arguments: 11 | ! curr (type(field)): variable with the temperature data 12 | ! iter (integer): index of the time step 13 | subroutine write_field(curr, iter) 14 | 15 | use pngwriter 16 | implicit none 17 | type(field), intent(in) :: curr 18 | integer, intent(in) :: iter 19 | 20 | character(len=85) :: filename 21 | 22 | integer :: stat 23 | real(dp), dimension(:,:), allocatable, target :: full_data 24 | 25 | allocate(full_data(curr%nx, curr%ny)) 26 | ! Copy rand #0 data to the global array 27 | full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny) 28 | 29 | write(filename,'(A5,I4.4,A4,A)') 'heat_', iter, '.png' 30 | stat = save_png(full_data, curr%nx, curr%ny, filename) 31 | deallocate(full_data) 32 | 33 | end subroutine write_field 34 | 35 | 36 | ! Reads the temperature distribution from an input file 37 | ! Arguments: 38 | ! field0 (type(field)): field variable that will store the 39 | ! read data 40 | ! filename (char): name of the input file 41 | ! Note that this version assumes the input data to be in C memory layout 42 | subroutine read_field(field0, filename) 43 | 44 | implicit none 45 | type(field), intent(out) :: field0 46 | character(len=85), intent(in) :: filename 47 | 48 | integer :: nx, ny, i 49 | character(len=2) :: dummy 50 | 51 | real(dp), dimension(:,:), allocatable :: full_data 52 | 53 | open(10, file=filename) 54 | ! Read the header 55 | read(10, *) dummy, nx, ny 56 | 57 | call set_field_dimensions(field0, nx, ny) 58 | 59 | ! The arrays for temperature field contain also a halo region 60 | allocate(field0%data(0:field0%nx+1, 0:field0%ny+1)) 61 | 62 | allocate(full_data(nx, ny)) 63 | ! Read the data 64 | do i = 1, nx 65 | read(10, *) full_data(i, 1:ny) 66 | end do 67 | 68 | ! Copy to full array containing also boundaries 69 | field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:) 70 | 71 | ! 
Set the boundary values 72 | field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1) 73 | field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny) 74 | field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1) 75 | field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1) 76 | 77 | close(10) 78 | deallocate(full_data) 79 | 80 | end subroutine read_field 81 | 82 | end module io 83 | -------------------------------------------------------------------------------- /content/exercise/offloading/fortran/main.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Heat equation solver in 2D. 4 | 5 | program heat_solve 6 | use heat 7 | use core 8 | use io 9 | use setup 10 | use utilities 11 | use omp_lib 12 | 13 | implicit none 14 | 15 | real(dp), parameter :: a = 0.5 ! Diffusion constant 16 | type(field) :: current, previous ! Current and previus temperature fields 17 | 18 | real(dp) :: dt ! Time step 19 | integer :: nsteps ! Number of time steps 20 | integer, parameter :: image_interval = 1500 ! Image output interval 21 | 22 | integer :: iter 23 | 24 | real(dp) :: average_temp ! Average temperature 25 | 26 | real(kind=dp) :: start, stop ! Timers 27 | 28 | call initialize(current, previous, nsteps) 29 | 30 | ! Draw the picture of the initial state 31 | call write_field(current, 0) 32 | 33 | average_temp = average(current) 34 | write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp 35 | 36 | ! Largest stable time step 37 | dt = current%dx**2 * current%dy**2 / & 38 | & (2.0 * a * (current%dx**2 + current%dy**2)) 39 | 40 | ! 
Main iteration loop 41 | 42 | start = omp_get_wtime() 43 | 44 | do iter = 1, nsteps 45 | call evolve(current, previous, a, dt) 46 | if (mod(iter, image_interval) == 0) then 47 | call write_field(current, iter) 48 | end if 49 | call swap_fields(current, previous) 50 | end do 51 | 52 | stop = omp_get_wtime() 53 | 54 | ! Average temperature for reference 55 | average_temp = average(previous) 56 | 57 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.' 58 | write(*,'(A,F9.6)') 'Average temperature: ', average_temp 59 | if (command_argument_count() == 0) then 60 | write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239 61 | end if 62 | 63 | call finalize(current, previous) 64 | 65 | end program heat_solve 66 | -------------------------------------------------------------------------------- /content/exercise/offloading/fortran/pngwriter_mod.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! PNG writer for heat equation solver 4 | module pngwriter 5 | use heat 6 | 7 | contains 8 | 9 | function save_png(data, nx, ny, fname) result(stat) 10 | 11 | use, intrinsic :: ISO_C_BINDING 12 | implicit none 13 | 14 | real(dp), dimension(:,:), intent(in) :: data 15 | integer, intent(in) :: nx, ny 16 | character(len=*), intent(in) :: fname 17 | integer :: stat 18 | 19 | ! Interface for save_png C-function 20 | interface 21 | ! The C-function definition is 22 | ! int save_png(double *data, const int nx, const int ny, 23 | ! 
const char *fname) 24 | function save_png_c(data, nx, ny, fname, order) & 25 | & bind(C,name="save_png") result(stat) 26 | use, intrinsic :: ISO_C_BINDING 27 | implicit none 28 | real(kind=C_DOUBLE) :: data(*) 29 | integer(kind=C_INT), value, intent(IN) :: nx, ny 30 | character(kind=C_CHAR), intent(IN) :: fname(*) 31 | character(kind=C_CHAR), value, intent(IN) :: order 32 | integer(kind=C_INT) :: stat 33 | end function save_png_c 34 | end interface 35 | 36 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') 37 | if (stat /= 0) then 38 | write(*,*) 'save_png returned error!' 39 | end if 40 | 41 | end function save_png 42 | 43 | end module pngwriter 44 | -------------------------------------------------------------------------------- /content/exercise/offloading/fortran/utilities.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Utility routines for heat equation solver 4 | ! NOTE: This file does not need to be edited! 5 | module utilities 6 | use heat 7 | 8 | contains 9 | 10 | ! Swap the data fields of two variables of type field 11 | ! Arguments: 12 | ! curr, prev (type(field)): the two variables that are swapped 13 | subroutine swap_fields(curr, prev) 14 | 15 | implicit none 16 | 17 | type(field), intent(inout) :: curr, prev 18 | real(dp), allocatable, dimension(:,:) :: tmp 19 | 20 | call move_alloc(curr%data, tmp) 21 | call move_alloc(prev%data, curr%data) 22 | call move_alloc(tmp, prev%data) 23 | end subroutine swap_fields 24 | 25 | ! Copy the data from one field to another 26 | ! Arguments: 27 | ! from_field (type(field)): variable to copy from 28 | ! to_field (type(field)): variable to copy to 29 | subroutine copy_fields(from_field, to_field) 30 | 31 | implicit none 32 | 33 | type(field), intent(in) :: from_field 34 | type(field), intent(out) :: to_field 35 | 36 | ! 
Consistency checks 37 | if (.not.allocated(from_field%data)) then 38 | write (*,*) "Can not copy from a field without allocated data" 39 | stop 40 | end if 41 | if (.not.allocated(to_field%data)) then 42 | ! Target is not initialized, allocate memory with the same bounds as the source 43 | allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), & 44 | & lbound(from_field%data, 2):ubound(from_field%data, 2))) 45 | else if (any(shape(from_field%data) /= shape(to_field%data))) then 46 | write (*,*) "Wrong field data sizes in copy routine" 47 | print *, shape(from_field%data), shape(to_field%data) 48 | stop 49 | end if 50 | 51 | to_field%data = from_field%data 52 | 53 | to_field%nx = from_field%nx 54 | to_field%ny = from_field%ny 55 | to_field%dx = from_field%dx 56 | to_field%dy = from_field%dy 57 | end subroutine copy_fields 58 | 59 | function average(field0) 60 | 61 | implicit none 62 | 63 | real(dp) :: average 64 | type(field) :: field0 65 | 66 | real(dp) :: local_average 67 | integer :: rc 68 | 69 | average = sum(field0%data(1:field0%nx, 1:field0%ny)) 70 | average = average / (field0%nx * field0%ny) 71 | 72 | end function average 73 | 74 | end module utilities 75 | -------------------------------------------------------------------------------- /content/exercise/offloading/heat.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | #ifndef __HEAT_H__ 4 | #define __HEAT_H__ 5 | 6 | #include <vector> 7 | 8 | // Datatype for temperature field 9 | struct field { 10 | // nx and ny are the dimensions of the field. 
The array data 11 | // contains also ghost layers, so it will have dimensions nx+2 x ny+2 12 | int nx; 13 | int ny; 14 | // Size of the grid cells 15 | double dx; 16 | double dy; 17 | // The temperature values in the 2D grid 18 | std::vector<double> data; 19 | }; 20 | 21 | // We use here fixed grid spacing 22 | const double DX = 0.01; 23 | const double DY = 0.01; 24 | 25 | #if __cplusplus 26 | extern "C" { 27 | #endif 28 | // Function prototypes 29 | void set_field_dimensions(field *temperature, int nx, int ny); 30 | 31 | void initialize(int argc, char *argv[], field *temperature1, 32 | field *temperature2, int *nsteps); 33 | 34 | void generate_field(field *temperature); 35 | 36 | double average(field *temperature); 37 | 38 | void evolve(field *curr, field *prev, double a, double dt); 39 | 40 | void write_field(field *temperature, int iter); 41 | 42 | void read_field(field *temperature1, field *temperature2, 43 | char *filename); 44 | 45 | void copy_field(field *temperature1, field *temperature2); 46 | 47 | void swap_fields(field *temperature1, field *temperature2); 48 | 49 | void allocate_field(field *temperature); 50 | 51 | #if __cplusplus 52 | } 53 | #endif 54 | #endif // __HEAT_H__ 55 | 56 | -------------------------------------------------------------------------------- /content/exercise/offloading/io.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // I/O related functions for heat equation solver 4 | 5 | #include <algorithm> 6 | #include <cstdio> 7 | #include <cstdlib> 8 | #include <vector> 9 | 10 | #include "heat.h" 11 | #include "pngwriter.h" 12 | 13 | // Output routine that prints out a picture of the temperature 14 | // distribution. 15 | void write_field(field *temperature, int iter) 16 | { 17 | char filename[64]; 18 | 19 | // The actual write routine takes only the actual data 20 | // (without boundary layers) so we need to copy an array with that. 
21 | std::vector inner_data(temperature->nx * temperature->ny); 22 | auto inner_data_iterator = inner_data.begin(); 23 | auto beginning_of_row = temperature->data.begin() + (temperature->ny + 2) + 1; 24 | for (int i = 0; i < temperature->nx; i++) { 25 | auto end_of_row = beginning_of_row + temperature->ny; 26 | std::copy(beginning_of_row, end_of_row, inner_data_iterator); 27 | inner_data_iterator += temperature->ny; 28 | beginning_of_row = end_of_row + 2; 29 | } 30 | 31 | // Write out the data to a png file 32 | sprintf(filename, "%s_%04d.png", "heat", iter); 33 | save_png(inner_data.data(), temperature->nx, temperature->ny, filename, 'c'); 34 | } 35 | 36 | // Read the initial temperature distribution from a file and 37 | // initialize the temperature fields temperature1 and 38 | // temperature2 to the same initial state. 39 | void read_field(field *temperature1, field *temperature2, char *filename) 40 | { 41 | FILE *fp; 42 | int nx, ny, ind; 43 | 44 | int nx_local, ny_local, count; 45 | 46 | fp = fopen(filename, "r"); 47 | // Read the header 48 | count = fscanf(fp, "# %d %d \n", &nx, &ny); 49 | if (count < 2) { 50 | fprintf(stderr, "Error while reading the input file!\n"); 51 | exit(-1); 52 | } 53 | 54 | set_field_dimensions(temperature1, nx, ny); 55 | set_field_dimensions(temperature2, nx, ny); 56 | 57 | // Allocate arrays (including boundary layers) 58 | int newSize = (temperature1->nx + 2) * (temperature1->ny + 2); 59 | temperature1->data.resize(newSize, 0.0); 60 | temperature2->data.resize(newSize, 0.0); 61 | 62 | // Array from file 63 | std::vector file_data(nx * ny, 0.0); 64 | 65 | // Read the actual data 66 | for (int i = 0; i < nx; i++) { 67 | for (int j = 0; j < ny; j++) { 68 | ind = i * ny + j; 69 | count = fscanf(fp, "%lf", &file_data[ind]); 70 | } 71 | } 72 | 73 | nx_local = temperature1->nx; 74 | ny_local = temperature1->ny; 75 | 76 | // Copy to the inner part of the full temperature field 77 | auto temperature_data_iterator = 
temperature1->data.begin() + (ny_local + 2) + 1; 78 | auto beginning_of_row = file_data.begin(); 79 | for (int i = 0; i < nx_local; i++) { 80 | auto end_of_row = beginning_of_row + ny_local; 81 | std::copy(beginning_of_row, end_of_row, temperature_data_iterator); 82 | temperature_data_iterator += ny_local + 2; 83 | beginning_of_row = end_of_row; 84 | } 85 | 86 | // Set the boundary values 87 | for (int i = 1; i < nx_local + 1; i++) { 88 | temperature1->data[i * (ny_local + 2)] = temperature1->data[i * (ny_local + 2) + 1]; 89 | temperature1->data[i * (ny_local + 2) + ny + 1] = temperature1->data[i * (ny_local + 2) + ny]; 90 | } 91 | for (int j = 0; j < ny + 2; j++) { 92 | temperature1->data[j] = temperature1->data[ny_local + j]; 93 | temperature1->data[(nx_local + 1) * (ny_local + 2) + j] = 94 | temperature1->data[nx_local * (ny_local + 2) + j]; 95 | } 96 | 97 | copy_field(temperature1, temperature2); 98 | 99 | fclose(fp); 100 | } 101 | -------------------------------------------------------------------------------- /content/exercise/offloading/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // Main routine for heat equation solver in 2D. 
4 | 5 | #include <cstdio> 6 | #include <omp.h> 7 | 8 | #include "heat.h" 9 | 10 | int main(int argc, char **argv) 11 | { 12 | // Image output interval 13 | int image_interval = 1500; 14 | 15 | // Number of time steps 16 | int nsteps; 17 | // Current and previous temperature fields 18 | field current, previous; 19 | initialize(argc, argv, &current, &previous, &nsteps); 20 | 21 | // Output the initial field 22 | write_field(&current, 0); 23 | 24 | double average_temp = average(&current); 25 | printf("Average temperature at start: %f\n", average_temp); 26 | 27 | // Diffusion constant 28 | double a = 0.5; 29 | 30 | // Compute the largest stable time step 31 | double dx2 = current.dx * current.dx; 32 | double dy2 = current.dy * current.dy; 33 | // Time step 34 | double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2)); 35 | 36 | // Get the start time stamp 37 | double start_clock = omp_get_wtime(); 38 | 39 | // Time evolution 40 | for (int iter = 1; iter <= nsteps; iter++) { 41 | evolve(&current, &previous, a, dt); 42 | if (iter % image_interval == 0) { 43 | write_field(&current, iter); 44 | } 45 | // Swap current field so that it will be used 46 | // as previous for next iteration step 47 | swap_fields(&current, &previous); 48 | } 49 | 50 | double stop_clock = omp_get_wtime(); 51 | 52 | // Average temperature for reference 53 | average_temp = average(&previous); 54 | 55 | // Report the wall-clock time used for all the iterations 56 | printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock)); 57 | printf("Average temperature: %f\n", average_temp); 58 | if (argc == 1) { 59 | printf("Reference value with default arguments: 59.281239\n"); 60 | } 61 | 62 | // Output the final field 63 | write_field(&previous, nsteps); 64 | 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /content/exercise/offloading/utilities.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | 
// Utility functions for heat equation solver 4 | 5 | #include <algorithm> 6 | #include <cassert> 7 | 8 | #include "heat.h" 9 | 10 | 11 | // Copy the data of temperature1 into temperature2 (both must already have equal sizes) 12 | void copy_field(field *temperature1, field *temperature2) 13 | { 14 | assert(temperature1->nx == temperature2->nx); 15 | assert(temperature1->ny == temperature2->ny); 16 | assert(temperature1->data.size() == temperature2->data.size()); 17 | std::copy(temperature1->data.begin(), temperature1->data.end(), 18 | temperature2->data.begin()); 19 | } 20 | 21 | // Swap the field data for temperature1 and temperature2 22 | void swap_fields(field *temperature1, field *temperature2) 23 | { 24 | std::swap(temperature1->data, temperature2->data); 25 | } 26 | 27 | // Allocate memory for a temperature field and initialise it to zero 28 | void allocate_field(field *temperature) 29 | { 30 | // Include also boundary layers 31 | int newSize = (temperature->nx + 2) * (temperature->ny + 2); 32 | temperature->data.resize(newSize, 0.0); 33 | } 34 | 35 | // Calculate average temperature over the non-boundary grid cells 36 | double average(field *temperature) 37 | { 38 | double average = 0.0; 39 | 40 | for (int i = 1; i < temperature->nx + 1; i++) { 41 | for (int j = 1; j < temperature->ny + 1; j++) { 42 | int ind = i * (temperature->ny + 2) + j; 43 | average += temperature->data[ind]; 44 | } 45 | } 46 | 47 | average /= (temperature->nx * temperature->ny); 48 | return average; 49 | } 50 | 51 | 52 | -------------------------------------------------------------------------------- /content/exercise/serial/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 CSC Training 4 | Copyright (c) 2021 ENCCS 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | 
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /content/exercise/serial/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 CSC Training 2 | # Copyright (c) 2021 ENCCS 3 | ifeq ($(COMP),) 4 | COMP=nv 5 | endif 6 | 7 | HAVE_PNG=0 8 | ifeq ($(HAVE_PNG),1) 9 | PNG_LIBS=-lpng -lz -lc -lrt 10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 11 | endif 12 | 13 | COMMONDIR=../common 14 | 15 | ifeq ($(COMP),pgi) 16 | CXX=pgCC 17 | CC=pgcc 18 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR) 19 | LDFLAGS= 20 | LIBS= 21 | endif 22 | 23 | ifeq ($(COMP),gnu) 24 | CXX=g++ 25 | CC=gcc 26 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR) 27 | LDFLAGS= 28 | LIBS= 29 | endif 30 | 31 | ifeq ($(COMP),nv) 32 | CXX=nvc++ 33 | CC=nvc 34 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR) 35 | LDFLAGS= 36 | LIBS= 37 | endif 38 | 39 | ifeq ($(COMP),intel) 40 | CXX=icpx 41 | CC=icx 42 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR) 43 | LDFLAGS= 44 | LIBS= 45 | endif 46 | 47 | EXE=heat_serial 48 | OBJS=main.o core.o setup.o utilities.o io.o 49 | 
OBJS_PNG=$(COMMONDIR)/pngwriter.o 50 | 51 | 52 | all: $(EXE) 53 | 54 | 55 | core.o: core.cpp heat.h 56 | utilities.o: utilities.cpp heat.h 57 | setup.o: setup.cpp heat.h 58 | io.o: io.cpp heat.h 59 | main.o: main.cpp heat.h 60 | 61 | $(OBJS_PNG): C_COMPILER := $(CC) 62 | $(OBJS_PNG): CCFLAGS += -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include 63 | $(OBJS): C_COMPILER := $(CXX) 64 | 65 | $(EXE): $(OBJS) $(OBJS_PNG) 66 | $(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS) 67 | 68 | %.o: %.cpp 69 | $(CXX) $(CCFLAGS) -c $< -o $@ 70 | 71 | %.o: %.c 72 | $(CC) $(CCFLAGS) -c $< -o $@ 73 | 74 | .PHONY: clean 75 | clean: 76 | -/bin/rm -f $(EXE) a.out *.o *.png *~ ../common/*.o 77 | -------------------------------------------------------------------------------- /content/exercise/serial/core.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // Main solver routines for heat equation solver 4 | 5 | #include "heat.h" 6 | 7 | // Update the temperature values using five-point stencil 8 | // Arguments: 9 | // curr: current temperature values 10 | // prev: temperature values from previous time step 11 | // a: diffusivity 12 | // dt: time step 13 | void evolve(field *curr, field *prev, double a, double dt) 14 | { 15 | // Help the compiler avoid being confused by the structs 16 | double *currdata = curr->data.data(); 17 | double *prevdata = prev->data.data(); 18 | int nx = curr->nx; 19 | int ny = curr->ny; 20 | 21 | // Determine the temperature field at next time step 22 | // As we have fixed boundary conditions, the outermost gridpoints 23 | // are not updated. 
24 | double dx2 = prev->dx * prev->dx; 25 | double dy2 = prev->dy * prev->dy; 26 | for (int i = 1; i < nx + 1; i++) { 27 | for (int j = 1; j < ny + 1; j++) { 28 | int ind = i * (ny + 2) + j; 29 | int ip = (i + 1) * (ny + 2) + j; 30 | int im = (i - 1) * (ny + 2) + j; 31 | int jp = i * (ny + 2) + j + 1; 32 | int jm = i * (ny + 2) + j - 1; 33 | currdata[ind] = prevdata[ind] + a*dt* 34 | ((prevdata[ip] - 2.0*prevdata[ind] + prevdata[im]) / dx2 + 35 | (prevdata[jp] - 2.0*prevdata[ind] + prevdata[jm]) / dy2); 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /content/exercise/serial/fortran/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 CSC Training 2 | # Copyright (c) 2021 ENCCS 3 | ifeq ($(COMP),) 4 | COMP=nv 5 | endif 6 | 7 | HAVE_PNG=0 8 | ifeq ($(HAVE_PNG),1) 9 | PNG_LIBS=-lpng -lz -lc -lrt 10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 11 | endif 12 | 13 | 14 | COMMONDIR=../../common 15 | 16 | ifeq ($(COMP),nv) 17 | FC=nvfortran 18 | CC=nvc 19 | FCFLAGS=-O3 -mp=multicore -Minfo=mp 20 | CCFLAGS=-O3 -I$(COMMONDIR) 21 | LDFLAGS= 22 | LIBS= 23 | endif 24 | 25 | ifeq ($(COMP),gnu) 26 | FC=gfortran 27 | CC=gcc 28 | FCFLAGS=-O3 -Wall -fopenmp 29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR) 30 | LDFLAGS=-fopenmp 31 | LIBS= 32 | endif 33 | 34 | 35 | EXE=heat_serial 36 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o 37 | OBJS_PNG= $(COMMONDIR)/pngwriter.o 38 | 39 | all: $(EXE) 40 | 41 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 42 | heat_mod.o: heat_mod.F90 43 | core.o: core.F90 heat_mod.o 44 | utilities.o: utilities.F90 heat_mod.o 45 | io.o: io.F90 heat_mod.o pngwriter_mod.o 46 | setup.o: setup.F90 heat_mod.o utilities.o io.o 47 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o 48 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o 49 | 50 | $(EXE): $(OBJS) $(OBJS_PNG) 
51 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS) 52 | 53 | %.o: %.F90 54 | $(FC) $(FCFLAGS) -c $< -o $@ 55 | 56 | %.o: %.c 57 | $(CC) -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include $(CCFLAGS) -c $< -o $@ 58 | 59 | .PHONY: clean 60 | clean: 61 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ ../../common/*.o 62 | 63 | -------------------------------------------------------------------------------- /content/exercise/serial/fortran/core.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Main solver routines for heat equation solver 4 | module core 5 | use heat 6 | 7 | contains 8 | 9 | ! Update the temperature values using five-point stencil 10 | ! Arguments: 11 | ! curr (type(field)): current temperature values 12 | ! prev (type(field)): temperature values from previous time step 13 | ! a (real(dp)): diffusivity 14 | ! dt (real(dp)): time step 15 | subroutine evolve(curr, prev, a, dt) 16 | 17 | implicit none 18 | 19 | type(field), target, intent(inout) :: curr, prev 20 | real(dp) :: a, dt 21 | integer :: i, j, nx, ny 22 | real(dp) :: dx, dy 23 | real(dp), pointer, contiguous, dimension(:,:) :: currdata, prevdata 24 | 25 | ! Help the compiler avoid being confused 26 | nx = curr%nx 27 | ny = curr%ny 28 | dx = curr%dx 29 | dy = curr%dy 30 | currdata => curr%data 31 | prevdata => prev%data 32 | 33 | ! Determine the temperature field at next time step As we have 34 | ! fixed boundary conditions, the outermost gridpoints are not 35 | ! updated. 
36 | do j = 1, ny 37 | do i = 1, nx 38 | currdata(i, j) = prevdata(i, j) + a * dt * & 39 | & ((prevdata(i-1, j) - 2.0 * prevdata(i, j) + & 40 | & prevdata(i+1, j)) / dx**2 + & 41 | & (prevdata(i, j-1) - 2.0 * prevdata(i, j) + & 42 | & prevdata(i, j+1)) / dy**2) 43 | end do 44 | end do 45 | end subroutine evolve 46 | 47 | end module core 48 | -------------------------------------------------------------------------------- /content/exercise/serial/fortran/heat_mod.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Field metadata for heat equation solver 4 | module heat 5 | use iso_fortran_env, only : REAL64 6 | implicit none 7 | 8 | integer, parameter :: dp = REAL64 9 | real(dp), parameter :: DX = 0.01, DY = 0.01 ! Fixed grid spacing 10 | 11 | type :: field 12 | integer :: nx ! ldimension of the field 13 | integer :: ny 14 | real(dp) :: dx 15 | real(dp) :: dy 16 | real(dp), dimension(:,:), allocatable :: data 17 | end type field 18 | 19 | contains 20 | ! Initialize the field type metadata 21 | ! Arguments: 22 | ! field0 (type(field)): input field 23 | ! nx, ny, dx, dy: field dimensions and spatial step size 24 | subroutine set_field_dimensions(field0, nx, ny) 25 | implicit none 26 | 27 | type(field), intent(out) :: field0 28 | integer, intent(in) :: nx, ny 29 | 30 | field0%dx = DX 31 | field0%dy = DY 32 | field0%nx = nx 33 | field0%ny = ny 34 | 35 | end subroutine set_field_dimensions 36 | 37 | end module heat 38 | -------------------------------------------------------------------------------- /content/exercise/serial/fortran/io.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! I/O routines for heat equation solver 4 | module io 5 | use heat 6 | 7 | contains 8 | 9 | ! Output routine, saves the temperature distribution as a png image 10 | ! 
Arguments: 11 | ! curr (type(field)): variable with the temperature data 12 | ! iter (integer): index of the time step 13 | subroutine write_field(curr, iter) 14 | 15 | use pngwriter 16 | implicit none 17 | type(field), intent(in) :: curr 18 | integer, intent(in) :: iter 19 | 20 | character(len=85) :: filename 21 | 22 | integer :: stat 23 | real(dp), dimension(:,:), allocatable, target :: full_data 24 | 25 | allocate(full_data(curr%nx, curr%ny)) 26 | ! Copy rand #0 data to the global array 27 | full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny) 28 | 29 | write(filename,'(A5,I4.4,A4,A)') 'heat_', iter, '.png' 30 | stat = save_png(full_data, curr%nx, curr%ny, filename) 31 | deallocate(full_data) 32 | 33 | end subroutine write_field 34 | 35 | 36 | ! Reads the temperature distribution from an input file 37 | ! Arguments: 38 | ! field0 (type(field)): field variable that will store the 39 | ! read data 40 | ! filename (char): name of the input file 41 | ! Note that this version assumes the input data to be in C memory layout 42 | subroutine read_field(field0, filename) 43 | 44 | implicit none 45 | type(field), intent(out) :: field0 46 | character(len=85), intent(in) :: filename 47 | 48 | integer :: nx, ny, i 49 | character(len=2) :: dummy 50 | 51 | real(dp), dimension(:,:), allocatable :: full_data 52 | 53 | open(10, file=filename) 54 | ! Read the header 55 | read(10, *) dummy, nx, ny 56 | 57 | call set_field_dimensions(field0, nx, ny) 58 | 59 | ! The arrays for temperature field contain also a halo region 60 | allocate(field0%data(0:field0%nx+1, 0:field0%ny+1)) 61 | 62 | allocate(full_data(nx, ny)) 63 | ! Read the data 64 | do i = 1, nx 65 | read(10, *) full_data(i, 1:ny) 66 | end do 67 | 68 | ! Copy to full array containing also boundaries 69 | field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:) 70 | 71 | ! 
Set the boundary values 72 | field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1) 73 | field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny) 74 | field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1) 75 | field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1) 76 | 77 | close(10) 78 | deallocate(full_data) 79 | 80 | end subroutine read_field 81 | 82 | end module io 83 | -------------------------------------------------------------------------------- /content/exercise/serial/fortran/main.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Heat equation solver in 2D. 4 | 5 | program heat_solve 6 | use heat 7 | use core 8 | use io 9 | use setup 10 | use utilities 11 | use omp_lib 12 | 13 | implicit none 14 | 15 | real(dp), parameter :: a = 0.5 ! Diffusion constant 16 | type(field) :: current, previous ! Current and previus temperature fields 17 | 18 | real(dp) :: dt ! Time step 19 | integer :: nsteps ! Number of time steps 20 | integer, parameter :: image_interval = 1500 ! Image output interval 21 | 22 | integer :: iter 23 | 24 | real(dp) :: average_temp ! Average temperature 25 | 26 | real(kind=dp) :: start, stop ! Timers 27 | 28 | call initialize(current, previous, nsteps) 29 | 30 | ! Draw the picture of the initial state 31 | call write_field(current, 0) 32 | 33 | average_temp = average(current) 34 | write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp 35 | 36 | ! Largest stable time step 37 | dt = current%dx**2 * current%dy**2 / & 38 | & (2.0 * a * (current%dx**2 + current%dy**2)) 39 | 40 | ! 
Main iteration loop 41 | 42 | start = omp_get_wtime() 43 | 44 | do iter = 1, nsteps 45 | call evolve(current, previous, a, dt) 46 | if (mod(iter, image_interval) == 0) then 47 | call write_field(current, iter) 48 | end if 49 | call swap_fields(current, previous) 50 | end do 51 | 52 | stop = omp_get_wtime() 53 | 54 | ! Average temperature for reference 55 | average_temp = average(previous) 56 | 57 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.' 58 | write(*,'(A,F9.6)') 'Average temperature: ', average_temp 59 | if (command_argument_count() == 0) then 60 | write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239 61 | end if 62 | 63 | call finalize(current, previous) 64 | 65 | end program heat_solve 66 | -------------------------------------------------------------------------------- /content/exercise/serial/fortran/pngwriter_mod.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! PNG writer for heat equation solver 4 | module pngwriter 5 | use heat 6 | 7 | contains 8 | 9 | function save_png(data, nx, ny, fname) result(stat) 10 | 11 | use, intrinsic :: ISO_C_BINDING 12 | implicit none 13 | 14 | real(dp), dimension(:,:), intent(in) :: data 15 | integer, intent(in) :: nx, ny 16 | character(len=*), intent(in) :: fname 17 | integer :: stat 18 | 19 | ! Interface for save_png C-function 20 | interface 21 | ! The C-function definition is 22 | ! int save_png(double *data, const int nx, const int ny, 23 | ! 
const char *fname) 24 | function save_png_c(data, nx, ny, fname, order) & 25 | & bind(C,name="save_png") result(stat) 26 | use, intrinsic :: ISO_C_BINDING 27 | implicit none 28 | real(kind=C_DOUBLE) :: data(*) 29 | integer(kind=C_INT), value, intent(IN) :: nx, ny 30 | character(kind=C_CHAR), intent(IN) :: fname(*) 31 | character(kind=C_CHAR), value, intent(IN) :: order 32 | integer(kind=C_INT) :: stat 33 | end function save_png_c 34 | end interface 35 | 36 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') 37 | if (stat /= 0) then 38 | write(*,*) 'save_png returned error!' 39 | end if 40 | 41 | end function save_png 42 | 43 | end module pngwriter 44 | -------------------------------------------------------------------------------- /content/exercise/serial/fortran/setup.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Setup routines for heat equation solver 4 | module setup 5 | use heat 6 | 7 | contains 8 | 9 | subroutine initialize(previous, current, nsteps) 10 | use utilities 11 | use io 12 | 13 | implicit none 14 | 15 | type(field), intent(out) :: previous, current 16 | integer, intent(out) :: nsteps 17 | 18 | integer :: rows, cols 19 | logical :: using_input_file 20 | character(len=85) :: input_file, arg ! Input file name and command line arguments 21 | 22 | 23 | ! Default values for grid size and time steps 24 | rows = 2000 25 | cols = 2000 26 | nsteps = 500 27 | using_input_file = .false. 28 | 29 | ! Read in the command line arguments and 30 | ! set up the needed variables 31 | select case(command_argument_count()) 32 | case(0) ! No arguments -> default values 33 | case(1) ! One argument -> input file name 34 | using_input_file = .true. 35 | call get_command_argument(1, input_file) 36 | case(2) ! Two arguments -> input file name and number of steps 37 | using_input_file = .true. 
38 | call get_command_argument(1, input_file) 39 | call get_command_argument(2, arg) 40 | read(arg, *) nsteps 41 | case(3) ! Three arguments -> rows, cols and nsteps 42 | call get_command_argument(1, arg) 43 | read(arg, *) rows 44 | call get_command_argument(2, arg) 45 | read(arg, *) cols 46 | call get_command_argument(3, arg) 47 | read(arg, *) nsteps 48 | case default 49 | call usage() 50 | stop 51 | end select 52 | 53 | ! Initialize the fields according to the command line arguments 54 | if (using_input_file) then 55 | call read_field(previous, input_file) 56 | call copy_fields(previous, current) 57 | else 58 | call set_field_dimensions(previous, rows, cols) 59 | call set_field_dimensions(current, rows, cols) 60 | call generate_field(previous) 61 | call copy_fields(previous, current) 62 | end if 63 | 64 | end subroutine initialize 65 | 66 | ! Generate the initial temperature field. The pattern is a disc with a radius 67 | ! of nx / 6 in the center of the grid. 68 | ! Boundary conditions are (different) constant temperatures outside the grid 69 | subroutine generate_field(field0) 70 | use heat 71 | 72 | implicit none 73 | 74 | type(field), intent(inout) :: field0 75 | 76 | real(dp) :: radius2 77 | integer :: i, j, ds2 78 | 79 | ! The arrays for field contain also a halo region 80 | allocate(field0%data(0:field0%nx+1, 0:field0%ny+1)) 81 | 82 | ! Square of the disk radius 83 | radius2 = (field0%nx / 6.0_dp)**2 84 | 85 | do j = 0, field0%ny + 1 86 | do i = 0, field0%nx + 1 87 | ds2 = int((i - field0%nx / 2.0_dp + 1)**2 + & 88 | & (j - field0%ny / 2.0_dp + 1)**2) 89 | if (ds2 < radius2) then 90 | field0%data(i,j) = 5.0_dp 91 | else 92 | field0%data(i,j) = 65.0_dp 93 | end if 94 | end do 95 | end do 96 | 97 | ! Boundary conditions 98 | field0%data(:,0) = 20.0_dp 99 | field0%data(:,field0%ny+1) = 70.0_dp 100 | field0%data(0,:) = 85.0_dp 101 | field0%data(field0%nx+1,:) = 5.0_dp 102 | 103 | end subroutine generate_field 104 | 105 | 106 | ! Clean up routine for field type 107 | !
Arguments: 108 | ! field0 (type(field)): field variable to be cleared 109 | subroutine finalize(field0, field1) 110 | use heat 111 | 112 | implicit none 113 | 114 | type(field), intent(inout) :: field0, field1 115 | 116 | deallocate(field0%data) 117 | deallocate(field1%data) 118 | 119 | end subroutine finalize 120 | 121 | ! Helper routine that prints out a simple usage if 122 | ! user gives more than three arguments 123 | subroutine usage() 124 | implicit none 125 | character(len=256) :: buf 126 | 127 | call get_command_argument(0, buf) 128 | write (*,'(A)') 'Usage:' 129 | write (*,'(A, " (default values will be used)")') trim(buf) 130 | write (*,'(A, " ")') trim(buf) 131 | write (*,'(A, " ")') trim(buf) 132 | write (*,'(A, " ")') trim(buf) 133 | end subroutine usage 134 | 135 | end module setup 136 | -------------------------------------------------------------------------------- /content/exercise/serial/fortran/utilities.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Utility routines for heat equation solver 4 | ! NOTE: This file does not need to be edited! 5 | module utilities 6 | use heat 7 | 8 | contains 9 | 10 | ! Swap the data fields of two variables of type field 11 | ! Arguments: 12 | ! curr, prev (type(field)): the two variables that are swapped 13 | subroutine swap_fields(curr, prev) 14 | 15 | implicit none 16 | 17 | type(field), intent(inout) :: curr, prev 18 | real(dp), allocatable, dimension(:,:) :: tmp 19 | 20 | call move_alloc(curr%data, tmp) 21 | call move_alloc(prev%data, curr%data) 22 | call move_alloc(tmp, prev%data) 23 | end subroutine swap_fields 24 | 25 | ! Copy the data from one field to another 26 | ! Arguments: 27 | ! from_field (type(field)): variable to copy from 28 | ! 
to_field (type(field)): variable to copy to 29 | subroutine copy_fields(from_field, to_field) 30 | 31 | implicit none 32 | 33 | type(field), intent(in) :: from_field 34 | type(field), intent(out) :: to_field 35 | 36 | ! Consistency checks 37 | if (.not.allocated(from_field%data)) then 38 | write (*,*) "Can not copy from a field without allocated data" 39 | stop 40 | end if 41 | if (.not.allocated(to_field%data)) then 42 | ! Target is not initialize, allocate memory 43 | allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), & 44 | & lbound(from_field%data, 2):ubound(from_field%data, 2))) 45 | else if (any(shape(from_field%data) /= shape(to_field%data))) then 46 | write (*,*) "Wrong field data sizes in copy routine" 47 | print *, shape(from_field%data), shape(to_field%data) 48 | stop 49 | end if 50 | 51 | to_field%data = from_field%data 52 | 53 | to_field%nx = from_field%nx 54 | to_field%ny = from_field%ny 55 | to_field%dx = from_field%dx 56 | to_field%dy = from_field%dy 57 | end subroutine copy_fields 58 | 59 | function average(field0) 60 | 61 | implicit none 62 | 63 | real(dp) :: average 64 | type(field) :: field0 65 | 66 | real(dp) :: local_average 67 | integer :: rc 68 | 69 | average = sum(field0%data(1:field0%nx, 1:field0%ny)) 70 | average = average / (field0%nx * field0%ny) 71 | 72 | end function average 73 | 74 | end module utilities 75 | -------------------------------------------------------------------------------- /content/exercise/serial/heat.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | #ifndef __HEAT_H__ 4 | #define __HEAT_H__ 5 | 6 | #include 7 | 8 | // Datatype for temperature field 9 | struct field { 10 | // nx and ny are the dimensions of the field. 
The array data 11 | // contains also ghost layers, so it will have dimensions nx+2 x ny+2 12 | int nx; 13 | int ny; 14 | // Size of the grid cells 15 | double dx; 16 | double dy; 17 | // The temperature values in the 2D grid 18 | std::vector data; 19 | }; 20 | 21 | // We use here fixed grid spacing 22 | const double DX = 0.01; 23 | const double DY = 0.01; 24 | 25 | #if __cplusplus 26 | extern "C" { 27 | #endif 28 | // Function prototypes 29 | void set_field_dimensions(field *temperature, int nx, int ny); 30 | 31 | void initialize(int argc, char *argv[], field *temperature1, 32 | field *temperature2, int *nsteps); 33 | 34 | void generate_field(field *temperature); 35 | 36 | double average(field *temperature); 37 | 38 | void evolve(field *curr, field *prev, double a, double dt); 39 | 40 | void write_field(field *temperature, int iter); 41 | 42 | void read_field(field *temperature1, field *temperature2, 43 | char *filename); 44 | 45 | void copy_field(field *temperature1, field *temperature2); 46 | 47 | void swap_fields(field *temperature1, field *temperature2); 48 | 49 | void allocate_field(field *temperature); 50 | 51 | #if __cplusplus 52 | } 53 | #endif 54 | #endif // __HEAT_H__ 55 | 56 | -------------------------------------------------------------------------------- /content/exercise/serial/heat_0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/exercise/serial/heat_0000.png -------------------------------------------------------------------------------- /content/exercise/serial/heat_0010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/exercise/serial/heat_0010.png -------------------------------------------------------------------------------- /content/exercise/serial/io.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // I/O related functions for heat equation solver 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "heat.h" 11 | #include "pngwriter.h" 12 | 13 | // Output routine that prints out a picture of the temperature 14 | // distribution. 15 | void write_field(field *temperature, int iter) 16 | { 17 | char filename[64]; 18 | 19 | // The actual write routine takes only the actual data 20 | // (without boundary layers) so we need to copy an array with that. 21 | std::vector inner_data(temperature->nx * temperature->ny); 22 | auto inner_data_iterator = inner_data.begin(); 23 | auto beginning_of_row = temperature->data.begin() + (temperature->ny + 2) + 1; 24 | for (int i = 0; i < temperature->nx; i++) { 25 | auto end_of_row = beginning_of_row + temperature->ny; 26 | std::copy(beginning_of_row, end_of_row, inner_data_iterator); 27 | inner_data_iterator += temperature->ny; 28 | beginning_of_row = end_of_row + 2; 29 | } 30 | 31 | // Write out the data to a png file 32 | sprintf(filename, "%s_%04d.png", "heat", iter); 33 | save_png(inner_data.data(), temperature->nx, temperature->ny, filename, 'c'); 34 | } 35 | 36 | // Read the initial temperature distribution from a file and 37 | // initialize the temperature fields temperature1 and 38 | // temperature2 to the same initial state. 
39 | void read_field(field *temperature1, field *temperature2, char *filename) 40 | { 41 | FILE *fp; 42 | int nx, ny, ind; 43 | 44 | int nx_local, ny_local, count; 45 | 46 | fp = fopen(filename, "r"); 47 | // Read the header 48 | count = fscanf(fp, "# %d %d \n", &nx, &ny); 49 | if (count < 2) { 50 | fprintf(stderr, "Error while reading the input file!\n"); 51 | exit(-1); 52 | } 53 | 54 | set_field_dimensions(temperature1, nx, ny); 55 | set_field_dimensions(temperature2, nx, ny); 56 | 57 | // Allocate arrays (including boundary layers) 58 | int newSize = (temperature1->nx + 2) * (temperature1->ny + 2); 59 | temperature1->data.resize(newSize, 0.0); 60 | temperature2->data.resize(newSize, 0.0); 61 | 62 | // Array from file 63 | std::vector file_data(nx * ny, 0.0); 64 | 65 | // Read the actual data 66 | for (int i = 0; i < nx; i++) { 67 | for (int j = 0; j < ny; j++) { 68 | ind = i * ny + j; 69 | count = fscanf(fp, "%lf", &file_data[ind]); 70 | } 71 | } 72 | 73 | nx_local = temperature1->nx; 74 | ny_local = temperature1->ny; 75 | 76 | // Copy to the inner part of the full temperature field 77 | auto temperature_data_iterator = temperature1->data.begin() + (ny_local + 2) + 1; 78 | auto beginning_of_row = file_data.begin(); 79 | for (int i = 0; i < nx_local; i++) { 80 | auto end_of_row = beginning_of_row + ny_local; 81 | std::copy(beginning_of_row, end_of_row, temperature_data_iterator); 82 | temperature_data_iterator += ny_local + 2; 83 | beginning_of_row = end_of_row; 84 | } 85 | 86 | // Set the boundary values 87 | for (int i = 1; i < nx_local + 1; i++) { 88 | temperature1->data[i * (ny_local + 2)] = temperature1->data[i * (ny_local + 2) + 1]; 89 | temperature1->data[i * (ny_local + 2) + ny + 1] = temperature1->data[i * (ny_local + 2) + ny]; 90 | } 91 | for (int j = 0; j < ny + 2; j++) { 92 | temperature1->data[j] = temperature1->data[ny_local + j]; 93 | temperature1->data[(nx_local + 1) * (ny_local + 2) + j] = 94 | temperature1->data[nx_local * (ny_local + 2) + j]; 
95 | } 96 | 97 | copy_field(temperature1, temperature2); 98 | 99 | fclose(fp); 100 | } 101 | -------------------------------------------------------------------------------- /content/exercise/serial/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // Main routine for heat equation solver in 2D. 4 | 5 | #include 6 | #include 7 | 8 | #include "heat.h" 9 | 10 | int main(int argc, char **argv) 11 | { 12 | // Image output interval 13 | int image_interval = 1500; 14 | 15 | // Number of time steps 16 | int nsteps; 17 | // Current and previous temperature fields 18 | field current, previous; 19 | initialize(argc, argv, &current, &previous, &nsteps); 20 | 21 | // Output the initial field 22 | write_field(&current, 0); 23 | 24 | double average_temp = average(&current); 25 | printf("Average temperature at start: %f\n", average_temp); 26 | 27 | // Diffusion constant 28 | double a = 0.5; 29 | 30 | // Compute the largest stable time step 31 | double dx2 = current.dx * current.dx; 32 | double dy2 = current.dy * current.dy; 33 | // Time step 34 | double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2)); 35 | 36 | // Get the start time stamp 37 | double start_clock = omp_get_wtime(); 38 | 39 | // Time evolution 40 | for (int iter = 1; iter <= nsteps; iter++) { 41 | evolve(&current, &previous, a, dt); 42 | if (iter % image_interval == 0) { 43 | write_field(&current, iter); 44 | } 45 | // Swap current field so that it will be used 46 | // as previous for next iteration step 47 | swap_fields(&current, &previous); 48 | } 49 | 50 | double stop_clock = omp_get_wtime(); 51 | 52 | // Average temperature for reference 53 | average_temp = average(&previous); 54 | 55 | // Determine the CPU time used for all the iterations 56 | printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock)); 57 | printf("Average temperature: %f\n", average_temp); 58 | if (argc == 1) { 59 | printf("Reference value with default
arguments: 59.281239\n"); 60 | } 61 | 62 | // Output the final field 63 | write_field(&previous, nsteps); 64 | 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /content/exercise/serial/utilities.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // Utility functions for heat equation solver 4 | 5 | #include 6 | #include 7 | 8 | #include "heat.h" 9 | 10 | 11 | // Copy data on temperature1 into temperature2 12 | void copy_field(field *temperature1, field *temperature2) 13 | { 14 | assert(temperature1->nx == temperature2->nx); 15 | assert(temperature1->ny == temperature2->ny); 16 | assert(temperature1->data.size() == temperature2->data.size()); 17 | std::copy(temperature1->data.begin(), temperature1->data.end(), 18 | temperature2->data.begin()); 19 | } 20 | 21 | // Swap the field data for temperature1 and temperature2 22 | void swap_fields(field *temperature1, field *temperature2) 23 | { 24 | std::swap(temperature1->data, temperature2->data); 25 | } 26 | 27 | // Allocate memory for a temperature field and initialise it to zero 28 | void allocate_field(field *temperature) 29 | { 30 | // Include also boundary layers 31 | int newSize = (temperature->nx + 2) * (temperature->ny + 2); 32 | temperature->data.resize(newSize, 0.0); 33 | } 34 | 35 | // Calculate average temperature over the non-boundary grid cells 36 | double average(field *temperature) 37 | { 38 | double average = 0.0; 39 | 40 | for (int i = 1; i < temperature->nx + 1; i++) { 41 | for (int j = 1; j < temperature->ny + 1; j++) { 42 | int ind = i * (temperature->ny + 2) + j; 43 | average += temperature->data[ind]; 44 | } 45 | } 46 | 47 | average /= (temperature->nx * temperature->ny); 48 | return average; 49 | } 50 | 51 | 52 | -------------------------------------------------------------------------------- 
/content/exercise/solution/common/pngwriter.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019 CSC Training */ 2 | /* Copyright (c) 2021 ENCCS */ 3 | #ifndef PNGWRITER_H_ 4 | #define PNGWRITER_H_ 5 | 6 | #if __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | int save_png(double *data, const int nx, const int ny, const char *fname, 11 | const char lang); 12 | 13 | #if __cplusplus 14 | } 15 | #endif 16 | #endif 17 | -------------------------------------------------------------------------------- /content/exercise/solution/data_mapping/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 CSC Training 4 | Copyright (c) 2021 ENCCS 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /content/exercise/solution/data_mapping/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 CSC Training 2 | # Copyright (c) 2021 ENCCS 3 | ifeq ($(COMP),) 4 | COMP=nv 5 | endif 6 | 7 | HAVE_PNG=0 8 | ifeq ($(HAVE_PNG),1) 9 | PNG_LIBS=-lpng -lz -lc -lrt 10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 11 | endif 12 | 13 | COMMONDIR=../common 14 | 15 | ifeq ($(COMP),gnu) 16 | CXX=g++ 17 | CC=gcc 18 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR) 19 | LDFLAGS= 20 | LIBS= 21 | endif 22 | 23 | ifeq ($(COMP),nv) 24 | CXX=nvc++ 25 | CC=nvc 26 | CCFLAGS=-O3 -mp=gpu -Minfo=mp -I$(COMMONDIR) 27 | LDFLAGS= 28 | LIBS= 29 | endif 30 | 31 | ifeq ($(COMP),intel) 32 | CXX=icpx 33 | CC=icx 34 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR) 35 | LDFLAGS= 36 | LIBS= 37 | endif 38 | 39 | EXE=heat_serial 40 | OBJS=main.o core.o setup.o utilities.o io.o 41 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 42 | 43 | 44 | all: $(EXE) 45 | 46 | 47 | core.o: core.cpp heat.h 48 | utilities.o: utilities.cpp heat.h 49 | setup.o: setup.cpp heat.h 50 | io.o: io.cpp heat.h 51 | main.o: main.cpp heat.h 52 | 53 | $(OBJS_PNG): C_COMPILER := $(CC) 54 | $(OBJS_PNG): CCFLAGS += -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include 55 | $(OBJS): C_COMPILER := $(CXX) 56 | 57 | $(EXE): $(OBJS) $(OBJS_PNG) 58 | $(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS) 59 | 60 | %.o: %.cpp 61 | $(CXX) $(CCFLAGS) -c $< -o $@ 62 | 63 | %.o: %.c 64 | $(CC) $(CCFLAGS) -c $< -o $@ 65 | 66 | .PHONY: clean 67 | clean: 68 | -/bin/rm -f $(EXE) a.out *.o *.png *~ ../common/*.o 69 | -------------------------------------------------------------------------------- /content/exercise/solution/data_mapping/core.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 
| // Copyright (c) 2021 ENCCS 3 | // Main solver routines for heat equation solver 4 | 5 | #include "heat.h" 6 | 7 | // Update the temperature values using five-point stencil 8 | // Arguments: 9 | // curr: current temperature values 10 | // prev: temperature values from previous time step 11 | // a: diffusivity 12 | // dt: time step 13 | void evolve(field *curr, field *prev, double a, double dt) 14 | { 15 | // Help the compiler avoid being confused by the structs 16 | double *currdata = curr->data.data(); 17 | double *prevdata = prev->data.data(); 18 | int nx = curr->nx; 19 | int ny = curr->ny; 20 | 21 | // Determine the temperature field at next time step 22 | // As we have fixed boundary conditions, the outermost gridpoints 23 | // are not updated. 24 | double dx2 = prev->dx * prev->dx; 25 | double dy2 = prev->dy * prev->dy; 26 | #pragma omp target teams distribute parallel for 27 | for (int i = 1; i < nx + 1; i++) { 28 | for (int j = 1; j < ny + 1; j++) { 29 | int ind = i * (ny + 2) + j; 30 | int ip = (i + 1) * (ny + 2) + j; 31 | int im = (i - 1) * (ny + 2) + j; 32 | int jp = i * (ny + 2) + j + 1; 33 | int jm = i * (ny + 2) + j - 1; 34 | currdata[ind] = prevdata[ind] + a*dt* 35 | ((prevdata[ip] - 2.0*prevdata[ind] + prevdata[im]) / dx2 + 36 | (prevdata[jp] - 2.0*prevdata[ind] + prevdata[jm]) / dy2); 37 | } 38 | } 39 | } 40 | 41 | // Start a data region and copy temperature fields to the device 42 | void enter_data(field *curr, field *prev) 43 | { 44 | int nx, ny; 45 | double *currdata, *prevdata; 46 | 47 | currdata = curr->data.data(); 48 | prevdata = prev->data.data(); 49 | nx = curr->nx; 50 | ny = curr->ny; 51 | 52 | // adding data mapping here 53 | #pragma omp target enter data \ 54 | map(to: currdata[0:(nx+2)*(ny+2)], prevdata[0:(nx+2)*(ny+2)]) 55 | } 56 | 57 | // End a data region and copy temperature fields back to the host 58 | void exit_data(field *curr, field *prev) 59 | { 60 | int nx, ny; 61 | double *currdata, *prevdata; 62 | 63 | currdata = 
curr->data.data(); 64 | prevdata = prev->data.data(); 65 | nx = curr->nx; 66 | ny = curr->ny; 67 | 68 | // adding data mapping here 69 | #pragma omp target exit data \ 70 | map(from: currdata[0:(nx+2)*(ny+2)], prevdata[0:(nx+2)*(ny+2)]) 71 | } 72 | 73 | // Copy a temperature field from the device to the host 74 | void update_host(field *temperature) 75 | { 76 | int nx, ny; 77 | double *data; 78 | 79 | data = temperature->data.data(); 80 | nx = temperature->nx; 81 | ny = temperature->ny; 82 | 83 | // adding data mapping here 84 | #pragma omp target update from(data[0:(nx+2)*(ny+2)]) 85 | } 86 | -------------------------------------------------------------------------------- /content/exercise/solution/data_mapping/fortran/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 CSC Training 2 | # Copyright (c) 2021 ENCCS 3 | ifeq ($(COMP),) 4 | COMP=nv 5 | endif 6 | 7 | HAVE_PNG=0 8 | ifeq ($(HAVE_PNG),1) 9 | PNG_LIBS=-lpng -lz -lc -lrt 10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 11 | endif 12 | 13 | 14 | COMMONDIR=../../common 15 | 16 | ifeq ($(COMP),nv) 17 | FC=nvfortran 18 | CC=nvc 19 | FCFLAGS=-O3 -mp=gpu -Minfo=mp 20 | CCFLAGS=-O3 -I$(COMMONDIR) 21 | LDFLAGS= 22 | LIBS= 23 | endif 24 | 25 | ifeq ($(COMP),gnu) 26 | FC=gfortran 27 | CC=gcc 28 | FCFLAGS=-O3 -Wall -fopenmp 29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR) 30 | LDFLAGS=-fopenmp 31 | LIBS= 32 | endif 33 | 34 | 35 | EXE=heat_serial 36 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o 37 | OBJS_PNG= $(COMMONDIR)/pngwriter.o 38 | 39 | all: $(EXE) 40 | 41 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 42 | heat_mod.o: heat_mod.F90 43 | core.o: core.F90 heat_mod.o 44 | utilities.o: utilities.F90 heat_mod.o 45 | io.o: io.F90 heat_mod.o pngwriter_mod.o 46 | setup.o: setup.F90 heat_mod.o utilities.o io.o 47 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o 48 | main.o: main.F90 
heat_mod.o core.o io.o setup.o utilities.o 49 | 50 | $(EXE): $(OBJS) $(OBJS_PNG) 51 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS) 52 | 53 | %.o: %.F90 54 | $(FC) $(FCFLAGS) -c $< -o $@ 55 | 56 | %.o: %.c 57 | $(CC) -DHAVE_PNG=$(HAVE_PNG) $(CCFLAGS) -c $< -o $@ 58 | 59 | .PHONY: clean 60 | clean: 61 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ ../../common/*.o 62 | 63 | -------------------------------------------------------------------------------- /content/exercise/solution/data_mapping/fortran/core.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Main solver routines for heat equation solver 4 | module core 5 | use heat 6 | 7 | contains 8 | 9 | ! Update the temperature values using five-point stencil 10 | ! Arguments: 11 | ! curr (type(field)): current temperature values 12 | ! prev (type(field)): temperature values from previous time step 13 | ! a (real(dp)): diffusivity 14 | ! dt (real(dp)): time step 15 | subroutine evolve(curr, prev, a, dt) 16 | 17 | implicit none 18 | 19 | type(field),target, intent(inout) :: curr, prev 20 | real(dp) :: a, dt 21 | integer :: i, j, nx, ny 22 | real(dp) :: dx, dy 23 | real(dp), pointer, contiguous, dimension(:,:) :: currdata, prevdata 24 | 25 | ! Help the compiler avoid being confused 26 | nx = curr%nx 27 | ny = curr%ny 28 | dx = curr%dx 29 | dy = curr%dy 30 | currdata => curr%data 31 | prevdata => prev%data 32 | 33 | ! Determine the temperature field at next time step As we have 34 | ! fixed boundary conditions, the outermost gridpoints are not 35 | ! updated. 
36 | !$omp target teams distribute parallel do 37 | do j = 1, ny 38 | do i = 1, nx 39 | currdata(i, j) = prevdata(i, j) + a * dt * & 40 | & ((prevdata(i-1, j) - 2.0 * prevdata(i, j) + & 41 | & prevdata(i+1, j)) / dx**2 + & 42 | & (prevdata(i, j-1) - 2.0 * prevdata(i, j) + & 43 | & prevdata(i, j+1)) / dy**2) 44 | end do 45 | end do 46 | !$omp end target teams distribute parallel do 47 | end subroutine evolve 48 | 49 | ! Start a data region and copy temperature fields to the device 50 | ! curr (type(field)): current temperature values 51 | ! prev (type(field)): values from previous time step 52 | subroutine enter_data(curr, prev) 53 | implicit none 54 | type(field), target, intent(in) :: curr, prev 55 | real(kind=dp), pointer, contiguous :: currdata(:,:), prevdata(:,:) 56 | 57 | currdata => curr%data 58 | prevdata => prev%data 59 | 60 | ! adding data mapping here 61 | !$omp target enter data map(to: currdata, prevdata) 62 | 63 | end subroutine enter_data 64 | 65 | ! End a data region and copy temperature fields back to the host 66 | ! curr (type(field)): current temperature values 67 | ! prev (type(field)): values from previous time step 68 | subroutine exit_data(curr, prev) 69 | implicit none 70 | type(field), target :: curr, prev 71 | real(kind=dp), pointer, contiguous :: currdata(:,:), prevdata(:,:) 72 | 73 | currdata => curr%data 74 | prevdata => prev%data 75 | 76 | ! adding data mapping here 77 | !$omp target exit data map(from: currdata, prevdata) 78 | 79 | end subroutine exit_data 80 | 81 | ! Copy a temperature field from the device to the host 82 | ! temperature (type(field)): temperature field 83 | subroutine update_host(temperature) 84 | implicit none 85 | type(field), target :: temperature 86 | real(kind=dp), pointer, contiguous :: tempdata(:,:) 87 | 88 | tempdata => temperature%data 89 | 90 | ! 
adding data mapping here 91 | !$omp target update from(tempdata) 92 | 93 | end subroutine update_host 94 | 95 | end module core 96 | -------------------------------------------------------------------------------- /content/exercise/solution/data_mapping/fortran/heat_mod.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Field metadata for heat equation solver 4 | module heat 5 | use iso_fortran_env, only : REAL64 6 | implicit none 7 | 8 | integer, parameter :: dp = REAL64 9 | real(dp), parameter :: DX = 0.01, DY = 0.01 ! Fixed grid spacing 10 | 11 | type :: field 12 | integer :: nx ! dimension of the field 13 | integer :: ny 14 | real(dp) :: dx 15 | real(dp) :: dy 16 | real(dp), dimension(:,:), allocatable :: data 17 | end type field 18 | 19 | contains 20 | ! Initialize the field type metadata 21 | ! Arguments: 22 | ! field0 (type(field)): field whose metadata is set 23 | ! nx, ny: field dimensions (the spatial step sizes are set from DX and DY) 24 | subroutine set_field_dimensions(field0, nx, ny) 25 | implicit none 26 | 27 | type(field), intent(out) :: field0 28 | integer, intent(in) :: nx, ny 29 | 30 | field0%dx = DX 31 | field0%dy = DY 32 | field0%nx = nx 33 | field0%ny = ny 34 | 35 | end subroutine set_field_dimensions 36 | 37 | end module heat 38 | -------------------------------------------------------------------------------- /content/exercise/solution/data_mapping/fortran/io.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! I/O routines for heat equation solver 4 | module io 5 | use heat 6 | 7 | contains 8 | 9 | ! Output routine, saves the temperature distribution as a png image 10 | ! Arguments: 11 | ! curr (type(field)): variable with the temperature data 12 | !
iter (integer): index of the time step 13 | subroutine write_field(curr, iter) 14 | 15 | use pngwriter 16 | implicit none 17 | type(field), intent(in) :: curr 18 | integer, intent(in) :: iter 19 | 20 | character(len=85) :: filename 21 | 22 | integer :: stat 23 | real(dp), dimension(:,:), allocatable, target :: full_data 24 | 25 | allocate(full_data(curr%nx, curr%ny)) 26 | ! Copy the interior of the field (without the halo) to the output array 27 | full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny) 28 | 29 | write(filename,'(A5,I4.4,A4,A)') 'heat_', iter, '.png' 30 | stat = save_png(full_data, curr%nx, curr%ny, filename) 31 | deallocate(full_data) 32 | 33 | end subroutine write_field 34 | 35 | 36 | ! Reads the temperature distribution from an input file 37 | ! Arguments: 38 | ! field0 (type(field)): field variable that will store the 39 | ! read data 40 | ! filename (char): name of the input file 41 | ! Note that this version assumes the input data to be in C memory layout 42 | subroutine read_field(field0, filename) 43 | 44 | implicit none 45 | type(field), intent(out) :: field0 46 | character(len=85), intent(in) :: filename 47 | 48 | integer :: nx, ny, i 49 | character(len=2) :: dummy 50 | 51 | real(dp), dimension(:,:), allocatable :: full_data 52 | 53 | open(10, file=filename) 54 | ! Read the header 55 | read(10, *) dummy, nx, ny 56 | 57 | call set_field_dimensions(field0, nx, ny) 58 | 59 | ! The arrays for temperature field contain also a halo region 60 | allocate(field0%data(0:field0%nx+1, 0:field0%ny+1)) 61 | 62 | allocate(full_data(nx, ny)) 63 | ! Read the data 64 | do i = 1, nx 65 | read(10, *) full_data(i, 1:ny) 66 | end do 67 | 68 | ! Copy to full array containing also boundaries 69 | field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:) 70 | 71 | !
Set the boundary values 72 | field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1) 73 | field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny) 74 | field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1) 75 | field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1) 76 | 77 | close(10) 78 | deallocate(full_data) 79 | 80 | end subroutine read_field 81 | 82 | end module io 83 | -------------------------------------------------------------------------------- /content/exercise/solution/data_mapping/fortran/main.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Heat equation solver in 2D. 4 | 5 | program heat_solve 6 | use heat 7 | use core 8 | use io 9 | use setup 10 | use utilities 11 | use omp_lib 12 | 13 | implicit none 14 | 15 | real(dp), parameter :: a = 0.5 ! Diffusion constant 16 | type(field) :: current, previous ! Current and previus temperature fields 17 | 18 | real(dp) :: dt ! Time step 19 | integer :: nsteps ! Number of time steps 20 | integer, parameter :: image_interval = 1500 ! Image output interval 21 | 22 | integer :: iter 23 | 24 | real(dp) :: average_temp ! Average temperature 25 | 26 | real(kind=dp) :: start, stop ! Timers 27 | 28 | call initialize(current, previous, nsteps) 29 | 30 | ! Draw the picture of the initial state 31 | call write_field(current, 0) 32 | 33 | average_temp = average(current) 34 | write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp 35 | 36 | ! Largest stable time step 37 | dt = current%dx**2 * current%dy**2 / & 38 | & (2.0 * a * (current%dx**2 + current%dy**2)) 39 | 40 | ! Main iteration loop 41 | 42 | start = omp_get_wtime() 43 | 44 | ! copy data to device 45 | call enter_data(current, previous) 46 | 47 | do iter = 1, nsteps 48 | call evolve(current, previous, a, dt) 49 | if (mod(iter, image_interval) == 0) then 50 | ! 
update data on host for output 51 | call update_host(current) 52 | call write_field(current, iter) 53 | end if 54 | call swap_fields(current, previous) 55 | end do 56 | 57 | ! copy data back to host 58 | call exit_data(current, previous) 59 | 60 | stop = omp_get_wtime() 61 | 62 | ! Average temperature for reference 63 | average_temp = average(previous) 64 | 65 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.' 66 | write(*,'(A,F9.6)') 'Average temperature: ', average_temp 67 | if (command_argument_count() == 0) then 68 | write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239 69 | end if 70 | 71 | call finalize(current, previous) 72 | 73 | end program heat_solve 74 | -------------------------------------------------------------------------------- /content/exercise/solution/data_mapping/fortran/pngwriter_mod.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! PNG writer for heat equation solver 4 | module pngwriter 5 | use heat 6 | 7 | contains 8 | 9 | function save_png(data, nx, ny, fname) result(stat) 10 | 11 | use, intrinsic :: ISO_C_BINDING 12 | implicit none 13 | 14 | real(dp), dimension(:,:), intent(in) :: data 15 | integer, intent(in) :: nx, ny 16 | character(len=*), intent(in) :: fname 17 | integer :: stat 18 | 19 | ! Interface for save_png C-function 20 | interface 21 | ! The C-function definition is 22 | ! int save_png(double *data, const int nx, const int ny, 23 | ! 
const char *fname, const char order) -- 5th argument as declared in the interface below 24 | function save_png_c(data, nx, ny, fname, order) & 25 | & bind(C,name="save_png") result(stat) 26 | use, intrinsic :: ISO_C_BINDING 27 | implicit none 28 | real(kind=C_DOUBLE) :: data(*) 29 | integer(kind=C_INT), value, intent(IN) :: nx, ny 30 | character(kind=C_CHAR), intent(IN) :: fname(*) 31 | character(kind=C_CHAR), value, intent(IN) :: order 32 | integer(kind=C_INT) :: stat 33 | end function save_png_c 34 | end interface 35 | 36 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') 37 | if (stat /= 0) then 38 | write(*,*) 'save_png returned error!' 39 | end if 40 | 41 | end function save_png 42 | 43 | end module pngwriter 44 | -------------------------------------------------------------------------------- /content/exercise/solution/data_mapping/fortran/utilities.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Utility routines for heat equation solver 4 | ! NOTE: This file does not need to be edited! 5 | module utilities 6 | use heat 7 | 8 | contains 9 | 10 | ! Swap the data arrays of two variables of type field (move_alloc exchanges the allocations, no element copy) 11 | ! Arguments: 12 | ! curr, prev (type(field)): the two variables that are swapped 13 | subroutine swap_fields(curr, prev) 14 | 15 | implicit none 16 | 17 | type(field), intent(inout) :: curr, prev 18 | real(dp), allocatable, dimension(:,:) :: tmp 19 | 20 | call move_alloc(curr%data, tmp) 21 | call move_alloc(prev%data, curr%data) 22 | call move_alloc(tmp, prev%data) 23 | end subroutine swap_fields 24 | 25 | ! Copy the data from one field to another 26 | ! Arguments: 27 | ! from_field (type(field)): variable to copy from 28 | ! to_field (type(field)): variable to copy to 29 | subroutine copy_fields(from_field, to_field) 30 | 31 | implicit none 32 | 33 | type(field), intent(in) :: from_field 34 | type(field), intent(out) :: to_field 35 | 36 | !
Consistency checks 37 | if (.not.allocated(from_field%data)) then 38 | write (*,*) "Can not copy from a field without allocated data" 39 | stop 40 | end if 41 | if (.not.allocated(to_field%data)) then 42 | ! Target is not initialized: allocate it with the same bounds as the source 43 | allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), & 44 | & lbound(from_field%data, 2):ubound(from_field%data, 2))) 45 | else if (any(shape(from_field%data) /= shape(to_field%data))) then 46 | write (*,*) "Wrong field data sizes in copy routine" 47 | print *, shape(from_field%data), shape(to_field%data) 48 | stop 49 | end if 50 | 51 | to_field%data = from_field%data 52 | 53 | to_field%nx = from_field%nx 54 | to_field%ny = from_field%ny 55 | to_field%dx = from_field%dx 56 | to_field%dy = from_field%dy 57 | end subroutine copy_fields 58 | ! Average of the interior cells (1:nx, 1:ny) of field0 59 | function average(field0) 60 | 61 | implicit none 62 | 63 | real(dp) :: average 64 | type(field) :: field0 65 | 66 | 67 | 68 | ! Sum the interior only (halo rows/columns are excluded), then divide by the cell count 69 | average = sum(field0%data(1:field0%nx, 1:field0%ny)) 70 | average = average / (field0%nx * field0%ny) 71 | 72 | end function average 73 | 74 | end module utilities 75 | -------------------------------------------------------------------------------- /content/exercise/solution/data_mapping/heat.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | #ifndef __HEAT_H__ 4 | #define __HEAT_H__ 5 | 6 | #include <vector> 7 | 8 | // Datatype for temperature field 9 | struct field { 10 | // nx and ny are the dimensions of the field.
The array data 11 | // contains also ghost layers, so it will have dimensions nx+2 x ny+2 12 | int nx; 13 | int ny; 14 | // Size of the grid cells 15 | double dx; 16 | double dy; 17 | // The temperature values in the 2D grid, stored flat with index i * (ny + 2) + j 18 | std::vector<double> data; 19 | }; 20 | 21 | // We use here fixed grid spacing 22 | const double DX = 0.01; 23 | const double DY = 0.01; 24 | 25 | #if __cplusplus 26 | extern "C" { 27 | #endif 28 | // Function prototypes 29 | void set_field_dimensions(field *temperature, int nx, int ny); 30 | 31 | void initialize(int argc, char *argv[], field *temperature1, 32 | field *temperature2, int *nsteps); 33 | 34 | void generate_field(field *temperature); 35 | 36 | double average(field *temperature); 37 | 38 | void evolve(field *curr, field *prev, double a, double dt); 39 | 40 | void write_field(field *temperature, int iter); 41 | 42 | void read_field(field *temperature1, field *temperature2, 43 | char *filename); 44 | 45 | void copy_field(field *temperature1, field *temperature2); 46 | 47 | void swap_fields(field *temperature1, field *temperature2); 48 | 49 | void allocate_field(field *temperature); 50 | 51 | void enter_data(field *temperature1, field *temperature2); 52 | 53 | void exit_data(field *temperature1, field *temperature2); 54 | 55 | void update_host(field *temperature); 56 | 57 | #if __cplusplus 58 | } 59 | #endif 60 | #endif // __HEAT_H__ 61 | 62 | -------------------------------------------------------------------------------- /content/exercise/solution/data_mapping/io.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // I/O related functions for heat equation solver 4 | 5 | #include <algorithm> 6 | #include <cstdio> 7 | #include <cstdlib> 8 | #include <vector> 9 | 10 | #include "heat.h" 11 | #include "pngwriter.h" 12 | 13 | // Output routine that prints out a picture of the temperature 14 | // distribution.
15 | void write_field(field *temperature, int iter) 16 | { 17 | char filename[64]; 18 | 19 | // The actual write routine takes only the actual data 20 | // (without boundary layers) so we need to copy an array with that. 21 | std::vector<double> inner_data(temperature->nx * temperature->ny); 22 | auto inner_data_iterator = inner_data.begin(); 23 | auto beginning_of_row = temperature->data.begin() + (temperature->ny + 2) + 1; 24 | for (int i = 0; i < temperature->nx; i++) { 25 | auto end_of_row = beginning_of_row + temperature->ny; 26 | std::copy(beginning_of_row, end_of_row, inner_data_iterator); 27 | inner_data_iterator += temperature->ny; 28 | beginning_of_row = end_of_row + 2; 29 | } 30 | 31 | // Write out the data to a png file (snprintf bounds the write to the buffer size) 32 | snprintf(filename, sizeof(filename), "%s_%04d.png", "heat", iter); 33 | save_png(inner_data.data(), temperature->nx, temperature->ny, filename, 'c'); 34 | } 35 | 36 | // Read the initial temperature distribution from a file and 37 | // initialize the temperature fields temperature1 and 38 | // temperature2 to the same initial state.
39 | void read_field(field *temperature1, field *temperature2, char *filename) 40 | { 41 | FILE *fp; 42 | int nx, ny, ind; 43 | 44 | int nx_local, ny_local, count; 45 | 46 | fp = fopen(filename, "r"); 47 | // Read the header 48 | count = fscanf(fp, "# %d %d \n", &nx, &ny); 49 | if (count < 2) { 50 | fprintf(stderr, "Error while reading the input file!\n"); 51 | exit(-1); 52 | } 53 | 54 | set_field_dimensions(temperature1, nx, ny); 55 | set_field_dimensions(temperature2, nx, ny); 56 | 57 | // Allocate arrays (including boundary layers) 58 | int newSize = (temperature1->nx + 2) * (temperature1->ny + 2); 59 | temperature1->data.resize(newSize, 0.0); 60 | temperature2->data.resize(newSize, 0.0); 61 | 62 | // Array from file 63 | std::vector file_data(nx * ny, 0.0); 64 | 65 | // Read the actual data 66 | for (int i = 0; i < nx; i++) { 67 | for (int j = 0; j < ny; j++) { 68 | ind = i * ny + j; 69 | count = fscanf(fp, "%lf", &file_data[ind]); 70 | } 71 | } 72 | 73 | nx_local = temperature1->nx; 74 | ny_local = temperature1->ny; 75 | 76 | // Copy to the inner part of the full temperature field 77 | auto temperature_data_iterator = temperature1->data.begin() + (ny_local + 2) + 1; 78 | auto beginning_of_row = file_data.begin(); 79 | for (int i = 0; i < nx_local; i++) { 80 | auto end_of_row = beginning_of_row + ny_local; 81 | std::copy(beginning_of_row, end_of_row, temperature_data_iterator); 82 | temperature_data_iterator += ny_local + 2; 83 | beginning_of_row = end_of_row; 84 | } 85 | 86 | // Set the boundary values 87 | for (int i = 1; i < nx_local + 1; i++) { 88 | temperature1->data[i * (ny_local + 2)] = temperature1->data[i * (ny_local + 2) + 1]; 89 | temperature1->data[i * (ny_local + 2) + ny + 1] = temperature1->data[i * (ny_local + 2) + ny]; 90 | } 91 | for (int j = 0; j < ny + 2; j++) { 92 | temperature1->data[j] = temperature1->data[ny_local + j]; 93 | temperature1->data[(nx_local + 1) * (ny_local + 2) + j] = 94 | temperature1->data[nx_local * (ny_local + 2) + j]; 
95 | } 96 | 97 | copy_field(temperature1, temperature2); 98 | 99 | fclose(fp); 100 | } 101 | -------------------------------------------------------------------------------- /content/exercise/solution/data_mapping/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // Main routine for heat equation solver in 2D. 4 | 5 | #include <cstdio> 6 | #include <omp.h> 7 | 8 | #include "heat.h" 9 | 10 | int main(int argc, char **argv) 11 | { 12 | // Image output interval 13 | int image_interval = 1500; 14 | 15 | // Number of time steps 16 | int nsteps; 17 | // Current and previous temperature fields 18 | field current, previous; 19 | initialize(argc, argv, &current, &previous, &nsteps); 20 | 21 | // Output the initial field 22 | write_field(&current, 0); 23 | 24 | double average_temp = average(&current); 25 | printf("Average temperature at start: %f\n", average_temp); 26 | 27 | // Diffusion constant 28 | double a = 0.5; 29 | 30 | // Compute the largest stable time step 31 | double dx2 = current.dx * current.dx; 32 | double dy2 = current.dy * current.dy; 33 | // Time step 34 | double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2)); 35 | 36 | // Get the start time stamp 37 | double start_clock = omp_get_wtime(); 38 | 39 | // Copy fields to device 40 | enter_data(&current, &previous); 41 | 42 | // Time evolution 43 | for (int iter = 1; iter <= nsteps; iter++) { 44 | evolve(&current, &previous, a, dt); 45 | if (iter % image_interval == 0) { 46 | // update data on host for output 47 | update_host(&current); 48 | write_field(&current, iter); 49 | } 50 | // Swap current field so that it will be used 51 | // as previous for next iteration step 52 | swap_fields(&current, &previous); 53 | } 54 | 55 | // copy data back to host 56 | exit_data(&current, &previous); 57 | 58 | double stop_clock = omp_get_wtime(); 59 | 60 | // Average temperature for reference (after the last swap the newest field is in previous) 61 | average_temp = average(&previous); 62 | 63 | // Determine the wall-clock time used for all the
iterations 64 | printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock)); 65 | printf("Average temperature: %f\n", average_temp); 66 | if (argc == 1) { 67 | printf("Reference value with default arguments: 59.281239\n"); 68 | } 69 | 70 | // Output the final field 71 | write_field(&previous, nsteps); 72 | 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /content/exercise/solution/data_mapping/utilities.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // Utility functions for heat equation solver 4 | 5 | #include <algorithm> 6 | #include <cassert> 7 | 8 | #include "heat.h" 9 | 10 | 11 | // Copy the data of temperature1 into temperature2 (both must already have equal dimensions and sizes) 12 | void copy_field(field *temperature1, field *temperature2) 13 | { 14 | assert(temperature1->nx == temperature2->nx); 15 | assert(temperature1->ny == temperature2->ny); 16 | assert(temperature1->data.size() == temperature2->data.size()); 17 | std::copy(temperature1->data.begin(), temperature1->data.end(), 18 | temperature2->data.begin()); 19 | } 20 | 21 | // Swap the field data for temperature1 and temperature2 (only the vectors are exchanged, nx/ny/dx/dy stay) 22 | void swap_fields(field *temperature1, field *temperature2) 23 | { 24 | std::swap(temperature1->data, temperature2->data); 25 | } 26 | 27 | // Resize the storage of a temperature field; elements added by the resize are set to zero 28 | void allocate_field(field *temperature) 29 | { 30 | // Include also boundary layers 31 | int newSize = (temperature->nx + 2) * (temperature->ny + 2); 32 | temperature->data.resize(newSize, 0.0); 33 | } 34 | 35 | // Calculate average temperature over the non-boundary grid cells 36 | double average(field *temperature) 37 | { 38 | double average = 0.0; 39 | 40 | for (int i = 1; i < temperature->nx + 1; i++) { 41 | for (int j = 1; j < temperature->ny + 1; j++) { 42 | int ind = i * (temperature->ny + 2) + j; 43 | average += temperature->data[ind]; 44 | } 45 | }
46 | 47 | average /= (temperature->nx * temperature->ny); 48 | return average; 49 | } 50 | 51 | 52 | -------------------------------------------------------------------------------- /content/exercise/solution/offloading/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 CSC Training 4 | Copyright (c) 2021 ENCCS 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /content/exercise/solution/offloading/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 CSC Training 2 | # Copyright (c) 2021 ENCCS 3 | ifeq ($(COMP),) 4 | COMP=nv 5 | endif 6 | # HAVE_PNG=1 adds the libpng link flags below; the value is also passed to pngwriter.o as -DHAVE_PNG 7 | HAVE_PNG=0 8 | ifeq ($(HAVE_PNG),1) 9 | PNG_LIBS=-lpng -lz -lc -lrt 10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 11 | endif 12 | 13 | COMMONDIR=../common 14 | 15 | ifeq ($(COMP),gnu) 16 | CXX=g++ 17 | CC=gcc 18 | CCFLAGS=-g -O3 -fopenmp -Wall -I$(COMMONDIR) 19 | LDFLAGS= 20 | LIBS= 21 | endif 22 | 23 | ifeq ($(COMP),nv) 24 | CXX=nvc++ 25 | CC=nvc 26 | CCFLAGS=-O3 -mp=gpu -Minfo=mp -I$(COMMONDIR) 27 | LDFLAGS= 28 | LIBS= 29 | endif 30 | 31 | ifeq ($(COMP),intel) 32 | CXX=icpx 33 | CC=icx 34 | CCFLAGS=-g -O3 -fopenmp -I$(COMMONDIR) 35 | LDFLAGS= 36 | LIBS= 37 | endif 38 | 39 | EXE=heat_serial 40 | OBJS=main.o core.o setup.o utilities.o io.o 41 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 42 | 43 | 44 | all: $(EXE) 45 | 46 | 47 | core.o: core.cpp heat.h 48 | utilities.o: utilities.cpp heat.h 49 | setup.o: setup.cpp heat.h 50 | io.o: io.cpp heat.h 51 | main.o: main.cpp heat.h 52 | 53 | $(OBJS_PNG): C_COMPILER := $(CC) 54 | $(OBJS_PNG): CCFLAGS += -DHAVE_PNG=$(HAVE_PNG) -I/usr/include -I/usr/local/include 55 | $(OBJS): C_COMPILER := $(CXX) 56 | 57 | $(EXE): $(OBJS) $(OBJS_PNG) 58 | $(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS) 59 | 60 | %.o: %.cpp 61 | $(CXX) $(CCFLAGS) -c $< -o $@ 62 | 63 | %.o: %.c 64 | $(CC) $(CCFLAGS) -c $< -o $@ 65 | 66 | .PHONY: all clean 67 | clean: 68 | -/bin/rm -f $(EXE) a.out *.o *.png *~ ../common/*.o 69 | -------------------------------------------------------------------------------- /content/exercise/solution/offloading/core.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | //
Copyright (c) 2021 ENCCS 3 | // Main solver routines for heat equation solver 4 | 5 | #include "heat.h" 6 | 7 | // Update the temperature values using five-point stencil 8 | // Arguments: 9 | // curr: current temperature values (written by this routine) 10 | // prev: temperature values from previous time step (only read) 11 | // a: diffusivity 12 | // dt: time step 13 | void evolve(field *curr, field *prev, double a, double dt) 14 | { 15 | // Take raw pointers and plain ints so the arrays can be named in the map clause below 16 | double *currdata = curr->data.data(); 17 | double *prevdata = prev->data.data(); 18 | int nx = curr->nx; 19 | int ny = curr->ny; 20 | 21 | // Determine the temperature field at next time step 22 | // As we have fixed boundary conditions, the outermost gridpoints 23 | // are not updated. The map clause gives no map-type, so both arrays use the default (tofrom). 24 | double dx2 = prev->dx * prev->dx; 25 | double dy2 = prev->dy * prev->dy; 26 | #pragma omp target teams distribute parallel for \ 27 | map(currdata[0:(nx+2)*(ny+2)],prevdata[0:(nx+2)*(ny+2)]) 28 | for (int i = 1; i < nx + 1; i++) { 29 | for (int j = 1; j < ny + 1; j++) { 30 | int ind = i * (ny + 2) + j; 31 | int ip = (i + 1) * (ny + 2) + j; 32 | int im = (i - 1) * (ny + 2) + j; 33 | int jp = i * (ny + 2) + j + 1; 34 | int jm = i * (ny + 2) + j - 1; 35 | currdata[ind] = prevdata[ind] + a*dt* 36 | ((prevdata[ip] - 2.0*prevdata[ind] + prevdata[im]) / dx2 + 37 | (prevdata[jp] - 2.0*prevdata[ind] + prevdata[jm]) / dy2); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /content/exercise/solution/offloading/fortran/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 CSC Training 2 | # Copyright (c) 2021 ENCCS 3 | ifeq ($(COMP),) 4 | COMP=nv 5 | endif 6 | 7 | HAVE_PNG=0 8 | ifeq ($(HAVE_PNG),1) 9 | PNG_LIBS=-lpng -lz -lc -lrt 10 | PNG_LDFLAGS=-L/usr/lib -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib 11 | endif 12 | 13 | 14 | COMMONDIR=../../common 15 | 16 | ifeq ($(COMP),nv) 17 | FC=nvfortran 18 | CC=nvc 19 |
FCFLAGS=-O3 -mp=gpu -Minfo=mp 20 | CCFLAGS=-O3 -I$(COMMONDIR) 21 | LDFLAGS= 22 | LIBS= 23 | endif 24 | 25 | ifeq ($(COMP),gnu) 26 | FC=gfortran 27 | CC=gcc 28 | FCFLAGS=-O3 -Wall -fopenmp 29 | CCFLAGS=-O3 -Wall -I$(COMMONDIR) 30 | LDFLAGS=-fopenmp 31 | LIBS= 32 | endif 33 | 34 | 35 | EXE=heat_serial 36 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o 37 | OBJS_PNG= $(COMMONDIR)/pngwriter.o 38 | 39 | all: $(EXE) 40 | # Module dependencies: an object is rebuilt when an object providing a module it uses changes 41 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 42 | heat_mod.o: heat_mod.F90 43 | core.o: core.F90 heat_mod.o 44 | utilities.o: utilities.F90 heat_mod.o 45 | io.o: io.F90 heat_mod.o pngwriter_mod.o 46 | setup.o: setup.F90 heat_mod.o utilities.o io.o 47 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o 48 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o 49 | 50 | $(EXE): $(OBJS) $(OBJS_PNG) 51 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(PNG_LDFLAGS) $(LIBS) $(PNG_LIBS) 52 | 53 | %.o: %.F90 54 | $(FC) $(FCFLAGS) -c $< -o $@ 55 | 56 | %.o: %.c 57 | $(CC) -DHAVE_PNG=$(HAVE_PNG) $(CCFLAGS) -c $< -o $@ 58 | 59 | .PHONY: all clean 60 | clean: 61 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ ../../common/*.o 62 | 63 | -------------------------------------------------------------------------------- /content/exercise/solution/offloading/fortran/core.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Main solver routines for heat equation solver 4 | module core 5 | use heat 6 | 7 | contains 8 | 9 | ! Update the temperature values using five-point stencil 10 | ! Arguments: 11 | ! curr (type(field)): current temperature values 12 | ! prev (type(field)): temperature values from previous time step 13 | ! a (real(dp)): diffusivity 14 | !
dt (real(dp)): time step 15 | subroutine evolve(curr, prev, a, dt) 16 | 17 | implicit none 18 | 19 | type(field),target, intent(inout) :: curr, prev 20 | real(dp), intent(in) :: a, dt 21 | integer :: i, j, nx, ny 22 | real(dp) :: dx, dy 23 | real(dp), pointer, contiguous, dimension(:,:) :: currdata, prevdata 24 | 25 | ! Copy the scalars into locals and alias the arrays so the offloaded loop uses no derived-type components 26 | nx = curr%nx 27 | ny = curr%ny 28 | dx = curr%dx 29 | dy = curr%dy 30 | currdata => curr%data 31 | prevdata => prev%data 32 | 33 | ! Determine the temperature field at next time step. As we have 34 | ! fixed boundary conditions, the outermost gridpoints are not 35 | ! updated. 36 | 37 | !$omp target teams distribute parallel do 38 | do j = 1, ny 39 | do i = 1, nx 40 | currdata(i, j) = prevdata(i, j) + a * dt * & 41 | & ((prevdata(i-1, j) - 2.0 * prevdata(i, j) + & 42 | & prevdata(i+1, j)) / dx**2 + & 43 | & (prevdata(i, j-1) - 2.0 * prevdata(i, j) + & 44 | & prevdata(i, j+1)) / dy**2) 45 | end do 46 | end do 47 | !$omp end target teams distribute parallel do 48 | end subroutine evolve 49 | 50 | end module core 51 | -------------------------------------------------------------------------------- /content/exercise/solution/offloading/fortran/heat_mod.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Field metadata for heat equation solver 4 | module heat 5 | use iso_fortran_env, only : REAL64 6 | implicit none 7 | 8 | integer, parameter :: dp = REAL64 9 | real(dp), parameter :: DX = 0.01_dp, DY = 0.01_dp ! Fixed grid spacing (kind suffix keeps full double precision) 10 | 11 | type :: field 12 | integer :: nx ! dimensions of the field (without the halo cells) 13 | integer :: ny 14 | real(dp) :: dx 15 | real(dp) :: dy 16 | real(dp), dimension(:,:), allocatable :: data 17 | end type field 18 | 19 | contains 20 | ! Initialize the field type metadata 21 | ! Arguments: 22 | ! field0 (type(field)): field whose metadata is set (output) 23 | !
nx, ny: field dimensions (the grid spacing dx, dy is taken from the module constants DX, DY) 24 | subroutine set_field_dimensions(field0, nx, ny) 25 | implicit none 26 | 27 | type(field), intent(out) :: field0 28 | integer, intent(in) :: nx, ny 29 | 30 | field0%dx = DX 31 | field0%dy = DY 32 | field0%nx = nx 33 | field0%ny = ny 34 | 35 | end subroutine set_field_dimensions 36 | 37 | end module heat 38 | -------------------------------------------------------------------------------- /content/exercise/solution/offloading/fortran/io.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! I/O routines for heat equation solver 4 | module io 5 | use heat 6 | 7 | contains 8 | 9 | ! Output routine, saves the temperature distribution as a png image 10 | ! Arguments: 11 | ! curr (type(field)): variable with the temperature data 12 | ! iter (integer): index of the time step 13 | subroutine write_field(curr, iter) 14 | 15 | use pngwriter 16 | implicit none 17 | type(field), intent(in) :: curr 18 | integer, intent(in) :: iter 19 | 20 | character(len=85) :: filename 21 | 22 | integer :: stat 23 | real(dp), dimension(:,:), allocatable, target :: full_data 24 | 25 | allocate(full_data(curr%nx, curr%ny)) 26 | ! Copy the interior (non-halo) cells into a separate nx x ny array; I0.4 zero-pads to 4 digits and widens beyond 9999 27 | full_data(1:curr%nx, 1:curr%ny) = curr%data(1:curr%nx, 1:curr%ny) 28 | write(filename,'(A5,I0.4,A4,A)') 'heat_', iter, '.png' 29 | stat = save_png(full_data, curr%nx, curr%ny, filename) 30 | deallocate(full_data) 31 | 32 | end subroutine write_field 33 | 34 | 35 | ! Reads the temperature distribution from an input file 36 | ! Arguments: 37 | ! field0 (type(field)): field variable that will store the 38 | ! read data 39 | ! filename (char): name of the input file 40 | !
Note that this version assumes the input data to be in C memory layout 41 | subroutine read_field(field0, filename) 42 | 43 | implicit none 44 | type(field), intent(out) :: field0 45 | character(len=85), intent(in) :: filename 46 | 47 | integer :: nx, ny, i 48 | character(len=2) :: dummy 49 | 50 | real(dp), dimension(:,:), allocatable :: full_data 51 | 52 | open(10, file=filename, status='old', action='read') 53 | ! Read the header (status='old' above makes a missing input file fail at open instead of being created) 54 | read(10, *) dummy, nx, ny 55 | 56 | call set_field_dimensions(field0, nx, ny) 57 | 58 | ! The arrays for temperature field contain also a halo region 59 | allocate(field0%data(0:field0%nx+1, 0:field0%ny+1)) 60 | 61 | allocate(full_data(nx, ny)) 62 | ! Read the data, one record per first-index value i 63 | do i = 1, nx 64 | read(10, *) full_data(i, 1:ny) 65 | end do 66 | 67 | ! Copy to full array containing also boundaries 68 | field0%data(1:field0%nx, 1:field0%ny) = full_data(:,:) 69 | 70 | ! Set the boundary values by copying the adjacent interior cells into the halo 71 | field0%data(1:field0%nx, 0) = field0%data(1:field0%nx, 1) 72 | field0%data(1:field0%nx, field0%ny + 1) = field0%data(1:field0%nx, field0%ny) 73 | field0%data(0, 0:field0%ny + 1) = field0%data(1, 0:field0%ny + 1) 74 | field0%data(field0%nx + 1, 0:field0%ny + 1) = field0%data(field0%nx, 0:field0%ny + 1) 75 | 76 | close(10) 77 | deallocate(full_data) 78 | 79 | end subroutine read_field 80 | 81 | end module io 82 | -------------------------------------------------------------------------------- /content/exercise/solution/offloading/fortran/main.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Heat equation solver in 2D. 4 | 5 | program heat_solve 6 | use heat 7 | use core 8 | use io 9 | use setup 10 | use utilities 11 | use omp_lib 12 | 13 | implicit none 14 | 15 | real(dp), parameter :: a = 0.5 ! Diffusion constant 16 | type(field) :: current, previous ! Current and previous temperature fields 17 | 18 | real(dp) :: dt ! Time step 19 | integer :: nsteps !
Number of time steps 20 | integer, parameter :: image_interval = 1500 ! Image output interval 21 | 22 | integer :: iter 23 | 24 | real(dp) :: average_temp ! Average temperature 25 | 26 | real(kind=dp) :: start, stop ! Wall-clock timestamps from omp_get_wtime 27 | 28 | call initialize(current, previous, nsteps) 29 | 30 | ! Draw the picture of the initial state 31 | call write_field(current, 0) 32 | 33 | average_temp = average(current) 34 | write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp 35 | 36 | ! Largest stable time step: dx^2 * dy^2 / (2 a (dx^2 + dy^2)) 37 | dt = current%dx**2 * current%dy**2 / & 38 | & (2.0 * a * (current%dx**2 + current%dy**2)) 39 | 40 | ! Main iteration loop 41 | 42 | start = omp_get_wtime() 43 | 44 | 45 | do iter = 1, nsteps 46 | call evolve(current, previous, a, dt) 47 | if (mod(iter, image_interval) == 0) then 48 | call write_field(current, iter) 49 | end if 50 | call swap_fields(current, previous) 51 | end do 52 | 53 | stop = omp_get_wtime() 54 | 55 | ! Average temperature for reference (after the last swap the newest field is in previous) 56 | average_temp = average(previous) 57 | 58 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.' 59 | write(*,'(A,F9.6)') 'Average temperature: ', average_temp 60 | if (command_argument_count() == 0) then 61 | write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239 62 | end if 63 | 64 | call finalize(current, previous) 65 | 66 | end program heat_solve 67 | -------------------------------------------------------------------------------- /content/exercise/solution/offloading/fortran/pngwriter_mod.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | !
PNG writer for heat equation solver (Fortran wrapper for the C save_png routine) 4 | module pngwriter 5 | use heat 6 | 7 | contains 8 | ! Pass a 2D array to the C routine save_png; a nonzero status is reported and returned to the caller 9 | function save_png(data, nx, ny, fname) result(stat) 10 | 11 | use, intrinsic :: ISO_C_BINDING 12 | implicit none 13 | 14 | real(dp), dimension(:,:), intent(in) :: data 15 | integer, intent(in) :: nx, ny 16 | character(len=*), intent(in) :: fname 17 | integer :: stat 18 | 19 | ! Interface for save_png C-function 20 | interface 21 | ! The C-function definition is 22 | ! int save_png(double *data, const int nx, const int ny, 23 | ! const char *fname, const char order) -- 5th argument as declared in the interface below 24 | function save_png_c(data, nx, ny, fname, order) & 25 | & bind(C,name="save_png") result(stat) 26 | use, intrinsic :: ISO_C_BINDING 27 | implicit none 28 | real(kind=C_DOUBLE) :: data(*) 29 | integer(kind=C_INT), value, intent(IN) :: nx, ny 30 | character(kind=C_CHAR), intent(IN) :: fname(*) 31 | character(kind=C_CHAR), value, intent(IN) :: order 32 | integer(kind=C_INT) :: stat 33 | end function save_png_c 34 | end interface 35 | ! The file name is trimmed and NUL-terminated for C; 'f' is passed as the order flag 36 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') 37 | if (stat /= 0) then 38 | write(*,*) 'save_png returned error!' 39 | end if 40 | 41 | end function save_png 42 | 43 | end module pngwriter 44 | -------------------------------------------------------------------------------- /content/exercise/solution/offloading/fortran/utilities.F90: -------------------------------------------------------------------------------- 1 | ! Copyright (c) 2019 CSC Training 2 | ! Copyright (c) 2021 ENCCS 3 | ! Utility routines for heat equation solver 4 | ! NOTE: This file does not need to be edited! 5 | module utilities 6 | use heat 7 | 8 | contains 9 | 10 | ! Swap the data arrays of two variables of type field (move_alloc exchanges the allocations, no element copy) 11 | ! Arguments: 12 | !
curr, prev (type(field)): the two variables that are swapped 13 | subroutine swap_fields(curr, prev) 14 | 15 | implicit none 16 | 17 | type(field), intent(inout) :: curr, prev 18 | real(dp), allocatable, dimension(:,:) :: tmp 19 | 20 | call move_alloc(curr%data, tmp) 21 | call move_alloc(prev%data, curr%data) 22 | call move_alloc(tmp, prev%data) 23 | end subroutine swap_fields 24 | 25 | ! Copy the data from one field to another 26 | ! Arguments: 27 | ! from_field (type(field)): variable to copy from 28 | ! to_field (type(field)): variable to copy to 29 | subroutine copy_fields(from_field, to_field) 30 | 31 | implicit none 32 | 33 | type(field), intent(in) :: from_field 34 | type(field), intent(out) :: to_field 35 | 36 | ! Consistency checks 37 | if (.not.allocated(from_field%data)) then 38 | write (*,*) "Can not copy from a field without allocated data" 39 | stop 40 | end if 41 | if (.not.allocated(to_field%data)) then 42 | ! Target is not initialized: allocate it with the same bounds as the source 43 | allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), & 44 | & lbound(from_field%data, 2):ubound(from_field%data, 2))) 45 | else if (any(shape(from_field%data) /= shape(to_field%data))) then 46 | write (*,*) "Wrong field data sizes in copy routine" 47 | print *, shape(from_field%data), shape(to_field%data) 48 | stop 49 | end if 50 | 51 | to_field%data = from_field%data 52 | 53 | to_field%nx = from_field%nx 54 | to_field%ny = from_field%ny 55 | to_field%dx = from_field%dx 56 | to_field%dy = from_field%dy 57 | end subroutine copy_fields 58 | ! Average of the interior cells (1:nx, 1:ny) of field0 59 | function average(field0) 60 | 61 | implicit none 62 | 63 | real(dp) :: average 64 | type(field) :: field0 65 | 66 | 67 | 68 | ! Sum the interior only (halo rows/columns are excluded), then divide by the cell count 69 | average = sum(field0%data(1:field0%nx, 1:field0%ny)) 70 | average = average / (field0%nx * field0%ny) 71 | 72 | end function average 73 | 74 | end module utilities 75 | --------------------------------------------------------------------------------
/content/exercise/solution/offloading/heat.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | #ifndef __HEAT_H__ 4 | #define __HEAT_H__ 5 | 6 | #include <vector> 7 | 8 | // Datatype for temperature field 9 | struct field { 10 | // nx and ny are the dimensions of the field. The array data 11 | // contains also ghost layers, so it will have dimensions nx+2 x ny+2 12 | int nx; 13 | int ny; 14 | // Size of the grid cells 15 | double dx; 16 | double dy; 17 | // The temperature values in the 2D grid, stored flat with index i * (ny + 2) + j 18 | std::vector<double> data; 19 | }; 20 | 21 | // We use here fixed grid spacing 22 | const double DX = 0.01; 23 | const double DY = 0.01; 24 | 25 | #if __cplusplus 26 | extern "C" { 27 | #endif 28 | // Function prototypes 29 | void set_field_dimensions(field *temperature, int nx, int ny); 30 | 31 | void initialize(int argc, char *argv[], field *temperature1, 32 | field *temperature2, int *nsteps); 33 | 34 | void generate_field(field *temperature); 35 | 36 | double average(field *temperature); 37 | 38 | void evolve(field *curr, field *prev, double a, double dt); 39 | 40 | void write_field(field *temperature, int iter); 41 | 42 | void read_field(field *temperature1, field *temperature2, 43 | char *filename); 44 | 45 | void copy_field(field *temperature1, field *temperature2); 46 | 47 | void swap_fields(field *temperature1, field *temperature2); 48 | 49 | void allocate_field(field *temperature); 50 | 51 | #if __cplusplus 52 | } 53 | #endif 54 | #endif // __HEAT_H__ 55 | 56 | -------------------------------------------------------------------------------- /content/exercise/solution/offloading/io.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // I/O related functions for heat equation solver 4 | 5 | #include <algorithm> 6 | #include <cstdio> 7 | #include <cstdlib> 8 | #include <vector> 9 | 10 | #include "heat.h" 11
| #include "pngwriter.h" 12 | 13 | // Output routine that prints out a picture of the temperature 14 | // distribution. 15 | void write_field(field *temperature, int iter) 16 | { 17 | char filename[64]; 18 | 19 | // The actual write routine takes only the actual data 20 | // (without boundary layers) so we need to copy an array with that. 21 | std::vector inner_data(temperature->nx * temperature->ny); 22 | auto inner_data_iterator = inner_data.begin(); 23 | auto beginning_of_row = temperature->data.begin() + (temperature->ny + 2) + 1; 24 | for (int i = 0; i < temperature->nx; i++) { 25 | auto end_of_row = beginning_of_row + temperature->ny; 26 | std::copy(beginning_of_row, end_of_row, inner_data_iterator); 27 | inner_data_iterator += temperature->ny; 28 | beginning_of_row = end_of_row + 2; 29 | } 30 | 31 | // Write out the data to a png file 32 | sprintf(filename, "%s_%04d.png", "heat", iter); 33 | save_png(inner_data.data(), temperature->nx, temperature->ny, filename, 'c'); 34 | } 35 | 36 | // Read the initial temperature distribution from a file and 37 | // initialize the temperature fields temperature1 and 38 | // temperature2 to the same initial state. 
39 | void read_field(field *temperature1, field *temperature2, char *filename) 40 | { 41 | FILE *fp; 42 | int nx, ny, ind; 43 | 44 | int nx_local, ny_local, count; 45 | 46 | fp = fopen(filename, "r"); 47 | // Read the header 48 | count = fscanf(fp, "# %d %d \n", &nx, &ny); 49 | if (count < 2) { 50 | fprintf(stderr, "Error while reading the input file!\n"); 51 | exit(-1); 52 | } 53 | 54 | set_field_dimensions(temperature1, nx, ny); 55 | set_field_dimensions(temperature2, nx, ny); 56 | 57 | // Allocate arrays (including boundary layers) 58 | int newSize = (temperature1->nx + 2) * (temperature1->ny + 2); 59 | temperature1->data.resize(newSize, 0.0); 60 | temperature2->data.resize(newSize, 0.0); 61 | 62 | // Array from file 63 | std::vector file_data(nx * ny, 0.0); 64 | 65 | // Read the actual data 66 | for (int i = 0; i < nx; i++) { 67 | for (int j = 0; j < ny; j++) { 68 | ind = i * ny + j; 69 | count = fscanf(fp, "%lf", &file_data[ind]); 70 | } 71 | } 72 | 73 | nx_local = temperature1->nx; 74 | ny_local = temperature1->ny; 75 | 76 | // Copy to the inner part of the full temperature field 77 | auto temperature_data_iterator = temperature1->data.begin() + (ny_local + 2) + 1; 78 | auto beginning_of_row = file_data.begin(); 79 | for (int i = 0; i < nx_local; i++) { 80 | auto end_of_row = beginning_of_row + ny_local; 81 | std::copy(beginning_of_row, end_of_row, temperature_data_iterator); 82 | temperature_data_iterator += ny_local + 2; 83 | beginning_of_row = end_of_row; 84 | } 85 | 86 | // Set the boundary values 87 | for (int i = 1; i < nx_local + 1; i++) { 88 | temperature1->data[i * (ny_local + 2)] = temperature1->data[i * (ny_local + 2) + 1]; 89 | temperature1->data[i * (ny_local + 2) + ny + 1] = temperature1->data[i * (ny_local + 2) + ny]; 90 | } 91 | for (int j = 0; j < ny + 2; j++) { 92 | temperature1->data[j] = temperature1->data[ny_local + j]; 93 | temperature1->data[(nx_local + 1) * (ny_local + 2) + j] = 94 | temperature1->data[nx_local * (ny_local + 2) + j]; 
95 | } 96 | 97 | copy_field(temperature1, temperature2); 98 | 99 | fclose(fp); 100 | } 101 | -------------------------------------------------------------------------------- /content/exercise/solution/offloading/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // Main routine for heat equation solver in 2D. 4 | 5 | #include 6 | #include 7 | 8 | #include "heat.h" 9 | 10 | int main(int argc, char **argv) 11 | { 12 | // Image output interval 13 | int image_interval = 1500; 14 | 15 | // Number of time steps 16 | int nsteps; 17 | // Current and previous temperature fields 18 | field current, previous; 19 | initialize(argc, argv, ¤t, &previous, &nsteps); 20 | 21 | // Output the initial field 22 | write_field(¤t, 0); 23 | 24 | double average_temp = average(¤t); 25 | printf("Average temperature at start: %f\n", average_temp); 26 | 27 | // Diffusion constant 28 | double a = 0.5; 29 | 30 | // Compute the largest stable time step 31 | double dx2 = current.dx * current.dx; 32 | double dy2 = current.dy * current.dy; 33 | // Time step 34 | double dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2)); 35 | 36 | // Get the start time stamp 37 | double start_clock = omp_get_wtime(); 38 | 39 | // Time evolution 40 | for (int iter = 1; iter <= nsteps; iter++) { 41 | evolve(¤t, &previous, a, dt); 42 | if (iter % image_interval == 0) { 43 | write_field(¤t, iter); 44 | } 45 | // Swap current field so that it will be used 46 | // as previous for next iteration step 47 | swap_fields(¤t, &previous); 48 | } 49 | 50 | double stop_clock = omp_get_wtime(); 51 | // Average temperature for reference 52 | average_temp = average(&previous); 53 | 54 | // Determine the CPU time used for all the iterations 55 | printf("Iterations took %.3f seconds.\n", (stop_clock - start_clock)); 56 | printf("Average temperature: %f\n", average_temp); 57 | if (argc == 1) { 58 | printf("Reference value with default 
arguments: 59.281239\n"); 59 | } 60 | 61 | // Output the final field 62 | write_field(&previous, nsteps); 63 | 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /content/exercise/solution/offloading/utilities.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 CSC Training 2 | // Copyright (c) 2021 ENCCS 3 | // Utility functions for heat equation solver 4 | 5 | #include 6 | #include 7 | 8 | #include "heat.h" 9 | 10 | 11 | // Copy data on temperature1 into temperature2 12 | void copy_field(field *temperature1, field *temperature2) 13 | { 14 | assert(temperature1->nx == temperature2->nx); 15 | assert(temperature1->ny == temperature2->ny); 16 | assert(temperature1->data.size() == temperature2->data.size()); 17 | std::copy(temperature1->data.begin(), temperature1->data.end(), 18 | temperature2->data.begin()); 19 | } 20 | 21 | // Swap the field data for temperature1 and temperature2 22 | void swap_fields(field *temperature1, field *temperature2) 23 | { 24 | std::swap(temperature1->data, temperature2->data); 25 | } 26 | 27 | // Allocate memory for a temperature field and initialise it to zero 28 | void allocate_field(field *temperature) 29 | { 30 | // Include also boundary layers 31 | int newSize = (temperature->nx + 2) * (temperature->ny + 2); 32 | temperature->data.resize(newSize, 0.0); 33 | } 34 | 35 | // Calculate average temperature over the non-boundary grid cells 36 | double average(field *temperature) 37 | { 38 | double average = 0.0; 39 | 40 | for (int i = 1; i < temperature->nx + 1; i++) { 41 | for (int j = 1; j < temperature->ny + 1; j++) { 42 | int ind = i * (temperature->ny + 2) + j; 43 | average += temperature->data[ind]; 44 | } 45 | } 46 | 47 | average /= (temperature->nx * temperature->ny); 48 | return average; 49 | } 50 | 51 | 52 | -------------------------------------------------------------------------------- /content/guide.rst: 
-------------------------------------------------------------------------------- 1 | Instructor's guide 2 | ------------------ 3 | 4 | 5 | 6 | Preparing to Teach 7 | ------------------ 8 | 9 | - Make sure that all the compilers are installed correctly on the system 10 | - Run all the examples beforehand at least once 11 | - Be aware of which parts could be skipped if needed 12 | - Give enough time for the exercises 13 | - Do not open too many tabs and switch among them 14 | - Emphasize the differences between C/C++ and Fortran for certain directives 15 | - Briefly introduce the exercises beforehand and give a short summary afterwards of the most important take-home messages 16 | -------------------------------------------------------------------------------- /content/img/Automatic-Scalability-of-Cuda-via-scaling-the-number-of-Streaming-Multiprocessors-and.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/Automatic-Scalability-of-Cuda-via-scaling-the-number-of-Streaming-Multiprocessors-and.png -------------------------------------------------------------------------------- /content/img/C2050Timeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/C2050Timeline.png -------------------------------------------------------------------------------- /content/img/ENCCS.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/ENCCS.jpg -------------------------------------------------------------------------------- /content/img/ENCCS_CSC_logos.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/ENCCS_CSC_logos.jpg -------------------------------------------------------------------------------- /content/img/HardwareReview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/HardwareReview.png -------------------------------------------------------------------------------- /content/img/Loom.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/Loom.jpeg -------------------------------------------------------------------------------- /content/img/ThreadExecution.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/ThreadExecution.jpeg -------------------------------------------------------------------------------- /content/img/coalesced.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/coalesced.png -------------------------------------------------------------------------------- /content/img/comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/comparison.png -------------------------------------------------------------------------------- /content/img/compp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/compp.png 
-------------------------------------------------------------------------------- /content/img/distributed_vs_shared.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/distributed_vs_shared.png -------------------------------------------------------------------------------- /content/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/favicon.ico -------------------------------------------------------------------------------- /content/img/gpu_vs_cpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/gpu_vs_cpu.png -------------------------------------------------------------------------------- /content/img/heat_0000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/heat_0000.png -------------------------------------------------------------------------------- /content/img/heat_montage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/heat_montage.png -------------------------------------------------------------------------------- /content/img/heteprogra.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/heteprogra.jpeg -------------------------------------------------------------------------------- /content/img/memsch.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/memsch.png -------------------------------------------------------------------------------- /content/img/microprocessor-trend-data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/microprocessor-trend-data.png -------------------------------------------------------------------------------- /content/img/nvidia_block_diagram.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/nvidia_block_diagram.jpeg -------------------------------------------------------------------------------- /content/img/omp-parallel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/omp-parallel.png -------------------------------------------------------------------------------- /content/img/processes-threads.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/processes-threads.png -------------------------------------------------------------------------------- /content/img/shared_mem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/shared_mem.png -------------------------------------------------------------------------------- /content/img/threads.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/threads.png -------------------------------------------------------------------------------- /content/img/volta-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/volta-architecture.png -------------------------------------------------------------------------------- /content/img/volta-sm-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/img/volta-sm-architecture.png -------------------------------------------------------------------------------- /content/interoperability.rst: -------------------------------------------------------------------------------- 1 | Working alongside GPU libraries 2 | =============================== 3 | 4 | .. questions:: 5 | 6 | - My code needs to use a library; how should they work together? 7 | - How to use OpenMP mapped variables? 8 | - How to use CUDA or HIP device variables in OpenMP? 9 | 10 | .. objectives:: 11 | 12 | - Understand TODO 13 | - Understand TODO 14 | - Understand 15 | - Understand 16 | 17 | .. prereq:: 18 | 19 | 1. TODO 20 | 2. TODO 21 | 22 | 23 | First heading 24 | ------------- 25 | 26 | OpenMP interoperability with CUDA C/C++ and CUDA Fortran. 27 | 28 | - You can call kernels written in CUDA C/C++ or CUDA Fortran in your OpenMP programs from the host. 29 | - You can use the OpenMP **USE_DEVICE_PTR** clause to pass OpenMP mapped variables to CUDA kernels that are launched from the host. 30 | - You can use the OpenMP **IS_DEVICE_PTR** clause to access CUDA device attribute variables or to pass device addresses directly to target regions.
31 | 32 | Second heading 33 | -------------- 34 | 35 | Some more text, with a figure 36 | 37 | .. figure:: img/stencil.svg 38 | :align: center 39 | 40 | This is a sample image 41 | 42 | .. exercise:: 43 | 44 | TODO get the students to think about the content and answer a Zoom quiz 45 | 46 | .. solution:: 47 | 48 | Hide the answer and reasoning in here 49 | 50 | Some source code 51 | ---------------- 52 | 53 | Sometimes we need to look at code, which can be in the webpage and optionally 54 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work. 55 | 56 | .. typealong:: The field data structure 57 | 58 | .. tabs:: 59 | 60 | .. tab:: C++ 61 | 62 | .. literalinclude:: exercise/serial/heat.h 63 | :language: cpp 64 | :lines: 9-19 65 | 66 | .. tab:: Fortran 67 | 68 | .. literalinclude:: exercise/serial/fortran/heat_mod.F90 69 | :language: fortran 70 | :lines: 11-17 71 | 72 | Building the code 73 | ----------------- 74 | 75 | If there's terminal output to discuss, show something like:: 76 | 77 | nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o 78 | nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o 79 | nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o 80 | nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o 81 | nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o 82 | nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial -lpng 83 | 84 | 85 | Running the code 86 | ---------------- 87 | 88 | To show a sample command line, use this approach 89 | 90 | .. code-block:: bash 91 | 92 | ./heat_serial 800 800 1000 93 | 94 | 95 | .. 
keypoints:: 96 | 97 | - TODO summarize the learning outcome 98 | - TODO 99 | -------------------------------------------------------------------------------- /content/multi-gpu.rst: -------------------------------------------------------------------------------- 1 | Multiple GPUs 2 | ============= 3 | 4 | .. questions:: 5 | 6 | - How do I run on more than one GPU? 7 | - TODO 8 | 9 | .. objectives:: 10 | 11 | - Understand TODO 12 | - Understand TODO 13 | - Understand 14 | - Understand 15 | 16 | .. prereq:: 17 | 18 | 1. TODO 19 | 2. TODO 20 | 21 | 22 | First heading 23 | ------------- 24 | 25 | Some text 26 | 27 | Second heading 28 | -------------- 29 | 30 | Some more text, with a figure 31 | 32 | .. figure:: img/stencil.svg 33 | :align: center 34 | 35 | This is a sample image 36 | 37 | .. exercise:: 38 | 39 | TODO get the students to think about the content and answer a Zoom quiz 40 | 41 | .. solution:: 42 | 43 | Hide the answer and reasoning in here 44 | 45 | Some source code 46 | ---------------- 47 | 48 | Sometimes we need to look at code, which can be in the webpage and optionally 49 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work. 50 | 51 | .. typealong:: The field data structure 52 | 53 | .. tabs:: 54 | 55 | .. tab:: C++ 56 | 57 | .. literalinclude:: exercise/serial/heat.h 58 | :language: cpp 59 | :lines: 9-19 60 | 61 | .. tab:: Fortran 62 | 63 | .. 
literalinclude:: exercise/serial/fortran/heat_mod.F90 64 | :language: fortran 65 | :lines: 11-17 66 | 67 | Building the code 68 | ----------------- 69 | 70 | If there's terminal output to discuss, show something like:: 71 | 72 | nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o 73 | nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o 74 | nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o 75 | nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o 76 | nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o 77 | nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial -lpng 78 | 79 | 80 | Running the code 81 | ---------------- 82 | 83 | To show a sample command line, use this approach 84 | 85 | .. code-block:: bash 86 | 87 | ./heat_serial 800 800 1000 88 | 89 | 90 | .. keypoints:: 91 | 92 | - TODO summarize the learning outcome 93 | - TODO 94 | -------------------------------------------------------------------------------- /content/optimization.rst: -------------------------------------------------------------------------------- 1 | Optimizing OpenMP offloaded code 2 | ================================ 3 | 4 | .. questions:: 5 | 6 | - What tools are available to run faster? 7 | - TODO 8 | 9 | .. objectives:: 10 | 11 | - Understand TODO 12 | - Understand TODO 13 | - Understand 14 | - Understand 15 | 16 | .. prereq:: 17 | 18 | 1. TODO 19 | 2. TODO 20 | 21 | 22 | First heading 23 | ------------- 24 | 25 | Some text 26 | 27 | Second heading 28 | -------------- 29 | 30 | Some more text, with a figure 31 | 32 | .. figure:: img/stencil.svg 33 | :align: center 34 | 35 | This is a sample image 36 | 37 | .. exercise:: 38 | 39 | TODO get the students to think about the content and answer a Zoom quiz 40 | 41 | .. 
solution:: 42 | 43 | Hide the answer and reasoning in here 44 | 45 | Some source code 46 | ---------------- 47 | 48 | Sometimes we need to look at code, which can be in the webpage and optionally 49 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work. 50 | 51 | .. typealong:: The field data structure 52 | 53 | .. tabs:: 54 | 55 | .. tab:: C++ 56 | 57 | .. literalinclude:: exercise/serial/heat.h 58 | :language: cpp 59 | :lines: 9-19 60 | 61 | .. tab:: Fortran 62 | 63 | .. literalinclude:: exercise/serial/fortran/heat_mod.F90 64 | :language: fortran 65 | :lines: 11-17 66 | 67 | Building the code 68 | ----------------- 69 | 70 | If there's terminal output to discuss, show something like:: 71 | 72 | nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o 73 | nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o 74 | nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o 75 | nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o 76 | nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o 77 | nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial -lpng 78 | 79 | 80 | Running the code 81 | ---------------- 82 | 83 | To show a sample command line, use this approach 84 | 85 | .. code-block:: bash 86 | 87 | ./heat_serial 800 800 1000 88 | 89 | 90 | .. keypoints:: 91 | 92 | - TODO summarize the learning outcome 93 | - TODO 94 | -------------------------------------------------------------------------------- /content/porting.rst: -------------------------------------------------------------------------------- 1 | Porting code to OpenMP offloading 2 | ================================= 3 | 4 | .. questions:: 5 | 6 | - When and why should I use OpenMP offloading in my code? 7 | - TODO 8 | 9 | .. objectives:: 10 | 11 | - Understand TODO 12 | - Understand TODO 13 | - Understand 14 | - Understand 15 | 16 | .. 
prereq:: 17 | 18 | 1. TODO 19 | 2. TODO 20 | 21 | 22 | First heading 23 | ------------- 24 | 25 | Some text 26 | 27 | Second heading 28 | -------------- 29 | 30 | Some more text, with a figure 31 | 32 | .. figure:: img/stencil.svg 33 | :align: center 34 | 35 | This is a sample image 36 | 37 | .. exercise:: 38 | 39 | TODO get the students to think about the content and answer a Zoom quiz 40 | 41 | .. solution:: 42 | 43 | Hide the answer and reasoning in here 44 | 45 | Some source code 46 | ---------------- 47 | 48 | Sometimes we need to look at code, which can be in the webpage and optionally 49 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work. 50 | 51 | .. typealong:: The field data structure 52 | 53 | .. tabs:: 54 | 55 | .. tab:: C++ 56 | 57 | .. literalinclude:: exercise/serial/heat.h 58 | :language: cpp 59 | :lines: 9-19 60 | 61 | .. tab:: Fortran 62 | 63 | .. literalinclude:: exercise/serial/fortran/heat_mod.F90 64 | :language: fortran 65 | :lines: 11-17 66 | 67 | Building the code 68 | ----------------- 69 | 70 | If there's terminal output to discuss, show something like:: 71 | 72 | nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o 73 | nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o 74 | nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o 75 | nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o 76 | nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o 77 | nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial -lpng 78 | 79 | 80 | Running the code 81 | ---------------- 82 | 83 | To show a sample command line, use this approach 84 | 85 | .. code-block:: bash 86 | 87 | ./heat_serial 800 800 1000 88 | 89 | 90 | .. 
keypoints:: 91 | 92 | - TODO summarize the learning outcome 93 | - TODO 94 | -------------------------------------------------------------------------------- /content/profiling.rst: -------------------------------------------------------------------------------- 1 | Profiling code for GPUs 2 | ======================= 3 | 4 | .. questions:: 5 | 6 | - What tools can help me reason about the performance of my code? 7 | - TODO 8 | 9 | .. objectives:: 10 | 11 | - Understand TODO 12 | - Understand TODO 13 | - Understand 14 | - Understand 15 | 16 | .. prereq:: 17 | 18 | 1. TODO 19 | 2. TODO 20 | 21 | 22 | First heading 23 | ------------- 24 | 25 | Some text 26 | 27 | Second heading 28 | -------------- 29 | 30 | Some more text, with a figure 31 | 32 | .. figure:: img/stencil.svg 33 | :align: center 34 | 35 | This is a sample image 36 | 37 | .. exercise:: 38 | 39 | TODO get the students to think about the content and answer a Zoom quiz 40 | 41 | .. solution:: 42 | 43 | Hide the answer and reasoning in here 44 | 45 | Some source code 46 | ---------------- 47 | 48 | Sometimes we need to look at code, which can be in the webpage and optionally 49 | you can pull out only some lines, or highlight others. Make sure both C++ and Fortran examples exist and work. 50 | 51 | .. typealong:: The field data structure 52 | 53 | .. tabs:: 54 | 55 | .. tab:: C++ 56 | 57 | .. literalinclude:: exercise/serial/heat.h 58 | :language: cpp 59 | :lines: 9-19 60 | 61 | .. tab:: Fortran 62 | 63 | .. 
literalinclude:: exercise/serial/fortran/heat_mod.F90 64 | :language: fortran 65 | :lines: 11-17 66 | 67 | Building the code 68 | ----------------- 69 | 70 | If there's terminal output to discuss, show something like:: 71 | 72 | nvc++ -g -O3 -fopenmp -Wall -I../common -c main.cpp -o main.o 73 | nvc++ -g -O3 -fopenmp -Wall -I../common -c core.cpp -o core.o 74 | nvc++ -g -O3 -fopenmp -Wall -I../common -c setup.cpp -o setup.o 75 | nvc++ -g -O3 -fopenmp -Wall -I../common -c utilities.cpp -o utilities.o 76 | nvc++ -g -O3 -fopenmp -Wall -I../common -c io.cpp -o io.o 77 | nvc++ -g -O3 -fopenmp -Wall -I../common main.o core.o setup.o utilities.o io.o ../common/pngwriter.o -o heat_serial -lpng 78 | 79 | 80 | Running the code 81 | ---------------- 82 | 83 | To show a sample command line, use this approach 84 | 85 | .. code-block:: bash 86 | 87 | ./heat_serial 800 800 1000 88 | 89 | 90 | .. keypoints:: 91 | 92 | - TODO summarize the learning outcome 93 | - TODO 94 | -------------------------------------------------------------------------------- /content/quick-reference.rst: -------------------------------------------------------------------------------- 1 | Quick Reference 2 | --------------- 3 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/composite.c: -------------------------------------------------------------------------------- 1 | #pragma omp target teams distribute parallel for simd [clauses] 2 | for-loops 3 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/composite.f90: -------------------------------------------------------------------------------- 1 | !$omp target teams distribute parallel do simd [clauses] 2 | do-loops 3 | !$omp end target teams distribute parallel do simd 4 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/distribute.c: 
-------------------------------------------------------------------------------- 1 | #pragma omp distribute [clauses] 2 | for-loops 3 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/distribute.clause: -------------------------------------------------------------------------------- 1 | clause: 2 | private(list) 3 | firstprivate(list) 4 | lastprivate(list) 5 | collapse(n) 6 | dist_schedule(kind[, chunk_size]) 7 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/distribute.f90: -------------------------------------------------------------------------------- 1 | !$omp distribute [clauses] 2 | do-loops 3 | !$omp end distribute 4 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/target.c: -------------------------------------------------------------------------------- 1 | #pragma omp target [clauses] 2 | structured-block 3 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/target.clause: -------------------------------------------------------------------------------- 1 | clause: 2 | if([ target:] scalar-expression) 3 | device(integer-expression) 4 | private(list) 5 | firstprivate(list) 6 | map([map-type:] list) 7 | is_device_ptr(list) 8 | defaultmap(tofrom:scalar) 9 | nowait 10 | depend(dependence-type : list) 11 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/target.f90: -------------------------------------------------------------------------------- 1 | !$omp target [clauses] 2 | structured-block 3 | !$omp end target 4 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/target_data.c: -------------------------------------------------------------------------------- 1 | #pragma omp target data clause [clauses] 2 | structured-block 3 | 
-------------------------------------------------------------------------------- /content/syntax/v4.5.0/target_data.clause: -------------------------------------------------------------------------------- 1 | clause: 2 | if( [target data:]scalar-logical-expression) 3 | device(scalar-integer-expression) 4 | map([map-type :] list) 5 | use_device_ptr(list) 6 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/target_data.f90: -------------------------------------------------------------------------------- 1 | !$omp target data clause [clauses] 2 | structured-block 3 | !$omp end target data 4 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/target_enter_data.c: -------------------------------------------------------------------------------- 1 | #pragma omp target enter data [clauses] 2 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/target_enter_data.f90: -------------------------------------------------------------------------------- 1 | !$omp target enter data [clauses] 2 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/target_enter_exit_data.clause: -------------------------------------------------------------------------------- 1 | clause: 2 | if(scalar-logical-expression) 3 | device(scalar-integer-expression) 4 | map( [map-type:] list) 5 | depend(dependence-type:list) 6 | nowait 7 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/target_exit_data.c: -------------------------------------------------------------------------------- 1 | #pragma omp target exit data [clauses] 2 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/target_exit_data.f90: -------------------------------------------------------------------------------- 1 | !$omp 
target exit data [clauses] 2 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/teams.c: -------------------------------------------------------------------------------- 1 | #pragma omp teams [clauses] 2 | structured-block 3 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/teams.clause: -------------------------------------------------------------------------------- 1 | clause: 2 | num_teams(integer-expression) 3 | thread_limit(integer-expression) 4 | default(shared | none) 5 | private(list) 6 | firstprivate(list) 7 | shared(list) 8 | reduction(reduction-identifier : list) 9 | -------------------------------------------------------------------------------- /content/syntax/v4.5.0/teams.f90: -------------------------------------------------------------------------------- 1 | !$omp teams [clauses] 2 | structured-block 3 | !$omp end teams 4 | -------------------------------------------------------------------------------- /content/volta-sm-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/openmp-gpu/4def10c3cf62eb8ebd422a3dbdbe3102d55d03de/content/volta-sm-architecture.png -------------------------------------------------------------------------------- /make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. 
Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Sphinx 2 | sphinx_rtd_theme 3 | sphinx_rtd_theme_ext_color_contrast 4 | myst_nb 5 | sphinx-lesson 6 | --------------------------------------------------------------------------------