├── .envrc ├── content ├── .gitignore ├── img │ ├── ENCCS.jpg │ ├── favicon.ico │ ├── mat-vec.png │ ├── kebnekaise.png │ ├── MPI_Allreduce.png │ ├── extrae-overlap.png │ ├── sample-image.png │ └── extrae-deadlock.png ├── zbibliography.rst ├── code │ ├── Makefile │ ├── snippets │ │ ├── allocate.c │ │ ├── alloc_mem+win_create.c │ │ ├── send_n.c │ │ └── n_send.c │ ├── .gitignore │ ├── LICENSE │ ├── day-3 │ │ ├── 04_rma-accumulate │ │ │ ├── rma-accumulate.c │ │ │ └── solution │ │ │ │ └── rma-accumulate.c │ │ ├── 01_rma-vs-nonblocking │ │ │ ├── rma-vs-nonblocking.c │ │ │ └── solution │ │ │ │ └── rma-vs-nonblocking.c │ │ ├── 03_rma-put │ │ │ ├── rma-put.c │ │ │ └── solution │ │ │ │ └── rma-put.c │ │ ├── 00_rma │ │ │ └── solution │ │ │ │ └── rma.c │ │ └── 02_rma-win-allocate │ │ │ ├── solution │ │ │ └── rma-win-allocate.c │ │ │ └── rma-win-allocate.c │ ├── day-1 │ │ ├── 00_comms-groups-create │ │ │ ├── solution │ │ │ │ └── comms-groups-create.c │ │ │ └── comms-groups-create.c │ │ ├── 01_comms-groups-divide-evenly │ │ │ ├── comms-split.c │ │ │ ├── solution │ │ │ │ ├── comms-split.c │ │ │ │ └── comms-groups-divide-evenly.c │ │ │ └── comms-groups-divide-evenly.c │ │ ├── 03_basic-extent-size │ │ │ └── solution │ │ │ │ └── basic-extent-size.c │ │ ├── 09_integrate-pi │ │ │ ├── pi-integration.c │ │ │ └── solution │ │ │ │ └── pi-integration.c │ │ ├── 05_pokemon-pack-unpack │ │ │ ├── pokemon-pack-unpack.c │ │ │ └── solution │ │ │ │ └── pokemon-pack-unpack.c │ │ ├── 07_pokemon-type-create-struct │ │ │ ├── pokemon-type-create-struct.c │ │ │ └── solution │ │ │ │ └── pokemon-type-create-struct.c │ │ ├── 04_struct-extent-size │ │ │ └── solution │ │ │ │ └── struct-extent-size.c │ │ ├── 08_broadcast │ │ │ ├── solution │ │ │ │ └── collective-communication-broadcast.c │ │ │ └── collective-communication-broadcast.c │ │ ├── 06_pokemon-pack-unpack-size │ │ │ └── solution │ │ │ │ └── pokemon-pack-unpack-size.c │ │ └── 02_compute-pi │ │ │ ├── pi-monte-carlo.c │ │ │ └── solution │ │ │ └── pi-monte-carlo.c │ ├── day-4 │ │ ├── 05_rma-lock-unlock │ │ │ ├── rma-lock-unlock.c │ │ │ └── solution │ │ │ │ └── rma-lock-unlock.c │ │ ├── 03_rma-fence │ │ │ ├── rma-fence.c │ │ │ └── solution │ │ │ │ └── rma-fence.c │ │ ├── 04_rma-pscw │ │ │ ├── rma-pswc.c │ │ │ └── solution │ │ │ │ └── rma-pscw.c │ │ ├── 10_integrate-pi │ │ │ ├── pi-integration.c │ │ │ └── solution │ │ │ │ └── pi-integration.c │ │ ├── 07_rma-pi-lock-unlock │ │ │ ├── rma-pi-lock-unlock.c │ │ │ └── solution │ │ │ │ └── rma-pi-lock-unlock.c │ │ ├── 00_threading-query │ │ │ ├── threading-query.c │ │ │ └── solution │ │ │ │ └── threading-query.c │ │ └── 06_rma-pi-pscw │ │ │ ├── rma-pi-pscw.c │ │ │ └── solution │ │ │ └── rma-pi-pscw.c │ └── day-2 │ │ ├── 01_allgather │ │ ├── solution │ │ │ └── collective-communication-allgather.c │ │ └── collective-communication-allgather.c │ │ ├── 02_alltoall │ │ ├── solution │ │ │ └── collective-communication-alltoall.c │ │ └── collective-communication-alltoall.c │ │ ├── 00_scatter-and-gather │ │ ├── solution │ │ │ ├── scatter-and-gather-2.c │ │ │ ├── scatter-and-gather-1.c │ │ │ └── scatter-and-gather-3.c │ │ ├── scatter-and-gather-2.c │ │ ├── scatter-and-gather-1.c │ │ └── scatter-and-gather-3.c │ │ ├── 03_scatterv-and-gatherv │ │ ├── solution │ │ │ └── scatterv-and-gatherv.c │ │ └── scatterv-and-gatherv.c │ │ ├── 05_overlap │ │ ├── non-blocking-communication-overlap.c │ │ └── solution │ │ │ └── non-blocking-communication-overlap-solution.c │ │ └── 04_deadlock │ │ ├── non-blocking-communication-deadlock.c │ │ └── solution │ │ └── 
non-blocking-communication-deadlock.c ├── diagrams │ ├── Problems-with-blocking-communication.drawio │ ├── fork-join-with-mpi.drawio │ ├── stencil-with-tasking.drawio │ ├── non-blocking-style stencil application.drawio │ ├── simple stencil workflow.drawio │ └── ring-style stencil workflow.drawio ├── bibliography.bib ├── _static │ └── overrides.css ├── setup.rst ├── conf.py ├── guide.rst └── custom.py ├── README.md ├── requirements.txt ├── .gitignore ├── Makefile ├── make.bat ├── flake.nix ├── flake.lock └── .github └── workflows └── sphinx.yml /.envrc: -------------------------------------------------------------------------------- 1 | use flake 2 | -------------------------------------------------------------------------------- /content/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | .jupyter_cache 3 | *~ 4 | -------------------------------------------------------------------------------- /content/img/ENCCS.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/intermediate-mpi/HEAD/content/img/ENCCS.jpg -------------------------------------------------------------------------------- /content/zbibliography.rst: -------------------------------------------------------------------------------- 1 | Bibliography 2 | ------------ 3 | 4 | .. bibliography:: bibliography.bib 5 | -------------------------------------------------------------------------------- /content/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/intermediate-mpi/HEAD/content/img/favicon.ico -------------------------------------------------------------------------------- /content/img/mat-vec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/intermediate-mpi/HEAD/content/img/mat-vec.png -------------------------------------------------------------------------------- /content/img/kebnekaise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/intermediate-mpi/HEAD/content/img/kebnekaise.png -------------------------------------------------------------------------------- /content/img/MPI_Allreduce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/intermediate-mpi/HEAD/content/img/MPI_Allreduce.png -------------------------------------------------------------------------------- /content/img/extrae-overlap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/intermediate-mpi/HEAD/content/img/extrae-overlap.png -------------------------------------------------------------------------------- /content/img/sample-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/intermediate-mpi/HEAD/content/img/sample-image.png -------------------------------------------------------------------------------- /content/img/extrae-deadlock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ENCCS/intermediate-mpi/HEAD/content/img/extrae-deadlock.png -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Intermediate MPI 2 | 3 | 4 | ## Credit and license 5 | 6 | - https://enccs.github.io/intermediate-mpi/#credits 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_rtd_theme 3 | sphinx_rtd_theme_ext_color_contrast 4 | sphinx-lesson 5 | sphinxcontrib-bibtex 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /_build 2 | /venv 3 | .ipynb_checkpoints 4 | venv* 5 | jupyter_execute 6 | /content/.auctex-auto/ 7 | /content/__pycache__/ 8 | /.ccls-cache/ 9 | a.out 10 | .direnv/ 11 | -------------------------------------------------------------------------------- /content/code/Makefile: -------------------------------------------------------------------------------- 1 | # -*- Mode: Makefile; -*- 2 | # 3 | # See COPYRIGHT in top-level directory. 4 | # 5 | 6 | CC=mpicc 7 | CFLAGS= -g -Wall -std=c11 8 | BINS=collective-communication-broadcast 9 | 10 | #all: $(BINS) 11 | 12 | collective-communication-broadcast: collective-communication-broadcast.c 13 | $(CC) $(CFLAGS) $^ -o $@ -lm 14 | 15 | clean: 16 | rm -f $(BINS) 17 | rm -f output*.bmp 18 | -------------------------------------------------------------------------------- /content/code/snippets/allocate.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char *argv[]) { 4 | MPI_Init(&argc, &argv); 5 | 6 | // allocate window 7 | double *buf; 8 | MPI_Win win; 9 | MPI_Win_allocate((MPI_Aint)(10 * sizeof(double)), sizeof(double), 10 | MPI_INFO_NULL, MPI_COMM_WORLD, &buf, &win); 11 | 12 | // do something with win 13 | 14 | // free window and the associated memory 15 | MPI_Win_free(&win); 16 | 17 | MPI_Finalize(); 18 | } 19 | -------------------------------------------------------------------------------- /content/code/snippets/alloc_mem+win_create.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char *argv[]) { 4 | MPI_Init(&argc, &argv); 5 | 6 | // allocate memory 7 | double *buf; 8 | MPI_Alloc_mem((MPI_Aint)(10 * sizeof(double)), MPI_INFO_NULL, &buf); 9 | 10 | // create window 11 | MPI_Win win; 12 | MPI_Win_create(buf, (MPI_Aint)(10 * sizeof(double)), sizeof(double), 13 | MPI_INFO_NULL, MPI_COMM_WORLD, &win); 14 | 15 | // do something with win 16 | 17 | // free window 18 | MPI_Win_free(&win); 19 | 20 | // free memory 21 | MPI_Free_mem(buf); 22 | 23 | MPI_Finalize(); 24 | } 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = content 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. 
$(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /content/code/.gitignore: -------------------------------------------------------------------------------- 1 | collective-communication-broadcast 2 | collective-communication-broadcast-solution 3 | collective-communication-scatter-and-gather 4 | collective-communication-scatter-and-gather-solution 5 | collective-communication-allgather 6 | collective-communication-allgather-solution 7 | collective-communication-alltoall 8 | collective-communication-alltoall-solution 9 | non-blocking-communication-deadlock 10 | non-blocking-communication-deadlock-solution 11 | non-blocking-communication-overlap 12 | non-blocking-communication-overlap-solution 13 | non-blocking-communication-ireduce 14 | non-blocking-communication-ireduce-solution 15 | threading-query 16 | threading-query-solution 17 | threading-funneled 18 | threading-funneled-solution 19 | *~ 20 | -------------------------------------------------------------------------------- /make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | description = "intermediate-mpi"; 3 | inputs = { 4 | nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; 5 | flake-utils.url = "github:numtide/flake-utils"; 6 | mach-nix.url = "github:DavHau/mach-nix/3.4.0"; 7 | }; 8 | 9 | outputs = { self, nixpkgs, flake-utils, mach-nix }: 10 | flake-utils.lib.eachDefaultSystem (system: 11 | let 12 | pkgs = nixpkgs.legacyPackages.${system}; 13 | pythonEnv = mach-nix.lib."${system}".mkPython { 14 | requirements = builtins.readFile ./requirements.txt; 15 | }; 16 | in 17 | { 18 | devShell = pkgs.mkShell { 19 | nativeBuildInputs = with pkgs; [ 20 | clang-tools 21 | cmake 22 | gcc 23 | ninja 24 | openmpi 25 | pythonEnv 26 | ]; 27 | 28 | hardeningDisable = [ "all" ]; 29 | 30 | NINJA_STATUS = "[Built edge %f of %t in %e sec] "; 31 | }; 32 | }); 33 | } 34 | -------------------------------------------------------------------------------- /content/code/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2020 ENCCS 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the “Software”), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /content/code/day-3/04_rma-accumulate/rma-accumulate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(int argc, char *argv[]) { 7 | MPI_Init(&argc, &argv); 8 | 9 | MPI_Comm world = MPI_COMM_WORLD; 10 | 11 | // Get my rank 12 | int rank; 13 | MPI_Comm_rank(world, &rank); 14 | 15 | /* FIXME create window object */ 16 | int buffer = 42; 17 | MPI_Win win; 18 | MPI_Win_create( .. ); 19 | 20 | if (rank == 0) { 21 | printf("[MPI process 0] Value in my buffer before MPI_Accumulate: " 22 | "%d.\n", 23 | buffer); 24 | } 25 | 26 | // start access epoch 27 | MPI_Win_fence(0, win); 28 | 29 | if (rank > 0) { 30 | /* FIXME accumulate this process' rank into the memory window of the root process by summing */ 31 | MPI_Accumulate( .. 
); 32 | printf( 33 | "[MPI process %d] I accumulate my rank %d in process 0 memory window\n", 34 | rank, rank); 35 | } 36 | 37 | // end access epoch 38 | MPI_Win_fence(0, win); 39 | 40 | if (rank == 0) { 41 | printf("[MPI process 0] Value in my buffer after MPI_Accumulate: %d.\n", 42 | buffer); 43 | } 44 | 45 | // Destroy the window 46 | MPI_Win_free(&win); 47 | 48 | MPI_Finalize(); 49 | 50 | return EXIT_SUCCESS; 51 | } 52 | -------------------------------------------------------------------------------- /content/code/snippets/send_n.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #define SIZE 5 8 | 9 | int main(int argc, char *argv[]) { 10 | int rank; 11 | int size; 12 | 13 | MPI_Init(&argc, &argv); 14 | 15 | MPI_Comm comm = MPI_COMM_WORLD; 16 | 17 | MPI_Comm_size(comm, &size); 18 | MPI_Comm_rank(comm, &rank); 19 | if (size != 2) { 20 | if (rank == 0) { 21 | fprintf(stdout, "Only two ranks is supported for this exercise, " 22 | "please re-run with two ranks\n"); 23 | } 24 | MPI_Finalize(); 25 | return EXIT_FAILURE; 26 | } 27 | 28 | int buffer[SIZE]; 29 | memset(buffer, 42, SIZE * sizeof(int)); 30 | 31 | MPI_Status status; 32 | 33 | if (rank == 0) { 34 | fprintf(stdout, "rank %d send\n", rank); 35 | for (int i = 0; i < SIZE; ++i) { 36 | fprintf(stdout, "buffer[%d] = %d\n", i, buffer[i]); 37 | } 38 | MPI_Send(buffer, SIZE, MPI_INT, 1, 0, comm); 39 | } else { 40 | MPI_Recv(buffer, SIZE, MPI_INT, 0, 0, comm, &status); 41 | fprintf(stdout, "rank %d recv\n", rank); 42 | for (int i = 0; i < SIZE; ++i) { 43 | fprintf(stdout, "buffer[%d] = %d\n", i, buffer[i]); 44 | } 45 | } 46 | 47 | MPI_Finalize(); 48 | 49 | return EXIT_SUCCESS; 50 | } 51 | -------------------------------------------------------------------------------- /content/diagrams/Problems-with-blocking-communication.drawio: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /content/code/day-1/00_comms-groups-create/solution/comms-groups-create.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | int main(int argc, char *argv[]) 6 | { 7 | MPI_Init(&argc, &argv); 8 | 9 | int world_size, world_rank; 10 | 11 | MPI_Comm_size(MPI_COMM_WORLD, &world_size); 12 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); 13 | 14 | printf("global rank %d in MPI_COMM_WORLD of size %d\n", world_rank, world_size); 15 | 16 | MPI_Group world_group; 17 | MPI_Comm_group(MPI_COMM_WORLD, &world_group); 18 | 19 | int excl_ranks[2]; 20 | excl_ranks[0] = 0; 21 | excl_ranks[1] = 1; 22 | 23 | MPI_Group local_group; 24 | MPI_Group_excl(world_group, 2, excl_ranks, &local_group); 25 | 26 | MPI_Comm local_comm; 27 | MPI_Comm_create(MPI_COMM_WORLD, local_group, &local_comm); 28 | 29 | if (local_comm != MPI_COMM_NULL) 30 | { 31 | int local_size, local_rank; 32 | 33 | MPI_Comm_size(local_comm, &local_size); 34 | MPI_Comm_rank(local_comm, &local_rank); 35 | 36 | printf("global rank %d, local rank %d in local_comm of size %d\n", world_rank, local_rank, local_size); 37 | 38 | MPI_Comm_free(&local_comm); 39 | } 40 | 41 | MPI_Group_free(&local_group); 42 | MPI_Group_free(&world_group); 43 | 44 | MPI_Finalize(); 45 | 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- 
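Note on the following snippet (n_send.c): it sends the buffer one element per message, using the extent returned by MPI_Type_get_extent to step through the data. That extent is a byte count, while pointer arithmetic on an int * is scaled by sizeof(int), so an expression like buffer + (i * extent) advances i * extent ints rather than i * extent bytes. A minimal sketch of byte-based indexing, reusing the snippet's own names (an illustration only, not part of the lesson code):

    MPI_Aint lb, extent;
    MPI_Type_get_extent(MPI_INT, &lb, &extent);    /* for MPI_INT, extent == sizeof(int) bytes */
    for (int i = 0; i < SIZE; ++i) {
        void *elem = (char *)buffer + i * extent;  /* advance by whole extents, counted in bytes */
        MPI_Send(elem, 1, MPI_INT, 1, 0, comm);    /* one element per message */
    }

Plain array indexing (&buffer[i]) is equivalent here; the byte arithmetic only starts to matter for derived datatypes whose extent differs from the element size.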
/content/code/snippets/n_send.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #define SIZE 5 8 | 9 | int main(int argc, char *argv[]) { 10 | int rank; 11 | int size; 12 | 13 | MPI_Init(&argc, &argv); 14 | 15 | MPI_Comm comm = MPI_COMM_WORLD; 16 | 17 | MPI_Comm_size(comm, &size); 18 | MPI_Comm_rank(comm, &rank); 19 | if (size != 2) { 20 | if (rank == 0) { 21 | fprintf(stdout, "Only two ranks is supported for this exercise, " 22 | "please re-run with two ranks\n"); 23 | } 24 | MPI_Finalize(); 25 | return EXIT_FAILURE; 26 | } 27 | 28 | int buffer[SIZE]; 29 | memset(buffer, 42, SIZE * sizeof(int)); 30 | 31 | MPI_Status status; 32 | MPI_Aint lb, extent; 33 | MPI_Type_get_extent(MPI_INT, &lb, &extent); 34 | 35 | if (rank == 0) { 36 | for (int i = 0; i < SIZE; ++i) { 37 | fprintf(stdout, "rank %d send: buffer[%d] = %d\n", rank, i, buffer[i]); 38 | MPI_Send(buffer + (i * extent), 1, MPI_INT, 1, 0, comm); 39 | } 40 | } else { 41 | for (int i = 0; i < SIZE; ++i) { 42 | MPI_Recv(buffer + (i * extent), 1, MPI_INT, 0, 0, comm, &status); 43 | fprintf(stdout, "rank %d recv: buffer[%d] = %d\n", rank, i, 44 | buffer[i]); 45 | } 46 | } 47 | 48 | MPI_Finalize(); 49 | 50 | return EXIT_SUCCESS; 51 | } 52 | -------------------------------------------------------------------------------- /content/code/day-4/05_rma-lock-unlock/rma-lock-unlock.c: -------------------------------------------------------------------------------- 1 | /* Modified from: https://cvw.cac.cornell.edu/MPIoneSided/lul_c_solution */ 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | int main(int argc, char **argv) { 9 | MPI_Init(&argc, &argv); 10 | MPI_Comm comm = MPI_COMM_WORLD; 11 | 12 | int size; 13 | MPI_Comm_size(comm, &size); 14 | 15 | int rank; 16 | MPI_Comm_rank(comm, &rank); 17 | 18 | double *buf = (double *)(malloc(sizeof(double) * size)); 19 | for (int i = 0; i < size; i++) { 20 | buf[i] = rank * 11; // Give each rank unique data 21 | } 22 | 23 | MPI_Win win; 24 | // FIXME create a window and attach it to buf 25 | MPI_xxx(/* */); 26 | 27 | // origin processes 28 | if (rank > 0) { 29 | // FIXME Request lock of process 0 30 | MPI_xxx(/* */); 31 | // FIXME put element at index rank into buffer on rank 0 32 | MPI_xxx(/* */); 33 | // FIXME Block until put succeeds 34 | MPI_xxx(/* */); 35 | } 36 | 37 | // there is no synchronization on the target process!!! 
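// Note: with passive-target synchronization (MPI_Win_lock / MPI_Win_unlock)
// the target process takes no part in the synchronization itself.  Each
// MPI_Win_unlock only guarantees completion at the target by the time it
// returns on the origin, so the printf below may run before some puts have
// arrived.  A hedged sketch of one way to make the printed values well
// defined (an addition, not part of this exercise): call MPI_Barrier(comm)
// on every rank after the unlocks, and on rank 0 bracket the read of buf with
//   MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, 0, win); ... MPI_Win_unlock(0, win);
// so that the public and private copies of the window are synchronized.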
38 | 39 | if (rank == 0) { 40 | printf("on process %d:\n [", rank); 41 | for (int i = 0; i < size; ++i) { 42 | printf(" %f, ", buf[i]); 43 | } 44 | printf("]\n"); 45 | } 46 | 47 | // FIXME Free the window 48 | MPI_xxx(/* */); 49 | 50 | MPI_Finalize(); 51 | 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /content/code/day-3/04_rma-accumulate/solution/rma-accumulate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(int argc, char *argv[]) { 7 | MPI_Init(&argc, &argv); 8 | 9 | MPI_Comm world = MPI_COMM_WORLD; 10 | 11 | // Get my rank 12 | int rank; 13 | MPI_Comm_rank(world, &rank); 14 | 15 | // Create the window 16 | int buffer = 42; 17 | MPI_Win win; 18 | MPI_Win_create(&buffer, (MPI_Aint)sizeof(int), sizeof(int), MPI_INFO_NULL, 19 | world, &win); 20 | 21 | if (rank == 0) { 22 | printf("[MPI process 0] Value in my buffer before MPI_Accumulate: " 23 | "%d.\n", 24 | buffer); 25 | } 26 | 27 | // start access epoch 28 | MPI_Win_fence(0, win); 29 | 30 | if (rank > 0) { 31 | // Push my value into the first integer in MPI process 0 window 32 | MPI_Accumulate(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_SUM, win); 33 | printf( 34 | "[MPI process %d] I accumulate my rank %d in process 0 memory window\n", 35 | rank, rank); 36 | } 37 | 38 | // end access epoch 39 | MPI_Win_fence(0, win); 40 | 41 | if (rank == 0) { 42 | printf("[MPI process 0] Value in my buffer after MPI_Accumulate: %d.\n", 43 | buffer); 44 | } 45 | 46 | // Destroy the window 47 | MPI_Win_free(&win); 48 | 49 | MPI_Finalize(); 50 | 51 | return EXIT_SUCCESS; 52 | } 53 | -------------------------------------------------------------------------------- /content/bibliography.bib: -------------------------------------------------------------------------------- 1 | @BOOK{Gropp2014-dz, 2 | title = "{Using Advanced MPI: Modern Features of the Message-Passing 3 | Interface}", 4 | author = "Gropp, William and Hoefler, Torsten and Thakur, Rajeev and Lusk, 5 | Ewing", 6 | publisher = "MIT Press", 7 | series = "Scientific and Engineering Computation", 8 | month = nov, 9 | year = 2014, 10 | url = "https://mitpress.mit.edu/books/using-advanced-mpi", 11 | language = "en", 12 | isbn = "9780262527637" 13 | } 14 | 15 | @BOOK{Gropp2014-qf, 16 | title = "{Using MPI: Portable Parallel Programming with the 17 | Message-Passing Interface}", 18 | author = "Gropp, William and Lusk, Ewing and Skjellum, Anthony", 19 | publisher = "MIT Press", 20 | series = "Scientific and Engineering Computation", 21 | month = nov, 22 | year = 2014, 23 | url = "https://mitpress.mit.edu/books/using-mpi-third-edition", 24 | language = "en", 25 | isbn = "9780262527392" 26 | } 27 | 28 | @BOOK{Pacheco1997-qq, 29 | title = "{Parallel Programming with MPI}", 30 | author = "Pacheco, Peter", 31 | publisher = "Morgan Kaufmann", 32 | year = 1997, 33 | url = "https://www.elsevier.com/books/parallel-programming-with-mpi/pacheco/978-0-08-051354-6", 34 | language = "en", 35 | isbn = "9781558603394" 36 | } 37 | -------------------------------------------------------------------------------- /content/code/day-1/01_comms-groups-divide-evenly/comms-split.c: -------------------------------------------------------------------------------- 1 | /* Adapated from: 2 | * https://github.com/UoB-HPC/hpc-course-examples/blob/master/mpi/advanced/example10/group_to_comm.c 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | int main(int argc, char 
*argv[]) 11 | { 12 | MPI_Init(&argc, &argv); 13 | 14 | // get rank and size from global communicator 15 | 16 | int global_rank, global_size; 17 | MPI_Comm_rank(MPI_COMM_WORLD, &global_rank); 18 | MPI_Comm_size(MPI_COMM_WORLD, &global_size); 19 | 20 | // FIXME: create local communicator using MPI_Comm_split 21 | 22 | int color = ...; 23 | int key = global_rank; 24 | 25 | MPI_Comm local_comm; 26 | MPI_Comm_split( ... ); 27 | 28 | // FIXME: get rank in local communicator 29 | 30 | int local_rank; 31 | MPI_Comm_rank( ... ); 32 | 33 | // send global rank as message 34 | 35 | int sendbuf = global_rank; 36 | int recvbuf; 37 | int count = 1; 38 | 39 | // compute sum of global ranks in local communicator 40 | 41 | MPI_Allreduce(&sendbuf, &recvbuf, count, MPI_INT, MPI_SUM, local_comm); 42 | 43 | printf("global_rank= %d local_rank= %d recvbuf= %d\n", global_rank, local_rank, recvbuf); 44 | 45 | // FIXME: complete MPI_Comm_free 46 | 47 | MPI_Comm_free( ... ); 48 | 49 | MPI_Finalize(); 50 | 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /content/code/day-1/01_comms-groups-divide-evenly/solution/comms-split.c: -------------------------------------------------------------------------------- 1 | /* Adapated from: 2 | * https://github.com/UoB-HPC/hpc-course-examples/blob/master/mpi/advanced/example10/group_to_comm.c 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | int main(int argc, char *argv[]) 11 | { 12 | MPI_Init(&argc, &argv); 13 | 14 | // rank and size from global communicator 15 | 16 | int global_rank, global_size; 17 | MPI_Comm_rank(MPI_COMM_WORLD, &global_rank); 18 | MPI_Comm_size(MPI_COMM_WORLD, &global_size); 19 | 20 | // create local communicator 21 | 22 | int color = (global_rank < (global_size / 2)); 23 | int key = global_rank; 24 | 25 | MPI_Comm local_comm; 26 | MPI_Comm_split(MPI_COMM_WORLD, color, key, &local_comm); 27 | 28 | // rank in local communicator 29 | 30 | int local_rank; 31 | MPI_Comm_rank(local_comm, &local_rank); 32 | 33 | // send global rank as message 34 | 35 | int sendbuf = global_rank; 36 | int recvbuf; 37 | int count = 1; 38 | 39 | // compute sum of global ranks in local communicator 40 | 41 | MPI_Allreduce(&sendbuf, &recvbuf, count, MPI_INT, MPI_SUM, local_comm); 42 | 43 | printf("global_rank= %d local_rank= %d recvbuf= %d\n", global_rank, local_rank, recvbuf); 44 | 45 | MPI_Comm_free(&local_comm); 46 | 47 | MPI_Finalize(); 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /content/code/day-1/03_basic-extent-size/solution/basic-extent-size.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(int argc, char *argv[]) { 7 | 8 | MPI_Init(&argc, &argv); 9 | 10 | MPI_Aint lb, extent; 11 | int size; 12 | 13 | // char 14 | printf("sizeof(char) = %ld\n", sizeof(char)); 15 | MPI_Type_get_extent(MPI_CHAR, &lb, &extent); 16 | MPI_Type_size(MPI_CHAR, &size); 17 | printf("For MPI_CHAR:\n lowerbound = %ld; extent = %ld; size = %d\n", lb, 18 | extent, size); 19 | 20 | // int 21 | printf("sizeof(int) = %ld\n", sizeof(int)); 22 | MPI_Type_get_extent(MPI_INT, &lb, &extent); 23 | MPI_Type_size(MPI_INT, &size); 24 | printf("For MPI_INT:\n lowerbound = %ld; extent = %ld; size = %d\n", lb, 25 | extent, size); 26 | 27 | // float 28 | printf("sizeof(float) = %ld\n", sizeof(float)); 29 | MPI_Type_get_extent(MPI_FLOAT, &lb, &extent); 30 | MPI_Type_size(MPI_FLOAT, &size); 31 
| printf("For MPI_FLOAT:\n lowerbound = %ld; extent = %ld; size = %d\n", lb, 32 | extent, size); 33 | 34 | // double 35 | printf("sizeof(double) = %ld\n", sizeof(double)); 36 | MPI_Type_get_extent(MPI_DOUBLE, &lb, &extent); 37 | MPI_Type_size(MPI_DOUBLE, &size); 38 | printf("For MPI_DOUBLE:\n lowerbound = %ld; extent = %ld; size = %d\n", lb, 39 | extent, size); 40 | 41 | MPI_Finalize(); 42 | 43 | return EXIT_SUCCESS; 44 | } 45 | -------------------------------------------------------------------------------- /content/code/day-3/01_rma-vs-nonblocking/rma-vs-nonblocking.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(int argc, char *argv[]) { 7 | MPI_Init(&argc, &argv); 8 | 9 | MPI_Comm comm = MPI_COMM_WORLD; 10 | 11 | // Get the number of processes and check only 2 processes are used 12 | int size; 13 | MPI_Comm_size(comm, &size); 14 | if (size != 2) { 15 | printf("This application is meant to be run with 2 processes.\n"); 16 | MPI_Abort(comm, EXIT_FAILURE); 17 | } 18 | int rank; 19 | MPI_Comm_rank(comm, &rank); 20 | 21 | // Get my rank and do the corresponding job 22 | if (rank == 0) { 23 | int sendbuf[4] = {42, 88, 12, 3}; 24 | printf("MPI process %d sends values:", rank); 25 | for (int i = 0; i < 4; ++i) { 26 | printf(" %d", sendbuf[i]); 27 | } 28 | printf("\n"); 29 | /* FIXME Non-blocking send of sendbuf to rank 1 */ 30 | MPI_Isend( .. ); 31 | 32 | /* Here you might do other useful computational work */ 33 | 34 | /* FIXME wait on MPI_Isend */ 35 | MPI_Wait( .. ); 36 | } else if (rank == 1) { 37 | /* FIXME receive data on rank 1 */ 38 | int recvbuf[4]; 39 | MPI_Recv( .. ); 40 | printf("MPI process %d receives values:", rank); 41 | for (int i = 0; i < 4; ++i) { 42 | printf(" %d", recvbuf[i]); 43 | } 44 | printf("\n"); 45 | } 46 | 47 | MPI_Finalize(); 48 | 49 | return EXIT_SUCCESS; 50 | } 51 | -------------------------------------------------------------------------------- /content/code/day-4/05_rma-lock-unlock/solution/rma-lock-unlock.c: -------------------------------------------------------------------------------- 1 | /* Modified from: https://cvw.cac.cornell.edu/MPIoneSided/lul_c_solution */ 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | int main(int argc, char **argv) { 9 | MPI_Init(&argc, &argv); 10 | MPI_Comm comm = MPI_COMM_WORLD; 11 | 12 | int size; 13 | MPI_Comm_size(comm, &size); 14 | 15 | int rank; 16 | MPI_Comm_rank(comm, &rank); 17 | 18 | double *buf = (double *)(malloc(sizeof(double) * size)); 19 | for (int i = 0; i < size; i++) { 20 | buf[i] = rank * 11; // Give each rank unique data 21 | } 22 | 23 | MPI_Win win; 24 | MPI_Win_create(buf, size * sizeof(double), sizeof(double), MPI_INFO_NULL, 25 | comm, &win); 26 | 27 | // origin processes 28 | if (rank > 0) { 29 | // Request lock of process 0 30 | MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win); 31 | // put element at index rank into buffer on rank 0 32 | MPI_Put(buf, 1, MPI_DOUBLE, 0, rank, 1, MPI_DOUBLE, win); 33 | // Block until put succeeds 34 | MPI_Win_unlock(0, win); 35 | } 36 | 37 | // there is no synchronization on the target process!!! 
38 | 39 | if (rank == 0) { 40 | printf("on process %d:\n [", rank); 41 | for (int i = 0; i < size; ++i) { 42 | printf(" %f, ", buf[i]); 43 | } 44 | printf("]\n"); 45 | } 46 | 47 | // Free the window 48 | MPI_Win_free(&win); 49 | 50 | MPI_Finalize(); 51 | 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /content/code/day-3/01_rma-vs-nonblocking/solution/rma-vs-nonblocking.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(int argc, char *argv[]) { 7 | MPI_Init(&argc, &argv); 8 | 9 | MPI_Comm comm = MPI_COMM_WORLD; 10 | 11 | // Get the number of processes and check only 2 processes are used 12 | int size; 13 | MPI_Comm_size(comm, &size); 14 | if (size != 2) { 15 | printf("This application is meant to be run with 2 processes.\n"); 16 | MPI_Abort(comm, EXIT_FAILURE); 17 | } 18 | int rank; 19 | MPI_Comm_rank(comm, &rank); 20 | 21 | // Get my rank and do the corresponding job 22 | if (rank == 0) { 23 | int sendbuf[4] = {42, 88, 12, 3}; 24 | MPI_Request request; 25 | printf("MPI process %d sends values:", rank); 26 | for (int i = 0; i < 4; ++i) { 27 | printf(" %d", sendbuf[i]); 28 | } 29 | printf("\n"); 30 | MPI_Isend(&sendbuf, 4, MPI_INT, 1, 0, comm, &request); 31 | 32 | /* Here you might do other useful computational work */ 33 | 34 | // Let's wait for the MPI_Isend to complete before progressing further. 35 | MPI_Wait(&request, MPI_STATUS_IGNORE); 36 | } else if (rank == 1) { 37 | int recvbuf[4]; 38 | MPI_Recv(&recvbuf, 4, MPI_INT, 0, 0, comm, MPI_STATUS_IGNORE); 39 | printf("MPI process %d receives values:", rank); 40 | for (int i = 0; i < 4; ++i) { 41 | printf(" %d", recvbuf[i]); 42 | } 43 | printf("\n"); 44 | } 45 | 46 | MPI_Finalize(); 47 | 48 | return EXIT_SUCCESS; 49 | } 50 | -------------------------------------------------------------------------------- /content/code/day-3/03_rma-put/rma-put.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(int argc, char *argv[]) { 7 | MPI_Init(&argc, &argv); 8 | 9 | MPI_Comm comm = MPI_COMM_WORLD; 10 | 11 | int size; 12 | MPI_Comm_size(comm, &size); 13 | if (size != 2) { 14 | printf( 15 | "This application is meant to be run with 2 MPI processes, not %d.\n", 16 | size); 17 | MPI_Abort(comm, EXIT_FAILURE); 18 | } 19 | 20 | // Get my rank 21 | int rank; 22 | MPI_Comm_rank(comm, &rank); 23 | 24 | /* FIXME create the memory window */ 25 | int *window_buffer; 26 | MPI_Win win; 27 | MPI_Win_allocate( .. ); 28 | 29 | // start access epoch 30 | MPI_Win_fence(0, win); 31 | 32 | // local store 33 | if (rank == 1) { 34 | window_buffer[0] = -1; 35 | window_buffer[1] = -1; 36 | window_buffer[2] = -1; 37 | window_buffer[3] = -1; 38 | } 39 | 40 | // start access epoch 41 | MPI_Win_fence(0, win); 42 | 43 | if (rank == 0) { 44 | int putbuf[4] = {42, 88, 12, 3}; 45 | /* FIXME Put the value into the MPI process 1 window */ 46 | MPI_Put( .. 
); 47 | } 48 | 49 | // end access epoch 50 | MPI_Win_fence(0, win); 51 | 52 | if (rank == 1) { 53 | printf("[MPI process 1] Value put by MPI process 0:"); 54 | for (int i = 0; i < 4; ++i) { 55 | printf(" %d", window_buffer[i]); 56 | } 57 | printf("\n"); 58 | } 59 | 60 | // Destroy the window 61 | MPI_Win_free(&win); 62 | 63 | MPI_Finalize(); 64 | 65 | return EXIT_SUCCESS; 66 | } 67 | -------------------------------------------------------------------------------- /content/code/day-3/00_rma/solution/rma.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(int argc, char *argv[]) { 7 | MPI_Init(&argc, &argv); 8 | 9 | MPI_Comm comm = MPI_COMM_WORLD; 10 | 11 | int size; 12 | MPI_Comm_size(comm, &size); 13 | if (size != 2) { 14 | printf( 15 | "This application is meant to be run with 2 MPI processes, not %d.\n", 16 | size); 17 | MPI_Abort(comm, EXIT_FAILURE); 18 | } 19 | 20 | // Get my rank 21 | int rank; 22 | MPI_Comm_rank(comm, &rank); 23 | 24 | // Create the window 25 | int window_buffer[4] = {0}; 26 | if (rank == 1) { 27 | window_buffer[0] = 42; 28 | window_buffer[1] = 88; 29 | window_buffer[2] = 12; 30 | window_buffer[3] = 3; 31 | } 32 | MPI_Win win; 33 | MPI_Win_create(&window_buffer, (MPI_Aint)4 * sizeof(int), sizeof(int), 34 | MPI_INFO_NULL, comm, &win); 35 | 36 | // start access epoch 37 | MPI_Win_fence(0, win); 38 | 39 | int getbuf[4]; 40 | if (rank == 0) { 41 | // Fetch the value from the MPI process 1 window 42 | MPI_Get(&getbuf, 4, MPI_INT, 1, 0, 4, MPI_INT, win); 43 | } 44 | 45 | // end access epoch 46 | MPI_Win_fence(0, win); 47 | 48 | if (rank == 0) { 49 | printf("[MPI process 0] Value fetched from MPI process 1 window:"); 50 | for (int i = 0; i < 4; ++i) { 51 | printf(" %d", getbuf[i]); 52 | } 53 | printf("\n"); 54 | } 55 | 56 | // Destroy the window 57 | MPI_Win_free(&win); 58 | 59 | MPI_Finalize(); 60 | 61 | return EXIT_SUCCESS; 62 | } 63 | -------------------------------------------------------------------------------- /content/code/day-1/00_comms-groups-create/comms-groups-create.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | int main(int argc, char *argv[]) 6 | { 7 | MPI_Init(&argc, &argv); 8 | 9 | /* size and rank from global communicator */ 10 | 11 | int global_size, global_rank; 12 | 13 | MPI_Comm_size(MPI_COMM_WORLD, &global_size); 14 | MPI_Comm_rank(MPI_COMM_WORLD, &global_rank); 15 | 16 | printf("global rank %d in MPI_COMM_WORLD of size %d\n", global_rank, global_size); 17 | 18 | /* global group */ 19 | 20 | MPI_Group global_group; 21 | MPI_Comm_group(MPI_COMM_WORLD, &global_group); 22 | 23 | /* FIXME: create a local group by excluding some ranks */ 24 | 25 | int excl_ranks[ ... ]; 26 | excl_ranks[0] = ... 27 | ... 28 | 29 | MPI_Group local_group; 30 | MPI_Group_excl( ... ); 31 | 32 | /* FIXME: create a local communicator from local group */ 33 | 34 | MPI_Comm local_comm; 35 | MPI_Comm_create( ... ); 36 | 37 | if (local_comm != MPI_COMM_NULL) 38 | { 39 | /* FIXME: get size and rank from the local communicator */ 40 | 41 | int local_size, local_rank; 42 | 43 | MPI_Comm_size( ... ); 44 | MPI_Comm_rank( ... ); 45 | 46 | printf( ... ); 47 | 48 | /* FIXME: complete MPI_Comm_free (why does one need to call it inside the if block?) */ 49 | 50 | MPI_Comm_free( ... ); 51 | } 52 | 53 | /* FIXME: complete MPI_Group)free */ 54 | 55 | MPI_Group_free( ... ); 56 | MPI_Group_free( ... 
); 57 | 58 | MPI_Finalize(); 59 | 60 | return 0; 61 | } 62 | -------------------------------------------------------------------------------- /content/code/day-3/03_rma-put/solution/rma-put.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(int argc, char *argv[]) { 7 | MPI_Init(&argc, &argv); 8 | 9 | MPI_Comm comm = MPI_COMM_WORLD; 10 | 11 | int size; 12 | MPI_Comm_size(comm, &size); 13 | if (size != 2) { 14 | printf( 15 | "This application is meant to be run with 2 MPI processes, not %d.\n", 16 | size); 17 | MPI_Abort(comm, EXIT_FAILURE); 18 | } 19 | 20 | // Get my rank 21 | int rank; 22 | MPI_Comm_rank(comm, &rank); 23 | 24 | // Create the window 25 | int *window_buffer; 26 | MPI_Win win; 27 | MPI_Win_allocate((MPI_Aint)4 * sizeof(int), sizeof(int), MPI_INFO_NULL, comm, 28 | &window_buffer, &win); 29 | 30 | // start access epoch 31 | MPI_Win_fence(0, win); 32 | 33 | // local store 34 | if (rank == 1) { 35 | window_buffer[0] = -1; 36 | window_buffer[1] = -1; 37 | window_buffer[2] = -1; 38 | window_buffer[3] = -1; 39 | } 40 | 41 | // start access epoch 42 | MPI_Win_fence(0, win); 43 | 44 | if (rank == 0) { 45 | int putbuf[4] = {42, 88, 12, 3}; 46 | // Fetch the value from the MPI process 1 window 47 | MPI_Put(&putbuf, 4, MPI_INT, 1, 0, 4, MPI_INT, win); 48 | } 49 | 50 | // end access epoch 51 | MPI_Win_fence(0, win); 52 | 53 | if (rank == 1) { 54 | printf("[MPI process 1] Value put by MPI process 0:"); 55 | for (int i = 0; i < 4; ++i) { 56 | printf(" %d", window_buffer[i]); 57 | } 58 | printf("\n"); 59 | } 60 | 61 | // Destroy the window 62 | MPI_Win_free(&win); 63 | 64 | MPI_Finalize(); 65 | 66 | return EXIT_SUCCESS; 67 | } 68 | -------------------------------------------------------------------------------- /content/code/day-4/03_rma-fence/rma-fence.c: -------------------------------------------------------------------------------- 1 | /* Modified from: https://cvw.cac.cornell.edu/MPIoneSided/fence_c_solution */ 2 | 3 | #include 4 | 5 | #include 6 | 7 | #define BUFSZ 10 8 | 9 | int main(int argc, char **argv) { 10 | MPI_Init(&argc, &argv); 11 | 12 | MPI_Comm comm = MPI_COMM_WORLD; 13 | 14 | int rank; 15 | MPI_Comm_rank(comm, &rank); 16 | 17 | // initialize buffer to 0 18 | int buf[BUFSZ] = {0}; 19 | 20 | // only on rank 0 the elements of buf are different from zero 21 | if (rank == 0) { 22 | for (int i = 0; i < BUFSZ; ++i) { 23 | buf[i] = (i + 1) * 3; 24 | } 25 | } 26 | 27 | MPI_Win win; 28 | // FIXME create a window and attach it to buf 29 | MPI_xxx(/* */); 30 | 31 | printf("Before Get on %d:\n [", rank); 32 | for (int i = 0; i < BUFSZ; ++i) { 33 | printf(" %d, ", buf[i]); 34 | } 35 | printf("]\n"); 36 | 37 | // initialize access epoch 38 | // assert that there were no local RMA operations prior to this epoch 39 | // FIXME 40 | MPI_xxx(/* */); 41 | 42 | // ranks > 0 originate MPI_Get with rank 0 as target 43 | // FIXME 44 | if (/* */) { 45 | // FIXME 46 | MPI_xxx(/* */); 47 | } 48 | 49 | // finalize the access epoch 50 | // this blocks until the MPI_Get is complete 51 | // assert that there will be no further local RMA operations 52 | // FIXME 53 | MPI_xxx(/* */); 54 | 55 | printf("After Get on %d:\n [", rank); 56 | for (int i = 0; i < BUFSZ; ++i) { 57 | printf(" %d, ", buf[i]); 58 | } 59 | printf("]\n"); 60 | 61 | // Free up our window 62 | // FIXME 63 | MPI_xxx(/* */); 64 | 65 | MPI_Finalize(); 66 | 67 | return 0; 68 | } 69 | 
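The fence exercise above follows the active-target pattern: every rank in the window's communicator calls MPI_Win_fence to open an epoch, the origins issue their RMA calls, and a second MPI_Win_fence closes the epoch and completes them. The assert argument is only an optimization hint; 0 is always valid, while MPI_MODE_NOPRECEDE promises that no RMA calls precede the fence and MPI_MODE_NOSUCCEED promises that none follow it. In outline (the full answer is in solution/rma-fence.c further down in this dump):

    MPI_Win_fence(MPI_MODE_NOPRECEDE, win);                        /* open the epoch            */
    if (rank != 0) {
        MPI_Get(buf, BUFSZ, MPI_INT, 0, 0, BUFSZ, MPI_INT, win);   /* read rank 0's buf         */
    }
    MPI_Win_fence(MPI_MODE_NOSUCCEED, win);                        /* close; Gets are complete  */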
-------------------------------------------------------------------------------- /content/code/day-3/02_rma-win-allocate/solution/rma-win-allocate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(int argc, char *argv[]) { 7 | MPI_Init(&argc, &argv); 8 | 9 | MPI_Comm comm = MPI_COMM_WORLD; 10 | 11 | int size; 12 | MPI_Comm_size(comm, &size); 13 | if (size != 2) { 14 | printf( 15 | "This application is meant to be run with 2 MPI processes, not %d.\n", 16 | size); 17 | MPI_Abort(comm, EXIT_FAILURE); 18 | } 19 | 20 | // Get my rank 21 | int rank; 22 | MPI_Comm_rank(comm, &rank); 23 | 24 | // Create the window 25 | int *window_buffer; 26 | MPI_Win win; 27 | MPI_Win_allocate((MPI_Aint)4 * sizeof(int), sizeof(int), MPI_INFO_NULL, comm, 28 | &window_buffer, &win); 29 | 30 | // start access epoch 31 | MPI_Win_fence(0, win); 32 | 33 | // local store 34 | if (rank == 1) { 35 | window_buffer[0] = 42; 36 | window_buffer[1] = 88; 37 | window_buffer[2] = 12; 38 | window_buffer[3] = 3; 39 | } 40 | 41 | // start access epoch 42 | MPI_Win_fence(0, win); 43 | 44 | int getbuf[4]; 45 | if (rank == 0) { 46 | // Fetch the value from the MPI process 1 window 47 | MPI_Get(&getbuf, 4, MPI_INT, 1, 0, 4, MPI_INT, win); 48 | } 49 | 50 | // end access epoch 51 | MPI_Win_fence(0, win); 52 | 53 | if (rank == 0) { 54 | printf("[MPI process 0] Value fetched from MPI process 1 window:"); 55 | for (int i = 0; i < 4; ++i) { 56 | printf(" %d", getbuf[i]); 57 | } 58 | printf("\n"); 59 | } 60 | 61 | // Destroy the window 62 | MPI_Win_free(&win); 63 | 64 | MPI_Finalize(); 65 | 66 | return EXIT_SUCCESS; 67 | } 68 | -------------------------------------------------------------------------------- /content/diagrams/fork-join-with-mpi.drawio: -------------------------------------------------------------------------------- 1 | 7Vtbc6IwFP41PnYHwkV8rLbbzl47053Z7r7sRIiSaSROiFX76zfUoJDYSl2Qy9qXkkM4Cd/35eQkxJ41mq1uGJyHX2mASA8YwapnXfUAMIHTF/8Sy3pj8WxvY5gyHMhKO8M9fkbSaEjrAgcozlXklBKO53mjT6MI+Txng4zRZb7ahJJ8q3M4RZrh3odEt/7EAQ/lWzjGzn6L8DRMWzYNeWcG08rSEIcwoMuMybruWSNGKd9czVYjRBLwUlw2z3185e62YwxFvMgD8afv0R25iY3H4ej2z+/nq4fP7AJIep4gWcg3lr3l6xQCFAWXCZKi5BMYx9jvWcOQz4gwmOJS74jsGwpy2Mpu3SA6Q5ytRYXlDtEU0DADZmpjiECOn/KMQEnsdOtu28IdxaInwJAidGzpR0rQ9Iy8i5gumI/kU1kEFUeuoThyFUccsinimiNxkXntnemFoPeQ5f4HZFkKxv22kuX8O1lohflD5vqXuDY+OLJ0lbypkRbWstB4goHCizP40DdL4niPr6pprnBIbrr+RuNWo5hVwuyxUbYP3vZTMaGWRugPPEMaqRyteJ7GmDP6iEaUUCYsEY1EzeEEE6KYIMHTKNGCoBsJ+/AJMY5FxnEpb8xwECTNDJch5uh+Dv2kzaXIr4SN0UUUoODQYE98otWbokjhVsZQGm8zmrH3aAYYr8sjR8l78dfD5h1DyRsmfaLsUWNiH0oZWmA83+SFE7xKcMtR0gNWAJE38TX+xB3X99B4Ug7KagoCdJS9PSir2i8NZdPUYL6FhCbBZOWHMJrqim8FzpY6u9QOdIEJohXI2o2TsB6py0Z24vnI34vs2HNsp6QIbDsKslbdyNrVIzuZgP3IBu7YddySkHUVZO26kdVnt280uiBUTP8v+KLIx6Sqmc5BXmDvw9wDY8stC/O+mpPXjXkJq+ZjFmICQrZ+yBYyTyXF3WMvpYMLuE0CfTh1OrhskIA0ZdmgTNn2sSt+VXm2mqNWvRQ06hHa8YJxCwrGbNYWgprkAXVrpy2KKRCZWpEbmuo8W3duOKg+gTlJamiqk2ndqSHQ140dnUwH7YyNnZlNveqH8EnWINuvTU1Zg6RAdn8Ie+chXOsQBgV2aLqhtP5ZabUqTT/F8KVDWyrmoGlbKgXy624M7KJbKv3zuK5iXJslHE/qltAGZ6FVIrQCy42DQitbNAfF4DVKDKYiBu3EVVvEAEoQw6ukNoQsR/3oODiSrLrP2aU7Yq2fIhoiDHvfmTpjsPvzjhzU6vmBA36rlk0ZG5knD/jpl/mmauXoiG/mHQE1j6haDaAjQeRgnmkWlprTaKlpCiksNWXiO/n50jI2xU4feJp21LiswKPK6tTZi37UqhPnMLWssLovwqK4+xHVhpfdT9Gs678= -------------------------------------------------------------------------------- /content/diagrams/stencil-with-tasking.drawio: -------------------------------------------------------------------------------- 1 | 
5Vpdc6IwFP01PnYHCCA+Vu22091uO9vt1t2XnQhBM0biBFq1v36DBJUEhVEK2j5Jbj6Ac8899ybYAr3p4prB2fiOeoi0DM1btEC/ZRi6YbX5T2xZJhbHdBLDiGFPDNoYHvEbEkZNWF+wh8LMwIhSEuFZ1ujSIEBulLFBxug8O8ynJHvXGRwhxfDoQqJan7EXjcVbWNrGfoPwaJzeWddEzxSmg4UhHEOPzrdM4KoFeozSKLmaLnqIxOCluCTzvu7oXT8YQ0FUZgIbaNbsrnP/1L18arumc/vT/H1hJau8QvIiXlg8bLRMEWD0JfBQvIjWAt35GEfocQbduHfOfc5t42hKeEvnlz4mpEcJZau5wPd9w3W5PYwYnaCtHs8e2pbNe8QDIBahxc4309d4caIhOkURW/Ih6QRbQCw4BtL2fOMxkLphvOUtR9igIMlovfQGR34hoMyHNby9Dx7IdahNur2bf3/f+oNv7MIAxbiiwLuMCcpbLoFhiN0slPzl2XIgYF81/sSNL1ba7C+2O/vLtLXA0SBdg19vzeKtzaS4kc7ZgTiPNchGKNrzproYiLxMHKm+2vKFleOK1MYQgRF+zUZfnn/EHR4o5o+8poLpZKlgapKLQ/rCXCRmbUeLvBCQFjKkhRJklIVWdFm/9hEMOp5AhzsVnJZPs56Qg7asR9vG/nXe2aGqIvzCU6Q4latglHVjVjoDGiBJZ4UJEjwKYi5wdyNu78aainkmuxQdU+x58W1yNTyr8jupo+j0TlG2tSzcbVWTzRzOyGFWmSSXyHQFuQ2Gs6TC8PEiRkpOdh5Ejp+b7GzXQUO/GlylaNANFVenzlSXJoCzB9YyTg1Zu4Ii4oBi4NDCY6cPEoUujs3CvGSfVl6SCGO2D6012g3XGtpnIZqtEi13W3RaBRCQiGbYBxJNXkiXF3pnoqmC9oMGF0NC3QkORrEDOc3OM3vIe9DGs0dHwbq7wZkhF8V8rBhq33FR/nZ/6FimVVFlKUO9LokaS9Ttz6KfZRN156T08xwT9b60dAql9lEHdobTnFjmAmsqwD5DHDueBk1npaOANrUTA1qtAL5TF5IVtChwcXw1p2yiwH3MKbRnIccz87B2jCGwqzqFlsQB5Jx41HsKras1wMfOSx8t38iUMvV663XdqYBAVZOhcPN2WmTQi/Zcpckglb2WWW/xUQUXahcTiQTHfONqTE06BSJQlkC2/EmrUy+BzjIblSHQdnraV7cXKpfZJM8suX459EObLSkeAPXyLI0P6ZiJlCs06zoCOW4DJWmCDtRas9a63vgMsb3vqKMwtnUt36M1BbcUk9ah/4uo8Qg5H8YSnxvfg2kVZoOGmaBL6nzwP2Tks4W6mdCQ5lTIhGbzvUQEQy4HyxJB+XBeGRF4c/P/yGT45l+m4Oo/ -------------------------------------------------------------------------------- /content/code/day-1/09_integrate-pi/pi-integration.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #define PI 3.141592653589793238462643 8 | 9 | int main(int argc, char *argv[]) { 10 | 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm comm = MPI_COMM_WORLD; 13 | 14 | int size, rank; 15 | MPI_Comm_size(comm, &size); 16 | MPI_Comm_rank(comm, &rank); 17 | 18 | long int num_points; 19 | 20 | if (rank == 0) { 21 | if (argc < 2) { 22 | fprintf(stderr, "Usage: %s number_of_points \n", argv[0]); 23 | MPI_Abort(comm, 1); 24 | } 25 | sscanf(argv[1], "%ld", &num_points); 26 | } 27 | 28 | // FIXME: broadcast num_points (note that it is long int) 29 | MPI_Bcast( ... ); 30 | 31 | double delta_x = 1.0 / (double)(num_points); 32 | 33 | long int local_num_points = num_points / size; 34 | if (num_points % size != 0) { 35 | local_num_points += 1; 36 | } 37 | 38 | // FIXME: determine the start and end on each rank 39 | long int start = ... 40 | long int end = ... 41 | ... 42 | 43 | printf("rank %d: start=%ld, end=%ld\n", rank, start, end); 44 | 45 | // FIXME: compute local contribution to integration 46 | // \int_{0}^{1} [ 4.0 / (1.0 + x^2) ] dx 47 | double local_pi = 0.0; 48 | long int i; 49 | for (i = start; i < end; i++) { 50 | ... 51 | } 52 | ... 53 | 54 | // FIXME: compute pi using MPI_Reduce 55 | double global_pi; 56 | MPI_Reduce( ... 
); 57 | 58 | if (rank == 0) { 59 | printf("pi = %22.12f (error: %.3e)\n", global_pi, fabs(global_pi - PI)); 60 | } 61 | 62 | MPI_Finalize(); 63 | 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /content/code/day-1/09_integrate-pi/solution/pi-integration.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #define PI 3.141592653589793238462643 8 | 9 | int main(int argc, char *argv[]) { 10 | 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm comm = MPI_COMM_WORLD; 13 | 14 | int size, rank; 15 | MPI_Comm_size(comm, &size); 16 | MPI_Comm_rank(comm, &rank); 17 | 18 | long int num_points; 19 | 20 | if (rank == 0) { 21 | if (argc < 2) { 22 | fprintf(stderr, "Usage: %s number_of_points \n", argv[0]); 23 | MPI_Abort(comm, 1); 24 | } 25 | sscanf(argv[1], "%ld", &num_points); 26 | } 27 | 28 | MPI_Bcast(&num_points, 1, MPI_LONG_INT, 0, comm); 29 | 30 | double delta_x = 1.0 / (double)(num_points); 31 | 32 | long int local_num_points = num_points / size; 33 | if (num_points % size != 0) { 34 | local_num_points += 1; 35 | } 36 | 37 | long int start = local_num_points * rank; 38 | long int end = local_num_points * (rank + 1); 39 | if (end > num_points) { 40 | end = num_points; 41 | } 42 | 43 | printf("rank %d: start=%ld, end=%ld\n", rank, start, end); 44 | 45 | double local_pi = 0.0; 46 | long int i; 47 | for (i = start; i < end; i++) { 48 | double x = delta_x * ((double)(i) + 0.5); 49 | local_pi += 1.0 / (1.0 + x * x); 50 | } 51 | local_pi *= 4.0 * delta_x; 52 | 53 | double global_pi; 54 | MPI_Reduce(&local_pi, &global_pi, 1, MPI_DOUBLE, MPI_SUM, 0, comm); 55 | 56 | if (rank == 0) { 57 | printf("pi = %22.12f (error: %.3e)\n", global_pi, fabs(global_pi - PI)); 58 | } 59 | 60 | MPI_Finalize(); 61 | 62 | return 0; 63 | } 64 | -------------------------------------------------------------------------------- /content/code/day-4/03_rma-fence/solution/rma-fence.c: -------------------------------------------------------------------------------- 1 | /* Modified from: https://cvw.cac.cornell.edu/MPIoneSided/fence_c_solution */ 2 | 3 | #include 4 | 5 | #include 6 | 7 | #define BUFSZ 10 8 | 9 | int main(int argc, char **argv) { 10 | MPI_Init(&argc, &argv); 11 | 12 | MPI_Comm comm = MPI_COMM_WORLD; 13 | 14 | int rank; 15 | MPI_Comm_rank(comm, &rank); 16 | 17 | // initialize buffer to 0 18 | int buf[BUFSZ] = {0}; 19 | 20 | // only on rank 0 the elements of buf are different from zero 21 | if (rank == 0) { 22 | for (int i = 0; i < BUFSZ; ++i) { 23 | buf[i] = (i + 1) * 3; 24 | } 25 | } 26 | 27 | MPI_Win win; 28 | // create a window and attach it to buf 29 | MPI_Win_create(buf, sizeof(int) * BUFSZ, sizeof(int), MPI_INFO_NULL, comm, 30 | &win); 31 | 32 | printf("Before Get on %d:\n [", rank); 33 | for (int i = 0; i < BUFSZ; ++i) { 34 | printf(" %d, ", buf[i]); 35 | } 36 | printf("]\n"); 37 | 38 | // initialize access epoch 39 | // assert that there were no local RMA operations prior to this epoch 40 | MPI_Win_fence(MPI_MODE_NOPRECEDE, win); 41 | 42 | // ranks > 0 originate MPI_Get with rank 0 as target 43 | if (rank != 0) { 44 | MPI_Get(buf, BUFSZ, MPI_INT, 0, 0, BUFSZ, MPI_INT, win); 45 | } 46 | 47 | // finalize the access epoch 48 | // this blocks until the MPI_Get is complete 49 | // assert that there will be no further local RMA operations 50 | MPI_Win_fence(MPI_MODE_NOSUCCEED, win); 51 | 52 | printf("After Get on %d:\n [", rank); 53 | for (int i = 0; i < BUFSZ; ++i) { 54 | 
printf(" %d, ", buf[i]); 55 | } 56 | printf("]\n"); 57 | 58 | /* Free up our window */ 59 | MPI_Win_free(&win); 60 | 61 | MPI_Finalize(); 62 | 63 | return 0; 64 | } 65 | -------------------------------------------------------------------------------- /content/code/day-3/02_rma-win-allocate/rma-win-allocate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(int argc, char *argv[]) { 7 | MPI_Init(&argc, &argv); 8 | 9 | MPI_Comm comm = MPI_COMM_WORLD; 10 | 11 | int size; 12 | MPI_Comm_size(comm, &size); 13 | if (size != 2) { 14 | printf( 15 | "This application is meant to be run with 2 MPI processes, not %d.\n", 16 | size); 17 | MPI_Abort(comm, EXIT_FAILURE); 18 | } 19 | 20 | // Get my rank 21 | int rank; 22 | MPI_Comm_rank(comm, &rank); 23 | 24 | /* FIXME create the window */ 25 | // Create the window 26 | int *window_buffer; 27 | MPI_Win win; 28 | MPI_Win_allocate(/* MPI_Aint size */, 29 | /* int disp_unit */, 30 | /* MPI_Info info */, 31 | /* MPI_Comm comm */, 32 | /* void *baseptr */, 33 | /* MPI_Win *win */) 34 | 35 | // start access epoch 36 | MPI_Win_fence(0, win); 37 | 38 | // local store 39 | if (rank == 1) { 40 | window_buffer[0] = 42; 41 | window_buffer[1] = 88; 42 | window_buffer[2] = 12; 43 | window_buffer[3] = 3; 44 | } 45 | 46 | // start access epoch 47 | MPI_Win_fence(0, win); 48 | 49 | int getbuf[4]; 50 | if (rank == 0) { 51 | // Fetch the value from the MPI process 1 window 52 | MPI_Get(&getbuf, 4, MPI_INT, 1, 0, 4, MPI_INT, win); 53 | } 54 | 55 | // end access epoch 56 | MPI_Win_fence(0, win); 57 | 58 | if (rank == 0) { 59 | printf("[MPI process 0] Value fetched from MPI process 1 window:"); 60 | for (int i = 0; i < 4; ++i) { 61 | printf(" %d", getbuf[i]); 62 | } 63 | printf("\n"); 64 | } 65 | 66 | // Destroy the window 67 | MPI_Win_free(&win); 68 | 69 | MPI_Finalize(); 70 | 71 | return EXIT_SUCCESS; 72 | } 73 | -------------------------------------------------------------------------------- /content/code/day-1/05_pokemon-pack-unpack/pokemon-pack-unpack.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #define STRLEN 25 8 | 9 | int main(int argc, char *argv[]) { 10 | int rank; 11 | int size; 12 | /* TODO explain what the purpose of position is */ 13 | int position; 14 | 15 | // name of pokemon attacking 16 | char name[STRLEN]; 17 | // life points 18 | double life_points; 19 | // damage done by the attack 20 | int damage; 21 | // strength multiplier 22 | double multiplier; 23 | 24 | // buffer for the message 25 | // we set it to BUFSIZ (defined in stdio.h) assuming that's big enough! 26 | char message[BUFSIZ]; 27 | 28 | MPI_Init(&argc, &argv); 29 | 30 | MPI_Comm comm = MPI_COMM_WORLD; 31 | 32 | MPI_Comm_size(comm, &size); 33 | MPI_Comm_rank(comm, &rank); 34 | 35 | if (rank == 0) { 36 | /* FIXME define the data for the move in the game */ 37 | 38 | /* FIXME pack the data into the message buffer with calls to MPI_Pack */ 39 | 40 | // let it be known that a move was made! 41 | /* FIXME set the MPI_Datatype parameter to the correct one for a packed message */ 42 | MPI_Bcast(message, BUFSIZ, xxx, 0, comm); 43 | } else { 44 | // matching broadcast on all other processes. 
45 | /* FIXME set the MPI_Datatype parameter to the correct one for a packed message */ 46 | MPI_Bcast(message, BUFSIZ, xxx, 0, comm); 47 | 48 | /* FIXME unpack the data from the message buffer with calls to MPI_Unpack */ 49 | position = 0; 50 | 51 | // did we get it right? 52 | printf("rank %d:\n", rank); 53 | printf(" name = %s\n", name); 54 | printf(" life_points = %2.2f\n", life_points); 55 | printf(" damage = %d\n", damage); 56 | printf(" multiplier = %2.2f\n", multiplier); 57 | } 58 | 59 | MPI_Finalize(); 60 | 61 | return EXIT_SUCCESS; 62 | } 63 | -------------------------------------------------------------------------------- /content/code/day-1/07_pokemon-type-create-struct/pokemon-type-create-struct.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #define STRLEN 25 8 | 9 | /* FIXME Declare a Pokemon struct containing name, life_points, damage, and multiplier fields */ 10 | struct Pokemon { 11 | // name of pokemon attacking 12 | // life points 13 | // damage done by the attack 14 | // strength multiplier 15 | }; 16 | 17 | int main(int argc, char *argv[]) { 18 | int rank; 19 | int size; 20 | 21 | struct Pokemon charizard; 22 | 23 | MPI_Init(&argc, &argv); 24 | 25 | MPI_Comm comm = MPI_COMM_WORLD; 26 | 27 | MPI_Comm_size(comm, &size); 28 | MPI_Comm_rank(comm, &rank); 29 | 30 | /* FIXME What is the typesignature for Pokemon */ 31 | MPI_Datatype typesig ..; 32 | /* FIXME What are the block lenghts? */ 33 | int block_lengths ..; 34 | 35 | /* FIXME What are the displacements? Remember, they are relative with respect to the first field! */ 36 | MPI_Aint base_address, displacements[4]; 37 | 38 | MPI_Get_address(.., ..); 39 | 40 | /* FIXME Create the datatype */ 41 | MPI_Datatype mpi_pokemon; 42 | 43 | /* FIXME Commit the type */ 44 | 45 | if (rank == 0) { 46 | sprintf(charizard.name, "Charizard"); 47 | charizard.life_points = 180.0; 48 | charizard.damage = 60; 49 | charizard.multiplier = 0.89; 50 | 51 | MPI_Bcast(&charizard, 1, mpi_pokemon, 0, comm); 52 | } else { 53 | // matching broadcast on all other processes. 54 | MPI_Bcast(&charizard, 1, mpi_pokemon, 0, comm); 55 | 56 | // did we get it right? 57 | printf("rank %d:\n", rank); 58 | printf(" pokemon = %s\n", charizard.name); 59 | printf(" life_points = %2.2f\n", charizard.life_points); 60 | printf(" damage = %d\n", charizard.damage); 61 | printf(" multiplier = %2.2f\n", charizard.multiplier); 62 | } 63 | 64 | /* FIXME free the type */ 65 | 66 | MPI_Finalize(); 67 | 68 | return EXIT_SUCCESS; 69 | } 70 | -------------------------------------------------------------------------------- /content/code/day-1/04_struct-extent-size/solution/struct-extent-size.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | struct Pair { 7 | int first; 8 | char second; 9 | }; 10 | 11 | int main(int argc, char *argv[]) { 12 | // designated initilization 13 | struct Pair my_pair = {.first = 5, .second = 'G'}; 14 | 15 | // initialize MPI 16 | MPI_Init(&argc, &argv); 17 | 18 | // build up the typemap for Pair 19 | // the type signature for Pair 20 | MPI_Datatype typesig[2] = {MPI_INT, MPI_CHAR}; 21 | // how many of each type in a "block" of Pair 22 | int block_lengths[2] = {1, 1}; 23 | // displacements of data members in Pair 24 | MPI_Aint displacements[2]; 25 | // why not use pointer arithmetic directly? 
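// Answer to the question above: the C standard only defines pointer
// subtraction between elements of the same array, and casting pointers to
// integers is not portable on every platform MPI targets, so MPI_Get_address
// is the portable way to obtain an MPI_Aint for an arbitrary object.  When
// the displacements should be relative to the first member (as in the
// pokemon-type-create-struct exercise), subtract the base address; with
// MPI-3.1 this is spelled MPI_Aint_diff (a sketch, not part of this solution):
//   MPI_Aint base;
//   MPI_Get_address(&my_pair, &base);
//   displacements[0] = MPI_Aint_diff(displacements[0], base);
//   displacements[1] = MPI_Aint_diff(displacements[1], base);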
26 | MPI_Get_address(&my_pair.first, &displacements[0]); 27 | MPI_Get_address(&my_pair.second, &displacements[1]); 28 | 29 | // create and commit the new type 30 | MPI_Datatype mpi_pair; 31 | MPI_Type_create_struct(2, block_lengths, displacements, typesig, &mpi_pair); 32 | MPI_Type_commit(&mpi_pair); 33 | 34 | MPI_Aint lb, extent; 35 | int size; 36 | 37 | // char 38 | printf("sizeof(char) = %ld\n", sizeof(char)); 39 | MPI_Type_get_extent(MPI_CHAR, &lb, &extent); 40 | MPI_Type_size(MPI_CHAR, &size); 41 | printf("For MPI_CHAR:\n lowerbound = %ld; extent = %ld; size = %d\n", lb, 42 | extent, size); 43 | 44 | // int 45 | printf("sizeof(int) = %ld\n", sizeof(int)); 46 | MPI_Type_get_extent(MPI_INT, &lb, &extent); 47 | MPI_Type_size(MPI_INT, &size); 48 | printf("For MPI_INT:\n lowerbound = %ld; extent = %ld; size = %d\n", lb, 49 | extent, size); 50 | 51 | // Pair 52 | printf("sizeof(my_pair) = %ld\n", sizeof(my_pair)); 53 | MPI_Type_get_extent(mpi_pair, &lb, &extent); 54 | MPI_Type_size(mpi_pair, &size); 55 | printf("For mpi_pair:\n lowerbound = %ld; extent = %ld; size = %d\n", lb, 56 | extent, size); 57 | 58 | MPI_Type_free(&mpi_pair); 59 | 60 | MPI_Finalize(); 61 | 62 | return EXIT_SUCCESS; 63 | } 64 | -------------------------------------------------------------------------------- /content/code/day-2/01_allgather/solution/collective-communication-allgather.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | int main(int argc, char **argv) 6 | { 7 | /* Initialize the MPI environment and report */ 8 | MPI_Init(&argc, &argv); 9 | 10 | MPI_Comm comm = MPI_COMM_WORLD; 11 | 12 | int rank, size; 13 | MPI_Comm_rank(comm, &rank); 14 | MPI_Comm_size(comm, &size); 15 | 16 | /* Prepare the values to all-gather */ 17 | int initial_values[3] = { 100, -1000, 3 }; 18 | 19 | int values_to_all_gather[3]; 20 | values_to_all_gather[0] = initial_values[0] * (rank + 1); 21 | values_to_all_gather[1] = initial_values[1] * (rank + 1); 22 | values_to_all_gather[2] = initial_values[2] * (rank + 1); 23 | 24 | /* Report the state before the all-gather */ 25 | printf("On rank %d, pre-all-gather values were [%d, %d, %d]\n", rank, 26 | values_to_all_gather[0], 27 | values_to_all_gather[1], 28 | values_to_all_gather[2]); 29 | 30 | /* Do the all-gather */ 31 | int result_values[12]; 32 | MPI_Allgather(values_to_all_gather, 3, MPI_INT, 33 | &result_values, 3, MPI_INT, 34 | comm); 35 | 36 | /* Report the state after the all-gather */ 37 | printf("On rank %d, all-gathered values were:\n [", rank); 38 | for(int i = 0; i < 11; i = i + 1) 39 | { 40 | printf("%d, ", result_values[i]); 41 | } 42 | printf("%d]\n", result_values[11]); 43 | 44 | /* Report whether the code is correct */ 45 | int success = 1; 46 | for(int i = 0; i < 12; i = i + 1) 47 | { 48 | int expected_value = initial_values[i % 3] * ((i / 3) + 1); 49 | success = success && (result_values[i] == expected_value); 50 | } 51 | 52 | if (success) 53 | { 54 | printf("SUCCESS on rank %d!\n", rank); 55 | } 56 | else 57 | { 58 | printf("Improvement needed before rank %d can report success!\n", rank); 59 | } 60 | 61 | /* Clean up and exit */ 62 | MPI_Finalize(); 63 | 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /content/code/day-2/02_alltoall/solution/collective-communication-alltoall.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | int main(int argc, char **argv) 6 | { 7 | /* 
Initialize the MPI environment and report */ 8 | MPI_Init(&argc, &argv); 9 | 10 | MPI_Comm comm = MPI_COMM_WORLD; 11 | 12 | int rank, size; 13 | MPI_Comm_rank(comm, &rank); 14 | MPI_Comm_size(comm, &size); 15 | 16 | /* Prepare the values to all-to-all */ 17 | int initial_values[3] = { 100, -1000, 3 }; 18 | 19 | int values_to_all_to_all[12]; 20 | for(int i = 0; i < 12; i = i + 1) 21 | { 22 | values_to_all_to_all[i] = initial_values[i % 3] * (rank + 1) + ((i / 3) + 1); 23 | } 24 | 25 | /* Report the state before the all-to-all */ 26 | printf("On rank %d, values before the all-to-all were:\n [", rank); 27 | for(int i = 0; i < 11; i = i + 1) 28 | { 29 | printf("%d, ", values_to_all_to_all[i]); 30 | } 31 | printf("%d]\n", values_to_all_to_all[11]); 32 | 33 | /* Do the all-to-all */ 34 | int result_values[12]; 35 | MPI_Alltoall(values_to_all_to_all, 3, MPI_INT, 36 | &result_values, 3, MPI_INT, 37 | comm); 38 | 39 | /* Report the state after the all-to-all */ 40 | printf("On rank %d, values after the all-to-all were:\n [", rank); 41 | for(int i = 0; i < 11; i = i + 1) 42 | { 43 | printf("%d, ", result_values[i]); 44 | } 45 | printf("%d]\n", result_values[11]); 46 | 47 | /* Report whether the code is correct */ 48 | int success = 1; 49 | for(int i = 0; i < 12; i = i + 1) 50 | { 51 | int expected_value = initial_values[i % 3] * ((i / 3) + 1) + (rank + 1); 52 | success = success && (result_values[i] == expected_value); 53 | } 54 | 55 | if (success) 56 | { 57 | printf("SUCCESS on rank %d!\n", rank); 58 | } 59 | else 60 | { 61 | printf("Improvement needed before rank %d can report success!\n", rank); 62 | } 63 | 64 | /* Clean up and exit */ 65 | MPI_Finalize(); 66 | 67 | return 0; 68 | } 69 | -------------------------------------------------------------------------------- /content/code/day-2/01_allgather/collective-communication-allgather.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | int main(int argc, char **argv) 6 | { 7 | /* Initialize the MPI environment and report */ 8 | MPI_Init(&argc, &argv); 9 | 10 | MPI_Comm comm = MPI_COMM_WORLD; 11 | 12 | int rank, size; 13 | MPI_Comm_rank(comm, &rank); 14 | MPI_Comm_size(comm, &size); 15 | 16 | /* Prepare the values to all-gather */ 17 | int initial_values[3] = { 100, -1000, 3 }; 18 | 19 | int values_to_all_gather[3]; 20 | values_to_all_gather[0] = initial_values[0] * (rank + 1); 21 | values_to_all_gather[1] = initial_values[1] * (rank + 1); 22 | values_to_all_gather[2] = initial_values[2] * (rank + 1); 23 | 24 | /* Report the state before the all-gather */ 25 | printf("On rank %d, pre-all-gather values were [%d, %d, %d]\n", rank, 26 | values_to_all_gather[0], 27 | values_to_all_gather[1], 28 | values_to_all_gather[2]); 29 | 30 | /* ==== CHALLENGE ==== 31 | * 32 | * Uncomment and fix the MPI call to make this code work! 33 | * We want the values_to_all_gather to end up in 34 | * result_values on all ranks. 
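   * One possible completion (a sketch; compare with the solution file):
   *   MPI_Allgather(values_to_all_gather, 3, MPI_INT,
   *                 result_values, 3, MPI_INT, comm);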
35 | */ 36 | /* Do the all-gather */ 37 | int result_values[12]; 38 | /* MPI_xxx(xxx); */ 39 | 40 | /* Report the state after the all-gather */ 41 | printf("On rank %d, all-gathered values were:\n [", rank); 42 | for(int i = 0; i < 11; i = i + 1) 43 | { 44 | printf("%d, ", result_values[i]); 45 | } 46 | printf("%d]\n", result_values[11]); 47 | 48 | /* Report whether the code is correct */ 49 | int success = 1; 50 | for(int i = 0; i < 12; i = i + 1) 51 | { 52 | int expected_value = initial_values[i % 3] * ((i / 3) + 1); 53 | success = success && (result_values[i] == expected_value); 54 | } 55 | 56 | if (success) 57 | { 58 | printf("SUCCESS on rank %d!\n", rank); 59 | } 60 | else 61 | { 62 | printf("Improvement needed before rank %d can report success!\n", rank); 63 | } 64 | 65 | /* Clean up and exit */ 66 | MPI_Finalize(); 67 | 68 | return 0; 69 | } 70 | -------------------------------------------------------------------------------- /content/code/day-2/02_alltoall/collective-communication-alltoall.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | int main(int argc, char **argv) 6 | { 7 | /* Initialize the MPI environment and report */ 8 | MPI_Init(&argc, &argv); 9 | 10 | MPI_Comm comm = MPI_COMM_WORLD; 11 | 12 | int rank, size; 13 | MPI_Comm_rank(comm, &rank); 14 | MPI_Comm_size(comm, &size); 15 | 16 | /* Prepare the values to all-to-all */ 17 | int initial_values[3] = { 100, -1000, 3 }; 18 | 19 | int values_to_all_to_all[12]; 20 | for(int i = 0; i < 12; i = i + 1) 21 | { 22 | values_to_all_to_all[i] = initial_values[i % 3] * (rank + 1) + ((i / 3) + 1); 23 | } 24 | 25 | /* Report the state before the all-to-all */ 26 | printf("On rank %d, values before the all-to-all were:\n [", rank); 27 | for(int i = 0; i < 11; i = i + 1) 28 | { 29 | printf("%d, ", values_to_all_to_all[i]); 30 | } 31 | printf("%d]\n", values_to_all_to_all[11]); 32 | 33 | /* ==== CHALLENGE ==== 34 | * 35 | * Uncomment and fix the MPI call to make this code work! 36 | * We want the values_to_all_to_all to end up in 37 | * result_values on all ranks. 
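   * One possible completion (a sketch; compare with the solution file):
   *   MPI_Alltoall(values_to_all_to_all, 3, MPI_INT,
   *                result_values, 3, MPI_INT, comm);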
38 | */ 39 | /* Do the all-to-all */ 40 | int result_values[12]; 41 | /* MPI_xxx(xxx); */ 42 | 43 | /* Report the state after the all-to-all */ 44 | printf("On rank %d, values after the all-to-all were:\n [", rank); 45 | for(int i = 0; i < 11; i = i + 1) 46 | { 47 | printf("%d, ", result_values[i]); 48 | } 49 | printf("%d]\n", result_values[11]); 50 | 51 | /* Report whether the code is correct */ 52 | int success = 1; 53 | for(int i = 0; i < 12; i = i + 1) 54 | { 55 | int expected_value = initial_values[i % 3] * ((i / 3) + 1) + (rank + 1); 56 | success = success && (result_values[i] == expected_value); 57 | } 58 | 59 | if (success) 60 | { 61 | printf("SUCCESS on rank %d!\n", rank); 62 | } 63 | else 64 | { 65 | printf("Improvement needed before rank %d can report success!\n", rank); 66 | } 67 | 68 | /* Clean up and exit */ 69 | MPI_Finalize(); 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /content/code/day-2/00_scatter-and-gather/solution/scatter-and-gather-2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(int argc, char *argv[]) 7 | { 8 | /* Initialize the MPI environment and report */ 9 | 10 | MPI_Init(&argc, &argv); 11 | 12 | MPI_Comm comm = MPI_COMM_WORLD; 13 | 14 | int rank, size; 15 | MPI_Comm_rank(comm, &rank); 16 | MPI_Comm_size(comm, &size); 17 | 18 | /* Prepare the data */ 19 | 20 | int* matrix; 21 | int i, j; 22 | 23 | if (rank == 0) 24 | { 25 | matrix = (int*)(malloc(sizeof(int) * size * size)); 26 | for (i = 0; i < size; i++) 27 | { 28 | for (j = 0; j < size; j++) 29 | { 30 | matrix[i * size + j] = i + j; 31 | } 32 | } 33 | } 34 | 35 | /* Do the scatter */ 36 | 37 | int* vector = (int*)(malloc(sizeof(int) * size)); 38 | 39 | MPI_Scatter(matrix, size, MPI_INT, vector, size, MPI_INT, 0, comm); 40 | 41 | /* Do the gather */ 42 | 43 | int* matrix_2; 44 | 45 | if (rank == 0) 46 | { 47 | matrix_2 = (int*)(malloc(sizeof(int) * size * size)); 48 | } 49 | 50 | MPI_Gather(vector, size, MPI_INT, matrix_2, size, MPI_INT, 0, comm); 51 | 52 | /* Check the result */ 53 | 54 | int success = 1; 55 | for (i = 0; i < size; i++) 56 | { 57 | if (vector[i] != i + rank) 58 | { 59 | success = 0; 60 | } 61 | } 62 | 63 | if (rank == 0) 64 | { 65 | for (i = 0; i < size; i++) 66 | { 67 | for (j = 0; j < size; j++) 68 | { 69 | if (matrix_2[i * size + j] != i + j) 70 | { 71 | success = 0; 72 | } 73 | } 74 | } 75 | } 76 | 77 | if (success) 78 | { 79 | printf("SUCCESS on rank %d!\n", rank); 80 | } 81 | else 82 | { 83 | printf("Improvement needed before rank %d can report success!\n", rank); 84 | } 85 | 86 | /* Clean up and exit */ 87 | 88 | if (rank == 0) 89 | { 90 | free(matrix); 91 | free(matrix_2); 92 | } 93 | free(vector); 94 | 95 | MPI_Finalize(); 96 | 97 | return 0; 98 | } 99 | -------------------------------------------------------------------------------- /content/_static/overrides.css: -------------------------------------------------------------------------------- 1 | /* override colors in sphinx_lesson.css with the schemes here: https://personal.sron.nl/~pault/#sec:qualitative */ 2 | 3 | /* instructor-note */ 4 | .rst-content .instructor-note { 5 | background: #DDDDDD; 6 | } 7 | .rst-content .instructor-note > .admonition-title { 8 | background: #BBBBBB; 9 | } 10 | .rst-content .instructor-note > .admonition-title::before { 11 | content: ""; 12 | } 13 | 14 | /* callout */ 15 | .rst-content .callout { 16 | background: #EEEEBB; 17 | } 18 | 
.rst-content .callout > .admonition-title { 19 | background: #BBCC33; 20 | } 21 | 22 | /* questions */ 23 | .rst-content .questions { 24 | background: rgba(253, 219, 199, 0.3); 25 | } 26 | .rst-content .questions > .admonition-title { 27 | background: rgba(204, 51, 17, 0.5); 28 | } 29 | 30 | /* discussion */ 31 | .rst-content .discussion { 32 | background: rgba(231, 212, 232 0.3); 33 | } 34 | .rst-content .discussion > .admonition-title { 35 | background: rgba(194, 165, 207, 0.5); 36 | } 37 | 38 | /* signature */ 39 | .rst-content .signature { 40 | background: rgba(217, 240, 211, 0.3); 41 | } 42 | .rst-content .signature > .admonition-title { 43 | background: rgba(172, 211, 158, 0.5); 44 | } 45 | .rst-content .signature > .admonition-title::before { 46 | content: "\01F527"; 47 | } 48 | 49 | /* parameters */ 50 | .rst-content .parameters { 51 | background: rgba(217, 240, 211, 0.0); 52 | } 53 | .rst-content .parameters > .admonition-title { 54 | background: rgba(172, 211, 158, 0.5); 55 | } 56 | .rst-content .parameters > .admonition-title::before { 57 | content: "\01F4BB"; 58 | } 59 | 60 | /* typealong */ 61 | .rst-content .typealong { 62 | background: rgba(221, 221, 221, 0.3); 63 | } 64 | .rst-content .typealong > .admonition-title { 65 | background: rgba(187, 187, 187, 1.0); 66 | } 67 | .rst-content .typealong > .admonition-title::before { 68 | content: "\02328"; 69 | } 70 | 71 | /* Equation numbers to the right */ 72 | .math { 73 | text-align: left; 74 | } 75 | .eqno { 76 | float: right; 77 | } 78 | 79 | /* Colors and text decoration. 80 | * For example, :black:`text in black` or :blink:`text blinking` in rST. 81 | */ 82 | 83 | .red { 84 | color: red; 85 | } 86 | 87 | .blue { 88 | color: blue; 89 | } 90 | -------------------------------------------------------------------------------- /content/code/day-1/08_broadcast/solution/collective-communication-broadcast.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | int main(int argc, char **argv) { 6 | /* Initialize the MPI environment and report */ 7 | MPI_Init(&argc, &argv); 8 | 9 | MPI_Comm comm = MPI_COMM_WORLD; 10 | 11 | int rank, size; 12 | MPI_Comm_rank(comm, &rank); 13 | MPI_Comm_size(comm, &size); 14 | 15 | const int rank_of_root = 0; 16 | 17 | /* Prepare the values to broadcast */ 18 | int expected_values[2] = {100, -1000}; 19 | 20 | int values_to_broadcast[2] = {0, 0}; 21 | if (rank == rank_of_root) { 22 | values_to_broadcast[0] = expected_values[0]; 23 | values_to_broadcast[1] = expected_values[1]; 24 | } 25 | 26 | /* Report the state before the broadcast */ 27 | printf("On rank %d, pre-broadcast values were [%d, %d]\n", rank, 28 | values_to_broadcast[0], values_to_broadcast[1]); 29 | 30 | /* Do the broadcast */ 31 | MPI_Bcast(values_to_broadcast, 2, MPI_INT, rank_of_root, comm); 32 | 33 | /* Report the state after the broadcast */ 34 | printf("On rank %d, broadcast values were [%d, %d]\n", rank, 35 | values_to_broadcast[0], values_to_broadcast[1]); 36 | 37 | int reduced_values[2]; 38 | MPI_Reduce(values_to_broadcast, reduced_values, 2, MPI_INT, MPI_SUM, 39 | rank_of_root, comm); 40 | 41 | /* Report the state after the reduction */ 42 | if (rank == rank_of_root) { 43 | printf("On rank %d, reduced values were [%d, %d]\n", rank, 44 | reduced_values[0], reduced_values[1]); 45 | } 46 | 47 | /* Report whether the code is correct */ 48 | int success = ((values_to_broadcast[0] == expected_values[0]) && 49 | (values_to_broadcast[1] == expected_values[1])); 50 | 51 | /* 
Success on the root rank also means checking the reduction */ 52 | if (rank == rank_of_root) { 53 | success = success && ((reduced_values[0] == expected_values[0] * size) && 54 | (reduced_values[1] == expected_values[1] * size)); 55 | } 56 | 57 | if (success) { 58 | printf("SUCCESS on rank %d!\n", rank); 59 | } else { 60 | printf("Improvement needed before rank %d can report success!\n", rank); 61 | } 62 | 63 | /* Clean up and exit */ 64 | MPI_Finalize(); 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /content/code/day-4/04_rma-pscw/rma-pswc.c: -------------------------------------------------------------------------------- 1 | /* Modified from: https://cvw.cac.cornell.edu/MPIoneSided/pscw_c_solution */ 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | int main(int argc, char **argv) { 9 | MPI_Init(&argc, &argv); 10 | 11 | MPI_Comm comm = MPI_COMM_WORLD; 12 | 13 | int size; 14 | MPI_Comm_size(comm, &size); 15 | 16 | int rank; 17 | MPI_Comm_rank(comm, &rank); 18 | 19 | // creat a buffer of size equal to the size of the communicator 20 | double *buf = (double *)(malloc(sizeof(double) * size)); 21 | // save ranks of each process, used later on to create groups 22 | int *ranks = (int *)(malloc(sizeof(int) * size)); 23 | for (int i = 0; i < size; ++i) { 24 | buf[i] = rank * 11; 25 | ranks[i] = i; 26 | } 27 | 28 | if (rank == 0) { 29 | printf("on process %d:\n [", rank); 30 | for (int i = 0; i < size; ++i) { 31 | printf(" %f, ", buf[i]); 32 | } 33 | printf("]\n"); 34 | } 35 | 36 | MPI_Win win; 37 | // FIXME create new window and attach it to buf 38 | MPI_xxx(/* */); 39 | 40 | // Post/Start/Wait/Complete works with process groups 41 | // Perform the necessary shenanigans 42 | // processes with ranks > 0 are origin processes for RMA calls 43 | // process with rank = 0 is the target process for RMA calls 44 | MPI_Group comm_group; 45 | // FIXME get comm_group 46 | MPI_xxx(/* */); 47 | 48 | MPI_Group group; 49 | if (rank == 0) { 50 | // FIXME Origin group consists of all ranks > 0 51 | MPI_xxx(/* */); 52 | // FIXME initialize the exposure epoch 53 | MPI_xxx(/* */); 54 | // FIXME wait for exposure epoch to finish 55 | MPI_xxx(/* */); 56 | } else { 57 | // FIXME Target group consists of rank 0 58 | MPI_xxx(/* */); 59 | // FIXME initialize the access epoch 60 | MPI_xxx(/* */); 61 | // FIXME put element at index rank into buffer on rank 0 62 | MPI_xxx(/* */); 63 | // FIXME Terminate the access epoch 64 | MPI_xxx(/* */); 65 | } 66 | 67 | if (rank == 0) { 68 | printf("on process %d:\n [", rank); 69 | for (int i = 0; i < size; ++i) { 70 | printf(" %f, ", buf[i]); 71 | } 72 | printf("]\n"); 73 | } 74 | 75 | // FIXME free window and groups 76 | MPI_xxx(/* */); 77 | 78 | MPI_Finalize(); 79 | 80 | return 0; 81 | } 82 | -------------------------------------------------------------------------------- /content/code/day-2/00_scatter-and-gather/scatter-and-gather-2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(int argc, char *argv[]) 7 | { 8 | /* Initialize the MPI environment and report */ 9 | 10 | MPI_Init(&argc, &argv); 11 | 12 | MPI_Comm comm = MPI_COMM_WORLD; 13 | 14 | int rank, size; 15 | MPI_Comm_rank( ... ); 16 | MPI_Comm_size( ... 
); 17 | 18 | /* Prepare the matrix and vector */ 19 | 20 | int* matrix; 21 | int i, j; 22 | 23 | if (rank == 0) 24 | { 25 | matrix = (int*)(malloc(sizeof(int) * size * size)); 26 | 27 | /* FIXME: assign values for elements in matrix */ 28 | ... 29 | } 30 | 31 | /* Do the scatter */ 32 | 33 | int* vector = (int*)(malloc(sizeof(int) * size)); 34 | 35 | /* FIXME: complete MPI_Scatter */ 36 | MPI_Scatter( ... ); 37 | 38 | /* Do the gather */ 39 | 40 | int* matrix_2; 41 | 42 | if (rank == 0) 43 | { 44 | matrix_2 = (int*)(malloc(sizeof(int) * size * size)); 45 | } 46 | 47 | /* FIXME: complete MPI_Gather */ 48 | MPI_Gather( ... ); 49 | 50 | /* Check the result */ 51 | 52 | int success = 1; 53 | for (i = 0; i < size; i++) 54 | { 55 | /* FIXME: check the values in vector */ 56 | if (vector[i] != ...) 57 | { 58 | success = 0; 59 | } 60 | } 61 | 62 | if (rank == 0) 63 | { 64 | for (i = 0; i < size; i++) 65 | { 66 | for (j = 0; j < size; j++) 67 | { 68 | /* FIXME: check the values in matrix_2 */ 69 | if (matrix_2[i * size + j] != ...) 70 | { 71 | success = 0; 72 | } 73 | } 74 | } 75 | } 76 | 77 | if (success) 78 | { 79 | printf("SUCCESS on rank %d!\n", rank); 80 | } 81 | else 82 | { 83 | printf("Improvement needed before rank %d can report success!\n", rank); 84 | } 85 | 86 | /* Clean up and exit */ 87 | 88 | /* FIXME: complete the code for cleaning up */ 89 | if (rank == 0) 90 | { 91 | free( ... ); 92 | free( ... ); 93 | } 94 | free( ... ); 95 | 96 | MPI_...(); 97 | 98 | return 0; 99 | } 100 | -------------------------------------------------------------------------------- /content/code/day-4/10_integrate-pi/pi-integration.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #define PI 3.141592653589793238462643 8 | 9 | int main(int argc, char *argv[]) { 10 | 11 | /* ==== CHALLENGE ==== 12 | * 13 | * Uncomment the line and fix the MPI call to make this code work! 14 | * We want to use fork-join parallelism, so pick a more suitable 15 | * threading mode for OpenMP parallel regions */ 16 | /* Initialize the MPI environment and check */ 17 | /* required = FIXME */ 18 | /* MPI_xxx(xxx) FIXME */ 19 | MPI_Comm comm = MPI_COMM_WORLD; 20 | 21 | /* If the program can't run, stop running */ 22 | if (/* FIXME */) 23 | { 24 | printf("Sorry, the MPI library does not provide " 25 | "this threading level! 
Aborting!\n"); 26 | MPI_Abort(comm, 1); 27 | } 28 | 29 | int size, rank; 30 | MPI_Comm_size(comm, &size); 31 | MPI_Comm_rank(comm, &rank); 32 | 33 | long int num_points; 34 | 35 | if (rank == 0) { 36 | if (argc < 2) { 37 | fprintf(stderr, "Usage: %s number_of_points \n", argv[0]); 38 | MPI_Abort(comm, 1); 39 | } 40 | sscanf(argv[1], "%ld", &num_points); 41 | } 42 | 43 | MPI_Bcast(&num_points, 1, MPI_LONG_INT, 0, comm); 44 | 45 | double delta_x = 1.0 / (double)(num_points); 46 | 47 | long int local_num_points = num_points / size; 48 | if (num_points % size != 0) { 49 | local_num_points += 1; 50 | } 51 | 52 | long int start = local_num_points * rank; 53 | long int end = local_num_points * (rank + 1); 54 | if (end > num_points) { 55 | end = num_points; 56 | } 57 | 58 | printf("rank %d: start=%ld, end=%ld\n", rank, start, end); 59 | 60 | double local_pi = 0.0; 61 | long int i; 62 | /* FIXME: OpenMP reduce the local pi summation in this loop */ 63 | for (i = start; i < end; i++) { 64 | double x = delta_x * ((double)(i) + 0.5); 65 | local_pi += 1.0 / (1.0 + x * x); 66 | } 67 | local_pi *= 4.0 * delta_x; 68 | 69 | double global_pi; 70 | MPI_Reduce(/* FIXME: MPI reduce the local pi summation */); 71 | 72 | if (rank == 0) { 73 | printf("pi = %22.12f (error: %.3e)\n", global_pi, fabs(global_pi - PI)); 74 | } 75 | 76 | MPI_Finalize(); 77 | 78 | return 0; 79 | } 80 | -------------------------------------------------------------------------------- /content/code/day-1/01_comms-groups-divide-evenly/comms-groups-divide-evenly.c: -------------------------------------------------------------------------------- 1 | /* Adapated from: 2 | * https://github.com/UoB-HPC/hpc-course-examples/blob/master/mpi/advanced/example10/group_to_comm.c 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | int main(int argc, char *argv[]) 11 | { 12 | MPI_Init(&argc, &argv); 13 | 14 | // get rank and size from global communicator 15 | 16 | int global_rank, global_size; 17 | MPI_Comm_rank(MPI_COMM_WORLD, &global_rank); 18 | MPI_Comm_size(MPI_COMM_WORLD, &global_size); 19 | 20 | // FIXME: create global group 21 | 22 | MPI_Group global_group; 23 | MPI_Comm_group( ... ); 24 | 25 | // FIXME: create and initialize incl_ranks_1 and incl_ranks_2 26 | 27 | int num_incl_ranks_1 = ... 28 | int num_incl_ranks_2 = ... 29 | 30 | int* incl_ranks_1 = (int*)(malloc(sizeof(int) * num_incl_ranks_1)); 31 | int* incl_ranks_2 = (int*)(malloc(sizeof(int) * num_incl_ranks_2)); 32 | 33 | int i; 34 | for (i = 0; i < num_incl_ranks_1; i++) 35 | { 36 | incl_ranks_1[i] = ... 37 | } 38 | for (i = 0; i < num_incl_ranks_2; i++) 39 | { 40 | incl_ranks_2[i] = ... 41 | } 42 | 43 | // FIXME: create local group using MPI_Group_incl 44 | 45 | MPI_Group local_group; 46 | if (global_rank < num_incl_ranks_1) 47 | { 48 | MPI_Group_incl( ... ); 49 | } 50 | else 51 | { 52 | MPI_Group_incl( ... ); 53 | } 54 | 55 | // FIXME: create local communicator 56 | 57 | MPI_Comm local_comm; 58 | MPI_Comm_create( ... ); 59 | 60 | // FIXME: get rank in local communicator 61 | 62 | int local_rank; 63 | MPI_Comm_rank( ... 
); 64 | 65 | // send global rank as message 66 | 67 | int sendbuf = global_rank; 68 | int recvbuf; 69 | int count = 1; 70 | 71 | // compute sum of global ranks in local communicator 72 | 73 | MPI_Allreduce(&sendbuf, &recvbuf, count, MPI_INT, MPI_SUM, local_comm); 74 | 75 | printf("global_rank= %d local_rank= %d recvbuf= %d\n", global_rank, local_rank, recvbuf); 76 | 77 | free(incl_ranks_1); 78 | free(incl_ranks_2); 79 | 80 | // FIXME: complete MPI_Comm_free and MPI_Group_free 81 | 82 | MPI_Comm_free( ... ); 83 | MPI_Group_free( ... ); 84 | MPI_Group_free( ... ); 85 | 86 | MPI_Finalize(); 87 | 88 | return 0; 89 | } 90 | -------------------------------------------------------------------------------- /content/code/day-1/07_pokemon-type-create-struct/solution/pokemon-type-create-struct.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #define STRLEN 25 8 | 9 | struct Pokemon { 10 | // name of pokemon attacking 11 | char name[STRLEN]; 12 | // life points 13 | double life_points; 14 | // damage done by the attack 15 | int damage; 16 | // strength multiplier 17 | double multiplier; 18 | }; 19 | 20 | int main(int argc, char *argv[]) { 21 | int rank; 22 | int size; 23 | 24 | struct Pokemon charizard; 25 | 26 | MPI_Init(&argc, &argv); 27 | 28 | MPI_Comm comm = MPI_COMM_WORLD; 29 | 30 | MPI_Comm_size(comm, &size); 31 | MPI_Comm_rank(comm, &rank); 32 | 33 | MPI_Datatype typesig[4] = {MPI_CHAR, MPI_DOUBLE, MPI_INT, MPI_DOUBLE}; 34 | int block_lengths[4] = {STRLEN, 1, 1, 1}; 35 | 36 | MPI_Aint base_address, displacements[4]; 37 | 38 | MPI_Get_address(&charizard.name, &displacements[0]); 39 | base_address = displacements[0]; 40 | displacements[0] = displacements[0] - base_address; 41 | 42 | MPI_Get_address(&charizard.life_points, &displacements[1]); 43 | displacements[1] = displacements[1] - base_address; 44 | 45 | MPI_Get_address(&charizard.damage, &displacements[2]); 46 | displacements[2] = displacements[2] - base_address; 47 | 48 | MPI_Get_address(&charizard.multiplier, &displacements[3]); 49 | displacements[3] = displacements[3] - base_address; 50 | 51 | MPI_Datatype mpi_pokemon; 52 | MPI_Type_create_struct(4, block_lengths, displacements, typesig, 53 | &mpi_pokemon); 54 | 55 | MPI_Type_commit(&mpi_pokemon); 56 | 57 | if (rank == 0) { 58 | sprintf(charizard.name, "Charizard"); 59 | charizard.life_points = 180.0; 60 | charizard.damage = 60; 61 | charizard.multiplier = 0.89; 62 | 63 | MPI_Bcast(&charizard, 1, mpi_pokemon, 0, comm); 64 | } else { 65 | // matching broadcast on all other processes. 66 | MPI_Bcast(&charizard, 1, mpi_pokemon, 0, comm); 67 | 68 | // did we get it right? 
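    // (Caveat: broadcasting a single mpi_pokemon works as written; when
    //  sending arrays of struct Pokemon, check that the extent of mpi_pokemon
    //  matches sizeof(struct Pokemon) -- cf. the extent/size exercise -- and
    //  use MPI_Type_create_resized if trailing padding makes them differ.)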
69 | printf("rank %d:\n", rank); 70 | printf(" pokemon = %s\n", charizard.name); 71 | printf(" life_points = %2.2f\n", charizard.life_points); 72 | printf(" damage = %d\n", charizard.damage); 73 | printf(" multiplier = %2.2f\n", charizard.multiplier); 74 | } 75 | 76 | MPI_Type_free(&mpi_pokemon); 77 | 78 | MPI_Finalize(); 79 | 80 | return EXIT_SUCCESS; 81 | } 82 | -------------------------------------------------------------------------------- /content/code/day-4/10_integrate-pi/solution/pi-integration.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #define PI 3.141592653589793238462643 8 | 9 | int main(int argc, char *argv[]) { 10 | 11 | /* ==== CHALLENGE ==== 12 | * 13 | * Uncomment the line and fix the MPI call to make this code work! 14 | * We want to use fork-join parallelism, so pick a more suitable 15 | * threading mode for OpenMP parallel regions */ 16 | /* Initialize the MPI environment and check */ 17 | int provided, required = MPI_THREAD_FUNNELED; 18 | MPI_Init_thread(NULL, NULL, required, &provided); 19 | MPI_Comm comm = MPI_COMM_WORLD; 20 | 21 | /* If the program can't run, stop running */ 22 | if (required != provided) 23 | { 24 | printf("Sorry, the MPI library does not provide " 25 | "this threading level! Aborting!\n"); 26 | MPI_Abort(comm, 1); 27 | } 28 | 29 | int size, rank; 30 | MPI_Comm_size(comm, &size); 31 | MPI_Comm_rank(comm, &rank); 32 | 33 | long int num_points; 34 | 35 | if (rank == 0) { 36 | if (argc < 2) { 37 | fprintf(stderr, "Usage: %s number_of_points \n", argv[0]); 38 | MPI_Abort(comm, 1); 39 | } 40 | sscanf(argv[1], "%ld", &num_points); 41 | } 42 | 43 | MPI_Bcast(&num_points, 1, MPI_LONG_INT, 0, comm); 44 | 45 | double delta_x = 1.0 / (double)(num_points); 46 | 47 | long int local_num_points = num_points / size; 48 | if (num_points % size != 0) { 49 | local_num_points += 1; 50 | } 51 | 52 | long int start = local_num_points * rank; 53 | long int end = local_num_points * (rank + 1); 54 | if (end > num_points) { 55 | end = num_points; 56 | } 57 | 58 | printf("rank %d: start=%ld, end=%ld\n", rank, start, end); 59 | 60 | double local_pi = 0.0; 61 | long int i; 62 | #pragma omp parallel for reduction(+:local_pi) 63 | for (i = start; i < end; i++) { 64 | double x = delta_x * ((double)(i) + 0.5); 65 | local_pi += 1.0 / (1.0 + x * x); 66 | } 67 | local_pi *= 4.0 * delta_x; 68 | 69 | double global_pi; 70 | MPI_Reduce(&local_pi, &global_pi, 1, MPI_DOUBLE, MPI_SUM, 0, comm); 71 | 72 | if (rank == 0) { 73 | printf("pi = %22.12f (error: %.3e)\n", global_pi, fabs(global_pi - PI)); 74 | } 75 | 76 | MPI_Finalize(); 77 | 78 | return 0; 79 | } 80 | -------------------------------------------------------------------------------- /content/code/day-1/01_comms-groups-divide-evenly/solution/comms-groups-divide-evenly.c: -------------------------------------------------------------------------------- 1 | /* Adapated from: 2 | * https://github.com/UoB-HPC/hpc-course-examples/blob/master/mpi/advanced/example10/group_to_comm.c 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | int main(int argc, char *argv[]) 11 | { 12 | MPI_Init(&argc, &argv); 13 | 14 | // rank and size from global communicator 15 | 16 | int global_rank, global_size; 17 | MPI_Comm_rank(MPI_COMM_WORLD, &global_rank); 18 | MPI_Comm_size(MPI_COMM_WORLD, &global_size); 19 | 20 | // global group 21 | 22 | MPI_Group global_group; 23 | MPI_Comm_group(MPI_COMM_WORLD, &global_group); 24 | 25 | // incl_ranks 26 | 
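    // split the ranks roughly in half: the first global_size / 2 ranks form
    // group 1, the remaining ranks form group 2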
27 | int num_incl_ranks_1 = global_size / 2; 28 | int num_incl_ranks_2 = global_size - num_incl_ranks_1; 29 | 30 | int* incl_ranks_1 = (int*)(malloc(sizeof(int) * num_incl_ranks_1)); 31 | int* incl_ranks_2 = (int*)(malloc(sizeof(int) * num_incl_ranks_2)); 32 | 33 | int i; 34 | for (i = 0; i < num_incl_ranks_1; i++) 35 | { 36 | incl_ranks_1[i] = i; 37 | } 38 | for (i = 0; i < num_incl_ranks_2; i++) 39 | { 40 | incl_ranks_2[i] = num_incl_ranks_1 + i; 41 | } 42 | 43 | // local group 44 | 45 | MPI_Group local_group; 46 | if (global_rank < num_incl_ranks_1) 47 | { 48 | MPI_Group_incl(global_group, num_incl_ranks_1, incl_ranks_1, &local_group); 49 | } 50 | else 51 | { 52 | MPI_Group_incl(global_group, num_incl_ranks_2, incl_ranks_2, &local_group); 53 | } 54 | 55 | // local communicator 56 | 57 | MPI_Comm local_comm; 58 | MPI_Comm_create(MPI_COMM_WORLD, local_group, &local_comm); 59 | 60 | // rank in local communicator 61 | 62 | int local_rank; 63 | MPI_Comm_rank(local_comm, &local_rank); 64 | 65 | // send global rank as message 66 | 67 | int sendbuf = global_rank; 68 | int recvbuf; 69 | int count = 1; 70 | 71 | // compute sum of global ranks in local communicator 72 | 73 | MPI_Allreduce(&sendbuf, &recvbuf, count, MPI_INT, MPI_SUM, local_comm); 74 | 75 | printf("global_rank= %d local_rank= %d recvbuf= %d\n", global_rank, local_rank, recvbuf); 76 | 77 | free(incl_ranks_1); 78 | free(incl_ranks_2); 79 | 80 | MPI_Comm_free(&local_comm); 81 | MPI_Group_free(&local_group); 82 | MPI_Group_free(&global_group); 83 | 84 | MPI_Finalize(); 85 | 86 | return 0; 87 | } 88 | -------------------------------------------------------------------------------- /content/code/day-4/04_rma-pscw/solution/rma-pscw.c: -------------------------------------------------------------------------------- 1 | /* Modified from: https://cvw.cac.cornell.edu/MPIoneSided/pscw_c_solution */ 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | int main(int argc, char **argv) { 9 | MPI_Init(&argc, &argv); 10 | 11 | MPI_Comm comm = MPI_COMM_WORLD; 12 | 13 | int size; 14 | MPI_Comm_size(comm, &size); 15 | 16 | int rank; 17 | MPI_Comm_rank(comm, &rank); 18 | 19 | // creat a buffer of size equal to the size of the communicator 20 | double *buf = (double *)(malloc(sizeof(double) * size)); 21 | // save ranks of each process, used later on to create groups 22 | int *ranks = (int *)(malloc(sizeof(int) * size)); 23 | for (int i = 0; i < size; ++i) { 24 | buf[i] = rank * 11; 25 | ranks[i] = i; 26 | } 27 | 28 | if (rank == 0) { 29 | printf("on process %d:\n [", rank); 30 | for (int i = 0; i < size; ++i) { 31 | printf(" %f, ", buf[i]); 32 | } 33 | printf("]\n"); 34 | } 35 | 36 | // create new window and attach it to buf 37 | MPI_Win win; 38 | MPI_Win_create(buf, sizeof(double) * size, sizeof(double), MPI_INFO_NULL, 39 | comm, &win); 40 | 41 | // Post/Start/Wait/Complete works with process groups 42 | // Perform the necessary shenanigans 43 | // processes with ranks > 0 are origin processes for RMA calls 44 | // process with rank = 0 is the target process for RMA calls 45 | MPI_Group comm_group; 46 | MPI_Comm_group(comm, &comm_group); 47 | 48 | MPI_Group group; 49 | if (rank == 0) { 50 | // Origin group consists of all ranks > 0 51 | MPI_Group_incl(comm_group, size - 1, ranks + 1, &group); 52 | // initialize the exposure epoch 53 | MPI_Win_post(group, 0, win); 54 | // wait for exposure epoch to finish 55 | MPI_Win_wait(win); 56 | } else { 57 | // Target group consists of rank 0 58 | MPI_Group_incl(comm_group, 1, &ranks[0], &group); 59 | // 
initialize the access epoch 60 | MPI_Win_start(group, 0, win); 61 | // put element at index rank into buffer on rank 0 62 | MPI_Put(buf, 1, MPI_DOUBLE, 0, rank, 1, MPI_DOUBLE, win); 63 | // Terminate the access epoch 64 | MPI_Win_complete(win); 65 | } 66 | 67 | if (rank == 0) { 68 | printf("on process %d:\n [", rank); 69 | for (int i = 0; i < size; ++i) { 70 | printf(" %f, ", buf[i]); 71 | } 72 | printf("]\n"); 73 | } 74 | 75 | // free window and groups 76 | MPI_Win_free(&win); 77 | MPI_Group_free(&group); 78 | MPI_Group_free(&comm_group); 79 | 80 | MPI_Finalize(); 81 | 82 | return 0; 83 | } 84 | -------------------------------------------------------------------------------- /content/code/day-1/08_broadcast/collective-communication-broadcast.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | int main(int argc, char **argv) { 6 | /* Initialize the MPI environment and report */ 7 | MPI_Init(&argc, &argv); 8 | 9 | MPI_Comm comm = MPI_COMM_WORLD; 10 | 11 | int rank, size; 12 | MPI_Comm_rank(comm, &rank); 13 | MPI_Comm_size(comm, &size); 14 | 15 | const int rank_of_root = 0; 16 | 17 | /* Prepare the values to broadcast */ 18 | int expected_values[2] = {100, -1000}; 19 | 20 | int values_to_broadcast[2] = {0, 0}; 21 | if (rank == rank_of_root) { 22 | values_to_broadcast[0] = expected_values[0]; 23 | values_to_broadcast[1] = expected_values[1]; 24 | } 25 | 26 | /* Report the state before the broadcast */ 27 | printf("On rank %d, pre-broadcast values were [%d, %d]\n", rank, 28 | values_to_broadcast[0], values_to_broadcast[1]); 29 | 30 | /* ==== CHALLENGE ==== 31 | * 32 | * Uncomment and fix the MPI call to make this code work! 33 | * We want the contents of values_to_broadcast to be 34 | * sent to all ranks in comm. 35 | */ 36 | /* Do the broadcast */ 37 | /* MPI_xxx(xxx); */ 38 | 39 | /* Report the state after the broadcast */ 40 | printf("On rank %d, broadcast values were [%d, %d]\n", rank, 41 | values_to_broadcast[0], values_to_broadcast[1]); 42 | 43 | /* ==== CHALLENGE ==== 44 | * 45 | * Uncomment and fix the MPI call to make this code work! 46 | * We want to reduce values_to_broadcast over all ranks 47 | * and compute the sum on the root rank. 
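   * One possible completion (a sketch; compare with the solution file):
   *   MPI_Reduce(values_to_broadcast, reduced_values, 2, MPI_INT, MPI_SUM,
   *              rank_of_root, comm);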
48 | */ 49 | /* Reduce the data over all ranks with summation */ 50 | int reduced_values[2]; 51 | /* MPI_xxx(xxx); */ 52 | 53 | /* Report the state after the reduction */ 54 | if (rank == rank_of_root) { 55 | printf("On rank %d, reduced values were [%d, %d]\n", rank, 56 | reduced_values[0], reduced_values[1]); 57 | } 58 | 59 | /* Report whether the code is correct */ 60 | int success = ((values_to_broadcast[0] == expected_values[0]) && 61 | (values_to_broadcast[1] == expected_values[1])); 62 | 63 | /* Success on the root rank also means checking the reduction */ 64 | if (rank == rank_of_root) { 65 | success = success && ((reduced_values[0] == expected_values[0] * size) && 66 | (reduced_values[1] == expected_values[1] * size)); 67 | } 68 | 69 | if (success) { 70 | printf("SUCCESS on rank %d!\n", rank); 71 | } else { 72 | printf("Improvement needed before rank %d can report success!\n", rank); 73 | } 74 | 75 | /* Clean up and exit */ 76 | MPI_Finalize(); 77 | 78 | return 0; 79 | } 80 | -------------------------------------------------------------------------------- /content/code/day-4/07_rma-pi-lock-unlock/rma-pi-lock-unlock.c: -------------------------------------------------------------------------------- 1 | /* Modified from: https://cvw.cac.cornell.edu/MPIoneSided/pilul_c_solution */ 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #define PI25DT 3.141592653589793238462643 9 | 10 | int main(int argc, char *argv[]) { 11 | 12 | MPI_Init(&argc, &argv); 13 | 14 | MPI_Comm comm = MPI_COMM_WORLD; 15 | 16 | int size; 17 | MPI_Comm_size(comm, &size); 18 | int rank; 19 | MPI_Comm_rank(comm, &rank); 20 | 21 | int n; 22 | // on rank 0, read the number of points from the input 23 | if (rank == 0) { 24 | if (argc < 2) { 25 | fprintf(stderr, "Usage: %s N\n", argv[0]); 26 | MPI_Abort(MPI_COMM_WORLD, 1); 27 | } 28 | sscanf(argv[1], "%d", &n); 29 | printf("The integration grid has N=%d points\n", n); 30 | if (n == 0) { 31 | fprintf(stderr, "N should be greater than 0!\n"); 32 | MPI_Abort(MPI_COMM_WORLD, 1); 33 | } 34 | } 35 | 36 | double pi = 0.0; 37 | 38 | // FIXME create two windows: 39 | // - one for the number of points, and 40 | // - one for the computation of pi 41 | MPI_Win win_n, win_pi; 42 | MPI_xxx(/* */); 43 | 44 | // every rank > 0 originates a MPI_Get to obtain n 45 | // (or rank = 0 could originate a MPI_Put) 46 | if (rank > 0) { 47 | // FIXME lock the window on rank 0 (target process) 48 | MPI_xxx(/* */); 49 | // FIXME RMA with rank 0 as target process 50 | MPI_xxx(/* */); 51 | // FIXME unlock the window on rank 0 (target process) 52 | MPI_xxx(/* */); 53 | } 54 | 55 | // compute slice of pi for each process (including on rank 0) 56 | double h = 1.0 / (double)n; 57 | double sum = 0.0; 58 | 59 | double x; 60 | for (int i = rank + 1; i <= n; i += size) { 61 | x = h * ((double)i - 0.5); 62 | sum += (4.0 / (1.0 + x * x)); 63 | } 64 | // result of computation on this rank 65 | double my_pi = h * sum; 66 | 67 | if (rank > 0) { 68 | // FIXME lock the window on rank 0 (target process) 69 | MPI_xxx(/* */); 70 | // FIXME RMA with rank 0 as target process 71 | MPI_xxx(/* */); 72 | // FIXME unlock the window on rank 0 (target process) 73 | MPI_xxx(/* */); 74 | } 75 | 76 | // FIXME barrier to make sure that rank 0 is done with its chunk of the computation 77 | MPI_xxx(/* */); 78 | 79 | if (rank == 0) { 80 | // sum up my_pi on rank 0 with the result of the reduction with 81 | // MPI_Accumulate 82 | pi += my_pi; 83 | printf("pi is approximately %.16f, Error is %.16f\n", pi, 84 | fabs(pi - PI25DT)); 
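    /* Completion hints for the FIXMEs above (a sketch; compare with the
     * solution file): fetching n uses
     *   MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win_n);
     *   MPI_Get(&n, 1, MPI_INT, 0, 0, 1, MPI_INT, win_n);
     *   MPI_Win_unlock(0, win_n);
     * and the reduction wraps
     *   MPI_Accumulate(&my_pi, 1, MPI_DOUBLE, 0, 0, 1, MPI_DOUBLE, MPI_SUM, win_pi);
     * in the same lock/unlock pair on win_pi. */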
85 | } 86 | 87 | // FIXME free the windows 88 | MPI_xxx(/* */); 89 | 90 | MPI_Finalize(); 91 | 92 | return 0; 93 | } 94 | -------------------------------------------------------------------------------- /content/code/day-1/05_pokemon-pack-unpack/solution/pokemon-pack-unpack.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #define STRLEN 25 8 | 9 | int main(int argc, char *argv[]) { 10 | int rank; 11 | int size; 12 | // marker used by MPI_Pack and MPI_Unpack 13 | int position; 14 | 15 | // name of pokemon attacking 16 | char name[STRLEN]; 17 | // life points 18 | double life_points; 19 | // damage done by the attack 20 | int damage; 21 | // strength multiplier 22 | double multiplier; 23 | 24 | // buffer for the message 25 | // we set it to BUFSIZ (defined in stdio.h) assuming that's big enough! 26 | char message[BUFSIZ]; 27 | 28 | MPI_Init(&argc, &argv); 29 | 30 | MPI_Comm comm = MPI_COMM_WORLD; 31 | 32 | MPI_Comm_size(comm, &size); 33 | MPI_Comm_rank(comm, &rank); 34 | 35 | if (rank == 0) { 36 | sprintf(name, "Blastoise"); 37 | life_points = 150.0; 38 | damage = 40; 39 | multiplier = 1.32; 40 | 41 | position = 0; 42 | // we're ready to do some packing! 43 | MPI_Pack(&name, STRLEN, MPI_CHAR, message, BUFSIZ, &position, comm); 44 | // where is position now? 45 | printf("packed name, position = %d\n", position); 46 | 47 | MPI_Pack(&life_points, 1, MPI_DOUBLE, message, BUFSIZ, &position, comm); 48 | printf("packed life_points, position = %d\n", position); 49 | 50 | MPI_Pack(&damage, 1, MPI_INT, message, BUFSIZ, &position, comm); 51 | printf("packed damage, position = %d\n", position); 52 | 53 | MPI_Pack(&multiplier, 1, MPI_DOUBLE, message, BUFSIZ, &position, comm); 54 | printf("packed multiplier, position = %d\n", position); 55 | 56 | // let it be known that a move was made! 57 | MPI_Bcast(message, BUFSIZ, MPI_PACKED, 0, comm); 58 | } else { 59 | // matching broadcast on all other processes. 60 | MPI_Bcast(message, BUFSIZ, MPI_PACKED, 0, comm); 61 | 62 | position = 0; 63 | // let's get to unpacking 64 | // !!! the length of the string MUST be known 65 | MPI_Unpack(message, BUFSIZ, &position, &name, STRLEN, MPI_CHAR, comm); 66 | printf("unpacked name, position = %d\n", position); 67 | 68 | MPI_Unpack(message, BUFSIZ, &position, &life_points, 1, MPI_DOUBLE, comm); 69 | printf("unpacked life_points, position = %d\n", position); 70 | 71 | MPI_Unpack(message, BUFSIZ, &position, &damage, 1, MPI_INT, comm); 72 | printf("unpacked damage, position = %d\n", position); 73 | 74 | MPI_Unpack(message, BUFSIZ, &position, &multiplier, 1, MPI_DOUBLE, comm); 75 | printf("unpacked multiplier, position = %d\n", position); 76 | 77 | // did we get it right? 
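    // (The unpack sequence above mirrors the pack sequence on rank 0 exactly --
    //  same order, counts, and datatypes -- and `position` advances in bytes
    //  through the packed buffer as each field is extracted.)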
78 | printf("rank %d:\n", rank); 79 | printf(" name = %s\n", name); 80 | printf(" life_points = %2.2f\n", life_points); 81 | printf(" damage = %d\n", damage); 82 | printf(" multiplier = %2.2f\n", multiplier); 83 | } 84 | 85 | MPI_Finalize(); 86 | 87 | return EXIT_SUCCESS; 88 | } 89 | -------------------------------------------------------------------------------- /content/code/day-4/07_rma-pi-lock-unlock/solution/rma-pi-lock-unlock.c: -------------------------------------------------------------------------------- 1 | /* Modified from: https://cvw.cac.cornell.edu/MPIoneSided/pilul_c_solution */ 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #define PI25DT 3.141592653589793238462643 9 | 10 | int main(int argc, char *argv[]) { 11 | 12 | MPI_Init(&argc, &argv); 13 | 14 | MPI_Comm comm = MPI_COMM_WORLD; 15 | 16 | int size; 17 | MPI_Comm_size(comm, &size); 18 | int rank; 19 | MPI_Comm_rank(comm, &rank); 20 | 21 | int n; 22 | // on rank 0, read the number of points from the input 23 | if (rank == 0) { 24 | if (argc < 2) { 25 | fprintf(stderr, "Usage: %s N\n", argv[0]); 26 | MPI_Abort(MPI_COMM_WORLD, 1); 27 | } 28 | sscanf(argv[1], "%d", &n); 29 | printf("The integration grid has N=%d points\n", n); 30 | if (n == 0) { 31 | fprintf(stderr, "N should be greater than 0!\n"); 32 | MPI_Abort(MPI_COMM_WORLD, 1); 33 | } 34 | } 35 | 36 | double pi = 0.0; 37 | 38 | // create two windows: 39 | // - one for the number of points, and 40 | // - one for the computation of pi 41 | MPI_Win win_n, win_pi; 42 | MPI_Win_create(&n, sizeof(int), sizeof(int), MPI_INFO_NULL, comm, &win_n); 43 | MPI_Win_create(&pi, sizeof(double), sizeof(double), MPI_INFO_NULL, comm, 44 | &win_pi); 45 | 46 | // every rank > 0 originates a MPI_Get to obtain n 47 | // (or rank = 0 could originate a MPI_Put) 48 | if (rank > 0) { 49 | // lock the window on rank 0 (target process) 50 | MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win_n); 51 | // RMA with rank 0 as target process 52 | MPI_Get(&n, 1, MPI_INT, 0, 0, 1, MPI_INT, win_n); 53 | // unlock the window on rank 0 (target process) 54 | MPI_Win_unlock(0, win_n); 55 | } 56 | 57 | // compute slice of pi for each process (including on rank 0) 58 | double h = 1.0 / (double)n; 59 | double sum = 0.0; 60 | 61 | double x; 62 | for (int i = rank + 1; i <= n; i += size) { 63 | x = h * ((double)i - 0.5); 64 | sum += (4.0 / (1.0 + x * x)); 65 | } 66 | // result of computation on this rank 67 | double my_pi = h * sum; 68 | 69 | if (rank > 0) { 70 | // lock the window on rank 0 (target process) 71 | MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win_pi); 72 | // RMA with rank 0 as target process 73 | MPI_Accumulate(&my_pi, 1, MPI_DOUBLE, 0, 0, 1, MPI_DOUBLE, MPI_SUM, win_pi); 74 | // unlock the window on rank 0 (target process) 75 | MPI_Win_unlock(0, win_pi); 76 | } 77 | 78 | // we need to barrier to make sure that rank 0 is done with its chunk of the 79 | // computation 80 | MPI_Barrier(comm); 81 | 82 | if (rank == 0) { 83 | // sum up my_pi on rank 0 with the result of the reduction with 84 | // MPI_Accumulate 85 | pi += my_pi; 86 | printf("pi is approximately %.16f, Error is %.16f\n", pi, 87 | fabs(pi - PI25DT)); 88 | } 89 | 90 | // free the windows 91 | MPI_Win_free(&win_n); 92 | MPI_Win_free(&win_pi); 93 | 94 | MPI_Finalize(); 95 | 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /content/code/day-2/00_scatter-and-gather/scatter-and-gather-1.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 
#include 4 | 5 | int main(int argc, char *argv[]) 6 | { 7 | /* Initialize the MPI environment and report */ 8 | MPI_Init(&argc, &argv); 9 | 10 | MPI_Comm comm = MPI_COMM_WORLD; 11 | 12 | int rank, size; 13 | MPI_Comm_rank(comm, &rank); 14 | MPI_Comm_size(comm, &size); 15 | 16 | const int rank_of_scatter_root = 0; 17 | const int rank_of_gather_root = 2; 18 | 19 | /* Prepare the values to scatter */ 20 | float initial_values[4] = { 100, -1000, 3.5, -2.25 }; 21 | 22 | float values_to_scatter[4] = { 0, 0, 0, 0 }; 23 | if (rank == rank_of_scatter_root) 24 | { 25 | values_to_scatter[0] = initial_values[0]; 26 | values_to_scatter[1] = initial_values[1]; 27 | values_to_scatter[2] = initial_values[2]; 28 | values_to_scatter[3] = initial_values[3]; 29 | 30 | /* Report the state before the scatter */ 31 | printf("On rank %d, pre-scatter values were [%f, %f, %f, %f]\n", rank, 32 | values_to_scatter[0], 33 | values_to_scatter[1], 34 | values_to_scatter[2], 35 | values_to_scatter[3]); 36 | } 37 | 38 | /* ==== CHALLENGE ==== 39 | * 40 | * Uncomment and fix the MPI call to make this code work! 41 | */ 42 | /* Do the scatter */ 43 | float scattered_value; 44 | /* MPI_xxx(xxx); */ 45 | 46 | /* Report the state after the scatter */ 47 | printf("On rank %d, scattered value was %f\n", rank, scattered_value); 48 | 49 | /* Do some computational work on the scattered value */ 50 | float result = scattered_value * (rank + 1); 51 | 52 | /* ==== CHALLENGE ==== 53 | * 54 | * Uncomment and fix the MPI call to make this code work! 55 | */ 56 | /* Gather the result over all ranks to the new root */ 57 | float gathered_values[4]; 58 | /* MPI_xxx(xxx); */ 59 | 60 | /* Report the state after the gather */ 61 | if (rank == rank_of_gather_root) 62 | { 63 | printf("On rank %d, gathered values were [%f, %f, %f, %f]\n", rank, 64 | gathered_values[0], 65 | gathered_values[1], 66 | gathered_values[2], 67 | gathered_values[3]); 68 | } 69 | 70 | /* Report whether the code is correct */ 71 | int success = (result == initial_values[rank] * (rank + 1)); 72 | 73 | /* Success on the gather-root rank also means checking the gather */ 74 | if (rank == rank_of_gather_root) 75 | { 76 | success = success && ((gathered_values[0] == initial_values[0] * 1) && 77 | (gathered_values[1] == initial_values[1] * 2) && 78 | (gathered_values[2] == initial_values[2] * 3) && 79 | (gathered_values[3] == initial_values[3] * 4)); 80 | } 81 | 82 | if (success) 83 | { 84 | printf("SUCCESS on rank %d!\n", rank); 85 | } 86 | else 87 | { 88 | printf("Improvement needed before rank %d can report success!\n", rank); 89 | } 90 | 91 | /* Clean up and exit */ 92 | MPI_Finalize(); 93 | 94 | return 0; 95 | } 96 | -------------------------------------------------------------------------------- /content/code/day-2/00_scatter-and-gather/solution/scatter-and-gather-1.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | int main(int argc, char *argv[]) 6 | { 7 | /* Initialize the MPI environment and report */ 8 | MPI_Init(&argc, &argv); 9 | 10 | MPI_Comm comm = MPI_COMM_WORLD; 11 | 12 | int rank, size; 13 | MPI_Comm_rank(comm, &rank); 14 | MPI_Comm_size(comm, &size); 15 | 16 | const int rank_of_scatter_root = 0; 17 | const int rank_of_gather_root = 2; 18 | 19 | /* Prepare the values to scatter */ 20 | float initial_values[4] = { 100, -1000, 3.5, -2.25 }; 21 | 22 | float values_to_scatter[4] = { 0, 0, 0, 0 }; 23 | if (rank == rank_of_scatter_root) 24 | { 25 | values_to_scatter[0] = initial_values[0]; 26 | 
values_to_scatter[1] = initial_values[1]; 27 | values_to_scatter[2] = initial_values[2]; 28 | values_to_scatter[3] = initial_values[3]; 29 | 30 | /* Report the state before the scatter */ 31 | printf("On rank %d, pre-scatter values were [%f, %f, %f, %f]\n", rank, 32 | values_to_scatter[0], 33 | values_to_scatter[1], 34 | values_to_scatter[2], 35 | values_to_scatter[3]); 36 | } 37 | 38 | /* Do the scatter */ 39 | float scattered_value; 40 | MPI_Scatter(values_to_scatter, 1, MPI_FLOAT, 41 | &scattered_value, 1, MPI_FLOAT, 42 | rank_of_scatter_root, comm); 43 | 44 | /* Report the state after the scatter */ 45 | printf("On rank %d, scattered value was %f\n", rank, scattered_value); 46 | 47 | /* Do some computational work on the scattered value */ 48 | float result = scattered_value * (rank + 1); 49 | 50 | /* Gather the result over all ranks to the new root */ 51 | float gathered_values[4]; 52 | MPI_Gather(&result, 1, MPI_FLOAT, 53 | gathered_values, 1, MPI_FLOAT, 54 | rank_of_gather_root, comm); 55 | 56 | /* Report the state after the gather */ 57 | if (rank == rank_of_gather_root) 58 | { 59 | printf("On rank %d, gathered values were [%f, %f, %f, %f]\n", rank, 60 | gathered_values[0], 61 | gathered_values[1], 62 | gathered_values[2], 63 | gathered_values[3]); 64 | } 65 | 66 | /* Report whether the code is correct */ 67 | int success = (result == initial_values[rank] * (rank + 1)); 68 | 69 | /* Success on the gather-root rank also means checking the gather */ 70 | if (rank == rank_of_gather_root) 71 | { 72 | success = success && ((gathered_values[0] == initial_values[0] * 1) && 73 | (gathered_values[1] == initial_values[1] * 2) && 74 | (gathered_values[2] == initial_values[2] * 3) && 75 | (gathered_values[3] == initial_values[3] * 4)); 76 | } 77 | 78 | if (success) 79 | { 80 | printf("SUCCESS on rank %d!\n", rank); 81 | } 82 | else 83 | { 84 | printf("Improvement needed before rank %d can report success!\n", rank); 85 | } 86 | 87 | /* Clean up and exit */ 88 | MPI_Finalize(); 89 | 90 | return 0; 91 | } 92 | -------------------------------------------------------------------------------- /content/code/day-1/06_pokemon-pack-unpack-size/solution/pokemon-pack-unpack-size.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #define STRLEN 25 8 | 9 | int main(int argc, char *argv[]) { 10 | int rank; 11 | int size; 12 | // marker used by MPI_Pack and MPI_Unpack 13 | int position; 14 | 15 | // name of pokemon attacking 16 | char pokemon[STRLEN]; 17 | // life points 18 | int life_points; 19 | // damage done by the attack 20 | int damage; 21 | // strength multiplier 22 | double multiplier; 23 | 24 | MPI_Init(&argc, &argv); 25 | 26 | MPI_Comm comm = MPI_COMM_WORLD; 27 | 28 | int sz1, sz2, sz3, sz4; 29 | MPI_Pack_size(STRLEN, MPI_CHAR, comm, &sz1); 30 | 31 | MPI_Pack_size(1, MPI_DOUBLE, comm, &sz2); 32 | 33 | MPI_Pack_size(1, MPI_INT, comm, &sz3); 34 | 35 | MPI_Pack_size(1, MPI_DOUBLE, comm, &sz4); 36 | 37 | int bufsiz = sz1 + sz2 + sz3 + sz4; 38 | // buffer for the message 39 | char *message = (char *)malloc((unsigned)bufsiz); 40 | 41 | MPI_Comm_size(comm, &size); 42 | MPI_Comm_rank(comm, &rank); 43 | 44 | if (rank == 0) { 45 | sprintf(pokemon, "Blastoise"); 46 | life_points = 150; 47 | damage = 40; 48 | multiplier = 1.32; 49 | 50 | position = 0; 51 | // we're ready to do some packing! 52 | MPI_Pack(&pokemon, STRLEN, MPI_CHAR, message, sz1, &position, comm); 53 | // where is position now? 
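    // (MPI_Pack_size reports an upper bound on the space needed, so `position`
    //  can end up smaller than sz1 + sz2 + sz3 + sz4 once everything is packed.)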
54 | printf("packed pokemon, position = %d\n", position); 55 | 56 | MPI_Pack(&life_points, 1, MPI_INT, message, sz1 + sz2, &position, comm); 57 | printf("packed life_points, position = %d\n", position); 58 | 59 | MPI_Pack(&damage, 1, MPI_INT, message, sz1 + sz2 + sz3, &position, comm); 60 | printf("packed damage, position = %d\n", position); 61 | 62 | MPI_Pack(&multiplier, 1, MPI_DOUBLE, message, sz1 + sz2 + sz3 + sz4, 63 | &position, comm); 64 | printf("packed multiplier, position = %d\n", position); 65 | 66 | // let it be known that a move was made! 67 | MPI_Bcast(message, bufsiz, MPI_PACKED, 0, comm); 68 | } else { 69 | // matching broadcast on all other processes. 70 | MPI_Bcast(message, bufsiz, MPI_PACKED, 0, comm); 71 | 72 | position = 0; 73 | // let's get to unpacking 74 | // !!! the length of the string MUST be known 75 | MPI_Unpack(message, sz1, &position, &pokemon, STRLEN, MPI_CHAR, comm); 76 | printf("unpacked pokemon, position = %d\n", position); 77 | 78 | MPI_Unpack(message, sz1 + sz2, &position, &life_points, 1, MPI_INT, comm); 79 | printf("unpacked life_points, position = %d\n", position); 80 | 81 | MPI_Unpack(message, sz1 + sz2 + sz3, &position, &damage, 1, MPI_INT, comm); 82 | printf("unpacked damage, position = %d\n", position); 83 | 84 | MPI_Unpack(message, sz1 + sz2 + sz3 + sz4, &position, &multiplier, 1, 85 | MPI_DOUBLE, comm); 86 | printf("unpacked multiplier, position = %d\n", position); 87 | 88 | // did we get it right? 89 | printf("rank %d:\n", rank); 90 | printf(" pokemon = %s\n", pokemon); 91 | printf(" life_points = %d\n", life_points); 92 | printf(" damage = %d\n", damage); 93 | printf(" multiplier = %2.2f\n", multiplier); 94 | } 95 | 96 | MPI_Finalize(); 97 | 98 | return EXIT_SUCCESS; 99 | } 100 | -------------------------------------------------------------------------------- /content/code/day-4/00_threading-query/threading-query.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mpi.h" 3 | #include 4 | 5 | /* Help pretty-print some strings for the MPI_THREAD_* constants */ 6 | const char* mpi_thread_strings[4] = { "MPI_THEAD_SINGLE", 7 | "MPI_THREAD_FUNNELED", 8 | "MPI_THREAD_SERIALIZED", 9 | "MPI_THREAD_MULTIPLE" }; 10 | 11 | void report_whether_thread_is_master(const char *prefix, int thread_id, int num_threads, int rank) 12 | { 13 | int is_master; 14 | /* ==== CHALLENGE ==== 15 | * 16 | * Uncomment and fix the MPI call to make this code work! 17 | * Call the function that asks the MPI library to report whether 18 | * the calling thread is the master thread. 19 | */ 20 | /* MPI_xxx(xxx) FIXME */ 21 | printf("%s: The thread with id %d of %d is%s the main thread of rank %d\n", 22 | prefix, thread_id, num_threads, is_master ? "" : " not", rank); 23 | } 24 | 25 | int main(int argc, char **argv) 26 | { 27 | /* Initialize the MPI environment and report */ 28 | int required, provided; 29 | /* ==== CHALLENGE ==== 30 | * 31 | * Uncomment and fix the MPI call to make this code work! 32 | * Call the function in a way that asks the MPI library to 33 | * start in MPI_THREAD_MULTIPLE mode. 
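   * One possible completion (a sketch; compare with the solution file):
   *   required = MPI_THREAD_MULTIPLE;
   *   MPI_Init_thread(NULL, NULL, required, &provided);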
34 | */ 35 | /* required = FIXME */; 36 | /* MPI_xxx(xxx) FIXME */ 37 | MPI_Comm comm = MPI_COMM_WORLD; 38 | int rank; 39 | MPI_Comm_rank(comm, &rank); 40 | 41 | printf("Initialization required %s, and reported that %s was provided\n", 42 | mpi_thread_strings[required], 43 | mpi_thread_strings[provided]); 44 | 45 | /* If the program can't run, stop running */ 46 | if (required != provided) 47 | { 48 | printf("Sorry, the MPI library does not provide " 49 | "this threading level! Aborting!\n"); 50 | MPI_Abort(comm, 1); 51 | } 52 | 53 | /* ==== CHALLENGE ==== 54 | * 55 | * Uncomment and fix the MPI call to make this code work! 56 | * Call the function that asks the MPI library to report the level 57 | * of threading support available. 58 | */ 59 | /* This query should return the same value as for MPI_Init_thread, and 60 | * is useful in cases where that return value is not available. */ 61 | int provided_query; 62 | /* MPI_xxx(xxx) FIXME */ 63 | printf("The query about threading support reported that level %s was provided\n", 64 | mpi_thread_strings[provided_query]); 65 | 66 | /* Also valuable in such cases is information on whether this is 67 | * the main thread, so that MPI can be used in MPI_THREAD_FUNNELED 68 | * case. */ 69 | report_whether_thread_is_master("Before #pragma omp", omp_get_thread_num(), omp_get_num_threads(), rank); 70 | 71 | #pragma omp parallel 72 | { 73 | /* Let's see that the other threads are *not* master threads */ 74 | report_whether_thread_is_master("After #pragma omp ", omp_get_thread_num(), omp_get_num_threads(), rank); 75 | /* Only the master thread enters this block */ 76 | #pragma omp master 77 | { 78 | report_whether_thread_is_master("In master block ", omp_get_thread_num(), omp_get_num_threads(), rank); 79 | } 80 | } 81 | 82 | /* Clean up and exit */ 83 | MPI_Finalize(); 84 | return 0; 85 | } 86 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "flake-utils": { 4 | "locked": { 5 | "lastModified": 1649676176, 6 | "narHash": "sha256-OWKJratjt2RW151VUlJPRALb7OU2S5s+f0vLj4o1bHM=", 7 | "owner": "numtide", 8 | "repo": "flake-utils", 9 | "rev": "a4b154ebbdc88c8498a5c7b01589addc9e9cb678", 10 | "type": "github" 11 | }, 12 | "original": { 13 | "owner": "numtide", 14 | "repo": "flake-utils", 15 | "type": "github" 16 | } 17 | }, 18 | "flake-utils_2": { 19 | "locked": { 20 | "lastModified": 1642700792, 21 | "narHash": "sha256-XqHrk7hFb+zBvRg6Ghl+AZDq03ov6OshJLiSWOoX5es=", 22 | "owner": "numtide", 23 | "repo": "flake-utils", 24 | "rev": "846b2ae0fc4cc943637d3d1def4454213e203cba", 25 | "type": "github" 26 | }, 27 | "original": { 28 | "owner": "numtide", 29 | "repo": "flake-utils", 30 | "type": "github" 31 | } 32 | }, 33 | "mach-nix": { 34 | "inputs": { 35 | "flake-utils": "flake-utils_2", 36 | "nixpkgs": "nixpkgs", 37 | "pypi-deps-db": "pypi-deps-db" 38 | }, 39 | "locked": { 40 | "lastModified": 1643953409, 41 | "narHash": "sha256-CJDg/RpZdUVyI3QIAXUqIoYDl7VkxFtNE4JWih0ucKc=", 42 | "owner": "DavHau", 43 | "repo": "mach-nix", 44 | "rev": "fe5255e6fd8df57e9507b7af82fc59dda9e9ff2b", 45 | "type": "github" 46 | }, 47 | "original": { 48 | "owner": "DavHau", 49 | "ref": "3.4.0", 50 | "repo": "mach-nix", 51 | "type": "github" 52 | } 53 | }, 54 | "nixpkgs": { 55 | "locked": { 56 | "lastModified": 1643805626, 57 | "narHash": "sha256-AXLDVMG+UaAGsGSpOtQHPIKB+IZ0KSd9WS77aanGzgc=", 58 | "owner": "NixOS", 59 | "repo": "nixpkgs", 60 | 
"rev": "554d2d8aa25b6e583575459c297ec23750adb6cb", 61 | "type": "github" 62 | }, 63 | "original": { 64 | "id": "nixpkgs", 65 | "ref": "nixos-unstable", 66 | "type": "indirect" 67 | } 68 | }, 69 | "nixpkgs_2": { 70 | "locked": { 71 | "lastModified": 1651634615, 72 | "narHash": "sha256-VtvcS61bLh5mIBm9cV3idUHdlfPRFus/NwdJfaj5s8o=", 73 | "owner": "NixOS", 74 | "repo": "nixpkgs", 75 | "rev": "abfd31179174133ab8131139d650297bf4da63b7", 76 | "type": "github" 77 | }, 78 | "original": { 79 | "owner": "NixOS", 80 | "ref": "nixpkgs-unstable", 81 | "repo": "nixpkgs", 82 | "type": "github" 83 | } 84 | }, 85 | "pypi-deps-db": { 86 | "flake": false, 87 | "locked": { 88 | "lastModified": 1643877077, 89 | "narHash": "sha256-jv8pIvRFTP919GybOxXE5TfOkrjTbdo9QiCO1TD3ZaY=", 90 | "owner": "DavHau", 91 | "repo": "pypi-deps-db", 92 | "rev": "da53397f0b782b0b18deb72ef8e0fb5aa7c98aa3", 93 | "type": "github" 94 | }, 95 | "original": { 96 | "owner": "DavHau", 97 | "repo": "pypi-deps-db", 98 | "type": "github" 99 | } 100 | }, 101 | "root": { 102 | "inputs": { 103 | "flake-utils": "flake-utils", 104 | "mach-nix": "mach-nix", 105 | "nixpkgs": "nixpkgs_2" 106 | } 107 | } 108 | }, 109 | "root": "root", 110 | "version": 7 111 | } 112 | -------------------------------------------------------------------------------- /content/code/day-4/00_threading-query/solution/threading-query.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mpi.h" 3 | #include 4 | 5 | /* Help pretty-print some strings for the MPI_THREAD_* constants */ 6 | const char* mpi_thread_strings[4] = { "MPI_THEAD_SINGLE", 7 | "MPI_THREAD_FUNNELED", 8 | "MPI_THREAD_SERIALIZED", 9 | "MPI_THREAD_MULTIPLE" }; 10 | 11 | void report_whether_thread_is_master(const char *prefix, int thread_id, int num_threads, int rank) 12 | { 13 | int is_master; 14 | /* ==== CHALLENGE ==== 15 | * 16 | * Uncomment and fix the MPI call to make this code work! 17 | * Call the function that asks the MPI library to report whether 18 | * the calling thread is the master thread. 19 | */ 20 | MPI_Is_thread_main(&is_master); 21 | printf("%s: The thread with id %d of %d is%s the main thread of rank %d\n", 22 | prefix, thread_id, num_threads, is_master ? "" : " not", rank); 23 | } 24 | 25 | int main(int argc, char **argv) 26 | { 27 | /* Initialize the MPI environment and report */ 28 | int required, provided; 29 | /* ==== CHALLENGE ==== 30 | * 31 | * Uncomment and fix the MPI call to make this code work! 32 | * Call the function in a way that asks the MPI library to 33 | * start in MPI_THREAD_MULTIPLE mode. 34 | */ 35 | required = MPI_THREAD_MULTIPLE; 36 | MPI_Init_thread(NULL, NULL, required, &provided); 37 | MPI_Comm comm = MPI_COMM_WORLD; 38 | int rank; 39 | MPI_Comm_rank(comm, &rank); 40 | 41 | printf("Initialization required %s, and reported that %s was provided\n", 42 | mpi_thread_strings[required], 43 | mpi_thread_strings[provided]); 44 | 45 | /* If the program can't run, stop running */ 46 | if (required != provided) 47 | { 48 | printf("Sorry, the MPI library does not provide " 49 | "this threading level! Aborting!\n"); 50 | MPI_Abort(comm, 1); 51 | } 52 | 53 | /* ==== CHALLENGE ==== 54 | * 55 | * Uncomment and fix the MPI call to make this code work! 56 | * Call the function that asks the MPI library to report the level 57 | * of threading support available. 58 | */ 59 | /* This query should return the same value as for MPI_Init_thread, and 60 | * is useful in cases where that return value is not available. 
*/ 61 | int provided_query; 62 | MPI_Query_thread(&provided_query); 63 | printf("The query about threading support reported that level %s was provided\n", 64 | mpi_thread_strings[provided_query]); 65 | 66 | /* Also valuable in such cases is information on whether this is 67 | * the main thread, so that MPI can be used in MPI_THREAD_FUNNELED 68 | * case. */ 69 | report_whether_thread_is_master("Before #pragma omp", omp_get_thread_num(), omp_get_num_threads(), rank); 70 | 71 | #pragma omp parallel 72 | { 73 | /* Let's see that the other threads are *not* master threads */ 74 | report_whether_thread_is_master("After #pragma omp ", omp_get_thread_num(), omp_get_num_threads(), rank); 75 | /* Only the master thread enters this block */ 76 | #pragma omp master 77 | { 78 | report_whether_thread_is_master("In master block ", omp_get_thread_num(), omp_get_num_threads(), rank); 79 | } 80 | } 81 | 82 | /* Clean up and exit */ 83 | MPI_Finalize(); 84 | return 0; 85 | } 86 | -------------------------------------------------------------------------------- /content/code/day-4/06_rma-pi-pscw/rma-pi-pscw.c: -------------------------------------------------------------------------------- 1 | /* Modified from: https://cvw.cac.cornell.edu/MPIoneSided/piacc_c_solution */ 2 | 3 | #include <math.h> 4 | #include <stdio.h> 5 | #include <stdlib.h> 6 | 7 | #include <mpi.h> 8 | 9 | #define PI25DT 3.141592653589793238462643 10 | 11 | int main(int argc, char *argv[]) { 12 | MPI_Init(&argc, &argv); 13 | 14 | MPI_Comm comm = MPI_COMM_WORLD; 15 | 16 | int rank, size; 17 | MPI_Comm_size(comm, &size); 18 | MPI_Comm_rank(comm, &rank); 19 | 20 | int n; 21 | // on rank 0, read the number of points from the input 22 | if (rank == 0) { 23 | if (argc < 2) { 24 | fprintf(stderr, "Usage: %s N\n", argv[0]); 25 | MPI_Abort(comm, 1); 26 | } 27 | sscanf(argv[1], "%d", &n); 28 | printf("The integration grid has N=%d points\n", n); 29 | if (n == 0) { 30 | fprintf(stderr, "N should be greater than 0!\n"); 31 | MPI_Abort(comm, 1); 32 | } 33 | } 34 | 35 | // declare/initialize stuff for group shenanigans 36 | int *ranks = malloc(size * sizeof(int)); 37 | for (int i = 0; i < size; ++i) { 38 | ranks[i] = i; 39 | } 40 | 41 | MPI_Group comm_group, group; 42 | MPI_Comm_group(comm, &comm_group); 43 | 44 | double pi = 0.0; 45 | 46 | // FIXME create two windows: 47 | // - one for the number of points, and 48 | // - one for the computation of pi 49 | MPI_Win win_n, win_pi; 50 | MPI_xxx(/* */); 51 | MPI_xxx(/* */); 52 | 53 | if (rank == 0) { 54 | // All ranks except 0 will compute a chunk of the integral. 55 | // Once they're done, they will accumulate their results on rank 0 56 | // Hence the *origin group* for RMA consists of ranks 1 to size-1 57 | MPI_xxx(/* */); 58 | } else { 59 | // Target group consists of rank 0, where the accumulation will happen.
60 | MPI_xxx(/* */); 61 | } 62 | 63 | // synchronization *and* RMA for number of points 64 | if (rank == 0) { 65 | // FIXME initialize exposure epoch for win_n on rank 0 (target rank of RMA) 66 | MPI_xxx(/* */); 67 | // FIXME finalize exposure epoch 68 | MPI_xxx(/* */); 69 | } else { 70 | // FIXME initialize access epoch for win_n on rank > 0 (origin ranks of RMA) 71 | MPI_xxx(/* */); 72 | // FIXME RMA with rank 0 as target process to get value of n 73 | MPI_xxx(/* */); 74 | // FIXME finalize access epoch 75 | MPI_xxx(/* */); 76 | } 77 | 78 | // compute slice of pi for each process (including on rank 0) 79 | double h = 1.0 / (double)n; 80 | double sum = 0.0; 81 | 82 | double x; 83 | for (int i = rank + 1; i <= n; i += size) { 84 | x = h * ((double)i - 0.5); 85 | sum += (4.0 / (1.0 + x * x)); 86 | } 87 | pi = h * sum; 88 | 89 | // synchronization *and* RMA for final result 90 | if (rank > 0) { 91 | // FIXME initialize access epoch for win_pi on rank > 0 (origin ranks of RMA) 92 | MPI_xxx(/* */); 93 | // FIXME RMA reduction with rank 0 as target process 94 | MPI_xxx(/* */); 95 | // FIXME finalize access epoch 96 | MPI_xxx(/* */); 97 | } else { 98 | // FIXME initialize exposure epoch for win_pi on rank 0 (target rank of RMA) 99 | MPI_xxx(/* */); 100 | // FIXME finalize exposure epoch for win_pi on rank 0 (target rank of RMA) 101 | MPI_xxx(/* */); 102 | printf("pi is approximately %.16f, Error is %.16f\n", pi, 103 | fabs(pi - PI25DT)); 104 | } 105 | 106 | // FIXME free windows 107 | MPI_xxx(/* */); 108 | 109 | MPI_Finalize(); 110 | 111 | return 0; 112 | } 113 | -------------------------------------------------------------------------------- /content/code/day-4/06_rma-pi-pscw/solution/rma-pi-pscw.c: -------------------------------------------------------------------------------- 1 | /* Modified from: https://cvw.cac.cornell.edu/MPIoneSided/piacc_c_solution */ 2 | 3 | #include <math.h> 4 | #include <stdio.h> 5 | #include <stdlib.h> 6 | 7 | #include <mpi.h> 8 | 9 | #define PI25DT 3.141592653589793238462643 10 | 11 | int main(int argc, char *argv[]) { 12 | MPI_Init(&argc, &argv); 13 | 14 | MPI_Comm comm = MPI_COMM_WORLD; 15 | 16 | int rank, size; 17 | MPI_Comm_size(comm, &size); 18 | MPI_Comm_rank(comm, &rank); 19 | 20 | int n; 21 | // on rank 0, read the number of points from the input 22 | if (rank == 0) { 23 | if (argc < 2) { 24 | fprintf(stderr, "Usage: %s N\n", argv[0]); 25 | MPI_Abort(comm, 1); 26 | } 27 | sscanf(argv[1], "%d", &n); 28 | printf("The integration grid has N=%d points\n", n); 29 | if (n == 0) { 30 | fprintf(stderr, "N should be greater than 0!\n"); 31 | MPI_Abort(comm, 1); 32 | } 33 | } 34 | 35 | // declare/initialize stuff for group shenanigans 36 | int *ranks = malloc(size * sizeof(int)); 37 | for (int i = 0; i < size; ++i) { 38 | ranks[i] = i; 39 | } 40 | 41 | MPI_Group comm_group, group; 42 | MPI_Comm_group(comm, &comm_group); 43 | 44 | double pi = 0.0; 45 | 46 | // create two windows: 47 | // - one for the number of points, and 48 | // - one for the computation of pi 49 | MPI_Win win_n, win_pi; 50 | MPI_Win_create(&n, sizeof(int), sizeof(int), MPI_INFO_NULL, comm, &win_n); 51 | MPI_Win_create(&pi, sizeof(double), sizeof(double), MPI_INFO_NULL, comm, 52 | &win_pi); 53 | 54 | if (rank == 0) { 55 | // All ranks except 0 will compute a chunk of the integral.
56 | // Once they're done, they will accumulate their results on rank 0 57 | // Hence the *origin group* for RMA consists of ranks 1 to size-1 58 | MPI_Group_incl(comm_group, size - 1, ranks + 1, &group); 59 | } else { 60 | // Target group consists of rank 0, where the accumulation will happen. 61 | MPI_Group_incl(comm_group, 1, ranks, &group); 62 | } 63 | 64 | // synchronization *and* RMA for number of points 65 | if (rank == 0) { 66 | // initialize exposure epoch for win_n on rank 0 (target rank of RMA) 67 | MPI_Win_post(group, 0, win_n); 68 | // finalize exposure epoch 69 | MPI_Win_wait(win_n); 70 | } else { 71 | // initialize access epoch for win_n on rank > 0 (origin ranks of RMA) 72 | MPI_Win_start(group, 0, win_n); 73 | // RMA with rank 0 as target process to get value of n 74 | MPI_Get(&n, 1, MPI_INT, 0, 0, 1, MPI_INT, win_n); 75 | // finalize acces epoch 76 | MPI_Win_complete(win_n); 77 | } 78 | 79 | // compute slice of pi for each process (including on rank 0) 80 | double h = 1.0 / (double)n; 81 | double sum = 0.0; 82 | 83 | double x; 84 | for (int i = rank + 1; i <= n; i += size) { 85 | x = h * ((double)i - 0.5); 86 | sum += (4.0 / (1.0 + x * x)); 87 | } 88 | pi = h * sum; 89 | 90 | // synchronization *and* RMA for final result 91 | if (rank > 0) { 92 | // initialize access epoch for win_pi on rank > 0 (origin ranks of RMA) 93 | MPI_Win_start(group, 0, win_pi); 94 | // RMA reduction with rank 0 as target process 95 | MPI_Accumulate(&pi, 1, MPI_DOUBLE, 0, 0, 1, MPI_DOUBLE, MPI_SUM, win_pi); 96 | // finalize acces epoch 97 | MPI_Win_complete(win_pi); 98 | } else { 99 | // initialiaze exposure epoch for win_pi on rank 0 (target rank of RMA) 100 | MPI_Win_post(group, 0, win_pi); 101 | // finalize exposure epoch for win_pi on rank 0 (target rank of RMA) 102 | MPI_Win_wait(win_pi); 103 | printf("pi is approximately %.16f, Error is %.16f\n", pi, 104 | fabs(pi - PI25DT)); 105 | } 106 | 107 | // free windows 108 | MPI_Win_free(&win_n); 109 | MPI_Win_free(&win_pi); 110 | 111 | MPI_Finalize(); 112 | 113 | return 0; 114 | } 115 | -------------------------------------------------------------------------------- /content/code/day-2/00_scatter-and-gather/solution/scatter-and-gather-3.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | int main(int argc, char *argv[]) 9 | { 10 | /* Initialize the MPI environment and report */ 11 | 12 | MPI_Init(&argc, &argv); 13 | 14 | MPI_Comm comm = MPI_COMM_WORLD; 15 | 16 | int rank, size; 17 | MPI_Comm_rank(comm, &rank); 18 | MPI_Comm_size(comm, &size); 19 | 20 | /* Prepare the data */ 21 | 22 | const int n_elements = 111323; 23 | 24 | double* vector_a; 25 | double* vector_b; 26 | int i; 27 | 28 | if (rank == 0) 29 | { 30 | vector_a = (double*)(malloc(sizeof(double) * n_elements)); 31 | vector_b = (double*)(malloc(sizeof(double) * n_elements)); 32 | 33 | for (i = 0; i < n_elements; i++) 34 | { 35 | vector_a[i] = (double)(i); 36 | vector_b[i] = (double)(n_elements - i); 37 | } 38 | } 39 | 40 | /* Do the scatter */ 41 | 42 | int n_local_elements = n_elements / size; 43 | 44 | double* local_vector_a = (double*)(malloc(sizeof(double) * n_local_elements)); 45 | double* local_vector_b = (double*)(malloc(sizeof(double) * n_local_elements)); 46 | 47 | MPI_Scatter(vector_a, n_local_elements, MPI_DOUBLE, local_vector_a, n_local_elements, MPI_DOUBLE, 0, comm); 48 | MPI_Scatter(vector_b, n_local_elements, MPI_DOUBLE, local_vector_b, n_local_elements, MPI_DOUBLE, 
0, comm); 49 | 50 | /* Do local computation */ 51 | 52 | struct timespec ts; 53 | 54 | timespec_get(&ts, TIME_UTC); 55 | double t0 = (double)(ts.tv_sec) + (double)(ts.tv_nsec) * 1e-9; 56 | 57 | double local_product = 0.0; 58 | 59 | for (i = 0; i < n_local_elements; i++) 60 | { 61 | local_product += local_vector_a[i] * local_vector_b[i]; 62 | } 63 | 64 | if (rank == 0) 65 | { 66 | for (i = n_local_elements * size; i < n_elements; i++) 67 | { 68 | local_product += vector_a[i] * vector_b[i]; 69 | } 70 | } 71 | 72 | timespec_get(&ts, TIME_UTC); 73 | double t1 = (double)(ts.tv_sec) + (double)(ts.tv_nsec) * 1e-9; 74 | 75 | /* Do the gather */ 76 | 77 | double dt = t1 - t0; 78 | double* elapsed_time; 79 | 80 | if (rank == 0) 81 | { 82 | elapsed_time = (double*)(malloc(sizeof(double) * size)); 83 | } 84 | 85 | MPI_Gather(&dt, 1, MPI_DOUBLE, elapsed_time, 1, MPI_DOUBLE, 0, comm); 86 | 87 | /* Do the reduce */ 88 | 89 | double final_product; 90 | 91 | MPI_Reduce(&local_product, &final_product, 1, MPI_DOUBLE, MPI_SUM, 0, comm); 92 | 93 | if (rank == 0) 94 | { 95 | double ref_product = 0.0; 96 | for (i = 0; i < n_elements; i++) 97 | { 98 | ref_product += vector_a[i] * vector_b[i]; 99 | } 100 | 101 | double diff = fabs(ref_product - final_product); 102 | printf("diff = %.6e\n", diff); 103 | 104 | if (diff < 1.0e-8) 105 | { 106 | printf("SUCCESS!\n"); 107 | } 108 | else 109 | { 110 | printf("Improvement needed!\n"); 111 | } 112 | 113 | double dt_sum = 0.0; 114 | double max_dt = 0.0; 115 | for (i = 0; i < size; i++) 116 | { 117 | dt_sum += elapsed_time[i]; 118 | if (elapsed_time[i] > max_dt) 119 | { 120 | max_dt = elapsed_time[i]; 121 | } 122 | } 123 | 124 | double load_imb = 1.0 - (dt_sum / size) / max_dt; 125 | printf("Time spent in computation: %.1f microseconds\n", max_dt * 1e+6); 126 | printf("Load imbalance of computation: %.1f%%\n", load_imb * 100.0); 127 | } 128 | 129 | /* Clean up and exit */ 130 | 131 | if (rank == 0) 132 | { 133 | free(vector_a); 134 | free(vector_b); 135 | free(elapsed_time); 136 | } 137 | free(local_vector_a); 138 | free(local_vector_b); 139 | 140 | MPI_Finalize(); 141 | 142 | return 0; 143 | } 144 | -------------------------------------------------------------------------------- /content/code/day-2/03_scatterv-and-gatherv/solution/scatterv-and-gatherv.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(int argc, char *argv[]) 7 | { 8 | /* Initialize the MPI environment and report */ 9 | 10 | MPI_Init(&argc, &argv); 11 | 12 | MPI_Comm comm = MPI_COMM_WORLD; 13 | 14 | int rank, size; 15 | MPI_Comm_rank(comm, &rank); 16 | MPI_Comm_size(comm, &size); 17 | 18 | /* Prepare the matrix and vector */ 19 | 20 | const int n_rows = 1559; 21 | const int n_cols = 179; 22 | int i, j; 23 | 24 | double* matrix; 25 | double* vector = (double*)(malloc(sizeof(double) * n_cols)); 26 | 27 | if (rank == 0) 28 | { 29 | matrix = (double*)(malloc(sizeof(double) * n_rows * n_cols)); 30 | 31 | for (i = 0; i < n_rows; i++) 32 | { 33 | for (j = 0; j < n_cols; j++) 34 | { 35 | matrix[i * n_cols + j] = (double)(i + j); 36 | } 37 | } 38 | 39 | for (j = 0; j < n_cols; j++) 40 | { 41 | vector[j] = (double)(j); 42 | } 43 | } 44 | 45 | /* Do the scatterv */ 46 | 47 | int* counts = (int*)(malloc(sizeof(int) * size)); 48 | int* displs = (int*)(malloc(sizeof(int) * size)); 49 | 50 | int ave = n_rows / size; 51 | int rem = n_rows % size; 52 | 53 | for (i = 0; i < size; i++) 54 | { 55 | if (i < rem) 56 | { 57 | counts[i] = 
(ave + 1) * n_cols; 58 | } 59 | else 60 | { 61 | counts[i] = ave * n_cols; 62 | } 63 | 64 | if (i == 0) 65 | { 66 | displs[i] = 0; 67 | } 68 | else 69 | { 70 | displs[i] = displs[i-1] + counts[i-1]; 71 | } 72 | } 73 | 74 | double* row_vectors = (double*)(malloc(sizeof(double) * counts[rank])); 75 | 76 | MPI_Scatterv(matrix, counts, displs, MPI_DOUBLE, row_vectors, counts[rank], MPI_DOUBLE, 0, comm); 77 | 78 | /* Do the bcast */ 79 | 80 | MPI_Bcast(vector, n_cols, MPI_DOUBLE, 0, comm); 81 | 82 | /* Do matrix vector multiplication */ 83 | 84 | double* local_product = (double*)(malloc(sizeof(double) * (counts[rank] / n_cols))); 85 | 86 | for (i = 0; i < counts[rank] / n_cols; i++) 87 | { 88 | double p = 0.0; 89 | 90 | for (j = 0; j < n_cols; j++) 91 | { 92 | p += row_vectors[i * n_cols + j] * vector[j]; 93 | } 94 | 95 | local_product[i] = p; 96 | } 97 | 98 | /* Do the gatherv */ 99 | 100 | double* final_product; 101 | 102 | if (rank == 0) 103 | { 104 | final_product = (double*)(malloc(sizeof(double) * n_rows)); 105 | } 106 | 107 | int* count_rows = (int*)(malloc(sizeof(int) * size)); 108 | int* displ_rows = (int*)(malloc(sizeof(int) * size)); 109 | 110 | for (i = 0; i < size; i++) 111 | { 112 | count_rows[i] = counts[i] / n_cols; 113 | displ_rows[i] = displs[i] / n_cols; 114 | } 115 | 116 | MPI_Gatherv(local_product, count_rows[rank], MPI_DOUBLE, final_product, count_rows, displ_rows, MPI_DOUBLE, 0, comm); 117 | 118 | /* Check the result */ 119 | 120 | if (rank == 0) 121 | { 122 | int success = 1; 123 | 124 | for (i = 0; i < n_rows; i++) 125 | { 126 | double p = 0.0; 127 | 128 | for (j = 0; j < n_cols; j++) 129 | { 130 | p += matrix[i * n_cols + j] * vector[j]; 131 | } 132 | 133 | if (p != final_product[i]) 134 | { 135 | success = 0; 136 | } 137 | } 138 | 139 | if (success) 140 | { 141 | printf("SUCCESS!\n"); 142 | } 143 | else 144 | { 145 | printf("Improvement needed!\n"); 146 | } 147 | } 148 | 149 | /* Clean up and exit */ 150 | 151 | if (rank == 0) 152 | { 153 | free(final_product); 154 | free(matrix); 155 | } 156 | free(displ_rows); 157 | free(count_rows); 158 | free(displs); 159 | free(counts); 160 | free(local_product); 161 | free(row_vectors); 162 | free(vector); 163 | 164 | MPI_Finalize(); 165 | 166 | return 0; 167 | } 168 | -------------------------------------------------------------------------------- /content/setup.rst: -------------------------------------------------------------------------------- 1 | Setting up your system 2 | ====================== 3 | 4 | In order to follow this workshop, you will need access to compilers 5 | and MPI libraries. You can either use a cluster or set things up on 6 | your local computer - the instructions here are for installing on your 7 | own computer. 8 | 9 | We recommend that participants create an isolated software environment 10 | on their computer and install a C compiler along with MPI libraries 11 | inside that environment. Root-level system installation is also 12 | possible but will not be covered here due to the risk of various 13 | conflicts (or worse). 14 | 15 | These instructions are based on installing compilers and MPI via the `Conda 16 | package and enviroment manager `_, as it 17 | provides a convenient way to install binary packages in an isolated software 18 | environment. 19 | 20 | Operating systems 21 | ^^^^^^^^^^^^^^^^^ 22 | 23 | The following steps are appropriate for Linux and MacOS systems. 
For 24 | Windows, it is necessary to first install the Windows Subsystem for 25 | Linux (see these `installation instructions for WSL 26 | `_). 27 | Installing compilers and MPI natively on Windows is also possible 28 | through `Cygwin `_ and the Microsoft 29 | Distribution of MPICH, but we recommend WSL, which is available for 30 | Windows 10 and later. 31 | 32 | 33 | Installing conda 34 | ^^^^^^^^^^^^^^^^ 35 | 36 | Begin by installing Miniconda: 37 | 38 | 1. Download the 64-bit installer from `here 39 | `_ for your operating system: 40 | 41 | - for MacOS and Linux, choose the bash installer 42 | - on Windows, open a Linux-WSL terminal and type: ``wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh``. 43 | If wget is not a recognised command, first install it with ``sudo apt-get install wget`` (provide the password you chose when installing WSL). 44 | 2. In a terminal, run the installer with ``bash Miniconda3-latest--x86_64.sh`` 45 | (replace with the correct name of the installer) 46 | 3. Agree to the terms and conditions, specify the installation directory (the default is 47 | usually fine), and answer "yes" to the question "Do you wish the installer to 48 | initialize Miniconda3 by running conda init?" 49 | 50 | You now have Miniconda and conda installed. Make sure that it works by 51 | typing ``which conda`` and checking that it points to where you installed 52 | Miniconda (you may have to open a new terminal first). 53 | 54 | We recommend that you create an isolated conda environment (this is 55 | good practice in software development):: 56 | 57 | $ conda create --name mpi 58 | $ conda activate mpi 59 | 60 | This should create a new empty environment and activate it, which 61 | might prepend your shell prompt with the name of the conda environment. 62 | 63 | Installing a C compiler and MPI 64 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 65 | 66 | Now install compilers and the OpenMPI 67 | implementation of MPI:: 68 | 69 | (mpi) $ conda install -c conda-forge compilers 70 | (mpi) $ conda install -c conda-forge openmpi 71 | 72 | If you prefer MPICH over OpenMPI (or you experience problems with OpenMPI), you can 73 | instead do:: 74 | 75 | (mpi) $ conda install -c conda-forge compilers 76 | (mpi) $ conda install -c conda-forge mpich 77 | 78 | **Please also verify the installation.** 79 | 80 | The following commands should give version numbers:: 81 | 82 | (mpi) $ mpicc --version 83 | (mpi) $ mpirun --version 84 | 85 | With OpenMPI you can also try the ``-showme`` flag to see what the ``mpicc`` 86 | compiler wrapper does under the hood:: 87 | 88 | (mpi) $ mpicc -showme 89 | 90 | To compile an MPI code `hello_mpi.c`, you should now be able to do:: 91 | 92 | (mpi) $ mpicc -o hello_mpi.x hello_mpi.c 93 | (mpi) $ mpirun -n 2 hello_mpi.x 94 | 95 | To compile with OpenMP support for hybrid MPI+OpenMP codes, you need 96 | to add the ``-fopenmp`` flag:: 97 | 98 | (mpi) $ mpicc -fopenmp -o hello_omp_mpi.x hello_omp_mpi.c 99 | (mpi) $ export OMP_NUM_THREADS=2 100 | (mpi) $ mpirun -n 2 hello_omp_mpi.x 101 | 102 | You *might* also need to explicitly link against the OpenMP runtime library.
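If you do not already have suitable test programs at hand, minimal versions of ``hello_mpi.c`` and ``hello_omp_mpi.c`` along the following lines are enough for the checks above (any equivalent examples will do)::

    /* hello_mpi.c: each rank reports its rank and the communicator size */
    #include <stdio.h>
    #include <mpi.h>

    int main(int argc, char *argv[])
    {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        printf("Hello from rank %d of %d\n", rank, size);
        MPI_Finalize();
        return 0;
    }

For the hybrid test, the same idea with an OpenMP parallel region::

    /* hello_omp_mpi.c: each OpenMP thread of each rank says hello;
     * only the main thread calls MPI, so MPI_THREAD_FUNNELED suffices */
    #include <stdio.h>
    #include <omp.h>
    #include <mpi.h>

    int main(int argc, char *argv[])
    {
        int provided;
        MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        #pragma omp parallel
        printf("Hello from thread %d of rank %d\n", omp_get_thread_num(), rank);
        MPI_Finalize();
        return 0;
    }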
103 | -------------------------------------------------------------------------------- /content/code/day-2/00_scatter-and-gather/scatter-and-gather-3.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | int main(int argc, char *argv[]) 9 | { 10 | /* Initialize the MPI environment and report */ 11 | 12 | MPI_Init(&argc, &argv); 13 | 14 | MPI_Comm comm = MPI_COMM_WORLD; 15 | 16 | int rank, size; 17 | MPI_Comm_rank(comm, &rank); 18 | MPI_Comm_size(comm, &size); 19 | 20 | /* Prepare the data */ 21 | 22 | const int n_elements = 111323; 23 | 24 | double* vector_a; 25 | double* vector_b; 26 | int i; 27 | 28 | if (rank == 0) 29 | { 30 | vector_a = (double*)(malloc(sizeof(double) * n_elements)); 31 | vector_b = (double*)(malloc(sizeof(double) * n_elements)); 32 | 33 | for (i = 0; i < n_elements; i++) 34 | { 35 | /* FIXME: assign values for elements in vector_a and vector_b */ 36 | vector_a[i] = ... ; 37 | vector_b[i] = ... ; 38 | } 39 | } 40 | 41 | /* Do the scatter */ 42 | 43 | int n_local_elements = n_elements / size; 44 | 45 | /* FIXME: complete malloc for local vectors */ 46 | double* local_vector_a = (double*)(malloc( ... )); 47 | double* local_vector_b = (double*)(malloc( ... )); 48 | 49 | /* FIXME: complete MPI_Scatter */ 50 | MPI_Scatter( ... ); 51 | MPI_Scatter( ... ); 52 | 53 | /* Do local computation and record timing */ 54 | 55 | struct timespec ts; 56 | 57 | timespec_get(&ts, TIME_UTC); 58 | double t0 = (double)(ts.tv_sec) + (double)(ts.tv_nsec) * 1e-9; 59 | 60 | double local_product = 0.0; 61 | 62 | for (i = 0; i < n_local_elements; i++) 63 | { 64 | /* FIXME: complete calculation of local product */ 65 | local_product += ... ; 66 | } 67 | 68 | if (rank == 0) 69 | { 70 | for (i = n_local_elements * size; i < n_elements; i++) 71 | { 72 | /* FIXME: take care of remaining elements on rank 0, 73 | * in case number of elements is not divisible 74 | * by number of processes */ 75 | local_product += ... ; 76 | } 77 | } 78 | 79 | timespec_get(&ts, TIME_UTC); 80 | double t1 = (double)(ts.tv_sec) + (double)(ts.tv_nsec) * 1e-9; 81 | 82 | /* Do the gather */ 83 | 84 | double dt = t1 - t0; 85 | double* elapsed_time; 86 | 87 | if (rank == 0) 88 | { 89 | elapsed_time = (double*)(malloc(sizeof(double) * size)); 90 | } 91 | 92 | /* FIXME: complete MPI_Gather of elapsed time */ 93 | MPI_Gather( ... ); 94 | 95 | /* Do the reduce */ 96 | 97 | double final_product; 98 | 99 | /* FIXME: complete MPI_Reduce of product */ 100 | MPI_Reduce( ... ); 101 | 102 | if (rank == 0) 103 | { 104 | double ref_product = 0.0; 105 | for (i = 0; i < n_elements; i++) 106 | { 107 | /* FIXME: complete calculation of reference */ 108 | ref_product += vector_a[i] * vector_b[i]; 109 | } 110 | 111 | double diff = fabs(ref_product - final_product); 112 | printf("diff = %.6e\n", diff); 113 | 114 | if (diff < 1.0e-8) 115 | { 116 | printf("SUCCESS!\n"); 117 | } 118 | else 119 | { 120 | printf("Improvement needed!\n"); 121 | } 122 | 123 | double dt_sum = 0.0; 124 | double max_dt = 0.0; 125 | for (i = 0; i < size; i++) 126 | { 127 | /* FIXME: calculate sum of dt and find maximum value of dt */ 128 | dt_sum += ... ; 129 | if ( ... ) 130 | { 131 | max_dt = ... 
; 132 | } 133 | } 134 | 135 | double load_imb = 1.0 - (dt_sum / size) / max_dt; 136 | printf("Time spent in computation: %.1f microseconds\n", max_dt * 1e+6); 137 | printf("Load imbalance of computation: %.1f%%\n", load_imb * 100.0); 138 | } 139 | 140 | /* Clean up and exit */ 141 | 142 | if (rank == 0) 143 | { 144 | free(vector_a); 145 | free(vector_b); 146 | free(elapsed_time); 147 | } 148 | free(local_vector_a); 149 | free(local_vector_b); 150 | 151 | MPI_Finalize(); 152 | 153 | return 0; 154 | } 155 | -------------------------------------------------------------------------------- /content/code/day-2/03_scatterv-and-gatherv/scatterv-and-gatherv.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main(int argc, char *argv[]) 7 | { 8 | /* Initialize the MPI environment and report */ 9 | 10 | MPI_Init(&argc, &argv); 11 | 12 | MPI_Comm comm = MPI_COMM_WORLD; 13 | 14 | int rank, size; 15 | MPI_Comm_rank(comm, &rank); 16 | MPI_Comm_size(comm, &size); 17 | 18 | /* Prepare the matrix and vector */ 19 | 20 | const int n_rows = 1559; 21 | const int n_cols = 179; 22 | int i, j; 23 | 24 | double* matrix; 25 | double* vector = (double*)(malloc(sizeof(double) * n_cols)); 26 | 27 | if (rank == 0) 28 | { 29 | matrix = (double*)(malloc(sizeof(double) * n_rows * n_cols)); 30 | 31 | for (i = 0; i < n_rows; i++) 32 | { 33 | for (j = 0; j < n_cols; j++) 34 | { 35 | /* FIXME: assign values for elements in matrix */ 36 | matrix[i * n_cols + j] = ...; 37 | } 38 | } 39 | 40 | for (j = 0; j < n_cols; j++) 41 | { 42 | /* FIXME: assign values for elements in vector */ 43 | vector[j] = ...; 44 | } 45 | } 46 | 47 | /* Do the scatterv */ 48 | 49 | int* counts = (int*)(malloc(sizeof(int) * size)); 50 | int* displs = (int*)(malloc(sizeof(int) * size)); 51 | 52 | int ave = n_rows / size; 53 | int rem = n_rows % size; 54 | 55 | for (i = 0; i < size; i++) 56 | { 57 | if (i < rem) 58 | { 59 | /* FIXME: determine counts */ 60 | counts[i] = ...; 61 | } 62 | else 63 | { 64 | /* FIXME: determine counts */ 65 | counts[i] = ...; 66 | } 67 | 68 | if (i == 0) 69 | { 70 | displs[i] = 0; 71 | } 72 | else 73 | { 74 | displs[i] = displs[i-1] + counts[i-1]; 75 | } 76 | } 77 | 78 | double* row_vectors = (double*)(malloc(sizeof(double) * counts[rank])); 79 | 80 | /* FIXME: complete MPI_Scatterv */ 81 | MPI_Scatterv( ... ); 82 | 83 | /* Do the bcast */ 84 | 85 | MPI_Bcast(vector, n_cols, MPI_DOUBLE, 0, comm); 86 | 87 | /* Do matrix vector multiplication */ 88 | 89 | double* local_product = (double*)(malloc(sizeof(double) * (counts[rank] / n_cols))); 90 | 91 | for (i = 0; i < counts[rank] / n_cols; i++) 92 | { 93 | double p = 0.0; 94 | 95 | for (j = 0; j < n_cols; j++) 96 | { 97 | p += row_vectors[i * n_cols + j] * vector[j]; 98 | } 99 | 100 | local_product[i] = p; 101 | } 102 | 103 | /* Do the gatherv */ 104 | 105 | double* final_product; 106 | 107 | if (rank == 0) 108 | { 109 | final_product = (double*)(malloc(sizeof(double) * n_rows)); 110 | } 111 | 112 | int* count_rows = (int*)(malloc(sizeof(int) * size)); 113 | int* displ_rows = (int*)(malloc(sizeof(int) * size)); 114 | 115 | for (i = 0; i < size; i++) 116 | { 117 | count_rows[i] = counts[i] / n_cols; 118 | displ_rows[i] = displs[i] / n_cols; 119 | } 120 | 121 | /* FIXME: complete MPI_Gatherv */ 122 | MPI_Gatherv( ... 
); 123 | 124 | /* Check the result */ 125 | 126 | if (rank == 0) 127 | { 128 | int success = 1; 129 | 130 | for (i = 0; i < n_rows; i++) 131 | { 132 | double p = 0.0; 133 | 134 | for (j = 0; j < n_cols; j++) 135 | { 136 | p += matrix[i * n_cols + j] * vector[j]; 137 | } 138 | 139 | if (p != final_product[i]) 140 | { 141 | success = 0; 142 | } 143 | } 144 | 145 | if (success) 146 | { 147 | printf("SUCCESS!\n"); 148 | } 149 | else 150 | { 151 | printf("Improvement needed!\n"); 152 | } 153 | } 154 | 155 | /* Clean up and exit */ 156 | 157 | if (rank == 0) 158 | { 159 | free(final_product); 160 | free(matrix); 161 | } 162 | free(displ_rows); 163 | free(count_rows); 164 | free(displs); 165 | free(counts); 166 | free(local_product); 167 | free(row_vectors); 168 | free(vector); 169 | 170 | MPI_Finalize(); 171 | 172 | return 0; 173 | } 174 | -------------------------------------------------------------------------------- /.github/workflows/sphinx.yml: -------------------------------------------------------------------------------- 1 | # From: https://github.com/rkdarst/sphinx-actions-test/blob/master/.github/workflows/sphinx-build.yml 2 | 3 | name: sphinx 4 | on: [push, pull_request] 5 | 6 | # If these SPHINXOPTS are enabled, then be strict about the builds and 7 | # fail on any warnings 8 | #env: 9 | # SPHINXOPTS: "-W --keep-going -T" 10 | 11 | 12 | jobs: 13 | build-and-deploy: 14 | name: Build and gh-pages 15 | runs-on: ubuntu-latest 16 | steps: 17 | # https://github.com/marketplace/actions/checkout 18 | - uses: actions/checkout@v2 19 | # https://github.com/marketplace/actions/setup-python 20 | # ^-- This gives info on matrix testing. 21 | - name: Install Python 22 | uses: actions/setup-python@v1 23 | with: 24 | python-version: 3.8 25 | - name: Fetch all refs 26 | run: | 27 | git fetch 28 | # I don't know where the "run" thing is documented. 29 | - name: Install dependencies 30 | run: | 31 | pip install -r requirements.txt 32 | - name: Debugging information 33 | run: | 34 | echo "github.ref:" ${{github.ref}} 35 | echo "github.event_name:" ${{github.event_name}} 36 | echo "github.head_ref:" ${{github.head_ref}} 37 | echo "github.base_ref:" ${{github.base_ref}} 38 | set -x 39 | git rev-parse --abbrev-ref HEAD 40 | git branch 41 | git branch -a 42 | git remote -v 43 | python -V 44 | pip list --not-required 45 | pip list 46 | 47 | # Build 48 | - uses: ammaraskar/sphinx-problem-matcher@master 49 | - name: Build Sphinx docs 50 | run: | 51 | make dirhtml 52 | sed -i 's/url_root="#"/url_root=""/' _build/dirhtml/index.html || true 53 | 54 | 55 | # The following supports building all branches and combining on 56 | # gh-pages 57 | 58 | # Clone and set up the old gh-pages branch 59 | - name: Clone old gh-pages 60 | if: ${{ github.event_name == 'push' }} 61 | run: | 62 | set -x 63 | git fetch 64 | ( git branch gh-pages remotes/origin/gh-pages && git clone . --branch=gh-pages _gh-pages/ ) || mkdir _gh-pages 65 | rm -rf _gh-pages/.git/ 66 | mkdir -p _gh-pages/branch/ 67 | # If a push and master, copy build to _gh-pages/ as the "main" 68 | # deployment. 69 | - name: Copy new build (master) 70 | if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} 71 | run: | 72 | set -x 73 | # Delete everything under _gh-pages/ that is from the 74 | # primary branch deployment. Eicludes the other branches 75 | # _gh-pages/branch-* paths, and not including 76 | # _gh-pages itself. 77 | find _gh-pages/ -mindepth 1 ! 
-path '_gh-pages/branch*' -delete 78 | rsync -a _build/dirhtml/ _gh-pages/ 79 | # If a push and not on master, then copy the build to 80 | # _gh-pages/branch/$brname (transforming '/' into '--') 81 | - name: Copy new build (branch) 82 | if: ${{ github.event_name == 'push' && github.ref != 'refs/heads/main' }} 83 | run: | 84 | set -x 85 | #brname=$(git rev-parse --abbrev-ref HEAD) 86 | brname="${{github.ref}}" 87 | brname="${brname##refs/heads/}" 88 | brdir=${brname//\//--} # replace '/' with '--' 89 | rm -rf _gh-pages/branch/${brdir} 90 | rsync -a _build/dirhtml/ _gh-pages/branch/${brdir} 91 | # Go through each branch in _gh-pages/branch/, if it's not a 92 | # ref, then delete it. 93 | - name: Delete old feature branches 94 | if: ${{ github.event_name == 'push' }} 95 | run: | 96 | set -x 97 | for brdir in `ls _gh-pages/branch/` ; do 98 | brname=${brdir//--/\/} # replace '--' with '/' 99 | if ! git show-ref remotes/origin/$brname ; then 100 | echo "Removing $brdir" 101 | rm -r _gh-pages/branch/$brdir/ 102 | fi 103 | done 104 | 105 | # Deploy 106 | # https://github.com/peaceiris/actions-gh-pages 107 | - name: Deploy 108 | uses: peaceiris/actions-gh-pages@v3 109 | if: ${{ github.event_name == 'push' }} 110 | #if: ${{ success() && github.event_name == 'push' && github.ref == 'refs/heads/main' }} 111 | with: 112 | publish_branch: gh-pages 113 | github_token: ${{ secrets.GITHUB_TOKEN }} 114 | publish_dir: _gh-pages/ 115 | force_orphan: true 116 | -------------------------------------------------------------------------------- /content/code/day-1/02_compute-pi/pi-monte-carlo.c: -------------------------------------------------------------------------------- 1 | /* adapted from: 2 | * https://www.mcs.anl.gov/research/projects/mpi/usingmpi/examples-usingmpi/simplempi/monte-ex_c.html 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #define PI 3.141592653589793238462643 12 | 13 | #define CHUNKSIZE 1000 14 | 15 | /* message tags */ 16 | #define REQUEST 1 17 | #define REPLY 2 18 | 19 | int main(int argc, char *argv[]) { 20 | // number of Monte Carlo samples 21 | int n_samples; 22 | // counter for the number of samples inside and outside the circle 23 | int in, out; 24 | // total tally of samples inside and outside the circle 25 | int totalin, totalout; 26 | // coordinates of the random point 27 | double x, y; 28 | // current estimate of pi 29 | double Pi; 30 | // error and user-provided threshold 31 | double error, epsilon; 32 | // whether user-provided threshold was met 33 | int done; 34 | // random data 35 | double rands[CHUNKSIZE]; 36 | int size, rank, rng; 37 | int request; 38 | int ranks[1]; 39 | MPI_Status status; 40 | 41 | MPI_Init(&argc, &argv); 42 | MPI_Comm world = MPI_COMM_WORLD; 43 | 44 | MPI_Comm_size(world, &size); 45 | MPI_Comm_rank(world, &rank); 46 | 47 | // we use the last process as the random number server 48 | rng = size - 1; 49 | 50 | // read user input 51 | if (rank == 0) { 52 | if (argc < 2) { 53 | fprintf(stderr, "Usage: %s epsilon\n", argv[0]); 54 | MPI_Abort(world, 1); 55 | } 56 | sscanf(argv[1], "%lf", &epsilon); 57 | } 58 | 59 | // let every process know what the threshold is 60 | // the collective operation will be discussed later on 61 | MPI_Bcast(&epsilon, 1, MPI_DOUBLE, 0, world); 62 | 63 | /* FIXME create checkers group */ 64 | 65 | /* FIXME create checkers communicator */ 66 | 67 | // handle the random number generation 68 | if (rank == rng) { /* I am the random number generator */ 69 | // listen for requests from any source 70 | do { 71 | 
/* FIXME receive a request for random data */ 72 | MPI_Recv(&request, 1, MPI_INT, .., .., .., &status); 73 | if (request) { 74 | for (int i = 0; i < CHUNKSIZE; ++i) { 75 | rands[i] = (double)rand() / RAND_MAX * 2.0 - 1.0; 76 | } 77 | /* FIXME send random data */ 78 | MPI_Send(rands, .., .., .., .., ..); 79 | } 80 | } while (request > 0); 81 | } else { /* I am a checker process */ 82 | // first thing, a checker process always requests fresh random data 83 | request = 1; 84 | done = in = out = 0; 85 | /* FIXME send a request for random data */ 86 | MPI_Send(&request, 1, MPI_INT, .., .., ..); 87 | // set the number of samples processed to 0 88 | n_samples = 0; 89 | // check the random samples 90 | while (!done) { 91 | n_samples++; 92 | /* FIXME receive the random data */ 93 | MPI_Recv(rands, .., .., .., .., .., ..); 94 | int i; 95 | for (i = 0; i < CHUNKSIZE-1; i+=2) { 96 | x = rands[i]; 97 | y = rands[i+1]; 98 | if (x * x + y * y < 1.0) { 99 | in++; 100 | } else { 101 | out++; 102 | } 103 | } 104 | 105 | // total tally of points inside the circle 106 | // the collective operation will be discussed later on 107 | MPI_Allreduce(&in, &totalin, 1, MPI_INT, MPI_SUM, checkers); 108 | 109 | // total tally of points outside the circle 110 | // the collective operation will be discussed later on 111 | MPI_Allreduce(&out, &totalout, 1, MPI_INT, MPI_SUM, checkers); 112 | 113 | // compute pi 114 | Pi = (4.0 * totalin) / (totalin + totalout); 115 | 116 | // check error 117 | error = fabs(Pi - PI); 118 | 119 | // are we done? 120 | done = (error < epsilon || (totalin + totalout) > 100000000); 121 | request = (done) ? 0 : 1; 122 | 123 | // print current estimate and send a new request 124 | if (rank == 0) { 125 | printf("\rpi = %23.20f", Pi); 126 | /* FIXME send request for random data */ 127 | MPI_Send(&request, .., .., .., .., ..); 128 | } else { 129 | if (request) { 130 | /* FIXME send request for random data */ 131 | MPI_Send(&request, .., .., .., .., ..); 132 | } 133 | } 134 | } 135 | 136 | /* FIXME clean up communicator and group for checker processes */ 137 | } 138 | 139 | // print results 140 | if (rank == 0) { 141 | printf("\npoints: %d\nin: %d, out: %d, to exit\n", totalin + totalout, 142 | totalin, totalout); 143 | getchar(); 144 | } 145 | 146 | MPI_Finalize(); 147 | 148 | return 0; 149 | } 150 | -------------------------------------------------------------------------------- /content/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | import os 14 | import sys 15 | 16 | sys.path.insert(0, os.path.abspath(".")) 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = "Intermediate MPI" 21 | copyright = "2020, EuroCC National Competence Centre Sweden" 22 | author = "Mark Abraham, Roberto Di Remigio, Pedro Ojeda May, Xin Li, Kjartan Thor Wikfeldt" 23 | github_user = "ENCCS" 24 | github_repo_name = "intermediate-mpi" # auto-detected from dirname if blank 25 | github_version = "master" 26 | conf_py_path = "/content/" # with leading and trailing slash 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | "sphinx.ext.autosectionlabel", 35 | # githubpages just adds a .nojekyll file 36 | "sphinx.ext.githubpages", 37 | "sphinx_lesson", 38 | #'sphinx.ext.intersphinx', 39 | "sphinxcontrib.bibtex", 40 | "sphinx.ext.todo", 41 | ] 42 | bibtex_bibfiles = ['bibliography.bib'] 43 | autosectionlabel_prefix_document = True 44 | 45 | # Settings for myst_nb: 46 | # https://myst-nb.readthedocs.io/en/latest/use/execute.html#triggering-notebook-execution 47 | # jupyter_execute_notebooks = "off" 48 | # jupyter_execute_notebooks = "auto" # *only* execute if at least one output is missing. 49 | # jupyter_execute_notebooks = "force" 50 | jupyter_execute_notebooks = "cache" 51 | 52 | # Add any paths that contain templates here, relative to this directory. 53 | # templates_path = ['_templates'] 54 | 55 | # List of patterns, relative to source directory, that match files and 56 | # directories to ignore when looking for source files. 57 | # This pattern also affects html_static_path and html_extra_path. 58 | exclude_patterns = [ 59 | "README*", 60 | "_build", 61 | "Thumbs.db", 62 | ".DS_Store", 63 | "jupyter_execute", 64 | "*venv*", 65 | ] 66 | 67 | 68 | # -- Options for HTML output ------------------------------------------------- 69 | 70 | # The theme to use for HTML and HTML Help pages. See the documentation for 71 | # a list of builtin themes. 72 | # 73 | html_theme = "sphinx_rtd_theme" 74 | html_logo = "img/ENCCS.jpg" 75 | html_favicon = "img/favicon.ico" 76 | html_title = project 77 | 78 | # Add any paths that contain custom static files (such as style sheets) here, 79 | # relative to this directory. They are copied after the builtin static files, 80 | # so a file named "default.css" will overwrite the builtin "default.css". 81 | html_static_path = ["_static"] 82 | 83 | # HTML context: 84 | from os.path import dirname, realpath, basename 85 | 86 | html_context = { 87 | "display_github": True, 88 | "github_user": github_user, 89 | # Auto-detect directory name. This can break, but 90 | # useful as a default. 91 | "github_repo": github_repo_name or basename(dirname(realpath(__file__))), 92 | "github_version": github_version, 93 | "conf_py_path": conf_py_path, 94 | } 95 | 96 | todo_include_todos = True 97 | 98 | # Intersphinx mapping. For example, with this you can use 99 | # :py:mod:`multiprocessing` to link straight to the Python docs of that module. 
100 | # List all available references: 101 | # python -msphinx.ext.intersphinx https://docs.python.org/3/objects.inv 102 | # intersphinx_mapping = { 103 | # #'python': ('https://docs.python.org/3', None), 104 | # #'sphinx': ('https://www.sphinx-doc.org/', None), 105 | # } 106 | 107 | # Our own customisation 108 | from custom import MPI_glossary, DIRECTIVES 109 | 110 | 111 | # the epilog 112 | rst_epilog = f""" 113 | {MPI_glossary()} 114 | 115 | .. role:: red 116 | .. role:: blue 117 | """ 118 | 119 | 120 | def setup(app): 121 | for obj in DIRECTIVES: 122 | app.add_directive(obj.get_cssname(), obj) 123 | app.add_css_file("overrides.css") 124 | 125 | import os 126 | if os.environ.get('GITHUB_REF', '') == 'refs/heads/main': 127 | html_js_files = [ 128 | ('https://plausible.io/js/script.js', {"data-domain": "enccs.github.io/intermediate-mpi", "defer": "defer"}), 129 | ] 130 | -------------------------------------------------------------------------------- /content/code/day-2/05_overlap/non-blocking-communication-overlap.c: -------------------------------------------------------------------------------- 1 | #include "mpi.h" 2 | #include 3 | 4 | int next_working_data_set[6][8]; 5 | void compute_row(int row_index, int input[6][8], int output[6][8]) 6 | { 7 | for (int j = 0; j < 8; j = j + 1) 8 | { 9 | /* Here is the 5-point stencil */ 10 | const int right_column_index = (j + 1) % 8; 11 | const int left_column_index = (j + 8 - 1) % 8; 12 | const int top_row_index = row_index-1; 13 | const int bottom_row_index = row_index+1; 14 | output[row_index][j] = (input[row_index][j] + 15 | input[row_index][left_column_index] + 16 | input[row_index][right_column_index] + 17 | input[top_row_index][j] + 18 | input[bottom_row_index][j]); 19 | } 20 | } 21 | 22 | int main(int argc, char **argv) 23 | { 24 | /* Initialize the MPI environment and check */ 25 | MPI_Init(&argc, &argv); 26 | MPI_Comm comm = MPI_COMM_WORLD; 27 | int rank, size; 28 | MPI_Comm_rank(comm, &rank); 29 | MPI_Comm_size(comm, &size); 30 | if (size != 2) 31 | { 32 | if (rank == 0) 33 | { 34 | printf("Only two ranks is supported for this exercise, " 35 | "please re-run with two ranks\n"); 36 | } 37 | MPI_Finalize(); 38 | return 0; 39 | } 40 | 41 | /* Prepare the initial values for this process */ 42 | int local_data_set[4][8]; 43 | printf("Local data set on rank %d was:\n", rank); 44 | for (int i = 0; i < 4; i = i + 1) 45 | { 46 | printf(" [ "); 47 | for (int j = 0; j < 8; j = j + 1) 48 | { 49 | /* Make sure the local data on each rank is different, so 50 | * that we see the communication works properly. */ 51 | local_data_set[i][j] = 10*(rank + 1); 52 | if (j != 0) 53 | { 54 | printf(", "); 55 | } 56 | printf("%3d", local_data_set[i][j]); 57 | } 58 | printf(" ]\n"); 59 | } 60 | int working_data_set[6][8]; 61 | for (int i = 0; i < 4; i = i + 1) 62 | { 63 | for (int j = 0; j < 8; j = j + 1) 64 | { 65 | /* Initialize the local part of the working data set */ 66 | working_data_set[i+1][j] = local_data_set[i][j]; 67 | } 68 | } 69 | 70 | int next_working_data_set[6][8]; 71 | 72 | /* ==== CHALLENGE ==== 73 | * 74 | * Uncomment and fix the MPI calls below to make this code work! 
75 | */ 76 | 77 | int send_up_tag = 0, send_down_tag = 1; 78 | /* Prepare to receive the halo data */ 79 | int source_rank = size-rank-1; 80 | MPI_Request sent_from_source[2]; 81 | /* MPI_xxx(xxx); */ 82 | /* MPI_xxx(xxx); */ 83 | 84 | /* Prepare to send the border data */ 85 | int destination_rank = size-rank-1; 86 | MPI_Request sent_to_destination[2]; 87 | /* MPI_xxx(xxx); */ 88 | /* MPI_xxx(xxx); */ 89 | 90 | /* Do the local computation */ 91 | compute_row(2, working_data_set, next_working_data_set); 92 | compute_row(3, working_data_set, next_working_data_set); 93 | 94 | /* Wait for the receives to complete */ 95 | /* MPI_xxx(xxx); */ 96 | /* MPI_xxx(xxx); */ 97 | 98 | /* Do the non-local computation */ 99 | compute_row(1, working_data_set, next_working_data_set); 100 | compute_row(4, working_data_set, next_working_data_set); 101 | 102 | /* Wait for the sends to complete */ 103 | /* MPI_xxx(xxx); */ 104 | /* MPI_xxx(xxx); */ 105 | 106 | /* ==== CHALLENGE ==== 107 | * 108 | * Can you simplify the above code with MPI_Waitall? 109 | */ 110 | 111 | printf("Next local data set on rank %d was:\n", rank); 112 | for (int i = 1; i < 5; i = i + 1) 113 | { 114 | printf(" [ "); 115 | for (int j = 0; j < 8; j = j + 1) 116 | { 117 | if (j != 0) 118 | { 119 | printf(", "); 120 | } 121 | printf("%3d", next_working_data_set[i][j]); 122 | } 123 | printf(" ]\n"); 124 | } 125 | 126 | /* Report whether the code is correct */ 127 | int success = 1; 128 | for (int i = 1; i < 5; i = i + 1) 129 | { 130 | int expected_data; 131 | if (i == 1 || i == 4) 132 | { 133 | expected_data = (rank == 0) ? 60 : 90; 134 | } 135 | else 136 | { 137 | expected_data = (rank == 0) ? 50 : 100; 138 | } 139 | for (int j = 0; j < 8; j = j + 1) 140 | { 141 | success = success && (next_working_data_set[i][j] == expected_data); 142 | } 143 | } 144 | if (success) 145 | { 146 | printf("SUCCESS on rank %d!\n", rank); 147 | } 148 | else 149 | { 150 | printf("Improvement needed before rank %d can report success!\n", rank); 151 | } 152 | 153 | /* Clean up and exit */ 154 | MPI_Finalize(); 155 | return 0; 156 | } 157 | -------------------------------------------------------------------------------- /content/code/day-2/05_overlap/solution/non-blocking-communication-overlap-solution.c: -------------------------------------------------------------------------------- 1 | #include "mpi.h" 2 | #include 3 | 4 | int next_working_data_set[6][8]; 5 | void compute_row(int row_index, int input[6][8], int output[6][8]) 6 | { 7 | for (int j = 0; j < 8; j = j + 1) 8 | { 9 | /* Here is the 5-point stencil */ 10 | const int right_column_index = (j + 1) % 8; 11 | const int left_column_index = (j + 8 - 1) % 8; 12 | const int top_row_index = row_index-1; 13 | const int bottom_row_index = row_index+1; 14 | output[row_index][j] = (input[row_index][j] + 15 | input[row_index][left_column_index] + 16 | input[row_index][right_column_index] + 17 | input[top_row_index][j] + 18 | input[bottom_row_index][j]); 19 | } 20 | } 21 | 22 | int main(int argc, char **argv) 23 | { 24 | /* Initialize the MPI environment and check */ 25 | MPI_Init(&argc, &argv); 26 | MPI_Comm comm = MPI_COMM_WORLD; 27 | int rank, size; 28 | MPI_Comm_rank(comm, &rank); 29 | MPI_Comm_size(comm, &size); 30 | if (size != 2) 31 | { 32 | if (rank == 0) 33 | { 34 | printf("Only two ranks is supported for this exercise, " 35 | "please re-run with two ranks\n"); 36 | } 37 | MPI_Finalize(); 38 | return 0; 39 | } 40 | 41 | /* Prepare the initial values for this process */ 42 | int local_data_set[4][8]; 43 | 
printf("Local data set on rank %d was:\n", rank); 44 | for (int i = 0; i < 4; i = i + 1) 45 | { 46 | printf(" [ "); 47 | for (int j = 0; j < 8; j = j + 1) 48 | { 49 | /* Make sure the local data on each rank is different, so 50 | * that we see the communication works properly. */ 51 | local_data_set[i][j] = 10*(rank + 1); 52 | if (j != 0) 53 | { 54 | printf(", "); 55 | } 56 | printf("%3d", local_data_set[i][j]); 57 | } 58 | printf(" ]\n"); 59 | } 60 | int working_data_set[6][8]; 61 | for (int i = 0; i < 4; i = i + 1) 62 | { 63 | for (int j = 0; j < 8; j = j + 1) 64 | { 65 | /* Initialize the local part of the working data set */ 66 | working_data_set[i+1][j] = local_data_set[i][j]; 67 | } 68 | } 69 | 70 | int next_working_data_set[6][8]; 71 | 72 | int send_up_tag = 0, send_down_tag = 1; 73 | /* Prepare to receive the halo data */ 74 | int source_rank = size-rank-1; 75 | MPI_Request sent_from_source[2]; 76 | MPI_Irecv(working_data_set[5], 8, MPI_INT, source_rank, send_up_tag, comm, &sent_from_source[0]); 77 | MPI_Irecv(working_data_set[0], 8, MPI_INT, source_rank, send_down_tag, comm, &sent_from_source[1]); 78 | 79 | /* Prepare to send the border data */ 80 | int destination_rank = size-rank-1; 81 | MPI_Request sent_to_destination[2]; 82 | MPI_Isend(working_data_set[1], 8, MPI_INT, destination_rank, send_up_tag, comm, &sent_to_destination[0]); 83 | MPI_Isend(working_data_set[4], 8, MPI_INT, destination_rank, send_down_tag, comm, &sent_to_destination[1]); 84 | 85 | /* Do the local computation */ 86 | compute_row(2, working_data_set, next_working_data_set); 87 | compute_row(3, working_data_set, next_working_data_set); 88 | 89 | /* Wait for the receives to complete */ 90 | MPI_Wait(&sent_from_source[0], MPI_STATUS_IGNORE); 91 | MPI_Wait(&sent_from_source[1], MPI_STATUS_IGNORE); 92 | 93 | /* Do the non-local computation */ 94 | compute_row(1, working_data_set, next_working_data_set); 95 | compute_row(4, working_data_set, next_working_data_set); 96 | 97 | /* Wait for the sends to complete */ 98 | MPI_Wait(&sent_to_destination[0], MPI_STATUS_IGNORE); 99 | MPI_Wait(&sent_to_destination[1], MPI_STATUS_IGNORE); 100 | 101 | printf("Next local data set on rank %d was:\n", rank); 102 | for (int i = 1; i < 5; i = i + 1) 103 | { 104 | printf(" [ "); 105 | for (int j = 0; j < 8; j = j + 1) 106 | { 107 | if (j != 0) 108 | { 109 | printf(", "); 110 | } 111 | printf("%3d", next_working_data_set[i][j]); 112 | } 113 | printf(" ]\n"); 114 | } 115 | 116 | /* Report whether the code is correct */ 117 | int success = 1; 118 | for (int i = 1; i < 5; i = i + 1) 119 | { 120 | int expected_data; 121 | if (i == 1 || i == 4) 122 | { 123 | expected_data = (rank == 0) ? 60 : 90; 124 | } 125 | else 126 | { 127 | expected_data = (rank == 0) ? 
50 : 100; 128 | } 129 | for (int j = 0; j < 8; j = j + 1) 130 | { 131 | success = success && (next_working_data_set[i][j] == expected_data); 132 | } 133 | } 134 | if (success) 135 | { 136 | printf("SUCCESS on rank %d!\n", rank); 137 | } 138 | else 139 | { 140 | printf("Improvement needed before rank %d can report success!\n", rank); 141 | } 142 | 143 | /* Clean up and exit */ 144 | MPI_Finalize(); 145 | return 0; 146 | } 147 | -------------------------------------------------------------------------------- /content/code/day-1/02_compute-pi/solution/pi-monte-carlo.c: -------------------------------------------------------------------------------- 1 | /* adapted from: 2 | * https://www.mcs.anl.gov/research/projects/mpi/usingmpi/examples-usingmpi/simplempi/monte-ex_c.html 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #define PI 3.141592653589793238462643 12 | 13 | #define CHUNKSIZE 1000 14 | 15 | /* message tags */ 16 | #define REQUEST 1 17 | #define REPLY 2 18 | 19 | int main(int argc, char *argv[]) { 20 | // number of Monte Carlo samples 21 | int n_samples; 22 | // counter for the number of samples inside and outside the circle 23 | int in, out; 24 | // total tally of samples inside and outside the circle 25 | int totalin, totalout; 26 | // coordinates of the random point 27 | double x, y; 28 | // current estimate of pi 29 | double Pi; 30 | // error and user-provided threshold 31 | double error, epsilon; 32 | // whether user-provided threshold was met 33 | int done; 34 | // random data 35 | double rands[CHUNKSIZE]; 36 | int size, rank, rng_rank; 37 | int request; 38 | int excl_ranks[1]; 39 | MPI_Status status; 40 | 41 | MPI_Init(&argc, &argv); 42 | MPI_Comm world = MPI_COMM_WORLD; 43 | 44 | MPI_Comm_size(world, &size); 45 | MPI_Comm_rank(world, &rank); 46 | 47 | // we use the last process as the random number server 48 | rng_rank = size - 1; 49 | 50 | // read user input 51 | if (rank == 0) { 52 | if (argc < 2) { 53 | fprintf(stderr, "Usage: %s epsilon\n", argv[0]); 54 | MPI_Abort(world, 1); 55 | } 56 | sscanf(argv[1], "%lf", &epsilon); 57 | } 58 | 59 | // let every process know what the threshold is 60 | // the collective operation will be discussed later on 61 | MPI_Bcast(&epsilon, 1, MPI_DOUBLE, 0, world); 62 | 63 | // create checkers group 64 | MPI_Group world_group, checkers_group; 65 | MPI_Comm_group(world, &world_group); 66 | excl_ranks[0] = rng_rank; 67 | MPI_Group_excl(world_group, 1, excl_ranks, &checkers_group); 68 | 69 | // create checkers communicator 70 | MPI_Comm checkers; 71 | MPI_Comm_create(world, checkers_group, &checkers); 72 | MPI_Group_free(&checkers_group); 73 | 74 | // handle the random number generation 75 | if (rank == rng_rank) { /* I am the random number generator */ 76 | // listen for requests from any source 77 | do { 78 | MPI_Recv(&request, 1, MPI_INT, MPI_ANY_SOURCE, REQUEST, world, &status); 79 | if (request) { 80 | for (int i = 0; i < CHUNKSIZE; ++i) { 81 | rands[i] = (double)rand() / RAND_MAX * 2.0 - 1.0; 82 | } 83 | MPI_Send(rands, CHUNKSIZE, MPI_DOUBLE, status.MPI_SOURCE, REPLY, world); 84 | } 85 | } while (request > 0); 86 | } else { /* I am a checker process */ 87 | // first thing, a checker process always requests fresh random data 88 | request = 1; 89 | done = in = out = 0; 90 | MPI_Send(&request, 1, MPI_INT, rng_rank, REQUEST, world); 91 | // set the number of samples processed to 0 92 | n_samples = 0; 93 | // check the random samples 94 | while (!done) { 95 | n_samples++; 96 | MPI_Recv(rands, CHUNKSIZE, MPI_DOUBLE, 
rng_rank, REPLY, world, 97 | MPI_STATUS_IGNORE); 98 | int i; 99 | for (i = 0; i < CHUNKSIZE-1; i+=2) { 100 | x = rands[i]; 101 | y = rands[i+1]; 102 | if (x * x + y * y < 1.0) { 103 | in++; 104 | } else { 105 | out++; 106 | } 107 | } 108 | 109 | // total tally of points inside the circle 110 | // the collective operation will be discussed later on 111 | MPI_Allreduce(&in, &totalin, 1, MPI_INT, MPI_SUM, checkers); 112 | 113 | // total tally of points outside the circle 114 | // the collective operation will be discussed later on 115 | MPI_Allreduce(&out, &totalout, 1, MPI_INT, MPI_SUM, checkers); 116 | 117 | // compute pi 118 | Pi = (4.0 * totalin) / (totalin + totalout); 119 | 120 | // check error 121 | error = fabs(Pi - PI); 122 | 123 | // are we done? 124 | done = (error < epsilon || (totalin + totalout) > 100000000); 125 | request = (done) ? 0 : 1; 126 | 127 | // print current estimate and send a new request 128 | if (rank == 0) { 129 | printf("\rpi = %23.20f", Pi); 130 | MPI_Send(&request, 1, MPI_INT, rng_rank, REQUEST, world); 131 | } else { 132 | if (request) { 133 | MPI_Send(&request, 1, MPI_INT, rng_rank, REQUEST, world); 134 | } 135 | } 136 | } 137 | 138 | // clean up! 139 | MPI_Comm_free(&checkers); 140 | } 141 | 142 | // print results 143 | if (rank == 0) { 144 | printf("\npoints: %d\nin: %d, out: %d, to exit\n", totalin + totalout, 145 | totalin, totalout); 146 | getchar(); 147 | } 148 | 149 | MPI_Finalize(); 150 | 151 | return 0; 152 | } 153 | -------------------------------------------------------------------------------- /content/code/day-2/04_deadlock/non-blocking-communication-deadlock.c: -------------------------------------------------------------------------------- 1 | #include "mpi.h" 2 | #include 3 | 4 | int next_working_data_set[6][8]; 5 | void compute_row(int row_index, int input[6][8], int output[6][8]) 6 | { 7 | for (int j = 0; j < 8; j = j + 1) 8 | { 9 | /* Here is the 5-point stencil */ 10 | const int right_column_index = (j + 1) % 8; 11 | const int left_column_index = (j + 8 - 1) % 8; 12 | const int top_row_index = row_index-1; 13 | const int bottom_row_index = row_index+1; 14 | output[row_index][j] = (input[row_index][j] + 15 | input[row_index][left_column_index] + 16 | input[row_index][right_column_index] + 17 | input[top_row_index][j] + 18 | input[bottom_row_index][j]); 19 | } 20 | } 21 | 22 | int main(int argc, char **argv) 23 | { 24 | /* Initialize the MPI environment and check */ 25 | MPI_Init(&argc, &argv); 26 | MPI_Comm comm = MPI_COMM_WORLD; 27 | int rank, size; 28 | MPI_Comm_rank(comm, &rank); 29 | MPI_Comm_size(comm, &size); 30 | if (size != 2) 31 | { 32 | if (rank == 0) 33 | { 34 | printf("Only two ranks is supported for this exercise, " 35 | "please re-run with two ranks\n"); 36 | } 37 | MPI_Finalize(); 38 | return 0; 39 | } 40 | 41 | /* Prepare the initial values for this process */ 42 | int local_data_set[4][8]; 43 | printf("Local data set on rank %d was:\n", rank); 44 | for (int i = 0; i < 4; i = i + 1) 45 | { 46 | printf(" [ "); 47 | for (int j = 0; j < 8; j = j + 1) 48 | { 49 | /* Make sure the local data on each rank is different, so 50 | * that we see the communication works properly. 
*/ 51 | local_data_set[i][j] = 10*(rank + 1); 52 | if (j != 0) 53 | { 54 | printf(", "); 55 | } 56 | printf("%3d", local_data_set[i][j]); 57 | } 58 | printf(" ]\n"); 59 | } 60 | int working_data_set[6][8]; 61 | for (int i = 0; i < 4; i = i + 1) 62 | { 63 | for (int j = 0; j < 8; j = j + 1) 64 | { 65 | /* Initialize the local part of the working data set */ 66 | working_data_set[i+1][j] = local_data_set[i][j]; 67 | } 68 | } 69 | 70 | int next_working_data_set[6][8]; 71 | 72 | /* Do the local computation */ 73 | compute_row(2, working_data_set, next_working_data_set); 74 | compute_row(3, working_data_set, next_working_data_set); 75 | 76 | /* ==== CHALLENGE ==== 77 | * 78 | * This code *may* work, at the discretion of the MPI 79 | * implementation. Relying on it is unwise. The MPI implementation 80 | * is free to deliver the behavior of MPI_Ssend when MPI_Send is 81 | * called. Try MPI_Ssend and see the problem! 82 | */ 83 | int send_up_tag = 0, send_down_tag = 1; 84 | /* Send the border data */ 85 | int destination_rank = size-rank-1; 86 | MPI_Send(working_data_set[1], 8, MPI_INT, destination_rank, send_up_tag, comm); 87 | MPI_Send(working_data_set[4], 8, MPI_INT, destination_rank, send_down_tag, comm); 88 | 89 | /* Receive the halo data */ 90 | int source_rank = size-rank-1; 91 | MPI_Recv(working_data_set[5], 8, MPI_INT, source_rank, send_up_tag, comm, MPI_STATUS_IGNORE); 92 | MPI_Recv(working_data_set[0], 8, MPI_INT, source_rank, send_down_tag, comm, MPI_STATUS_IGNORE); 93 | 94 | /* ===== CHALLENGE ==== 95 | * Even when using MPI_Ssend, you can work around the problem 96 | * by ensuring that the two ranks do receives and sends in the 97 | * opposite order from each other. Try writing an if statement 98 | * that changes the order of the operations based on the rank 99 | * and make the code work! 100 | */ 101 | 102 | /* Do the non-local computation */ 103 | compute_row(1, working_data_set, next_working_data_set); 104 | compute_row(4, working_data_set, next_working_data_set); 105 | 106 | printf("Next local data set on rank %d was:\n", rank); 107 | for (int i = 1; i < 5; i = i + 1) 108 | { 109 | printf(" [ "); 110 | for (int j = 0; j < 8; j = j + 1) 111 | { 112 | if (j != 0) 113 | { 114 | printf(", "); 115 | } 116 | printf("%3d", next_working_data_set[i][j]); 117 | } 118 | printf(" ]\n"); 119 | } 120 | 121 | /* ==== CHALLENGE ==== 122 | * 123 | * 124 | * implementation. Relying on it is unwise. The MPI implementation 125 | * is free to deliver the behavior of MPI_Ssend when MPI_Send is 126 | * called. Try MPI_Ssend and see the problem! 127 | */ 128 | 129 | /* Report whether the code is correct */ 130 | int success = 1; 131 | for (int i = 1; i < 5; i = i + 1) 132 | { 133 | int expected_data; 134 | if (i == 1 || i == 4) 135 | { 136 | expected_data = (rank == 0) ? 60 : 90; 137 | } 138 | else 139 | { 140 | expected_data = (rank == 0) ? 
50 : 100; 141 | } 142 | for (int j = 0; j < 8; j = j + 1) 143 | { 144 | success = success && (next_working_data_set[i][j] == expected_data); 145 | } 146 | } 147 | if (success) 148 | { 149 | printf("SUCCESS on rank %d!\n", rank); 150 | } 151 | else 152 | { 153 | printf("Improvement needed before rank %d can report success!\n", rank); 154 | } 155 | 156 | /* Clean up and exit */ 157 | MPI_Finalize(); 158 | return 0; 159 | } 160 | -------------------------------------------------------------------------------- /content/guide.rst: -------------------------------------------------------------------------------- 1 | Instructor's guide 2 | ------------------ 3 | 4 | This training material covers intermediate-level aspects of MPI programming. 5 | 6 | Learning outcomes 7 | ================= 8 | 9 | - Communicators, groups, derived datatypes 10 | - One-sided communication 11 | - Collective communication, blocking and non-blocking 12 | - MPI and threads 13 | 14 | Tips 15 | ==== 16 | 17 | - Append ``?plain=1`` to collapse the sidebar while presenting. 18 | Suggestion from `Richard Darst `_. 19 | 20 | Timing 21 | ====== 22 | 23 | Second iteration: 2022-06-14/17 24 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 25 | 26 | Second iteration of this workshop was given as 4 x (almost) 4 hours, that is 4 27 | half days. 28 | We ran the workshop in collaboration with Xin Li (PDC) and Pedro Ojeda May (HPC2N). 29 | 30 | **Day 1 - Tuesday 14 June 2022 – Communicators, groups, derived datatypes** 31 | 32 | .. csv-table:: 33 | :widths: auto 34 | :align: center 35 | :delim: ; 36 | 37 | 9:00 - 9:10 ; Welcome and introduction to the training course 38 | 9:10 - 9:50 ; **Communicators and groups** 39 | 9:50 - 10:00 ; Break 40 | 10:00 - 10:40 ; **Derived datatypes: pack-unpack** 41 | 10:40 - 10:50 ; Break 42 | 10:50 - 11:30 ; **Derived datatypes: ``MPI_Datatype``** 43 | 11:30 - 11:40 ; Break 44 | 11:40 - 12:20 ; **Simple collective communication** 45 | 12:20 - 12:30 ; Wrap-up 46 | 47 | 48 | **Day 2 - Wednesday 15 June 2022 - Collective communication (including nonblocking)** 49 | 50 | .. csv-table:: 51 | :widths: auto 52 | :align: center 53 | :delim: ; 54 | 55 | 9:00 - 9:10 ; What did we cover yesterday? 56 | 9:10 - 09:55 ; **Scatter and gather** 57 | 9:55 - 10:05 ; Break 58 | 10:05 - 10:50 ; **Generalized forms of gather** 59 | 10:50 - 11:00 ; Break 60 | 11:00 - 12:10 ; **Non-blocking point-to-point** 61 | 12:10 - 12:20 ; Wrap-up 62 | 63 | 64 | **Day 3 - Thursday 16 June 2022 – Non-blocking and one-sided communication** 65 | 66 | .. csv-table:: 67 | :widths: auto 68 | :align: center 69 | :delim: ; 70 | 71 | 9:00 - 9:10 ; What did we cover yesterday? 72 | 9:10 - 9:50 ; **Non-blocking collective communication** 73 | 9:50 - 10:00 ; Break 74 | 10:00 - 10:40 ; **One-sided communication: concepts** 75 | 10:40 - 10:50 ; Break 76 | 10:50 - 11:40 ; **One-sided communication: functions** 77 | 11:40 - 12:00 ; Wrap-up 78 | 79 | 80 | **Day 4 - Friday 17 June 2022 - MPI and threads** 81 | 82 | .. csv-table:: 83 | :widths: auto 84 | :align: center 85 | :delim: ; 86 | 87 | 9:00 - 9:10 ; What did we cover yesterday? 88 | 9:10 - 10:00 ; **One-sided communication: synchronization** 89 | 10:00 - 10:10 ; Break 90 | 10:10 - 11:20 ; **Introducing MPI and threads** 91 | 11:20 - 11:30 ; Break 92 | 11:30 - 12:10 ; **MPI and threads in practice** 93 | 12:10 - 12:20 ; Wrap-up 94 | 95 | 96 | First iteration: 2020-12-08/11 97 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 98 | 99 | First iteration of this workshop was given as 4 x (almost) 4 hours, that is 4 half days. 
100 | 101 | We had two main 30-minute lectures each day. Each lecture included type-alongs 102 | and was followed by ample time for questions and live-coding demos. Each day 103 | was concluded with exercises and a wrap-up. 104 | 105 | **Day 1 - Tuesday 8 December 2020 – Communicators, groups, derived datatypes** 106 | 107 | .. csv-table:: 108 | :widths: auto 109 | :align: center 110 | :delim: ; 111 | 112 | 9:00 - 9:10 ; Welcome and introduction to the training course 113 | 9:10 - 10:30 ; **Using communicators in applications and libraries** 114 | 10:30 - 10:45 ; Break 115 | 10:45 - 11:25 ; **Handling derived datatypes** 116 | 11:25 - 12:20 ; Exercises 117 | 12:20 - 12:30 ; Wrap-up 118 | 119 | 120 | **Day 2 - Wednesday 9 December 2020 - Collective communication (including nonblocking)** 121 | 122 | .. csv-table:: 123 | :widths: auto 124 | :align: center 125 | :delim: ; 126 | 127 | 9:00 - 9:10 ; What did we cover yesterday? 128 | 9:10 - 09:45 ; **Simple collective communication** 129 | 9:45 - 09:50 ; Break 130 | 9:50 - 10:25 ; **Scatter and gather** 131 | 10:30 - 10:35 ; Break 132 | 10:35 - 11:10 ; **Generalized forms of gather** 133 | 11:10 - 11:20 ; Break 134 | 11:20 - 11:40 ; **Non-blocking point-to-point communication** 135 | 11:40 - 11:45 ; Break 136 | 11:45 - 12:20 ; **Non-blocking collective communication** 137 | 12:20 - 12:30 ; Wrap-up 138 | 139 | 140 | **Day 3 - Thursday 10 December 2020 – One-sided communication** 141 | 142 | .. csv-table:: 143 | :widths: auto 144 | :align: center 145 | :delim: ; 146 | 147 | 9:00 - 9:10 ; What did we cover yesterday? 148 | 9:10 - 10:30 ; **The whys and hows of remote memory access in MPI** 149 | 10:30 - 10:45 ; Break 150 | 10:45 - 11:25 ; **RMA and synchronization** 151 | 11:25 - 12:20 ; Exercises 152 | 12:20 - 12:30 ; Wrap-up 153 | 154 | 155 | **Day 4 - Friday 11 December 2020 - MPI and threads** 156 | 157 | .. csv-table:: 158 | :widths: auto 159 | :align: center 160 | :delim: ; 161 | 162 | 9:00 - 9:10 ; What did we cover yesterday?
163 | 9:10 - 9:40 ; **Why use MPI threading?** 164 | 9:40 - 9:50 ; Break 165 | 9:50 - 10:50 ; **How does MPI support threading?** 166 | 10:50 - 11:00 ; Break 167 | 11:00 - 11:30 ; **Using fork-join parallelism with MPI** 168 | 11:30 - 11:40 ; Break 169 | 11:40 - 12:10 ; **Using tasking with MPI** 170 | 12:10 - 12:20 ; **Tips for hybrid MPI+OpenMP** 171 | 12:20 - 12:30 ; Wrap-up 172 | -------------------------------------------------------------------------------- /content/code/day-2/04_deadlock/solution/non-blocking-communication-deadlock.c: -------------------------------------------------------------------------------- 1 | #include "mpi.h" 2 | #include <stdio.h> 3 | 4 | int next_working_data_set[6][8]; 5 | void compute_row(int row_index, int input[6][8], int output[6][8]) 6 | { 7 | for (int j = 0; j < 8; j = j + 1) 8 | { 9 | /* Here is the 5-point stencil */ 10 | const int right_column_index = (j + 1) % 8; 11 | const int left_column_index = (j + 8 - 1) % 8; 12 | const int top_row_index = row_index-1; 13 | const int bottom_row_index = row_index+1; 14 | output[row_index][j] = (input[row_index][j] + 15 | input[row_index][left_column_index] + 16 | input[row_index][right_column_index] + 17 | input[top_row_index][j] + 18 | input[bottom_row_index][j]); 19 | } 20 | } 21 | 22 | int main(int argc, char **argv) 23 | { 24 | /* Initialize the MPI environment and check */ 25 | MPI_Init(&argc, &argv); 26 | MPI_Comm comm = MPI_COMM_WORLD; 27 | int rank, size; 28 | MPI_Comm_rank(comm, &rank); 29 | MPI_Comm_size(comm, &size); 30 | if (size != 2) 31 | { 32 | if (rank == 0) 33 | { 34 | printf("Only two ranks are supported for this exercise, " 35 | "please re-run with two ranks\n"); 36 | } 37 | MPI_Finalize(); 38 | return 0; 39 | } 40 | 41 | /* Prepare the initial values for this process */ 42 | int local_data_set[4][8]; 43 | printf("Local data set on rank %d was:\n", rank); 44 | for (int i = 0; i < 4; i = i + 1) 45 | { 46 | printf(" [ "); 47 | for (int j = 0; j < 8; j = j + 1) 48 | { 49 | /* Make sure the local data on each rank is different, so 50 | * that we see the communication works properly.
*/ 51 | local_data_set[i][j] = 10*(rank + 1); 52 | if (j != 0) 53 | { 54 | printf(", "); 55 | } 56 | printf("%3d", local_data_set[i][j]); 57 | } 58 | printf(" ]\n"); 59 | } 60 | int working_data_set[6][8]; 61 | for (int i = 0; i < 4; i = i + 1) 62 | { 63 | for (int j = 0; j < 8; j = j + 1) 64 | { 65 | /* Initialize the local part of the working data set */ 66 | working_data_set[i+1][j] = local_data_set[i][j]; 67 | } 68 | } 69 | 70 | int next_working_data_set[6][8]; 71 | 72 | /* Do the local computation */ 73 | compute_row(2, working_data_set, next_working_data_set); 74 | compute_row(3, working_data_set, next_working_data_set); 75 | 76 | /* Do sends and receives in the opposite order on the two ranks */ 77 | if (rank == 0) 78 | { 79 | int send_up_tag = 0, send_down_tag = 1; 80 | /* Send the border data */ 81 | int destination_rank = size-rank-1; 82 | MPI_Ssend(working_data_set[1], 8, MPI_INT, destination_rank, send_up_tag, comm); 83 | MPI_Ssend(working_data_set[4], 8, MPI_INT, destination_rank, send_down_tag, comm); 84 | 85 | /* Receive the halo data */ 86 | int source_rank = size-rank-1; 87 | MPI_Recv(working_data_set[5], 8, MPI_INT, source_rank, send_up_tag, comm, MPI_STATUS_IGNORE); 88 | MPI_Recv(working_data_set[0], 8, MPI_INT, source_rank, send_down_tag, comm, MPI_STATUS_IGNORE); 89 | } 90 | else 91 | { 92 | int send_up_tag = 0, send_down_tag = 1; 93 | /* Receive the halo data */ 94 | int source_rank = size-rank-1; 95 | MPI_Recv(working_data_set[5], 8, MPI_INT, source_rank, send_up_tag, comm, MPI_STATUS_IGNORE); 96 | MPI_Recv(working_data_set[0], 8, MPI_INT, source_rank, send_down_tag, comm, MPI_STATUS_IGNORE); 97 | 98 | /* Send the border data */ 99 | int destination_rank = size-rank-1; 100 | MPI_Ssend(working_data_set[1], 8, MPI_INT, destination_rank, send_up_tag, comm); 101 | MPI_Ssend(working_data_set[4], 8, MPI_INT, destination_rank, send_down_tag, comm); 102 | } 103 | 104 | /* Do the non-local computation */ 105 | compute_row(1, working_data_set, next_working_data_set); 106 | compute_row(4, working_data_set, next_working_data_set); 107 | 108 | printf("Next local data set on rank %d was:\n", rank); 109 | for (int i = 1; i < 5; i = i + 1) 110 | { 111 | printf(" [ "); 112 | for (int j = 0; j < 8; j = j + 1) 113 | { 114 | if (j != 0) 115 | { 116 | printf(", "); 117 | } 118 | printf("%3d", next_working_data_set[i][j]); 119 | } 120 | printf(" ]\n"); 121 | } 122 | 123 | /* ==== CHALLENGE ==== 124 | * 125 | * 126 | * implementation. Relying on it is unwise. The MPI implementation 127 | * is free to deliver the behavior of MPI_Ssend when MPI_Send is 128 | * called. Try MPI_Ssend and see the problem! 129 | */ 130 | 131 | /* Report whether the code is correct */ 132 | int success = 1; 133 | for (int i = 1; i < 5; i = i + 1) 134 | { 135 | int expected_data; 136 | if (i == 1 || i == 4) 137 | { 138 | expected_data = (rank == 0) ? 60 : 90; 139 | } 140 | else 141 | { 142 | expected_data = (rank == 0) ? 
50 : 100; 143 | } 144 | for (int j = 0; j < 8; j = j + 1) 145 | { 146 | success = success && (next_working_data_set[i][j] == expected_data); 147 | } 148 | } 149 | if (success) 150 | { 151 | printf("SUCCESS on rank %d!\n", rank); 152 | } 153 | else 154 | { 155 | printf("Improvement needed before rank %d can report success!\n", rank); 156 | } 157 | 158 | /* Clean up and exit */ 159 | MPI_Finalize(); 160 | return 0; 161 | } 162 | -------------------------------------------------------------------------------- /content/diagrams/non-blocking-style stencil application.drawio: -------------------------------------------------------------------------------- /content/diagrams/simple stencil workflow.drawio: -------------------------------------------------------------------------------- /content/diagrams/ring-style stencil workflow.drawio: -------------------------------------------------------------------------------- /content/custom.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # add the MPI function to reference in the glossary here. This skullduggery is 4 | # necessary to get consistent monospace formatting of the function. Each list 5 | # entry should be a list of a string of the name of the MPI function and an 6 | # integer of the node number of the MPI standard document webpage, e.g. 7 | # 47 for https://www.mpi-forum.org/docs/mpi-3.1/mpi31-report/node47.htm#Node47. 8 | # Use a negative node number if unknown or inapplicable.
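# For example, an entry with a known node number follows the pattern already used
# below, e.g. ["MPI_Send", 47], while a hypothetical entry for a function whose
# standard page has not been looked up yet would be written as ["MPI_Scan", -1].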
9 | # 10 | # Communicators and groups 11 | lesson_comms = [ 12 | ["MPI_Comm_split", 156], 13 | ["MPI_Comm_create", 156], 14 | ["MPI_Comm_free", 157], 15 | ["MPI_Comm_rank", 155], 16 | ["MPI_Comm_size", 155], 17 | ["MPI_Comm_group", 152], 18 | ["MPI_Group_free", 153], 19 | ["MPI_Group_excl", 153], 20 | ["MPI_Group_incl", 153], 21 | ] 22 | # derived datatypes 23 | lesson_dd = [ 24 | ["MPI_Type_get_extent", 84], 25 | ["MPI_Type_size", 82], 26 | ["MPI_Pack", 92], 27 | ["MPI_Pack_size", 92], 28 | ["MPI_Unpack", 92], 29 | ["MPI_Type_contiguous", 79], 30 | ["MPI_Type_vector", 79], 31 | ["MPI_Type_indexed", 79], 32 | ["MPI_Type_create_hvector", 79], 33 | ["MPI_Type_create_hindexed", 79], 34 | ["MPI_Type_create_struct", 79], 35 | ["MPI_Type_commit", 86], 36 | ["MPI_Type_free", 86], 37 | ["MPI_Op_create", 115], 38 | ["MPI_Op_free", 115], 39 | ] 40 | # remote memory access 41 | lesson_rma = [ 42 | ["MPI_Get", 270], 43 | ["MPI_Put", 269], 44 | ["MPI_Accumulate", 273], 45 | ["MPI_Win_create", 261], 46 | ["MPI_Win_allocate", 262], 47 | ["MPI_Win_allocate_shared", 263], 48 | ["MPI_Win_create_dynamic", 264], 49 | ["MPI_Win_fence", 280], 50 | ["MPI_Win_post", 281], 51 | ["MPI_Win_start", 281], 52 | ["MPI_Win_complete", 281], 53 | ["MPI_Win_wait", 281], 54 | ["MPI_Win_lock", 282], 55 | ["MPI_Win_unlock", 282], 56 | ["MPI_Win_free", 265], 57 | ["MPI_Win_test", 281], 58 | ["MPI_Win_attach", 264], 59 | ] 60 | # collectives 61 | lesson_coll = [ 62 | ["MPI_Barrier", 100], 63 | ["MPI_Bcast", 101], 64 | ["MPI_Reduce", 111], 65 | ["MPI_Scatter", 105], 66 | ["MPI_Gather", 103], 67 | ["MPI_Allgather", 107], 68 | ["MPI_Alltoall", 109], 69 | ["MPI_Allgatherv", 107], 70 | ["MPI_Ibarrier", 127], 71 | ["MPI_Ireduce", 134], 72 | ] 73 | # non-blocking 74 | lesson_non_blocking = [ 75 | ["MPI_Send", 47], 76 | ["MPI_Ssend", 57], 77 | ["MPI_Isend", 63], 78 | ["MPI_Irecv", 63], 79 | ["MPI_Wait", 64], 80 | ["MPI_Waitany", 66], 81 | ["MPI_Waitsome", 66], 82 | ["MPI_Waitall", 66], 83 | ["MPI_Test", 64], 84 | ["MPI_Testany", 66], 85 | ["MPI_Testsome", 66], 86 | ["MPI_Testall", 66], 87 | ["MPI_Ibsend", 63], 88 | ["MPI_Irsend", 63], 89 | ["MPI_Issend", 63], 90 | ] 91 | # threads 92 | lesson_threads = [ 93 | ["MPI_Init_thread", 303], 94 | ["MPI_Query_thread", 303], 95 | ["MPI_Is_thread_main", 303], 96 | # ["MPI_", 303], 97 | ] 98 | # functions mentioned in passing in the lesson 99 | other = [ 100 | ["MPI_Recv", 50], 101 | ["MPI_Alloc_mem", 216], 102 | ["MPI_Free_mem", 216], 103 | ["MPI_Bsend", 57], 104 | ["MPI_Rsend", 57], 105 | ] 106 | 107 | MPI_functions = ( 108 | lesson_comms 109 | + lesson_dd 110 | + lesson_rma 111 | + lesson_coll 112 | + lesson_threads 113 | + lesson_non_blocking 114 | + other 115 | ) 116 | 117 | abbr_and_term = """ 118 | .. |{function}| replace:: ``{function}`` 119 | .. |term-{function}| raw:: html 120 | 121 | {function} 122 | """ 123 | 124 | header = """ 125 | .. |{function}-implementors_docs| raw:: html 126 | """ 127 | 128 | impls = """ 129 |

   <p>Documentation from implementors:</p> 130 |    <ul> 131 |      <li><a href="https://www.open-mpi.org/doc/current/man3/{function}.3.php">OpenMPI</a></li> 132 |      <li><a href="https://www.mpich.org/static/docs/latest/www3/{function}.html">MPICH</a></li> 133 |    </ul> 134 | 135 | """ 136 | 137 | standard = """ 138 |    <p>Documentation in the standard:</p> 139 |    <ul> 140 |      <li><a href="https://www.mpi-forum.org/docs/mpi-3.1/mpi31-report/node{id}.htm#Node{id}">MPI standard</a></li> 141 |    </ul> 142 | 143 | """ 144 | 145 | impls_2 = """ 146 | .. |{function}-implementors_docs| raw:: html 147 | 148 |    <p>Documentation from implementors:</p> 149 |    <ul> 150 |      <li><a href="https://www.open-mpi.org/doc/current/man3/{function}.3.php">OpenMPI</a></li> 151 |      <li><a href="https://www.mpich.org/static/docs/latest/www3/{function}.html">MPICH</a></li> 152 |    </ul> 153 | 154 |    <p>Documentation in the standard:</p> 155 |    <ul> 156 |      <li><a href="https://www.mpi-forum.org/docs/mpi-3.1/mpi31-report/node{id}.htm#Node{id}">MPI standard</a></li> 157 |    </ul> 158 | 159 | 160 |
161 | """ 162 | 163 | 164 | def MPI_glossary(): 165 | # abbreviations and terms for the glossary 166 | glossary_helper = "\n".join( 167 | [abbr_and_term.format(function=function) for function, _ in MPI_functions] 168 | ) 169 | 170 | # documentation string from implementors 171 | implementors_docs = "" 172 | for function, id in MPI_functions: 173 | implementors_docs += header.format(function=function) 174 | format_string = impls 175 | if id >= 0: 176 | format_string += standard 177 | implementors_docs += format_string.format(function=function, id=id) 178 | 179 | # include all customisation in the rst_epilog, so it's available everywhere 180 | return glossary_helper + implementors_docs 181 | 182 | 183 | from sphinx_lesson.directives import _BaseCRDirective 184 | 185 | 186 | class SignatureDirective(_BaseCRDirective): 187 | extra_classes = ["toggle-shown", "dropdown"] 188 | 189 | 190 | class ParametersDirective(_BaseCRDirective): 191 | extra_classes = ["dropdown"] 192 | 193 | 194 | class TypealongDirective(_BaseCRDirective): 195 | extra_classes = ["toggle-shown", "dropdown"] 196 | 197 | 198 | DIRECTIVES = [SignatureDirective, ParametersDirective, TypealongDirective] 199 | --------------------------------------------------------------------------------
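The helper above is meant to be consumed by content/conf.py, which is not reproduced at this point in the dump. A minimal sketch of that wiring, assuming the usual sphinx-lesson layout; only MPI_glossary(), DIRECTIVES and the rst_epilog idea come from custom.py itself, while the import path and the directive names derived below are illustrative assumptions:

from custom import MPI_glossary, DIRECTIVES

# Make the |MPI_Xxx| substitutions and the implementors/standard documentation
# links defined in custom.py available on every page of the lesson.
rst_epilog = MPI_glossary()

def setup(app):
    # Register the dropdown-style directives defined in custom.py. Deriving the
    # directive name from the class name ("signature", "parameters", "typealong")
    # is an assumption made for this sketch.
    for directive in DIRECTIVES:
        name = directive.__name__.removesuffix("Directive").lower()
        app.add_directive(name, directive)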