├── .gitignore ├── LICENSE-MIT ├── LICENSE.md ├── README.md ├── benchmarks ├── neighbor-alltoall.cpp ├── neighbor-alltoall2.cpp ├── neighbor-gather.cpp └── neighbor-gather2.cpp ├── demos ├── cartesian-topology.cpp ├── demos.md ├── example.hdf5 ├── mpi-io-fileview.c ├── mpi-io-no-fileview.c ├── mpi-io-write.f90 ├── mpi_write_at.F90 ├── neighbor.cpp ├── neighbor_alltoallw.F90 └── neighbor_alltoallw.cpp ├── docs ├── 01-custom-communicators.md ├── 02-persistent-neighbour.md ├── 03-datatypes-intro.md ├── 04-datatypes-advanced.md ├── 05-parallel-io-posix.md ├── 06-mpi-io.md ├── 07-parallel-io-hdf5.md ├── img │ ├── cartesian-neighborhood.svg │ ├── cluster_diagram.jpeg │ ├── colorcomm1.svg │ ├── colorcomm2.svg │ ├── communicator.svg │ ├── communicators.png │ ├── communicators.svg │ ├── contiguous.svg │ ├── cray_top500.png │ ├── fortran-array-layout.png │ ├── fortran-array-layout.svg │ ├── hdf5-hyperslab.png │ ├── hdf5-hyperslab.svg │ ├── io-illustration.png │ ├── io-illustration.svg │ ├── io-layers.png │ ├── io-layers.svg │ ├── io-subarray.png │ ├── io-subarray.svg │ ├── lustre-architecture.png │ ├── lustre-architecture.svg │ ├── lustre-striping.png │ ├── lustre-striping.svg │ ├── neighbor-alltoall.svg │ ├── neighbor-gather.svg │ ├── nonblocking-io.png │ ├── nonblocking-io.svg │ ├── osu-benchmark.png │ ├── posix-everybody.png │ ├── posix-everybody.svg │ ├── posix-spokesman.png │ ├── posix-spokesman.svg │ ├── processes-threads.png │ ├── processes-threads.svg │ ├── striping-performance.png │ ├── svg2png.sh │ ├── triangle.svg │ ├── two-d-cartesian-grid.png │ ├── two-d-cartesian-grid.svg │ ├── type-struct.png │ ├── type-struct.svg │ ├── type_array.svg │ ├── type_indexed.svg │ ├── type_vector.svg │ ├── typemap.png │ ├── typemap.svg │ ├── vector-extent.png │ ├── vector-extent.svg │ ├── vector-resize.png │ └── vector-resize.svg ├── mpi-reference.md └── quizzes │ └── mpi-recap.md ├── exercise-instructions.md ├── job_mpi.sh ├── mpi ├── cartesian-grid │ ├── README.md │ ├── skeleton.c │ ├── skeleton.f90 │ └── solution │ │ ├── cartesian-grid.c │ │ └── cartesian-grid.f90 ├── communicator │ ├── README.md │ ├── collective.F90 │ ├── collective.c │ ├── collective.cpp │ ├── img │ │ ├── comm-split-reduce.svg │ │ └── sendbuffer.png │ └── solution │ │ ├── reduce.F90 │ │ ├── reduce.c │ │ └── reduce.cpp ├── datatype-extent │ ├── README.md │ ├── skeleton.F90 │ ├── skeleton.c │ └── solution │ │ ├── scatter.F90 │ │ ├── scatter.c │ │ ├── send-receive.F90 │ │ └── send-receive.c ├── heat-equation │ ├── README_2d.md │ ├── README_cartesian.md │ ├── README_neighbor.md │ ├── common │ │ ├── bottle.dat │ │ ├── pngwriter.c │ │ └── pngwriter.h │ ├── cpp │ │ ├── Makefile │ │ ├── bottle.dat │ │ ├── core.cpp │ │ ├── heat.cpp │ │ ├── heat.hpp │ │ ├── io.cpp │ │ ├── main.cpp │ │ ├── matrix.hpp │ │ ├── setup.cpp │ │ ├── solution │ │ │ ├── Makefile │ │ │ ├── bottle.dat │ │ │ ├── core.cpp │ │ │ ├── heat.cpp │ │ │ ├── heat.hpp │ │ │ ├── io.cpp │ │ │ ├── main.cpp │ │ │ ├── matrix.hpp │ │ │ ├── setup.cpp │ │ │ └── utilities.cpp │ │ └── utilities.cpp │ └── fortran │ │ ├── Makefile │ │ ├── core.F90 │ │ ├── heat_mod.F90 │ │ ├── io.F90 │ │ ├── main.F90 │ │ ├── pngwriter_mod.F90 │ │ ├── setup.F90 │ │ ├── solution │ │ ├── Makefile │ │ ├── core.F90 │ │ ├── heat_mod.F90 │ │ ├── io.F90 │ │ ├── main.F90 │ │ ├── pngwriter_mod.F90 │ │ ├── setup.F90 │ │ └── utilities.F90 │ │ └── utilities.F90 ├── message-chain-cartesian │ ├── README.md │ ├── skeleton.F90 │ ├── skeleton.cpp │ └── solution │ │ ├── chain-periodic.F90 │ │ ├── chain-periodic.cpp │ │ ├── 
chain.F90 │ │ └── chain.cpp ├── message-chain-persistent │ ├── README.md │ ├── skeleton.F90 │ ├── skeleton.cpp │ └── solution │ │ ├── chain.F90 │ │ └── chain.cpp ├── neighbor-exchange │ ├── README.md │ ├── skeleton.F90 │ ├── skeleton.cpp │ └── solution │ │ ├── neighbor-exchange.F90 │ │ └── neighbor-exchange.cpp ├── simple-datatypes │ ├── README.md │ ├── img │ │ ├── indexed.png │ │ ├── subarray.png │ │ ├── subarray.svg │ │ └── vector.png │ ├── skeleton.F90 │ ├── skeleton.c │ └── solution │ │ ├── custom_type_a.F90 │ │ ├── custom_type_a.c │ │ ├── custom_type_b.F90 │ │ ├── custom_type_b.c │ │ ├── custom_type_c.F90 │ │ └── custom_type_c.c └── struct-datatype │ ├── README.md │ ├── solution │ ├── struct_with_byte.F90 │ ├── struct_with_byte.c │ ├── struct_with_type.F90 │ └── struct_with_type.c │ ├── struct_type.F90 │ └── struct_type.c └── parallel-io ├── hdf5 ├── README.md ├── hdf5.c └── hdf5.f90 ├── heat-restart ├── README.md ├── c │ ├── Makefile │ ├── core.c │ ├── heat.h │ ├── io.c │ ├── main.c │ ├── setup.c │ ├── solution │ │ ├── Makefile │ │ ├── core.c │ │ ├── heat.h │ │ ├── io.c │ │ ├── main.c │ │ ├── setup.c │ │ └── utilities.c │ └── utilities.c └── fortran │ ├── Makefile │ ├── core.F90 │ ├── heat_mod.F90 │ ├── io.F90 │ ├── main.F90 │ ├── pngwriter_mod.F90 │ ├── setup.F90 │ ├── solution │ ├── Makefile │ ├── core.F90 │ ├── heat_mod.F90 │ ├── io.F90 │ ├── main.F90 │ ├── pngwriter_mod.F90 │ ├── setup.F90 │ └── utilities.F90 │ └── utilities.F90 ├── mpi-io ├── README.md ├── mpi-io.c ├── mpi-io.f90 └── solution │ ├── mpi-io.c │ └── mpi-io.f90 └── posix ├── README.md ├── c ├── solution │ ├── separate-files.c │ ├── spokesman.c │ └── spokesman_reader.c ├── spokesman.c └── spokesman_reader.c └── fortran ├── solution ├── separate-files.f90 ├── spokesman.f90 └── spokesman_reader.f90 ├── spokesman.f90 └── spokesman_reader.f90 /.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.pptx 3 | *.pdf 4 | *.swp 5 | *.o 6 | *.mod 7 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 CSC Training 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | All text and image material licensed under Creative Commons BY-SA 4.0 unless 2 | otherwise noted http://creativecommons.org/licenses/by-sa/4.0/ 3 | 4 | All code samples licensed under MIT license (see LICENSE-MIT for details) unless 5 | otherwise noted 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Advanced MPI and parallel I/O 2 | 3 | Exercise material and model answers for the CSC course "Advanced MPI and parallel I/O". 4 | The course is part of PRACE Training Center (PTC) activity at CSC. 5 | 6 | ## Exercises 7 | 8 | [General instructions](exercise-instructions.md) 9 | 10 | 11 | ### Using own communicators 12 | 13 | - [Communicators and collectives](mpi/communicator) 14 | - [Cartesian grid process topology](mpi/cartesian-grid) 15 | - [Message chain with Cartesian communicator](mpi/message-chain-cartesian) 16 | - [(Bonus) Heat equation with cartesian communicator](mpi/heat-equation/README_cartesian.md) 17 | 18 | ### Persistent and neighborhood communication 19 | 20 | - [Message chain with persistent communication](mpi/message-chain-persistent) 21 | - [Neighborhood exchange](mpi/neighbor-exchange) 22 | - [(Bonus) Heat equation with neighborhood collectives](mpi/heat-equation/README_neighbor.md) 23 | 24 | ### Simple derived types 25 | 26 | - [Simple datatypes](mpi/simple-datatypes) 27 | - [(Bonus) 2D-decomposed heat equation](mpi/heat-equation/README_2d.md) 28 | 29 | ### Advanced derived types 30 | - [Communicating struct / derived type](mpi/struct-datatype) 31 | - [Modifying extent](mpi/datatype-extent) 32 | 33 | 34 | ### Parallel I/O 35 | 36 | - [Parallel I/O with Posix](parallel-io/posix) 37 | - [Simple MPI-IO](parallel-io/mpi-io) 38 | - [HDF5 example](parallel-io/hdf5) 39 | - [Bonus: Checkpoint + restart with MPI-IO](parallel-io/heat-restart) 40 | -------------------------------------------------------------------------------- /demos/cartesian-topology.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int world_rank, rank, ntasks; 7 | 8 | MPI_Init(&argc, &argv); 9 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 10 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); 11 | 12 | int ndims = 2; 13 | int dims[2] = {0, 0}; 14 | 15 | int rc = MPI_Dims_create(ntasks, ndims, dims); 16 | if (0 == world_rank) 17 | printf("Decomposing %d ntasks into %d x %d grid\n", ntasks, dims[0], dims[1]); 18 | 19 | int periods[2] = {0, 1}; 20 | MPI_Comm cart_comm; 21 | MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, 1, &cart_comm); 22 | 23 | int coords[2]; 24 | int nup, ndown, nleft, nright; 25 | MPI_Comm_rank(cart_comm, &rank); 26 | MPI_Cart_coords(cart_comm, rank, 2, coords); 27 | MPI_Cart_shift(cart_comm, 0, 1, &nup, &ndown); 28 | MPI_Cart_shift(cart_comm, 1, 1, &nleft, &nright); 29 | printf("Coords of %d (%d): %d, %d, neighbours %d %d %d %d\n", 30 | rank, world_rank, coords[0], coords[1], nup, ndown, nleft, nright); 31 | 32 | MPI_Finalize(); 33 | 34 | } 35 | -------------------------------------------------------------------------------- /demos/demos.md: -------------------------------------------------------------------------------- 1 | # Parallel I/O 2 | ### Striping example: 3 | 4 | 
```
5 | touch first_file
6 | lfs getstripe first_file
7 |
8 | mkdir experiments
9 | lfs setstripe -c 4 experiments
10 | touch experiments/new_file
11 | lfs getstripe experiments/new_file
12 | ```
13 | ### MPI-IO simple writing
14 | ```
15 | mpif90 mpi_write_at.F90
16 |
17 | srun --job-name=example --account=project_2002078 --partition=test --time=00:15:00 --ntasks=2 ./a.out
18 |
19 | less test
20 | hexdump test
21 | hexdump -e '16 "%02d ""\n"' test
22 | ```
23 |
24 | ### HDF5 command line tools example
25 |
26 | ```
27 | h5ls py_example.hdf5
28 |
29 | h5ls py_example.hdf5/DataTwo
30 |
31 | h5dump -d /DataTwo/Matrix_two py_example.hdf5
32 |
33 | h5dump -n 1 py_example.hdf5
34 |
35 | h5dump -a "Matrix_one/maxvalue" py_example.hdf5
36 | h5dump -a "Matrix_one/minvalue" py_example.hdf5
37 | h5dump -a "Matrix_one/average" py_example.hdf5
38 | h5dump -a "Matrix_one/var" py_example.hdf5
39 |
40 | h5mkgrp py_example.hdf5 "/DataTwo/test"
41 |
42 | h5ls py_example.hdf5/DataTwo
43 | ```
44 |
45 |
46 | ### Python HDF5 example
47 | ```
48 | module load hdf5
49 | module load biokit
50 | module load biopythontools
51 |
52 | python # start python
53 | ```
54 | ```
55 | import h5py
56 | import numpy as np
57 |
58 | m1_shape=(10,20,4)
59 | m2_shape=(6,80,12)
60 |
61 | M1=np.random.random(m1_shape)
62 | M2=np.random.random(m2_shape)
63 |
64 | hdf5_name_file ='python_demo.hdf5' # name of the HDF5 file to create
65 |
66 | with h5py.File(hdf5_name_file, 'w') as hdf5_file: # open an hdf5 file in write mode
67 | ... dset_name='Matrix_one'
68 | ... dset=hdf5_file.create_dataset(dset_name, data=M1, compression='gzip')
69 | ... dset.attrs['maxvalue']=np.max(np.max(M1))
70 | ... dset.attrs['minvalue']=np.min(np.min(M1))
71 | ... dset.attrs['average']=np.mean(np.mean(M1))
72 | ... dset.attrs['var']='M1'
73 |
74 | with h5py.File(hdf5_name_file, 'a') as hdf5_file: # re-open the hdf5 file in append mode
75 | ... gzero=hdf5_file.create_group('DataTwo')
76 | ... nset_name='Matrix_two'
77 | ... nsdet=gzero.create_dataset(nset_name,data=M2,compression='gzip')
78 | ... nsdet.attrs['maxvalue']=np.max(np.max(M2))
79 | ... nsdet.attrs['minvalue']=np.min(np.min(M2))
80 | ... nsdet.attrs['average']=np.mean(np.mean(M2))
81 | ...
nsdet.attrs['var']='M2'
82 | ```
83 |
-------------------------------------------------------------------------------- /demos/example.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/demos/example.hdf5 -------------------------------------------------------------------------------- /demos/mpi-io-write.f90: --------------------------------------------------------------------------------
1 | program mpiio
2 | use mpi
3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit
4 | implicit none
5 |
6 | integer, parameter :: datasize = 64
7 | integer :: rc, my_id, ntasks, localsize, i
8 | integer, dimension(:), allocatable :: localvector
9 | integer :: fh, dsize
10 | integer(kind=MPI_OFFSET_KIND) :: offset
11 |
12 | call mpi_init(rc)
13 | call mpi_comm_size(mpi_comm_world, ntasks, rc)
14 | call mpi_comm_rank(mpi_comm_world, my_id, rc)
15 |
16 | localsize = datasize / ntasks
17 | allocate(localvector(localsize))
18 |
19 | do i = 1, localsize
20 | localvector(i) = i + my_id * localsize
21 | end do
22 |
23 | call mpi_type_size(MPI_INTEGER, dsize, rc)
24 | offset = my_id * localsize * dsize
25 |
26 | call mpi_file_open(MPI_COMM_WORLD, 'output.dat', &
27 | MPI_MODE_CREATE+MPI_MODE_WRONLY, MPI_INFO_NULL, fh, rc)
28 | call mpi_file_write_at_all(fh, offset, localvector, localsize, &
29 | MPI_INTEGER, MPI_STATUS_IGNORE, rc)
30 | call mpi_file_close(fh, rc)
31 |
32 | deallocate(localvector)
33 | call mpi_finalize(rc)
34 |
35 | end program mpiio
36 |
-------------------------------------------------------------------------------- /demos/mpi_write_at.F90: --------------------------------------------------------------------------------
1 | program output
2 | use mpi
3 | implicit none
4 |
5 | integer :: err, i, myid, file, intsize
6 | integer :: status(mpi_status_size)
7 | integer, parameter :: count=100
8 | integer, dimension(count) :: buf
9 | integer(kind=mpi_offset_kind) :: disp
10 |
11 | call mpi_init(err)
12 | call mpi_comm_rank(mpi_comm_world, myid, err)
13 |
14 | do i = 1, count
15 | buf(i) = myid * count + i
16 | end do
17 |
18 | call mpi_file_open(mpi_comm_world, 'test', &
19 | mpi_mode_create + mpi_mode_wronly, &
20 | mpi_info_null, file, err)
21 |
22 | call mpi_type_size(mpi_integer, intsize, err)
23 | disp = myid * count * intsize
24 |
25 | call mpi_file_write_at(file, disp, buf, count, mpi_integer, status, err)
26 |
27 | call mpi_file_close(file, err)
28 | call mpi_finalize(err)
29 |
30 | end program output
31 |
-------------------------------------------------------------------------------- /demos/neighbor.cpp: --------------------------------------------------------------------------------
1 | #include <cstdio>
2 | #include <iostream>
3 | #include <vector>
4 | #include <mpi.h>
5 |
6 | int main(int argc, char **argv)
7 | {
8 | int rank, ntasks;
9 |
10 | MPI_Init(&argc, &argv);
11 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
12 | if (ntasks != 9) {
13 | std::cout << "This demo works only with 9 MPI tasks" << std::endl;
14 | MPI_Abort(MPI_COMM_WORLD, -1);
15 | }
16 |
17 | MPI_Comm cart_comm;
18 | int ndims = 2;
19 | int dims[2] = {3, 3};
20 | int periods[2] = {0, 1};
21 | MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, 0, &cart_comm);
22 | MPI_Comm_rank(cart_comm, &rank);
23 |
24 | // Print out process grid
25 | if (0 == rank) {
26 | std::cout << "Process grid" << std::endl;
27 | for (int i=0; i < dims[0]; i++) {
28 | for (int j=0; j < dims[1]; j++) {
29 | int r;
30 | int coords[2] = {i, j};
31 | MPI_Cart_rank(cart_comm, coords, &r);
32 | std::cout << r << " ";
33 | }
34 | std::cout << std::endl;
35 | }
36 | std::cout << std::flush;
37 | }
38 |
39 | MPI_Barrier(cart_comm);
40 |
41 | int neighbors = 4;
42 | std::vector<int> sendbuf(2, rank);
43 | std::vector<int> recvbuf(2*neighbors, -1);
44 |
45 | MPI_Neighbor_allgather(sendbuf.data(), 2, MPI_INT,
46 | recvbuf.data(), 2, MPI_INT,
47 | cart_comm);
48 |
49 |
50 | // Print out the result
51 | for (int r=0; r < ntasks; r++) {
52 | MPI_Barrier(cart_comm);
53 | if (rank == r) {
54 | std::cout << "task " << rank << ": ";
55 | for (auto i: recvbuf)
56 | std::cout << i << " ";
57 | std::cout << std::endl << std::flush;
58 | }
59 | MPI_Barrier(cart_comm);
60 | }
61 |
62 | MPI_Finalize();
63 | }
64 |
-------------------------------------------------------------------------------- /docs/img/cluster_diagram.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/cluster_diagram.jpeg -------------------------------------------------------------------------------- /docs/img/communicators.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/communicators.png -------------------------------------------------------------------------------- /docs/img/cray_top500.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/cray_top500.png -------------------------------------------------------------------------------- /docs/img/fortran-array-layout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/fortran-array-layout.png -------------------------------------------------------------------------------- /docs/img/hdf5-hyperslab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/hdf5-hyperslab.png -------------------------------------------------------------------------------- /docs/img/io-illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/io-illustration.png -------------------------------------------------------------------------------- /docs/img/io-layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/io-layers.png -------------------------------------------------------------------------------- /docs/img/io-subarray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/io-subarray.png -------------------------------------------------------------------------------- /docs/img/lustre-architecture.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/lustre-architecture.png -------------------------------------------------------------------------------- /docs/img/lustre-striping.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/lustre-striping.png -------------------------------------------------------------------------------- /docs/img/nonblocking-io.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/nonblocking-io.png -------------------------------------------------------------------------------- /docs/img/osu-benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/osu-benchmark.png -------------------------------------------------------------------------------- /docs/img/posix-everybody.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/posix-everybody.png -------------------------------------------------------------------------------- /docs/img/posix-spokesman.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/posix-spokesman.png -------------------------------------------------------------------------------- /docs/img/processes-threads.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/processes-threads.png -------------------------------------------------------------------------------- /docs/img/striping-performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/striping-performance.png -------------------------------------------------------------------------------- /docs/img/svg2png.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # convert SVGs to PNGs with a correct size 3 | inkscape affinity.svg -e affinity.png -w 709 4 | inkscape collective-patterns.svg -e collective-patterns.png -w 1594 5 | inkscape communication-schematic.svg -e communication-schematic.png -w 443 6 | inkscape communicators.svg -e communicators.png -w 1418 7 | inkscape fortran-array-layout.svg -e fortran-array-layout.png -w 709 8 | inkscape hdf5-hyperslab.svg -e hdf5-hyperslab.png -w 532 9 | inkscape io-illustration.svg -e io-illustration.png -w 1240 10 | inkscape io-layers.svg -e io-layers.png -w 1772 11 | inkscape io-subarray.svg -e io-subarray.png -w 1418 12 | inkscape lustre-architecture.svg -e lustre-architecture.png -w 1772 13 | inkscape lustre-striping.svg -e lustre-striping.png -w 1772 14 | inkscape mpi-summary.svg -e mpi-summary.png -w 1418 15 | inkscape mpi-thread-support.svg -e mpi-thread-support.png -w 1772 16 | inkscape multiple-thread-communication.svg -e 
multiple-thread-communication.png -w 886 17 | inkscape nonblocking-io.svg -e nonblocking-io.png -w 1240 18 | inkscape omp-parallel.svg -e omp-parallel.png -w 443 19 | inkscape omp-summary.svg -e omp-summary.png -w 1418 20 | inkscape one-sided-epoch.svg -e one-sided-epoch.png -w 620 21 | inkscape one-sided-limitations.svg -e one-sided-limitations.png -w 1240 22 | inkscape one-sided-window.svg -e one-sided-window.png -w 708 23 | inkscape posix-everybody.svg -e posix-everybody.png -w 620 24 | inkscape posix-spokesman.svg -e posix-spokesman.png -w 620 25 | inkscape processes-threads.svg -e processes-threads.png -w 1594 26 | inkscape supercomputer-node-hybrid.svg -e supercomputer-node-hybrid.png -w 1772 27 | inkscape two-d-cartesian-grid.svg -e two-d-cartesian-grid.png -w 886 28 | inkscape type-struct.svg -e type-struct.png -w 1240 29 | inkscape typemap.svg -e typemap.png -w 1418 30 | inkscape vector-extent.svg -e vector-extent.png -w 1772 31 | inkscape vector-resize.svg -e vector-resize.png -w 1772 32 | -------------------------------------------------------------------------------- /docs/img/two-d-cartesian-grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/two-d-cartesian-grid.png -------------------------------------------------------------------------------- /docs/img/type-struct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/type-struct.png -------------------------------------------------------------------------------- /docs/img/typemap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/typemap.png -------------------------------------------------------------------------------- /docs/img/vector-extent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/vector-extent.png -------------------------------------------------------------------------------- /docs/img/vector-resize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/vector-resize.png -------------------------------------------------------------------------------- /job_mpi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=example 3 | #SBATCH --account=project_2000745 4 | #SBATCH --partition=small 5 | #SBATCH --reservation=advance-mpi 6 | #SBATCH --time=00:05:00 7 | #SBATCH --ntasks=4 8 | 9 | srun my_mpi_exe 10 | -------------------------------------------------------------------------------- /mpi/cartesian-grid/README.md: -------------------------------------------------------------------------------- 1 | ## Cartesian grid process topology 2 | 3 | Explore a Cartesian grid process topology by writing a toy program, where 4 | 5 | - the MPI processes are arranged into a 2D Cartesian grid 6 | - every task prints out their linear rank together with its coordinates 7 | in the process grid 8 | - every task prints out the linear rank of their 
nearest neighboring 9 | processes 10 | 11 | Run the code with both periodic and non-periodic boundaries, and experiment 12 | with the direction and displacement parameters of the `MPI_Cart_shift` 13 | routine. 14 | 15 | You can start from scratch or use one of the provided skeleton codes 16 | ([skeleton.c](skeleton.c) or [skeleton.f90](skeleton.f90)). 17 | -------------------------------------------------------------------------------- /mpi/cartesian-grid/skeleton.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | int main(int argc, char* argv[]) { 7 | int ntasks, my_id, irank; 8 | int dims[2]; /* Dimensions of the grid */ 9 | int coords[2]; /* Coordinates in the grid */ 10 | int neighbors[4]; /* Neighbors in 2D grid */ 11 | int period[2] = {1, 1}; 12 | MPI_Comm comm2d; 13 | 14 | MPI_Init(&argc, &argv); 15 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 16 | MPI_Comm_rank(MPI_COMM_WORLD, &my_id); 17 | 18 | /* Determine the process grid (dims[0] x dims[1] = ntasks) */ 19 | if (ntasks < 16) { 20 | dims[0] = 2; 21 | } else if (ntasks >= 16 && ntasks < 64) { 22 | dims[0] = 4; 23 | } else if (ntasks >= 64 && ntasks < 256) { 24 | dims[0] = 8; 25 | } else { 26 | dims[0] = 16; 27 | } 28 | dims[1] = ntasks / dims[0]; 29 | 30 | if (dims[0] * dims[1] != ntasks) { 31 | fprintf(stderr, "Incompatible dimensions: %i x %i != %i\n", 32 | dims[0], dims[1], ntasks); 33 | MPI_Finalize(); 34 | exit(EXIT_FAILURE); 35 | } 36 | 37 | /* Create the 2D Cartesian communicator */ 38 | /* TO DO */ 39 | 40 | /* Find out and store the neighboring ranks */ 41 | /* TO DO */ 42 | 43 | /* Find out and store also the Cartesian coordinates of a rank */ 44 | /* TO DO */ 45 | 46 | for (irank = 0; irank < ntasks; irank++) { 47 | if (my_id == irank) { 48 | printf("%3i = %2i %2i neighbors=%3i %3i %3i %3i\n", 49 | my_id, coords[0], coords[1], neighbors[0], neighbors[1], 50 | neighbors[2], neighbors[3]); 51 | } 52 | MPI_Barrier(MPI_COMM_WORLD); 53 | } 54 | 55 | MPI_Finalize(); 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /mpi/cartesian-grid/skeleton.f90: -------------------------------------------------------------------------------- 1 | program cartesian_grid 2 | use mpi 3 | implicit none 4 | 5 | integer :: ntask, & ! number of MPI tasks 6 | my_id, & ! MPI rank of the task 7 | rc, & ! return code 8 | comm2d, & ! Cartesian communicator 9 | neighbors(4), & ! neighbors in the 2D grid 10 | coord(0:1), & ! coordinates in the grid 11 | dims(0:1) ! dimensions of the grid 12 | logical, dimension(2) :: period = (/ .true., .true. /) 13 | integer :: irank 14 | 15 | call mpi_init(rc) 16 | call mpi_comm_size(MPI_COMM_WORLD, ntask, rc) 17 | call mpi_comm_rank(MPI_COMM_WORLD, my_id, rc) 18 | 19 | ! Determine the process grid (dims(0) x dims(1) = ntask) 20 | if (ntask < 16) then 21 | dims(0) = 2 22 | else if (ntask >= 16 .and. ntask < 64) then 23 | dims(0) = 4 24 | else if (ntask >= 64 .and. ntask < 256) then 25 | dims(0) = 8 26 | else 27 | dims(0) = 16 28 | end if 29 | 30 | dims(1) = ntask / dims(0) 31 | if (dims(0) * dims(1) /= ntask) then 32 | write(*,'(A,I3,A1,I3,A2,I4)') 'sorry, no go', dims(0), 'x', & 33 | dims(1),'/=', ntask 34 | call mpi_abort(mpi_comm_world, 1, rc) 35 | end if 36 | 37 | ! Create the 2D Cartesian communicator 38 | ! TO DO 39 | ! Find out & store the neighboring ranks 40 | ! TO DO 41 | ! Find out & store also the Cartesian coordinates of a rank 42 | ! 
TO DO 43 | 44 | do irank = 0, ntask-1 45 | if (my_id == irank) print '(I3,A,2I2,A,4I3)', & 46 | my_id, '=', coord, ' neighbors=', neighbors(:) 47 | call mpi_barrier(mpi_comm_world, rc) 48 | end do 49 | 50 | call mpi_finalize(rc) 51 | 52 | end program cartesian_grid 53 | -------------------------------------------------------------------------------- /mpi/cartesian-grid/solution/cartesian-grid.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | int main(int argc, char* argv[]) { 7 | int ntasks, my_id, irank; 8 | int dims[2]; /* Dimensions of the grid */ 9 | int coords[2]; /* Coordinates in the grid */ 10 | int neighbors[4]; /* Neighbors in 2D grid */ 11 | int period[2] = {1, 1}; 12 | MPI_Comm comm2d; 13 | 14 | MPI_Init(&argc, &argv); 15 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 16 | MPI_Comm_rank(MPI_COMM_WORLD, &my_id); 17 | 18 | /* Determine the process grid (dims[0] x dims[1] = ntasks) */ 19 | if (ntasks < 16) { 20 | dims[0] = 2; 21 | } else if (ntasks >= 16 && ntasks < 64) { 22 | dims[0] = 4; 23 | } else if (ntasks >= 64 && ntasks < 256) { 24 | dims[0] = 8; 25 | } else { 26 | dims[0] = 16; 27 | } 28 | dims[1] = ntasks / dims[0]; 29 | 30 | if (dims[0] * dims[1] != ntasks) { 31 | fprintf(stderr, "Incompatible dimensions: %i x %i != %i\n", 32 | dims[0], dims[1], ntasks); 33 | MPI_Finalize(); 34 | exit(EXIT_FAILURE); 35 | } 36 | 37 | /* Create the 2D Cartesian communicator */ 38 | MPI_Cart_create(MPI_COMM_WORLD, 2, dims, period, 1, &comm2d); 39 | /* Find out and store the ranks with which to perform halo exchange */ 40 | MPI_Cart_shift(comm2d, 0, 1, &neighbors[0], &neighbors[1]); 41 | MPI_Cart_shift(comm2d, 1, 1, &neighbors[2], &neighbors[3]); 42 | /* Find out and store also the Cartesian coordinates of a rank */ 43 | MPI_Cart_coords(comm2d, my_id, 2, coords); 44 | 45 | for (irank = 0; irank < ntasks; irank++) { 46 | if (my_id == irank) { 47 | printf("%3i = %2i %2i neighbors=%3i %3i %3i %3i\n", 48 | my_id, coords[0], coords[1], neighbors[0], neighbors[1], 49 | neighbors[2], neighbors[3]); 50 | } 51 | MPI_Barrier(MPI_COMM_WORLD); 52 | } 53 | 54 | MPI_Finalize(); 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /mpi/cartesian-grid/solution/cartesian-grid.f90: -------------------------------------------------------------------------------- 1 | program cartesian_grid 2 | use mpi 3 | implicit none 4 | 5 | integer :: ntask, & ! number of MPI tasks 6 | my_id, & ! MPI rank of the task 7 | rc, & ! return code 8 | comm2d, & ! Cartesian communicator 9 | neighbors(4), & ! neighbors in the 2D grid 10 | coord(0:1), & ! coordinates in the grid 11 | dims(0:1) ! dimensions of the grid 12 | logical, dimension(2) :: period = (/ .true., .true. /) 13 | integer :: irank 14 | 15 | call mpi_init(rc) 16 | call mpi_comm_size(MPI_COMM_WORLD, ntask, rc) 17 | call mpi_comm_rank(MPI_COMM_WORLD, my_id, rc) 18 | 19 | ! Determine the process grid (dims(0) x dims(1) = ntask) 20 | if (ntask < 16) then 21 | dims(0) = 2 22 | else if (ntask >= 16 .and. ntask < 64) then 23 | dims(0) = 4 24 | else if (ntask >= 64 .and. ntask < 256) then 25 | dims(0) = 8 26 | else 27 | dims(0) = 16 28 | end if 29 | 30 | dims(1) = ntask / dims(0) 31 | if (dims(0) * dims(1) /= ntask) then 32 | write(*,'(A,I3,A1,I3,A2,I4)') 'sorry, no go', dims(0), 'x', & 33 | dims(1),'/=', ntask 34 | call mpi_abort(mpi_comm_world, 1, rc) 35 | end if 36 | 37 | ! 
Create the 2D Cartesian communicator 38 | call mpi_cart_create(mpi_comm_world, 2, dims, period, .true., comm2d, rc) 39 | ! Find out & store the neighboring ranks 40 | call mpi_cart_shift(comm2d, 0, 1, neighbors(1), neighbors(2), rc) 41 | call mpi_cart_shift(comm2d, 1, 1, neighbors(3), neighbors(4), rc) 42 | ! Find out & store also the Cartesian coordinates of a rank 43 | call mpi_cart_coords(comm2d, my_id, 2, coord, rc) 44 | 45 | do irank = 0, ntask-1 46 | if (my_id == irank) print '(I3,A,2I2,A,4I3)', & 47 | my_id, '=', coord, ' neighbors=', neighbors(:) 48 | call mpi_barrier(mpi_comm_world, rc) 49 | end do 50 | 51 | call mpi_finalize(rc) 52 | 53 | end program cartesian_grid 54 | -------------------------------------------------------------------------------- /mpi/communicator/README.md: -------------------------------------------------------------------------------- 1 | ## Communicators and collectives 2 | 3 | In this exercise we combine collective communication with user defined 4 | communicators. Write a program for four MPI processes, such that each 5 | process has a data vector with the following data: 6 | 7 | ![](https://github.com/csc-training/summerschool/blob/master/mpi/collectives/img/sendbuffer.png) 8 | 9 | In addition, each task has a receive buffer for eight elements and the 10 | values in the buffer are initialized to -1. 11 | 12 | Implement now a pattern with user defined communicators and collective 13 | operation so that the receive buffers will have the following values: 14 | 15 | ![](https://raw.githubusercontent.com/csc-training/summerschool/master/mpi/communicator/img/comm-split-reduce.svg?sanitize=true) 16 | 17 | You can start from scratch or use the skeleton code 18 | [collective.cpp](collective.cpp), [collective.c](collective.c) or 19 | [collective.F90](collective.F90). 20 | 21 | 22 | -------------------------------------------------------------------------------- /mpi/communicator/collective.F90: -------------------------------------------------------------------------------- 1 | program coll_exer 2 | use mpi 3 | implicit none 4 | 5 | integer, parameter :: n_mpi_tasks = 4 6 | 7 | integer :: ntasks, rank, ierr, i, color, sub_comm 8 | integer, dimension(2*n_mpi_tasks) :: sendbuf, recvbuf 9 | integer, dimension(2*n_mpi_tasks**2) :: printbuf 10 | 11 | call mpi_init(ierr) 12 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, ierr) 13 | call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr) 14 | 15 | if (ntasks /= n_mpi_tasks) then 16 | if (rank == 0) then 17 | print *, "Run this program with ", n_mpi_tasks, " tasks." 18 | end if 19 | call mpi_abort(MPI_COMM_WORLD, -1, ierr) 20 | end if 21 | 22 | ! Initialize message buffers 23 | call init_buffers 24 | 25 | ! Print data that will be sent 26 | call print_buffers(sendbuf) 27 | 28 | ! TODO: use a single collective communication call (and maybe prepare 29 | ! some parameters for the call) 30 | 31 | ! Print data that was received 32 | ! TODO: add correct buffer 33 | call print_buffers(...) 
34 |
35 | call mpi_finalize(ierr)
36 |
37 | contains
38 |
39 | subroutine init_buffers
40 | implicit none
41 | integer :: i
42 |
43 | do i = 1, 2*n_mpi_tasks
44 | recvbuf(i) = -1
45 | sendbuf(i) = i + 2*n_mpi_tasks * rank - 1
46 | end do
47 | end subroutine init_buffers
48 |
49 |
50 | subroutine print_buffers(buffer)
51 | implicit none
52 | integer, dimension(:), intent(in) :: buffer
53 | integer, parameter :: bufsize = 2*n_mpi_tasks
54 | integer :: i
55 | character(len=40) :: pformat
56 |
57 | write(pformat,'(A,I3,A)') '(A4,I2,":",', bufsize, 'I3)'
58 |
59 | call mpi_gather(buffer, bufsize, MPI_INTEGER, &
60 | & printbuf, bufsize, MPI_INTEGER, &
61 | & 0, MPI_COMM_WORLD, ierr)
62 |
63 | if (rank == 0) then
64 | do i = 1, ntasks
65 | write(*,pformat) 'Task', i - 1, printbuf((i-1)*bufsize+1:i*bufsize)
66 | end do
67 | print *
68 | end if
69 | end subroutine print_buffers
70 |
71 | end program coll_exer
72 |
-------------------------------------------------------------------------------- /mpi/communicator/collective.c: --------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <mpi.h>
4 |
5 | #define NTASKS 4
6 |
7 | void print_buffers(int *printbuffer, int *sendbuffer, int buffersize);
8 | void init_buffers(int *sendbuffer, int *recvbuffer, int buffersize);
9 |
10 |
11 | int main(int argc, char *argv[])
12 | {
13 | int ntasks, rank, color;
14 | int sendbuf[2 * NTASKS], recvbuf[2 * NTASKS];
15 | int printbuf[2 * NTASKS * NTASKS];
16 |
17 | MPI_Comm sub_comm;
18 |
19 | MPI_Init(&argc, &argv);
20 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
22 |
23 | if (ntasks != NTASKS) {
24 | if (rank == 0) {
25 | fprintf(stderr, "Run this program with %i tasks.\n", NTASKS);
26 | }
27 | MPI_Abort(MPI_COMM_WORLD, -1);
28 | }
29 |
30 | /* Initialize message buffers */
31 | init_buffers(sendbuf, recvbuf, 2 * NTASKS);
32 |
33 | /* Print data that will be sent */
34 | print_buffers(printbuf, sendbuf, 2 * NTASKS);
35 |
36 | /* TODO: use a single collective communication call (and maybe prepare
37 | * some parameters for the call) */
38 |
39 | /* Print data that was received */
40 | /* TODO: add correct buffer */
41 | print_buffers(printbuf, ..., 2 * NTASKS);
42 |
43 | MPI_Finalize();
44 | return 0;
45 | }
46 |
47 |
48 | void init_buffers(int *sendbuffer, int *recvbuffer, int buffersize)
49 | {
50 | int rank, i;
51 |
52 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
53 | for (i = 0; i < buffersize; i++) {
54 | recvbuffer[i] = -1;
55 | sendbuffer[i] = i + buffersize * rank;
56 | }
57 | }
58 |
59 |
60 | void print_buffers(int *printbuffer, int *sendbuffer, int buffersize)
61 | {
62 | int i, j, rank, ntasks;
63 |
64 | MPI_Gather(sendbuffer, buffersize, MPI_INT,
65 | printbuffer, buffersize, MPI_INT, 0, MPI_COMM_WORLD);
66 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
67 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
68 |
69 | if (rank == 0) {
70 | for (j = 0; j < ntasks; j++) {
71 | printf("Task %i:", j);
72 | for (i = 0; i < buffersize; i++) {
73 | printf(" %2i", printbuffer[i + buffersize * j]);
74 | }
75 | printf("\n");
76 | }
77 | printf("\n");
78 | }
79 | }
80 |
-------------------------------------------------------------------------------- /mpi/communicator/collective.cpp: --------------------------------------------------------------------------------
1 | #include <cstdio>
2 | #include <array>
3 | #include <mpi.h>
4 |
5 | #define NTASKS 4
6 |
7 | template <std::size_t N, std::size_t M>
8 | void print_buffers(std::array<int, N> &printbuffer, std::array<int, M> &sendbuffer);
9 | template <std::size_t N>
10 | void init_buffers(std::array<int, N> &sendbuffer, std::array<int, N> &recvbuffer);
11 |
12 |
13 | int main(int argc, char *argv[])
14 | {
15 | int ntasks, rank, color;
16 | std::array<int, 2 * NTASKS> sendbuf, recvbuf;
17 | std::array<int, 2 * NTASKS * NTASKS> printbuf;
18 |
19 | MPI_Comm sub_comm;
20 |
21 | MPI_Init(&argc, &argv);
22 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
23 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
24 |
25 | if (ntasks != NTASKS) {
26 | if (rank == 0) {
27 | fprintf(stderr, "Run this program with %i tasks.\n", NTASKS);
28 | }
29 | MPI_Abort(MPI_COMM_WORLD, -1);
30 | }
31 |
32 | // Initialize message buffers
33 | init_buffers(sendbuf, recvbuf);
34 |
35 | // Print data that will be sent
36 | print_buffers(printbuf, sendbuf);
37 |
38 | /* TODO: use a single collective communication call (and maybe prepare
39 | * some parameters for the call) */
40 |
41 | /* Print data that was received */
42 | /* TODO: add correct buffer */
43 | print_buffers(printbuf, ...);
44 |
45 | MPI_Finalize();
46 | return 0;
47 | }
48 |
49 |
50 | template <std::size_t N>
51 | void init_buffers(std::array<int, N> &sendbuffer, std::array<int, N> &recvbuffer)
52 | {
53 | int rank;
54 |
55 | const int buffersize = sendbuffer.size();
56 |
57 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
58 | for (int i = 0; i < buffersize; i++) {
59 | recvbuffer[i] = -1;
60 | sendbuffer[i] = i + buffersize * rank;
61 | }
62 | }
63 |
64 |
65 | template <std::size_t N, std::size_t M>
66 | void print_buffers(std::array<int, N> &printbuffer, std::array<int, M> &sendbuffer)
67 | {
68 | int rank, ntasks;
69 |
70 | const int buffersize = sendbuffer.size();
71 |
72 | MPI_Gather(sendbuffer.data(), buffersize, MPI_INT,
73 | printbuffer.data(), buffersize, MPI_INT, 0, MPI_COMM_WORLD);
74 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
75 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
76 |
77 | if (rank == 0) {
78 | for (int j = 0; j < ntasks; j++) {
79 | printf("Task %i:", j);
80 | for (int i = 0; i < buffersize; i++) {
81 | printf(" %2i", printbuffer[i + buffersize * j]);
82 | }
83 | printf("\n");
84 | }
85 | printf("\n");
86 | }
87 | }
88 |
-------------------------------------------------------------------------------- /mpi/communicator/img/sendbuffer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/mpi/communicator/img/sendbuffer.png -------------------------------------------------------------------------------- /mpi/communicator/solution/reduce.F90: --------------------------------------------------------------------------------
1 | program coll_exer
2 | use mpi_f08
3 | implicit none
4 |
5 | integer, parameter :: n_mpi_tasks = 4
6 |
7 | integer :: ntasks, rank, ierr, color
8 | type(mpi_comm) :: sub_comm
9 | integer, dimension(2*n_mpi_tasks) :: sendbuf, recvbuf
10 | integer, dimension(2*n_mpi_tasks**2) :: printbuf
11 |
12 | call mpi_init(ierr)
13 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, ierr)
14 | call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr)
15 |
16 | if (ntasks /= n_mpi_tasks) then
17 | if (rank == 0) then
18 | print *, "Run this program with ", n_mpi_tasks, " tasks."
19 | end if
20 | call mpi_abort(MPI_COMM_WORLD, -1, ierr)
21 | end if
22 |
23 | ! Initialize message buffers
24 | call init_buffers
25 |
26 | ! Print data that will be sent
27 | call print_buffers(sendbuf)
28 |
29 | ! Create new communicator and reduce the data
30 | if (rank / 2 == 0) then
31 | color = 1
32 | else
33 | color = 2
34 | end if
35 |
36 | call mpi_comm_split(MPI_COMM_WORLD, color, rank, sub_comm, ierr)
37 | call mpi_reduce(sendbuf, recvbuf, 2*n_mpi_tasks, MPI_INTEGER, MPI_SUM, 0, &
38 | & sub_comm, ierr)
39 |
40 | ! Print data that was received
41 | call print_buffers(recvbuf)
42 |
43 | call mpi_finalize(ierr)
44 |
45 | contains
46 |
47 | subroutine init_buffers
48 | implicit none
49 | integer :: i
50 |
51 | do i = 1, 2*n_mpi_tasks
52 | recvbuf(i) = -1
53 | sendbuf(i) = i + 2*n_mpi_tasks * rank - 1
54 | end do
55 | end subroutine init_buffers
56 |
57 |
58 | subroutine print_buffers(buffer)
59 | implicit none
60 | integer, dimension(:), intent(in) :: buffer
61 | integer, parameter :: bufsize = 2*n_mpi_tasks
62 | integer :: i
63 | character(len=40) :: pformat
64 |
65 | write(pformat,'(A,I3,A)') '(A4,I2,":",', bufsize, 'I3)'
66 |
67 | call mpi_gather(buffer, bufsize, MPI_INTEGER, &
68 | & printbuf, bufsize, MPI_INTEGER, &
69 | & 0, MPI_COMM_WORLD, ierr)
70 |
71 | if (rank == 0) then
72 | do i = 1, ntasks
73 | write(*,pformat) 'Task', i - 1, printbuf((i-1)*bufsize+1:i*bufsize)
74 | end do
75 | print *
76 | end if
77 | end subroutine print_buffers
78 |
79 | end program coll_exer
80 |
-------------------------------------------------------------------------------- /mpi/datatype-extent/README.md: --------------------------------------------------------------------------------
1 | ## Modifying extent
2 |
3 | When the datatype contains gaps at the beginning or at the end, one might need to
4 | modify the *extent* of the datatype.
5 |
6 | Starting from [skeleton.c](skeleton.c) or [skeleton.F90](skeleton.F90), create a vector
7 | datatype for sending a column (C) or row (Fortran) of a matrix.
8 |
9 | 1. Verify that the datatype works by communicating a single column/row.
10 |
11 | 2. Try to send two columns / rows. What happens? Can you explain why?
12 |
13 | 3. Create a new datatype with resized extent, so that communicating multiple columns / rows
14 | succeeds.
15 |
16 | 4. Try to scatter columns / rows with `MPI_Scatter`.
17 |
-------------------------------------------------------------------------------- /mpi/datatype-extent/skeleton.F90: --------------------------------------------------------------------------------
1 | program datatype1
2 | use mpi_f08
3 | implicit none
4 |
5 | integer :: rank, rc
6 | integer :: sendarray(8,6), recvarray(8,6)
7 | type(mpi_datatype) :: vector, vector2
8 | integer(kind=mpi_address_kind) :: extent, lb
9 |
10 | integer :: i, j
11 |
12 | call mpi_init(rc)
13 | call mpi_comm_rank(MPI_COMM_WORLD, rank, rc)
14 |
15 | ! initialize arrays
16 | recvarray = 0
17 |
18 | if (rank == 0) then
19 | sendarray = reshape([ ((i*10 + j, i=1,8), j=1,6) ], [8, 6] )
20 | write(*,*) 'Original data'
21 | do i=1, 8
22 | write(*,'(*(I3))') sendarray(i, :)
23 | end do
24 | end if
25 |
26 | ! TODO create datatype
27 |
28 | ! Communicate with the datatype
29 | if (rank == 0) then
30 |
31 | else if (rank == 1) then
32 |
33 | end if
34 |
35 | ! free datatype
36 |
37 | !
TODO end 38 | 39 | if (rank == 1) then 40 | write(*,*) 'Received data' 41 | do i=1, 8 42 | write(*,'(*(I3))') recvarray(i, :) 43 | end do 44 | end if 45 | 46 | call mpi_finalize(rc) 47 | 48 | 49 | 50 | end program datatype1 51 | -------------------------------------------------------------------------------- /mpi/datatype-extent/skeleton.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int rank; 7 | int sendarray[8][6]; 8 | int recvarray[8][6]; 9 | MPI_Datatype vector, vector2; 10 | 11 | 12 | MPI_Init(&argc, &argv); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | 15 | // Initialize arrays 16 | if (rank == 0) { 17 | for (int i = 0; i < 8; i++) { 18 | for (int j = 0; j < 6; j++) { 19 | sendarray[i][j] = (i + 1) * 10 + j + 1; 20 | } 21 | } 22 | } 23 | 24 | for (int i = 0; i < 8; i++) { 25 | for (int j = 0; j < 6; j++) { 26 | recvarray[i][j] = 0; 27 | } 28 | } 29 | 30 | if (rank == 0) { 31 | printf("Data in rank 0\n"); 32 | for (int i = 0; i < 8; i++) { 33 | for (int j = 0; j < 6; j++) { 34 | printf("%3d", sendarray[i][j]); 35 | } 36 | printf("\n"); 37 | } 38 | } 39 | 40 | // TODO create datatype 41 | 42 | // Communicate with the datatype 43 | if (rank == 0) 44 | 45 | else if (rank == 1) 46 | 47 | 48 | // free datatype 49 | 50 | // TODO end 51 | 52 | if (rank == 1) { 53 | printf("Received data\n"); 54 | for (int i = 0; i < 8; i++) { 55 | for (int j = 0; j < 6; j++) { 56 | printf("%3d", recvarray[i][j]); 57 | } 58 | printf("\n"); 59 | } 60 | } 61 | 62 | MPI_Finalize(); 63 | 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /mpi/datatype-extent/solution/scatter.F90: -------------------------------------------------------------------------------- 1 | program datatype1 2 | use mpi_f08 3 | implicit none 4 | 5 | integer :: rank, ntasks, rc 6 | integer :: sendarray(8,6), recvarray(8,6) 7 | type(mpi_datatype) :: vector, vector2 8 | integer(kind=mpi_address_kind) :: extent, lb 9 | 10 | integer :: i, j 11 | 12 | call mpi_init(rc) 13 | call mpi_comm_rank(MPI_COMM_WORLD, rank, rc) 14 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, rc) 15 | 16 | ! initialize arrays 17 | recvarray = 0 18 | 19 | if (rank == 0) then 20 | sendarray = reshape([ ((i*10 + j, i=1,8), j=1,6) ], [8, 6] ) 21 | write(*,*) 'Original data' 22 | do i=1, 8 23 | write(*,'(*(I3))') sendarray(i, :) 24 | end do 25 | end if 26 | 27 | ! 
create datatype 28 | call mpi_type_vector(6, 1, 8, MPI_INTEGER, vector, rc); 29 | extent = storage_size(i) / 8 30 | lb = 0 31 | call mpi_type_create_resized(vector, lb, extent, vector2, rc) 32 | call mpi_type_commit(vector2, rc) 33 | 34 | call mpi_scatter(sendarray, 2, vector2, recvarray, 2, vector2, 0, MPI_COMM_WORLD, rc) 35 | 36 | call mpi_type_free(vector2, rc) 37 | 38 | if (rank == ntasks - 1) then 39 | write(*,*) 'Received data' 40 | do i=1, 8 41 | write(*,'(*(I3))') recvarray(i, :) 42 | end do 43 | end if 44 | 45 | call mpi_finalize(rc) 46 | 47 | 48 | 49 | end program datatype1 50 | -------------------------------------------------------------------------------- /mpi/datatype-extent/solution/scatter.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int rank, ntasks; 7 | int sendarray[8][6]; 8 | int recvarray[8][6]; 9 | MPI_Datatype vector, vector2; 10 | 11 | 12 | MPI_Init(&argc, &argv); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 15 | 16 | // Initialize arrays 17 | if (rank == 0) { 18 | for (int i = 0; i < 8; i++) { 19 | for (int j = 0; j < 6; j++) { 20 | sendarray[i][j] = (i + 1) * 10 + j + 1; 21 | } 22 | } 23 | } 24 | 25 | for (int i = 0; i < 8; i++) { 26 | for (int j = 0; j < 6; j++) { 27 | recvarray[i][j] = 0; 28 | } 29 | } 30 | 31 | if (rank == 0) { 32 | printf("Data in rank 0\n"); 33 | for (int i = 0; i < 8; i++) { 34 | for (int j = 0; j < 6; j++) { 35 | printf("%3d", sendarray[i][j]); 36 | } 37 | printf("\n"); 38 | } 39 | } 40 | 41 | // Create datatype 42 | MPI_Type_vector(8, 1, 6, MPI_INT, &vector); 43 | MPI_Type_create_resized(vector, 0, sizeof(int), &vector2); 44 | MPI_Type_commit(&vector2); 45 | 46 | // Scatter columns 47 | MPI_Scatter(&sendarray[0][0], 1, vector2, &recvarray[0][0], 1, vector2, 0, MPI_COMM_WORLD); 48 | 49 | MPI_Type_free(&vector2); 50 | 51 | if (rank == ntasks - 1) { 52 | printf("Received data\n"); 53 | for (int i = 0; i < 8; i++) { 54 | for (int j = 0; j < 6; j++) { 55 | printf("%3d", recvarray[i][j]); 56 | } 57 | printf("\n"); 58 | } 59 | } 60 | 61 | MPI_Finalize(); 62 | 63 | return 0; 64 | } 65 | -------------------------------------------------------------------------------- /mpi/datatype-extent/solution/send-receive.F90: -------------------------------------------------------------------------------- 1 | program datatype1 2 | use mpi_f08 3 | implicit none 4 | 5 | integer :: rank, rc 6 | integer :: sendarray(8,6), recvarray(8,6) 7 | type(mpi_datatype) :: vector, vector2 8 | integer(kind=mpi_address_kind) :: extent, lb 9 | 10 | integer :: i, j 11 | 12 | call mpi_init(rc) 13 | call mpi_comm_rank(MPI_COMM_WORLD, rank, rc) 14 | 15 | ! initialize arrays 16 | recvarray = 0 17 | 18 | if (rank == 0) then 19 | sendarray = reshape([ ((i*10 + j, i=1,8), j=1,6) ], [8, 6] ) 20 | write(*,*) 'Original data' 21 | do i=1, 8 22 | write(*,'(*(I3))') sendarray(i, :) 23 | end do 24 | end if 25 | 26 | ! create datatype 27 | call mpi_type_vector(6, 1, 8, MPI_INTEGER, vector, rc); 28 | extent = storage_size(i) / 8 29 | lb = 0 30 | call mpi_type_create_resized(vector, lb, extent, vector2, rc) 31 | call mpi_type_commit(vector2, rc) 32 | 33 | ! 
Send two rows of a matrix 34 | if (rank == 0) then 35 | call mpi_send(sendarray, 2, vector2, 1, 0, MPI_COMM_WORLD, rc) 36 | else if (rank == 1) then 37 | call mpi_recv(recvarray, 2, vector2, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, rc) 38 | end if 39 | 40 | call mpi_type_free(vector2, rc) 41 | 42 | if (rank == 1) then 43 | write(*,*) 'Received data' 44 | do i=1, 8 45 | write(*,'(*(I3))') recvarray(i, :) 46 | end do 47 | end if 48 | 49 | call mpi_finalize(rc) 50 | 51 | 52 | 53 | end program datatype1 54 | -------------------------------------------------------------------------------- /mpi/datatype-extent/solution/send-receive.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int rank; 7 | int sendarray[8][6]; 8 | int recvarray[8][6]; 9 | MPI_Datatype vector, vector2; 10 | 11 | 12 | MPI_Init(&argc, &argv); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | 15 | // Initialize arrays 16 | if (rank == 0) { 17 | for (int i = 0; i < 8; i++) { 18 | for (int j = 0; j < 6; j++) { 19 | sendarray[i][j] = (i + 1) * 10 + j + 1; 20 | } 21 | } 22 | } 23 | 24 | for (int i = 0; i < 8; i++) { 25 | for (int j = 0; j < 6; j++) { 26 | recvarray[i][j] = 0; 27 | } 28 | } 29 | 30 | if (rank == 0) { 31 | printf("Data in rank 0\n"); 32 | for (int i = 0; i < 8; i++) { 33 | for (int j = 0; j < 6; j++) { 34 | printf("%3d", sendarray[i][j]); 35 | } 36 | printf("\n"); 37 | } 38 | } 39 | 40 | // Create datatype 41 | MPI_Type_vector(8, 1, 6, MPI_INT, &vector); 42 | MPI_Type_create_resized(vector, 0, sizeof(int), &vector2); 43 | MPI_Type_commit(&vector2); 44 | 45 | // Send two columns of matrix 46 | if (rank == 0) 47 | MPI_Send(&sendarray[0][0], 2, vector2, 1, 0, MPI_COMM_WORLD); 48 | else if (rank == 1) 49 | MPI_Recv(&recvarray[0][0], 2, vector2, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 50 | 51 | MPI_Type_free(&vector2); 52 | 53 | if (rank == 1) { 54 | printf("Received data\n"); 55 | for (int i = 0; i < 8; i++) { 56 | for (int j = 0; j < 6; j++) { 57 | printf("%3d", recvarray[i][j]); 58 | } 59 | printf("\n"); 60 | } 61 | } 62 | 63 | MPI_Finalize(); 64 | 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /mpi/heat-equation/README_2d.md: -------------------------------------------------------------------------------- 1 | ## Heat equation decomposed in two dimensions 2 | 3 | Note: this is very advanced exercise. It is meant more for illustration what can be done 4 | with derived datatypes, and as something one might want to study after the course, rather than 5 | completing during the course. 6 | 7 | If you are not familiar with the two dimensional heat equation, please have a look 8 | for [basic description](https://github.com/csc-training/mpi-introduction/tree/main/heat-equation) 9 | in "Parallel programming with MPI" exercise material. 10 | 11 | Here, starting point is a working code parallelized over columns (in Fortran) or rows (in C/C++). 12 | 13 | Before starting with this exercise, it is recommended that you have 14 | the [usage of Cartesian communicator](README_cartesian.md) implemented. 15 | 16 | 1. Modify the creation of Cartesian communicator so that the 17 | decomposition is done in two dimensions, and determine all four 18 | neighbors (up, down, left, right). 19 | 20 | 2. 
As the rows (in Fortran) or columns (in C/C++) are not contiguous
21 | in the computer memory, one needs to use user-defined datatypes
22 | when communicating in the `exchange()` routine in [cpp/core.cpp](cpp/core.cpp)
23 | or [fortran/core.F90](fortran/core.F90). In order to make the code more
24 | symmetric, one can utilize a derived type also for the contiguous
25 | dimension. Create the required datatypes (it is recommended to store
26 | them as attributes in the "parallel" data structure).
27 | 3. Perform the halo exchange with `MPI_Neighbor_alltoallw`. Together
28 | with the user-defined datatypes, no temporary buffers are needed in
29 | the user code. In order to use `MPI_Neighbor_alltoallw`, you need
30 | to determine the correct `displacements` both in sending and
31 | receiving.
32 | 4. In the base version, the I/O routines `write_field` and
33 | `read_field` (in [cpp/io.cpp](cpp/io.cpp) or
34 | [fortran/io.F90](fortran/io.F90))
35 | use temporary buffers for communication. Create an appropriate
36 | datatype and utilize it in the I/O-related communication. Note that you
37 | also need the coordinates of the processes in the Cartesian grid in
38 | order to read from / write to the correct part of the global
39 | temperature field.
40 |
-------------------------------------------------------------------------------- /mpi/heat-equation/README_cartesian.md: --------------------------------------------------------------------------------
1 | ## Heat equation solver with Cartesian communicator
2 |
3 | If you are not familiar with the two dimensional heat equation, please have a look
4 | at the [basic description](https://github.com/csc-training/mpi-introduction/tree/main/heat-equation)
5 | in the "Parallel programming with MPI" exercise material.
6 |
7 | Here, the starting point is a working code parallelized over columns (in Fortran) or rows (in C/C++).
8 |
9 | The current version uses only MPI_COMM_WORLD, and the neighboring processes are determined manually.
10 |
11 | 1. Add a "communicator" attribute to the basic parallelization data structure (`type :: parallel_data` in [fortran/heat_mod.F90](fortran/heat_mod.F90) or class `ParallelData` in [cpp/heat.hpp](cpp/heat.hpp))
12 | 2. Create the Cartesian communicator in the routine `parallel_setup()` (Fortran) or in
13 | the `ParallelData()` constructor (C++), and use `MPI_Cart_shift` for determining the
14 | neighboring processes
15 | 3. Use the Cartesian communicator in all communication routines
16 |
17 | To build the code, please use the provided `Makefile` (by typing `make`). By default, the Intel
18 | compiler is used; in order to use gcc, type `make COMP=gnu`.
19 |
-------------------------------------------------------------------------------- /mpi/heat-equation/README_neighbor.md: --------------------------------------------------------------------------------
1 | ## Heat equation solver with neighborhood collectives
2 |
3 | If you are not familiar with the two dimensional heat equation, please have a look
4 | at the [basic description](https://github.com/csc-training/mpi-introduction/tree/main/heat-equation)
5 | in the "Parallel programming with MPI" exercise material.
6 |
7 | Here, the starting point is a working code parallelized over columns (in Fortran) or rows (in C/C++).
8 |
9 | Implement the halo exchange in the `exchange()` routine in [cpp/core.cpp](cpp/core.cpp)
10 | or [fortran/core.F90](fortran/core.F90) with `MPI_Neighbor_alltoall`. In order to proceed,
11 | you need to have the [usage of Cartesian communicator](README_cartesian.md) implemented.
12 |
13 | 1.
1. Before the start of the communication, the values at the real domain boundaries (*i.e.* not those 14 | in the ghost region) need to be copied into a send buffer in the correct order. You may 15 | allocate and deallocate the send and receive buffers within the `exchange` routine or, as 16 | recommended, store them as part of the "parallel" data structure. 17 | 18 | 2. Perform the halo exchange with a single call to `MPI_Neighbor_alltoall`. 19 | 20 | 3. As the boundary values of neighboring processes are in the receive buffer, the values need to be 21 | copied to the ghost region. 22 | 23 | To build the code, please use the provided `Makefile` (by typing `make`). By default, the Intel 24 | compiler is used; in order to use gcc, type `make COMP=gnu`. 25 | -------------------------------------------------------------------------------- /mpi/heat-equation/common/pngwriter.h: -------------------------------------------------------------------------------- 1 | #ifndef PNGWRITER_H_ 2 | #define PNGWRITER_H_ 3 | 4 | #if __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int save_png(double *data, const int nx, const int ny, const char *fname, 9 | const char lang); 10 | 11 | #if __cplusplus 12 | } 13 | #endif 14 | #endif 15 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(COMP),) 2 | COMP=intel 3 | endif 4 | 5 | COMMONDIR=../common 6 | 7 | ifeq ($(COMP),gnu) 8 | CXX=mpicxx 9 | CC=gcc 10 | CCFLAGS=-g -O3 -march=native -Wall -I$(COMMONDIR) -DNDEBUG 11 | LDFLAGS= 12 | LIBS=-lpng 13 | endif 14 | 15 | ifeq ($(COMP),intel) 16 | CXX=mpicxx 17 | CC=icc 18 | CCFLAGS=-g -O3 -xHost -I$(COMMONDIR) -DNDEBUG 19 | LDFLAGS= 20 | LIBS=-lpng 21 | endif 22 | 23 | EXE=heat_mpi 24 | OBJS=main.o heat.o core.o setup.o io.o utilities.o 25 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 26 | 27 | 28 | all: $(EXE) 29 | 30 | 31 | utilities.o: utilities.cpp heat.hpp 32 | heat.o: heat.cpp heat.hpp matrix.hpp 33 | core.o: core.cpp heat.hpp 34 | setup.o: setup.cpp heat.hpp 35 | io.o: io.cpp heat.hpp matrix.hpp 36 | main.o: main.cpp heat.hpp 37 | 38 | $(OBJS_PNG): C_COMPILER := $(CC) 39 | $(OBJS): C_COMPILER := $(CXX) 40 | 41 | $(EXE): $(OBJS) $(OBJS_PNG) 42 | $(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 43 | 44 | %.o: %.cpp 45 | $(CXX) $(CCFLAGS) -c $< -o $@ 46 | 47 | %.o: %.c 48 | $(CC) $(CCFLAGS) -c $< -o $@ 49 | 50 | .PHONY: clean 51 | clean: 52 | -/bin/rm -f $(EXE) a.out *.o *.png *~ 53 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/core.cpp: -------------------------------------------------------------------------------- 1 | // Main solver routines for heat equation solver 2 | 3 | #include <mpi.h> 4 | 5 | #include "heat.hpp" 6 | 7 | // Exchange the boundary values 8 | void exchange(Field& field, const ParallelData parallel) 9 | { 10 | 11 | // Send to up, receive from down 12 | double* sbuf = field.temperature.data(1, 0); 13 | double* rbuf = field.temperature.data(field.nx + 1, 0); 14 | MPI_Sendrecv(sbuf, field.ny + 2, MPI_DOUBLE, 15 | parallel.nup, 11, 16 | rbuf, field.ny + 2, MPI_DOUBLE, 17 | parallel.ndown, 11, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 18 | 19 | // Send to down, receive from up 20 | sbuf = field.temperature.data(field.nx, 0); 21 | rbuf = field.temperature.data(); 22 | MPI_Sendrecv(sbuf, field.ny + 2, MPI_DOUBLE, 23 | parallel.ndown, 12, 24 | rbuf, field.ny + 2, MPI_DOUBLE, 25 | parallel.nup, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 
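    // Note: at the edges of the decomposition parallel.nup / parallel.ndown are
    // MPI_PROC_NULL (see ParallelData in heat.hpp), so the corresponding Sendrecv
    // is a no-op there and no separate boundary handling is needed. The count
    // field.ny + 2 covers one full row including the two ghost columns.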
26 | 27 | } 28 | 29 | // Update the temperature values using five-point stencil */ 30 | void evolve(Field& curr, const Field& prev, const double a, const double dt) 31 | { 32 | 33 | // Compilers do not necessarily optimize division to multiplication, so make it explicit 34 | auto inv_dx2 = 1.0 / (prev.dx * prev.dx); 35 | auto inv_dy2 = 1.0 / (prev.dy * prev.dy); 36 | 37 | // Determine the temperature field at next time step 38 | // As we have fixed boundary conditions, the outermost gridpoints 39 | // are not updated. 40 | for (int i = 1; i < curr.nx + 1; i++) { 41 | for (int j = 1; j < curr.ny + 1; j++) { 42 | curr(i, j) = prev(i, j) + a * dt * ( 43 | ( prev(i + 1, j) - 2.0 * prev(i, j) + prev(i - 1, j) ) * inv_dx2 + 44 | ( prev(i, j + 1) - 2.0 * prev(i, j) + prev(i, j - 1) ) * inv_dy2 45 | ); 46 | } 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/heat.cpp: -------------------------------------------------------------------------------- 1 | #include "heat.hpp" 2 | #include "matrix.hpp" 3 | #include 4 | #include 5 | 6 | void Field::setup(int nx_in, int ny_in, ParallelData parallel) 7 | { 8 | nx_full = nx_in; 9 | ny_full = ny_in; 10 | 11 | nx = nx_full / parallel.size; 12 | if (nx * parallel.size != nx_full) { 13 | std::cout << "Cannot divide grid evenly to processors" << std::endl; 14 | MPI_Abort(MPI_COMM_WORLD, -2); 15 | } 16 | ny = ny_full; 17 | 18 | // matrix includes also ghost layers 19 | temperature = Matrix (nx + 2, ny + 2); 20 | } 21 | 22 | void Field::generate(ParallelData parallel) { 23 | 24 | // Radius of the source disc 25 | auto radius = nx_full / 6.0; 26 | for (int i = 0; i < nx + 2; i++) { 27 | for (int j = 0; j < ny + 2; j++) { 28 | // Distance of point i, j from the origin 29 | auto dx = i + parallel.rank * nx - nx_full / 2 + 1; 30 | auto dy = j - ny / 2 + 1; 31 | if (dx * dx + dy * dy < radius * radius) { 32 | temperature(i, j) = 5.0; 33 | } else { 34 | temperature(i, j) = 65.0; 35 | } 36 | } 37 | } 38 | 39 | // Boundary conditions 40 | for (int i = 0; i < nx + 2; i++) { 41 | // Left 42 | temperature(i, 0) = 20.0; 43 | // Right 44 | temperature(i, ny + 1) = 70.0; 45 | } 46 | 47 | // Top 48 | if (0 == parallel.rank) { 49 | for (int j = 0; j < ny + 2; j++) { 50 | temperature(0, j) = 85.0; 51 | } 52 | } 53 | // Bottom 54 | if (parallel.rank == parallel.size - 1) { 55 | for (int j = 0; j < ny + 2; j++) { 56 | temperature(nx + 1, j) = 5.0; 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/heat.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "matrix.hpp" 4 | #include 5 | 6 | // Class for basic parallelization information 7 | struct ParallelData { 8 | int size; // Number of MPI tasks 9 | int rank; 10 | int nup, ndown; // Ranks of neighbouring MPI tasks 11 | 12 | ParallelData() { // Constructor 13 | 14 | MPI_Comm_size(MPI_COMM_WORLD, &size); 15 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 16 | 17 | nup = rank - 1; 18 | ndown = rank + 1; 19 | 20 | if (nup < 0) { 21 | nup = MPI_PROC_NULL; 22 | } 23 | if (ndown > size - 1) { 24 | ndown = MPI_PROC_NULL; 25 | } 26 | }; 27 | 28 | }; 29 | 30 | // Class for temperature field 31 | struct Field { 32 | // nx and ny are the true dimensions of the field. 
The temperature matrix 33 | // contains also ghost layers, so it will have dimensions nx+2 x ny+2 34 | int nx; // Local dimensions of the field 35 | int ny; 36 | int nx_full; // Global dimensions of the field 37 | int ny_full; // Global dimensions of the field 38 | double dx = 0.01; // Grid spacing 39 | double dy = 0.01; 40 | 41 | Matrix temperature; 42 | 43 | void setup(int nx_in, int ny_in, ParallelData parallel); 44 | 45 | void generate(ParallelData parallel); 46 | 47 | // standard (i,j) syntax for setting elements 48 | double& operator()(int i, int j) {return temperature(i, j);} 49 | 50 | // standard (i,j) syntax for getting elements 51 | const double& operator()(int i, int j) const {return temperature(i, j);} 52 | 53 | }; 54 | 55 | // Function declarations 56 | void initialize(int argc, char *argv[], Field& current, 57 | Field& previous, int& nsteps, ParallelData parallel); 58 | 59 | void exchange(Field& field, const ParallelData parallel); 60 | 61 | void evolve(Field& curr, const Field& prev, const double a, const double dt); 62 | 63 | void write_field(const Field& field, const int iter, const ParallelData parallel); 64 | 65 | void read_field(Field& field, std::string filename, 66 | const ParallelData parallel); 67 | 68 | double average(const Field& field, const ParallelData parallel); 69 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/matrix.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | // Generic 2D matrix array class. 6 | // 7 | // Internally data is stored in 1D vector but is 8 | // accessed using index function that maps i and j 9 | // indices to an element in the flat data vector. 10 | // Row major storage is used 11 | // For easier usage, we overload parentheses () operator 12 | // for accessing matrix elements in the usual (i,j) 13 | // format. 
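// A minimal usage sketch (illustrative only; the names below are not part of the solver):
//   Matrix<double> m(4, 6);         // 4 x 6 matrix, stored row-major in a flat vector
//   m(1, 2) = 3.0;                  // element access through operator()
//   double *row1 = m.data(1, 0);    // raw pointer to the start of row 1, e.g. for MPI calls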
14 | 15 | template 16 | class Matrix 17 | { 18 | 19 | private: 20 | 21 | // Internal storage 22 | std::vector _data; 23 | 24 | // Internal 1D indexing 25 | int indx(int i, int j) const { 26 | //assert that indices are reasonable 27 | assert(i >= 0 && i < nx); 28 | assert(j >= 0 && j < ny); 29 | 30 | return i * ny + j; 31 | } 32 | 33 | public: 34 | 35 | // matrix dimensions 36 | int nx, ny; 37 | 38 | // Default constructor 39 | Matrix() = default; 40 | // Allocate at the time of construction 41 | Matrix(int nx, int ny) : nx(nx), ny(ny) { 42 | _data.resize(nx * ny); 43 | }; 44 | 45 | void allocate(int nx_in, int ny_in) { 46 | nx = nx_in; 47 | ny = ny_in; 48 | _data.resize(nx * ny); 49 | }; 50 | 51 | // standard (i,j) syntax for setting elements 52 | T& operator()(int i, int j) { 53 | return _data[ indx(i, j) ]; 54 | } 55 | 56 | // standard (i,j) syntax for getting elements 57 | const T& operator()(int i, int j) const { 58 | return _data[ indx(i, j) ]; 59 | } 60 | 61 | // provide possibility to get raw pointer for data at index (i,j) (needed for MPI) 62 | T* data(int i=0, int j=0) {return _data.data() + i * ny + j;} 63 | 64 | }; 65 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/setup.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "heat.hpp" 5 | 6 | 7 | void initialize(int argc, char *argv[], Field& current, 8 | Field& previous, int& nsteps, ParallelData parallel) 9 | { 10 | /* 11 | * Following combinations of command line arguments are possible: 12 | * No arguments: use default field dimensions and number of time steps 13 | * One argument: read initial field from a given file 14 | * Two arguments: initial field from file and number of time steps 15 | * Three arguments: field dimensions (rows,cols) and number of time steps 16 | */ 17 | 18 | 19 | int rows = 2000; //!< Field dimensions with default values 20 | int cols = 2000; 21 | 22 | std::string input_file; //!< Name of the optional input file 23 | 24 | bool read_file = 0; 25 | 26 | nsteps = 500; 27 | 28 | switch (argc) { 29 | case 1: 30 | /* Use default values */ 31 | break; 32 | case 2: 33 | /* Read initial field from a file */ 34 | input_file = argv[1]; 35 | read_file = true; 36 | break; 37 | case 3: 38 | /* Read initial field from a file */ 39 | input_file = argv[1]; 40 | read_file = true; 41 | 42 | /* Number of time steps */ 43 | nsteps = std::atoi(argv[2]); 44 | break; 45 | case 4: 46 | /* Field dimensions */ 47 | rows = std::atoi(argv[1]); 48 | cols = std::atoi(argv[2]); 49 | /* Number of time steps */ 50 | nsteps = std::atoi(argv[3]); 51 | break; 52 | default: 53 | std::cout << "Unsupported number of command line arguments" << std::endl; 54 | exit(-1); 55 | } 56 | 57 | if (read_file) { 58 | if (0 == parallel.rank) 59 | std::cout << "Reading input from " + input_file << std::endl; 60 | read_field(current, input_file, parallel); 61 | } else { 62 | current.setup(rows, cols, parallel); 63 | current.generate(parallel); 64 | } 65 | 66 | // copy "current" field also to "previous" 67 | previous = current; 68 | 69 | } 70 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/solution/Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(COMP),) 2 | COMP=intel 3 | endif 4 | 5 | COMMONDIR=../../common 6 | 7 | ifeq ($(COMP),gnu) 8 | CXX=mpicxx 9 | CC=gcc 10 | CCFLAGS=-g -O3 -march=native -Wall 
-I$(COMMONDIR) -DNDEBUG 11 | LDFLAGS= 12 | LIBS=-lpng 13 | endif 14 | 15 | ifeq ($(COMP),intel) 16 | CXX=mpicxx 17 | CC=icc 18 | CCFLAGS=-g -O3 -xHost -I$(COMMONDIR) -DNDEBUG 19 | LDFLAGS= 20 | LIBS=-lpng 21 | endif 22 | 23 | EXE=heat_mpi 24 | OBJS=main.o heat.o core.o setup.o io.o utilities.o 25 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 26 | 27 | 28 | all: $(EXE) 29 | 30 | 31 | utilities.o: utilities.cpp heat.hpp 32 | heat.o: heat.cpp heat.hpp matrix.hpp 33 | core.o: core.cpp heat.hpp 34 | setup.o: setup.cpp heat.hpp 35 | io.o: io.cpp heat.hpp matrix.hpp 36 | main.o: main.cpp heat.hpp 37 | 38 | $(OBJS_PNG): C_COMPILER := $(CC) 39 | $(OBJS): C_COMPILER := $(CXX) 40 | 41 | $(EXE): $(OBJS) $(OBJS_PNG) 42 | $(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 43 | 44 | %.o: %.cpp 45 | $(CXX) $(CCFLAGS) -c $< -o $@ 46 | 47 | %.o: %.c 48 | $(CC) $(CCFLAGS) -c $< -o $@ 49 | 50 | .PHONY: clean 51 | clean: 52 | -/bin/rm -f $(EXE) a.out *.o *.png *~ 53 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/solution/core.cpp: -------------------------------------------------------------------------------- 1 | // Main solver routines for heat equation solver 2 | 3 | #include 4 | 5 | #include "heat.hpp" 6 | 7 | // Exchange the boundary values 8 | void exchange(Field& field, const ParallelData parallel) 9 | { 10 | 11 | MPI_Datatype types[4] = {parallel.rowtype, parallel.rowtype, 12 | parallel.columntype, parallel.columntype}; 13 | int counts[4] = {1, 1, 1, 1}; 14 | MPI_Aint sdisps[4], rdisps[4], disp0; 15 | 16 | // Determine displacements 17 | disp0 = reinterpret_cast (field.temperature.data(0, 0)); 18 | sdisps[0] = reinterpret_cast (field.temperature.data(1, 0)); // Second row 19 | sdisps[1] = reinterpret_cast (field.temperature.data(field.nx, 0)); // Second last row 20 | sdisps[2] = reinterpret_cast (field.temperature.data(0, 1)); // Second column 21 | sdisps[3] = reinterpret_cast (field.temperature.data(0, field.ny)); // Second last column 22 | 23 | rdisps[0] = reinterpret_cast (field.temperature.data(0, 0)); // First row 24 | rdisps[1] = reinterpret_cast (field.temperature.data(field.nx + 1, 0)); // Last row 25 | rdisps[2] = reinterpret_cast (field.temperature.data(0, 0)); // First column 26 | rdisps[3] = reinterpret_cast (field.temperature.data(0, field.ny + 1)); // Last column 27 | 28 | for (int i=0; i < 4; i++) { 29 | sdisps[i] -= disp0; 30 | rdisps[i] -= disp0; 31 | } 32 | 33 | MPI_Neighbor_alltoallw(field.temperature.data(), counts, sdisps, types, 34 | field.temperature.data(), counts, rdisps, types, 35 | parallel.comm); 36 | } 37 | 38 | // Update the temperature values using five-point stencil */ 39 | void evolve(Field& curr, const Field& prev, const double a, const double dt) 40 | { 41 | 42 | // Compilers do not necessarily optimize division to multiplication, so make it explicit 43 | auto inv_dx2 = 1.0 / (prev.dx * prev.dx); 44 | auto inv_dy2 = 1.0 / (prev.dy * prev.dy); 45 | 46 | // Determine the temperature field at next time step 47 | // As we have fixed boundary conditions, the outermost gridpoints 48 | // are not updated. 
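    // Written out, the update computed by the loops below is
    //   curr(i,j) = prev(i,j) + a*dt * [ (prev(i+1,j) - 2*prev(i,j) + prev(i-1,j)) / dx^2
    //                                  + (prev(i,j+1) - 2*prev(i,j) + prev(i,j-1)) / dy^2 ]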
49 | for (int i = 1; i < curr.nx + 1; i++) { 50 | for (int j = 1; j < curr.ny + 1; j++) { 51 | curr(i, j) = prev(i, j) + a * dt * ( 52 | ( prev(i + 1, j) - 2.0 * prev(i, j) + prev(i - 1, j) ) * inv_dx2 + 53 | ( prev(i, j + 1) - 2.0 * prev(i, j) + prev(i, j - 1) ) * inv_dy2 54 | ); 55 | } 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/solution/heat.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "matrix.hpp" 4 | #include 5 | 6 | // Class for basic parallelization information 7 | struct ParallelData { 8 | int size; // Number of MPI tasks 9 | int rank; 10 | int nghbrs[2][2]; // Ranks of neighbouring MPI tasks 11 | MPI_Datatype rowtype, columntype, subarraytype; 12 | MPI_Comm comm; 13 | 14 | ParallelData() { // Constructor 15 | 16 | MPI_Comm_size(MPI_COMM_WORLD, &size); 17 | 18 | constexpr int ndims = 2; 19 | int dims[ndims] = {0, 0}; 20 | int periods[ndims] = {0, 0}; 21 | 22 | MPI_Dims_create(size, ndims, dims); 23 | MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, 1, &comm); 24 | MPI_Comm_rank(comm, &rank); 25 | 26 | // Determine neighbors 27 | for (int i=0; i < ndims; i++) 28 | MPI_Cart_shift(comm, i, 1, &nghbrs[i][0], &nghbrs[i][1]); 29 | }; 30 | 31 | }; 32 | 33 | // Class for temperature field 34 | struct Field { 35 | // nx and ny are the true dimensions of the field. The temperature matrix 36 | // contains also ghost layers, so it will have dimensions nx+2 x ny+2 37 | int nx; // Local dimensions of the field 38 | int ny; 39 | int nx_full; // Global dimensions of the field 40 | int ny_full; // Global dimensions of the field 41 | double dx = 0.01; // Grid spacing 42 | double dy = 0.01; 43 | 44 | Matrix temperature; 45 | 46 | void setup(int nx_in, int ny_in, ParallelData& parallel); 47 | 48 | void generate(ParallelData parallel); 49 | 50 | // standard (i,j) syntax for setting elements 51 | double& operator()(int i, int j) {return temperature(i, j);} 52 | 53 | // standard (i,j) syntax for getting elements 54 | const double& operator()(int i, int j) const {return temperature(i, j);} 55 | 56 | }; 57 | 58 | // Function declarations 59 | void initialize(int argc, char *argv[], Field& current, 60 | Field& previous, int& nsteps, ParallelData& parallel); 61 | 62 | void exchange(Field& field, const ParallelData parallel); 63 | 64 | void evolve(Field& curr, const Field& prev, const double a, const double dt); 65 | 66 | void write_field(Field& field, const int iter, const ParallelData parallel); 67 | 68 | void read_field(Field& field, std::string filename, 69 | ParallelData& parallel); 70 | 71 | double average(const Field& field, const ParallelData parallel); 72 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/solution/matrix.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | // Generic 2D matrix array class. 6 | // 7 | // Internally data is stored in 1D vector but is 8 | // accessed using index function that maps i and j 9 | // indices to an element in the flat data vector. 10 | // Row major storage is used 11 | // For easier usage, we overload parentheses () operator 12 | // for accessing matrix elements in the usual (i,j) 13 | // format. 
14 | 15 | template 16 | class Matrix 17 | { 18 | 19 | private: 20 | 21 | // Internal storage 22 | std::vector _data; 23 | 24 | // Internal 1D indexing 25 | int indx(int i, int j) const { 26 | //assert that indices are reasonable 27 | assert(i >= 0 && i < nx); 28 | assert(j >= 0 && j < ny); 29 | 30 | return i * ny + j; 31 | } 32 | 33 | public: 34 | 35 | // matrix dimensions 36 | int nx, ny; 37 | 38 | // Default constructor 39 | Matrix() = default; 40 | // Allocate at the time of construction 41 | Matrix(int nx, int ny) : nx(nx), ny(ny) { 42 | _data.resize(nx * ny); 43 | }; 44 | 45 | void allocate(int nx_in, int ny_in) { 46 | nx = nx_in; 47 | ny = ny_in; 48 | _data.resize(nx * ny); 49 | }; 50 | 51 | // standard (i,j) syntax for setting elements 52 | T& operator()(int i, int j) { 53 | return _data[ indx(i, j) ]; 54 | } 55 | 56 | // standard (i,j) syntax for getting elements 57 | const T& operator()(int i, int j) const { 58 | return _data[ indx(i, j) ]; 59 | } 60 | 61 | // provide possibility to get raw pointer for data at index (i,j) (needed for MPI) 62 | T* data(int i=0, int j=0) {return _data.data() + i * ny + j;} 63 | 64 | }; 65 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/solution/setup.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "heat.hpp" 5 | 6 | 7 | void initialize(int argc, char *argv[], Field& current, 8 | Field& previous, int& nsteps, ParallelData ¶llel) 9 | { 10 | /* 11 | * Following combinations of command line arguments are possible: 12 | * No arguments: use default field dimensions and number of time steps 13 | * One argument: read initial field from a given file 14 | * Two arguments: initial field from file and number of time steps 15 | * Three arguments: field dimensions (rows,cols) and number of time steps 16 | */ 17 | 18 | 19 | int rows = 2000; //!< Field dimensions with default values 20 | int cols = 2000; 21 | 22 | std::string input_file; //!< Name of the optional input file 23 | 24 | bool read_file = 0; 25 | 26 | nsteps = 500; 27 | 28 | switch (argc) { 29 | case 1: 30 | /* Use default values */ 31 | break; 32 | case 2: 33 | /* Read initial field from a file */ 34 | input_file = argv[1]; 35 | read_file = true; 36 | break; 37 | case 3: 38 | /* Read initial field from a file */ 39 | input_file = argv[1]; 40 | read_file = true; 41 | 42 | /* Number of time steps */ 43 | nsteps = std::atoi(argv[2]); 44 | break; 45 | case 4: 46 | /* Field dimensions */ 47 | rows = std::atoi(argv[1]); 48 | cols = std::atoi(argv[2]); 49 | /* Number of time steps */ 50 | nsteps = std::atoi(argv[3]); 51 | break; 52 | default: 53 | std::cout << "Unsupported number of command line arguments" << std::endl; 54 | exit(-1); 55 | } 56 | 57 | if (read_file) { 58 | if (0 == parallel.rank) 59 | std::cout << "Reading input from " + input_file << std::endl; 60 | read_field(current, input_file, parallel); 61 | } else { 62 | current.setup(rows, cols, parallel); 63 | current.generate(parallel); 64 | } 65 | 66 | // copy "current" field also to "previous" 67 | previous = current; 68 | 69 | } 70 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/solution/utilities.cpp: -------------------------------------------------------------------------------- 1 | // Utility functions for heat equation solver 2 | // NOTE: This file does not need to be edited! 
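// The only routine here, average(), sums the local interior points and combines
// the partial sums with MPI_Allreduce over the Cartesian communicator.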
3 | 4 | #include 5 | 6 | #include "heat.hpp" 7 | 8 | // Calculate average temperature 9 | double average(const Field& field, const ParallelData parallel) 10 | { 11 | double local_average = 0.0; 12 | double average = 0.0; 13 | 14 | for (int i = 1; i < field.nx + 1; i++) { 15 | for (int j = 1; j < field.ny + 1; j++) { 16 | local_average += field.temperature(i, j); 17 | } 18 | } 19 | 20 | MPI_Allreduce(&local_average, &average, 1, MPI_DOUBLE, MPI_SUM, 21 | parallel.comm); 22 | average /= (field.nx_full * field.ny_full); 23 | return average; 24 | } 25 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/utilities.cpp: -------------------------------------------------------------------------------- 1 | // Utility functions for heat equation solver 2 | // NOTE: This file does not need to be edited! 3 | 4 | #include 5 | 6 | #include "heat.hpp" 7 | 8 | // Calculate average temperature 9 | double average(const Field& field, const ParallelData parallel) 10 | { 11 | double local_average = 0.0; 12 | double average = 0.0; 13 | 14 | for (int i = 1; i < field.nx + 1; i++) { 15 | for (int j = 1; j < field.ny + 1; j++) { 16 | local_average += field.temperature(i, j); 17 | } 18 | } 19 | 20 | MPI_Allreduce(&local_average, &average, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 21 | average /= (field.nx_full * field.ny_full); 22 | return average; 23 | } 24 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(COMP),) 2 | COMP=intel 3 | endif 4 | 5 | COMMONDIR=../common 6 | 7 | ifeq ($(COMP),intel) 8 | FC=mpif90 9 | CC=icc 10 | FCFLAGS=-O2 -xHost 11 | CCFLAGS=-O2 -xHost -I$(COMMONDIR) 12 | LDFLAGS= 13 | LIBS=-lpng 14 | endif 15 | 16 | ifeq ($(COMP),gnu) 17 | FC=mpif90 18 | CC=gcc 19 | FCFLAGS=-O3 -Wall -march=native 20 | CCFLAGS=-O3 -Wall -march=native -I$(COMMONDIR) 21 | LDFLAGS= 22 | LIBS=-lpng 23 | endif 24 | 25 | EXE=heat_mpi 26 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o 27 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 28 | 29 | 30 | all: $(EXE) 31 | 32 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 33 | core.o: core.F90 heat_mod.o 34 | utilities.o: utilities.F90 heat_mod.o 35 | io.o: io.F90 heat_mod.o pngwriter_mod.o 36 | setup.o: setup.F90 heat_mod.o utilities.o io.o 37 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o 38 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o 39 | 40 | $(EXE): $(OBJS) $(OBJS_PNG) 41 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 42 | 43 | %.o: %.F90 44 | $(FC) $(FCFLAGS) -c $< -o $@ 45 | 46 | %.o: %.c 47 | $(CC) $(CCFLAGS) -c $< -o $@ 48 | 49 | .PHONY: clean 50 | clean: 51 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ 52 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/core.F90: -------------------------------------------------------------------------------- 1 | ! Main solver routines for heat equation solver 2 | module core 3 | use heat 4 | 5 | contains 6 | 7 | ! Exchange the boundary data between MPI tasks 8 | subroutine exchange(field0, parallel) 9 | 10 | implicit none 11 | 12 | type(field), intent(inout) :: field0 13 | type(parallel_data), intent(in) :: parallel 14 | 15 | integer :: ierr 16 | 17 | ! 
Send to left, receive from right 18 | call mpi_sendrecv(field0%data(:, 1), field0%nx + 2, MPI_DOUBLE_PRECISION, & 19 | & parallel%nleft, 11, & 20 | & field0%data(:, field0%ny + 1), field0%nx + 2, MPI_DOUBLE_PRECISION, & 21 | & parallel%nright, 11, & 22 | & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) 23 | 24 | ! Send to right, receive from left 25 | call mpi_sendrecv(field0%data(:, field0%ny), field0%nx + 2, MPI_DOUBLE_PRECISION, & 26 | & parallel%nright, 12, & 27 | & field0%data(:, 0), field0%nx + 2, MPI_DOUBLE_PRECISION,& 28 | & parallel%nleft, 12, & 29 | & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) 30 | 31 | end subroutine exchange 32 | 33 | ! Compute one time step of temperature evolution 34 | ! Arguments: 35 | ! curr (type(field)): current temperature values 36 | ! prev (type(field)): values from previous time step 37 | ! a (real(dp)): update equation constant 38 | ! dt (real(dp)): time step value 39 | subroutine evolve(curr, prev, a, dt) 40 | 41 | implicit none 42 | 43 | type(field), intent(inout) :: curr, prev 44 | real(dp) :: a, dt 45 | integer :: i, j, nx, ny 46 | 47 | nx = curr%nx 48 | ny = curr%ny 49 | 50 | do j = 1, ny 51 | do i = 1, nx 52 | curr%data(i, j) = prev%data(i, j) + a * dt * & 53 | & ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + & 54 | & prev%data(i+1, j)) / curr%dx**2 + & 55 | & (prev%data(i, j-1) - 2.0 * prev%data(i, j) + & 56 | & prev%data(i, j+1)) / curr%dy**2) 57 | end do 58 | end do 59 | end subroutine evolve 60 | 61 | end module core 62 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/heat_mod.F90: -------------------------------------------------------------------------------- 1 | ! Field metadata for heat equation solver 2 | module heat 3 | use mpi_f08 4 | use iso_fortran_env, only : REAL64 5 | 6 | implicit none 7 | 8 | integer, parameter :: dp = REAL64 9 | real(dp), parameter :: DX = 0.01, DY = 0.01 ! Fixed grid spacing 10 | 11 | type :: field 12 | integer :: nx ! local dimension of the field 13 | integer :: ny 14 | integer :: nx_full ! global dimension of the field 15 | integer :: ny_full 16 | real(dp) :: dx 17 | real(dp) :: dy 18 | real(dp), dimension(:,:), allocatable :: data 19 | end type field 20 | 21 | type :: parallel_data 22 | integer :: size 23 | integer :: rank 24 | integer :: nleft, nright ! Ranks of neighbouring MPI tasks 25 | end type parallel_data 26 | 27 | contains 28 | ! Initialize the field type metadata 29 | ! Arguments: 30 | ! field0 (type(field)): input field 31 | ! 
nx, ny, dx, dy: field dimensions and spatial step size 32 | subroutine set_field_dimensions(field0, nx, ny, parallel) 33 | implicit none 34 | 35 | type(field), intent(out) :: field0 36 | integer, intent(in) :: nx, ny 37 | type(parallel_data), intent(in) :: parallel 38 | 39 | integer :: nx_local, ny_local 40 | 41 | nx_local = nx 42 | ny_local = ny / parallel%size 43 | 44 | field0%dx = DX 45 | field0%dy = DY 46 | field0%nx = nx_local 47 | field0%ny = ny_local 48 | field0%nx_full = nx 49 | field0%ny_full = ny 50 | 51 | end subroutine set_field_dimensions 52 | 53 | subroutine parallel_setup(parallel, nx, ny) 54 | use mpi 55 | 56 | implicit none 57 | 58 | type(parallel_data), intent(out) :: parallel 59 | integer, intent(in), optional :: nx, ny 60 | 61 | integer :: ny_local 62 | integer :: ierr 63 | 64 | call mpi_comm_size(MPI_COMM_WORLD, parallel%size, ierr) 65 | 66 | if (present(ny)) then 67 | ny_local = ny / parallel%size 68 | if (ny_local * parallel%size /= ny) then 69 | write(*,*) 'Cannot divide grid evenly to processors' 70 | call mpi_abort(MPI_COMM_WORLD, -2, ierr) 71 | end if 72 | end if 73 | 74 | call mpi_comm_rank(MPI_COMM_WORLD, parallel%rank, ierr) 75 | 76 | parallel%nleft = parallel%rank - 1 77 | parallel%nright = parallel%rank + 1 78 | 79 | if (parallel%nleft < 0) then 80 | parallel%nleft = MPI_PROC_NULL 81 | end if 82 | if (parallel%nright > parallel%size - 1) then 83 | parallel%nright = MPI_PROC_NULL 84 | end if 85 | 86 | end subroutine parallel_setup 87 | 88 | end module heat 89 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/main.F90: -------------------------------------------------------------------------------- 1 | ! Heat equation solver in 2D. 2 | 3 | program heat_solve 4 | use heat 5 | use core 6 | use io 7 | use setup 8 | use utilities 9 | 10 | implicit none 11 | 12 | real(dp), parameter :: a = 0.5 ! Diffusion constant 13 | type(field) :: current, previous ! Current and previus temperature fields 14 | 15 | real(dp) :: dt ! Time step 16 | integer :: nsteps ! Number of time steps 17 | integer, parameter :: image_interval = 100 ! Image output interval 18 | 19 | type(parallel_data) :: parallelization 20 | integer :: ierr 21 | 22 | integer :: iter 23 | 24 | real(dp) :: average_temp ! Average temperature 25 | 26 | real(kind=dp) :: start, stop ! Timers 27 | 28 | call mpi_init(ierr) 29 | 30 | call initialize(current, previous, nsteps, parallelization) 31 | 32 | ! Draw the picture of the initial state 33 | call write_field(current, 0, parallelization) 34 | 35 | average_temp = average(current, parallelization) 36 | if (parallelization % rank == 0) then 37 | write(*,'(A, I5, A, I5, A, I5)') 'Simulation grid: ', current%nx_full, ' x ', & 38 | & current%ny_full, ' time steps: ', nsteps 39 | write(*,'(A, I5)') 'MPI processes: ', parallelization%size 40 | write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp 41 | end if 42 | 43 | ! Largest stable time step 44 | dt = current%dx**2 * current%dy**2 / & 45 | & (2.0 * a * (current%dx**2 + current%dy**2)) 46 | 47 | ! Main iteration loop, save a picture every 48 | ! image_interval steps 49 | 50 | start = mpi_wtime() 51 | 52 | do iter = 1, nsteps 53 | call exchange(previous, parallelization) 54 | call evolve(current, previous, a, dt) 55 | if (mod(iter, image_interval) == 0) then 56 | call write_field(current, iter, parallelization) 57 | end if 58 | call swap_fields(current, previous) 59 | end do 60 | 61 | stop = mpi_wtime() 62 | 63 | ! 
Average temperature for reference 64 | average_temp = average(previous, parallelization) 65 | 66 | if (parallelization % rank == 0) then 67 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.' 68 | write(*,'(A,F9.6)') 'Average temperature: ', average_temp 69 | if (command_argument_count() == 0) then 70 | write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239 71 | end if 72 | end if 73 | 74 | call finalize(current, previous) 75 | 76 | call mpi_finalize(ierr) 77 | 78 | end program heat_solve 79 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/pngwriter_mod.F90: -------------------------------------------------------------------------------- 1 | ! PNG writer for heat equation solver 2 | module pngwriter 3 | use heat 4 | 5 | contains 6 | 7 | function save_png(data, nx, ny, fname) result(stat) 8 | 9 | use, intrinsic :: ISO_C_BINDING 10 | implicit none 11 | 12 | real(dp), dimension(:,:), intent(in) :: data 13 | integer, intent(in) :: nx, ny 14 | character(len=*), intent(in) :: fname 15 | integer :: stat 16 | 17 | ! Interface for save_png C-function 18 | interface 19 | ! The C-function definition is 20 | ! int save_png(double *data, const int nx, const int ny, 21 | ! const char *fname) 22 | function save_png_c(data, nx, ny, fname, order) & 23 | & bind(C,name="save_png") result(stat) 24 | use, intrinsic :: ISO_C_BINDING 25 | implicit none 26 | real(kind=C_DOUBLE) :: data(*) 27 | integer(kind=C_INT), value, intent(IN) :: nx, ny 28 | character(kind=C_CHAR), intent(IN) :: fname(*) 29 | character(kind=C_CHAR), value, intent(IN) :: order 30 | integer(kind=C_INT) :: stat 31 | end function save_png_c 32 | end interface 33 | 34 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') 35 | if (stat /= 0) then 36 | write(*,*) 'save_png returned error!' 
37 | end if 38 | 39 | end function save_png 40 | 41 | end module pngwriter 42 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/solution/Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(COMP),) 2 | COMP=intel 3 | endif 4 | 5 | COMMONDIR=../../common 6 | 7 | ifeq ($(COMP),intel) 8 | FC=mpif90 9 | CC=icc 10 | FCFLAGS=-O2 -xHost 11 | CCFLAGS=-O2 -xHost -I$(COMMONDIR) 12 | LDFLAGS= 13 | LIBS=-lpng 14 | endif 15 | 16 | ifeq ($(COMP),gnu) 17 | FC=mpif90 18 | CC=gcc 19 | FCFLAGS=-O3 -Wall -march=native 20 | CCFLAGS=-O3 -Wall -march=native -I$(COMMONDIR) 21 | LDFLAGS= 22 | LIBS=-lpng 23 | endif 24 | 25 | EXE=heat_mpi 26 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o 27 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 28 | 29 | 30 | all: $(EXE) 31 | 32 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 33 | core.o: core.F90 heat_mod.o 34 | utilities.o: utilities.F90 heat_mod.o 35 | io.o: io.F90 heat_mod.o pngwriter_mod.o 36 | setup.o: setup.F90 heat_mod.o utilities.o io.o 37 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o 38 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o 39 | 40 | $(EXE): $(OBJS) $(OBJS_PNG) 41 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 42 | 43 | %.o: %.F90 44 | $(FC) $(FCFLAGS) -c $< -o $@ 45 | 46 | %.o: %.c 47 | $(CC) $(CCFLAGS) -c $< -o $@ 48 | 49 | .PHONY: clean 50 | clean: 51 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ 52 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/solution/main.F90: -------------------------------------------------------------------------------- 1 | ! Heat equation solver in 2D. 2 | 3 | program heat_solve 4 | use heat 5 | use core 6 | use io 7 | use setup 8 | use utilities 9 | 10 | implicit none 11 | 12 | real(dp), parameter :: a = 0.5 ! Diffusion constant 13 | type(field) :: current, previous ! Current and previus temperature fields 14 | 15 | real(dp) :: dt ! Time step 16 | integer :: nsteps ! Number of time steps 17 | integer, parameter :: image_interval = 100 ! Image output interval 18 | 19 | type(parallel_data) :: parallelization 20 | integer :: ierr 21 | 22 | integer :: iter 23 | 24 | real(dp) :: average_temp ! Average temperature 25 | 26 | real(kind=dp) :: start, stop ! Timers 27 | 28 | call mpi_init(ierr) 29 | 30 | call initialize(current, previous, nsteps, parallelization) 31 | 32 | ! Draw the picture of the initial state 33 | call write_field(current, 0, parallelization) 34 | 35 | average_temp = average(current, parallelization) 36 | if (parallelization % rank == 0) then 37 | write(*,'(A, I5, A, I5, A, I5)') 'Simulation grid: ', current%nx_full, ' x ', & 38 | & current%ny_full, ' time steps: ', nsteps 39 | write(*,'(A, I5)') 'MPI processes: ', parallelization%size 40 | write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp 41 | end if 42 | 43 | ! Largest stable time step 44 | dt = current%dx**2 * current%dy**2 / & 45 | & (2.0 * a * (current%dx**2 + current%dy**2)) 46 | 47 | ! Main iteration loop, save a picture every 48 | ! image_interval steps 49 | 50 | start = mpi_wtime() 51 | 52 | do iter = 1, nsteps 53 | call exchange(previous, parallelization) 54 | call evolve(current, previous, a, dt) 55 | if (mod(iter, image_interval) == 0 .or. 
iter == nsteps) then 56 | call write_field(current, iter, parallelization) 57 | end if 58 | call swap_fields(current, previous) 59 | end do 60 | 61 | stop = mpi_wtime() 62 | 63 | ! Average temperature for reference 64 | average_temp = average(previous, parallelization) 65 | 66 | if (parallelization % rank == 0) then 67 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.' 68 | write(*,'(A,F9.6)') 'Average temperature: ', average_temp 69 | if (command_argument_count() == 0) then 70 | write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239 71 | end if 72 | end if 73 | 74 | call finalize(current, previous) 75 | 76 | call mpi_finalize(ierr) 77 | 78 | end program heat_solve 79 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/solution/pngwriter_mod.F90: -------------------------------------------------------------------------------- 1 | ! PNG writer for heat equation solver 2 | module pngwriter 3 | use heat 4 | 5 | contains 6 | 7 | function save_png(data, nx, ny, fname) result(stat) 8 | 9 | use, intrinsic :: ISO_C_BINDING 10 | implicit none 11 | 12 | real(dp), dimension(:,:), intent(in) :: data 13 | integer, intent(in) :: nx, ny 14 | character(len=*), intent(in) :: fname 15 | integer :: stat 16 | 17 | ! Interface for save_png C-function 18 | interface 19 | ! The C-function definition is 20 | ! int save_png(double *data, const int nx, const int ny, 21 | ! const char *fname) 22 | function save_png_c(data, nx, ny, fname, order) & 23 | & bind(C,name="save_png") result(stat) 24 | use, intrinsic :: ISO_C_BINDING 25 | implicit none 26 | real(kind=C_DOUBLE) :: data(*) 27 | integer(kind=C_INT), value, intent(IN) :: nx, ny 28 | character(kind=C_CHAR), intent(IN) :: fname(*) 29 | character(kind=C_CHAR), value, intent(IN) :: order 30 | integer(kind=C_INT) :: stat 31 | end function save_png_c 32 | end interface 33 | 34 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') 35 | if (stat /= 0) then 36 | write(*,*) 'save_png returned error!' 37 | end if 38 | 39 | end function save_png 40 | 41 | end module pngwriter 42 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/solution/utilities.F90: -------------------------------------------------------------------------------- 1 | ! Utility routines for heat equation solver 2 | 3 | module utilities 4 | use heat 5 | 6 | contains 7 | 8 | ! Swap the data fields of two variables of type field 9 | ! Arguments: 10 | ! curr, prev (type(field)): the two variables that are swapped 11 | subroutine swap_fields(curr, prev) 12 | 13 | implicit none 14 | 15 | type(field), intent(inout) :: curr, prev 16 | real(dp), allocatable, dimension(:,:) :: tmp 17 | 18 | call move_alloc(curr%data, tmp) 19 | call move_alloc(prev%data, curr%data) 20 | call move_alloc(tmp, prev%data) 21 | end subroutine swap_fields 22 | 23 | ! Copy the data from one field to another 24 | ! Arguments: 25 | ! from_field (type(field)): variable to copy from 26 | ! to_field (type(field)): variable to copy to 27 | subroutine copy_fields(from_field, to_field) 28 | 29 | implicit none 30 | 31 | type(field), intent(in) :: from_field 32 | type(field), intent(out) :: to_field 33 | 34 | ! Consistency checks 35 | if (.not.allocated(from_field%data)) then 36 | write (*,*) "Can not copy from a field without allocated data" 37 | stop 38 | end if 39 | if (.not.allocated(to_field%data)) then 40 | ! 
Target is not initialize, allocate memory 41 | allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), & 42 | & lbound(from_field%data, 2):ubound(from_field%data, 2))) 43 | else if (any(shape(from_field%data) /= shape(to_field%data))) then 44 | write (*,*) "Wrong field data sizes in copy routine" 45 | print *, shape(from_field%data), shape(to_field%data) 46 | stop 47 | end if 48 | 49 | to_field%data = from_field%data 50 | 51 | to_field%nx = from_field%nx 52 | to_field%ny = from_field%ny 53 | to_field%nx_full = from_field%nx_full 54 | to_field%ny_full = from_field%ny_full 55 | to_field%dx = from_field%dx 56 | to_field%dy = from_field%dy 57 | end subroutine copy_fields 58 | 59 | function average(field0, parallelization) 60 | 61 | implicit none 62 | 63 | real(dp) :: average 64 | type(field) :: field0 65 | type(parallel_data), intent(in) :: parallelization 66 | 67 | real(dp) :: local_average 68 | integer :: rc 69 | 70 | local_average = sum(field0%data(1:field0%nx, 1:field0%ny)) 71 | call mpi_allreduce(local_average, average, 1, MPI_DOUBLE_PRECISION, & 72 | & MPI_SUM, MPI_COMM_WORLD, rc) 73 | average = average / (field0%nx_full * field0%ny_full) 74 | 75 | end function average 76 | 77 | end module utilities 78 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/utilities.F90: -------------------------------------------------------------------------------- 1 | ! Utility routines for heat equation solver 2 | 3 | module utilities 4 | use heat 5 | 6 | contains 7 | 8 | ! Swap the data fields of two variables of type field 9 | ! Arguments: 10 | ! curr, prev (type(field)): the two variables that are swapped 11 | subroutine swap_fields(curr, prev) 12 | 13 | implicit none 14 | 15 | type(field), intent(inout) :: curr, prev 16 | real(dp), allocatable, dimension(:,:) :: tmp 17 | 18 | call move_alloc(curr%data, tmp) 19 | call move_alloc(prev%data, curr%data) 20 | call move_alloc(tmp, prev%data) 21 | end subroutine swap_fields 22 | 23 | ! Copy the data from one field to another 24 | ! Arguments: 25 | ! from_field (type(field)): variable to copy from 26 | ! to_field (type(field)): variable to copy to 27 | subroutine copy_fields(from_field, to_field) 28 | 29 | implicit none 30 | 31 | type(field), intent(in) :: from_field 32 | type(field), intent(out) :: to_field 33 | 34 | ! Consistency checks 35 | if (.not.allocated(from_field%data)) then 36 | write (*,*) "Can not copy from a field without allocated data" 37 | stop 38 | end if 39 | if (.not.allocated(to_field%data)) then 40 | ! 
Target is not initialize, allocate memory 41 | allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), & 42 | & lbound(from_field%data, 2):ubound(from_field%data, 2))) 43 | else if (any(shape(from_field%data) /= shape(to_field%data))) then 44 | write (*,*) "Wrong field data sizes in copy routine" 45 | print *, shape(from_field%data), shape(to_field%data) 46 | stop 47 | end if 48 | 49 | to_field%data = from_field%data 50 | 51 | to_field%nx = from_field%nx 52 | to_field%ny = from_field%ny 53 | to_field%nx_full = from_field%nx_full 54 | to_field%ny_full = from_field%ny_full 55 | to_field%dx = from_field%dx 56 | to_field%dy = from_field%dy 57 | end subroutine copy_fields 58 | 59 | function average(field0, parallelization) 60 | 61 | implicit none 62 | 63 | real(dp) :: average 64 | type(field) :: field0 65 | type(parallel_data), intent(in) :: parallelization 66 | 67 | real(dp) :: local_average 68 | integer :: rc 69 | 70 | local_average = sum(field0%data(1:field0%nx, 1:field0%ny)) 71 | call mpi_allreduce(local_average, average, 1, MPI_DOUBLE_PRECISION, & 72 | & MPI_SUM, MPI_COMM_WORLD, rc) 73 | average = average / (field0%nx_full * field0%ny_full) 74 | 75 | end function average 76 | 77 | end module utilities 78 | -------------------------------------------------------------------------------- /mpi/message-chain-cartesian/README.md: -------------------------------------------------------------------------------- 1 | ## Message chain with Cartesian communicator 2 | 3 | Write a program where every MPI task sends data to the next one. 4 | Let `ntasks` be the number of the tasks, and `myid` the rank of the 5 | current task. Your program should work as follows: 6 | 7 | - Every task with a rank less than `ntasks-1` sends a message to task 8 | `myid+1`. For example, task 0 sends a message to task 1. 9 | - The message content is an integer array where each element is initialised to 10 | `myid`. 11 | - The message tag is the receiver's rank. 12 | - The sender prints out the number of elements it sends and the tag it used. 13 | - All tasks with rank > 0 receive messages. 14 | - Each receiver prints out their `myid` and the first element in the 15 | received array. 16 | 17 | 1. Create a Cartesian topology for the chain. Utilize MPI_Cart_shift for finding 18 | the neighbouring ranks and implement the communication with MPI point-to-point routines 19 | (either blocking or non-blocking). Use 20 | [cpp/skeleton.cpp](skeleton.cpp) or [skeleton.F90](skeleton.F90) 21 | as a starting point. 22 | 23 | 2. Make a version where the chain is periodic, i.e. task `ntasks-1` sends to task 0 24 | and every task receives. 25 | -------------------------------------------------------------------------------- /mpi/message-chain-cartesian/skeleton.F90: -------------------------------------------------------------------------------- 1 | program basic 2 | use mpi_f08 3 | use iso_fortran_env, only : REAL64 4 | 5 | implicit none 6 | integer, parameter :: size = 10000000 7 | integer :: rc, myid, ntasks 8 | integer :: message(size) 9 | integer :: receiveBuffer(size) 10 | integer :: status(MPI_STATUS_SIZE) 11 | integer :: requests(2) 12 | 13 | real(REAL64) :: t0, t1 14 | 15 | integer :: source, destination 16 | 17 | call mpi_init(rc) 18 | call mpi_comm_rank(MPI_COMM_WORLD, myid, rc) 19 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, rc) 20 | 21 | message = myid 22 | 23 | ! TODO: create a cartesian communicator 24 | ! and determine the source and destination ranks 25 | ! with the help of MPI_Cart_shift 26 | 27 | 28 | ! 
end TODO 29 | 30 | ! Start measuring the time spent in communication 31 | call mpi_barrier(mpi_comm_world, rc) 32 | t0 = mpi_wtime() 33 | 34 | ! TODO: Send messages 35 | 36 | write(*,'(A10,I3,A20,I8,A,I3,A,I3)') 'Sender: ', myid, & 37 | ' Sent elements: ',size, & 38 | '. Tag: ', myid+1, '. Receiver: ', destination 39 | 40 | ! TODO: Receive messages 41 | 42 | write(*,'(A10,I3,A,I3)') 'Receiver: ', myid, & 43 | ' First element: ', receiveBuffer(1) 44 | 45 | ! Finalize measuring the time and print it out 46 | t1 = mpi_wtime() 47 | call mpi_barrier(mpi_comm_world, rc) 48 | call flush(6) 49 | 50 | call print_ordered(t1 - t0) 51 | 52 | call mpi_finalize(rc) 53 | 54 | contains 55 | 56 | subroutine print_ordered(t) 57 | implicit none 58 | real(REAL64) :: t 59 | 60 | integer i 61 | 62 | if (myid == 0) then 63 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', myid, ':', t 64 | do i=1, ntasks-1 65 | call mpi_recv(t, 1, MPI_DOUBLE_PRECISION, i, 11, & 66 | MPI_COMM_WORLD, MPI_STATUS_IGNORE, rc) 67 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', i, ':', t 68 | end do 69 | else 70 | call mpi_send(t, 1, MPI_DOUBLE_PRECISION, 0, 11, & 71 | MPI_COMM_WORLD, rc) 72 | end if 73 | end subroutine print_ordered 74 | 75 | end program basic 76 | -------------------------------------------------------------------------------- /mpi/message-chain-cartesian/skeleton.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print_ordered(double t); 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int i, myid, ntasks; 10 | constexpr int size = 10000000; 11 | MPI_Status status; 12 | 13 | double t0, t1; 14 | 15 | int source, destination; 16 | 17 | MPI_Init(&argc, &argv); 18 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 19 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 20 | 21 | // Initialize message 22 | std::vector message(size, myid); 23 | std::vector receiveBuffer(size); 24 | 25 | // TODO: create a cartesian communicator 26 | // and determine the source and destination ranks 27 | // with the help of MPI_Cart_shift 28 | 29 | 30 | // end TODO 31 | 32 | // Start measuring the time spent in communication 33 | MPI_Barrier(MPI_COMM_WORLD); 34 | t0 = MPI_Wtime(); 35 | 36 | // TODO: Send messages 37 | 38 | printf("Sender: %d. Sent elements: %d. Tag: %d. Receiver: %d\n", 39 | myid, size, myid + 1, destination); 40 | 41 | // TODO: Receive messages 42 | 43 | printf("Receiver: %d. 
first element %d.\n", 44 | myid, receiveBuffer[0]); 45 | 46 | // Finalize measuring the time and print it out 47 | t1 = MPI_Wtime(); 48 | MPI_Barrier(MPI_COMM_WORLD); 49 | fflush(stdout); 50 | 51 | print_ordered(t1 - t0); 52 | 53 | MPI_Finalize(); 54 | return 0; 55 | } 56 | 57 | void print_ordered(double t) 58 | { 59 | int i, rank, ntasks; 60 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 61 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 62 | 63 | if (rank == 0) { 64 | printf("Time elapsed in rank %2d: %6.3f\n", rank, t); 65 | for (i = 1; i < ntasks; i++) { 66 | MPI_Recv(&t, 1, MPI_DOUBLE, i, 11, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 67 | printf("Time elapsed in rank %2d: %6.3f\n", i, t); 68 | } 69 | } else { 70 | MPI_Send(&t, 1, MPI_DOUBLE, 0, 11, MPI_COMM_WORLD); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /mpi/message-chain-cartesian/solution/chain-periodic.F90: -------------------------------------------------------------------------------- 1 | program basic 2 | use mpi_f08 3 | use iso_fortran_env, only : REAL64 4 | 5 | implicit none 6 | integer, parameter :: size = 10000000 7 | integer :: rc, myid, ntasks 8 | integer :: message(size) 9 | integer :: receiveBuffer(size) 10 | type(mpi_status) :: status 11 | integer :: requests(2) 12 | 13 | integer :: ndims, dims(1), cart_id 14 | type(mpi_comm) :: cart_comm 15 | logical :: reorder, periods(1) 16 | 17 | real(REAL64) :: t0, t1 18 | 19 | integer :: source, destination 20 | 21 | call mpi_init(rc) 22 | call mpi_comm_rank(MPI_COMM_WORLD, myid, rc) 23 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, rc) 24 | 25 | message = myid 26 | 27 | ndims = 1 28 | dims(1) = ntasks 29 | periods(1) = .true. 30 | reorder = .true. 31 | call mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart_comm, rc) 32 | call mpi_cart_shift(cart_comm, 0, 1, source, destination, rc) 33 | call mpi_comm_rank(cart_comm, cart_id, rc) 34 | 35 | 36 | ! Start measuring the time spent in communication 37 | call mpi_barrier(mpi_comm_world, rc) 38 | t0 = mpi_wtime() 39 | 40 | ! Send and receive messages 41 | call mpi_sendrecv(message, size, MPI_INTEGER, destination, destination, & 42 | receiveBuffer, size, MPI_INTEGER, source, cart_id, & 43 | cart_comm, status, rc) 44 | write(*,'(A10,I3,A20,I8,A,I3,A,I3)') 'Sender: ', myid, & 45 | ' Sent elements: ', size, & 46 | '. Tag: ', myid + 1, & 47 | '. Receiver: ', destination 48 | write(*,'(A10,I3,A,I3)') 'Receiver: ', myid, & 49 | ' First element: ', receiveBuffer(1) 50 | 51 | ! 
Finalize measuring the time and print it out 52 | t1 = mpi_wtime() 53 | call mpi_barrier(mpi_comm_world, rc) 54 | call flush(6) 55 | 56 | call print_ordered(t1 - t0) 57 | call mpi_comm_free(cart_comm, rc) 58 | 59 | call mpi_finalize(rc) 60 | 61 | contains 62 | 63 | subroutine print_ordered(t) 64 | implicit none 65 | real(REAL64) :: t 66 | 67 | integer i 68 | 69 | if (myid == 0) then 70 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', myid, ':', t 71 | do i=1, ntasks-1 72 | call mpi_recv(t, 1, MPI_DOUBLE_PRECISION, i, 11, & 73 | MPI_COMM_WORLD, MPI_STATUS_IGNORE, rc) 74 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', i, ':', t 75 | end do 76 | else 77 | call mpi_send(t, 1, MPI_DOUBLE_PRECISION, 0, 11, & 78 | MPI_COMM_WORLD, rc) 79 | end if 80 | end subroutine print_ordered 81 | 82 | end program basic 83 | -------------------------------------------------------------------------------- /mpi/message-chain-cartesian/solution/chain-periodic.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print_ordered(double t); 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int i, myid, ntasks; 10 | constexpr int size = 10000000; 11 | MPI_Status status; 12 | 13 | double t0, t1; 14 | 15 | int source, destination; 16 | 17 | MPI_Init(&argc, &argv); 18 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 19 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 20 | 21 | // Initialize message 22 | std::vector message(size, myid); 23 | std::vector receiveBuffer(size); 24 | 25 | // TODO: create a cartesian communicator 26 | // and determine the source and destination ranks 27 | // with the help of MPI_Cart_shift 28 | MPI_Comm cart_comm; 29 | int ndims = 1; 30 | int dims[1] = {ntasks}; 31 | int periods[1] = {1}; 32 | 33 | MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, 1, &cart_comm); 34 | MPI_Cart_shift(cart_comm, 0, 1, &source, &destination); 35 | int cart_id; 36 | MPI_Comm_rank(cart_comm, &cart_id); 37 | 38 | // end TODO 39 | 40 | // Start measuring the time spent in communication 41 | MPI_Barrier(MPI_COMM_WORLD); 42 | t0 = MPI_Wtime(); 43 | 44 | 45 | MPI_Sendrecv(message.data(), message.size(), MPI_INT, destination, destination, 46 | receiveBuffer.data(), receiveBuffer.size(), MPI_INT, source, cart_id, 47 | cart_comm, MPI_STATUS_IGNORE); 48 | printf("Sender: %d. Sent elements: %d. Tag: %d. Receiver: %d\n", 49 | myid, size, myid + 1, destination); 50 | 51 | printf("Receiver: %d. 
first element %d.\n", 52 | myid, receiveBuffer[0]); 53 | 54 | // Finalize measuring the time and print it out 55 | t1 = MPI_Wtime(); 56 | MPI_Barrier(MPI_COMM_WORLD); 57 | fflush(stdout); 58 | 59 | print_ordered(t1 - t0); 60 | 61 | MPI_Comm_free(&cart_comm); 62 | MPI_Finalize(); 63 | return 0; 64 | } 65 | 66 | void print_ordered(double t) 67 | { 68 | int i, rank, ntasks; 69 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 70 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 71 | 72 | if (rank == 0) { 73 | printf("Time elapsed in rank %2d: %6.3f\n", rank, t); 74 | for (i = 1; i < ntasks; i++) { 75 | MPI_Recv(&t, 1, MPI_DOUBLE, i, 11, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 76 | printf("Time elapsed in rank %2d: %6.3f\n", i, t); 77 | } 78 | } else { 79 | MPI_Send(&t, 1, MPI_DOUBLE, 0, 11, MPI_COMM_WORLD); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /mpi/message-chain-cartesian/solution/chain.F90: -------------------------------------------------------------------------------- 1 | program basic 2 | use mpi_f08 3 | use iso_fortran_env, only : REAL64 4 | 5 | implicit none 6 | integer, parameter :: size = 10000000 7 | integer :: rc, myid, ntasks 8 | integer :: message(size) 9 | integer :: receiveBuffer(size) 10 | type(mpi_status) :: status 11 | integer :: requests(2) 12 | 13 | integer :: ndims, dims(1), cart_id 14 | type(mpi_comm) :: cart_comm 15 | logical :: reorder, periods(1) 16 | 17 | real(REAL64) :: t0, t1 18 | 19 | integer :: source, destination 20 | 21 | call mpi_init(rc) 22 | call mpi_comm_rank(MPI_COMM_WORLD, myid, rc) 23 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, rc) 24 | 25 | message = myid 26 | 27 | ndims = 1 28 | dims(1) = ntasks 29 | periods(1) = .false. 30 | reorder = .true. 31 | call mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart_comm, rc) 32 | call mpi_cart_shift(cart_comm, 0, 1, source, destination, rc) 33 | call mpi_comm_rank(cart_comm, cart_id, rc) 34 | 35 | 36 | ! Start measuring the time spent in communication 37 | call mpi_barrier(mpi_comm_world, rc) 38 | t0 = mpi_wtime() 39 | 40 | ! Send and receive messages 41 | call mpi_sendrecv(message, size, MPI_INTEGER, destination, cart_id + 1, & 42 | receiveBuffer, size, MPI_INTEGER, source, cart_id, & 43 | cart_comm, status, rc) 44 | write(*,'(A10,I3,A20,I8,A,I3,A,I3)') 'Sender: ', myid, & 45 | ' Sent elements: ', size, & 46 | '. Tag: ', myid + 1, & 47 | '. Receiver: ', destination 48 | write(*,'(A10,I3,A,I3)') 'Receiver: ', myid, & 49 | ' First element: ', receiveBuffer(1) 50 | 51 | ! 
Finalize measuring the time and print it out 52 | t1 = mpi_wtime() 53 | call mpi_barrier(mpi_comm_world, rc) 54 | call flush(6) 55 | 56 | call print_ordered(t1 - t0) 57 | call mpi_comm_free(cart_comm, rc) 58 | 59 | call mpi_finalize(rc) 60 | 61 | contains 62 | 63 | subroutine print_ordered(t) 64 | implicit none 65 | real(REAL64) :: t 66 | 67 | integer i 68 | 69 | if (myid == 0) then 70 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', myid, ':', t 71 | do i=1, ntasks-1 72 | call mpi_recv(t, 1, MPI_DOUBLE_PRECISION, i, 11, & 73 | MPI_COMM_WORLD, MPI_STATUS_IGNORE, rc) 74 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', i, ':', t 75 | end do 76 | else 77 | call mpi_send(t, 1, MPI_DOUBLE_PRECISION, 0, 11, & 78 | MPI_COMM_WORLD, rc) 79 | end if 80 | end subroutine print_ordered 81 | 82 | end program basic 83 | -------------------------------------------------------------------------------- /mpi/message-chain-cartesian/solution/chain.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print_ordered(double t); 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int i, myid, ntasks; 10 | constexpr int size = 10000000; 11 | MPI_Status status; 12 | 13 | double t0, t1; 14 | 15 | int source, destination; 16 | 17 | MPI_Init(&argc, &argv); 18 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 19 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 20 | 21 | // Initialize message 22 | std::vector message(size, myid); 23 | std::vector receiveBuffer(size); 24 | 25 | // TODO: create a cartesian communicator 26 | // and determine the source and destination ranks 27 | // with the help of MPI_Cart_shift 28 | MPI_Comm cart_comm; 29 | int ndims = 1; 30 | int dims[1] = {ntasks}; 31 | int periods[1] = {0}; 32 | 33 | MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, 1, &cart_comm); 34 | MPI_Cart_shift(cart_comm, 0, 1, &source, &destination); 35 | int cart_id; 36 | MPI_Comm_rank(cart_comm, &cart_id); 37 | 38 | // end TODO 39 | 40 | // Start measuring the time spent in communication 41 | MPI_Barrier(MPI_COMM_WORLD); 42 | t0 = MPI_Wtime(); 43 | 44 | 45 | MPI_Sendrecv(message.data(), message.size(), MPI_INT, destination, cart_id + 1, 46 | receiveBuffer.data(), receiveBuffer.size(), MPI_INT, source, cart_id, 47 | cart_comm, MPI_STATUS_IGNORE); 48 | printf("Sender: %d. Sent elements: %d. Tag: %d. Receiver: %d\n", 49 | myid, size, myid + 1, destination); 50 | 51 | printf("Receiver: %d. 
first element %d.\n", 52 | myid, receiveBuffer[0]); 53 | 54 | // Finalize measuring the time and print it out 55 | t1 = MPI_Wtime(); 56 | MPI_Barrier(MPI_COMM_WORLD); 57 | fflush(stdout); 58 | 59 | print_ordered(t1 - t0); 60 | 61 | MPI_Comm_free(&cart_comm); 62 | MPI_Finalize(); 63 | return 0; 64 | } 65 | 66 | void print_ordered(double t) 67 | { 68 | int i, rank, ntasks; 69 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 70 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 71 | 72 | if (rank == 0) { 73 | printf("Time elapsed in rank %2d: %6.3f\n", rank, t); 74 | for (i = 1; i < ntasks; i++) { 75 | MPI_Recv(&t, 1, MPI_DOUBLE, i, 11, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 76 | printf("Time elapsed in rank %2d: %6.3f\n", i, t); 77 | } 78 | } else { 79 | MPI_Send(&t, 1, MPI_DOUBLE, 0, 11, MPI_COMM_WORLD); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /mpi/message-chain-persistent/README.md: -------------------------------------------------------------------------------- 1 | ## Message chain with persistent communication 2 | 3 | Write a program where every MPI task sends data to the next one. 4 | Let `ntasks` be the number of the tasks, and `myid` the rank of the 5 | current task. Your program should work as follows: 6 | 7 | - Every task with a rank less than `ntasks-1` sends a message to task 8 | `myid+1`. For example, task 0 sends a message to task 1. 9 | - The message content is an integer array where each element is initialised to 10 | `myid`. 11 | - The message tag is the receiver's rank. 12 | - The sender prints out the number of elements it sends and the tag it used. 13 | - All tasks with rank > 0 receive messages. 14 | - Each receiver prints out their `myid` and the first element in the 15 | received array. 16 | 17 | 1. Implement the program described above using persistent communication, *i.e.* 18 | `MPI_Send_init`, `MPI_Recv_init`, `MPI_Start` and `MPI_Wait`. 19 | You may start from scratch or use the skeleton code 20 | ([skeleton.cpp](skeleton.cpp) or [skeleton.F90](skeleton.F90)) 21 | as a starting point. 22 | 23 | 2. Write a version that uses `MPI_Startall` and `MPI_Waitall` instead of `MPI_Start`s and `MPI_Wait`s. 24 | -------------------------------------------------------------------------------- /mpi/message-chain-persistent/skeleton.F90: -------------------------------------------------------------------------------- 1 | program basic 2 | use mpi_f08 3 | use iso_fortran_env, only : REAL64 4 | 5 | implicit none 6 | integer, parameter :: size = 10000000 7 | integer :: rc, myid, ntasks 8 | integer :: message(size) 9 | integer :: receiveBuffer(size) 10 | integer :: status(MPI_STATUS_SIZE) 11 | integer :: requests(2) 12 | 13 | real(REAL64) :: t0, t1 14 | 15 | integer :: source, destination 16 | 17 | call mpi_init(rc) 18 | call mpi_comm_rank(MPI_COMM_WORLD, myid, rc) 19 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, rc) 20 | 21 | message = myid 22 | 23 | ! TODO: set source and destination ranks 24 | ! Treat boundaries with MPI_PROC_NULL 25 | ! You may utilize also Cartesian topology and MPI_Cart_shift 26 | ! instead manual determination 27 | 28 | 29 | ! end TODO 30 | 31 | ! Start measuring the time spent in communication 32 | call mpi_barrier(mpi_comm_world, rc) 33 | t0 = mpi_wtime() 34 | 35 | ! TODO: Send and receive messages 36 | 37 | write(*,'(A10,I3,A20,I8,A,I3,A,I3)') 'Sender: ', myid, & 38 | ' Sent elements: ',size, & 39 | '. Tag: ', myid+1, '. 
Receiver: ', destination 40 | 41 | write(*,'(A10,I3,A,I3)') 'Receiver: ', myid, & 42 | ' First element: ', receiveBuffer(1) 43 | 44 | ! Finalize measuring the time and print it out 45 | t1 = mpi_wtime() 46 | call mpi_barrier(mpi_comm_world, rc) 47 | call flush(6) 48 | 49 | call print_ordered(t1 - t0) 50 | 51 | call mpi_finalize(rc) 52 | 53 | contains 54 | 55 | subroutine print_ordered(t) 56 | implicit none 57 | real(REAL64) :: t 58 | 59 | integer i 60 | 61 | if (myid == 0) then 62 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', myid, ':', t 63 | do i=1, ntasks-1 64 | call mpi_recv(t, 1, MPI_DOUBLE_PRECISION, i, 11, & 65 | MPI_COMM_WORLD, MPI_STATUS_IGNORE, rc) 66 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', i, ':', t 67 | end do 68 | else 69 | call mpi_send(t, 1, MPI_DOUBLE_PRECISION, 0, 11, & 70 | MPI_COMM_WORLD, rc) 71 | end if 72 | end subroutine print_ordered 73 | 74 | end program basic 75 | -------------------------------------------------------------------------------- /mpi/message-chain-persistent/skeleton.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print_ordered(double t); 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int i, myid, ntasks; 10 | constexpr int size = 10000000; 11 | MPI_Status status; 12 | 13 | double t0, t1; 14 | 15 | MPI_Init(&argc, &argv); 16 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 17 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 18 | 19 | // Initialize message 20 | std::vector message(size, myid); 21 | std::vector receiveBuffer(size); 22 | 23 | // TODO: set source and destination ranks 24 | // Treat boundaries with MPI_PROC_NULL 25 | // You may utilize also Cartesian topology and MPI_Cart_shift 26 | // instead manual determination 27 | 28 | int source, destination; 29 | 30 | 31 | // end TODO 32 | 33 | // Start measuring the time spent in communication 34 | MPI_Barrier(MPI_COMM_WORLD); 35 | t0 = MPI_Wtime(); 36 | 37 | // TODO: Send and receive messages 38 | 39 | printf("Sender: %d. Sent elements: %d. Tag: %d. Receiver: %d\n", 40 | myid, size, myid + 1, destination); 41 | 42 | printf("Receiver: %d. first element %d.\n", 43 | myid, receiveBuffer[0]); 44 | 45 | // Finalize measuring the time and print it out 46 | t1 = MPI_Wtime(); 47 | MPI_Barrier(MPI_COMM_WORLD); 48 | fflush(stdout); 49 | 50 | print_ordered(t1 - t0); 51 | 52 | MPI_Finalize(); 53 | return 0; 54 | } 55 | 56 | void print_ordered(double t) 57 | { 58 | int i, rank, ntasks; 59 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 60 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 61 | 62 | if (rank == 0) { 63 | printf("Time elapsed in rank %2d: %6.3f\n", rank, t); 64 | for (i = 1; i < ntasks; i++) { 65 | MPI_Recv(&t, 1, MPI_DOUBLE, i, 11, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 66 | printf("Time elapsed in rank %2d: %6.3f\n", i, t); 67 | } 68 | } else { 69 | MPI_Send(&t, 1, MPI_DOUBLE, 0, 11, MPI_COMM_WORLD); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /mpi/neighbor-exchange/README.md: -------------------------------------------------------------------------------- 1 | ## "Halo exchange" with neighborhood collectives 2 | 3 | Consider a parallel program where processes are arranged in a one 4 | dimensional chain. Each process stores local data in an array, but 5 | reserves also "ghost layers" for communicating with neighboring 6 | processes. 
Initially, the ghost data is zero, and real data consists 7 | of a sequence of integers: 8 | 9 | ``` 10 | Task 0: 0 0 1 2 3 0 11 | Task 1: 0 4 5 6 7 0 12 | Task 2: 0 8 9 10 11 0 13 | ... 14 | ``` 15 | 16 | Implement a program where each process sends the boundary values to 17 | neighboring processes, so that after communication the arrays 18 | (including ghost layers) look like: 19 | ``` 20 | ... 21 | Task 2: 7 8 9 10 11 12 22 | Task 3: 11 12 13 14 15 16 23 | Task 4: 15 16 17 18 19 20 24 | ... 25 | ``` 26 | 27 | 1. Create a Cartesian communicator for the chain 28 | 2. Copy boundary values to a send buffer 29 | 3. Perform the communication with an appropriate neighborhood 30 | collective routine 31 | 4. Copy values from the receive buffer to the ghost layers. 32 | 33 | Try both finite and periodic chains. 34 | 35 | You may start from scratch or use [skeleton.cpp](skeleton.cpp) (or 36 | [skeleton.F90](skeleton.F90) for Fortran) as a starting point. 37 | -------------------------------------------------------------------------------- /mpi/neighbor-exchange/skeleton.F90: -------------------------------------------------------------------------------- 1 | program neighborhood 2 | use mpi_f08 3 | implicit none 4 | 5 | integer :: ntasks, cart_id, i, rc 6 | integer, parameter :: dsize = 4 7 | integer :: data(0:dsize+1) 8 | integer :: sendbuf(2), recvbuf(2) 9 | 10 | type(mpi_comm) :: cart_comm 11 | integer, parameter :: ndims = 1 12 | integer :: dims(1) 13 | logical :: reorder, periods(1) 14 | 15 | call mpi_init(rc) 16 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, rc) 17 | 18 | ! TODO create cartesian communicator 19 | 20 | ! TODO end 21 | 22 | call mpi_comm_rank(cart_comm, cart_id, rc) 23 | 24 | ! Initialize data 25 | data = 0 26 | do i=1, ntasks 27 | data(i) = cart_id * dsize + (i - 1) 28 | end do 29 | 30 | ! print out initial data 31 | if (cart_id == 0) then 32 | write(*,*) 'Initial data' 33 | end if 34 | 35 | call print_buffers(data, cart_comm) 36 | 37 | ! TODO implement halo exchange with neighborhood communication 38 | ! Copy data to send buffer 39 | 40 | ! Communicate with neighbor 41 | 42 | ! Copy receive buffer to "halo" 43 | 44 | ! TODO end 45 | 46 | ! 
print out results 47 | if (cart_id == 0) then 48 | write(*,*) 'Final data' 49 | end if 50 | 51 | call print_buffers(data, cart_comm) 52 | 53 | call mpi_finalize(rc) 54 | 55 | contains 56 | 57 | subroutine print_buffers(buffer, comm) 58 | implicit none 59 | integer, dimension(:), intent(in) :: buffer 60 | type(mpi_comm), intent(in) :: comm 61 | integer, dimension(:), allocatable :: printbuffer 62 | integer :: bufsize, rank, ntasks 63 | integer :: i 64 | 65 | bufsize = size(buffer) 66 | call mpi_comm_size(comm, ntasks) 67 | call mpi_comm_rank(comm, rank) 68 | allocate(printbuffer(ntasks * bufsize)) 69 | 70 | call mpi_gather(buffer, bufsize, MPI_INTEGER, & 71 | & printbuffer, bufsize, MPI_INTEGER, & 72 | & 0, comm, rc) 73 | 74 | if (rank == 0) then 75 | do i = 1, ntasks 76 | write(*,'(A,I3,A,*(I3))') 'Task', i - 1, ': ', printbuffer((i-1)*bufsize+1:i*bufsize) 77 | end do 78 | print * 79 | end if 80 | 81 | deallocate(printbuffer) 82 | 83 | end subroutine print_buffers 84 | 85 | 86 | end program 87 | -------------------------------------------------------------------------------- /mpi/neighbor-exchange/skeleton.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print_buffers(std::vector &buffer, MPI_Comm comm); 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int myid, ntasks; 10 | constexpr int size = 4; 11 | std::vector data(size + 2, 0); 12 | std::vector sendbuf(2), recvbuf(2); 13 | MPI_Status status; 14 | 15 | MPI_Init(&argc, &argv); 16 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 17 | 18 | // TODO create cartesian communicator 19 | MPI_Comm cart_comm; 20 | 21 | // TODO end 22 | 23 | int cart_id; 24 | MPI_Comm_rank(cart_comm, &cart_id); 25 | 26 | // Inner part of local data 27 | for (int i = 1; i < size + 1; i++) { 28 | data[i] = cart_id * size + (i - 1); 29 | } 30 | 31 | // Print out initial data 32 | if (0 == cart_id) 33 | printf("Initial data\n"); 34 | print_buffers(data, cart_comm); 35 | 36 | // TODO implement halo exchange with neighborhood communication 37 | // Copy data to send buffer 38 | 39 | // Communicate with neighbor 40 | 41 | 42 | // Copy receive buffer to "halo" 43 | 44 | 45 | // TODO end 46 | 47 | // Print out results 48 | if (0 == cart_id) 49 | printf("Final data\n"); 50 | print_buffers(data, cart_comm); 51 | 52 | MPI_Finalize(); 53 | } 54 | 55 | void print_buffers(std::vector &buffer, MPI_Comm comm) 56 | { 57 | int rank, ntasks; 58 | MPI_Comm_rank(comm, &rank); 59 | MPI_Comm_size(comm, &ntasks); 60 | 61 | const int buffersize = buffer.size(); 62 | std::vector printbuffer(ntasks * buffersize); 63 | 64 | MPI_Gather(buffer.data(), buffersize, MPI_INT, 65 | printbuffer.data(), buffersize, MPI_INT, 0, MPI_COMM_WORLD); 66 | 67 | if (rank == 0) { 68 | for (int j = 0; j < ntasks; j++) { 69 | printf("Task %i:", j); 70 | for (int i = 0; i < buffersize; i++) { 71 | printf(" %2i", printbuffer[i + buffersize * j]); 72 | } 73 | printf("\n"); 74 | } 75 | printf("\n"); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /mpi/neighbor-exchange/solution/neighbor-exchange.F90: -------------------------------------------------------------------------------- 1 | program neighborhood 2 | use mpi_f08 3 | implicit none 4 | 5 | integer :: ntasks, cart_id, i, rc 6 | integer, parameter :: dsize = 4 7 | integer :: data(0:dsize+1) 8 | integer :: sendbuf(2), recvbuf(2) 9 | 10 | type(mpi_comm) :: cart_comm 11 | integer, parameter :: ndims = 1 12 | integer :: dims(1) 13 | 
logical :: reorder, periods(1) 14 | 15 | call mpi_init(rc) 16 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, rc) 17 | 18 | dims = ntasks 19 | reorder = .true. 20 | periods(1) = .false. 21 | 22 | call mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart_comm, rc) 23 | call mpi_comm_rank(cart_comm, cart_id, rc) 24 | 25 | ! Initialize data 26 | data = 0 27 | do i=1, ntasks 28 | data(i) = cart_id * dsize + (i - 1) 29 | end do 30 | 31 | ! print out initial data 32 | if (cart_id == 0) then 33 | write(*,*) 'Initial data' 34 | end if 35 | 36 | call print_buffers(data, cart_comm) 37 | 38 | ! Copy data to send buffer 39 | sendbuf(1) = data(1) 40 | sendbuf(2) = data(dsize) 41 | 42 | ! Communicate with neighbor 43 | call mpi_neighbor_alltoall(sendbuf, 1, MPI_INTEGER, recvbuf, 1, MPI_INTEGER, cart_comm, rc) 44 | 45 | ! Copy receive buffer to "halo" 46 | data(0) = recvbuf(1) 47 | data(dsize + 1) = recvbuf(2) 48 | 49 | ! print out results 50 | if (cart_id == 0) then 51 | write(*,*) 'Final data' 52 | end if 53 | 54 | call print_buffers(data, cart_comm) 55 | 56 | call mpi_finalize(rc) 57 | 58 | contains 59 | 60 | subroutine print_buffers(buffer, comm) 61 | implicit none 62 | integer, dimension(:), intent(in) :: buffer 63 | type(mpi_comm), intent(in) :: comm 64 | integer, dimension(:), allocatable :: printbuffer 65 | integer :: bufsize, rank, ntasks 66 | integer :: i 67 | 68 | bufsize = size(buffer) 69 | call mpi_comm_size(comm, ntasks) 70 | call mpi_comm_rank(comm, rank) 71 | allocate(printbuffer(ntasks * bufsize)) 72 | 73 | call mpi_gather(buffer, bufsize, MPI_INTEGER, & 74 | & printbuffer, bufsize, MPI_INTEGER, & 75 | & 0, comm, rc) 76 | 77 | if (rank == 0) then 78 | do i = 1, ntasks 79 | write(*,'(A,I3,A,*(I3))') 'Task', i - 1, ': ', printbuffer((i-1)*bufsize+1:i*bufsize) 80 | end do 81 | print * 82 | end if 83 | 84 | deallocate(printbuffer) 85 | 86 | end subroutine print_buffers 87 | 88 | 89 | end program 90 | -------------------------------------------------------------------------------- /mpi/neighbor-exchange/solution/neighbor-exchange.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print_buffers(std::vector &buffer, MPI_Comm comm); 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int myid, ntasks; 10 | constexpr int size = 4; 11 | std::vector data(size + 2, 0); 12 | std::vector sendbuf(2), recvbuf(2); 13 | MPI_Status status; 14 | 15 | MPI_Init(&argc, &argv); 16 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 17 | 18 | MPI_Comm cart_comm; 19 | int ndims = 1; 20 | int dims[1] = {ntasks}; 21 | int periods[1] = {1}; 22 | 23 | MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, 1, &cart_comm); 24 | int cart_id; 25 | MPI_Comm_rank(cart_comm, &cart_id); 26 | 27 | // Inner part of local data 28 | for (int i = 1; i < size + 1; i++) { 29 | data[i] = cart_id * size + (i - 1); 30 | } 31 | 32 | // Print out initial data 33 | if (0 == cart_id) 34 | printf("Initial data\n"); 35 | print_buffers(data, cart_comm); 36 | 37 | // Copy data to send buffer 38 | sendbuf[0] = data[1]; 39 | sendbuf[1] = data[size]; 40 | 41 | // Communicate with neighbor 42 | MPI_Neighbor_alltoall(sendbuf.data(), 1, MPI_INT, recvbuf.data(), 43 | 1, MPI_INT, cart_comm); 44 | 45 | 46 | // Copy receive buffer to "halo" 47 | data[0] = recvbuf[0]; 48 | data[size + 1] = recvbuf[1]; 49 | 50 | // Print out results 51 | if (0 == cart_id) 52 | printf("Final data\n"); 53 | print_buffers(data, cart_comm); 54 | 55 | MPI_Finalize(); 56 | } 57 | 58 | void 
print_buffers(std::vector &buffer, MPI_Comm comm) 59 | { 60 | int rank, ntasks; 61 | MPI_Comm_rank(comm, &rank); 62 | MPI_Comm_size(comm, &ntasks); 63 | 64 | const int buffersize = buffer.size(); 65 | std::vector printbuffer(ntasks * buffersize); 66 | 67 | MPI_Gather(buffer.data(), buffersize, MPI_INT, 68 | printbuffer.data(), buffersize, MPI_INT, 0, MPI_COMM_WORLD); 69 | 70 | if (rank == 0) { 71 | for (int j = 0; j < ntasks; j++) { 72 | printf("Task %i:", j); 73 | for (int i = 0; i < buffersize; i++) { 74 | printf(" %2i", printbuffer[i + buffersize * j]); 75 | } 76 | printf("\n"); 77 | } 78 | printf("\n"); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/README.md: -------------------------------------------------------------------------------- 1 | ## Using custom datatypes 2 | 3 | Write a program that sends the highlighted elements of a 2D array 4 | using user defined datatypes from one MPI task to another. Note the 5 | different assignments for C and Fortran, and remember that C stores 6 | arrays in a row-major order and Fortran in a column-major order. You can 7 | start from skeleton codes in [C](skeleton.c) or [Fortran](skeleton.F90) 8 | 9 | a) 10 | 11 | ![](img/vector.png) 12 | 13 | b) 14 | 15 | ![](img/indexed.png) 16 | 17 | c) 18 | 19 | ![](img/subarray.png) 20 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/img/indexed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/mpi/simple-datatypes/img/indexed.png -------------------------------------------------------------------------------- /mpi/simple-datatypes/img/subarray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/mpi/simple-datatypes/img/subarray.png -------------------------------------------------------------------------------- /mpi/simple-datatypes/img/vector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/mpi/simple-datatypes/img/vector.png -------------------------------------------------------------------------------- /mpi/simple-datatypes/skeleton.F90: -------------------------------------------------------------------------------- 1 | program datatype1 2 | use mpi_f08 3 | implicit none 4 | 5 | integer, dimension(8,8) :: array 6 | integer :: rank, ierr 7 | !TODO: declare variable for datatype 8 | integer :: i, j 9 | 10 | call mpi_init(ierr) 11 | call mpi_comm_rank(MPI_COMM_WORLD, rank ,ierr) 12 | 13 | ! initialize arrays 14 | if (rank == 0) then 15 | do i=1,8 16 | do j=1,8 17 | array(i,j) = i*10 + j 18 | end do 19 | end do 20 | else 21 | array(:,:) = 0 22 | end if 23 | 24 | if (rank == 0) then 25 | write(*,*) 'Data in rank 0' 26 | do i=1,8 27 | write(*,'(8I3)') array(i, :) 28 | end do 29 | end if 30 | 31 | 32 | !TODO: create datatype 33 | 34 | !TODO: communicate with datatype 35 | 36 | !TODO: free datatype 37 | 38 | ! 
Print out the result 39 | if (rank == 1) then 40 | write(*,*) 'Received data' 41 | do i=1,8 42 | write(*,'(8I3)') array(i, :) 43 | end do 44 | end if 45 | 46 | 47 | call mpi_finalize(ierr) 48 | 49 | end program datatype1 50 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/skeleton.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int rank; 7 | int array[8][8]; 8 | //TODO: Declare a variable storing the MPI datatype 9 | 10 | int i, j; 11 | 12 | MPI_Init(&argc, &argv); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | 15 | // Initialize arrays 16 | if (rank == 0) { 17 | for (i = 0; i < 8; i++) { 18 | for (j = 0; j < 8; j++) { 19 | array[i][j] = (i + 1) * 10 + j + 1; 20 | } 21 | } 22 | } else { 23 | for (i = 0; i < 8; i++) { 24 | for (j = 0; j < 8; j++) { 25 | array[i][j] = 0; 26 | } 27 | } 28 | } 29 | 30 | if (rank == 0) { 31 | printf("Data in rank 0\n"); 32 | for (i = 0; i < 8; i++) { 33 | for (j = 0; j < 8; j++) { 34 | printf("%3d", array[i][j]); 35 | } 36 | printf("\n"); 37 | } 38 | } 39 | 40 | //TODO: Create datatype 41 | 42 | //TODO: Send data 43 | 44 | //TODO: Free datatype 45 | 46 | // Print out the result on rank 1 47 | if (rank == 1) { 48 | printf("Received data\n"); 49 | for (i = 0; i < 8; i++) { 50 | for (j = 0; j < 8; j++) { 51 | printf("%3d", array[i][j]); 52 | } 53 | printf("\n"); 54 | } 55 | } 56 | 57 | MPI_Finalize(); 58 | 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/solution/custom_type_a.F90: -------------------------------------------------------------------------------- 1 | program datatype1 2 | use mpi_f08 3 | implicit none 4 | 5 | integer, dimension(8,8) :: array 6 | integer :: rank, ierr 7 | type(mpi_datatype) :: rowtype 8 | integer :: i, j 9 | 10 | call mpi_init(ierr) 11 | call mpi_comm_rank(MPI_COMM_WORLD, rank ,ierr) 12 | 13 | ! initialize arrays 14 | if (rank == 0) then 15 | do i=1,8 16 | do j=1,8 17 | array(i,j) = i*10 + j 18 | end do 19 | end do 20 | else 21 | array(:,:) = 0 22 | end if 23 | 24 | if (rank == 0) then 25 | write(*,*) 'Data in rank 0' 26 | do i=1,8 27 | write(*,'(8I3)') array(i, :) 28 | end do 29 | end if 30 | 31 | ! create datatype 32 | call mpi_type_vector(8, 1, 8, MPI_INTEGER, rowtype, ierr) 33 | call mpi_type_commit(rowtype, ierr) 34 | 35 | ! send first row of matrix 36 | if (rank == 0) then 37 | call mpi_send(array(2, 1), 1, rowtype, 1, 1, MPI_COMM_WORLD, ierr) 38 | else if (rank == 1) then 39 | call mpi_recv(array(2, 1), 1, rowtype, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, & 40 | ierr) 41 | end if 42 | 43 | ! 
Print out the result 44 | if (rank == 1) then 45 | write(*,*) 'Received data' 46 | do i=1,8 47 | write(*,'(8I3)') array(i, :) 48 | end do 49 | end if 50 | 51 | call mpi_type_free(rowtype, ierr) 52 | call mpi_finalize(ierr) 53 | 54 | end program datatype1 55 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/solution/custom_type_a.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int rank; 7 | int array[8][8]; 8 | MPI_Datatype columntype; 9 | 10 | int i, j; 11 | 12 | MPI_Init(&argc, &argv); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | 15 | // Initialize arrays 16 | if (rank == 0) { 17 | for (i = 0; i < 8; i++) { 18 | for (j = 0; j < 8; j++) { 19 | array[i][j] = (i + 1) * 10 + j + 1; 20 | } 21 | } 22 | } else { 23 | for (i = 0; i < 8; i++) { 24 | for (j = 0; j < 8; j++) { 25 | array[i][j] = 0; 26 | } 27 | } 28 | } 29 | 30 | if (rank == 0) { 31 | printf("Data in rank 0\n"); 32 | for (i = 0; i < 8; i++) { 33 | for (j = 0; j < 8; j++) { 34 | printf("%3d", array[i][j]); 35 | } 36 | printf("\n"); 37 | } 38 | } 39 | 40 | //TODO: Create datatype that describes one column. Use MPI_Type_vector. 41 | 42 | // Create datatype 43 | MPI_Type_vector(8, 1, 8, MPI_INT, &columntype); 44 | MPI_Type_commit(&columntype); 45 | 46 | // Send first column of matrix 47 | if (rank == 0) { 48 | MPI_Send(&array[0][1], 1, columntype, 1, 1, MPI_COMM_WORLD); 49 | } else if (rank == 1) { 50 | MPI_Recv(&array[0][1], 1, columntype, 0, 1, MPI_COMM_WORLD, 51 | MPI_STATUS_IGNORE); 52 | } 53 | 54 | // Print out the result 55 | if (rank == 1) { 56 | printf("Received data\n"); 57 | for (i = 0; i < 8; i++) { 58 | for (j = 0; j < 8; j++) { 59 | printf("%3d", array[i][j]); 60 | } 61 | printf("\n"); 62 | } 63 | } 64 | 65 | MPI_Type_free(&columntype); 66 | MPI_Finalize(); 67 | 68 | return 0; 69 | } 70 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/solution/custom_type_b.F90: -------------------------------------------------------------------------------- 1 | program datatype1 2 | use mpi_f08 3 | implicit none 4 | 5 | integer, dimension(8,8) :: array 6 | integer :: rank, ierr 7 | type(mpi_datatype) :: indexedtype 8 | integer, dimension(4) :: counts, displs 9 | integer :: i, j 10 | 11 | call mpi_init(ierr) 12 | call mpi_comm_rank(MPI_COMM_WORLD, rank ,ierr) 13 | 14 | ! initialize arrays 15 | if (rank == 0) then 16 | do i=1,8 17 | do j=1,8 18 | array(i,j) = i*10 + j 19 | end do 20 | end do 21 | else 22 | array(:,:) = 0 23 | end if 24 | 25 | if (rank == 0) then 26 | write(*,*) 'Data in rank 0' 27 | do i=1,8 28 | write(*,'(8I3)') array(i, :) 29 | end do 30 | end if 31 | 32 | do i = 1, 4 33 | counts(i) = i 34 | displs(i) = i - 1 + 2 * (i - 1) * 8 35 | end do 36 | 37 | ! create datatype 38 | call mpi_type_indexed(4, counts, displs, MPI_INTEGER, indexedtype, ierr) 39 | call mpi_type_commit(indexedtype, ierr) 40 | 41 | ! send first indexed of matrix 42 | if (rank == 0) then 43 | call mpi_send(array, 1, indexedtype, 1, 1, MPI_COMM_WORLD, ierr) 44 | else if (rank == 1) then 45 | call mpi_recv(array, 1, indexedtype, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, & 46 | ierr) 47 | end if 48 | 49 | ! 
Print out the result 50 | if (rank == 1) then 51 | write(*,*) 'Received data' 52 | do i=1,8 53 | write(*,'(8I3)') array(i, :) 54 | end do 55 | end if 56 | 57 | call mpi_type_free(indexedtype, ierr) 58 | call mpi_finalize(ierr) 59 | 60 | end program datatype1 61 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/solution/custom_type_b.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int rank; 7 | int array[8][8]; 8 | MPI_Datatype indexedtype; 9 | int displs[4]; 10 | int counts[4]; 11 | 12 | int i, j; 13 | 14 | MPI_Init(&argc, &argv); 15 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 16 | 17 | // Initialize arrays 18 | if (rank == 0) { 19 | for (i = 0; i < 8; i++) { 20 | for (j = 0; j < 8; j++) { 21 | array[i][j] = (i + 1) * 10 + j + 1; 22 | } 23 | } 24 | } else { 25 | for (i = 0; i < 8; i++) { 26 | for (j = 0; j < 8; j++) { 27 | array[i][j] = 0; 28 | } 29 | } 30 | } 31 | 32 | if (rank == 0) { 33 | printf("Data in rank 0\n"); 34 | for (i = 0; i < 8; i++) { 35 | for (j = 0; j < 8; j++) { 36 | printf("%3d", array[i][j]); 37 | } 38 | printf("\n"); 39 | } 40 | } 41 | 42 | // Create datatype 43 | for (i = 0; i < 4; i++) { 44 | counts[i] = i + 1; 45 | displs[i] = i + 2 * i * 8; 46 | } 47 | 48 | MPI_Type_indexed(4, counts, displs, MPI_INT, &indexedtype); 49 | MPI_Type_commit(&indexedtype); 50 | 51 | // Send first indexed of matrix 52 | if (rank == 0) { 53 | MPI_Send(array, 1, indexedtype, 1, 1, MPI_COMM_WORLD); 54 | } else if (rank == 1) { 55 | MPI_Recv(array, 1, indexedtype, 0, 1, MPI_COMM_WORLD, 56 | MPI_STATUS_IGNORE); 57 | } 58 | 59 | // Print out the result on rank 1 60 | // The application is correct if the first column has the values of rank 0 61 | if (rank == 1) { 62 | printf("Received data\n"); 63 | for (i = 0; i < 8; i++) { 64 | for (j = 0; j < 8; j++) { 65 | printf("%3d", array[i][j]); 66 | } 67 | printf("\n"); 68 | } 69 | } 70 | 71 | MPI_Type_free(&indexedtype); 72 | MPI_Finalize(); 73 | 74 | return 0; 75 | } 76 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/solution/custom_type_c.F90: -------------------------------------------------------------------------------- 1 | program datatype1 2 | use mpi_f08 3 | implicit none 4 | 5 | integer, dimension(8,8) :: array 6 | integer :: rank, ierr 7 | type(mpi_datatype) :: subarray 8 | integer, dimension(2) :: sizes, subsizes, offsets 9 | integer :: i, j 10 | 11 | call mpi_init(ierr) 12 | call mpi_comm_rank(MPI_COMM_WORLD, rank ,ierr) 13 | 14 | ! initialize arrays 15 | if (rank == 0) then 16 | do i=1,8 17 | do j=1,8 18 | array(i,j) = i*10 + j 19 | end do 20 | end do 21 | else 22 | array(:,:) = 0 23 | end if 24 | 25 | if (rank == 0) then 26 | write(*,*) 'Data in rank 0' 27 | do i=1,8 28 | write(*,'(8I3)') array(i, :) 29 | end do 30 | end if 31 | 32 | ! create datatype 33 | sizes = 8 34 | subsizes = 4 35 | offsets = 2 36 | call mpi_type_create_subarray(2, sizes, subsizes, offsets, MPI_ORDER_FORTRAN, MPI_INTEGER, subarray, ierr) 37 | call mpi_type_commit(subarray, ierr) 38 | 39 | ! send first row of matrix 40 | if (rank == 0) then 41 | call mpi_send(array(1, 1), 1, subarray, 1, 1, MPI_COMM_WORLD, ierr) 42 | else if (rank == 1) then 43 | call mpi_recv(array(1, 1), 1, subarray, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, & 44 | ierr) 45 | end if 46 | 47 | ! 
Print out the result 48 | if (rank == 1) then 49 | write(*,*) 'Received data' 50 | do i=1,8 51 | write(*,'(8I3)') array(i, :) 52 | end do 53 | end if 54 | 55 | call mpi_type_free(subarray, ierr) 56 | call mpi_finalize(ierr) 57 | 58 | end program datatype1 59 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/solution/custom_type_c.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int rank; 7 | int array[8][8]; 8 | MPI_Datatype subarray; 9 | int i, j; 10 | 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 13 | 14 | 15 | // Initialize arrays 16 | if (rank == 0) { 17 | for (i = 0; i < 8; i++) { 18 | for (j = 0; j < 8; j++) { 19 | array[i][j] = (i + 1) * 10 + j + 1; 20 | } 21 | } 22 | } else { 23 | for (i = 0; i < 8; i++) { 24 | for (j = 0; j < 8; j++) { 25 | array[i][j] = 0; 26 | } 27 | } 28 | } 29 | 30 | if (rank == 0) { 31 | printf("Data in rank 0\n"); 32 | for (i = 0; i < 8; i++) { 33 | for (j = 0; j < 8; j++) { 34 | printf("%3d", array[i][j]); 35 | } 36 | printf("\n"); 37 | } 38 | } 39 | 40 | 41 | int sizes[2] = {8, 8}; 42 | int subsizes[2] = {4, 4}; 43 | int offsets[2] = {2, 2}; 44 | // Create datatype 45 | MPI_Type_create_subarray(2, sizes, subsizes, offsets, MPI_ORDER_C, MPI_INT, &subarray); 46 | MPI_Type_commit(&subarray); 47 | 48 | // Send first column of matrix 49 | if (rank == 0) { 50 | MPI_Send(&array[0][0], 1, subarray, 1, 1, MPI_COMM_WORLD); 51 | } else if (rank == 1) { 52 | MPI_Recv(&array[0][0], 1, subarray, 0, 1, MPI_COMM_WORLD, 53 | MPI_STATUS_IGNORE); 54 | } 55 | 56 | // Print out the result 57 | if (rank == 1) { 58 | printf("Received data\n"); 59 | for (i = 0; i < 8; i++) { 60 | for (j = 0; j < 8; j++) { 61 | printf("%3d", array[i][j]); 62 | } 63 | printf("\n"); 64 | } 65 | } 66 | 67 | MPI_Type_free(&subarray); 68 | MPI_Finalize(); 69 | 70 | return 0; 71 | } 72 | -------------------------------------------------------------------------------- /mpi/struct-datatype/README.md: -------------------------------------------------------------------------------- 1 | ## Datatype for a struct / derived type 2 | 3 | The skeleton code provided in 4 | [struct_type.c](struct_type.c) or 5 | [struct_type.F90](struct_type.F90) defines a struct (C) / derived type (Fortran) 6 | 7 | 1. Implement a custom MPI datatype, and send a single struct / derived type between two 8 | processes. Verify that the communication is performed succesfully. 9 | 10 | 2. Next, try to send an array of structs / derived types. Make sure that the *extent* of the 11 | datatype is correct (you may use `MPI_Type_get_extent` and `MPI_Get_address` for checking). 12 | 13 | 3. Implement the same send by sending just a stream of bytes (type `MPI_BYTE`). 14 | Verify correctness and compare the performance of these two approaches. 
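
A rough sketch of how parts 1 and 2 could look in C is given below. The helper function `build_particle_type` is illustrative only and not part of the skeleton codes; it assumes the `particle` struct defined in [struct_type.c](struct_type.c), and the displacements are taken with `MPI_Get_address` relative to the start of the struct:

```
#include <mpi.h>

typedef struct {
    float coords[3];
    int charge;
    char label[2];
} particle;

/* Illustrative sketch (not part of the exercise skeletons):
   build a committed MPI datatype that matches one particle */
MPI_Datatype build_particle_type(particle *p)
{
    MPI_Datatype types[3] = {MPI_FLOAT, MPI_INT, MPI_CHAR};
    int blocklen[3] = {3, 1, 2};
    MPI_Aint disp[3], base;
    MPI_Datatype tmptype, particletype;
    int i;

    /* Member displacements relative to the start of the struct */
    MPI_Get_address(p, &base);
    MPI_Get_address(&p->coords, &disp[0]);
    MPI_Get_address(&p->charge, &disp[1]);
    MPI_Get_address(&p->label, &disp[2]);
    for (i = 0; i < 3; i++) {
        disp[i] -= base;
    }

    MPI_Type_create_struct(3, blocklen, disp, types, &tmptype);

    /* Part 2: force the extent to the true size of the struct so that
       an array of particles can be sent with a plain count argument */
    MPI_Type_create_resized(tmptype, 0, sizeof(particle), &particletype);
    MPI_Type_commit(&particletype);
    MPI_Type_free(&tmptype);

    return particletype;
}
```

For part 3, the same buffer can be sent as `MPI_BYTE`; the solution [solution/struct_with_byte.c](solution/struct_with_byte.c) uses the measured extent of one struct as the element size in bytes.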
15 | -------------------------------------------------------------------------------- /mpi/struct-datatype/solution/struct_with_byte.F90: -------------------------------------------------------------------------------- 1 | program datatype_struct 2 | use mpi_f08 3 | use iso_fortran_env, only : REAL64 4 | implicit none 5 | 6 | integer, parameter :: n = 1000, cnt=3, reps=10000 7 | 8 | type particle 9 | real :: coords(3) 10 | integer :: charge 11 | character(len=2) :: label 12 | end type particle 13 | 14 | type(particle) :: particles(n) 15 | 16 | integer :: i, ierror, myid, ntasks, tag 17 | 18 | type(mpi_datatype) :: particle_mpi_type, temp_type 19 | type(mpi_datatype):: types(cnt) 20 | integer :: blocklen(cnt) 21 | integer(kind=MPI_ADDRESS_KIND) :: disp(cnt) 22 | integer(kind=MPI_ADDRESS_KIND) :: lb1, lb2, extent 23 | integer :: nbytes 24 | 25 | real(REAL64) :: t1, t2 26 | 27 | call mpi_init(ierror) 28 | call mpi_comm_rank(MPI_COMM_WORLD, myid, ierror) 29 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, ierror) 30 | 31 | ! insert some data for the particle struct 32 | if (myid == 0) then 33 | do i = 1, n 34 | call random_number(particles(i)%coords) 35 | particles(i)%charge = 54 36 | particles(i)%label = 'Xe' 37 | end do 38 | end if 39 | 40 | ! Determine the true extent of one particle struct 41 | call MPI_GET_ADDRESS(particles(1),lb1,ierror) 42 | call MPI_GET_ADDRESS(particles(2),lb2,ierror) 43 | extent = lb2 - lb1 44 | 45 | t1 = mpi_wtime() 46 | ! send and receive using the MPI_BYTE datatype 47 | nbytes = n * extent 48 | if(myid == 0) then 49 | do i = 1, reps 50 | call mpi_send(particles, nbytes, MPI_BYTE, 1, i, & 51 | MPI_COMM_WORLD, ierror) 52 | end do 53 | else if(myid == 1) then 54 | do i = 1, reps 55 | call mpi_recv(particles, nbytes, MPI_BYTE, 0, i, & 56 | MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierror) 57 | end do 58 | end if 59 | t2=mpi_wtime() 60 | 61 | write(*,*) "Time: ", myid, (t2-t1) / reps 62 | write(*,*) "Check:", myid, particles(n)%label, particles(n)%coords(1), & 63 | particles(n)%coords(2), particles(n)%coords(3) 64 | 65 | call mpi_finalize(ierror) 66 | 67 | end program datatype_struct 68 | -------------------------------------------------------------------------------- /mpi/struct-datatype/solution/struct_with_byte.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int n=1000, cnt=3, reps=10000; 10 | typedef struct{ 11 | float coords[3]; 12 | int charge; 13 | char label[2]; 14 | } particle; 15 | particle particles[n]; 16 | MPI_Aint lb0, lb1, extent; 17 | int i, j, myid, ntasks; 18 | double t1, t2; 19 | 20 | MPI_Init(&argc, &argv); 21 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 22 | 23 | /* fill in some values for the particles */ 24 | if (myid == 0) { 25 | for (i=0; i < n; i++) { 26 | for (j=0; j < 3; j++) 27 | particles[i].coords[j] = (float)rand()/(float)RAND_MAX*10.0; 28 | particles[i].charge = 54; 29 | strcpy(particles[i].label, "Xe"); 30 | } 31 | } 32 | /* determine the true extent of one particle struct */ 33 | MPI_Get_address(&particles[0], &lb0); 34 | MPI_Get_address(&particles[1], &lb1); 35 | extent = lb1 - lb0; 36 | 37 | /* send and receive using the MPI_BYTE datatype */ 38 | t1 = MPI_Wtime(); 39 | if (myid == 0) { 40 | for (i=0; i < reps; i++) 41 | MPI_Send(particles, n*extent, MPI_BYTE, 1, i, MPI_COMM_WORLD); 42 | } else if (myid == 1) { 43 | for (i=0; i < reps; i++) 44 | MPI_Recv(particles, n*extent, MPI_BYTE, 0, i, MPI_COMM_WORLD, 45 
| MPI_STATUS_IGNORE); 46 | } 47 | t2 = MPI_Wtime(); 48 | 49 | printf("Time: %i, %e \n", myid, (t2-t1)/(double)reps); 50 | printf("Check: %i: %s %f %f %f \n", myid, particles[n-1].label, 51 | particles[n-1].coords[0], particles[n-1].coords[1], 52 | particles[n-1].coords[2]); 53 | 54 | MPI_Finalize(); 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /mpi/struct-datatype/struct_type.F90: -------------------------------------------------------------------------------- 1 | program datatype_struct 2 | use mpi_f08 3 | use iso_fortran_env, only : REAL64 4 | implicit none 5 | 6 | integer, parameter :: n = 1000, cnt=3, reps=10000 7 | 8 | type particle 9 | real :: coords(3) 10 | integer :: charge 11 | character(len=2) :: label 12 | end type particle 13 | 14 | type(particle) :: particles(n) 15 | 16 | integer :: i, ierror, myid, ntasks, tag 17 | 18 | type(mpi_datatype) :: particle_mpi_type, temp_type, types(cnt) 19 | integer :: blocklen(cnt) 20 | integer(kind=MPI_ADDRESS_KIND) :: disp(cnt) 21 | integer(kind=MPI_ADDRESS_KIND) :: lb, extent 22 | 23 | real(REAL64) :: t1,t2 24 | 25 | call mpi_init(ierror) 26 | call mpi_comm_rank(MPI_COMM_WORLD, myid, ierror) 27 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, ierror) 28 | 29 | ! insert some data for the particle struct 30 | if(myid == 0) then 31 | do i = 1, n 32 | call random_number(particles(i)%coords) 33 | particles(i)%charge = 54 34 | particles(i)%label = 'Xe' 35 | end do 36 | end if 37 | 38 | ! TODO: define the datatype for type particle 39 | 40 | ! TODO: Check extent. 41 | ! (Not really necessary on most systems.) 42 | 43 | ! communicate using the created particletype 44 | t1 = mpi_wtime() 45 | if(myid == 0) then 46 | do i = 1, reps ! multiple sends for better timing 47 | ! TODO: send 48 | end do 49 | else if(myid == 1) then 50 | do i = 1, reps 51 | ! TODO: receive 52 | end do 53 | end if 54 | t2=mpi_wtime() 55 | 56 | ! 
TODOs end 57 | 58 | write(*,*) "Time: ", myid, (t2-t1) / reps 59 | write(*,*) "Check:", myid, particles(n)%label, particles(n)%coords(1), & 60 | particles(n)%coords(2), particles(n)%coords(3) 61 | 62 | call mpi_type_free(particle_mpi_type, ierror) 63 | call mpi_finalize(ierror) 64 | 65 | end program datatype_struct 66 | -------------------------------------------------------------------------------- /mpi/struct-datatype/struct_type.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int n=1000, cnt=3, reps=10000; 10 | 11 | typedef struct { 12 | float coords[3]; 13 | int charge; 14 | char label[2]; 15 | } particle; 16 | 17 | particle particles[n]; 18 | 19 | int i, j, myid, ntasks; 20 | double t1, t2; 21 | 22 | MPI_Init(&argc, &argv); 23 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 24 | 25 | /* fill in some values for the particles */ 26 | if (myid == 0) { 27 | for (i=0; i < n; i++) { 28 | for (j=0; j < 3; j++) 29 | particles[i].coords[j] = (float)rand()/(float)RAND_MAX*10.0; 30 | particles[i].charge = 54; 31 | strcpy(particles[i].label, "H"); 32 | } 33 | } 34 | 35 | // TODO: define data type for the struct 36 | MPI_Datatype particletype, temptype, types[cnt]; 37 | MPI_Aint disp[cnt], dist[2], lb, extent; 38 | int blocklen[cnt]; 39 | 40 | // TODO: check extent (not really necessary on most platforms) 41 | 42 | // communicate using the created particletype 43 | t1 = MPI_Wtime(); 44 | if (myid == 0) { 45 | for (i=0; i < reps; i++) // multiple sends for better timing 46 | // TODO: send 47 | 48 | } else if (myid == 1) { 49 | for (i=0; i < reps; i++) 50 | // TODO: receive 51 | 52 | } 53 | 54 | // TODOs end 55 | 56 | t2 = MPI_Wtime(); 57 | 58 | printf("Time: %i, %e \n", myid, (t2-t1)/(double)reps); 59 | printf("Check: %i: %s %f %f %f \n", myid, particles[n-1].label, 60 | particles[n-1].coords[0], particles[n-1].coords[1], 61 | particles[n-1].coords[2]); 62 | 63 | MPI_Finalize(); 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /parallel-io/hdf5/README.md: -------------------------------------------------------------------------------- 1 | ## HDF5 example 2 | 3 | Study and test the HDF5 examples ([hdf5.c](hdf5.c) or [hdf5.f90](hdf5.f90)) 4 | where the [Simple MPI-IO exercise](../mpi-io) has been re-written with HDF5 5 | using collective parallel write. 6 | 7 | On Puhti, you will need to load the module `hdf5/1.10.4-mpi` before you can 8 | compile the code: 9 | 10 | ``` 11 | module load hdf5/1.10.4-mpi 12 | ``` 13 | 14 | Compile and run the program. You can use the `h5dump` command to check the 15 | values in a HDF5 file. 16 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/README.md: -------------------------------------------------------------------------------- 1 | ## Heat equation: checkpoint + restart with MPI-IO 2 | 3 | 1. Add a feature to the heat equation solver program that enables one to 4 | start the program from a given state (i.e. not from scratch every time). 5 | 6 | 2. Add also a checkpointing feature that will dump the state of the simulation 7 | to disk periodically (e.g. after every few tens of iterations); in a form 8 | that can be read in afterwards. 9 | 10 | Combining the two features allows one to restart and continue an earlier 11 | calculation. 
So, when the program starts and a checkpoint file is present, it 12 | will replace the initial state with the one in the restart file. 13 | 14 | Use MPI-IO to accomplish the I/O routines. Starting points are provided in 15 | [c/io.c](c/io.c) and [fortran/io.F90](fortran/io.F90). 16 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/Makefile: -------------------------------------------------------------------------------- 1 | COMP=intel 2 | 3 | COMMONDIR=../common 4 | LIBPNGDIR=/appl/opt/libpng 5 | 6 | ifeq ($(COMP),cray) 7 | CC=cc 8 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 9 | LDFLAGS=-L$(LIBPNGDIR)/lib 10 | LIBS=-lpng -lz 11 | endif 12 | 13 | ifeq ($(COMP),gnu) 14 | CC=mpicc 15 | CCFLAGS=-O3 -Wall -I$(LIBPNGDIR)/include -I$(COMMONDIR) 16 | LDFLAGS=-L$(LIBPNGDIR)/lib 17 | LIBS=-lpng -lz 18 | endif 19 | 20 | ifeq ($(COMP),intel) 21 | CC=mpicc 22 | CCFLAGS=-O3 -Wall -I$(LIBPNGDIR)/include -I$(COMMONDIR) 23 | LDFLAGS=-L$(LIBPNGDIR)/lib 24 | LIBS=-lpng -lz 25 | endif 26 | 27 | EXE=heat_mpi 28 | OBJS=core.o setup.o utilities.o io.o main.o 29 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 30 | 31 | 32 | all: $(EXE) 33 | 34 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 35 | core.o: core.c heat.h 36 | utilities.o: utilities.c heat.h 37 | setup.o: setup.c heat.h 38 | io.o: io.c heat.h 39 | main.o: main.c heat.h 40 | 41 | $(OBJS_PNG): C_COMPILER := $(CC) 42 | $(OBJS): C_COMPILER := $(CC) 43 | 44 | $(EXE): $(OBJS) $(OBJS_PNG) 45 | $(CC) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 46 | 47 | %.o: %.c 48 | $(C_COMPILER) $(CCFLAGS) -c $< -o $@ 49 | 50 | .PHONY: clean 51 | clean: 52 | -/bin/rm -f $(EXE) a.out *.o *.png *~ 53 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/core.c: -------------------------------------------------------------------------------- 1 | /* Main solver routines for heat equation solver */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "heat.h" 10 | 11 | /* Exchange the boundary values */ 12 | void exchange(field *temperature, parallel_data *parallel) 13 | { 14 | 15 | // Send to the up, receive from down 16 | MPI_Sendrecv(temperature->data[1], temperature->ny + 2, MPI_DOUBLE, 17 | parallel->nup, 11, 18 | temperature->data[temperature->nx + 1], 19 | temperature->ny + 2, MPI_DOUBLE, parallel->ndown, 11, 20 | MPI_COMM_WORLD, MPI_STATUS_IGNORE); 21 | // Send to the down, receive from up 22 | MPI_Sendrecv(temperature->data[temperature->nx], temperature->ny + 2, 23 | MPI_DOUBLE, parallel->ndown, 12, 24 | temperature->data[0], temperature->ny + 2, MPI_DOUBLE, 25 | parallel->nup, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 26 | } 27 | 28 | 29 | /* Update the temperature values using five-point stencil */ 30 | void evolve(field *curr, field *prev, double a, double dt) 31 | { 32 | int i, j; 33 | double dx2, dy2; 34 | 35 | /* Determine the temperature field at next time step 36 | * As we have fixed boundary conditions, the outermost gridpoints 37 | * are not updated. 
*/ 38 | dx2 = prev->dx * prev->dx; 39 | dy2 = prev->dy * prev->dy; 40 | for (i = 1; i < curr->nx + 1; i++) { 41 | for (j = 1; j < curr->ny + 1; j++) { 42 | curr->data[i][j] = prev->data[i][j] + a * dt * 43 | ((prev->data[i + 1][j] - 44 | 2.0 * prev->data[i][j] + 45 | prev->data[i - 1][j]) / dx2 + 46 | (prev->data[i][j + 1] - 47 | 2.0 * prev->data[i][j] + 48 | prev->data[i][j - 1]) / dy2); 49 | } 50 | } 51 | } 52 | 53 | 54 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/heat.h: -------------------------------------------------------------------------------- 1 | #ifndef __HEAT_H__ 2 | #define __HEAT_H__ 3 | 4 | /* Datatype for temperature field */ 5 | typedef struct { 6 | /* nx and ny are the true dimensions of the field. The array data 7 | * contains also ghost layers, so it will have dimensions nx+2 x ny+2 */ 8 | int nx; /* Local dimensions of the field */ 9 | int ny; 10 | int nx_full; /* Global dimensions of the field */ 11 | int ny_full; /* Global dimensions of the field */ 12 | double dx; 13 | double dy; 14 | double **data; 15 | } field; 16 | 17 | /* Datatype for basic parallelization information */ 18 | typedef struct { 19 | int size; /* Number of MPI tasks */ 20 | int rank; 21 | int nup, ndown; /* Ranks of neighbouring MPI tasks */ 22 | } parallel_data; 23 | 24 | 25 | /* We use here fixed grid spacing */ 26 | #define DX 0.01 27 | #define DY 0.01 28 | 29 | /* file name for restart checkpoints*/ 30 | #define CHECKPOINT "HEAT_RESTART.dat" 31 | 32 | /* Function prototypes */ 33 | double **malloc_2d(int nx, int ny); 34 | 35 | void free_2d(double **array); 36 | 37 | void set_field_dimensions(field *temperature, int nx, int ny, 38 | parallel_data *parallel); 39 | 40 | void parallel_setup(parallel_data *parallel, int nx, int ny); 41 | 42 | void parallel_set_dimensions(parallel_data *parallel, int nx, int ny); 43 | 44 | void initialize(int argc, char *argv[], field *temperature1, 45 | field *temperature2, int *nsteps, parallel_data *parallel); 46 | 47 | void generate_field(field *temperature, parallel_data *parallel); 48 | 49 | void exchange(field *temperature, parallel_data *parallel); 50 | 51 | void evolve(field *curr, field *prev, double a, double dt); 52 | 53 | void write_field(field *temperature, int iter, parallel_data *parallel); 54 | 55 | void read_field(field *temperature1, field *temperature2, 56 | char *filename, parallel_data *parallel); 57 | 58 | void copy_field(field *temperature1, field *temperature2); 59 | 60 | void swap_fields(field *temperature1, field *temperature2); 61 | 62 | void allocate_field(field *temperature); 63 | 64 | void finalize(field *temperature1, field *temperature2); 65 | 66 | void write_restart(field *temperature, parallel_data *parallel, int iter); 67 | 68 | void read_restart(field *temperature, parallel_data *parallel, int *iter); 69 | 70 | #endif /* __HEAT_H__ */ 71 | 72 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/main.c: -------------------------------------------------------------------------------- 1 | /* Heat equation solver in 2D. 
*/ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "heat.h" 10 | 11 | 12 | int main(int argc, char **argv) 13 | { 14 | double a = 0.5; //!< Diffusion constant 15 | field current, previous; //!< Current and previous temperature fields 16 | 17 | double dt; //!< Time step 18 | int nsteps; //!< Number of time steps 19 | 20 | int image_interval = 500; //!< Image output interval 21 | int restart_interval = 200; //!< Checkpoint output interval 22 | 23 | parallel_data parallelization; //!< Parallelization info 24 | 25 | int iter; //!< Iteration counter 26 | 27 | double dx2, dy2; //!< delta x and y squared 28 | 29 | double start_clock; //!< Time stamps 30 | 31 | MPI_Init(&argc, &argv); 32 | 33 | initialize(argc, argv, ¤t, &previous, &nsteps, ¶llelization); 34 | 35 | /* Output the initial field */ 36 | write_field(¤t, 0, ¶llelization); 37 | 38 | /* Largest stable time step */ 39 | dx2 = current.dx * current.dx; 40 | dy2 = current.dy * current.dy; 41 | dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2)); 42 | 43 | /* Get the start time stamp */ 44 | start_clock = MPI_Wtime(); 45 | 46 | /* Time evolve */ 47 | for (iter = 1; iter <= nsteps; iter++) { 48 | exchange(&previous, ¶llelization); 49 | evolve(¤t, &previous, a, dt); 50 | if (iter % image_interval == 0) { 51 | write_field(¤t, iter, ¶llelization); 52 | } 53 | /* write a checkpoint now and then for easy restarting */ 54 | if (iter % restart_interval == 0) { 55 | write_restart(¤t, ¶llelization, iter); 56 | } 57 | /* Swap current field so that it will be used 58 | as previous for next iteration step */ 59 | swap_fields(¤t, &previous); 60 | } 61 | 62 | /* Determine the CPU time used for the iteration */ 63 | if (parallelization.rank == 0) { 64 | printf("Iteration took %.3f seconds.\n", (MPI_Wtime() - start_clock)); 65 | printf("Reference value at 5,5: %f\n", previous.data[5][5]); 66 | } 67 | 68 | finalize(¤t, &previous); 69 | MPI_Finalize(); 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/solution/Makefile: -------------------------------------------------------------------------------- 1 | COMP=intel 2 | 3 | COMMONDIR=../../common 4 | LIBPNGDIR=/appl/opt/libpng 5 | 6 | ifeq ($(COMP),cray) 7 | CC=cc 8 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 9 | LDFLAGS=-L$(LIBPNGDIR)/lib 10 | LIBS=-lpng -lz 11 | endif 12 | 13 | ifeq ($(COMP),gnu) 14 | CC=mpicc 15 | CCFLAGS=-O3 -Wall -I$(LIBPNGDIR)/include -I$(COMMONDIR) 16 | LDFLAGS=-L$(LIBPNGDIR)/lib 17 | LIBS=-lpng -lz 18 | endif 19 | 20 | ifeq ($(COMP),intel) 21 | CC=mpicc 22 | CCFLAGS=-O3 -Wall -I$(LIBPNGDIR)/include -I$(COMMONDIR) 23 | LDFLAGS=-L$(LIBPNGDIR)/lib 24 | LIBS=-lpng -lz 25 | endif 26 | 27 | EXE=heat_mpi 28 | OBJS=core.o setup.o utilities.o io.o main.o 29 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 30 | 31 | 32 | all: $(EXE) 33 | 34 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 35 | core.o: core.c heat.h 36 | utilities.o: utilities.c heat.h 37 | setup.o: setup.c heat.h 38 | io.o: io.c heat.h 39 | main.o: main.c heat.h 40 | 41 | $(OBJS_PNG): C_COMPILER := $(CC) 42 | $(OBJS): C_COMPILER := $(CC) 43 | 44 | $(EXE): $(OBJS) $(OBJS_PNG) 45 | $(CC) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 46 | 47 | %.o: %.c 48 | $(C_COMPILER) $(CCFLAGS) -c $< -o $@ 49 | 50 | .PHONY: clean 51 | clean: 52 | -/bin/rm -f $(EXE) a.out *.o *.png *~ 53 | -------------------------------------------------------------------------------- 
/parallel-io/heat-restart/c/solution/core.c: -------------------------------------------------------------------------------- 1 | /* Main solver routines for heat equation solver */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "heat.h" 10 | 11 | /* Exchange the boundary values */ 12 | void exchange(field *temperature, parallel_data *parallel) 13 | { 14 | 15 | // Send to the up, receive from down 16 | MPI_Sendrecv(temperature->data[1], temperature->ny + 2, MPI_DOUBLE, 17 | parallel->nup, 11, 18 | temperature->data[temperature->nx + 1], 19 | temperature->ny + 2, MPI_DOUBLE, parallel->ndown, 11, 20 | MPI_COMM_WORLD, MPI_STATUS_IGNORE); 21 | // Send to the down, receive from up 22 | MPI_Sendrecv(temperature->data[temperature->nx], temperature->ny + 2, 23 | MPI_DOUBLE, parallel->ndown, 12, 24 | temperature->data[0], temperature->ny + 2, MPI_DOUBLE, 25 | parallel->nup, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 26 | } 27 | 28 | 29 | /* Update the temperature values using five-point stencil */ 30 | void evolve(field *curr, field *prev, double a, double dt) 31 | { 32 | int i, j; 33 | double dx2, dy2; 34 | 35 | /* Determine the temperature field at next time step 36 | * As we have fixed boundary conditions, the outermost gridpoints 37 | * are not updated. */ 38 | dx2 = prev->dx * prev->dx; 39 | dy2 = prev->dy * prev->dy; 40 | for (i = 1; i < curr->nx + 1; i++) { 41 | for (j = 1; j < curr->ny + 1; j++) { 42 | curr->data[i][j] = prev->data[i][j] + a * dt * 43 | ((prev->data[i + 1][j] - 44 | 2.0 * prev->data[i][j] + 45 | prev->data[i - 1][j]) / dx2 + 46 | (prev->data[i][j + 1] - 47 | 2.0 * prev->data[i][j] + 48 | prev->data[i][j - 1]) / dy2); 49 | } 50 | } 51 | } 52 | 53 | 54 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/solution/heat.h: -------------------------------------------------------------------------------- 1 | #ifndef __HEAT_H__ 2 | #define __HEAT_H__ 3 | 4 | /* Datatype for temperature field */ 5 | typedef struct { 6 | /* nx and ny are the true dimensions of the field. 
The array data 7 | * contains also ghost layers, so it will have dimensions nx+2 x ny+2 */ 8 | int nx; /* Local dimensions of the field */ 9 | int ny; 10 | int nx_full; /* Global dimensions of the field */ 11 | int ny_full; /* Global dimensions of the field */ 12 | double dx; 13 | double dy; 14 | double **data; 15 | } field; 16 | 17 | /* Datatype for basic parallelization information */ 18 | typedef struct { 19 | int size; /* Number of MPI tasks */ 20 | int rank; 21 | int nup, ndown; /* Ranks of neighbouring MPI tasks */ 22 | } parallel_data; 23 | 24 | 25 | /* We use here fixed grid spacing */ 26 | #define DX 0.01 27 | #define DY 0.01 28 | 29 | /* file name for restart checkpoints*/ 30 | #define CHECKPOINT "HEAT_RESTART.dat" 31 | 32 | /* Function prototypes */ 33 | double **malloc_2d(int nx, int ny); 34 | 35 | void free_2d(double **array); 36 | 37 | void set_field_dimensions(field *temperature, int nx, int ny, 38 | parallel_data *parallel); 39 | 40 | void parallel_setup(parallel_data *parallel, int nx, int ny); 41 | 42 | void parallel_set_dimensions(parallel_data *parallel, int nx, int ny); 43 | 44 | void initialize(int argc, char *argv[], field *temperature1, 45 | field *temperature2, int *nsteps, parallel_data *parallel); 46 | 47 | void generate_field(field *temperature, parallel_data *parallel); 48 | 49 | void exchange(field *temperature, parallel_data *parallel); 50 | 51 | void evolve(field *curr, field *prev, double a, double dt); 52 | 53 | void write_field(field *temperature, int iter, parallel_data *parallel); 54 | 55 | void read_field(field *temperature1, field *temperature2, 56 | char *filename, parallel_data *parallel); 57 | 58 | void copy_field(field *temperature1, field *temperature2); 59 | 60 | void swap_fields(field *temperature1, field *temperature2); 61 | 62 | void allocate_field(field *temperature); 63 | 64 | void finalize(field *temperature1, field *temperature2); 65 | 66 | void write_restart(field *temperature, parallel_data *parallel, int iter); 67 | 68 | void read_restart(field *temperature, parallel_data *parallel, int *iter); 69 | 70 | #endif /* __HEAT_H__ */ 71 | 72 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/solution/main.c: -------------------------------------------------------------------------------- 1 | /* Heat equation solver in 2D. 
*/ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "heat.h" 10 | 11 | 12 | int main(int argc, char **argv) 13 | { 14 | double a = 0.5; //!< Diffusion constant 15 | field current, previous; //!< Current and previous temperature fields 16 | 17 | double dt; //!< Time step 18 | int nsteps; //!< Number of time steps 19 | 20 | int image_interval = 500; //!< Image output interval 21 | int restart_interval = 200; //!< Checkpoint output interval 22 | 23 | parallel_data parallelization; //!< Parallelization info 24 | 25 | int iter; //!< Iteration counter 26 | 27 | double dx2, dy2; //!< delta x and y squared 28 | 29 | double start_clock; //!< Time stamps 30 | 31 | MPI_Init(&argc, &argv); 32 | 33 | initialize(argc, argv, ¤t, &previous, &nsteps, ¶llelization); 34 | 35 | /* Output the initial field */ 36 | write_field(¤t, 0, ¶llelization); 37 | 38 | /* Largest stable time step */ 39 | dx2 = current.dx * current.dx; 40 | dy2 = current.dy * current.dy; 41 | dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2)); 42 | 43 | /* Get the start time stamp */ 44 | start_clock = MPI_Wtime(); 45 | 46 | /* Time evolve */ 47 | for (iter = 1; iter <= nsteps; iter++) { 48 | exchange(&previous, ¶llelization); 49 | evolve(¤t, &previous, a, dt); 50 | if (iter % image_interval == 0) { 51 | write_field(¤t, iter, ¶llelization); 52 | } 53 | /* write a checkpoint now and then for easy restarting */ 54 | if (iter % restart_interval == 0) { 55 | write_restart(¤t, ¶llelization, iter); 56 | } 57 | /* Swap current field so that it will be used 58 | as previous for next iteration step */ 59 | swap_fields(¤t, &previous); 60 | } 61 | 62 | /* Determine the CPU time used for the iteration */ 63 | if (parallelization.rank == 0) { 64 | printf("Iteration took %.3f seconds.\n", (MPI_Wtime() - start_clock)); 65 | printf("Reference value at 5,5: %f\n", previous.data[5][5]); 66 | } 67 | 68 | finalize(¤t, &previous); 69 | MPI_Finalize(); 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/solution/utilities.c: -------------------------------------------------------------------------------- 1 | /* Utility functions for heat equation solver 2 | * NOTE: This file does not need to be edited! 
*/ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "heat.h" 10 | 11 | /* Utility routine for allocating a two dimensional array */ 12 | double **malloc_2d(int nx, int ny) 13 | { 14 | double **array; 15 | int i; 16 | 17 | array = (double **) malloc(nx * sizeof(double *)); 18 | array[0] = (double *) malloc(nx * ny * sizeof(double)); 19 | 20 | for (i = 1; i < nx; i++) { 21 | array[i] = array[0] + i * ny; 22 | } 23 | 24 | return array; 25 | } 26 | 27 | /* Utility routine for deallocating a two dimensional array */ 28 | void free_2d(double **array) 29 | { 30 | free(array[0]); 31 | free(array); 32 | } 33 | 34 | 35 | /* Copy data on temperature1 into temperature2 */ 36 | void copy_field(field *temperature1, field *temperature2) 37 | { 38 | assert(temperature1->nx == temperature2->nx); 39 | assert(temperature1->ny == temperature2->ny); 40 | memcpy(temperature2->data[0], temperature1->data[0], 41 | (temperature1->nx + 2) * (temperature1->ny + 2) * sizeof(double)); 42 | } 43 | 44 | /* Swap the data of fields temperature1 and temperature2 */ 45 | void swap_fields(field *temperature1, field *temperature2) 46 | { 47 | double **tmp; 48 | tmp = temperature1->data; 49 | temperature1->data = temperature2->data; 50 | temperature2->data = tmp; 51 | } 52 | 53 | /* Allocate memory for a temperature field and initialise it to zero */ 54 | void allocate_field(field *temperature) 55 | { 56 | // Allocate also ghost layers 57 | temperature->data = 58 | malloc_2d(temperature->nx + 2, temperature->ny + 2); 59 | 60 | // Initialize to zero 61 | memset(temperature->data[0], 0.0, 62 | (temperature->nx + 2) * (temperature->ny + 2) * sizeof(double)); 63 | } 64 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/utilities.c: -------------------------------------------------------------------------------- 1 | /* Utility functions for heat equation solver 2 | * NOTE: This file does not need to be edited! 
*/ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "heat.h" 10 | 11 | /* Utility routine for allocating a two dimensional array */ 12 | double **malloc_2d(int nx, int ny) 13 | { 14 | double **array; 15 | int i; 16 | 17 | array = (double **) malloc(nx * sizeof(double *)); 18 | array[0] = (double *) malloc(nx * ny * sizeof(double)); 19 | 20 | for (i = 1; i < nx; i++) { 21 | array[i] = array[0] + i * ny; 22 | } 23 | 24 | return array; 25 | } 26 | 27 | /* Utility routine for deallocating a two dimensional array */ 28 | void free_2d(double **array) 29 | { 30 | free(array[0]); 31 | free(array); 32 | } 33 | 34 | 35 | /* Copy data on temperature1 into temperature2 */ 36 | void copy_field(field *temperature1, field *temperature2) 37 | { 38 | assert(temperature1->nx == temperature2->nx); 39 | assert(temperature1->ny == temperature2->ny); 40 | memcpy(temperature2->data[0], temperature1->data[0], 41 | (temperature1->nx + 2) * (temperature1->ny + 2) * sizeof(double)); 42 | } 43 | 44 | /* Swap the data of fields temperature1 and temperature2 */ 45 | void swap_fields(field *temperature1, field *temperature2) 46 | { 47 | double **tmp; 48 | tmp = temperature1->data; 49 | temperature1->data = temperature2->data; 50 | temperature2->data = tmp; 51 | } 52 | 53 | /* Allocate memory for a temperature field and initialise it to zero */ 54 | void allocate_field(field *temperature) 55 | { 56 | // Allocate also ghost layers 57 | temperature->data = 58 | malloc_2d(temperature->nx + 2, temperature->ny + 2); 59 | 60 | // Initialize to zero 61 | memset(temperature->data[0], 0.0, 62 | (temperature->nx + 2) * (temperature->ny + 2) * sizeof(double)); 63 | } 64 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/Makefile: -------------------------------------------------------------------------------- 1 | COMP=intel 2 | 3 | COMMONDIR=../common 4 | LIBPNGDIR=/appl/opt/libpng 5 | 6 | ifeq ($(COMP),cray) 7 | FC=ftn 8 | CC=cc 9 | FCFLAGS=-O3 10 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 11 | LDFLAGS=-L$(LIBPNGDIR)/lib 12 | LIBS=-lpng -lz 13 | endif 14 | 15 | ifeq ($(COMP),gnu) 16 | FC=mpif90 17 | CC=gcc 18 | FCFLAGS=-O3 19 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 20 | LDFLAGS=-L$(LIBPNGDIR)/lib 21 | LIBS=-lpng -lz 22 | endif 23 | 24 | ifeq ($(COMP),intel) 25 | FC=mpif90 26 | CC=icc 27 | FCFLAGS=-O3 28 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 29 | LDFLAGS=-L$(LIBPNGDIR)/lib 30 | LIBS=-lpng -lz 31 | endif 32 | 33 | EXE=heat_mpi 34 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o 35 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 36 | 37 | 38 | all: $(EXE) 39 | 40 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 41 | core.o: core.F90 heat_mod.o 42 | utilities.o: utilities.F90 heat_mod.o 43 | io.o: io.F90 heat_mod.o pngwriter_mod.o 44 | setup.o: setup.F90 heat_mod.o utilities.o io.o 45 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o 46 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o 47 | 48 | $(EXE): $(OBJS) $(OBJS_PNG) 49 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 50 | 51 | %.o: %.F90 52 | $(FC) $(FCFLAGS) -c $< -o $@ 53 | 54 | %.o: %.c 55 | $(CC) $(CCFLAGS) -c $< -o $@ 56 | 57 | .PHONY: clean 58 | clean: 59 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ 60 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/core.F90: 
-------------------------------------------------------------------------------- 1 | ! Main solver routines for heat equation solver 2 | module core 3 | use heat 4 | 5 | contains 6 | 7 | ! Exchange the boundary data between MPI tasks 8 | subroutine exchange(field0, parallel) 9 | use mpi 10 | 11 | implicit none 12 | 13 | type(field), intent(inout) :: field0 14 | type(parallel_data), intent(in) :: parallel 15 | 16 | integer :: ierr 17 | 18 | ! Send to left, receive from right 19 | call mpi_sendrecv(field0%data(:, 1), field0%nx + 2, MPI_DOUBLE_PRECISION, & 20 | & parallel%nleft, 11, & 21 | & field0%data(:, field0%ny + 1), field0%nx + 2, MPI_DOUBLE_PRECISION, & 22 | & parallel%nright, 11, & 23 | & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) 24 | 25 | ! Send to right, receive from left 26 | call mpi_sendrecv(field0%data(:, field0%ny), field0%nx + 2, MPI_DOUBLE_PRECISION, & 27 | & parallel%nright, 12, & 28 | & field0%data(:, 0), field0%nx + 2, MPI_DOUBLE_PRECISION,& 29 | & parallel%nleft, 12, & 30 | & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) 31 | 32 | end subroutine exchange 33 | 34 | ! Compute one time step of temperature evolution 35 | ! Arguments: 36 | ! curr (type(field)): current temperature values 37 | ! prev (type(field)): values from previous time step 38 | ! a (real(dp)): update equation constant 39 | ! dt (real(dp)): time step value 40 | subroutine evolve(curr, prev, a, dt) 41 | 42 | implicit none 43 | 44 | type(field), intent(inout) :: curr, prev 45 | real(dp) :: a, dt 46 | integer :: i, j, nx, ny 47 | 48 | nx = curr%nx 49 | ny = curr%ny 50 | 51 | do j = 1, ny 52 | do i = 1, nx 53 | curr%data(i, j) = prev%data(i, j) + a * dt * & 54 | & ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + & 55 | & prev%data(i+1, j)) / curr%dx**2 + & 56 | & (prev%data(i, j-1) - 2.0 * prev%data(i, j) + & 57 | & prev%data(i, j+1)) / curr%dy**2) 58 | end do 59 | end do 60 | end subroutine evolve 61 | 62 | end module core 63 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/heat_mod.F90: -------------------------------------------------------------------------------- 1 | ! Field metadata for heat equation solver 2 | module heat 3 | use iso_fortran_env, only : REAL64 4 | implicit none 5 | 6 | integer, parameter :: dp = REAL64 7 | real(dp), parameter :: DX = 0.01, DY = 0.01 ! Fixed grid spacing 8 | 9 | type :: field 10 | integer :: nx ! local dimension of the field 11 | integer :: ny 12 | integer :: nx_full ! global dimension of the field 13 | integer :: ny_full 14 | real(dp) :: dx 15 | real(dp) :: dy 16 | real(dp), dimension(:,:), allocatable :: data 17 | end type field 18 | 19 | type :: parallel_data 20 | integer :: size 21 | integer :: rank 22 | integer :: nleft, nright ! Ranks of neighbouring MPI tasks 23 | end type parallel_data 24 | 25 | contains 26 | ! Initialize the field type metadata 27 | ! Arguments: 28 | ! field0 (type(field)): input field 29 | ! 
nx, ny, dx, dy: field dimensions and spatial step size 30 | subroutine set_field_dimensions(field0, nx, ny, parallel) 31 | implicit none 32 | 33 | type(field), intent(out) :: field0 34 | integer, intent(in) :: nx, ny 35 | type(parallel_data), intent(in) :: parallel 36 | 37 | integer :: nx_local, ny_local 38 | 39 | nx_local = nx 40 | ny_local = ny / parallel%size 41 | 42 | field0%dx = DX 43 | field0%dy = DY 44 | field0%nx = nx_local 45 | field0%ny = ny_local 46 | field0%nx_full = nx 47 | field0%ny_full = ny 48 | 49 | end subroutine set_field_dimensions 50 | 51 | subroutine parallel_setup(parallel, nx, ny) 52 | use mpi 53 | 54 | implicit none 55 | 56 | type(parallel_data), intent(out) :: parallel 57 | integer, intent(in), optional :: nx, ny 58 | 59 | integer :: ny_local 60 | integer :: ierr 61 | 62 | call mpi_comm_size(MPI_COMM_WORLD, parallel%size, ierr) 63 | 64 | if (present(ny)) then 65 | ny_local = ny / parallel%size 66 | if (ny_local * parallel%size /= ny) then 67 | write(*,*) 'Cannot divide grid evenly to processors' 68 | call mpi_abort(MPI_COMM_WORLD, -2, ierr) 69 | end if 70 | end if 71 | 72 | call mpi_comm_rank(MPI_COMM_WORLD, parallel%rank, ierr) 73 | 74 | parallel%nleft = parallel%rank - 1 75 | parallel%nright = parallel%rank + 1 76 | 77 | if (parallel%nleft < 0) then 78 | parallel%nleft = MPI_PROC_NULL 79 | end if 80 | if (parallel%nright > parallel%size - 1) then 81 | parallel%nright = MPI_PROC_NULL 82 | end if 83 | 84 | end subroutine parallel_setup 85 | 86 | end module heat 87 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/main.F90: -------------------------------------------------------------------------------- 1 | ! Heat equation solver in 2D. 2 | 3 | program heat_solve 4 | use heat 5 | use core 6 | use io 7 | use setup 8 | use utilities 9 | use mpi 10 | 11 | implicit none 12 | 13 | real(dp), parameter :: a = 0.5 ! Diffusion constant 14 | type(field) :: current, previous ! Current and previus temperature fields 15 | 16 | real(dp) :: dt ! Time step 17 | integer :: nsteps ! Number of time steps 18 | integer, parameter :: image_interval = 500 ! Image output interval 19 | integer, parameter :: checkpoint_interval = 200 ! restart interval 20 | 21 | type(parallel_data) :: parallelization 22 | integer :: ierr 23 | 24 | integer :: iter 25 | 26 | real(kind=dp) :: start, stop ! Timers 27 | 28 | call mpi_init(ierr) 29 | 30 | call initialize(current, previous, nsteps, parallelization) 31 | 32 | ! Draw the picture of the initial state 33 | call write_field(current, 0, parallelization) 34 | 35 | ! Largest stable time step 36 | dt = current%dx**2 * current%dy**2 / & 37 | & (2.0 * a * (current%dx**2 + current%dy**2)) 38 | 39 | ! Main iteration loop, save a picture every 40 | ! image_interval steps 41 | 42 | start = mpi_wtime() 43 | 44 | do iter = 1, nsteps 45 | call exchange(previous, parallelization) 46 | call evolve(current, previous, a, dt) 47 | if (mod(iter, image_interval) == 0) then 48 | call write_field(current, iter, parallelization) 49 | end if 50 | if (mod(iter, checkpoint_interval) == 0) then 51 | call write_restart(current, parallelization, iter) 52 | end if 53 | call swap_fields(current, previous) 54 | end do 55 | 56 | stop = mpi_wtime() 57 | 58 | if (parallelization % rank == 0) then 59 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.' 
60 | write(*,'(A,G0)') 'Reference value at 5,5: ', previous % data(5,5) 61 | end if 62 | 63 | call finalize(current, previous) 64 | 65 | call mpi_finalize(ierr) 66 | 67 | end program heat_solve 68 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/pngwriter_mod.F90: -------------------------------------------------------------------------------- 1 | ! PNG writer for heat equation solver 2 | module pngwriter 3 | use heat 4 | 5 | contains 6 | 7 | function save_png(data, nx, ny, fname) result(stat) 8 | 9 | use, intrinsic :: ISO_C_BINDING 10 | implicit none 11 | 12 | real(dp), dimension(:,:), intent(in) :: data 13 | integer, intent(in) :: nx, ny 14 | character(len=*), intent(in) :: fname 15 | integer :: stat 16 | 17 | ! Interface for save_png C-function 18 | interface 19 | ! The C-function definition is 20 | ! int save_png(double *data, const int nx, const int ny, 21 | ! const char *fname) 22 | function save_png_c(data, nx, ny, fname, order) & 23 | & bind(C,name="save_png") result(stat) 24 | use, intrinsic :: ISO_C_BINDING 25 | implicit none 26 | real(kind=C_DOUBLE) :: data(*) 27 | integer(kind=C_INT), value, intent(IN) :: nx, ny 28 | character(kind=C_CHAR), intent(IN) :: fname(*) 29 | character(kind=C_CHAR), value, intent(IN) :: order 30 | integer(kind=C_INT) :: stat 31 | end function save_png_c 32 | end interface 33 | 34 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') 35 | if (stat /= 0) then 36 | write(*,*) 'save_png returned error!' 37 | end if 38 | 39 | end function save_png 40 | 41 | end module pngwriter 42 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/solution/Makefile: -------------------------------------------------------------------------------- 1 | COMP=intel 2 | 3 | COMMONDIR=../../common 4 | LIBPNGDIR=/appl/opt/libpng 5 | 6 | ifeq ($(COMP),cray) 7 | FC=ftn 8 | CC=cc 9 | FCFLAGS=-O3 10 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 11 | LDFLAGS=-L$(LIBPNGDIR)/lib 12 | LIBS=-lpng -lz 13 | endif 14 | 15 | ifeq ($(COMP),gnu) 16 | FC=mpif90 17 | CC=gcc 18 | FCFLAGS=-O3 19 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 20 | LDFLAGS=-L$(LIBPNGDIR)/lib 21 | LIBS=-lpng -lz 22 | endif 23 | 24 | ifeq ($(COMP),intel) 25 | FC=mpif90 26 | CC=icc 27 | FCFLAGS=-O3 28 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 29 | LDFLAGS=-L$(LIBPNGDIR)/lib 30 | LIBS=-lpng -lz 31 | endif 32 | 33 | EXE=heat_mpi 34 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o 35 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 36 | 37 | 38 | all: $(EXE) 39 | 40 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 41 | core.o: core.F90 heat_mod.o 42 | utilities.o: utilities.F90 heat_mod.o 43 | io.o: io.F90 heat_mod.o pngwriter_mod.o 44 | setup.o: setup.F90 heat_mod.o utilities.o io.o 45 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o 46 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o 47 | 48 | $(EXE): $(OBJS) $(OBJS_PNG) 49 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 50 | 51 | %.o: %.F90 52 | $(FC) $(FCFLAGS) -c $< -o $@ 53 | 54 | %.o: %.c 55 | $(CC) $(CCFLAGS) -c $< -o $@ 56 | 57 | .PHONY: clean 58 | clean: 59 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ 60 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/solution/core.F90: -------------------------------------------------------------------------------- 1 | ! 
Main solver routines for heat equation solver 2 | module core 3 | use heat 4 | 5 | contains 6 | 7 | ! Exchange the boundary data between MPI tasks 8 | subroutine exchange(field0, parallel) 9 | use mpi 10 | 11 | implicit none 12 | 13 | type(field), intent(inout) :: field0 14 | type(parallel_data), intent(in) :: parallel 15 | 16 | integer :: ierr 17 | 18 | ! Send to left, receive from right 19 | call mpi_sendrecv(field0%data(:, 1), field0%nx + 2, MPI_DOUBLE_PRECISION, & 20 | & parallel%nleft, 11, & 21 | & field0%data(:, field0%ny + 1), field0%nx + 2, MPI_DOUBLE_PRECISION, & 22 | & parallel%nright, 11, & 23 | & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) 24 | 25 | ! Send to right, receive from left 26 | call mpi_sendrecv(field0%data(:, field0%ny), field0%nx + 2, MPI_DOUBLE_PRECISION, & 27 | & parallel%nright, 12, & 28 | & field0%data(:, 0), field0%nx + 2, MPI_DOUBLE_PRECISION,& 29 | & parallel%nleft, 12, & 30 | & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) 31 | 32 | end subroutine exchange 33 | 34 | ! Compute one time step of temperature evolution 35 | ! Arguments: 36 | ! curr (type(field)): current temperature values 37 | ! prev (type(field)): values from previous time step 38 | ! a (real(dp)): update equation constant 39 | ! dt (real(dp)): time step value 40 | subroutine evolve(curr, prev, a, dt) 41 | 42 | implicit none 43 | 44 | type(field), intent(inout) :: curr, prev 45 | real(dp) :: a, dt 46 | integer :: i, j, nx, ny 47 | 48 | nx = curr%nx 49 | ny = curr%ny 50 | 51 | do j = 1, ny 52 | do i = 1, nx 53 | curr%data(i, j) = prev%data(i, j) + a * dt * & 54 | & ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + & 55 | & prev%data(i+1, j)) / curr%dx**2 + & 56 | & (prev%data(i, j-1) - 2.0 * prev%data(i, j) + & 57 | & prev%data(i, j+1)) / curr%dy**2) 58 | end do 59 | end do 60 | end subroutine evolve 61 | 62 | end module core 63 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/solution/heat_mod.F90: -------------------------------------------------------------------------------- 1 | ! Field metadata for heat equation solver 2 | module heat 3 | use iso_fortran_env, only : REAL64 4 | implicit none 5 | 6 | integer, parameter :: dp = REAL64 7 | real(dp), parameter :: DX = 0.01, DY = 0.01 ! Fixed grid spacing 8 | 9 | type :: field 10 | integer :: nx ! local dimension of the field 11 | integer :: ny 12 | integer :: nx_full ! global dimension of the field 13 | integer :: ny_full 14 | real(dp) :: dx 15 | real(dp) :: dy 16 | real(dp), dimension(:,:), allocatable :: data 17 | end type field 18 | 19 | type :: parallel_data 20 | integer :: size 21 | integer :: rank 22 | integer :: nleft, nright ! Ranks of neighbouring MPI tasks 23 | end type parallel_data 24 | 25 | contains 26 | ! Initialize the field type metadata 27 | ! Arguments: 28 | ! field0 (type(field)): input field 29 | ! 
nx, ny, dx, dy: field dimensions and spatial step size 30 | subroutine set_field_dimensions(field0, nx, ny, parallel) 31 | implicit none 32 | 33 | type(field), intent(out) :: field0 34 | integer, intent(in) :: nx, ny 35 | type(parallel_data), intent(in) :: parallel 36 | 37 | integer :: nx_local, ny_local 38 | 39 | nx_local = nx 40 | ny_local = ny / parallel%size 41 | 42 | field0%dx = DX 43 | field0%dy = DY 44 | field0%nx = nx_local 45 | field0%ny = ny_local 46 | field0%nx_full = nx 47 | field0%ny_full = ny 48 | 49 | end subroutine set_field_dimensions 50 | 51 | subroutine parallel_setup(parallel, nx, ny) 52 | use mpi 53 | 54 | implicit none 55 | 56 | type(parallel_data), intent(out) :: parallel 57 | integer, intent(in), optional :: nx, ny 58 | 59 | integer :: ny_local 60 | integer :: ierr 61 | 62 | call mpi_comm_size(MPI_COMM_WORLD, parallel%size, ierr) 63 | 64 | if (present(ny)) then 65 | ny_local = ny / parallel%size 66 | if (ny_local * parallel%size /= ny) then 67 | write(*,*) 'Cannot divide grid evenly to processors' 68 | call mpi_abort(MPI_COMM_WORLD, -2, ierr) 69 | end if 70 | end if 71 | 72 | call mpi_comm_rank(MPI_COMM_WORLD, parallel%rank, ierr) 73 | 74 | parallel%nleft = parallel%rank - 1 75 | parallel%nright = parallel%rank + 1 76 | 77 | if (parallel%nleft < 0) then 78 | parallel%nleft = MPI_PROC_NULL 79 | end if 80 | if (parallel%nright > parallel%size - 1) then 81 | parallel%nright = MPI_PROC_NULL 82 | end if 83 | 84 | end subroutine parallel_setup 85 | 86 | end module heat 87 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/solution/main.F90: -------------------------------------------------------------------------------- 1 | ! Heat equation solver in 2D. 2 | 3 | program heat_solve 4 | use heat 5 | use core 6 | use io 7 | use setup 8 | use utilities 9 | use mpi 10 | 11 | implicit none 12 | 13 | real(dp), parameter :: a = 0.5 ! Diffusion constant 14 | type(field) :: current, previous ! Current and previus temperature fields 15 | 16 | real(dp) :: dt ! Time step 17 | integer :: nsteps ! Number of time steps 18 | integer, parameter :: image_interval = 500 ! Image output interval 19 | integer, parameter :: checkpoint_interval = 200 ! restart interval 20 | 21 | type(parallel_data) :: parallelization 22 | integer :: ierr 23 | 24 | integer :: iter 25 | 26 | real(kind=dp) :: start, stop ! Timers 27 | 28 | call mpi_init(ierr) 29 | 30 | call initialize(current, previous, nsteps, parallelization) 31 | 32 | ! Draw the picture of the initial state 33 | call write_field(current, 0, parallelization) 34 | 35 | ! Largest stable time step 36 | dt = current%dx**2 * current%dy**2 / & 37 | & (2.0 * a * (current%dx**2 + current%dy**2)) 38 | 39 | ! Main iteration loop, save a picture every 40 | ! image_interval steps 41 | 42 | start = mpi_wtime() 43 | 44 | do iter = 1, nsteps 45 | call exchange(previous, parallelization) 46 | call evolve(current, previous, a, dt) 47 | if (mod(iter, image_interval) == 0) then 48 | call write_field(current, iter, parallelization) 49 | end if 50 | if (mod(iter, checkpoint_interval) == 0) then 51 | call write_restart(current, parallelization, iter) 52 | end if 53 | call swap_fields(current, previous) 54 | end do 55 | 56 | stop = mpi_wtime() 57 | 58 | if (parallelization % rank == 0) then 59 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.' 
60 | write(*,'(A,G0)') 'Reference value at 5,5: ', previous % data(5,5) 61 | end if 62 | 63 | call finalize(current, previous) 64 | 65 | call mpi_finalize(ierr) 66 | 67 | end program heat_solve 68 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/solution/pngwriter_mod.F90: -------------------------------------------------------------------------------- 1 | ! PNG writer for heat equation solver 2 | module pngwriter 3 | use heat 4 | 5 | contains 6 | 7 | function save_png(data, nx, ny, fname) result(stat) 8 | 9 | use, intrinsic :: ISO_C_BINDING 10 | implicit none 11 | 12 | real(dp), dimension(:,:), intent(in) :: data 13 | integer, intent(in) :: nx, ny 14 | character(len=*), intent(in) :: fname 15 | integer :: stat 16 | 17 | ! Interface for save_png C-function 18 | interface 19 | ! The C-function definition is 20 | ! int save_png(double *data, const int nx, const int ny, 21 | ! const char *fname) 22 | function save_png_c(data, nx, ny, fname, order) & 23 | & bind(C,name="save_png") result(stat) 24 | use, intrinsic :: ISO_C_BINDING 25 | implicit none 26 | real(kind=C_DOUBLE) :: data(*) 27 | integer(kind=C_INT), value, intent(IN) :: nx, ny 28 | character(kind=C_CHAR), intent(IN) :: fname(*) 29 | character(kind=C_CHAR), value, intent(IN) :: order 30 | integer(kind=C_INT) :: stat 31 | end function save_png_c 32 | end interface 33 | 34 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') 35 | if (stat /= 0) then 36 | write(*,*) 'save_png returned error!' 37 | end if 38 | 39 | end function save_png 40 | 41 | end module pngwriter 42 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/solution/utilities.F90: -------------------------------------------------------------------------------- 1 | ! Utility routines for heat equation solver 2 | ! NOTE: This file does not need to be edited! 3 | module utilities 4 | use heat 5 | 6 | contains 7 | 8 | ! Swap the data fields of two variables of type field 9 | ! Arguments: 10 | ! curr, prev (type(field)): the two variables that are swapped 11 | subroutine swap_fields(curr, prev) 12 | 13 | implicit none 14 | 15 | type(field), intent(inout) :: curr, prev 16 | real(dp), allocatable, dimension(:,:) :: tmp 17 | 18 | call move_alloc(curr%data, tmp) 19 | call move_alloc(prev%data, curr%data) 20 | call move_alloc(tmp, prev%data) 21 | end subroutine swap_fields 22 | 23 | ! Copy the data from one field to another 24 | ! Arguments: 25 | ! from_field (type(field)): variable to copy from 26 | ! to_field (type(field)): variable to copy to 27 | subroutine copy_fields(from_field, to_field) 28 | 29 | implicit none 30 | 31 | type(field), intent(in) :: from_field 32 | type(field), intent(out) :: to_field 33 | 34 | ! Consistency checks 35 | if (.not.allocated(from_field%data)) then 36 | write (*,*) "Can not copy from a field without allocated data" 37 | stop 38 | end if 39 | if (.not.allocated(to_field%data)) then 40 | ! 
Target is not initialize, allocate memory 41 | allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), & 42 | & lbound(from_field%data, 2):ubound(from_field%data, 2))) 43 | else if (any(shape(from_field%data) /= shape(to_field%data))) then 44 | write (*,*) "Wrong field data sizes in copy routine" 45 | print *, shape(from_field%data), shape(to_field%data) 46 | stop 47 | end if 48 | 49 | to_field%data = from_field%data 50 | 51 | to_field%nx = from_field%nx 52 | to_field%ny = from_field%ny 53 | to_field%dx = from_field%dx 54 | to_field%dy = from_field%dy 55 | end subroutine copy_fields 56 | 57 | end module utilities 58 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/utilities.F90: -------------------------------------------------------------------------------- 1 | ! Utility routines for heat equation solver 2 | ! NOTE: This file does not need to be edited! 3 | module utilities 4 | use heat 5 | 6 | contains 7 | 8 | ! Swap the data fields of two variables of type field 9 | ! Arguments: 10 | ! curr, prev (type(field)): the two variables that are swapped 11 | subroutine swap_fields(curr, prev) 12 | 13 | implicit none 14 | 15 | type(field), intent(inout) :: curr, prev 16 | real(dp), allocatable, dimension(:,:) :: tmp 17 | 18 | call move_alloc(curr%data, tmp) 19 | call move_alloc(prev%data, curr%data) 20 | call move_alloc(tmp, prev%data) 21 | end subroutine swap_fields 22 | 23 | ! Copy the data from one field to another 24 | ! Arguments: 25 | ! from_field (type(field)): variable to copy from 26 | ! to_field (type(field)): variable to copy to 27 | subroutine copy_fields(from_field, to_field) 28 | 29 | implicit none 30 | 31 | type(field), intent(in) :: from_field 32 | type(field), intent(out) :: to_field 33 | 34 | ! Consistency checks 35 | if (.not.allocated(from_field%data)) then 36 | write (*,*) "Can not copy from a field without allocated data" 37 | stop 38 | end if 39 | if (.not.allocated(to_field%data)) then 40 | ! Target is not initialize, allocate memory 41 | allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), & 42 | & lbound(from_field%data, 2):ubound(from_field%data, 2))) 43 | else if (any(shape(from_field%data) /= shape(to_field%data))) then 44 | write (*,*) "Wrong field data sizes in copy routine" 45 | print *, shape(from_field%data), shape(to_field%data) 46 | stop 47 | end if 48 | 49 | to_field%data = from_field%data 50 | 51 | to_field%nx = from_field%nx 52 | to_field%ny = from_field%ny 53 | to_field%dx = from_field%dx 54 | to_field%dy = from_field%dy 55 | end subroutine copy_fields 56 | 57 | end module utilities 58 | -------------------------------------------------------------------------------- /parallel-io/mpi-io/README.md: -------------------------------------------------------------------------------- 1 | ## Simple MPI-IO 2 | 3 | Implement parallel I/O in a test code using MPI-IO. 4 | 5 | Write data from all MPI tasks to a single output file using MPI-IO. Each task 6 | should write their own local part of the data directly to the correct position 7 | in the file. Skeleton code to start from is available in `mpi-io.c` (or 8 | `mpi-io.f90`). 
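Before opening the skeleton, the write pattern this exercise is after can be summarised in a few lines: every rank computes a byte offset from its own rank and writes its local block into the shared file with a collective `MPI_File_write_at_all`, which mirrors the approach used in `solution/mpi-io.c` later in this repository. The sketch below is an illustration only; the file name `sketch.dat` and the fixed `LOCALSIZE` are made up for the example and are not part of the exercise code.

```c
#include <mpi.h>

#define LOCALSIZE 8   /* elements written per task; illustrative value */

int main(int argc, char *argv[])
{
    int rank;
    int buf[LOCALSIZE];
    MPI_File fh;
    MPI_Offset offset;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Fill the local block with globally increasing values */
    for (int i = 0; i < LOCALSIZE; i++) {
        buf[i] = rank * LOCALSIZE + i + 1;
    }

    /* Every rank writes LOCALSIZE ints, so rank r starts at byte
       r * LOCALSIZE * sizeof(int) in the shared file */
    offset = (MPI_Offset)rank * LOCALSIZE * sizeof(int);

    /* All ranks open the same file and write their disjoint regions */
    MPI_File_open(MPI_COMM_WORLD, "sketch.dat",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    MPI_File_write_at_all(fh, offset, buf, LOCALSIZE, MPI_INT,
                          MPI_STATUS_IGNORE);
    MPI_File_close(&fh);

    MPI_Finalize();
    return 0;
}
```

Note that the offset passed to `MPI_File_write_at_all` is given in bytes, which is why it is multiplied by `sizeof(int)`.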
9 | -------------------------------------------------------------------------------- /parallel-io/mpi-io/mpi-io.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define DATASIZE 64 8 | #define WRITER_ID 0 9 | 10 | void mpiio_writer(int, int *, int); 11 | 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int my_id, ntasks, i, localsize; 16 | int *localvector; 17 | 18 | MPI_Init(&argc, &argv); 19 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 20 | MPI_Comm_rank(MPI_COMM_WORLD, &my_id); 21 | 22 | if (ntasks > 64) { 23 | fprintf(stderr, "Datasize (64) should be divisible by number " 24 | "of tasks.\n"); 25 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 26 | } 27 | 28 | if (DATASIZE % ntasks != 0) { 29 | fprintf(stderr, "Datasize (64) should be divisible by number " 30 | "of tasks.\n"); 31 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 32 | } 33 | 34 | localsize = DATASIZE / ntasks; 35 | localvector = (int *) malloc(localsize * sizeof(int)); 36 | 37 | for (i = 0; i < localsize; i++) { 38 | localvector[i] = i + 1 + localsize * my_id; 39 | } 40 | 41 | mpiio_writer(my_id, localvector, localsize); 42 | 43 | free(localvector); 44 | 45 | MPI_Finalize(); 46 | return 0; 47 | } 48 | 49 | void mpiio_writer(int my_id, int *localvector, int localsize) 50 | { 51 | MPI_File fh; 52 | MPI_Offset offset; 53 | 54 | /* TODO: Write the data to an output file "output.dat" using MPI IO. Each 55 | process should write their own local vectors to correct location 56 | of the output file. */ 57 | } 58 | -------------------------------------------------------------------------------- /parallel-io/mpi-io/mpi-io.f90: -------------------------------------------------------------------------------- 1 | program mpiio 2 | use mpi 3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit 4 | implicit none 5 | 6 | integer, parameter :: datasize = 64, writer_id = 0 7 | integer :: rc, my_id, ntasks, localsize, i 8 | integer, dimension(:), allocatable :: localvector 9 | 10 | call mpi_init(rc) 11 | call mpi_comm_size(mpi_comm_world, ntasks, rc) 12 | call mpi_comm_rank(mpi_comm_world, my_id, rc) 13 | 14 | if (ntasks > 64) then 15 | write(error_unit, *) 'Maximum number of tasks is 64!' 16 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 17 | end if 18 | 19 | if (mod(datasize, ntasks) /= 0) then 20 | write(error_unit,*) 'Datasize (64) should be divisible by number of tasks' 21 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 22 | end if 23 | 24 | localsize = datasize / ntasks 25 | allocate(localvector(localsize)) 26 | 27 | localvector = [(i + my_id * localsize, i=1,localsize)] 28 | 29 | call mpiio_writer() 30 | 31 | deallocate(localvector) 32 | call mpi_finalize(rc) 33 | 34 | contains 35 | 36 | subroutine mpiio_writer() 37 | implicit none 38 | integer :: fh, rc, dsize 39 | integer(kind=MPI_OFFSET_KIND) :: offset; 40 | 41 | call mpi_type_size(MPI_INTEGER, dsize, rc) 42 | 43 | ! TODO: write the output file "output.dat" using MPI IO. Each 44 | ! rank should write their own local vectors to correct 45 | ! locations in the output file. 
46 | 47 | end subroutine mpiio_writer 48 | 49 | end program mpiio 50 | -------------------------------------------------------------------------------- /parallel-io/mpi-io/solution/mpi-io.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define DATASIZE 64 8 | #define WRITER_ID 0 9 | 10 | void mpiio_writer(int, int *, int); 11 | 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int my_id, ntasks, i, localsize; 16 | int *localvector; 17 | 18 | MPI_Init(&argc, &argv); 19 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 20 | MPI_Comm_rank(MPI_COMM_WORLD, &my_id); 21 | 22 | if (ntasks > 64) { 23 | fprintf(stderr, "Datasize (64) should be divisible by number " 24 | "of tasks.\n"); 25 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 26 | } 27 | 28 | if (DATASIZE % ntasks != 0) { 29 | fprintf(stderr, "Datasize (64) should be divisible by number " 30 | "of tasks.\n"); 31 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 32 | } 33 | 34 | localsize = DATASIZE / ntasks; 35 | localvector = (int *) malloc(localsize * sizeof(int)); 36 | 37 | for (i = 0; i < localsize; i++) { 38 | localvector[i] = i + 1 + localsize * my_id; 39 | } 40 | 41 | mpiio_writer(my_id, localvector, localsize); 42 | 43 | free(localvector); 44 | 45 | MPI_Finalize(); 46 | return 0; 47 | } 48 | 49 | void mpiio_writer(int my_id, int *localvector, int localsize) 50 | { 51 | MPI_File fh; 52 | MPI_Offset offset; 53 | 54 | MPI_File_open(MPI_COMM_WORLD, "output.dat", 55 | MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); 56 | 57 | offset = my_id * localsize * sizeof(int); 58 | 59 | MPI_File_write_at_all(fh, offset, localvector, 60 | localsize, MPI_INT, MPI_STATUS_IGNORE); 61 | 62 | MPI_File_close(&fh); 63 | } 64 | -------------------------------------------------------------------------------- /parallel-io/mpi-io/solution/mpi-io.f90: -------------------------------------------------------------------------------- 1 | program mpiio 2 | use mpi 3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit 4 | implicit none 5 | 6 | integer, parameter :: datasize = 64, writer_id = 0 7 | integer :: rc, my_id, ntasks, localsize, i 8 | integer, dimension(:), allocatable :: localvector 9 | 10 | call mpi_init(rc) 11 | call mpi_comm_size(mpi_comm_world, ntasks, rc) 12 | call mpi_comm_rank(mpi_comm_world, my_id, rc) 13 | 14 | if (ntasks > 64) then 15 | write(error_unit, *) 'Maximum number of tasks is 64!' 
16 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 17 | end if 18 | 19 | if (mod(datasize, ntasks) /= 0) then 20 | write(error_unit,*) 'Datasize (64) should be divisible by number of tasks' 21 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 22 | end if 23 | 24 | localsize = datasize / ntasks 25 | allocate(localvector(localsize)) 26 | 27 | localvector = [(i + my_id * localsize, i=1,localsize)] 28 | 29 | call mpiio_writer() 30 | 31 | deallocate(localvector) 32 | call mpi_finalize(rc) 33 | 34 | contains 35 | 36 | subroutine mpiio_writer() 37 | implicit none 38 | integer :: fh, rc, dsize 39 | integer(kind=MPI_OFFSET_KIND) :: offset; 40 | 41 | call mpi_type_size(MPI_INTEGER, dsize, rc) 42 | 43 | offset = my_id * localsize * dsize 44 | 45 | call mpi_file_open(MPI_COMM_WORLD, 'output.dat', & 46 | & MPI_MODE_CREATE+MPI_MODE_WRONLY, MPI_INFO_NULL, fh, rc) 47 | call mpi_file_write_at_all(fh, offset, localvector, localsize, & 48 | & MPI_INTEGER, MPI_STATUS_IGNORE, rc) 49 | call mpi_file_close(fh, rc) 50 | end subroutine mpiio_writer 51 | 52 | end program mpiio 53 | -------------------------------------------------------------------------------- /parallel-io/posix/README.md: -------------------------------------------------------------------------------- 1 | ## Parallel I/O with Posix 2 | 3 | Implement parallel I/O in a test code using MPI and normal I/O calls. 4 | 5 | 1. Write data from all MPI tasks to a single file using the spokesman 6 | strategy, i.e. gather data to a single MPI task and write it to a file. 7 | The data should be kept in the order of the MPI ranks. You may start from 8 | the skeleton code in `c/spokesman.c` (or `fortran/spokesman.F90`). 9 | 10 | 2. Verify the above write by reading the file using the same spokesman 11 | strategy, but with a different number of MPI tasks than in writing. 12 | Skeleton code to start from is available in `c/spokesman_reader.c` (or 13 | `fortran/spokesman_reader.F90`). 14 | 15 | 3. Re-implement the write code so that all the MPI tasks write into separate 16 | files (aka "every man for himself" strategy). 
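For step 1 above, the core of the spokesman strategy fits in a few lines: gather the local vectors to one rank (`MPI_Gather` keeps them in rank order) and let that rank write the whole buffer with ordinary stdio calls. The sketch below is only an illustration under assumed sizes and the made-up file name `sketch-single.dat`; the worked solutions for all three steps are under `c/solution/` and `fortran/solution/`.

```c
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define LOCALSIZE 8   /* elements per task; illustrative value */

int main(int argc, char *argv[])
{
    int rank, ntasks;
    int buf[LOCALSIZE];
    int *fullvector = NULL;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ntasks);

    for (int i = 0; i < LOCALSIZE; i++) {
        buf[i] = rank * LOCALSIZE + i + 1;
    }

    /* Rank 0 is the spokesman: it needs room for everybody's block.
       MPI_Gather places the blocks in rank order, which keeps the file
       ordered by MPI rank as the exercise requires. */
    if (rank == 0) {
        fullvector = malloc((size_t)ntasks * LOCALSIZE * sizeof(int));
    }
    MPI_Gather(buf, LOCALSIZE, MPI_INT,
               fullvector, LOCALSIZE, MPI_INT, 0, MPI_COMM_WORLD);

    if (rank == 0) {
        FILE *fp = fopen("sketch-single.dat", "wb");
        if (fp != NULL) {
            fwrite(fullvector, sizeof(int), (size_t)ntasks * LOCALSIZE, fp);
            fclose(fp);
        }
        free(fullvector);
    }

    MPI_Finalize();
    return 0;
}
```

Compared with the MPI-IO version, this funnels all data through a single rank, which is simple but limits scalability at large task counts.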
17 | -------------------------------------------------------------------------------- /parallel-io/posix/c/solution/spokesman.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define DATASIZE 64 8 | #define WRITER_ID 0 9 | 10 | void single_writer(int, int *, int); 11 | 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int my_id, ntasks, i, localsize; 16 | int *localvector; 17 | 18 | MPI_Init(&argc, &argv); 19 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 20 | MPI_Comm_rank(MPI_COMM_WORLD, &my_id); 21 | 22 | if (ntasks > 64) { 23 | fprintf(stderr, "Datasize (64) should be divisible by number " 24 | "of tasks.\n"); 25 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 26 | } 27 | 28 | if (DATASIZE % ntasks != 0) { 29 | fprintf(stderr, "Datasize (64) should be divisible by number " 30 | "of tasks.\n"); 31 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 32 | } 33 | 34 | localsize = DATASIZE / ntasks; 35 | localvector = (int *) malloc(localsize * sizeof(int)); 36 | 37 | for (i = 0; i < localsize; i++) { 38 | localvector[i] = i + 1 + localsize * my_id; 39 | } 40 | 41 | single_writer(my_id, localvector, localsize); 42 | 43 | free(localvector); 44 | 45 | MPI_Finalize(); 46 | return 0; 47 | } 48 | 49 | void single_writer(int my_id, int *localvector, int localsize) 50 | { 51 | FILE *fp; 52 | int *fullvector; 53 | 54 | fullvector = (int *) malloc(DATASIZE * sizeof(int)); 55 | 56 | MPI_Gather(localvector, localsize, MPI_INT, fullvector, localsize, 57 | MPI_INT, WRITER_ID, MPI_COMM_WORLD); 58 | 59 | if (my_id == WRITER_ID) { 60 | if ((fp = fopen("singlewriter.dat", "wb")) == NULL) { 61 | fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno)); 62 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 63 | } else { 64 | fwrite(fullvector, sizeof(int), DATASIZE, fp); 65 | fclose(fp); 66 | printf("Wrote %d elements to file singlewriter.dat\n", DATASIZE); 67 | } 68 | } 69 | 70 | free(fullvector); 71 | } 72 | 73 | -------------------------------------------------------------------------------- /parallel-io/posix/c/spokesman.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define DATASIZE 64 8 | #define WRITER_ID 0 9 | 10 | void single_writer(int, int *, int); 11 | 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int my_id, ntasks, i, localsize; 16 | int *localvector; 17 | 18 | MPI_Init(&argc, &argv); 19 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 20 | MPI_Comm_rank(MPI_COMM_WORLD, &my_id); 21 | 22 | if (ntasks > 64) { 23 | fprintf(stderr, "Datasize (64) should be divisible by number " 24 | "of tasks.\n"); 25 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 26 | } 27 | 28 | if (DATASIZE % ntasks != 0) { 29 | fprintf(stderr, "Datasize (64) should be divisible by number " 30 | "of tasks.\n"); 31 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 32 | } 33 | 34 | localsize = DATASIZE / ntasks; 35 | localvector = (int *) malloc(localsize * sizeof(int)); 36 | 37 | for (i = 0; i < localsize; i++) { 38 | localvector[i] = i + 1 + localsize * my_id; 39 | } 40 | 41 | single_writer(my_id, localvector, localsize); 42 | 43 | free(localvector); 44 | 45 | MPI_Finalize(); 46 | return 0; 47 | } 48 | 49 | void single_writer(int my_id, int *localvector, int localsize) 50 | { 51 | FILE *fp; 52 | int *fullvector; 53 | 54 | /* TODO: Implement a function that will write the data to file so that 55 | a single process does the file io. 
Use rank WRITER_ID as the io rank */ 56 | 57 | free(fullvector); 58 | } 59 | 60 | -------------------------------------------------------------------------------- /parallel-io/posix/c/spokesman_reader.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define DATASIZE 64 8 | #define WRITER_ID 0 9 | 10 | void single_reader(int, int *, int); 11 | void ordered_print(int, int, int *, int); 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int my_id, ntasks, localsize; 16 | int *localvector; 17 | 18 | MPI_Init(&argc, &argv); 19 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 20 | MPI_Comm_rank(MPI_COMM_WORLD, &my_id); 21 | 22 | if (ntasks > 64) { 23 | fprintf(stderr, "Datasize (64) should be divisible by number " 24 | "of tasks.\n"); 25 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 26 | } 27 | 28 | if (DATASIZE % ntasks != 0) { 29 | fprintf(stderr, "Datasize (64) should be divisible by number " 30 | "of tasks.\n"); 31 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 32 | } 33 | 34 | localsize = DATASIZE / ntasks; 35 | localvector = (int *) malloc(localsize * sizeof(int)); 36 | 37 | single_reader(my_id, localvector, localsize); 38 | 39 | ordered_print(ntasks, my_id, localvector, localsize); 40 | 41 | free(localvector); 42 | 43 | MPI_Finalize(); 44 | return 0; 45 | } 46 | 47 | void single_reader(int my_id, int *localvector, int localsize) 48 | { 49 | FILE *fp; 50 | int *fullvector, nread; 51 | char *fname = "singlewriter.dat"; 52 | 53 | /* TODO: Implement a function that will read the data from a file so that 54 | a single process does the file io. Use rank WRITER_ID as the io rank */ 55 | 56 | free(fullvector); 57 | } 58 | 59 | /* Try to avoid this type of pattern when ever possible. 60 | Here we are using this serialized output just to make the 61 | debugging easier. */ 62 | void ordered_print(int ntasks, int rank, int *buffer, int n) 63 | { 64 | int task, i; 65 | 66 | for (task = 0; task < ntasks; task++) { 67 | if (rank == task) { 68 | printf("Task %i received:", rank); 69 | for (i = 0; i < n; i++) { 70 | printf(" %2i", buffer[i]); 71 | } 72 | printf("\n"); 73 | } 74 | MPI_Barrier(MPI_COMM_WORLD); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /parallel-io/posix/fortran/solution/separate-files.f90: -------------------------------------------------------------------------------- 1 | program pario 2 | use mpi 3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit 4 | implicit none 5 | 6 | integer, parameter :: datasize = 64, writer_id = 0 7 | integer :: rc, my_id, ntasks, localsize, i 8 | integer, dimension(:), allocatable :: localvector 9 | integer, dimension(datasize) :: fullvector 10 | 11 | call mpi_init(rc) 12 | call mpi_comm_size(mpi_comm_world, ntasks, rc) 13 | call mpi_comm_rank(mpi_comm_world, my_id, rc) 14 | 15 | if (ntasks > 64) then 16 | write(error_unit, *) 'Maximum number of tasks is 64!' 
17 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 18 | end if 19 | 20 | if (mod(datasize, ntasks) /= 0) then 21 | write(error_unit,*) 'Datasize (64) should be divisible by number of tasks' 22 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 23 | end if 24 | 25 | localsize = datasize / ntasks 26 | allocate(localvector(localsize)) 27 | 28 | localvector = [(i + my_id * localsize, i=1,localsize)] 29 | 30 | call many_writers() 31 | 32 | deallocate(localvector) 33 | call mpi_finalize(rc) 34 | 35 | contains 36 | 37 | subroutine single_writer() 38 | implicit none 39 | 40 | call mpi_gather(localvector, localsize, mpi_integer, fullvector, & 41 | & localsize, mpi_integer, writer_id, mpi_comm_world, rc) 42 | if (my_id == writer_id) then 43 | open(10, file='singlewriter.dat', status='replace', form='unformatted', & 44 | & access='stream') 45 | write(10, pos=1) fullvector 46 | close (10) 47 | write(output_unit,'(A,I0,A)') 'Wrote ', size(fullvector), & 48 | & ' elements to file singlewriter.dat' 49 | end if 50 | end subroutine single_writer 51 | 52 | subroutine many_writers() 53 | implicit none 54 | character(len=85) :: filename 55 | 56 | write(filename, '(A,I0,A)') 'manywriters-', my_id, '.dat' 57 | 58 | open(my_id+10, file=filename, status='replace', form='unformatted', & 59 | & access='stream') 60 | write(my_id+10, pos=1) localvector 61 | close (my_id+10) 62 | write(output_unit,'(A,I0,A,A)') 'Wrote ', size(localvector), & 63 | & ' elements to file ', filename 64 | end subroutine many_writers 65 | 66 | end program pario 67 | -------------------------------------------------------------------------------- /parallel-io/posix/fortran/solution/spokesman.f90: -------------------------------------------------------------------------------- 1 | program pario 2 | use mpi 3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit 4 | implicit none 5 | 6 | integer, parameter :: datasize = 64, writer_id = 0 7 | integer :: rc, my_id, ntasks, localsize, i 8 | integer, dimension(:), allocatable :: localvector 9 | integer, dimension(datasize) :: fullvector 10 | 11 | call mpi_init(rc) 12 | call mpi_comm_size(mpi_comm_world, ntasks, rc) 13 | call mpi_comm_rank(mpi_comm_world, my_id, rc) 14 | 15 | if (ntasks > 64) then 16 | write(error_unit, *) 'Maximum number of tasks is 64!' 
17 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 18 | end if 19 | 20 | if (mod(datasize, ntasks) /= 0) then 21 | write(error_unit,*) 'Datasize (64) should be divisible by number of tasks' 22 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 23 | end if 24 | 25 | localsize = datasize / ntasks 26 | allocate(localvector(localsize)) 27 | 28 | localvector = [(i + my_id * localsize, i=1,localsize)] 29 | 30 | call single_writer() 31 | 32 | deallocate(localvector) 33 | call mpi_finalize(rc) 34 | 35 | contains 36 | 37 | subroutine single_writer() 38 | implicit none 39 | 40 | call mpi_gather(localvector, localsize, mpi_integer, fullvector, & 41 | & localsize, mpi_integer, writer_id, mpi_comm_world, rc) 42 | if (my_id == writer_id) then 43 | open(10, file='singlewriter.dat', status='replace', form='unformatted', & 44 | & access='stream') 45 | write(10, pos=1) fullvector 46 | close (10) 47 | write(output_unit,'(A,I0,A)') 'Wrote ', size(fullvector), & 48 | & ' elements to file singlewriter.dat' 49 | end if 50 | end subroutine single_writer 51 | 52 | end program pario 53 | -------------------------------------------------------------------------------- /parallel-io/posix/fortran/solution/spokesman_reader.f90: -------------------------------------------------------------------------------- 1 | program pario 2 | use mpi 3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit 4 | implicit none 5 | 6 | integer, parameter :: datasize = 64, writer_id = 0 7 | integer :: rc, my_id, ntasks, localsize, i 8 | integer, dimension(:), allocatable :: localvector 9 | integer, dimension(datasize) :: fullvector 10 | 11 | call mpi_init(rc) 12 | call mpi_comm_size(mpi_comm_world, ntasks, rc) 13 | call mpi_comm_rank(mpi_comm_world, my_id, rc) 14 | 15 | if (ntasks > 64) then 16 | write(error_unit, *) 'Maximum number of tasks is 64!' 
17 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 18 | end if 19 | 20 | if (mod(datasize, ntasks) /= 0) then 21 | write(error_unit, *) 'Datasize (64) should be divisible by number of tasks' 22 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 23 | end if 24 | 25 | localsize = datasize / ntasks 26 | allocate(localvector(localsize)) 27 | 28 | localvector = [(i + my_id * localsize, i=1,localsize)] 29 | 30 | call single_reader() 31 | 32 | call ordered_print() 33 | 34 | deallocate(localvector) 35 | call mpi_finalize(rc) 36 | 37 | contains 38 | 39 | subroutine single_reader() 40 | implicit none 41 | 42 | if (my_id == writer_id) then 43 | open(10, file='singlewriter.dat', status='old', form='unformatted', & 44 | & access='stream') 45 | read(10, pos=1) fullvector 46 | close(10) 47 | write(output_unit,'(A,I0,A)') 'Read ', size(fullvector), & 48 | & ' elements from file singlewriter.dat' 49 | end if 50 | 51 | call mpi_scatter(fullvector, localsize, mpi_integer, localvector, & 52 | & localsize, mpi_integer, writer_id, mpi_comm_world, rc) 53 | 54 | end subroutine single_reader 55 | 56 | subroutine ordered_print 57 | implicit none 58 | integer :: task 59 | 60 | do task = 0, ntasks-1 61 | if (my_id == task) then 62 | write(output_unit, '(A,I0,A)', advance='no') 'Task ', & 63 | & my_id, ' received:' 64 | do i = 1, localsize 65 | write(output_unit, '(I3)', advance='no') localvector(i) 66 | end do 67 | write(output_unit,*) ' ' 68 | end if 69 | call mpi_barrier(MPI_COMM_WORLD, rc) 70 | end do 71 | 72 | end subroutine ordered_print 73 | 74 | 75 | end program pario 76 | -------------------------------------------------------------------------------- /parallel-io/posix/fortran/spokesman.f90: -------------------------------------------------------------------------------- 1 | program pario 2 | use mpi 3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit 4 | implicit none 5 | 6 | integer, parameter :: datasize = 64, writer_id = 0 7 | integer :: rc, my_id, ntasks, localsize, i 8 | integer, dimension(:), allocatable :: localvector 9 | integer, dimension(datasize) :: fullvector 10 | 11 | call mpi_init(rc) 12 | call mpi_comm_size(mpi_comm_world, ntasks, rc) 13 | call mpi_comm_rank(mpi_comm_world, my_id, rc) 14 | 15 | if (ntasks > 64) then 16 | write(error_unit, *) 'Maximum number of tasks is 64!' 17 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 18 | end if 19 | 20 | if (mod(datasize, ntasks) /= 0) then 21 | write(error_unit,*) 'Datasize (64) should be divisible by number of tasks' 22 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 23 | end if 24 | 25 | localsize = datasize / ntasks 26 | allocate(localvector(localsize)) 27 | 28 | localvector = [(i + my_id * localsize, i=1,localsize)] 29 | 30 | call single_writer() 31 | 32 | deallocate(localvector) 33 | call mpi_finalize(rc) 34 | 35 | contains 36 | 37 | subroutine single_writer() 38 | implicit none 39 | 40 | ! TODO: Implement a function that writes the whole array of elements 41 | !
to a file so that a single process is responsible for the file io 42 | 43 | end subroutine single_writer 44 | 45 | end program pario 46 | -------------------------------------------------------------------------------- /parallel-io/posix/fortran/spokesman_reader.f90: -------------------------------------------------------------------------------- 1 | program pario 2 | use mpi 3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit 4 | implicit none 5 | 6 | integer, parameter :: datasize = 64, writer_id = 0 7 | integer :: rc, my_id, ntasks, localsize, i 8 | integer, dimension(:), allocatable :: localvector 9 | integer, dimension(datasize) :: fullvector 10 | 11 | call mpi_init(rc) 12 | call mpi_comm_size(mpi_comm_world, ntasks, rc) 13 | call mpi_comm_rank(mpi_comm_world, my_id, rc) 14 | 15 | if (ntasks > 64) then 16 | write(error_unit, *) 'Maximum number of tasks is 64!' 17 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 18 | end if 19 | 20 | if (mod(datasize, ntasks) /= 0) then 21 | write(error_unit, *) 'Datasize (64) should be divisible by number of tasks' 22 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 23 | end if 24 | 25 | localsize = datasize / ntasks 26 | allocate(localvector(localsize)) 27 | 28 | localvector = [(i + my_id * localsize, i=1,localsize)] 29 | 30 | call single_reader() 31 | 32 | call ordered_print() 33 | 34 | deallocate(localvector) 35 | call mpi_finalize(rc) 36 | 37 | contains 38 | 39 | subroutine single_reader() 40 | implicit none 41 | 42 | ! TODO: Implement a function that will read the data from a file so that 43 | ! a single process does the file io. Use rank WRITER_ID as the io rank 44 | 45 | end subroutine single_reader 46 | 47 | subroutine ordered_print 48 | implicit none 49 | integer :: task 50 | 51 | do task = 0, ntasks-1 52 | if (my_id == task) then 53 | write(output_unit, '(A,I0,A)', advance='no') 'Task ', & 54 | & my_id, ' received:' 55 | do i = 1, localsize 56 | write(output_unit, '(I3)', advance='no') localvector(i) 57 | end do 58 | write(output_unit,*) ' ' 59 | end if 60 | call mpi_barrier(MPI_COMM_WORLD, rc) 61 | end do 62 | 63 | end subroutine ordered_print 64 | 65 | 66 | end program pario 67 | --------------------------------------------------------------------------------