├── .gitignore ├── LICENSE-MIT ├── LICENSE.md ├── README.md ├── benchmarks ├── neighbor-alltoall.cpp ├── neighbor-alltoall2.cpp ├── neighbor-gather.cpp └── neighbor-gather2.cpp ├── demos ├── cartesian-topology.cpp ├── demos.md ├── example.hdf5 ├── mpi-io-fileview.c ├── mpi-io-no-fileview.c ├── mpi-io-write.f90 ├── mpi_write_at.F90 ├── neighbor.cpp ├── neighbor_alltoallw.F90 └── neighbor_alltoallw.cpp ├── docs ├── 01-custom-communicators.md ├── 02-persistent-neighbour.md ├── 03-datatypes-intro.md ├── 04-datatypes-advanced.md ├── 05-parallel-io-posix.md ├── 06-mpi-io.md ├── 07-parallel-io-hdf5.md ├── img │ ├── cartesian-neighborhood.svg │ ├── cluster_diagram.jpeg │ ├── colorcomm1.svg │ ├── colorcomm2.svg │ ├── communicator.svg │ ├── communicators.png │ ├── communicators.svg │ ├── contiguous.svg │ ├── cray_top500.png │ ├── fortran-array-layout.png │ ├── fortran-array-layout.svg │ ├── hdf5-hyperslab.png │ ├── hdf5-hyperslab.svg │ ├── io-illustration.png │ ├── io-illustration.svg │ ├── io-layers.png │ ├── io-layers.svg │ ├── io-subarray.png │ ├── io-subarray.svg │ ├── lustre-architecture.png │ ├── lustre-architecture.svg │ ├── lustre-striping.png │ ├── lustre-striping.svg │ ├── neighbor-alltoall.svg │ ├── neighbor-gather.svg │ ├── nonblocking-io.png │ ├── nonblocking-io.svg │ ├── osu-benchmark.png │ ├── posix-everybody.png │ ├── posix-everybody.svg │ ├── posix-spokesman.png │ ├── posix-spokesman.svg │ ├── processes-threads.png │ ├── processes-threads.svg │ ├── striping-performance.png │ ├── svg2png.sh │ ├── triangle.svg │ ├── two-d-cartesian-grid.png │ ├── two-d-cartesian-grid.svg │ ├── type-struct.png │ ├── type-struct.svg │ ├── type_array.svg │ ├── type_indexed.svg │ ├── type_vector.svg │ ├── typemap.png │ ├── typemap.svg │ ├── vector-extent.png │ ├── vector-extent.svg │ ├── vector-resize.png │ └── vector-resize.svg ├── mpi-reference.md └── quizzes │ └── mpi-recap.md ├── exercise-instructions.md ├── job_mpi.sh ├── mpi ├── cartesian-grid │ ├── README.md │ ├── skeleton.c │ ├── skeleton.f90 │ └── solution │ │ ├── cartesian-grid.c │ │ └── cartesian-grid.f90 ├── communicator │ ├── README.md │ ├── collective.F90 │ ├── collective.c │ ├── collective.cpp │ ├── img │ │ ├── comm-split-reduce.svg │ │ └── sendbuffer.png │ └── solution │ │ ├── reduce.F90 │ │ ├── reduce.c │ │ └── reduce.cpp ├── datatype-extent │ ├── README.md │ ├── skeleton.F90 │ ├── skeleton.c │ └── solution │ │ ├── scatter.F90 │ │ ├── scatter.c │ │ ├── send-receive.F90 │ │ └── send-receive.c ├── heat-equation │ ├── README_2d.md │ ├── README_cartesian.md │ ├── README_neighbor.md │ ├── common │ │ ├── bottle.dat │ │ ├── pngwriter.c │ │ └── pngwriter.h │ ├── cpp │ │ ├── Makefile │ │ ├── bottle.dat │ │ ├── core.cpp │ │ ├── heat.cpp │ │ ├── heat.hpp │ │ ├── io.cpp │ │ ├── main.cpp │ │ ├── matrix.hpp │ │ ├── setup.cpp │ │ ├── solution │ │ │ ├── Makefile │ │ │ ├── bottle.dat │ │ │ ├── core.cpp │ │ │ ├── heat.cpp │ │ │ ├── heat.hpp │ │ │ ├── io.cpp │ │ │ ├── main.cpp │ │ │ ├── matrix.hpp │ │ │ ├── setup.cpp │ │ │ └── utilities.cpp │ │ └── utilities.cpp │ └── fortran │ │ ├── Makefile │ │ ├── core.F90 │ │ ├── heat_mod.F90 │ │ ├── io.F90 │ │ ├── main.F90 │ │ ├── pngwriter_mod.F90 │ │ ├── setup.F90 │ │ ├── solution │ │ ├── Makefile │ │ ├── core.F90 │ │ ├── heat_mod.F90 │ │ ├── io.F90 │ │ ├── main.F90 │ │ ├── pngwriter_mod.F90 │ │ ├── setup.F90 │ │ └── utilities.F90 │ │ └── utilities.F90 ├── message-chain-cartesian │ ├── README.md │ ├── skeleton.F90 │ ├── skeleton.cpp │ └── solution │ │ ├── chain-periodic.F90 │ │ ├── chain-periodic.cpp │ │ ├── 
chain.F90 │ │ └── chain.cpp ├── message-chain-persistent │ ├── README.md │ ├── skeleton.F90 │ ├── skeleton.cpp │ └── solution │ │ ├── chain.F90 │ │ └── chain.cpp ├── neighbor-exchange │ ├── README.md │ ├── skeleton.F90 │ ├── skeleton.cpp │ └── solution │ │ ├── neighbor-exchange.F90 │ │ └── neighbor-exchange.cpp ├── simple-datatypes │ ├── README.md │ ├── img │ │ ├── indexed.png │ │ ├── subarray.png │ │ ├── subarray.svg │ │ └── vector.png │ ├── skeleton.F90 │ ├── skeleton.c │ └── solution │ │ ├── custom_type_a.F90 │ │ ├── custom_type_a.c │ │ ├── custom_type_b.F90 │ │ ├── custom_type_b.c │ │ ├── custom_type_c.F90 │ │ └── custom_type_c.c └── struct-datatype │ ├── README.md │ ├── solution │ ├── struct_with_byte.F90 │ ├── struct_with_byte.c │ ├── struct_with_type.F90 │ └── struct_with_type.c │ ├── struct_type.F90 │ └── struct_type.c └── parallel-io ├── hdf5 ├── README.md ├── hdf5.c └── hdf5.f90 ├── heat-restart ├── README.md ├── c │ ├── Makefile │ ├── core.c │ ├── heat.h │ ├── io.c │ ├── main.c │ ├── setup.c │ ├── solution │ │ ├── Makefile │ │ ├── core.c │ │ ├── heat.h │ │ ├── io.c │ │ ├── main.c │ │ ├── setup.c │ │ └── utilities.c │ └── utilities.c └── fortran │ ├── Makefile │ ├── core.F90 │ ├── heat_mod.F90 │ ├── io.F90 │ ├── main.F90 │ ├── pngwriter_mod.F90 │ ├── setup.F90 │ ├── solution │ ├── Makefile │ ├── core.F90 │ ├── heat_mod.F90 │ ├── io.F90 │ ├── main.F90 │ ├── pngwriter_mod.F90 │ ├── setup.F90 │ └── utilities.F90 │ └── utilities.F90 ├── mpi-io ├── README.md ├── mpi-io.c ├── mpi-io.f90 └── solution │ ├── mpi-io.c │ └── mpi-io.f90 └── posix ├── README.md ├── c ├── solution │ ├── separate-files.c │ ├── spokesman.c │ └── spokesman_reader.c ├── spokesman.c └── spokesman_reader.c └── fortran ├── solution ├── separate-files.f90 ├── spokesman.f90 └── spokesman_reader.f90 ├── spokesman.f90 └── spokesman_reader.f90 /.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.pptx 3 | *.pdf 4 | *.swp 5 | *.o 6 | *.mod 7 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 CSC Training 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | All text and image material licensed under Creative Commons BY-SA 4.0 unless 2 | otherwise noted http://creativecommons.org/licenses/by-sa/4.0/ 3 | 4 | All code samples licensed under MIT license (see LICENSE-MIT for details) unless 5 | otherwise noted 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Advanced MPI and parallel I/O 2 | 3 | Exercise material and model answers for the CSC course "Advanced MPI and parallel I/O". 4 | The course is part of PRACE Training Center (PTC) activity at CSC. 5 | 6 | ## Exercises 7 | 8 | [General instructions](exercise-instructions.md) 9 | 10 | 11 | ### Using own communicators 12 | 13 | - [Communicators and collectives](mpi/communicator) 14 | - [Cartesian grid process topology](mpi/cartesian-grid) 15 | - [Message chain with Cartesian communicator](mpi/message-chain-cartesian) 16 | - [(Bonus) Heat equation with cartesian communicator](mpi/heat-equation/README_cartesian.md) 17 | 18 | ### Persistent and neighborhood communication 19 | 20 | - [Message chain with persistent communication](mpi/message-chain-persistent) 21 | - [Neighborhood exchange](mpi/neighbor-exchange) 22 | - [(Bonus) Heat equation with neighborhood collectives](mpi/heat-equation/README_neighbor.md) 23 | 24 | ### Simple derived types 25 | 26 | - [Simple datatypes](mpi/simple-datatypes) 27 | - [(Bonus) 2D-decomposed heat equation](mpi/heat-equation/README_2d.md) 28 | 29 | ### Advanced derived types 30 | - [Communicating struct / derived type](mpi/struct-datatype) 31 | - [Modifying extent](mpi/datatype-extent) 32 | 33 | 34 | ### Parallel I/O 35 | 36 | - [Parallel I/O with Posix](parallel-io/posix) 37 | - [Simple MPI-IO](parallel-io/mpi-io) 38 | - [HDF5 example](parallel-io/hdf5) 39 | - [Bonus: Checkpoint + restart with MPI-IO](parallel-io/heat-restart) 40 | -------------------------------------------------------------------------------- /demos/cartesian-topology.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int world_rank, rank, ntasks; 7 | 8 | MPI_Init(&argc, &argv); 9 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 10 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); 11 | 12 | int ndims = 2; 13 | int dims[2] = {0, 0}; 14 | 15 | int rc = MPI_Dims_create(ntasks, ndims, dims); 16 | if (0 == world_rank) 17 | printf("Decomposing %d ntasks into %d x %d grid\n", ntasks, dims[0], dims[1]); 18 | 19 | int periods[2] = {0, 1}; 20 | MPI_Comm cart_comm; 21 | MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, 1, &cart_comm); 22 | 23 | int coords[2]; 24 | int nup, ndown, nleft, nright; 25 | MPI_Comm_rank(cart_comm, &rank); 26 | MPI_Cart_coords(cart_comm, rank, 2, coords); 27 | MPI_Cart_shift(cart_comm, 0, 1, &nup, &ndown); 28 | MPI_Cart_shift(cart_comm, 1, 1, &nleft, &nright); 29 | printf("Coords of %d (%d): %d, %d, neighbours %d %d %d %d\n", 30 | rank, world_rank, coords[0], coords[1], nup, ndown, nleft, nright); 31 | 32 | MPI_Finalize(); 33 | 34 | } 35 | -------------------------------------------------------------------------------- /demos/demos.md: -------------------------------------------------------------------------------- 1 | # Parallel I/O 2 | ### Striping example: 3 | 4 | 
```
5 | touch first_file
6 | lfs getstripe first_file
7 |
8 | mkdir experiments
9 | lfs setstripe -c 4 experiments
10 | touch experiments/new_file
11 | lfs getstripe experiments/new_file
12 | ```
13 | ### MPI-IO simple writing
14 | ```
15 | mpif90 mpi_write_at.F90
16 |
17 | srun --job-name=example --account=project_2002078 --partition=test --time=00:15:00 --ntasks=2 ./a.out
18 |
19 | less test
20 | hexdump test
21 | hexdump -e '16 "%02d ""\n"' test
22 | ```
23 |
24 | ### HDF5 command line tools example
25 |
26 | ```
27 | h5ls py_example.hdf5
28 |
29 | h5ls py_example.hdf5/DataTwo
30 |
31 | h5dump -d /DataTwo/Matrix_two py_example.hdf5
32 |
33 | h5dump -n 1 py_example.hdf5
34 |
35 | h5dump -a "Matrix_one/maxvalue" py_example.hdf5
36 | h5dump -a "Matrix_one/minvalue" py_example.hdf5
37 | h5dump -a "Matrix_one/average" py_example.hdf5
38 | h5dump -a "Matrix_one/var" py_example.hdf5
39 |
40 | h5mkgrp py_example.hdf5 "/DataTwo/test"
41 |
42 | h5ls py_example.hdf5/DataTwo
43 | ```
44 |
45 |
46 | ### Python HDF5 example
47 | ```
48 | module load hdf5
49 | module load biokit
50 | module load biopythontools
51 |
52 | python # start python
53 | ```
54 | ```
55 | import h5py
56 | import numpy as np
57 |
58 | m1_shape=(10,20,4)
59 | m2_shape=(6,80,12)
60 |
61 | M1=np.random.random(m1_shape)
62 | M2=np.random.random(m2_shape)
63 |
64 | hdf5_name_file ='python_demo.hdf5' # name of the HDF5 file to create
65 |
66 | with h5py.File(hdf5_name_file, 'w') as hdf5_file: # open an hdf5 file in write mode
67 | ... dset_name='Matrix_one'
68 | ... dset=hdf5_file.create_dataset(dset_name, data=M1, compression='gzip')
69 | ... dset.attrs['maxvalue']=np.max(np.max(M1))
70 | ... dset.attrs['minvalue']=np.min(np.min(M1))
71 | ... dset.attrs['average']=np.mean(np.mean(M1))
72 | ... dset.attrs['var']='M1'
73 |
74 | with h5py.File(hdf5_name_file, 'a') as hdf5_file: # re-open the hdf5 file in append mode
75 | ... gzero=hdf5_file.create_group('DataTwo')
76 | ... nset_name='Matrix_two'
77 | ... nsdet=gzero.create_dataset(nset_name,data=M2,compression='gzip')
78 | ... nsdet.attrs['maxvalue']=np.max(np.max(M2))
79 | ... nsdet.attrs['minvalue']=np.min(np.min(M2))
80 | ... nsdet.attrs['average']=np.mean(np.mean(M2))
81 | ...
nsdet.attrs['var']='M2'
82 | ```
83 |
-------------------------------------------------------------------------------- /demos/example.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/demos/example.hdf5 -------------------------------------------------------------------------------- /demos/mpi-io-write.f90: --------------------------------------------------------------------------------
1 | program mpiio
2 | use mpi
3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit
4 | implicit none
5 |
6 | integer, parameter :: datasize = 64
7 | integer :: rc, my_id, ntasks, localsize, i
8 | integer, dimension(:), allocatable :: localvector
9 | integer :: fh, dsize
10 | integer(kind=MPI_OFFSET_KIND) :: offset
11 |
12 | call mpi_init(rc)
13 | call mpi_comm_size(mpi_comm_world, ntasks, rc)
14 | call mpi_comm_rank(mpi_comm_world, my_id, rc)
15 |
16 | localsize = datasize / ntasks
17 | allocate(localvector(localsize))
18 |
19 | do i = 1, localsize
20 | localvector(i) = i + my_id * localsize
21 | end do
22 |
23 | call mpi_type_size(MPI_INTEGER, dsize, rc)
24 | offset = my_id * localsize * dsize
25 |
26 | call mpi_file_open(MPI_COMM_WORLD, 'output.dat', &
27 | MPI_MODE_CREATE+MPI_MODE_WRONLY, MPI_INFO_NULL, fh, rc)
28 | call mpi_file_write_at_all(fh, offset, localvector, localsize, &
29 | MPI_INTEGER, MPI_STATUS_IGNORE, rc)
30 | call mpi_file_close(fh, rc)
31 |
32 | deallocate(localvector)
33 | call mpi_finalize(rc)
34 |
35 | end program mpiio
36 |
-------------------------------------------------------------------------------- /demos/mpi_write_at.F90: --------------------------------------------------------------------------------
1 | program output
2 | use mpi
3 | implicit none
4 |
5 | integer :: err, i, myid, file, intsize
6 | integer :: status(mpi_status_size)
7 | integer, parameter :: count=100
8 | integer, dimension(count) :: buf
9 | integer(kind=mpi_offset_kind) :: disp
10 |
11 | call mpi_init(err)
12 | call mpi_comm_rank(mpi_comm_world, myid, err)
13 |
14 | do i = 1, count
15 | buf(i) = myid * count + i
16 | end do
17 |
18 | call mpi_file_open(mpi_comm_world, 'test', &
19 | mpi_mode_create + mpi_mode_wronly, &
20 | mpi_info_null, file, err)
21 |
22 | call mpi_type_size(mpi_integer, intsize, err)
23 | disp = myid * count * intsize
24 |
25 | call mpi_file_write_at(file, disp, buf, count, mpi_integer, status, err)
26 |
27 | call mpi_file_close(file, err)
28 | call mpi_finalize(err)
29 |
30 | end program output
31 |
-------------------------------------------------------------------------------- /demos/neighbor.cpp: --------------------------------------------------------------------------------
1 | #include <cstdio>
2 | #include <iostream>
3 | #include <vector>
4 | #include <mpi.h>
5 |
6 | int main(int argc, char **argv)
7 | {
8 | int rank, ntasks;
9 |
10 | MPI_Init(&argc, &argv);
11 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
12 | if (ntasks != 9) {
13 | std::cout << "This demo works only with 9 MPI tasks" << std::endl;
14 | MPI_Abort(MPI_COMM_WORLD, -1);
15 | }
16 |
17 | MPI_Comm cart_comm;
18 | int ndims = 2;
19 | int dims[2] = {3, 3};
20 | int periods[2] = {0, 1};
21 | MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, 0, &cart_comm);
22 | MPI_Comm_rank(cart_comm, &rank);
23 |
24 | // Print out process grid
25 | if (0 == rank) {
26 | std::cout << "Process grid" << std::endl;
27 | for (int i=0; i < dims[0]; i++) {
28 | for (int j=0; j < dims[1]; j++) {
29 | int r;
30 | int coords[2] = {i, j};
31 | MPI_Cart_rank(cart_comm, coords, &r);
32 | std::cout << r << " ";
33 | }
34 | std::cout << std::endl;
35 | }
36 | std::cout << std::flush;
37 | }
38 |
39 | MPI_Barrier(cart_comm);
40 |
41 | int neighbors = 4;
42 | std::vector<int> sendbuf(2, rank);
43 | std::vector<int> recvbuf(2*neighbors, -1);
44 |
45 | MPI_Neighbor_allgather(sendbuf.data(), 2, MPI_INT,
46 | recvbuf.data(), 2, MPI_INT,
47 | cart_comm);
48 |
49 |
50 | // Print out the result
51 | for (int r=0; r < ntasks; r++) {
52 | MPI_Barrier(cart_comm);
53 | if (rank == r) {
54 | std::cout << "task " << rank << ": ";
55 | for (auto i: recvbuf)
56 | std::cout << i << " ";
57 | std::cout << std::endl << std::flush;
58 | }
59 | MPI_Barrier(cart_comm);
60 | }
61 |
62 | MPI_Finalize();
63 | }
64 |
-------------------------------------------------------------------------------- /docs/img/cluster_diagram.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/cluster_diagram.jpeg -------------------------------------------------------------------------------- /docs/img/communicators.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/communicators.png -------------------------------------------------------------------------------- /docs/img/cray_top500.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/cray_top500.png -------------------------------------------------------------------------------- /docs/img/fortran-array-layout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/fortran-array-layout.png -------------------------------------------------------------------------------- /docs/img/hdf5-hyperslab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/hdf5-hyperslab.png -------------------------------------------------------------------------------- /docs/img/io-illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/io-illustration.png -------------------------------------------------------------------------------- /docs/img/io-layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/io-layers.png -------------------------------------------------------------------------------- /docs/img/io-subarray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/io-subarray.png -------------------------------------------------------------------------------- /docs/img/lustre-architecture.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/lustre-architecture.png -------------------------------------------------------------------------------- /docs/img/lustre-striping.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/lustre-striping.png -------------------------------------------------------------------------------- /docs/img/nonblocking-io.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/nonblocking-io.png -------------------------------------------------------------------------------- /docs/img/osu-benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/osu-benchmark.png -------------------------------------------------------------------------------- /docs/img/posix-everybody.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/posix-everybody.png -------------------------------------------------------------------------------- /docs/img/posix-spokesman.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/posix-spokesman.png -------------------------------------------------------------------------------- /docs/img/processes-threads.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/processes-threads.png -------------------------------------------------------------------------------- /docs/img/striping-performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/striping-performance.png -------------------------------------------------------------------------------- /docs/img/svg2png.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # convert SVGs to PNGs with a correct size 3 | inkscape affinity.svg -e affinity.png -w 709 4 | inkscape collective-patterns.svg -e collective-patterns.png -w 1594 5 | inkscape communication-schematic.svg -e communication-schematic.png -w 443 6 | inkscape communicators.svg -e communicators.png -w 1418 7 | inkscape fortran-array-layout.svg -e fortran-array-layout.png -w 709 8 | inkscape hdf5-hyperslab.svg -e hdf5-hyperslab.png -w 532 9 | inkscape io-illustration.svg -e io-illustration.png -w 1240 10 | inkscape io-layers.svg -e io-layers.png -w 1772 11 | inkscape io-subarray.svg -e io-subarray.png -w 1418 12 | inkscape lustre-architecture.svg -e lustre-architecture.png -w 1772 13 | inkscape lustre-striping.svg -e lustre-striping.png -w 1772 14 | inkscape mpi-summary.svg -e mpi-summary.png -w 1418 15 | inkscape mpi-thread-support.svg -e mpi-thread-support.png -w 1772 16 | inkscape multiple-thread-communication.svg -e 
multiple-thread-communication.png -w 886 17 | inkscape nonblocking-io.svg -e nonblocking-io.png -w 1240 18 | inkscape omp-parallel.svg -e omp-parallel.png -w 443 19 | inkscape omp-summary.svg -e omp-summary.png -w 1418 20 | inkscape one-sided-epoch.svg -e one-sided-epoch.png -w 620 21 | inkscape one-sided-limitations.svg -e one-sided-limitations.png -w 1240 22 | inkscape one-sided-window.svg -e one-sided-window.png -w 708 23 | inkscape posix-everybody.svg -e posix-everybody.png -w 620 24 | inkscape posix-spokesman.svg -e posix-spokesman.png -w 620 25 | inkscape processes-threads.svg -e processes-threads.png -w 1594 26 | inkscape supercomputer-node-hybrid.svg -e supercomputer-node-hybrid.png -w 1772 27 | inkscape two-d-cartesian-grid.svg -e two-d-cartesian-grid.png -w 886 28 | inkscape type-struct.svg -e type-struct.png -w 1240 29 | inkscape typemap.svg -e typemap.png -w 1418 30 | inkscape vector-extent.svg -e vector-extent.png -w 1772 31 | inkscape vector-resize.svg -e vector-resize.png -w 1772 32 | -------------------------------------------------------------------------------- /docs/img/two-d-cartesian-grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/two-d-cartesian-grid.png -------------------------------------------------------------------------------- /docs/img/type-struct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/type-struct.png -------------------------------------------------------------------------------- /docs/img/typemap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/typemap.png -------------------------------------------------------------------------------- /docs/img/vector-extent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/vector-extent.png -------------------------------------------------------------------------------- /docs/img/vector-resize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/docs/img/vector-resize.png -------------------------------------------------------------------------------- /job_mpi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=example 3 | #SBATCH --account=project_2000745 4 | #SBATCH --partition=small 5 | #SBATCH --reservation=advance-mpi 6 | #SBATCH --time=00:05:00 7 | #SBATCH --ntasks=4 8 | 9 | srun my_mpi_exe 10 | -------------------------------------------------------------------------------- /mpi/cartesian-grid/README.md: -------------------------------------------------------------------------------- 1 | ## Cartesian grid process topology 2 | 3 | Explore a Cartesian grid process topology by writing a toy program, where 4 | 5 | - the MPI processes are arranged into a 2D Cartesian grid 6 | - every task prints out their linear rank together with its coordinates 7 | in the process grid 8 | - every task prints out the linear rank of their 
nearest neighboring 9 | processes 10 | 11 | Run the code with both periodic and non-periodic boundaries, and experiment 12 | with the direction and displacement parameters of the `MPI_Cart_shift` 13 | routine. 14 | 15 | You can start from scratch or use one of the provided skeleton codes 16 | ([skeleton.c](skeleton.c) or [skeleton.f90](skeleton.f90)). 17 | -------------------------------------------------------------------------------- /mpi/cartesian-grid/skeleton.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | int main(int argc, char* argv[]) { 7 | int ntasks, my_id, irank; 8 | int dims[2]; /* Dimensions of the grid */ 9 | int coords[2]; /* Coordinates in the grid */ 10 | int neighbors[4]; /* Neighbors in 2D grid */ 11 | int period[2] = {1, 1}; 12 | MPI_Comm comm2d; 13 | 14 | MPI_Init(&argc, &argv); 15 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 16 | MPI_Comm_rank(MPI_COMM_WORLD, &my_id); 17 | 18 | /* Determine the process grid (dims[0] x dims[1] = ntasks) */ 19 | if (ntasks < 16) { 20 | dims[0] = 2; 21 | } else if (ntasks >= 16 && ntasks < 64) { 22 | dims[0] = 4; 23 | } else if (ntasks >= 64 && ntasks < 256) { 24 | dims[0] = 8; 25 | } else { 26 | dims[0] = 16; 27 | } 28 | dims[1] = ntasks / dims[0]; 29 | 30 | if (dims[0] * dims[1] != ntasks) { 31 | fprintf(stderr, "Incompatible dimensions: %i x %i != %i\n", 32 | dims[0], dims[1], ntasks); 33 | MPI_Finalize(); 34 | exit(EXIT_FAILURE); 35 | } 36 | 37 | /* Create the 2D Cartesian communicator */ 38 | /* TO DO */ 39 | 40 | /* Find out and store the neighboring ranks */ 41 | /* TO DO */ 42 | 43 | /* Find out and store also the Cartesian coordinates of a rank */ 44 | /* TO DO */ 45 | 46 | for (irank = 0; irank < ntasks; irank++) { 47 | if (my_id == irank) { 48 | printf("%3i = %2i %2i neighbors=%3i %3i %3i %3i\n", 49 | my_id, coords[0], coords[1], neighbors[0], neighbors[1], 50 | neighbors[2], neighbors[3]); 51 | } 52 | MPI_Barrier(MPI_COMM_WORLD); 53 | } 54 | 55 | MPI_Finalize(); 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /mpi/cartesian-grid/skeleton.f90: -------------------------------------------------------------------------------- 1 | program cartesian_grid 2 | use mpi 3 | implicit none 4 | 5 | integer :: ntask, & ! number of MPI tasks 6 | my_id, & ! MPI rank of the task 7 | rc, & ! return code 8 | comm2d, & ! Cartesian communicator 9 | neighbors(4), & ! neighbors in the 2D grid 10 | coord(0:1), & ! coordinates in the grid 11 | dims(0:1) ! dimensions of the grid 12 | logical, dimension(2) :: period = (/ .true., .true. /) 13 | integer :: irank 14 | 15 | call mpi_init(rc) 16 | call mpi_comm_size(MPI_COMM_WORLD, ntask, rc) 17 | call mpi_comm_rank(MPI_COMM_WORLD, my_id, rc) 18 | 19 | ! Determine the process grid (dims(0) x dims(1) = ntask) 20 | if (ntask < 16) then 21 | dims(0) = 2 22 | else if (ntask >= 16 .and. ntask < 64) then 23 | dims(0) = 4 24 | else if (ntask >= 64 .and. ntask < 256) then 25 | dims(0) = 8 26 | else 27 | dims(0) = 16 28 | end if 29 | 30 | dims(1) = ntask / dims(0) 31 | if (dims(0) * dims(1) /= ntask) then 32 | write(*,'(A,I3,A1,I3,A2,I4)') 'sorry, no go', dims(0), 'x', & 33 | dims(1),'/=', ntask 34 | call mpi_abort(mpi_comm_world, 1, rc) 35 | end if 36 | 37 | ! Create the 2D Cartesian communicator 38 | ! TO DO 39 | ! Find out & store the neighboring ranks 40 | ! TO DO 41 | ! Find out & store also the Cartesian coordinates of a rank 42 | ! 
TO DO 43 | 44 | do irank = 0, ntask-1 45 | if (my_id == irank) print '(I3,A,2I2,A,4I3)', & 46 | my_id, '=', coord, ' neighbors=', neighbors(:) 47 | call mpi_barrier(mpi_comm_world, rc) 48 | end do 49 | 50 | call mpi_finalize(rc) 51 | 52 | end program cartesian_grid 53 | -------------------------------------------------------------------------------- /mpi/cartesian-grid/solution/cartesian-grid.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | int main(int argc, char* argv[]) { 7 | int ntasks, my_id, irank; 8 | int dims[2]; /* Dimensions of the grid */ 9 | int coords[2]; /* Coordinates in the grid */ 10 | int neighbors[4]; /* Neighbors in 2D grid */ 11 | int period[2] = {1, 1}; 12 | MPI_Comm comm2d; 13 | 14 | MPI_Init(&argc, &argv); 15 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 16 | MPI_Comm_rank(MPI_COMM_WORLD, &my_id); 17 | 18 | /* Determine the process grid (dims[0] x dims[1] = ntasks) */ 19 | if (ntasks < 16) { 20 | dims[0] = 2; 21 | } else if (ntasks >= 16 && ntasks < 64) { 22 | dims[0] = 4; 23 | } else if (ntasks >= 64 && ntasks < 256) { 24 | dims[0] = 8; 25 | } else { 26 | dims[0] = 16; 27 | } 28 | dims[1] = ntasks / dims[0]; 29 | 30 | if (dims[0] * dims[1] != ntasks) { 31 | fprintf(stderr, "Incompatible dimensions: %i x %i != %i\n", 32 | dims[0], dims[1], ntasks); 33 | MPI_Finalize(); 34 | exit(EXIT_FAILURE); 35 | } 36 | 37 | /* Create the 2D Cartesian communicator */ 38 | MPI_Cart_create(MPI_COMM_WORLD, 2, dims, period, 1, &comm2d); 39 | /* Find out and store the ranks with which to perform halo exchange */ 40 | MPI_Cart_shift(comm2d, 0, 1, &neighbors[0], &neighbors[1]); 41 | MPI_Cart_shift(comm2d, 1, 1, &neighbors[2], &neighbors[3]); 42 | /* Find out and store also the Cartesian coordinates of a rank */ 43 | MPI_Cart_coords(comm2d, my_id, 2, coords); 44 | 45 | for (irank = 0; irank < ntasks; irank++) { 46 | if (my_id == irank) { 47 | printf("%3i = %2i %2i neighbors=%3i %3i %3i %3i\n", 48 | my_id, coords[0], coords[1], neighbors[0], neighbors[1], 49 | neighbors[2], neighbors[3]); 50 | } 51 | MPI_Barrier(MPI_COMM_WORLD); 52 | } 53 | 54 | MPI_Finalize(); 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /mpi/cartesian-grid/solution/cartesian-grid.f90: -------------------------------------------------------------------------------- 1 | program cartesian_grid 2 | use mpi 3 | implicit none 4 | 5 | integer :: ntask, & ! number of MPI tasks 6 | my_id, & ! MPI rank of the task 7 | rc, & ! return code 8 | comm2d, & ! Cartesian communicator 9 | neighbors(4), & ! neighbors in the 2D grid 10 | coord(0:1), & ! coordinates in the grid 11 | dims(0:1) ! dimensions of the grid 12 | logical, dimension(2) :: period = (/ .true., .true. /) 13 | integer :: irank 14 | 15 | call mpi_init(rc) 16 | call mpi_comm_size(MPI_COMM_WORLD, ntask, rc) 17 | call mpi_comm_rank(MPI_COMM_WORLD, my_id, rc) 18 | 19 | ! Determine the process grid (dims(0) x dims(1) = ntask) 20 | if (ntask < 16) then 21 | dims(0) = 2 22 | else if (ntask >= 16 .and. ntask < 64) then 23 | dims(0) = 4 24 | else if (ntask >= 64 .and. ntask < 256) then 25 | dims(0) = 8 26 | else 27 | dims(0) = 16 28 | end if 29 | 30 | dims(1) = ntask / dims(0) 31 | if (dims(0) * dims(1) /= ntask) then 32 | write(*,'(A,I3,A1,I3,A2,I4)') 'sorry, no go', dims(0), 'x', & 33 | dims(1),'/=', ntask 34 | call mpi_abort(mpi_comm_world, 1, rc) 35 | end if 36 | 37 | ! 
Create the 2D Cartesian communicator 38 | call mpi_cart_create(mpi_comm_world, 2, dims, period, .true., comm2d, rc) 39 | ! Find out & store the neighboring ranks 40 | call mpi_cart_shift(comm2d, 0, 1, neighbors(1), neighbors(2), rc) 41 | call mpi_cart_shift(comm2d, 1, 1, neighbors(3), neighbors(4), rc) 42 | ! Find out & store also the Cartesian coordinates of a rank 43 | call mpi_cart_coords(comm2d, my_id, 2, coord, rc) 44 | 45 | do irank = 0, ntask-1 46 | if (my_id == irank) print '(I3,A,2I2,A,4I3)', & 47 | my_id, '=', coord, ' neighbors=', neighbors(:) 48 | call mpi_barrier(mpi_comm_world, rc) 49 | end do 50 | 51 | call mpi_finalize(rc) 52 | 53 | end program cartesian_grid 54 | -------------------------------------------------------------------------------- /mpi/communicator/README.md: -------------------------------------------------------------------------------- 1 | ## Communicators and collectives 2 | 3 | In this exercise we combine collective communication with user defined 4 | communicators. Write a program for four MPI processes, such that each 5 | process has a data vector with the following data: 6 | 7 | ![](https://github.com/csc-training/summerschool/blob/master/mpi/collectives/img/sendbuffer.png) 8 | 9 | In addition, each task has a receive buffer for eight elements and the 10 | values in the buffer are initialized to -1. 11 | 12 | Implement now a pattern with user defined communicators and collective 13 | operation so that the receive buffers will have the following values: 14 | 15 | ![](https://raw.githubusercontent.com/csc-training/summerschool/master/mpi/communicator/img/comm-split-reduce.svg?sanitize=true) 16 | 17 | You can start from scratch or use the skeleton code 18 | [collective.cpp](collective.cpp), [collective.c](collective.c) or 19 | [collective.F90](collective.F90). 20 | 21 | 22 | -------------------------------------------------------------------------------- /mpi/communicator/collective.F90: -------------------------------------------------------------------------------- 1 | program coll_exer 2 | use mpi 3 | implicit none 4 | 5 | integer, parameter :: n_mpi_tasks = 4 6 | 7 | integer :: ntasks, rank, ierr, i, color, sub_comm 8 | integer, dimension(2*n_mpi_tasks) :: sendbuf, recvbuf 9 | integer, dimension(2*n_mpi_tasks**2) :: printbuf 10 | 11 | call mpi_init(ierr) 12 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, ierr) 13 | call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr) 14 | 15 | if (ntasks /= n_mpi_tasks) then 16 | if (rank == 0) then 17 | print *, "Run this program with ", n_mpi_tasks, " tasks." 18 | end if 19 | call mpi_abort(MPI_COMM_WORLD, -1, ierr) 20 | end if 21 | 22 | ! Initialize message buffers 23 | call init_buffers 24 | 25 | ! Print data that will be sent 26 | call print_buffers(sendbuf) 27 | 28 | ! TODO: use a single collective communication call (and maybe prepare 29 | ! some parameters for the call) 30 | 31 | ! Print data that was received 32 | ! TODO: add correct buffer 33 | call print_buffers(...) 
34 |
35 | call mpi_finalize(ierr)
36 |
37 | contains
38 |
39 | subroutine init_buffers
40 | implicit none
41 | integer :: i
42 |
43 | do i = 1, 2*n_mpi_tasks
44 | recvbuf(i) = -1
45 | sendbuf(i) = i + 2*n_mpi_tasks * rank - 1
46 | end do
47 | end subroutine init_buffers
48 |
49 |
50 | subroutine print_buffers(buffer)
51 | implicit none
52 | integer, dimension(:), intent(in) :: buffer
53 | integer, parameter :: bufsize = 2*n_mpi_tasks
54 | integer :: i
55 | character(len=40) :: pformat
56 |
57 | write(pformat,'(A,I3,A)') '(A4,I2,":",', bufsize, 'I3)'
58 |
59 | call mpi_gather(buffer, bufsize, MPI_INTEGER, &
60 | & printbuf, bufsize, MPI_INTEGER, &
61 | & 0, MPI_COMM_WORLD, ierr)
62 |
63 | if (rank == 0) then
64 | do i = 1, ntasks
65 | write(*,pformat) 'Task', i - 1, printbuf((i-1)*bufsize+1:i*bufsize)
66 | end do
67 | print *
68 | end if
69 | end subroutine print_buffers
70 |
71 | end program coll_exer
72 |
-------------------------------------------------------------------------------- /mpi/communicator/collective.c: --------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <mpi.h>
4 |
5 | #define NTASKS 4
6 |
7 | void print_buffers(int *printbuffer, int *sendbuffer, int buffersize);
8 | void init_buffers(int *sendbuffer, int *recvbuffer, int buffersize);
9 |
10 |
11 | int main(int argc, char *argv[])
12 | {
13 | int ntasks, rank, color;
14 | int sendbuf[2 * NTASKS], recvbuf[2 * NTASKS];
15 | int printbuf[2 * NTASKS * NTASKS];
16 |
17 | MPI_Comm sub_comm;
18 |
19 | MPI_Init(&argc, &argv);
20 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
22 |
23 | if (ntasks != NTASKS) {
24 | if (rank == 0) {
25 | fprintf(stderr, "Run this program with %i tasks.\n", NTASKS);
26 | }
27 | MPI_Abort(MPI_COMM_WORLD, -1);
28 | }
29 |
30 | /* Initialize message buffers */
31 | init_buffers(sendbuf, recvbuf, 2 * NTASKS);
32 |
33 | /* Print data that will be sent */
34 | print_buffers(printbuf, sendbuf, 2 * NTASKS);
35 |
36 | /* TODO: use a single collective communication call (and maybe prepare
37 | * some parameters for the call) */
38 |
39 | /* Print data that was received */
40 | /* TODO: add correct buffer */
41 | print_buffers(printbuf, ..., 2 * NTASKS);
42 |
43 | MPI_Finalize();
44 | return 0;
45 | }
46 |
47 |
48 | void init_buffers(int *sendbuffer, int *recvbuffer, int buffersize)
49 | {
50 | int rank, i;
51 |
52 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
53 | for (i = 0; i < buffersize; i++) {
54 | recvbuffer[i] = -1;
55 | sendbuffer[i] = i + buffersize * rank;
56 | }
57 | }
58 |
59 |
60 | void print_buffers(int *printbuffer, int *sendbuffer, int buffersize)
61 | {
62 | int i, j, rank, ntasks;
63 |
64 | MPI_Gather(sendbuffer, buffersize, MPI_INT,
65 | printbuffer, buffersize, MPI_INT, 0, MPI_COMM_WORLD);
66 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
67 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
68 |
69 | if (rank == 0) {
70 | for (j = 0; j < ntasks; j++) {
71 | printf("Task %i:", j);
72 | for (i = 0; i < buffersize; i++) {
73 | printf(" %2i", printbuffer[i + buffersize * j]);
74 | }
75 | printf("\n");
76 | }
77 | printf("\n");
78 | }
79 | }
80 |
-------------------------------------------------------------------------------- /mpi/communicator/collective.cpp: --------------------------------------------------------------------------------
1 | #include <cstdio>
2 | #include <array>
3 | #include <mpi.h>
4 |
5 | #define NTASKS 4
6 |
7 | template <std::size_t N, std::size_t M>
8 | void print_buffers(std::array<int, N> &printbuffer, std::array<int, M> &sendbuffer);
9 | template <std::size_t N>
10 | void init_buffers(std::array<int, N> &sendbuffer, std::array<int, N> &recvbuffer);
11 |
12 |
13 | int main(int argc, char *argv[])
14 | {
15 | int ntasks, rank, color;
16 | std::array<int, 2 * NTASKS> sendbuf, recvbuf;
17 | std::array<int, 2 * NTASKS * NTASKS> printbuf;
18 |
19 | MPI_Comm sub_comm;
20 |
21 | MPI_Init(&argc, &argv);
22 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
23 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
24 |
25 | if (ntasks != NTASKS) {
26 | if (rank == 0) {
27 | fprintf(stderr, "Run this program with %i tasks.\n", NTASKS);
28 | }
29 | MPI_Abort(MPI_COMM_WORLD, -1);
30 | }
31 |
32 | // Initialize message buffers
33 | init_buffers(sendbuf, recvbuf);
34 |
35 | // Print data that will be sent
36 | print_buffers(printbuf, sendbuf);
37 |
38 | /* TODO: use a single collective communication call (and maybe prepare
39 | * some parameters for the call) */
40 |
41 | /* Print data that was received */
42 | /* TODO: add correct buffer */
43 | print_buffers(printbuf, ...);
44 |
45 | MPI_Finalize();
46 | return 0;
47 | }
48 |
49 |
50 | template <std::size_t N>
51 | void init_buffers(std::array<int, N> &sendbuffer, std::array<int, N> &recvbuffer)
52 | {
53 | int rank;
54 |
55 | const int buffersize = sendbuffer.size();
56 |
57 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
58 | for (int i = 0; i < buffersize; i++) {
59 | recvbuffer[i] = -1;
60 | sendbuffer[i] = i + buffersize * rank;
61 | }
62 | }
63 |
64 |
65 | template <std::size_t N, std::size_t M>
66 | void print_buffers(std::array<int, N> &printbuffer, std::array<int, M> &sendbuffer)
67 | {
68 | int rank, ntasks;
69 |
70 | const int buffersize = sendbuffer.size();
71 |
72 | MPI_Gather(sendbuffer.data(), buffersize, MPI_INT,
73 | printbuffer.data(), buffersize, MPI_INT, 0, MPI_COMM_WORLD);
74 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
75 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
76 |
77 | if (rank == 0) {
78 | for (int j = 0; j < ntasks; j++) {
79 | printf("Task %i:", j);
80 | for (int i = 0; i < buffersize; i++) {
81 | printf(" %2i", printbuffer[i + buffersize * j]);
82 | }
83 | printf("\n");
84 | }
85 | printf("\n");
86 | }
87 | }
88 |
-------------------------------------------------------------------------------- /mpi/communicator/img/sendbuffer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/mpi/communicator/img/sendbuffer.png -------------------------------------------------------------------------------- /mpi/communicator/solution/reduce.F90: --------------------------------------------------------------------------------
1 | program coll_exer
2 | use mpi_f08
3 | implicit none
4 |
5 | integer, parameter :: n_mpi_tasks = 4
6 |
7 | integer :: ntasks, rank, ierr, color
8 | type(mpi_comm) :: sub_comm
9 | integer, dimension(2*n_mpi_tasks) :: sendbuf, recvbuf
10 | integer, dimension(2*n_mpi_tasks**2) :: printbuf
11 |
12 | call mpi_init(ierr)
13 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, ierr)
14 | call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr)
15 |
16 | if (ntasks /= n_mpi_tasks) then
17 | if (rank == 0) then
18 | print *, "Run this program with ", n_mpi_tasks, " tasks."
19 | end if
20 | call mpi_abort(MPI_COMM_WORLD, -1, ierr)
21 | end if
22 |
23 | ! Initialize message buffers
24 | call init_buffers
25 |
26 | ! Print data that will be sent
27 | call print_buffers(sendbuf)
28 |
29 | ! Create new communicator and reduce the data
30 | if (rank / 2 == 0) then
31 | color = 1
32 | else
33 | color = 2
34 | end if
35 |
36 | call mpi_comm_split(MPI_COMM_WORLD, color, rank, sub_comm, ierr)
37 | call mpi_reduce(sendbuf, recvbuf, 2*n_mpi_tasks, MPI_INTEGER, MPI_SUM, 0, &
38 | & sub_comm, ierr)
39 |
40 | ! Print data that was received
41 | call print_buffers(recvbuf)
42 |
43 | call mpi_finalize(ierr)
44 |
45 | contains
46 |
47 | subroutine init_buffers
48 | implicit none
49 | integer :: i
50 |
51 | do i = 1, 2*n_mpi_tasks
52 | recvbuf(i) = -1
53 | sendbuf(i) = i + 2*n_mpi_tasks * rank - 1
54 | end do
55 | end subroutine init_buffers
56 |
57 |
58 | subroutine print_buffers(buffer)
59 | implicit none
60 | integer, dimension(:), intent(in) :: buffer
61 | integer, parameter :: bufsize = 2*n_mpi_tasks
62 | integer :: i
63 | character(len=40) :: pformat
64 |
65 | write(pformat,'(A,I3,A)') '(A4,I2,":",', bufsize, 'I3)'
66 |
67 | call mpi_gather(buffer, bufsize, MPI_INTEGER, &
68 | & printbuf, bufsize, MPI_INTEGER, &
69 | & 0, MPI_COMM_WORLD, ierr)
70 |
71 | if (rank == 0) then
72 | do i = 1, ntasks
73 | write(*,pformat) 'Task', i - 1, printbuf((i-1)*bufsize+1:i*bufsize)
74 | end do
75 | print *
76 | end if
77 | end subroutine print_buffers
78 |
79 | end program coll_exer
80 |
-------------------------------------------------------------------------------- /mpi/datatype-extent/README.md: --------------------------------------------------------------------------------
1 | ## Modifying extent
2 |
3 | When the datatype contains gaps at the beginning or at the end, one might need to
4 | modify the *extent* of the datatype.
5 |
6 | Starting from [skeleton.c](skeleton.c) or [skeleton.F90](skeleton.F90), create a vector
7 | datatype for sending a column (C) or row (Fortran) of a matrix.
8 |
9 | 1. Verify that the datatype works by communicating a single column/row.
10 |
11 | 2. Try to send two columns / rows. What happens? Can you explain why?
12 |
13 | 3. Create a new datatype with resized extent, so that communicating multiple columns / rows
14 | succeeds.
15 |
16 | 4. Try to scatter columns / rows with `MPI_Scatter`.
17 |
-------------------------------------------------------------------------------- /mpi/datatype-extent/skeleton.F90: --------------------------------------------------------------------------------
1 | program datatype1
2 | use mpi_f08
3 | implicit none
4 |
5 | integer :: rank, rc
6 | integer :: sendarray(8,6), recvarray(8,6)
7 | type(mpi_datatype) :: vector, vector2
8 | integer(kind=mpi_address_kind) :: extent, lb
9 |
10 | integer :: i, j
11 |
12 | call mpi_init(rc)
13 | call mpi_comm_rank(MPI_COMM_WORLD, rank, rc)
14 |
15 | ! initialize arrays
16 | recvarray = 0
17 |
18 | if (rank == 0) then
19 | sendarray = reshape([ ((i*10 + j, i=1,8), j=1,6) ], [8, 6] )
20 | write(*,*) 'Original data'
21 | do i=1, 8
22 | write(*,'(*(I3))') sendarray(i, :)
23 | end do
24 | end if
25 |
26 | ! TODO create datatype
27 |
28 | ! Communicate with the datatype
29 | if (rank == 0) then
30 |
31 | else if (rank == 1) then
32 |
33 | end if
34 |
35 | ! free datatype
36 |
37 | !
TODO end 38 | 39 | if (rank == 1) then 40 | write(*,*) 'Received data' 41 | do i=1, 8 42 | write(*,'(*(I3))') recvarray(i, :) 43 | end do 44 | end if 45 | 46 | call mpi_finalize(rc) 47 | 48 | 49 | 50 | end program datatype1 51 | -------------------------------------------------------------------------------- /mpi/datatype-extent/skeleton.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int rank; 7 | int sendarray[8][6]; 8 | int recvarray[8][6]; 9 | MPI_Datatype vector, vector2; 10 | 11 | 12 | MPI_Init(&argc, &argv); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | 15 | // Initialize arrays 16 | if (rank == 0) { 17 | for (int i = 0; i < 8; i++) { 18 | for (int j = 0; j < 6; j++) { 19 | sendarray[i][j] = (i + 1) * 10 + j + 1; 20 | } 21 | } 22 | } 23 | 24 | for (int i = 0; i < 8; i++) { 25 | for (int j = 0; j < 6; j++) { 26 | recvarray[i][j] = 0; 27 | } 28 | } 29 | 30 | if (rank == 0) { 31 | printf("Data in rank 0\n"); 32 | for (int i = 0; i < 8; i++) { 33 | for (int j = 0; j < 6; j++) { 34 | printf("%3d", sendarray[i][j]); 35 | } 36 | printf("\n"); 37 | } 38 | } 39 | 40 | // TODO create datatype 41 | 42 | // Communicate with the datatype 43 | if (rank == 0) 44 | 45 | else if (rank == 1) 46 | 47 | 48 | // free datatype 49 | 50 | // TODO end 51 | 52 | if (rank == 1) { 53 | printf("Received data\n"); 54 | for (int i = 0; i < 8; i++) { 55 | for (int j = 0; j < 6; j++) { 56 | printf("%3d", recvarray[i][j]); 57 | } 58 | printf("\n"); 59 | } 60 | } 61 | 62 | MPI_Finalize(); 63 | 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /mpi/datatype-extent/solution/scatter.F90: -------------------------------------------------------------------------------- 1 | program datatype1 2 | use mpi_f08 3 | implicit none 4 | 5 | integer :: rank, ntasks, rc 6 | integer :: sendarray(8,6), recvarray(8,6) 7 | type(mpi_datatype) :: vector, vector2 8 | integer(kind=mpi_address_kind) :: extent, lb 9 | 10 | integer :: i, j 11 | 12 | call mpi_init(rc) 13 | call mpi_comm_rank(MPI_COMM_WORLD, rank, rc) 14 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, rc) 15 | 16 | ! initialize arrays 17 | recvarray = 0 18 | 19 | if (rank == 0) then 20 | sendarray = reshape([ ((i*10 + j, i=1,8), j=1,6) ], [8, 6] ) 21 | write(*,*) 'Original data' 22 | do i=1, 8 23 | write(*,'(*(I3))') sendarray(i, :) 24 | end do 25 | end if 26 | 27 | ! 
create datatype 28 | call mpi_type_vector(6, 1, 8, MPI_INTEGER, vector, rc); 29 | extent = storage_size(i) / 8 30 | lb = 0 31 | call mpi_type_create_resized(vector, lb, extent, vector2, rc) 32 | call mpi_type_commit(vector2, rc) 33 | 34 | call mpi_scatter(sendarray, 2, vector2, recvarray, 2, vector2, 0, MPI_COMM_WORLD, rc) 35 | 36 | call mpi_type_free(vector2, rc) 37 | 38 | if (rank == ntasks - 1) then 39 | write(*,*) 'Received data' 40 | do i=1, 8 41 | write(*,'(*(I3))') recvarray(i, :) 42 | end do 43 | end if 44 | 45 | call mpi_finalize(rc) 46 | 47 | 48 | 49 | end program datatype1 50 | -------------------------------------------------------------------------------- /mpi/datatype-extent/solution/scatter.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int rank, ntasks; 7 | int sendarray[8][6]; 8 | int recvarray[8][6]; 9 | MPI_Datatype vector, vector2; 10 | 11 | 12 | MPI_Init(&argc, &argv); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 15 | 16 | // Initialize arrays 17 | if (rank == 0) { 18 | for (int i = 0; i < 8; i++) { 19 | for (int j = 0; j < 6; j++) { 20 | sendarray[i][j] = (i + 1) * 10 + j + 1; 21 | } 22 | } 23 | } 24 | 25 | for (int i = 0; i < 8; i++) { 26 | for (int j = 0; j < 6; j++) { 27 | recvarray[i][j] = 0; 28 | } 29 | } 30 | 31 | if (rank == 0) { 32 | printf("Data in rank 0\n"); 33 | for (int i = 0; i < 8; i++) { 34 | for (int j = 0; j < 6; j++) { 35 | printf("%3d", sendarray[i][j]); 36 | } 37 | printf("\n"); 38 | } 39 | } 40 | 41 | // Create datatype 42 | MPI_Type_vector(8, 1, 6, MPI_INT, &vector); 43 | MPI_Type_create_resized(vector, 0, sizeof(int), &vector2); 44 | MPI_Type_commit(&vector2); 45 | 46 | // Scatter columns 47 | MPI_Scatter(&sendarray[0][0], 1, vector2, &recvarray[0][0], 1, vector2, 0, MPI_COMM_WORLD); 48 | 49 | MPI_Type_free(&vector2); 50 | 51 | if (rank == ntasks - 1) { 52 | printf("Received data\n"); 53 | for (int i = 0; i < 8; i++) { 54 | for (int j = 0; j < 6; j++) { 55 | printf("%3d", recvarray[i][j]); 56 | } 57 | printf("\n"); 58 | } 59 | } 60 | 61 | MPI_Finalize(); 62 | 63 | return 0; 64 | } 65 | -------------------------------------------------------------------------------- /mpi/datatype-extent/solution/send-receive.F90: -------------------------------------------------------------------------------- 1 | program datatype1 2 | use mpi_f08 3 | implicit none 4 | 5 | integer :: rank, rc 6 | integer :: sendarray(8,6), recvarray(8,6) 7 | type(mpi_datatype) :: vector, vector2 8 | integer(kind=mpi_address_kind) :: extent, lb 9 | 10 | integer :: i, j 11 | 12 | call mpi_init(rc) 13 | call mpi_comm_rank(MPI_COMM_WORLD, rank, rc) 14 | 15 | ! initialize arrays 16 | recvarray = 0 17 | 18 | if (rank == 0) then 19 | sendarray = reshape([ ((i*10 + j, i=1,8), j=1,6) ], [8, 6] ) 20 | write(*,*) 'Original data' 21 | do i=1, 8 22 | write(*,'(*(I3))') sendarray(i, :) 23 | end do 24 | end if 25 | 26 | ! create datatype 27 | call mpi_type_vector(6, 1, 8, MPI_INTEGER, vector, rc); 28 | extent = storage_size(i) / 8 29 | lb = 0 30 | call mpi_type_create_resized(vector, lb, extent, vector2, rc) 31 | call mpi_type_commit(vector2, rc) 32 | 33 | ! 
Send two rows of a matrix 34 | if (rank == 0) then 35 | call mpi_send(sendarray, 2, vector2, 1, 0, MPI_COMM_WORLD, rc) 36 | else if (rank == 1) then 37 | call mpi_recv(recvarray, 2, vector2, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, rc) 38 | end if 39 | 40 | call mpi_type_free(vector2, rc) 41 | 42 | if (rank == 1) then 43 | write(*,*) 'Received data' 44 | do i=1, 8 45 | write(*,'(*(I3))') recvarray(i, :) 46 | end do 47 | end if 48 | 49 | call mpi_finalize(rc) 50 | 51 | 52 | 53 | end program datatype1 54 | -------------------------------------------------------------------------------- /mpi/datatype-extent/solution/send-receive.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int rank; 7 | int sendarray[8][6]; 8 | int recvarray[8][6]; 9 | MPI_Datatype vector, vector2; 10 | 11 | 12 | MPI_Init(&argc, &argv); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | 15 | // Initialize arrays 16 | if (rank == 0) { 17 | for (int i = 0; i < 8; i++) { 18 | for (int j = 0; j < 6; j++) { 19 | sendarray[i][j] = (i + 1) * 10 + j + 1; 20 | } 21 | } 22 | } 23 | 24 | for (int i = 0; i < 8; i++) { 25 | for (int j = 0; j < 6; j++) { 26 | recvarray[i][j] = 0; 27 | } 28 | } 29 | 30 | if (rank == 0) { 31 | printf("Data in rank 0\n"); 32 | for (int i = 0; i < 8; i++) { 33 | for (int j = 0; j < 6; j++) { 34 | printf("%3d", sendarray[i][j]); 35 | } 36 | printf("\n"); 37 | } 38 | } 39 | 40 | // Create datatype 41 | MPI_Type_vector(8, 1, 6, MPI_INT, &vector); 42 | MPI_Type_create_resized(vector, 0, sizeof(int), &vector2); 43 | MPI_Type_commit(&vector2); 44 | 45 | // Send two columns of matrix 46 | if (rank == 0) 47 | MPI_Send(&sendarray[0][0], 2, vector2, 1, 0, MPI_COMM_WORLD); 48 | else if (rank == 1) 49 | MPI_Recv(&recvarray[0][0], 2, vector2, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 50 | 51 | MPI_Type_free(&vector2); 52 | 53 | if (rank == 1) { 54 | printf("Received data\n"); 55 | for (int i = 0; i < 8; i++) { 56 | for (int j = 0; j < 6; j++) { 57 | printf("%3d", recvarray[i][j]); 58 | } 59 | printf("\n"); 60 | } 61 | } 62 | 63 | MPI_Finalize(); 64 | 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /mpi/heat-equation/README_2d.md: -------------------------------------------------------------------------------- 1 | ## Heat equation decomposed in two dimensions 2 | 3 | Note: this is very advanced exercise. It is meant more for illustration what can be done 4 | with derived datatypes, and as something one might want to study after the course, rather than 5 | completing during the course. 6 | 7 | If you are not familiar with the two dimensional heat equation, please have a look 8 | for [basic description](https://github.com/csc-training/mpi-introduction/tree/main/heat-equation) 9 | in "Parallel programming with MPI" exercise material. 10 | 11 | Here, starting point is a working code parallelized over columns (in Fortran) or rows (in C/C++). 12 | 13 | Before starting with this exercise, it is recommended that you have 14 | the [usage of Cartesian communicator](README_cartesian.md) implemented. 15 | 16 | 1. Modify the creation of Cartesian communicator so that the 17 | decomposition is done in two dimensions, and determine all four 18 | neighbors (up, down, left, right). 19 | 20 | 2. 
As the rows (in Fortran) or columns (in C/C++) are not contiguous
21 | in the computer memory, one needs to use user-defined datatypes
22 | when communicating in the `exchange()` routine in [cpp/core.cpp](cpp/core.cpp)
23 | or [fortran/core.F90](fortran/core.F90). In order to make the code more
24 | symmetric, one can utilize a derived type also for the contiguous
25 | dimension. Create the required datatypes (it is recommended to store
26 | them as attributes in the "parallel" data structure).
27 | 3. Perform the halo exchange with `MPI_Neighbor_alltoallw`. Together
28 | with the user-defined datatypes, no temporary buffers are needed in
29 | the user code. In order to use `MPI_Neighbor_alltoallw`, you need
30 | to determine the correct `displacements` both in sending and
31 | receiving.
32 | 4. In the base version, the I/O routines `write_field` and
33 | `read_field` (in [cpp/io.cpp](cpp/io.cpp) or
34 | [fortran/io.F90](fortran/io.F90))
35 | use temporary buffers for communication. Create an appropriate
36 | datatype and utilize it in the I/O-related communication. Note that you
37 | also need the coordinates of the processes in the Cartesian grid in
38 | order to read from / write to the correct part of the global
39 | temperature field.
40 |
-------------------------------------------------------------------------------- /mpi/heat-equation/README_cartesian.md: --------------------------------------------------------------------------------
1 | ## Heat equation solver with Cartesian communicator
2 |
3 | If you are not familiar with the two dimensional heat equation, please have a look
4 | at the [basic description](https://github.com/csc-training/mpi-introduction/tree/main/heat-equation)
5 | in the "Parallel programming with MPI" exercise material.
6 |
7 | Here, the starting point is a working code parallelized over columns (in Fortran) or rows (in C/C++).
8 |
9 | The current version uses only MPI_COMM_WORLD, and the neighboring processes are determined manually.
10 |
11 | 1. Add a "communicator" attribute to the basic parallelization data structure (`type :: parallel_data` in [fortran/heat_mod.F90](fortran/heat_mod.F90) or class `ParallelData` in [cpp/heat.hpp](cpp/heat.hpp))
12 | 2. Create the Cartesian communicator in the routine `parallel_setup()` (Fortran) or in
13 | the `ParallelData()` constructor (C++), and use `MPI_Cart_shift` for determining the
14 | neighboring processes
15 | 3. Use the Cartesian communicator in all communication routines
16 |
17 | To build the code, please use the provided `Makefile` (by typing `make`). By default, the Intel
18 | compiler is used; in order to use gcc, type `make COMP=gnu`.
19 |
-------------------------------------------------------------------------------- /mpi/heat-equation/README_neighbor.md: --------------------------------------------------------------------------------
1 | ## Heat equation solver with neighborhood collectives
2 |
3 | If you are not familiar with the two dimensional heat equation, please have a look
4 | at the [basic description](https://github.com/csc-training/mpi-introduction/tree/main/heat-equation)
5 | in the "Parallel programming with MPI" exercise material.
6 |
7 | Here, the starting point is a working code parallelized over columns (in Fortran) or rows (in C/C++).
8 |
9 | Implement the halo exchange in the `exchange()` routine in [cpp/core.cpp](cpp/core.cpp)
10 | or [fortran/core.F90](fortran/core.F90) with `MPI_Neighbor_alltoall`. In order to proceed,
11 | you need to have the [usage of Cartesian communicator](README_cartesian.md) implemented.
12 |
13 | 1.
1. Before the start of the communication, the values at the real domain boundaries (*i.e.* not those 14 | in the ghost region) need to be copied into a send buffer in the correct order. You may 15 | allocate and deallocate the send and receive buffers within the `exchange` routine or, as 16 | recommended, store them as part of the "parallel" data structure. 17 | 18 | 2. Perform the halo exchange with a single call to `MPI_Neighbor_alltoall`. 19 | 20 | 3. As the boundary values of neighboring processes are in the receive buffer, the values need to be 21 | copied to the ghost region. 22 | 23 | To build the code, please use the provided `Makefile` (by typing `make`). By default, the Intel 24 | compiler is used; in order to use gcc, type `make COMP=gnu`. 25 | -------------------------------------------------------------------------------- /mpi/heat-equation/common/pngwriter.h: -------------------------------------------------------------------------------- 1 | #ifndef PNGWRITER_H_ 2 | #define PNGWRITER_H_ 3 | 4 | #if __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int save_png(double *data, const int nx, const int ny, const char *fname, 9 | const char lang); 10 | 11 | #if __cplusplus 12 | } 13 | #endif 14 | #endif 15 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(COMP),) 2 | COMP=intel 3 | endif 4 | 5 | COMMONDIR=../common 6 | 7 | ifeq ($(COMP),gnu) 8 | CXX=mpicxx 9 | CC=gcc 10 | CCFLAGS=-g -O3 -march=native -Wall -I$(COMMONDIR) -DNDEBUG 11 | LDFLAGS= 12 | LIBS=-lpng 13 | endif 14 | 15 | ifeq ($(COMP),intel) 16 | CXX=mpicxx 17 | CC=icc 18 | CCFLAGS=-g -O3 -xHost -I$(COMMONDIR) -DNDEBUG 19 | LDFLAGS= 20 | LIBS=-lpng 21 | endif 22 | 23 | EXE=heat_mpi 24 | OBJS=main.o heat.o core.o setup.o io.o utilities.o 25 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 26 | 27 | 28 | all: $(EXE) 29 | 30 | 31 | utilities.o: utilities.cpp heat.hpp 32 | heat.o: heat.cpp heat.hpp matrix.hpp 33 | core.o: core.cpp heat.hpp 34 | setup.o: setup.cpp heat.hpp 35 | io.o: io.cpp heat.hpp matrix.hpp 36 | main.o: main.cpp heat.hpp 37 | 38 | $(OBJS_PNG): C_COMPILER := $(CC) 39 | $(OBJS): C_COMPILER := $(CXX) 40 | 41 | $(EXE): $(OBJS) $(OBJS_PNG) 42 | $(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 43 | 44 | %.o: %.cpp 45 | $(CXX) $(CCFLAGS) -c $< -o $@ 46 | 47 | %.o: %.c 48 | $(CC) $(CCFLAGS) -c $< -o $@ 49 | 50 | .PHONY: clean 51 | clean: 52 | -/bin/rm -f $(EXE) a.out *.o *.png *~ 53 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/core.cpp: -------------------------------------------------------------------------------- 1 | // Main solver routines for heat equation solver 2 | 3 | #include <mpi.h> 4 | 5 | #include "heat.hpp" 6 | 7 | // Exchange the boundary values 8 | void exchange(Field& field, const ParallelData parallel) 9 | { 10 | 11 | // Send to up, receive from down 12 | double* sbuf = field.temperature.data(1, 0); 13 | double* rbuf = field.temperature.data(field.nx + 1, 0); 14 | MPI_Sendrecv(sbuf, field.ny + 2, MPI_DOUBLE, 15 | parallel.nup, 11, 16 | rbuf, field.ny + 2, MPI_DOUBLE, 17 | parallel.ndown, 11, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 18 | 19 | // Send to down, receive from up 20 | sbuf = field.temperature.data(field.nx, 0); 21 | rbuf = field.temperature.data(); 22 | MPI_Sendrecv(sbuf, field.ny + 2, MPI_DOUBLE, 23 | parallel.ndown, 12, 24 | rbuf, field.ny + 2, MPI_DOUBLE, 25 | parallel.nup, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 
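    // Note: at the edges of the decomposition parallel.nup / parallel.ndown are
    // MPI_PROC_NULL (see ParallelData in heat.hpp), so the corresponding Sendrecv
    // is a no-op there and no separate boundary handling is needed. The count
    // field.ny + 2 covers one full row including the two ghost columns.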
26 | 27 | } 28 | 29 | // Update the temperature values using five-point stencil */ 30 | void evolve(Field& curr, const Field& prev, const double a, const double dt) 31 | { 32 | 33 | // Compilers do not necessarily optimize division to multiplication, so make it explicit 34 | auto inv_dx2 = 1.0 / (prev.dx * prev.dx); 35 | auto inv_dy2 = 1.0 / (prev.dy * prev.dy); 36 | 37 | // Determine the temperature field at next time step 38 | // As we have fixed boundary conditions, the outermost gridpoints 39 | // are not updated. 40 | for (int i = 1; i < curr.nx + 1; i++) { 41 | for (int j = 1; j < curr.ny + 1; j++) { 42 | curr(i, j) = prev(i, j) + a * dt * ( 43 | ( prev(i + 1, j) - 2.0 * prev(i, j) + prev(i - 1, j) ) * inv_dx2 + 44 | ( prev(i, j + 1) - 2.0 * prev(i, j) + prev(i, j - 1) ) * inv_dy2 45 | ); 46 | } 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/heat.cpp: -------------------------------------------------------------------------------- 1 | #include "heat.hpp" 2 | #include "matrix.hpp" 3 | #include 4 | #include 5 | 6 | void Field::setup(int nx_in, int ny_in, ParallelData parallel) 7 | { 8 | nx_full = nx_in; 9 | ny_full = ny_in; 10 | 11 | nx = nx_full / parallel.size; 12 | if (nx * parallel.size != nx_full) { 13 | std::cout << "Cannot divide grid evenly to processors" << std::endl; 14 | MPI_Abort(MPI_COMM_WORLD, -2); 15 | } 16 | ny = ny_full; 17 | 18 | // matrix includes also ghost layers 19 | temperature = Matrix (nx + 2, ny + 2); 20 | } 21 | 22 | void Field::generate(ParallelData parallel) { 23 | 24 | // Radius of the source disc 25 | auto radius = nx_full / 6.0; 26 | for (int i = 0; i < nx + 2; i++) { 27 | for (int j = 0; j < ny + 2; j++) { 28 | // Distance of point i, j from the origin 29 | auto dx = i + parallel.rank * nx - nx_full / 2 + 1; 30 | auto dy = j - ny / 2 + 1; 31 | if (dx * dx + dy * dy < radius * radius) { 32 | temperature(i, j) = 5.0; 33 | } else { 34 | temperature(i, j) = 65.0; 35 | } 36 | } 37 | } 38 | 39 | // Boundary conditions 40 | for (int i = 0; i < nx + 2; i++) { 41 | // Left 42 | temperature(i, 0) = 20.0; 43 | // Right 44 | temperature(i, ny + 1) = 70.0; 45 | } 46 | 47 | // Top 48 | if (0 == parallel.rank) { 49 | for (int j = 0; j < ny + 2; j++) { 50 | temperature(0, j) = 85.0; 51 | } 52 | } 53 | // Bottom 54 | if (parallel.rank == parallel.size - 1) { 55 | for (int j = 0; j < ny + 2; j++) { 56 | temperature(nx + 1, j) = 5.0; 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/heat.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "matrix.hpp" 4 | #include 5 | 6 | // Class for basic parallelization information 7 | struct ParallelData { 8 | int size; // Number of MPI tasks 9 | int rank; 10 | int nup, ndown; // Ranks of neighbouring MPI tasks 11 | 12 | ParallelData() { // Constructor 13 | 14 | MPI_Comm_size(MPI_COMM_WORLD, &size); 15 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 16 | 17 | nup = rank - 1; 18 | ndown = rank + 1; 19 | 20 | if (nup < 0) { 21 | nup = MPI_PROC_NULL; 22 | } 23 | if (ndown > size - 1) { 24 | ndown = MPI_PROC_NULL; 25 | } 26 | }; 27 | 28 | }; 29 | 30 | // Class for temperature field 31 | struct Field { 32 | // nx and ny are the true dimensions of the field. 
The temperature matrix 33 | // contains also ghost layers, so it will have dimensions nx+2 x ny+2 34 | int nx; // Local dimensions of the field 35 | int ny; 36 | int nx_full; // Global dimensions of the field 37 | int ny_full; // Global dimensions of the field 38 | double dx = 0.01; // Grid spacing 39 | double dy = 0.01; 40 | 41 | Matrix temperature; 42 | 43 | void setup(int nx_in, int ny_in, ParallelData parallel); 44 | 45 | void generate(ParallelData parallel); 46 | 47 | // standard (i,j) syntax for setting elements 48 | double& operator()(int i, int j) {return temperature(i, j);} 49 | 50 | // standard (i,j) syntax for getting elements 51 | const double& operator()(int i, int j) const {return temperature(i, j);} 52 | 53 | }; 54 | 55 | // Function declarations 56 | void initialize(int argc, char *argv[], Field& current, 57 | Field& previous, int& nsteps, ParallelData parallel); 58 | 59 | void exchange(Field& field, const ParallelData parallel); 60 | 61 | void evolve(Field& curr, const Field& prev, const double a, const double dt); 62 | 63 | void write_field(const Field& field, const int iter, const ParallelData parallel); 64 | 65 | void read_field(Field& field, std::string filename, 66 | const ParallelData parallel); 67 | 68 | double average(const Field& field, const ParallelData parallel); 69 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/matrix.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | // Generic 2D matrix array class. 6 | // 7 | // Internally data is stored in 1D vector but is 8 | // accessed using index function that maps i and j 9 | // indices to an element in the flat data vector. 10 | // Row major storage is used 11 | // For easier usage, we overload parentheses () operator 12 | // for accessing matrix elements in the usual (i,j) 13 | // format. 
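// A minimal usage sketch (illustrative only; the names below are not part of the solver):
//   Matrix<double> m(4, 6);         // 4 x 6 matrix, stored row-major in a flat vector
//   m(1, 2) = 3.0;                  // element access through operator()
//   double *row1 = m.data(1, 0);    // raw pointer to the start of row 1, e.g. for MPI calls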
14 | 15 | template 16 | class Matrix 17 | { 18 | 19 | private: 20 | 21 | // Internal storage 22 | std::vector _data; 23 | 24 | // Internal 1D indexing 25 | int indx(int i, int j) const { 26 | //assert that indices are reasonable 27 | assert(i >= 0 && i < nx); 28 | assert(j >= 0 && j < ny); 29 | 30 | return i * ny + j; 31 | } 32 | 33 | public: 34 | 35 | // matrix dimensions 36 | int nx, ny; 37 | 38 | // Default constructor 39 | Matrix() = default; 40 | // Allocate at the time of construction 41 | Matrix(int nx, int ny) : nx(nx), ny(ny) { 42 | _data.resize(nx * ny); 43 | }; 44 | 45 | void allocate(int nx_in, int ny_in) { 46 | nx = nx_in; 47 | ny = ny_in; 48 | _data.resize(nx * ny); 49 | }; 50 | 51 | // standard (i,j) syntax for setting elements 52 | T& operator()(int i, int j) { 53 | return _data[ indx(i, j) ]; 54 | } 55 | 56 | // standard (i,j) syntax for getting elements 57 | const T& operator()(int i, int j) const { 58 | return _data[ indx(i, j) ]; 59 | } 60 | 61 | // provide possibility to get raw pointer for data at index (i,j) (needed for MPI) 62 | T* data(int i=0, int j=0) {return _data.data() + i * ny + j;} 63 | 64 | }; 65 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/setup.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "heat.hpp" 5 | 6 | 7 | void initialize(int argc, char *argv[], Field& current, 8 | Field& previous, int& nsteps, ParallelData parallel) 9 | { 10 | /* 11 | * Following combinations of command line arguments are possible: 12 | * No arguments: use default field dimensions and number of time steps 13 | * One argument: read initial field from a given file 14 | * Two arguments: initial field from file and number of time steps 15 | * Three arguments: field dimensions (rows,cols) and number of time steps 16 | */ 17 | 18 | 19 | int rows = 2000; //!< Field dimensions with default values 20 | int cols = 2000; 21 | 22 | std::string input_file; //!< Name of the optional input file 23 | 24 | bool read_file = 0; 25 | 26 | nsteps = 500; 27 | 28 | switch (argc) { 29 | case 1: 30 | /* Use default values */ 31 | break; 32 | case 2: 33 | /* Read initial field from a file */ 34 | input_file = argv[1]; 35 | read_file = true; 36 | break; 37 | case 3: 38 | /* Read initial field from a file */ 39 | input_file = argv[1]; 40 | read_file = true; 41 | 42 | /* Number of time steps */ 43 | nsteps = std::atoi(argv[2]); 44 | break; 45 | case 4: 46 | /* Field dimensions */ 47 | rows = std::atoi(argv[1]); 48 | cols = std::atoi(argv[2]); 49 | /* Number of time steps */ 50 | nsteps = std::atoi(argv[3]); 51 | break; 52 | default: 53 | std::cout << "Unsupported number of command line arguments" << std::endl; 54 | exit(-1); 55 | } 56 | 57 | if (read_file) { 58 | if (0 == parallel.rank) 59 | std::cout << "Reading input from " + input_file << std::endl; 60 | read_field(current, input_file, parallel); 61 | } else { 62 | current.setup(rows, cols, parallel); 63 | current.generate(parallel); 64 | } 65 | 66 | // copy "current" field also to "previous" 67 | previous = current; 68 | 69 | } 70 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/solution/Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(COMP),) 2 | COMP=intel 3 | endif 4 | 5 | COMMONDIR=../../common 6 | 7 | ifeq ($(COMP),gnu) 8 | CXX=mpicxx 9 | CC=gcc 10 | CCFLAGS=-g -O3 -march=native -Wall 
-I$(COMMONDIR) -DNDEBUG 11 | LDFLAGS= 12 | LIBS=-lpng 13 | endif 14 | 15 | ifeq ($(COMP),intel) 16 | CXX=mpicxx 17 | CC=icc 18 | CCFLAGS=-g -O3 -xHost -I$(COMMONDIR) -DNDEBUG 19 | LDFLAGS= 20 | LIBS=-lpng 21 | endif 22 | 23 | EXE=heat_mpi 24 | OBJS=main.o heat.o core.o setup.o io.o utilities.o 25 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 26 | 27 | 28 | all: $(EXE) 29 | 30 | 31 | utilities.o: utilities.cpp heat.hpp 32 | heat.o: heat.cpp heat.hpp matrix.hpp 33 | core.o: core.cpp heat.hpp 34 | setup.o: setup.cpp heat.hpp 35 | io.o: io.cpp heat.hpp matrix.hpp 36 | main.o: main.cpp heat.hpp 37 | 38 | $(OBJS_PNG): C_COMPILER := $(CC) 39 | $(OBJS): C_COMPILER := $(CXX) 40 | 41 | $(EXE): $(OBJS) $(OBJS_PNG) 42 | $(CXX) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 43 | 44 | %.o: %.cpp 45 | $(CXX) $(CCFLAGS) -c $< -o $@ 46 | 47 | %.o: %.c 48 | $(CC) $(CCFLAGS) -c $< -o $@ 49 | 50 | .PHONY: clean 51 | clean: 52 | -/bin/rm -f $(EXE) a.out *.o *.png *~ 53 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/solution/core.cpp: -------------------------------------------------------------------------------- 1 | // Main solver routines for heat equation solver 2 | 3 | #include 4 | 5 | #include "heat.hpp" 6 | 7 | // Exchange the boundary values 8 | void exchange(Field& field, const ParallelData parallel) 9 | { 10 | 11 | MPI_Datatype types[4] = {parallel.rowtype, parallel.rowtype, 12 | parallel.columntype, parallel.columntype}; 13 | int counts[4] = {1, 1, 1, 1}; 14 | MPI_Aint sdisps[4], rdisps[4], disp0; 15 | 16 | // Determine displacements 17 | disp0 = reinterpret_cast (field.temperature.data(0, 0)); 18 | sdisps[0] = reinterpret_cast (field.temperature.data(1, 0)); // Second row 19 | sdisps[1] = reinterpret_cast (field.temperature.data(field.nx, 0)); // Second last row 20 | sdisps[2] = reinterpret_cast (field.temperature.data(0, 1)); // Second column 21 | sdisps[3] = reinterpret_cast (field.temperature.data(0, field.ny)); // Second last column 22 | 23 | rdisps[0] = reinterpret_cast (field.temperature.data(0, 0)); // First row 24 | rdisps[1] = reinterpret_cast (field.temperature.data(field.nx + 1, 0)); // Last row 25 | rdisps[2] = reinterpret_cast (field.temperature.data(0, 0)); // First column 26 | rdisps[3] = reinterpret_cast (field.temperature.data(0, field.ny + 1)); // Last column 27 | 28 | for (int i=0; i < 4; i++) { 29 | sdisps[i] -= disp0; 30 | rdisps[i] -= disp0; 31 | } 32 | 33 | MPI_Neighbor_alltoallw(field.temperature.data(), counts, sdisps, types, 34 | field.temperature.data(), counts, rdisps, types, 35 | parallel.comm); 36 | } 37 | 38 | // Update the temperature values using five-point stencil */ 39 | void evolve(Field& curr, const Field& prev, const double a, const double dt) 40 | { 41 | 42 | // Compilers do not necessarily optimize division to multiplication, so make it explicit 43 | auto inv_dx2 = 1.0 / (prev.dx * prev.dx); 44 | auto inv_dy2 = 1.0 / (prev.dy * prev.dy); 45 | 46 | // Determine the temperature field at next time step 47 | // As we have fixed boundary conditions, the outermost gridpoints 48 | // are not updated. 
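    // Written out, the update computed by the loops below is
    //   curr(i,j) = prev(i,j) + a*dt * [ (prev(i+1,j) - 2*prev(i,j) + prev(i-1,j)) / dx^2
    //                                  + (prev(i,j+1) - 2*prev(i,j) + prev(i,j-1)) / dy^2 ]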
49 | for (int i = 1; i < curr.nx + 1; i++) { 50 | for (int j = 1; j < curr.ny + 1; j++) { 51 | curr(i, j) = prev(i, j) + a * dt * ( 52 | ( prev(i + 1, j) - 2.0 * prev(i, j) + prev(i - 1, j) ) * inv_dx2 + 53 | ( prev(i, j + 1) - 2.0 * prev(i, j) + prev(i, j - 1) ) * inv_dy2 54 | ); 55 | } 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/solution/heat.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "matrix.hpp" 4 | #include 5 | 6 | // Class for basic parallelization information 7 | struct ParallelData { 8 | int size; // Number of MPI tasks 9 | int rank; 10 | int nghbrs[2][2]; // Ranks of neighbouring MPI tasks 11 | MPI_Datatype rowtype, columntype, subarraytype; 12 | MPI_Comm comm; 13 | 14 | ParallelData() { // Constructor 15 | 16 | MPI_Comm_size(MPI_COMM_WORLD, &size); 17 | 18 | constexpr int ndims = 2; 19 | int dims[ndims] = {0, 0}; 20 | int periods[ndims] = {0, 0}; 21 | 22 | MPI_Dims_create(size, ndims, dims); 23 | MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, 1, &comm); 24 | MPI_Comm_rank(comm, &rank); 25 | 26 | // Determine neighbors 27 | for (int i=0; i < ndims; i++) 28 | MPI_Cart_shift(comm, i, 1, &nghbrs[i][0], &nghbrs[i][1]); 29 | }; 30 | 31 | }; 32 | 33 | // Class for temperature field 34 | struct Field { 35 | // nx and ny are the true dimensions of the field. The temperature matrix 36 | // contains also ghost layers, so it will have dimensions nx+2 x ny+2 37 | int nx; // Local dimensions of the field 38 | int ny; 39 | int nx_full; // Global dimensions of the field 40 | int ny_full; // Global dimensions of the field 41 | double dx = 0.01; // Grid spacing 42 | double dy = 0.01; 43 | 44 | Matrix temperature; 45 | 46 | void setup(int nx_in, int ny_in, ParallelData& parallel); 47 | 48 | void generate(ParallelData parallel); 49 | 50 | // standard (i,j) syntax for setting elements 51 | double& operator()(int i, int j) {return temperature(i, j);} 52 | 53 | // standard (i,j) syntax for getting elements 54 | const double& operator()(int i, int j) const {return temperature(i, j);} 55 | 56 | }; 57 | 58 | // Function declarations 59 | void initialize(int argc, char *argv[], Field& current, 60 | Field& previous, int& nsteps, ParallelData& parallel); 61 | 62 | void exchange(Field& field, const ParallelData parallel); 63 | 64 | void evolve(Field& curr, const Field& prev, const double a, const double dt); 65 | 66 | void write_field(Field& field, const int iter, const ParallelData parallel); 67 | 68 | void read_field(Field& field, std::string filename, 69 | ParallelData& parallel); 70 | 71 | double average(const Field& field, const ParallelData parallel); 72 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/solution/matrix.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | // Generic 2D matrix array class. 6 | // 7 | // Internally data is stored in 1D vector but is 8 | // accessed using index function that maps i and j 9 | // indices to an element in the flat data vector. 10 | // Row major storage is used 11 | // For easier usage, we overload parentheses () operator 12 | // for accessing matrix elements in the usual (i,j) 13 | // format. 
14 | 15 | template 16 | class Matrix 17 | { 18 | 19 | private: 20 | 21 | // Internal storage 22 | std::vector _data; 23 | 24 | // Internal 1D indexing 25 | int indx(int i, int j) const { 26 | //assert that indices are reasonable 27 | assert(i >= 0 && i < nx); 28 | assert(j >= 0 && j < ny); 29 | 30 | return i * ny + j; 31 | } 32 | 33 | public: 34 | 35 | // matrix dimensions 36 | int nx, ny; 37 | 38 | // Default constructor 39 | Matrix() = default; 40 | // Allocate at the time of construction 41 | Matrix(int nx, int ny) : nx(nx), ny(ny) { 42 | _data.resize(nx * ny); 43 | }; 44 | 45 | void allocate(int nx_in, int ny_in) { 46 | nx = nx_in; 47 | ny = ny_in; 48 | _data.resize(nx * ny); 49 | }; 50 | 51 | // standard (i,j) syntax for setting elements 52 | T& operator()(int i, int j) { 53 | return _data[ indx(i, j) ]; 54 | } 55 | 56 | // standard (i,j) syntax for getting elements 57 | const T& operator()(int i, int j) const { 58 | return _data[ indx(i, j) ]; 59 | } 60 | 61 | // provide possibility to get raw pointer for data at index (i,j) (needed for MPI) 62 | T* data(int i=0, int j=0) {return _data.data() + i * ny + j;} 63 | 64 | }; 65 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/solution/setup.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "heat.hpp" 5 | 6 | 7 | void initialize(int argc, char *argv[], Field& current, 8 | Field& previous, int& nsteps, ParallelData ¶llel) 9 | { 10 | /* 11 | * Following combinations of command line arguments are possible: 12 | * No arguments: use default field dimensions and number of time steps 13 | * One argument: read initial field from a given file 14 | * Two arguments: initial field from file and number of time steps 15 | * Three arguments: field dimensions (rows,cols) and number of time steps 16 | */ 17 | 18 | 19 | int rows = 2000; //!< Field dimensions with default values 20 | int cols = 2000; 21 | 22 | std::string input_file; //!< Name of the optional input file 23 | 24 | bool read_file = 0; 25 | 26 | nsteps = 500; 27 | 28 | switch (argc) { 29 | case 1: 30 | /* Use default values */ 31 | break; 32 | case 2: 33 | /* Read initial field from a file */ 34 | input_file = argv[1]; 35 | read_file = true; 36 | break; 37 | case 3: 38 | /* Read initial field from a file */ 39 | input_file = argv[1]; 40 | read_file = true; 41 | 42 | /* Number of time steps */ 43 | nsteps = std::atoi(argv[2]); 44 | break; 45 | case 4: 46 | /* Field dimensions */ 47 | rows = std::atoi(argv[1]); 48 | cols = std::atoi(argv[2]); 49 | /* Number of time steps */ 50 | nsteps = std::atoi(argv[3]); 51 | break; 52 | default: 53 | std::cout << "Unsupported number of command line arguments" << std::endl; 54 | exit(-1); 55 | } 56 | 57 | if (read_file) { 58 | if (0 == parallel.rank) 59 | std::cout << "Reading input from " + input_file << std::endl; 60 | read_field(current, input_file, parallel); 61 | } else { 62 | current.setup(rows, cols, parallel); 63 | current.generate(parallel); 64 | } 65 | 66 | // copy "current" field also to "previous" 67 | previous = current; 68 | 69 | } 70 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/solution/utilities.cpp: -------------------------------------------------------------------------------- 1 | // Utility functions for heat equation solver 2 | // NOTE: This file does not need to be edited! 
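// The only routine here, average(), sums the local interior points and combines
// the partial sums with MPI_Allreduce over the Cartesian communicator.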
3 | 4 | #include 5 | 6 | #include "heat.hpp" 7 | 8 | // Calculate average temperature 9 | double average(const Field& field, const ParallelData parallel) 10 | { 11 | double local_average = 0.0; 12 | double average = 0.0; 13 | 14 | for (int i = 1; i < field.nx + 1; i++) { 15 | for (int j = 1; j < field.ny + 1; j++) { 16 | local_average += field.temperature(i, j); 17 | } 18 | } 19 | 20 | MPI_Allreduce(&local_average, &average, 1, MPI_DOUBLE, MPI_SUM, 21 | parallel.comm); 22 | average /= (field.nx_full * field.ny_full); 23 | return average; 24 | } 25 | -------------------------------------------------------------------------------- /mpi/heat-equation/cpp/utilities.cpp: -------------------------------------------------------------------------------- 1 | // Utility functions for heat equation solver 2 | // NOTE: This file does not need to be edited! 3 | 4 | #include 5 | 6 | #include "heat.hpp" 7 | 8 | // Calculate average temperature 9 | double average(const Field& field, const ParallelData parallel) 10 | { 11 | double local_average = 0.0; 12 | double average = 0.0; 13 | 14 | for (int i = 1; i < field.nx + 1; i++) { 15 | for (int j = 1; j < field.ny + 1; j++) { 16 | local_average += field.temperature(i, j); 17 | } 18 | } 19 | 20 | MPI_Allreduce(&local_average, &average, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 21 | average /= (field.nx_full * field.ny_full); 22 | return average; 23 | } 24 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(COMP),) 2 | COMP=intel 3 | endif 4 | 5 | COMMONDIR=../common 6 | 7 | ifeq ($(COMP),intel) 8 | FC=mpif90 9 | CC=icc 10 | FCFLAGS=-O2 -xHost 11 | CCFLAGS=-O2 -xHost -I$(COMMONDIR) 12 | LDFLAGS= 13 | LIBS=-lpng 14 | endif 15 | 16 | ifeq ($(COMP),gnu) 17 | FC=mpif90 18 | CC=gcc 19 | FCFLAGS=-O3 -Wall -march=native 20 | CCFLAGS=-O3 -Wall -march=native -I$(COMMONDIR) 21 | LDFLAGS= 22 | LIBS=-lpng 23 | endif 24 | 25 | EXE=heat_mpi 26 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o 27 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 28 | 29 | 30 | all: $(EXE) 31 | 32 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 33 | core.o: core.F90 heat_mod.o 34 | utilities.o: utilities.F90 heat_mod.o 35 | io.o: io.F90 heat_mod.o pngwriter_mod.o 36 | setup.o: setup.F90 heat_mod.o utilities.o io.o 37 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o 38 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o 39 | 40 | $(EXE): $(OBJS) $(OBJS_PNG) 41 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 42 | 43 | %.o: %.F90 44 | $(FC) $(FCFLAGS) -c $< -o $@ 45 | 46 | %.o: %.c 47 | $(CC) $(CCFLAGS) -c $< -o $@ 48 | 49 | .PHONY: clean 50 | clean: 51 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ 52 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/core.F90: -------------------------------------------------------------------------------- 1 | ! Main solver routines for heat equation solver 2 | module core 3 | use heat 4 | 5 | contains 6 | 7 | ! Exchange the boundary data between MPI tasks 8 | subroutine exchange(field0, parallel) 9 | 10 | implicit none 11 | 12 | type(field), intent(inout) :: field0 13 | type(parallel_data), intent(in) :: parallel 14 | 15 | integer :: ierr 16 | 17 | ! 
Send to left, receive from right 18 | call mpi_sendrecv(field0%data(:, 1), field0%nx + 2, MPI_DOUBLE_PRECISION, & 19 | & parallel%nleft, 11, & 20 | & field0%data(:, field0%ny + 1), field0%nx + 2, MPI_DOUBLE_PRECISION, & 21 | & parallel%nright, 11, & 22 | & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) 23 | 24 | ! Send to right, receive from left 25 | call mpi_sendrecv(field0%data(:, field0%ny), field0%nx + 2, MPI_DOUBLE_PRECISION, & 26 | & parallel%nright, 12, & 27 | & field0%data(:, 0), field0%nx + 2, MPI_DOUBLE_PRECISION,& 28 | & parallel%nleft, 12, & 29 | & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) 30 | 31 | end subroutine exchange 32 | 33 | ! Compute one time step of temperature evolution 34 | ! Arguments: 35 | ! curr (type(field)): current temperature values 36 | ! prev (type(field)): values from previous time step 37 | ! a (real(dp)): update equation constant 38 | ! dt (real(dp)): time step value 39 | subroutine evolve(curr, prev, a, dt) 40 | 41 | implicit none 42 | 43 | type(field), intent(inout) :: curr, prev 44 | real(dp) :: a, dt 45 | integer :: i, j, nx, ny 46 | 47 | nx = curr%nx 48 | ny = curr%ny 49 | 50 | do j = 1, ny 51 | do i = 1, nx 52 | curr%data(i, j) = prev%data(i, j) + a * dt * & 53 | & ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + & 54 | & prev%data(i+1, j)) / curr%dx**2 + & 55 | & (prev%data(i, j-1) - 2.0 * prev%data(i, j) + & 56 | & prev%data(i, j+1)) / curr%dy**2) 57 | end do 58 | end do 59 | end subroutine evolve 60 | 61 | end module core 62 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/heat_mod.F90: -------------------------------------------------------------------------------- 1 | ! Field metadata for heat equation solver 2 | module heat 3 | use mpi_f08 4 | use iso_fortran_env, only : REAL64 5 | 6 | implicit none 7 | 8 | integer, parameter :: dp = REAL64 9 | real(dp), parameter :: DX = 0.01, DY = 0.01 ! Fixed grid spacing 10 | 11 | type :: field 12 | integer :: nx ! local dimension of the field 13 | integer :: ny 14 | integer :: nx_full ! global dimension of the field 15 | integer :: ny_full 16 | real(dp) :: dx 17 | real(dp) :: dy 18 | real(dp), dimension(:,:), allocatable :: data 19 | end type field 20 | 21 | type :: parallel_data 22 | integer :: size 23 | integer :: rank 24 | integer :: nleft, nright ! Ranks of neighbouring MPI tasks 25 | end type parallel_data 26 | 27 | contains 28 | ! Initialize the field type metadata 29 | ! Arguments: 30 | ! field0 (type(field)): input field 31 | ! 
nx, ny, dx, dy: field dimensions and spatial step size 32 | subroutine set_field_dimensions(field0, nx, ny, parallel) 33 | implicit none 34 | 35 | type(field), intent(out) :: field0 36 | integer, intent(in) :: nx, ny 37 | type(parallel_data), intent(in) :: parallel 38 | 39 | integer :: nx_local, ny_local 40 | 41 | nx_local = nx 42 | ny_local = ny / parallel%size 43 | 44 | field0%dx = DX 45 | field0%dy = DY 46 | field0%nx = nx_local 47 | field0%ny = ny_local 48 | field0%nx_full = nx 49 | field0%ny_full = ny 50 | 51 | end subroutine set_field_dimensions 52 | 53 | subroutine parallel_setup(parallel, nx, ny) 54 | use mpi 55 | 56 | implicit none 57 | 58 | type(parallel_data), intent(out) :: parallel 59 | integer, intent(in), optional :: nx, ny 60 | 61 | integer :: ny_local 62 | integer :: ierr 63 | 64 | call mpi_comm_size(MPI_COMM_WORLD, parallel%size, ierr) 65 | 66 | if (present(ny)) then 67 | ny_local = ny / parallel%size 68 | if (ny_local * parallel%size /= ny) then 69 | write(*,*) 'Cannot divide grid evenly to processors' 70 | call mpi_abort(MPI_COMM_WORLD, -2, ierr) 71 | end if 72 | end if 73 | 74 | call mpi_comm_rank(MPI_COMM_WORLD, parallel%rank, ierr) 75 | 76 | parallel%nleft = parallel%rank - 1 77 | parallel%nright = parallel%rank + 1 78 | 79 | if (parallel%nleft < 0) then 80 | parallel%nleft = MPI_PROC_NULL 81 | end if 82 | if (parallel%nright > parallel%size - 1) then 83 | parallel%nright = MPI_PROC_NULL 84 | end if 85 | 86 | end subroutine parallel_setup 87 | 88 | end module heat 89 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/main.F90: -------------------------------------------------------------------------------- 1 | ! Heat equation solver in 2D. 2 | 3 | program heat_solve 4 | use heat 5 | use core 6 | use io 7 | use setup 8 | use utilities 9 | 10 | implicit none 11 | 12 | real(dp), parameter :: a = 0.5 ! Diffusion constant 13 | type(field) :: current, previous ! Current and previus temperature fields 14 | 15 | real(dp) :: dt ! Time step 16 | integer :: nsteps ! Number of time steps 17 | integer, parameter :: image_interval = 100 ! Image output interval 18 | 19 | type(parallel_data) :: parallelization 20 | integer :: ierr 21 | 22 | integer :: iter 23 | 24 | real(dp) :: average_temp ! Average temperature 25 | 26 | real(kind=dp) :: start, stop ! Timers 27 | 28 | call mpi_init(ierr) 29 | 30 | call initialize(current, previous, nsteps, parallelization) 31 | 32 | ! Draw the picture of the initial state 33 | call write_field(current, 0, parallelization) 34 | 35 | average_temp = average(current, parallelization) 36 | if (parallelization % rank == 0) then 37 | write(*,'(A, I5, A, I5, A, I5)') 'Simulation grid: ', current%nx_full, ' x ', & 38 | & current%ny_full, ' time steps: ', nsteps 39 | write(*,'(A, I5)') 'MPI processes: ', parallelization%size 40 | write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp 41 | end if 42 | 43 | ! Largest stable time step 44 | dt = current%dx**2 * current%dy**2 / & 45 | & (2.0 * a * (current%dx**2 + current%dy**2)) 46 | 47 | ! Main iteration loop, save a picture every 48 | ! image_interval steps 49 | 50 | start = mpi_wtime() 51 | 52 | do iter = 1, nsteps 53 | call exchange(previous, parallelization) 54 | call evolve(current, previous, a, dt) 55 | if (mod(iter, image_interval) == 0) then 56 | call write_field(current, iter, parallelization) 57 | end if 58 | call swap_fields(current, previous) 59 | end do 60 | 61 | stop = mpi_wtime() 62 | 63 | ! 
Average temperature for reference 64 | average_temp = average(previous, parallelization) 65 | 66 | if (parallelization % rank == 0) then 67 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.' 68 | write(*,'(A,F9.6)') 'Average temperature: ', average_temp 69 | if (command_argument_count() == 0) then 70 | write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239 71 | end if 72 | end if 73 | 74 | call finalize(current, previous) 75 | 76 | call mpi_finalize(ierr) 77 | 78 | end program heat_solve 79 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/pngwriter_mod.F90: -------------------------------------------------------------------------------- 1 | ! PNG writer for heat equation solver 2 | module pngwriter 3 | use heat 4 | 5 | contains 6 | 7 | function save_png(data, nx, ny, fname) result(stat) 8 | 9 | use, intrinsic :: ISO_C_BINDING 10 | implicit none 11 | 12 | real(dp), dimension(:,:), intent(in) :: data 13 | integer, intent(in) :: nx, ny 14 | character(len=*), intent(in) :: fname 15 | integer :: stat 16 | 17 | ! Interface for save_png C-function 18 | interface 19 | ! The C-function definition is 20 | ! int save_png(double *data, const int nx, const int ny, 21 | ! const char *fname) 22 | function save_png_c(data, nx, ny, fname, order) & 23 | & bind(C,name="save_png") result(stat) 24 | use, intrinsic :: ISO_C_BINDING 25 | implicit none 26 | real(kind=C_DOUBLE) :: data(*) 27 | integer(kind=C_INT), value, intent(IN) :: nx, ny 28 | character(kind=C_CHAR), intent(IN) :: fname(*) 29 | character(kind=C_CHAR), value, intent(IN) :: order 30 | integer(kind=C_INT) :: stat 31 | end function save_png_c 32 | end interface 33 | 34 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') 35 | if (stat /= 0) then 36 | write(*,*) 'save_png returned error!' 
37 | end if 38 | 39 | end function save_png 40 | 41 | end module pngwriter 42 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/solution/Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(COMP),) 2 | COMP=intel 3 | endif 4 | 5 | COMMONDIR=../../common 6 | 7 | ifeq ($(COMP),intel) 8 | FC=mpif90 9 | CC=icc 10 | FCFLAGS=-O2 -xHost 11 | CCFLAGS=-O2 -xHost -I$(COMMONDIR) 12 | LDFLAGS= 13 | LIBS=-lpng 14 | endif 15 | 16 | ifeq ($(COMP),gnu) 17 | FC=mpif90 18 | CC=gcc 19 | FCFLAGS=-O3 -Wall -march=native 20 | CCFLAGS=-O3 -Wall -march=native -I$(COMMONDIR) 21 | LDFLAGS= 22 | LIBS=-lpng 23 | endif 24 | 25 | EXE=heat_mpi 26 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o 27 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 28 | 29 | 30 | all: $(EXE) 31 | 32 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 33 | core.o: core.F90 heat_mod.o 34 | utilities.o: utilities.F90 heat_mod.o 35 | io.o: io.F90 heat_mod.o pngwriter_mod.o 36 | setup.o: setup.F90 heat_mod.o utilities.o io.o 37 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o 38 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o 39 | 40 | $(EXE): $(OBJS) $(OBJS_PNG) 41 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 42 | 43 | %.o: %.F90 44 | $(FC) $(FCFLAGS) -c $< -o $@ 45 | 46 | %.o: %.c 47 | $(CC) $(CCFLAGS) -c $< -o $@ 48 | 49 | .PHONY: clean 50 | clean: 51 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ 52 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/solution/main.F90: -------------------------------------------------------------------------------- 1 | ! Heat equation solver in 2D. 2 | 3 | program heat_solve 4 | use heat 5 | use core 6 | use io 7 | use setup 8 | use utilities 9 | 10 | implicit none 11 | 12 | real(dp), parameter :: a = 0.5 ! Diffusion constant 13 | type(field) :: current, previous ! Current and previus temperature fields 14 | 15 | real(dp) :: dt ! Time step 16 | integer :: nsteps ! Number of time steps 17 | integer, parameter :: image_interval = 100 ! Image output interval 18 | 19 | type(parallel_data) :: parallelization 20 | integer :: ierr 21 | 22 | integer :: iter 23 | 24 | real(dp) :: average_temp ! Average temperature 25 | 26 | real(kind=dp) :: start, stop ! Timers 27 | 28 | call mpi_init(ierr) 29 | 30 | call initialize(current, previous, nsteps, parallelization) 31 | 32 | ! Draw the picture of the initial state 33 | call write_field(current, 0, parallelization) 34 | 35 | average_temp = average(current, parallelization) 36 | if (parallelization % rank == 0) then 37 | write(*,'(A, I5, A, I5, A, I5)') 'Simulation grid: ', current%nx_full, ' x ', & 38 | & current%ny_full, ' time steps: ', nsteps 39 | write(*,'(A, I5)') 'MPI processes: ', parallelization%size 40 | write(*,'(A,F9.6)') 'Average temperature at start: ', average_temp 41 | end if 42 | 43 | ! Largest stable time step 44 | dt = current%dx**2 * current%dy**2 / & 45 | & (2.0 * a * (current%dx**2 + current%dy**2)) 46 | 47 | ! Main iteration loop, save a picture every 48 | ! image_interval steps 49 | 50 | start = mpi_wtime() 51 | 52 | do iter = 1, nsteps 53 | call exchange(previous, parallelization) 54 | call evolve(current, previous, a, dt) 55 | if (mod(iter, image_interval) == 0 .or. 
iter == nsteps) then 56 | call write_field(current, iter, parallelization) 57 | end if 58 | call swap_fields(current, previous) 59 | end do 60 | 61 | stop = mpi_wtime() 62 | 63 | ! Average temperature for reference 64 | average_temp = average(previous, parallelization) 65 | 66 | if (parallelization % rank == 0) then 67 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.' 68 | write(*,'(A,F9.6)') 'Average temperature: ', average_temp 69 | if (command_argument_count() == 0) then 70 | write(*,'(A,F9.6)') 'Reference value with default arguments: ', 59.281239 71 | end if 72 | end if 73 | 74 | call finalize(current, previous) 75 | 76 | call mpi_finalize(ierr) 77 | 78 | end program heat_solve 79 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/solution/pngwriter_mod.F90: -------------------------------------------------------------------------------- 1 | ! PNG writer for heat equation solver 2 | module pngwriter 3 | use heat 4 | 5 | contains 6 | 7 | function save_png(data, nx, ny, fname) result(stat) 8 | 9 | use, intrinsic :: ISO_C_BINDING 10 | implicit none 11 | 12 | real(dp), dimension(:,:), intent(in) :: data 13 | integer, intent(in) :: nx, ny 14 | character(len=*), intent(in) :: fname 15 | integer :: stat 16 | 17 | ! Interface for save_png C-function 18 | interface 19 | ! The C-function definition is 20 | ! int save_png(double *data, const int nx, const int ny, 21 | ! const char *fname) 22 | function save_png_c(data, nx, ny, fname, order) & 23 | & bind(C,name="save_png") result(stat) 24 | use, intrinsic :: ISO_C_BINDING 25 | implicit none 26 | real(kind=C_DOUBLE) :: data(*) 27 | integer(kind=C_INT), value, intent(IN) :: nx, ny 28 | character(kind=C_CHAR), intent(IN) :: fname(*) 29 | character(kind=C_CHAR), value, intent(IN) :: order 30 | integer(kind=C_INT) :: stat 31 | end function save_png_c 32 | end interface 33 | 34 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') 35 | if (stat /= 0) then 36 | write(*,*) 'save_png returned error!' 37 | end if 38 | 39 | end function save_png 40 | 41 | end module pngwriter 42 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/solution/utilities.F90: -------------------------------------------------------------------------------- 1 | ! Utility routines for heat equation solver 2 | 3 | module utilities 4 | use heat 5 | 6 | contains 7 | 8 | ! Swap the data fields of two variables of type field 9 | ! Arguments: 10 | ! curr, prev (type(field)): the two variables that are swapped 11 | subroutine swap_fields(curr, prev) 12 | 13 | implicit none 14 | 15 | type(field), intent(inout) :: curr, prev 16 | real(dp), allocatable, dimension(:,:) :: tmp 17 | 18 | call move_alloc(curr%data, tmp) 19 | call move_alloc(prev%data, curr%data) 20 | call move_alloc(tmp, prev%data) 21 | end subroutine swap_fields 22 | 23 | ! Copy the data from one field to another 24 | ! Arguments: 25 | ! from_field (type(field)): variable to copy from 26 | ! to_field (type(field)): variable to copy to 27 | subroutine copy_fields(from_field, to_field) 28 | 29 | implicit none 30 | 31 | type(field), intent(in) :: from_field 32 | type(field), intent(out) :: to_field 33 | 34 | ! Consistency checks 35 | if (.not.allocated(from_field%data)) then 36 | write (*,*) "Can not copy from a field without allocated data" 37 | stop 38 | end if 39 | if (.not.allocated(to_field%data)) then 40 | ! 
Target is not initialize, allocate memory 41 | allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), & 42 | & lbound(from_field%data, 2):ubound(from_field%data, 2))) 43 | else if (any(shape(from_field%data) /= shape(to_field%data))) then 44 | write (*,*) "Wrong field data sizes in copy routine" 45 | print *, shape(from_field%data), shape(to_field%data) 46 | stop 47 | end if 48 | 49 | to_field%data = from_field%data 50 | 51 | to_field%nx = from_field%nx 52 | to_field%ny = from_field%ny 53 | to_field%nx_full = from_field%nx_full 54 | to_field%ny_full = from_field%ny_full 55 | to_field%dx = from_field%dx 56 | to_field%dy = from_field%dy 57 | end subroutine copy_fields 58 | 59 | function average(field0, parallelization) 60 | 61 | implicit none 62 | 63 | real(dp) :: average 64 | type(field) :: field0 65 | type(parallel_data), intent(in) :: parallelization 66 | 67 | real(dp) :: local_average 68 | integer :: rc 69 | 70 | local_average = sum(field0%data(1:field0%nx, 1:field0%ny)) 71 | call mpi_allreduce(local_average, average, 1, MPI_DOUBLE_PRECISION, & 72 | & MPI_SUM, MPI_COMM_WORLD, rc) 73 | average = average / (field0%nx_full * field0%ny_full) 74 | 75 | end function average 76 | 77 | end module utilities 78 | -------------------------------------------------------------------------------- /mpi/heat-equation/fortran/utilities.F90: -------------------------------------------------------------------------------- 1 | ! Utility routines for heat equation solver 2 | 3 | module utilities 4 | use heat 5 | 6 | contains 7 | 8 | ! Swap the data fields of two variables of type field 9 | ! Arguments: 10 | ! curr, prev (type(field)): the two variables that are swapped 11 | subroutine swap_fields(curr, prev) 12 | 13 | implicit none 14 | 15 | type(field), intent(inout) :: curr, prev 16 | real(dp), allocatable, dimension(:,:) :: tmp 17 | 18 | call move_alloc(curr%data, tmp) 19 | call move_alloc(prev%data, curr%data) 20 | call move_alloc(tmp, prev%data) 21 | end subroutine swap_fields 22 | 23 | ! Copy the data from one field to another 24 | ! Arguments: 25 | ! from_field (type(field)): variable to copy from 26 | ! to_field (type(field)): variable to copy to 27 | subroutine copy_fields(from_field, to_field) 28 | 29 | implicit none 30 | 31 | type(field), intent(in) :: from_field 32 | type(field), intent(out) :: to_field 33 | 34 | ! Consistency checks 35 | if (.not.allocated(from_field%data)) then 36 | write (*,*) "Can not copy from a field without allocated data" 37 | stop 38 | end if 39 | if (.not.allocated(to_field%data)) then 40 | ! 
Target is not initialize, allocate memory 41 | allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), & 42 | & lbound(from_field%data, 2):ubound(from_field%data, 2))) 43 | else if (any(shape(from_field%data) /= shape(to_field%data))) then 44 | write (*,*) "Wrong field data sizes in copy routine" 45 | print *, shape(from_field%data), shape(to_field%data) 46 | stop 47 | end if 48 | 49 | to_field%data = from_field%data 50 | 51 | to_field%nx = from_field%nx 52 | to_field%ny = from_field%ny 53 | to_field%nx_full = from_field%nx_full 54 | to_field%ny_full = from_field%ny_full 55 | to_field%dx = from_field%dx 56 | to_field%dy = from_field%dy 57 | end subroutine copy_fields 58 | 59 | function average(field0, parallelization) 60 | 61 | implicit none 62 | 63 | real(dp) :: average 64 | type(field) :: field0 65 | type(parallel_data), intent(in) :: parallelization 66 | 67 | real(dp) :: local_average 68 | integer :: rc 69 | 70 | local_average = sum(field0%data(1:field0%nx, 1:field0%ny)) 71 | call mpi_allreduce(local_average, average, 1, MPI_DOUBLE_PRECISION, & 72 | & MPI_SUM, MPI_COMM_WORLD, rc) 73 | average = average / (field0%nx_full * field0%ny_full) 74 | 75 | end function average 76 | 77 | end module utilities 78 | -------------------------------------------------------------------------------- /mpi/message-chain-cartesian/README.md: -------------------------------------------------------------------------------- 1 | ## Message chain with Cartesian communicator 2 | 3 | Write a program where every MPI task sends data to the next one. 4 | Let `ntasks` be the number of the tasks, and `myid` the rank of the 5 | current task. Your program should work as follows: 6 | 7 | - Every task with a rank less than `ntasks-1` sends a message to task 8 | `myid+1`. For example, task 0 sends a message to task 1. 9 | - The message content is an integer array where each element is initialised to 10 | `myid`. 11 | - The message tag is the receiver's rank. 12 | - The sender prints out the number of elements it sends and the tag it used. 13 | - All tasks with rank > 0 receive messages. 14 | - Each receiver prints out their `myid` and the first element in the 15 | received array. 16 | 17 | 1. Create a Cartesian topology for the chain. Utilize MPI_Cart_shift for finding 18 | the neighbouring ranks and implement the communication with MPI point-to-point routines 19 | (either blocking or non-blocking). Use 20 | [cpp/skeleton.cpp](skeleton.cpp) or [skeleton.F90](skeleton.F90) 21 | as a starting point. 22 | 23 | 2. Make a version where the chain is periodic, i.e. task `ntasks-1` sends to task 0 24 | and every task receives. 25 | -------------------------------------------------------------------------------- /mpi/message-chain-cartesian/skeleton.F90: -------------------------------------------------------------------------------- 1 | program basic 2 | use mpi_f08 3 | use iso_fortran_env, only : REAL64 4 | 5 | implicit none 6 | integer, parameter :: size = 10000000 7 | integer :: rc, myid, ntasks 8 | integer :: message(size) 9 | integer :: receiveBuffer(size) 10 | integer :: status(MPI_STATUS_SIZE) 11 | integer :: requests(2) 12 | 13 | real(REAL64) :: t0, t1 14 | 15 | integer :: source, destination 16 | 17 | call mpi_init(rc) 18 | call mpi_comm_rank(MPI_COMM_WORLD, myid, rc) 19 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, rc) 20 | 21 | message = myid 22 | 23 | ! TODO: create a cartesian communicator 24 | ! and determine the source and destination ranks 25 | ! with the help of MPI_Cart_shift 26 | 27 | 28 | ! 
end TODO 29 | 30 | ! Start measuring the time spent in communication 31 | call mpi_barrier(mpi_comm_world, rc) 32 | t0 = mpi_wtime() 33 | 34 | ! TODO: Send messages 35 | 36 | write(*,'(A10,I3,A20,I8,A,I3,A,I3)') 'Sender: ', myid, & 37 | ' Sent elements: ',size, & 38 | '. Tag: ', myid+1, '. Receiver: ', destination 39 | 40 | ! TODO: Receive messages 41 | 42 | write(*,'(A10,I3,A,I3)') 'Receiver: ', myid, & 43 | ' First element: ', receiveBuffer(1) 44 | 45 | ! Finalize measuring the time and print it out 46 | t1 = mpi_wtime() 47 | call mpi_barrier(mpi_comm_world, rc) 48 | call flush(6) 49 | 50 | call print_ordered(t1 - t0) 51 | 52 | call mpi_finalize(rc) 53 | 54 | contains 55 | 56 | subroutine print_ordered(t) 57 | implicit none 58 | real(REAL64) :: t 59 | 60 | integer i 61 | 62 | if (myid == 0) then 63 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', myid, ':', t 64 | do i=1, ntasks-1 65 | call mpi_recv(t, 1, MPI_DOUBLE_PRECISION, i, 11, & 66 | MPI_COMM_WORLD, MPI_STATUS_IGNORE, rc) 67 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', i, ':', t 68 | end do 69 | else 70 | call mpi_send(t, 1, MPI_DOUBLE_PRECISION, 0, 11, & 71 | MPI_COMM_WORLD, rc) 72 | end if 73 | end subroutine print_ordered 74 | 75 | end program basic 76 | -------------------------------------------------------------------------------- /mpi/message-chain-cartesian/skeleton.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print_ordered(double t); 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int i, myid, ntasks; 10 | constexpr int size = 10000000; 11 | MPI_Status status; 12 | 13 | double t0, t1; 14 | 15 | int source, destination; 16 | 17 | MPI_Init(&argc, &argv); 18 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 19 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 20 | 21 | // Initialize message 22 | std::vector message(size, myid); 23 | std::vector receiveBuffer(size); 24 | 25 | // TODO: create a cartesian communicator 26 | // and determine the source and destination ranks 27 | // with the help of MPI_Cart_shift 28 | 29 | 30 | // end TODO 31 | 32 | // Start measuring the time spent in communication 33 | MPI_Barrier(MPI_COMM_WORLD); 34 | t0 = MPI_Wtime(); 35 | 36 | // TODO: Send messages 37 | 38 | printf("Sender: %d. Sent elements: %d. Tag: %d. Receiver: %d\n", 39 | myid, size, myid + 1, destination); 40 | 41 | // TODO: Receive messages 42 | 43 | printf("Receiver: %d. 
first element %d.\n", 44 | myid, receiveBuffer[0]); 45 | 46 | // Finalize measuring the time and print it out 47 | t1 = MPI_Wtime(); 48 | MPI_Barrier(MPI_COMM_WORLD); 49 | fflush(stdout); 50 | 51 | print_ordered(t1 - t0); 52 | 53 | MPI_Finalize(); 54 | return 0; 55 | } 56 | 57 | void print_ordered(double t) 58 | { 59 | int i, rank, ntasks; 60 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 61 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 62 | 63 | if (rank == 0) { 64 | printf("Time elapsed in rank %2d: %6.3f\n", rank, t); 65 | for (i = 1; i < ntasks; i++) { 66 | MPI_Recv(&t, 1, MPI_DOUBLE, i, 11, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 67 | printf("Time elapsed in rank %2d: %6.3f\n", i, t); 68 | } 69 | } else { 70 | MPI_Send(&t, 1, MPI_DOUBLE, 0, 11, MPI_COMM_WORLD); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /mpi/message-chain-cartesian/solution/chain-periodic.F90: -------------------------------------------------------------------------------- 1 | program basic 2 | use mpi_f08 3 | use iso_fortran_env, only : REAL64 4 | 5 | implicit none 6 | integer, parameter :: size = 10000000 7 | integer :: rc, myid, ntasks 8 | integer :: message(size) 9 | integer :: receiveBuffer(size) 10 | type(mpi_status) :: status 11 | integer :: requests(2) 12 | 13 | integer :: ndims, dims(1), cart_id 14 | type(mpi_comm) :: cart_comm 15 | logical :: reorder, periods(1) 16 | 17 | real(REAL64) :: t0, t1 18 | 19 | integer :: source, destination 20 | 21 | call mpi_init(rc) 22 | call mpi_comm_rank(MPI_COMM_WORLD, myid, rc) 23 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, rc) 24 | 25 | message = myid 26 | 27 | ndims = 1 28 | dims(1) = ntasks 29 | periods(1) = .true. 30 | reorder = .true. 31 | call mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart_comm, rc) 32 | call mpi_cart_shift(cart_comm, 0, 1, source, destination, rc) 33 | call mpi_comm_rank(cart_comm, cart_id, rc) 34 | 35 | 36 | ! Start measuring the time spent in communication 37 | call mpi_barrier(mpi_comm_world, rc) 38 | t0 = mpi_wtime() 39 | 40 | ! Send and receive messages 41 | call mpi_sendrecv(message, size, MPI_INTEGER, destination, destination, & 42 | receiveBuffer, size, MPI_INTEGER, source, cart_id, & 43 | cart_comm, status, rc) 44 | write(*,'(A10,I3,A20,I8,A,I3,A,I3)') 'Sender: ', myid, & 45 | ' Sent elements: ', size, & 46 | '. Tag: ', myid + 1, & 47 | '. Receiver: ', destination 48 | write(*,'(A10,I3,A,I3)') 'Receiver: ', myid, & 49 | ' First element: ', receiveBuffer(1) 50 | 51 | ! 
Finalize measuring the time and print it out 52 | t1 = mpi_wtime() 53 | call mpi_barrier(mpi_comm_world, rc) 54 | call flush(6) 55 | 56 | call print_ordered(t1 - t0) 57 | call mpi_comm_free(cart_comm, rc) 58 | 59 | call mpi_finalize(rc) 60 | 61 | contains 62 | 63 | subroutine print_ordered(t) 64 | implicit none 65 | real(REAL64) :: t 66 | 67 | integer i 68 | 69 | if (myid == 0) then 70 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', myid, ':', t 71 | do i=1, ntasks-1 72 | call mpi_recv(t, 1, MPI_DOUBLE_PRECISION, i, 11, & 73 | MPI_COMM_WORLD, MPI_STATUS_IGNORE, rc) 74 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', i, ':', t 75 | end do 76 | else 77 | call mpi_send(t, 1, MPI_DOUBLE_PRECISION, 0, 11, & 78 | MPI_COMM_WORLD, rc) 79 | end if 80 | end subroutine print_ordered 81 | 82 | end program basic 83 | -------------------------------------------------------------------------------- /mpi/message-chain-cartesian/solution/chain-periodic.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print_ordered(double t); 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int i, myid, ntasks; 10 | constexpr int size = 10000000; 11 | MPI_Status status; 12 | 13 | double t0, t1; 14 | 15 | int source, destination; 16 | 17 | MPI_Init(&argc, &argv); 18 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 19 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 20 | 21 | // Initialize message 22 | std::vector message(size, myid); 23 | std::vector receiveBuffer(size); 24 | 25 | // TODO: create a cartesian communicator 26 | // and determine the source and destination ranks 27 | // with the help of MPI_Cart_shift 28 | MPI_Comm cart_comm; 29 | int ndims = 1; 30 | int dims[1] = {ntasks}; 31 | int periods[1] = {1}; 32 | 33 | MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, 1, &cart_comm); 34 | MPI_Cart_shift(cart_comm, 0, 1, &source, &destination); 35 | int cart_id; 36 | MPI_Comm_rank(cart_comm, &cart_id); 37 | 38 | // end TODO 39 | 40 | // Start measuring the time spent in communication 41 | MPI_Barrier(MPI_COMM_WORLD); 42 | t0 = MPI_Wtime(); 43 | 44 | 45 | MPI_Sendrecv(message.data(), message.size(), MPI_INT, destination, destination, 46 | receiveBuffer.data(), receiveBuffer.size(), MPI_INT, source, cart_id, 47 | cart_comm, MPI_STATUS_IGNORE); 48 | printf("Sender: %d. Sent elements: %d. Tag: %d. Receiver: %d\n", 49 | myid, size, myid + 1, destination); 50 | 51 | printf("Receiver: %d. 
first element %d.\n", 52 | myid, receiveBuffer[0]); 53 | 54 | // Finalize measuring the time and print it out 55 | t1 = MPI_Wtime(); 56 | MPI_Barrier(MPI_COMM_WORLD); 57 | fflush(stdout); 58 | 59 | print_ordered(t1 - t0); 60 | 61 | MPI_Comm_free(&cart_comm); 62 | MPI_Finalize(); 63 | return 0; 64 | } 65 | 66 | void print_ordered(double t) 67 | { 68 | int i, rank, ntasks; 69 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 70 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 71 | 72 | if (rank == 0) { 73 | printf("Time elapsed in rank %2d: %6.3f\n", rank, t); 74 | for (i = 1; i < ntasks; i++) { 75 | MPI_Recv(&t, 1, MPI_DOUBLE, i, 11, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 76 | printf("Time elapsed in rank %2d: %6.3f\n", i, t); 77 | } 78 | } else { 79 | MPI_Send(&t, 1, MPI_DOUBLE, 0, 11, MPI_COMM_WORLD); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /mpi/message-chain-cartesian/solution/chain.F90: -------------------------------------------------------------------------------- 1 | program basic 2 | use mpi_f08 3 | use iso_fortran_env, only : REAL64 4 | 5 | implicit none 6 | integer, parameter :: size = 10000000 7 | integer :: rc, myid, ntasks 8 | integer :: message(size) 9 | integer :: receiveBuffer(size) 10 | type(mpi_status) :: status 11 | integer :: requests(2) 12 | 13 | integer :: ndims, dims(1), cart_id 14 | type(mpi_comm) :: cart_comm 15 | logical :: reorder, periods(1) 16 | 17 | real(REAL64) :: t0, t1 18 | 19 | integer :: source, destination 20 | 21 | call mpi_init(rc) 22 | call mpi_comm_rank(MPI_COMM_WORLD, myid, rc) 23 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, rc) 24 | 25 | message = myid 26 | 27 | ndims = 1 28 | dims(1) = ntasks 29 | periods(1) = .false. 30 | reorder = .true. 31 | call mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart_comm, rc) 32 | call mpi_cart_shift(cart_comm, 0, 1, source, destination, rc) 33 | call mpi_comm_rank(cart_comm, cart_id, rc) 34 | 35 | 36 | ! Start measuring the time spent in communication 37 | call mpi_barrier(mpi_comm_world, rc) 38 | t0 = mpi_wtime() 39 | 40 | ! Send and receive messages 41 | call mpi_sendrecv(message, size, MPI_INTEGER, destination, cart_id + 1, & 42 | receiveBuffer, size, MPI_INTEGER, source, cart_id, & 43 | cart_comm, status, rc) 44 | write(*,'(A10,I3,A20,I8,A,I3,A,I3)') 'Sender: ', myid, & 45 | ' Sent elements: ', size, & 46 | '. Tag: ', myid + 1, & 47 | '. Receiver: ', destination 48 | write(*,'(A10,I3,A,I3)') 'Receiver: ', myid, & 49 | ' First element: ', receiveBuffer(1) 50 | 51 | ! 
Finalize measuring the time and print it out 52 | t1 = mpi_wtime() 53 | call mpi_barrier(mpi_comm_world, rc) 54 | call flush(6) 55 | 56 | call print_ordered(t1 - t0) 57 | call mpi_comm_free(cart_comm, rc) 58 | 59 | call mpi_finalize(rc) 60 | 61 | contains 62 | 63 | subroutine print_ordered(t) 64 | implicit none 65 | real(REAL64) :: t 66 | 67 | integer i 68 | 69 | if (myid == 0) then 70 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', myid, ':', t 71 | do i=1, ntasks-1 72 | call mpi_recv(t, 1, MPI_DOUBLE_PRECISION, i, 11, & 73 | MPI_COMM_WORLD, MPI_STATUS_IGNORE, rc) 74 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', i, ':', t 75 | end do 76 | else 77 | call mpi_send(t, 1, MPI_DOUBLE_PRECISION, 0, 11, & 78 | MPI_COMM_WORLD, rc) 79 | end if 80 | end subroutine print_ordered 81 | 82 | end program basic 83 | -------------------------------------------------------------------------------- /mpi/message-chain-cartesian/solution/chain.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print_ordered(double t); 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int i, myid, ntasks; 10 | constexpr int size = 10000000; 11 | MPI_Status status; 12 | 13 | double t0, t1; 14 | 15 | int source, destination; 16 | 17 | MPI_Init(&argc, &argv); 18 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 19 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 20 | 21 | // Initialize message 22 | std::vector message(size, myid); 23 | std::vector receiveBuffer(size); 24 | 25 | // TODO: create a cartesian communicator 26 | // and determine the source and destination ranks 27 | // with the help of MPI_Cart_shift 28 | MPI_Comm cart_comm; 29 | int ndims = 1; 30 | int dims[1] = {ntasks}; 31 | int periods[1] = {0}; 32 | 33 | MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, 1, &cart_comm); 34 | MPI_Cart_shift(cart_comm, 0, 1, &source, &destination); 35 | int cart_id; 36 | MPI_Comm_rank(cart_comm, &cart_id); 37 | 38 | // end TODO 39 | 40 | // Start measuring the time spent in communication 41 | MPI_Barrier(MPI_COMM_WORLD); 42 | t0 = MPI_Wtime(); 43 | 44 | 45 | MPI_Sendrecv(message.data(), message.size(), MPI_INT, destination, cart_id + 1, 46 | receiveBuffer.data(), receiveBuffer.size(), MPI_INT, source, cart_id, 47 | cart_comm, MPI_STATUS_IGNORE); 48 | printf("Sender: %d. Sent elements: %d. Tag: %d. Receiver: %d\n", 49 | myid, size, myid + 1, destination); 50 | 51 | printf("Receiver: %d. 
first element %d.\n", 52 | myid, receiveBuffer[0]); 53 | 54 | // Finalize measuring the time and print it out 55 | t1 = MPI_Wtime(); 56 | MPI_Barrier(MPI_COMM_WORLD); 57 | fflush(stdout); 58 | 59 | print_ordered(t1 - t0); 60 | 61 | MPI_Comm_free(&cart_comm); 62 | MPI_Finalize(); 63 | return 0; 64 | } 65 | 66 | void print_ordered(double t) 67 | { 68 | int i, rank, ntasks; 69 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 70 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 71 | 72 | if (rank == 0) { 73 | printf("Time elapsed in rank %2d: %6.3f\n", rank, t); 74 | for (i = 1; i < ntasks; i++) { 75 | MPI_Recv(&t, 1, MPI_DOUBLE, i, 11, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 76 | printf("Time elapsed in rank %2d: %6.3f\n", i, t); 77 | } 78 | } else { 79 | MPI_Send(&t, 1, MPI_DOUBLE, 0, 11, MPI_COMM_WORLD); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /mpi/message-chain-persistent/README.md: -------------------------------------------------------------------------------- 1 | ## Message chain with persistent communication 2 | 3 | Write a program where every MPI task sends data to the next one. 4 | Let `ntasks` be the number of the tasks, and `myid` the rank of the 5 | current task. Your program should work as follows: 6 | 7 | - Every task with a rank less than `ntasks-1` sends a message to task 8 | `myid+1`. For example, task 0 sends a message to task 1. 9 | - The message content is an integer array where each element is initialised to 10 | `myid`. 11 | - The message tag is the receiver's rank. 12 | - The sender prints out the number of elements it sends and the tag it used. 13 | - All tasks with rank > 0 receive messages. 14 | - Each receiver prints out their `myid` and the first element in the 15 | received array. 16 | 17 | 1. Implement the program described above using persistent communication, *i.e.* 18 | `MPI_Send_init`, `MPI_Recv_init`, `MPI_Start` and `MPI_Wait`. 19 | You may start from scratch or use the skeleton code 20 | ([skeleton.cpp](skeleton.cpp) or [skeleton.F90](skeleton.F90)) 21 | as a starting point. 22 | 23 | 2. Write a version that uses `MPI_Startall` and `MPI_Waitall` instead of `MPI_Start`s and `MPI_Wait`s. 24 | -------------------------------------------------------------------------------- /mpi/message-chain-persistent/skeleton.F90: -------------------------------------------------------------------------------- 1 | program basic 2 | use mpi_f08 3 | use iso_fortran_env, only : REAL64 4 | 5 | implicit none 6 | integer, parameter :: size = 10000000 7 | integer :: rc, myid, ntasks 8 | integer :: message(size) 9 | integer :: receiveBuffer(size) 10 | integer :: status(MPI_STATUS_SIZE) 11 | integer :: requests(2) 12 | 13 | real(REAL64) :: t0, t1 14 | 15 | integer :: source, destination 16 | 17 | call mpi_init(rc) 18 | call mpi_comm_rank(MPI_COMM_WORLD, myid, rc) 19 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, rc) 20 | 21 | message = myid 22 | 23 | ! TODO: set source and destination ranks 24 | ! Treat boundaries with MPI_PROC_NULL 25 | ! You may utilize also Cartesian topology and MPI_Cart_shift 26 | ! instead manual determination 27 | 28 | 29 | ! end TODO 30 | 31 | ! Start measuring the time spent in communication 32 | call mpi_barrier(mpi_comm_world, rc) 33 | t0 = mpi_wtime() 34 | 35 | ! TODO: Send and receive messages 36 | 37 | write(*,'(A10,I3,A20,I8,A,I3,A,I3)') 'Sender: ', myid, & 38 | ' Sent elements: ',size, & 39 | '. Tag: ', myid+1, '. 
Receiver: ', destination 40 | 41 | write(*,'(A10,I3,A,I3)') 'Receiver: ', myid, & 42 | ' First element: ', receiveBuffer(1) 43 | 44 | ! Finalize measuring the time and print it out 45 | t1 = mpi_wtime() 46 | call mpi_barrier(mpi_comm_world, rc) 47 | call flush(6) 48 | 49 | call print_ordered(t1 - t0) 50 | 51 | call mpi_finalize(rc) 52 | 53 | contains 54 | 55 | subroutine print_ordered(t) 56 | implicit none 57 | real(REAL64) :: t 58 | 59 | integer i 60 | 61 | if (myid == 0) then 62 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', myid, ':', t 63 | do i=1, ntasks-1 64 | call mpi_recv(t, 1, MPI_DOUBLE_PRECISION, i, 11, & 65 | MPI_COMM_WORLD, MPI_STATUS_IGNORE, rc) 66 | write(*, '(A20, I3, A, F6.3)') 'Time elapsed in rank', i, ':', t 67 | end do 68 | else 69 | call mpi_send(t, 1, MPI_DOUBLE_PRECISION, 0, 11, & 70 | MPI_COMM_WORLD, rc) 71 | end if 72 | end subroutine print_ordered 73 | 74 | end program basic 75 | -------------------------------------------------------------------------------- /mpi/message-chain-persistent/skeleton.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print_ordered(double t); 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int i, myid, ntasks; 10 | constexpr int size = 10000000; 11 | MPI_Status status; 12 | 13 | double t0, t1; 14 | 15 | MPI_Init(&argc, &argv); 16 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 17 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 18 | 19 | // Initialize message 20 | std::vector message(size, myid); 21 | std::vector receiveBuffer(size); 22 | 23 | // TODO: set source and destination ranks 24 | // Treat boundaries with MPI_PROC_NULL 25 | // You may utilize also Cartesian topology and MPI_Cart_shift 26 | // instead manual determination 27 | 28 | int source, destination; 29 | 30 | 31 | // end TODO 32 | 33 | // Start measuring the time spent in communication 34 | MPI_Barrier(MPI_COMM_WORLD); 35 | t0 = MPI_Wtime(); 36 | 37 | // TODO: Send and receive messages 38 | 39 | printf("Sender: %d. Sent elements: %d. Tag: %d. Receiver: %d\n", 40 | myid, size, myid + 1, destination); 41 | 42 | printf("Receiver: %d. first element %d.\n", 43 | myid, receiveBuffer[0]); 44 | 45 | // Finalize measuring the time and print it out 46 | t1 = MPI_Wtime(); 47 | MPI_Barrier(MPI_COMM_WORLD); 48 | fflush(stdout); 49 | 50 | print_ordered(t1 - t0); 51 | 52 | MPI_Finalize(); 53 | return 0; 54 | } 55 | 56 | void print_ordered(double t) 57 | { 58 | int i, rank, ntasks; 59 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 60 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 61 | 62 | if (rank == 0) { 63 | printf("Time elapsed in rank %2d: %6.3f\n", rank, t); 64 | for (i = 1; i < ntasks; i++) { 65 | MPI_Recv(&t, 1, MPI_DOUBLE, i, 11, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 66 | printf("Time elapsed in rank %2d: %6.3f\n", i, t); 67 | } 68 | } else { 69 | MPI_Send(&t, 1, MPI_DOUBLE, 0, 11, MPI_COMM_WORLD); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /mpi/neighbor-exchange/README.md: -------------------------------------------------------------------------------- 1 | ## "Halo exchange" with neighborhood collectives 2 | 3 | Consider a parallel program where processes are arranged in a one 4 | dimensional chain. Each process stores local data in an array, but 5 | reserves also "ghost layers" for communicating with neighboring 6 | processes. 
Initially, the ghost data is zero, and real data consists 7 | of a sequence of integers: 8 | 9 | ``` 10 | Task 0: 0 0 1 2 3 0 11 | Task 1: 0 4 5 6 7 0 12 | Task 2: 0 8 9 10 11 0 13 | ... 14 | ``` 15 | 16 | Implement a program where each process sends the boundary values to 17 | neighboring processes, so that after communication the arrays 18 | (including ghost layers) look like: 19 | ``` 20 | ... 21 | Task 2: 7 8 9 10 11 12 22 | Task 3: 11 12 13 14 15 16 23 | Task 4: 15 16 17 18 19 20 24 | ... 25 | ``` 26 | 27 | 1. Create a Cartesian communicator for the chain 28 | 2. Copy boundary values to a send buffer 29 | 3. Perform the communication with an appropriate neighborhood 30 | collective routine 31 | 4. Copy values from the receive buffer to the ghost layers. 32 | 33 | Try both finite and periodic chains. 34 | 35 | You may start from scratch or use [skeleton.cpp](skeleton.cpp) (or 36 | [skeleton.F90](skeleton.F90) for Fortran) as a starting point. 37 | -------------------------------------------------------------------------------- /mpi/neighbor-exchange/skeleton.F90: -------------------------------------------------------------------------------- 1 | program neighborhood 2 | use mpi_f08 3 | implicit none 4 | 5 | integer :: ntasks, cart_id, i, rc 6 | integer, parameter :: dsize = 4 7 | integer :: data(0:dsize+1) 8 | integer :: sendbuf(2), recvbuf(2) 9 | 10 | type(mpi_comm) :: cart_comm 11 | integer, parameter :: ndims = 1 12 | integer :: dims(1) 13 | logical :: reorder, periods(1) 14 | 15 | call mpi_init(rc) 16 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, rc) 17 | 18 | ! TODO create cartesian communicator 19 | 20 | ! TODO end 21 | 22 | call mpi_comm_rank(cart_comm, cart_id, rc) 23 | 24 | ! Initialize data 25 | data = 0 26 | do i=1, ntasks 27 | data(i) = cart_id * dsize + (i - 1) 28 | end do 29 | 30 | ! print out initial data 31 | if (cart_id == 0) then 32 | write(*,*) 'Initial data' 33 | end if 34 | 35 | call print_buffers(data, cart_comm) 36 | 37 | ! TODO implement halo exchange with neighborhood communication 38 | ! Copy data to send buffer 39 | 40 | ! Communicate with neighbor 41 | 42 | ! Copy receive buffer to "halo" 43 | 44 | ! TODO end 45 | 46 | ! 
print out results 47 | if (cart_id == 0) then 48 | write(*,*) 'Final data' 49 | end if 50 | 51 | call print_buffers(data, cart_comm) 52 | 53 | call mpi_finalize(rc) 54 | 55 | contains 56 | 57 | subroutine print_buffers(buffer, comm) 58 | implicit none 59 | integer, dimension(:), intent(in) :: buffer 60 | type(mpi_comm), intent(in) :: comm 61 | integer, dimension(:), allocatable :: printbuffer 62 | integer :: bufsize, rank, ntasks 63 | integer :: i 64 | 65 | bufsize = size(buffer) 66 | call mpi_comm_size(comm, ntasks) 67 | call mpi_comm_rank(comm, rank) 68 | allocate(printbuffer(ntasks * bufsize)) 69 | 70 | call mpi_gather(buffer, bufsize, MPI_INTEGER, & 71 | & printbuffer, bufsize, MPI_INTEGER, & 72 | & 0, comm, rc) 73 | 74 | if (rank == 0) then 75 | do i = 1, ntasks 76 | write(*,'(A,I3,A,*(I3))') 'Task', i - 1, ': ', printbuffer((i-1)*bufsize+1:i*bufsize) 77 | end do 78 | print * 79 | end if 80 | 81 | deallocate(printbuffer) 82 | 83 | end subroutine print_buffers 84 | 85 | 86 | end program 87 | -------------------------------------------------------------------------------- /mpi/neighbor-exchange/skeleton.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print_buffers(std::vector &buffer, MPI_Comm comm); 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int myid, ntasks; 10 | constexpr int size = 4; 11 | std::vector data(size + 2, 0); 12 | std::vector sendbuf(2), recvbuf(2); 13 | MPI_Status status; 14 | 15 | MPI_Init(&argc, &argv); 16 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 17 | 18 | // TODO create cartesian communicator 19 | MPI_Comm cart_comm; 20 | 21 | // TODO end 22 | 23 | int cart_id; 24 | MPI_Comm_rank(cart_comm, &cart_id); 25 | 26 | // Inner part of local data 27 | for (int i = 1; i < size + 1; i++) { 28 | data[i] = cart_id * size + (i - 1); 29 | } 30 | 31 | // Print out initial data 32 | if (0 == cart_id) 33 | printf("Initial data\n"); 34 | print_buffers(data, cart_comm); 35 | 36 | // TODO implement halo exchange with neighborhood communication 37 | // Copy data to send buffer 38 | 39 | // Communicate with neighbor 40 | 41 | 42 | // Copy receive buffer to "halo" 43 | 44 | 45 | // TODO end 46 | 47 | // Print out results 48 | if (0 == cart_id) 49 | printf("Final data\n"); 50 | print_buffers(data, cart_comm); 51 | 52 | MPI_Finalize(); 53 | } 54 | 55 | void print_buffers(std::vector &buffer, MPI_Comm comm) 56 | { 57 | int rank, ntasks; 58 | MPI_Comm_rank(comm, &rank); 59 | MPI_Comm_size(comm, &ntasks); 60 | 61 | const int buffersize = buffer.size(); 62 | std::vector printbuffer(ntasks * buffersize); 63 | 64 | MPI_Gather(buffer.data(), buffersize, MPI_INT, 65 | printbuffer.data(), buffersize, MPI_INT, 0, MPI_COMM_WORLD); 66 | 67 | if (rank == 0) { 68 | for (int j = 0; j < ntasks; j++) { 69 | printf("Task %i:", j); 70 | for (int i = 0; i < buffersize; i++) { 71 | printf(" %2i", printbuffer[i + buffersize * j]); 72 | } 73 | printf("\n"); 74 | } 75 | printf("\n"); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /mpi/neighbor-exchange/solution/neighbor-exchange.F90: -------------------------------------------------------------------------------- 1 | program neighborhood 2 | use mpi_f08 3 | implicit none 4 | 5 | integer :: ntasks, cart_id, i, rc 6 | integer, parameter :: dsize = 4 7 | integer :: data(0:dsize+1) 8 | integer :: sendbuf(2), recvbuf(2) 9 | 10 | type(mpi_comm) :: cart_comm 11 | integer, parameter :: ndims = 1 12 | integer :: dims(1) 13 | 
logical :: reorder, periods(1) 14 | 15 | call mpi_init(rc) 16 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, rc) 17 | 18 | dims = ntasks 19 | reorder = .true. 20 | periods(1) = .false. 21 | 22 | call mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart_comm, rc) 23 | call mpi_comm_rank(cart_comm, cart_id, rc) 24 | 25 | ! Initialize data 26 | data = 0 27 | do i=1, ntasks 28 | data(i) = cart_id * dsize + (i - 1) 29 | end do 30 | 31 | ! print out initial data 32 | if (cart_id == 0) then 33 | write(*,*) 'Initial data' 34 | end if 35 | 36 | call print_buffers(data, cart_comm) 37 | 38 | ! Copy data to send buffer 39 | sendbuf(1) = data(1) 40 | sendbuf(2) = data(dsize) 41 | 42 | ! Communicate with neighbor 43 | call mpi_neighbor_alltoall(sendbuf, 1, MPI_INTEGER, recvbuf, 1, MPI_INTEGER, cart_comm, rc) 44 | 45 | ! Copy receive buffer to "halo" 46 | data(0) = recvbuf(1) 47 | data(dsize + 1) = recvbuf(2) 48 | 49 | ! print out results 50 | if (cart_id == 0) then 51 | write(*,*) 'Final data' 52 | end if 53 | 54 | call print_buffers(data, cart_comm) 55 | 56 | call mpi_finalize(rc) 57 | 58 | contains 59 | 60 | subroutine print_buffers(buffer, comm) 61 | implicit none 62 | integer, dimension(:), intent(in) :: buffer 63 | type(mpi_comm), intent(in) :: comm 64 | integer, dimension(:), allocatable :: printbuffer 65 | integer :: bufsize, rank, ntasks 66 | integer :: i 67 | 68 | bufsize = size(buffer) 69 | call mpi_comm_size(comm, ntasks) 70 | call mpi_comm_rank(comm, rank) 71 | allocate(printbuffer(ntasks * bufsize)) 72 | 73 | call mpi_gather(buffer, bufsize, MPI_INTEGER, & 74 | & printbuffer, bufsize, MPI_INTEGER, & 75 | & 0, comm, rc) 76 | 77 | if (rank == 0) then 78 | do i = 1, ntasks 79 | write(*,'(A,I3,A,*(I3))') 'Task', i - 1, ': ', printbuffer((i-1)*bufsize+1:i*bufsize) 80 | end do 81 | print * 82 | end if 83 | 84 | deallocate(printbuffer) 85 | 86 | end subroutine print_buffers 87 | 88 | 89 | end program 90 | -------------------------------------------------------------------------------- /mpi/neighbor-exchange/solution/neighbor-exchange.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print_buffers(std::vector &buffer, MPI_Comm comm); 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int myid, ntasks; 10 | constexpr int size = 4; 11 | std::vector data(size + 2, 0); 12 | std::vector sendbuf(2), recvbuf(2); 13 | MPI_Status status; 14 | 15 | MPI_Init(&argc, &argv); 16 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 17 | 18 | MPI_Comm cart_comm; 19 | int ndims = 1; 20 | int dims[1] = {ntasks}; 21 | int periods[1] = {1}; 22 | 23 | MPI_Cart_create(MPI_COMM_WORLD, ndims, dims, periods, 1, &cart_comm); 24 | int cart_id; 25 | MPI_Comm_rank(cart_comm, &cart_id); 26 | 27 | // Inner part of local data 28 | for (int i = 1; i < size + 1; i++) { 29 | data[i] = cart_id * size + (i - 1); 30 | } 31 | 32 | // Print out initial data 33 | if (0 == cart_id) 34 | printf("Initial data\n"); 35 | print_buffers(data, cart_comm); 36 | 37 | // Copy data to send buffer 38 | sendbuf[0] = data[1]; 39 | sendbuf[1] = data[size]; 40 | 41 | // Communicate with neighbor 42 | MPI_Neighbor_alltoall(sendbuf.data(), 1, MPI_INT, recvbuf.data(), 43 | 1, MPI_INT, cart_comm); 44 | 45 | 46 | // Copy receive buffer to "halo" 47 | data[0] = recvbuf[0]; 48 | data[size + 1] = recvbuf[1]; 49 | 50 | // Print out results 51 | if (0 == cart_id) 52 | printf("Final data\n"); 53 | print_buffers(data, cart_comm); 54 | 55 | MPI_Finalize(); 56 | } 57 | 58 | void 
print_buffers(std::vector &buffer, MPI_Comm comm) 59 | { 60 | int rank, ntasks; 61 | MPI_Comm_rank(comm, &rank); 62 | MPI_Comm_size(comm, &ntasks); 63 | 64 | const int buffersize = buffer.size(); 65 | std::vector printbuffer(ntasks * buffersize); 66 | 67 | MPI_Gather(buffer.data(), buffersize, MPI_INT, 68 | printbuffer.data(), buffersize, MPI_INT, 0, MPI_COMM_WORLD); 69 | 70 | if (rank == 0) { 71 | for (int j = 0; j < ntasks; j++) { 72 | printf("Task %i:", j); 73 | for (int i = 0; i < buffersize; i++) { 74 | printf(" %2i", printbuffer[i + buffersize * j]); 75 | } 76 | printf("\n"); 77 | } 78 | printf("\n"); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/README.md: -------------------------------------------------------------------------------- 1 | ## Using custom datatypes 2 | 3 | Write a program that sends the highlighted elements of a 2D array 4 | using user defined datatypes from one MPI task to another. Note the 5 | different assignments for C and Fortran, and remember that C stores 6 | arrays in a row-major order and Fortran in a column-major order. You can 7 | start from skeleton codes in [C](skeleton.c) or [Fortran](skeleton.F90) 8 | 9 | a) 10 | 11 | ![](img/vector.png) 12 | 13 | b) 14 | 15 | ![](img/indexed.png) 16 | 17 | c) 18 | 19 | ![](img/subarray.png) 20 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/img/indexed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/mpi/simple-datatypes/img/indexed.png -------------------------------------------------------------------------------- /mpi/simple-datatypes/img/subarray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/mpi/simple-datatypes/img/subarray.png -------------------------------------------------------------------------------- /mpi/simple-datatypes/img/vector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csc-training/advanced-mpi/6e16617f3c4abac4c44ce23f0f5c5cb590073e0d/mpi/simple-datatypes/img/vector.png -------------------------------------------------------------------------------- /mpi/simple-datatypes/skeleton.F90: -------------------------------------------------------------------------------- 1 | program datatype1 2 | use mpi_f08 3 | implicit none 4 | 5 | integer, dimension(8,8) :: array 6 | integer :: rank, ierr 7 | !TODO: declare variable for datatype 8 | integer :: i, j 9 | 10 | call mpi_init(ierr) 11 | call mpi_comm_rank(MPI_COMM_WORLD, rank ,ierr) 12 | 13 | ! initialize arrays 14 | if (rank == 0) then 15 | do i=1,8 16 | do j=1,8 17 | array(i,j) = i*10 + j 18 | end do 19 | end do 20 | else 21 | array(:,:) = 0 22 | end if 23 | 24 | if (rank == 0) then 25 | write(*,*) 'Data in rank 0' 26 | do i=1,8 27 | write(*,'(8I3)') array(i, :) 28 | end do 29 | end if 30 | 31 | 32 | !TODO: create datatype 33 | 34 | !TODO: communicate with datatype 35 | 36 | !TODO: free datatype 37 | 38 | ! 
Print out the result 39 | if (rank == 1) then 40 | write(*,*) 'Received data' 41 | do i=1,8 42 | write(*,'(8I3)') array(i, :) 43 | end do 44 | end if 45 | 46 | 47 | call mpi_finalize(ierr) 48 | 49 | end program datatype1 50 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/skeleton.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int rank; 7 | int array[8][8]; 8 | //TODO: Declare a variable storing the MPI datatype 9 | 10 | int i, j; 11 | 12 | MPI_Init(&argc, &argv); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | 15 | // Initialize arrays 16 | if (rank == 0) { 17 | for (i = 0; i < 8; i++) { 18 | for (j = 0; j < 8; j++) { 19 | array[i][j] = (i + 1) * 10 + j + 1; 20 | } 21 | } 22 | } else { 23 | for (i = 0; i < 8; i++) { 24 | for (j = 0; j < 8; j++) { 25 | array[i][j] = 0; 26 | } 27 | } 28 | } 29 | 30 | if (rank == 0) { 31 | printf("Data in rank 0\n"); 32 | for (i = 0; i < 8; i++) { 33 | for (j = 0; j < 8; j++) { 34 | printf("%3d", array[i][j]); 35 | } 36 | printf("\n"); 37 | } 38 | } 39 | 40 | //TODO: Create datatype 41 | 42 | //TODO: Send data 43 | 44 | //TODO: Free datatype 45 | 46 | // Print out the result on rank 1 47 | if (rank == 1) { 48 | printf("Received data\n"); 49 | for (i = 0; i < 8; i++) { 50 | for (j = 0; j < 8; j++) { 51 | printf("%3d", array[i][j]); 52 | } 53 | printf("\n"); 54 | } 55 | } 56 | 57 | MPI_Finalize(); 58 | 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/solution/custom_type_a.F90: -------------------------------------------------------------------------------- 1 | program datatype1 2 | use mpi_f08 3 | implicit none 4 | 5 | integer, dimension(8,8) :: array 6 | integer :: rank, ierr 7 | type(mpi_datatype) :: rowtype 8 | integer :: i, j 9 | 10 | call mpi_init(ierr) 11 | call mpi_comm_rank(MPI_COMM_WORLD, rank ,ierr) 12 | 13 | ! initialize arrays 14 | if (rank == 0) then 15 | do i=1,8 16 | do j=1,8 17 | array(i,j) = i*10 + j 18 | end do 19 | end do 20 | else 21 | array(:,:) = 0 22 | end if 23 | 24 | if (rank == 0) then 25 | write(*,*) 'Data in rank 0' 26 | do i=1,8 27 | write(*,'(8I3)') array(i, :) 28 | end do 29 | end if 30 | 31 | ! create datatype 32 | call mpi_type_vector(8, 1, 8, MPI_INTEGER, rowtype, ierr) 33 | call mpi_type_commit(rowtype, ierr) 34 | 35 | ! send first row of matrix 36 | if (rank == 0) then 37 | call mpi_send(array(2, 1), 1, rowtype, 1, 1, MPI_COMM_WORLD, ierr) 38 | else if (rank == 1) then 39 | call mpi_recv(array(2, 1), 1, rowtype, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, & 40 | ierr) 41 | end if 42 | 43 | ! 
Print out the result 44 | if (rank == 1) then 45 | write(*,*) 'Received data' 46 | do i=1,8 47 | write(*,'(8I3)') array(i, :) 48 | end do 49 | end if 50 | 51 | call mpi_type_free(rowtype, ierr) 52 | call mpi_finalize(ierr) 53 | 54 | end program datatype1 55 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/solution/custom_type_a.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int rank; 7 | int array[8][8]; 8 | MPI_Datatype columntype; 9 | 10 | int i, j; 11 | 12 | MPI_Init(&argc, &argv); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | 15 | // Initialize arrays 16 | if (rank == 0) { 17 | for (i = 0; i < 8; i++) { 18 | for (j = 0; j < 8; j++) { 19 | array[i][j] = (i + 1) * 10 + j + 1; 20 | } 21 | } 22 | } else { 23 | for (i = 0; i < 8; i++) { 24 | for (j = 0; j < 8; j++) { 25 | array[i][j] = 0; 26 | } 27 | } 28 | } 29 | 30 | if (rank == 0) { 31 | printf("Data in rank 0\n"); 32 | for (i = 0; i < 8; i++) { 33 | for (j = 0; j < 8; j++) { 34 | printf("%3d", array[i][j]); 35 | } 36 | printf("\n"); 37 | } 38 | } 39 | 40 | //TODO: Create datatype that describes one column. Use MPI_Type_vector. 41 | 42 | // Create datatype 43 | MPI_Type_vector(8, 1, 8, MPI_INT, &columntype); 44 | MPI_Type_commit(&columntype); 45 | 46 | // Send first column of matrix 47 | if (rank == 0) { 48 | MPI_Send(&array[0][1], 1, columntype, 1, 1, MPI_COMM_WORLD); 49 | } else if (rank == 1) { 50 | MPI_Recv(&array[0][1], 1, columntype, 0, 1, MPI_COMM_WORLD, 51 | MPI_STATUS_IGNORE); 52 | } 53 | 54 | // Print out the result 55 | if (rank == 1) { 56 | printf("Received data\n"); 57 | for (i = 0; i < 8; i++) { 58 | for (j = 0; j < 8; j++) { 59 | printf("%3d", array[i][j]); 60 | } 61 | printf("\n"); 62 | } 63 | } 64 | 65 | MPI_Type_free(&columntype); 66 | MPI_Finalize(); 67 | 68 | return 0; 69 | } 70 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/solution/custom_type_b.F90: -------------------------------------------------------------------------------- 1 | program datatype1 2 | use mpi_f08 3 | implicit none 4 | 5 | integer, dimension(8,8) :: array 6 | integer :: rank, ierr 7 | type(mpi_datatype) :: indexedtype 8 | integer, dimension(4) :: counts, displs 9 | integer :: i, j 10 | 11 | call mpi_init(ierr) 12 | call mpi_comm_rank(MPI_COMM_WORLD, rank ,ierr) 13 | 14 | ! initialize arrays 15 | if (rank == 0) then 16 | do i=1,8 17 | do j=1,8 18 | array(i,j) = i*10 + j 19 | end do 20 | end do 21 | else 22 | array(:,:) = 0 23 | end if 24 | 25 | if (rank == 0) then 26 | write(*,*) 'Data in rank 0' 27 | do i=1,8 28 | write(*,'(8I3)') array(i, :) 29 | end do 30 | end if 31 | 32 | do i = 1, 4 33 | counts(i) = i 34 | displs(i) = i - 1 + 2 * (i - 1) * 8 35 | end do 36 | 37 | ! create datatype 38 | call mpi_type_indexed(4, counts, displs, MPI_INTEGER, indexedtype, ierr) 39 | call mpi_type_commit(indexedtype, ierr) 40 | 41 | ! send first indexed of matrix 42 | if (rank == 0) then 43 | call mpi_send(array, 1, indexedtype, 1, 1, MPI_COMM_WORLD, ierr) 44 | else if (rank == 1) then 45 | call mpi_recv(array, 1, indexedtype, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, & 46 | ierr) 47 | end if 48 | 49 | ! 
Print out the result 50 | if (rank == 1) then 51 | write(*,*) 'Received data' 52 | do i=1,8 53 | write(*,'(8I3)') array(i, :) 54 | end do 55 | end if 56 | 57 | call mpi_type_free(indexedtype, ierr) 58 | call mpi_finalize(ierr) 59 | 60 | end program datatype1 61 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/solution/custom_type_b.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int rank; 7 | int array[8][8]; 8 | MPI_Datatype indexedtype; 9 | int displs[4]; 10 | int counts[4]; 11 | 12 | int i, j; 13 | 14 | MPI_Init(&argc, &argv); 15 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 16 | 17 | // Initialize arrays 18 | if (rank == 0) { 19 | for (i = 0; i < 8; i++) { 20 | for (j = 0; j < 8; j++) { 21 | array[i][j] = (i + 1) * 10 + j + 1; 22 | } 23 | } 24 | } else { 25 | for (i = 0; i < 8; i++) { 26 | for (j = 0; j < 8; j++) { 27 | array[i][j] = 0; 28 | } 29 | } 30 | } 31 | 32 | if (rank == 0) { 33 | printf("Data in rank 0\n"); 34 | for (i = 0; i < 8; i++) { 35 | for (j = 0; j < 8; j++) { 36 | printf("%3d", array[i][j]); 37 | } 38 | printf("\n"); 39 | } 40 | } 41 | 42 | // Create datatype 43 | for (i = 0; i < 4; i++) { 44 | counts[i] = i + 1; 45 | displs[i] = i + 2 * i * 8; 46 | } 47 | 48 | MPI_Type_indexed(4, counts, displs, MPI_INT, &indexedtype); 49 | MPI_Type_commit(&indexedtype); 50 | 51 | // Send first indexed of matrix 52 | if (rank == 0) { 53 | MPI_Send(array, 1, indexedtype, 1, 1, MPI_COMM_WORLD); 54 | } else if (rank == 1) { 55 | MPI_Recv(array, 1, indexedtype, 0, 1, MPI_COMM_WORLD, 56 | MPI_STATUS_IGNORE); 57 | } 58 | 59 | // Print out the result on rank 1 60 | // The application is correct if the first column has the values of rank 0 61 | if (rank == 1) { 62 | printf("Received data\n"); 63 | for (i = 0; i < 8; i++) { 64 | for (j = 0; j < 8; j++) { 65 | printf("%3d", array[i][j]); 66 | } 67 | printf("\n"); 68 | } 69 | } 70 | 71 | MPI_Type_free(&indexedtype); 72 | MPI_Finalize(); 73 | 74 | return 0; 75 | } 76 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/solution/custom_type_c.F90: -------------------------------------------------------------------------------- 1 | program datatype1 2 | use mpi_f08 3 | implicit none 4 | 5 | integer, dimension(8,8) :: array 6 | integer :: rank, ierr 7 | type(mpi_datatype) :: subarray 8 | integer, dimension(2) :: sizes, subsizes, offsets 9 | integer :: i, j 10 | 11 | call mpi_init(ierr) 12 | call mpi_comm_rank(MPI_COMM_WORLD, rank ,ierr) 13 | 14 | ! initialize arrays 15 | if (rank == 0) then 16 | do i=1,8 17 | do j=1,8 18 | array(i,j) = i*10 + j 19 | end do 20 | end do 21 | else 22 | array(:,:) = 0 23 | end if 24 | 25 | if (rank == 0) then 26 | write(*,*) 'Data in rank 0' 27 | do i=1,8 28 | write(*,'(8I3)') array(i, :) 29 | end do 30 | end if 31 | 32 | ! create datatype 33 | sizes = 8 34 | subsizes = 4 35 | offsets = 2 36 | call mpi_type_create_subarray(2, sizes, subsizes, offsets, MPI_ORDER_FORTRAN, MPI_INTEGER, subarray, ierr) 37 | call mpi_type_commit(subarray, ierr) 38 | 39 | ! send first row of matrix 40 | if (rank == 0) then 41 | call mpi_send(array(1, 1), 1, subarray, 1, 1, MPI_COMM_WORLD, ierr) 42 | else if (rank == 1) then 43 | call mpi_recv(array(1, 1), 1, subarray, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, & 44 | ierr) 45 | end if 46 | 47 | ! 
Print out the result 48 | if (rank == 1) then 49 | write(*,*) 'Received data' 50 | do i=1,8 51 | write(*,'(8I3)') array(i, :) 52 | end do 53 | end if 54 | 55 | call mpi_type_free(subarray, ierr) 56 | call mpi_finalize(ierr) 57 | 58 | end program datatype1 59 | -------------------------------------------------------------------------------- /mpi/simple-datatypes/solution/custom_type_c.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) 5 | { 6 | int rank; 7 | int array[8][8]; 8 | MPI_Datatype subarray; 9 | int i, j; 10 | 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 13 | 14 | 15 | // Initialize arrays 16 | if (rank == 0) { 17 | for (i = 0; i < 8; i++) { 18 | for (j = 0; j < 8; j++) { 19 | array[i][j] = (i + 1) * 10 + j + 1; 20 | } 21 | } 22 | } else { 23 | for (i = 0; i < 8; i++) { 24 | for (j = 0; j < 8; j++) { 25 | array[i][j] = 0; 26 | } 27 | } 28 | } 29 | 30 | if (rank == 0) { 31 | printf("Data in rank 0\n"); 32 | for (i = 0; i < 8; i++) { 33 | for (j = 0; j < 8; j++) { 34 | printf("%3d", array[i][j]); 35 | } 36 | printf("\n"); 37 | } 38 | } 39 | 40 | 41 | int sizes[2] = {8, 8}; 42 | int subsizes[2] = {4, 4}; 43 | int offsets[2] = {2, 2}; 44 | // Create datatype 45 | MPI_Type_create_subarray(2, sizes, subsizes, offsets, MPI_ORDER_C, MPI_INT, &subarray); 46 | MPI_Type_commit(&subarray); 47 | 48 | // Send first column of matrix 49 | if (rank == 0) { 50 | MPI_Send(&array[0][0], 1, subarray, 1, 1, MPI_COMM_WORLD); 51 | } else if (rank == 1) { 52 | MPI_Recv(&array[0][0], 1, subarray, 0, 1, MPI_COMM_WORLD, 53 | MPI_STATUS_IGNORE); 54 | } 55 | 56 | // Print out the result 57 | if (rank == 1) { 58 | printf("Received data\n"); 59 | for (i = 0; i < 8; i++) { 60 | for (j = 0; j < 8; j++) { 61 | printf("%3d", array[i][j]); 62 | } 63 | printf("\n"); 64 | } 65 | } 66 | 67 | MPI_Type_free(&subarray); 68 | MPI_Finalize(); 69 | 70 | return 0; 71 | } 72 | -------------------------------------------------------------------------------- /mpi/struct-datatype/README.md: -------------------------------------------------------------------------------- 1 | ## Datatype for a struct / derived type 2 | 3 | The skeleton code provided in 4 | [struct_type.c](struct_type.c) or 5 | [struct_type.F90](struct_type.F90) defines a struct (C) / derived type (Fortran) 6 | 7 | 1. Implement a custom MPI datatype, and send a single struct / derived type between two 8 | processes. Verify that the communication is performed succesfully. 9 | 10 | 2. Next, try to send an array of structs / derived types. Make sure that the *extent* of the 11 | datatype is correct (you may use `MPI_Type_get_extent` and `MPI_Get_address` for checking). 12 | 13 | 3. Implement the same send by sending just a stream of bytes (type `MPI_BYTE`). 14 | Verify correctness and compare the performance of these two approaches. 
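
A rough sketch of how parts 1 and 2 could look in C is given below. The helper function `build_particle_type` is illustrative only and not part of the skeleton codes; it assumes the `particle` struct defined in [struct_type.c](struct_type.c), and the displacements are taken with `MPI_Get_address` relative to the start of the struct:

```
#include <mpi.h>

typedef struct {
    float coords[3];
    int charge;
    char label[2];
} particle;

/* Illustrative sketch (not part of the exercise skeletons):
   build a committed MPI datatype that matches one particle */
MPI_Datatype build_particle_type(particle *p)
{
    MPI_Datatype types[3] = {MPI_FLOAT, MPI_INT, MPI_CHAR};
    int blocklen[3] = {3, 1, 2};
    MPI_Aint disp[3], base;
    MPI_Datatype tmptype, particletype;
    int i;

    /* Member displacements relative to the start of the struct */
    MPI_Get_address(p, &base);
    MPI_Get_address(&p->coords, &disp[0]);
    MPI_Get_address(&p->charge, &disp[1]);
    MPI_Get_address(&p->label, &disp[2]);
    for (i = 0; i < 3; i++) {
        disp[i] -= base;
    }

    MPI_Type_create_struct(3, blocklen, disp, types, &tmptype);

    /* Part 2: force the extent to the true size of the struct so that
       an array of particles can be sent with a plain count argument */
    MPI_Type_create_resized(tmptype, 0, sizeof(particle), &particletype);
    MPI_Type_commit(&particletype);
    MPI_Type_free(&tmptype);

    return particletype;
}
```

For part 3, the same buffer can be sent as `MPI_BYTE`; the solution [solution/struct_with_byte.c](solution/struct_with_byte.c) uses the measured extent of one struct as the element size in bytes.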
15 | -------------------------------------------------------------------------------- /mpi/struct-datatype/solution/struct_with_byte.F90: -------------------------------------------------------------------------------- 1 | program datatype_struct 2 | use mpi_f08 3 | use iso_fortran_env, only : REAL64 4 | implicit none 5 | 6 | integer, parameter :: n = 1000, cnt=3, reps=10000 7 | 8 | type particle 9 | real :: coords(3) 10 | integer :: charge 11 | character(len=2) :: label 12 | end type particle 13 | 14 | type(particle) :: particles(n) 15 | 16 | integer :: i, ierror, myid, ntasks, tag 17 | 18 | type(mpi_datatype) :: particle_mpi_type, temp_type 19 | type(mpi_datatype):: types(cnt) 20 | integer :: blocklen(cnt) 21 | integer(kind=MPI_ADDRESS_KIND) :: disp(cnt) 22 | integer(kind=MPI_ADDRESS_KIND) :: lb1, lb2, extent 23 | integer :: nbytes 24 | 25 | real(REAL64) :: t1, t2 26 | 27 | call mpi_init(ierror) 28 | call mpi_comm_rank(MPI_COMM_WORLD, myid, ierror) 29 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, ierror) 30 | 31 | ! insert some data for the particle struct 32 | if (myid == 0) then 33 | do i = 1, n 34 | call random_number(particles(i)%coords) 35 | particles(i)%charge = 54 36 | particles(i)%label = 'Xe' 37 | end do 38 | end if 39 | 40 | ! Determine the true extent of one particle struct 41 | call MPI_GET_ADDRESS(particles(1),lb1,ierror) 42 | call MPI_GET_ADDRESS(particles(2),lb2,ierror) 43 | extent = lb2 - lb1 44 | 45 | t1 = mpi_wtime() 46 | ! send and receive using the MPI_BYTE datatype 47 | nbytes = n * extent 48 | if(myid == 0) then 49 | do i = 1, reps 50 | call mpi_send(particles, nbytes, MPI_BYTE, 1, i, & 51 | MPI_COMM_WORLD, ierror) 52 | end do 53 | else if(myid == 1) then 54 | do i = 1, reps 55 | call mpi_recv(particles, nbytes, MPI_BYTE, 0, i, & 56 | MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierror) 57 | end do 58 | end if 59 | t2=mpi_wtime() 60 | 61 | write(*,*) "Time: ", myid, (t2-t1) / reps 62 | write(*,*) "Check:", myid, particles(n)%label, particles(n)%coords(1), & 63 | particles(n)%coords(2), particles(n)%coords(3) 64 | 65 | call mpi_finalize(ierror) 66 | 67 | end program datatype_struct 68 | -------------------------------------------------------------------------------- /mpi/struct-datatype/solution/struct_with_byte.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int n=1000, cnt=3, reps=10000; 10 | typedef struct{ 11 | float coords[3]; 12 | int charge; 13 | char label[2]; 14 | } particle; 15 | particle particles[n]; 16 | MPI_Aint lb0, lb1, extent; 17 | int i, j, myid, ntasks; 18 | double t1, t2; 19 | 20 | MPI_Init(&argc, &argv); 21 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 22 | 23 | /* fill in some values for the particles */ 24 | if (myid == 0) { 25 | for (i=0; i < n; i++) { 26 | for (j=0; j < 3; j++) 27 | particles[i].coords[j] = (float)rand()/(float)RAND_MAX*10.0; 28 | particles[i].charge = 54; 29 | strcpy(particles[i].label, "Xe"); 30 | } 31 | } 32 | /* determine the true extent of one particle struct */ 33 | MPI_Get_address(&particles[0], &lb0); 34 | MPI_Get_address(&particles[1], &lb1); 35 | extent = lb1 - lb0; 36 | 37 | /* send and receive using the MPI_BYTE datatype */ 38 | t1 = MPI_Wtime(); 39 | if (myid == 0) { 40 | for (i=0; i < reps; i++) 41 | MPI_Send(particles, n*extent, MPI_BYTE, 1, i, MPI_COMM_WORLD); 42 | } else if (myid == 1) { 43 | for (i=0; i < reps; i++) 44 | MPI_Recv(particles, n*extent, MPI_BYTE, 0, i, MPI_COMM_WORLD, 45 
| MPI_STATUS_IGNORE); 46 | } 47 | t2 = MPI_Wtime(); 48 | 49 | printf("Time: %i, %e \n", myid, (t2-t1)/(double)reps); 50 | printf("Check: %i: %s %f %f %f \n", myid, particles[n-1].label, 51 | particles[n-1].coords[0], particles[n-1].coords[1], 52 | particles[n-1].coords[2]); 53 | 54 | MPI_Finalize(); 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /mpi/struct-datatype/struct_type.F90: -------------------------------------------------------------------------------- 1 | program datatype_struct 2 | use mpi_f08 3 | use iso_fortran_env, only : REAL64 4 | implicit none 5 | 6 | integer, parameter :: n = 1000, cnt=3, reps=10000 7 | 8 | type particle 9 | real :: coords(3) 10 | integer :: charge 11 | character(len=2) :: label 12 | end type particle 13 | 14 | type(particle) :: particles(n) 15 | 16 | integer :: i, ierror, myid, ntasks, tag 17 | 18 | type(mpi_datatype) :: particle_mpi_type, temp_type, types(cnt) 19 | integer :: blocklen(cnt) 20 | integer(kind=MPI_ADDRESS_KIND) :: disp(cnt) 21 | integer(kind=MPI_ADDRESS_KIND) :: lb, extent 22 | 23 | real(REAL64) :: t1,t2 24 | 25 | call mpi_init(ierror) 26 | call mpi_comm_rank(MPI_COMM_WORLD, myid, ierror) 27 | call mpi_comm_size(MPI_COMM_WORLD, ntasks, ierror) 28 | 29 | ! insert some data for the particle struct 30 | if(myid == 0) then 31 | do i = 1, n 32 | call random_number(particles(i)%coords) 33 | particles(i)%charge = 54 34 | particles(i)%label = 'Xe' 35 | end do 36 | end if 37 | 38 | ! TODO: define the datatype for type particle 39 | 40 | ! TODO: Check extent. 41 | ! (Not really necessary on most systems.) 42 | 43 | ! communicate using the created particletype 44 | t1 = mpi_wtime() 45 | if(myid == 0) then 46 | do i = 1, reps ! multiple sends for better timing 47 | ! TODO: send 48 | end do 49 | else if(myid == 1) then 50 | do i = 1, reps 51 | ! TODO: receive 52 | end do 53 | end if 54 | t2=mpi_wtime() 55 | 56 | ! 
TODOs end 57 | 58 | write(*,*) "Time: ", myid, (t2-t1) / reps 59 | write(*,*) "Check:", myid, particles(n)%label, particles(n)%coords(1), & 60 | particles(n)%coords(2), particles(n)%coords(3) 61 | 62 | call mpi_type_free(particle_mpi_type, ierror) 63 | call mpi_finalize(ierror) 64 | 65 | end program datatype_struct 66 | -------------------------------------------------------------------------------- /mpi/struct-datatype/struct_type.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | int n=1000, cnt=3, reps=10000; 10 | 11 | typedef struct { 12 | float coords[3]; 13 | int charge; 14 | char label[2]; 15 | } particle; 16 | 17 | particle particles[n]; 18 | 19 | int i, j, myid, ntasks; 20 | double t1, t2; 21 | 22 | MPI_Init(&argc, &argv); 23 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 24 | 25 | /* fill in some values for the particles */ 26 | if (myid == 0) { 27 | for (i=0; i < n; i++) { 28 | for (j=0; j < 3; j++) 29 | particles[i].coords[j] = (float)rand()/(float)RAND_MAX*10.0; 30 | particles[i].charge = 54; 31 | strcpy(particles[i].label, "H"); 32 | } 33 | } 34 | 35 | // TODO: define data type for the struct 36 | MPI_Datatype particletype, temptype, types[cnt]; 37 | MPI_Aint disp[cnt], dist[2], lb, extent; 38 | int blocklen[cnt]; 39 | 40 | // TODO: check extent (not really necessary on most platforms) 41 | 42 | // communicate using the created particletype 43 | t1 = MPI_Wtime(); 44 | if (myid == 0) { 45 | for (i=0; i < reps; i++) // multiple sends for better timing 46 | // TODO: send 47 | 48 | } else if (myid == 1) { 49 | for (i=0; i < reps; i++) 50 | // TODO: receive 51 | 52 | } 53 | 54 | // TODOs end 55 | 56 | t2 = MPI_Wtime(); 57 | 58 | printf("Time: %i, %e \n", myid, (t2-t1)/(double)reps); 59 | printf("Check: %i: %s %f %f %f \n", myid, particles[n-1].label, 60 | particles[n-1].coords[0], particles[n-1].coords[1], 61 | particles[n-1].coords[2]); 62 | 63 | MPI_Finalize(); 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /parallel-io/hdf5/README.md: -------------------------------------------------------------------------------- 1 | ## HDF5 example 2 | 3 | Study and test the HDF5 examples ([hdf5.c](hdf5.c) or [hdf5.f90](hdf5.f90)) 4 | where the [Simple MPI-IO exercise](../mpi-io) has been re-written with HDF5 5 | using collective parallel write. 6 | 7 | On Puhti, you will need to load the module `hdf5/1.10.4-mpi` before you can 8 | compile the code: 9 | 10 | ``` 11 | module load hdf5/1.10.4-mpi 12 | ``` 13 | 14 | Compile and run the program. You can use the `h5dump` command to check the 15 | values in a HDF5 file. 16 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/README.md: -------------------------------------------------------------------------------- 1 | ## Heat equation: checkpoint + restart with MPI-IO 2 | 3 | 1. Add a feature to the heat equation solver program that enables one to 4 | start the program from a given state (i.e. not from scratch every time). 5 | 6 | 2. Add also a checkpointing feature that will dump the state of the simulation 7 | to disk periodically (e.g. after every few tens of iterations); in a form 8 | that can be read in afterwards. 9 | 10 | Combining the two features allows one to restart and continue an earlier 11 | calculation. 
So, when the program starts and a checkpoint file is present, it 12 | will replace the initial state with the one in the restart file. 13 | 14 | Use MPI-IO to accomplish the I/O routines. Starting points are provided in 15 | [c/io.c](c/io.c) and [fortran/io.F90](fortran/io.F90). 16 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/Makefile: -------------------------------------------------------------------------------- 1 | COMP=intel 2 | 3 | COMMONDIR=../common 4 | LIBPNGDIR=/appl/opt/libpng 5 | 6 | ifeq ($(COMP),cray) 7 | CC=cc 8 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 9 | LDFLAGS=-L$(LIBPNGDIR)/lib 10 | LIBS=-lpng -lz 11 | endif 12 | 13 | ifeq ($(COMP),gnu) 14 | CC=mpicc 15 | CCFLAGS=-O3 -Wall -I$(LIBPNGDIR)/include -I$(COMMONDIR) 16 | LDFLAGS=-L$(LIBPNGDIR)/lib 17 | LIBS=-lpng -lz 18 | endif 19 | 20 | ifeq ($(COMP),intel) 21 | CC=mpicc 22 | CCFLAGS=-O3 -Wall -I$(LIBPNGDIR)/include -I$(COMMONDIR) 23 | LDFLAGS=-L$(LIBPNGDIR)/lib 24 | LIBS=-lpng -lz 25 | endif 26 | 27 | EXE=heat_mpi 28 | OBJS=core.o setup.o utilities.o io.o main.o 29 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 30 | 31 | 32 | all: $(EXE) 33 | 34 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 35 | core.o: core.c heat.h 36 | utilities.o: utilities.c heat.h 37 | setup.o: setup.c heat.h 38 | io.o: io.c heat.h 39 | main.o: main.c heat.h 40 | 41 | $(OBJS_PNG): C_COMPILER := $(CC) 42 | $(OBJS): C_COMPILER := $(CC) 43 | 44 | $(EXE): $(OBJS) $(OBJS_PNG) 45 | $(CC) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 46 | 47 | %.o: %.c 48 | $(C_COMPILER) $(CCFLAGS) -c $< -o $@ 49 | 50 | .PHONY: clean 51 | clean: 52 | -/bin/rm -f $(EXE) a.out *.o *.png *~ 53 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/core.c: -------------------------------------------------------------------------------- 1 | /* Main solver routines for heat equation solver */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "heat.h" 10 | 11 | /* Exchange the boundary values */ 12 | void exchange(field *temperature, parallel_data *parallel) 13 | { 14 | 15 | // Send to the up, receive from down 16 | MPI_Sendrecv(temperature->data[1], temperature->ny + 2, MPI_DOUBLE, 17 | parallel->nup, 11, 18 | temperature->data[temperature->nx + 1], 19 | temperature->ny + 2, MPI_DOUBLE, parallel->ndown, 11, 20 | MPI_COMM_WORLD, MPI_STATUS_IGNORE); 21 | // Send to the down, receive from up 22 | MPI_Sendrecv(temperature->data[temperature->nx], temperature->ny + 2, 23 | MPI_DOUBLE, parallel->ndown, 12, 24 | temperature->data[0], temperature->ny + 2, MPI_DOUBLE, 25 | parallel->nup, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 26 | } 27 | 28 | 29 | /* Update the temperature values using five-point stencil */ 30 | void evolve(field *curr, field *prev, double a, double dt) 31 | { 32 | int i, j; 33 | double dx2, dy2; 34 | 35 | /* Determine the temperature field at next time step 36 | * As we have fixed boundary conditions, the outermost gridpoints 37 | * are not updated. 
*/ 38 | dx2 = prev->dx * prev->dx; 39 | dy2 = prev->dy * prev->dy; 40 | for (i = 1; i < curr->nx + 1; i++) { 41 | for (j = 1; j < curr->ny + 1; j++) { 42 | curr->data[i][j] = prev->data[i][j] + a * dt * 43 | ((prev->data[i + 1][j] - 44 | 2.0 * prev->data[i][j] + 45 | prev->data[i - 1][j]) / dx2 + 46 | (prev->data[i][j + 1] - 47 | 2.0 * prev->data[i][j] + 48 | prev->data[i][j - 1]) / dy2); 49 | } 50 | } 51 | } 52 | 53 | 54 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/heat.h: -------------------------------------------------------------------------------- 1 | #ifndef __HEAT_H__ 2 | #define __HEAT_H__ 3 | 4 | /* Datatype for temperature field */ 5 | typedef struct { 6 | /* nx and ny are the true dimensions of the field. The array data 7 | * contains also ghost layers, so it will have dimensions nx+2 x ny+2 */ 8 | int nx; /* Local dimensions of the field */ 9 | int ny; 10 | int nx_full; /* Global dimensions of the field */ 11 | int ny_full; /* Global dimensions of the field */ 12 | double dx; 13 | double dy; 14 | double **data; 15 | } field; 16 | 17 | /* Datatype for basic parallelization information */ 18 | typedef struct { 19 | int size; /* Number of MPI tasks */ 20 | int rank; 21 | int nup, ndown; /* Ranks of neighbouring MPI tasks */ 22 | } parallel_data; 23 | 24 | 25 | /* We use here fixed grid spacing */ 26 | #define DX 0.01 27 | #define DY 0.01 28 | 29 | /* file name for restart checkpoints*/ 30 | #define CHECKPOINT "HEAT_RESTART.dat" 31 | 32 | /* Function prototypes */ 33 | double **malloc_2d(int nx, int ny); 34 | 35 | void free_2d(double **array); 36 | 37 | void set_field_dimensions(field *temperature, int nx, int ny, 38 | parallel_data *parallel); 39 | 40 | void parallel_setup(parallel_data *parallel, int nx, int ny); 41 | 42 | void parallel_set_dimensions(parallel_data *parallel, int nx, int ny); 43 | 44 | void initialize(int argc, char *argv[], field *temperature1, 45 | field *temperature2, int *nsteps, parallel_data *parallel); 46 | 47 | void generate_field(field *temperature, parallel_data *parallel); 48 | 49 | void exchange(field *temperature, parallel_data *parallel); 50 | 51 | void evolve(field *curr, field *prev, double a, double dt); 52 | 53 | void write_field(field *temperature, int iter, parallel_data *parallel); 54 | 55 | void read_field(field *temperature1, field *temperature2, 56 | char *filename, parallel_data *parallel); 57 | 58 | void copy_field(field *temperature1, field *temperature2); 59 | 60 | void swap_fields(field *temperature1, field *temperature2); 61 | 62 | void allocate_field(field *temperature); 63 | 64 | void finalize(field *temperature1, field *temperature2); 65 | 66 | void write_restart(field *temperature, parallel_data *parallel, int iter); 67 | 68 | void read_restart(field *temperature, parallel_data *parallel, int *iter); 69 | 70 | #endif /* __HEAT_H__ */ 71 | 72 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/main.c: -------------------------------------------------------------------------------- 1 | /* Heat equation solver in 2D. 
*/ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "heat.h" 10 | 11 | 12 | int main(int argc, char **argv) 13 | { 14 | double a = 0.5; //!< Diffusion constant 15 | field current, previous; //!< Current and previous temperature fields 16 | 17 | double dt; //!< Time step 18 | int nsteps; //!< Number of time steps 19 | 20 | int image_interval = 500; //!< Image output interval 21 | int restart_interval = 200; //!< Checkpoint output interval 22 | 23 | parallel_data parallelization; //!< Parallelization info 24 | 25 | int iter; //!< Iteration counter 26 | 27 | double dx2, dy2; //!< delta x and y squared 28 | 29 | double start_clock; //!< Time stamps 30 | 31 | MPI_Init(&argc, &argv); 32 | 33 | initialize(argc, argv, ¤t, &previous, &nsteps, ¶llelization); 34 | 35 | /* Output the initial field */ 36 | write_field(¤t, 0, ¶llelization); 37 | 38 | /* Largest stable time step */ 39 | dx2 = current.dx * current.dx; 40 | dy2 = current.dy * current.dy; 41 | dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2)); 42 | 43 | /* Get the start time stamp */ 44 | start_clock = MPI_Wtime(); 45 | 46 | /* Time evolve */ 47 | for (iter = 1; iter <= nsteps; iter++) { 48 | exchange(&previous, ¶llelization); 49 | evolve(¤t, &previous, a, dt); 50 | if (iter % image_interval == 0) { 51 | write_field(¤t, iter, ¶llelization); 52 | } 53 | /* write a checkpoint now and then for easy restarting */ 54 | if (iter % restart_interval == 0) { 55 | write_restart(¤t, ¶llelization, iter); 56 | } 57 | /* Swap current field so that it will be used 58 | as previous for next iteration step */ 59 | swap_fields(¤t, &previous); 60 | } 61 | 62 | /* Determine the CPU time used for the iteration */ 63 | if (parallelization.rank == 0) { 64 | printf("Iteration took %.3f seconds.\n", (MPI_Wtime() - start_clock)); 65 | printf("Reference value at 5,5: %f\n", previous.data[5][5]); 66 | } 67 | 68 | finalize(¤t, &previous); 69 | MPI_Finalize(); 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/solution/Makefile: -------------------------------------------------------------------------------- 1 | COMP=intel 2 | 3 | COMMONDIR=../../common 4 | LIBPNGDIR=/appl/opt/libpng 5 | 6 | ifeq ($(COMP),cray) 7 | CC=cc 8 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 9 | LDFLAGS=-L$(LIBPNGDIR)/lib 10 | LIBS=-lpng -lz 11 | endif 12 | 13 | ifeq ($(COMP),gnu) 14 | CC=mpicc 15 | CCFLAGS=-O3 -Wall -I$(LIBPNGDIR)/include -I$(COMMONDIR) 16 | LDFLAGS=-L$(LIBPNGDIR)/lib 17 | LIBS=-lpng -lz 18 | endif 19 | 20 | ifeq ($(COMP),intel) 21 | CC=mpicc 22 | CCFLAGS=-O3 -Wall -I$(LIBPNGDIR)/include -I$(COMMONDIR) 23 | LDFLAGS=-L$(LIBPNGDIR)/lib 24 | LIBS=-lpng -lz 25 | endif 26 | 27 | EXE=heat_mpi 28 | OBJS=core.o setup.o utilities.o io.o main.o 29 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 30 | 31 | 32 | all: $(EXE) 33 | 34 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 35 | core.o: core.c heat.h 36 | utilities.o: utilities.c heat.h 37 | setup.o: setup.c heat.h 38 | io.o: io.c heat.h 39 | main.o: main.c heat.h 40 | 41 | $(OBJS_PNG): C_COMPILER := $(CC) 42 | $(OBJS): C_COMPILER := $(CC) 43 | 44 | $(EXE): $(OBJS) $(OBJS_PNG) 45 | $(CC) $(CCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 46 | 47 | %.o: %.c 48 | $(C_COMPILER) $(CCFLAGS) -c $< -o $@ 49 | 50 | .PHONY: clean 51 | clean: 52 | -/bin/rm -f $(EXE) a.out *.o *.png *~ 53 | -------------------------------------------------------------------------------- 
/parallel-io/heat-restart/c/solution/core.c: -------------------------------------------------------------------------------- 1 | /* Main solver routines for heat equation solver */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "heat.h" 10 | 11 | /* Exchange the boundary values */ 12 | void exchange(field *temperature, parallel_data *parallel) 13 | { 14 | 15 | // Send to the up, receive from down 16 | MPI_Sendrecv(temperature->data[1], temperature->ny + 2, MPI_DOUBLE, 17 | parallel->nup, 11, 18 | temperature->data[temperature->nx + 1], 19 | temperature->ny + 2, MPI_DOUBLE, parallel->ndown, 11, 20 | MPI_COMM_WORLD, MPI_STATUS_IGNORE); 21 | // Send to the down, receive from up 22 | MPI_Sendrecv(temperature->data[temperature->nx], temperature->ny + 2, 23 | MPI_DOUBLE, parallel->ndown, 12, 24 | temperature->data[0], temperature->ny + 2, MPI_DOUBLE, 25 | parallel->nup, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 26 | } 27 | 28 | 29 | /* Update the temperature values using five-point stencil */ 30 | void evolve(field *curr, field *prev, double a, double dt) 31 | { 32 | int i, j; 33 | double dx2, dy2; 34 | 35 | /* Determine the temperature field at next time step 36 | * As we have fixed boundary conditions, the outermost gridpoints 37 | * are not updated. */ 38 | dx2 = prev->dx * prev->dx; 39 | dy2 = prev->dy * prev->dy; 40 | for (i = 1; i < curr->nx + 1; i++) { 41 | for (j = 1; j < curr->ny + 1; j++) { 42 | curr->data[i][j] = prev->data[i][j] + a * dt * 43 | ((prev->data[i + 1][j] - 44 | 2.0 * prev->data[i][j] + 45 | prev->data[i - 1][j]) / dx2 + 46 | (prev->data[i][j + 1] - 47 | 2.0 * prev->data[i][j] + 48 | prev->data[i][j - 1]) / dy2); 49 | } 50 | } 51 | } 52 | 53 | 54 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/solution/heat.h: -------------------------------------------------------------------------------- 1 | #ifndef __HEAT_H__ 2 | #define __HEAT_H__ 3 | 4 | /* Datatype for temperature field */ 5 | typedef struct { 6 | /* nx and ny are the true dimensions of the field. 
The array data 7 | * contains also ghost layers, so it will have dimensions nx+2 x ny+2 */ 8 | int nx; /* Local dimensions of the field */ 9 | int ny; 10 | int nx_full; /* Global dimensions of the field */ 11 | int ny_full; /* Global dimensions of the field */ 12 | double dx; 13 | double dy; 14 | double **data; 15 | } field; 16 | 17 | /* Datatype for basic parallelization information */ 18 | typedef struct { 19 | int size; /* Number of MPI tasks */ 20 | int rank; 21 | int nup, ndown; /* Ranks of neighbouring MPI tasks */ 22 | } parallel_data; 23 | 24 | 25 | /* We use here fixed grid spacing */ 26 | #define DX 0.01 27 | #define DY 0.01 28 | 29 | /* file name for restart checkpoints*/ 30 | #define CHECKPOINT "HEAT_RESTART.dat" 31 | 32 | /* Function prototypes */ 33 | double **malloc_2d(int nx, int ny); 34 | 35 | void free_2d(double **array); 36 | 37 | void set_field_dimensions(field *temperature, int nx, int ny, 38 | parallel_data *parallel); 39 | 40 | void parallel_setup(parallel_data *parallel, int nx, int ny); 41 | 42 | void parallel_set_dimensions(parallel_data *parallel, int nx, int ny); 43 | 44 | void initialize(int argc, char *argv[], field *temperature1, 45 | field *temperature2, int *nsteps, parallel_data *parallel); 46 | 47 | void generate_field(field *temperature, parallel_data *parallel); 48 | 49 | void exchange(field *temperature, parallel_data *parallel); 50 | 51 | void evolve(field *curr, field *prev, double a, double dt); 52 | 53 | void write_field(field *temperature, int iter, parallel_data *parallel); 54 | 55 | void read_field(field *temperature1, field *temperature2, 56 | char *filename, parallel_data *parallel); 57 | 58 | void copy_field(field *temperature1, field *temperature2); 59 | 60 | void swap_fields(field *temperature1, field *temperature2); 61 | 62 | void allocate_field(field *temperature); 63 | 64 | void finalize(field *temperature1, field *temperature2); 65 | 66 | void write_restart(field *temperature, parallel_data *parallel, int iter); 67 | 68 | void read_restart(field *temperature, parallel_data *parallel, int *iter); 69 | 70 | #endif /* __HEAT_H__ */ 71 | 72 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/solution/main.c: -------------------------------------------------------------------------------- 1 | /* Heat equation solver in 2D. 
*/ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "heat.h" 10 | 11 | 12 | int main(int argc, char **argv) 13 | { 14 | double a = 0.5; //!< Diffusion constant 15 | field current, previous; //!< Current and previous temperature fields 16 | 17 | double dt; //!< Time step 18 | int nsteps; //!< Number of time steps 19 | 20 | int image_interval = 500; //!< Image output interval 21 | int restart_interval = 200; //!< Checkpoint output interval 22 | 23 | parallel_data parallelization; //!< Parallelization info 24 | 25 | int iter; //!< Iteration counter 26 | 27 | double dx2, dy2; //!< delta x and y squared 28 | 29 | double start_clock; //!< Time stamps 30 | 31 | MPI_Init(&argc, &argv); 32 | 33 | initialize(argc, argv, ¤t, &previous, &nsteps, ¶llelization); 34 | 35 | /* Output the initial field */ 36 | write_field(¤t, 0, ¶llelization); 37 | 38 | /* Largest stable time step */ 39 | dx2 = current.dx * current.dx; 40 | dy2 = current.dy * current.dy; 41 | dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2)); 42 | 43 | /* Get the start time stamp */ 44 | start_clock = MPI_Wtime(); 45 | 46 | /* Time evolve */ 47 | for (iter = 1; iter <= nsteps; iter++) { 48 | exchange(&previous, ¶llelization); 49 | evolve(¤t, &previous, a, dt); 50 | if (iter % image_interval == 0) { 51 | write_field(¤t, iter, ¶llelization); 52 | } 53 | /* write a checkpoint now and then for easy restarting */ 54 | if (iter % restart_interval == 0) { 55 | write_restart(¤t, ¶llelization, iter); 56 | } 57 | /* Swap current field so that it will be used 58 | as previous for next iteration step */ 59 | swap_fields(¤t, &previous); 60 | } 61 | 62 | /* Determine the CPU time used for the iteration */ 63 | if (parallelization.rank == 0) { 64 | printf("Iteration took %.3f seconds.\n", (MPI_Wtime() - start_clock)); 65 | printf("Reference value at 5,5: %f\n", previous.data[5][5]); 66 | } 67 | 68 | finalize(¤t, &previous); 69 | MPI_Finalize(); 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/solution/utilities.c: -------------------------------------------------------------------------------- 1 | /* Utility functions for heat equation solver 2 | * NOTE: This file does not need to be edited! 
*/ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "heat.h" 10 | 11 | /* Utility routine for allocating a two dimensional array */ 12 | double **malloc_2d(int nx, int ny) 13 | { 14 | double **array; 15 | int i; 16 | 17 | array = (double **) malloc(nx * sizeof(double *)); 18 | array[0] = (double *) malloc(nx * ny * sizeof(double)); 19 | 20 | for (i = 1; i < nx; i++) { 21 | array[i] = array[0] + i * ny; 22 | } 23 | 24 | return array; 25 | } 26 | 27 | /* Utility routine for deallocating a two dimensional array */ 28 | void free_2d(double **array) 29 | { 30 | free(array[0]); 31 | free(array); 32 | } 33 | 34 | 35 | /* Copy data on temperature1 into temperature2 */ 36 | void copy_field(field *temperature1, field *temperature2) 37 | { 38 | assert(temperature1->nx == temperature2->nx); 39 | assert(temperature1->ny == temperature2->ny); 40 | memcpy(temperature2->data[0], temperature1->data[0], 41 | (temperature1->nx + 2) * (temperature1->ny + 2) * sizeof(double)); 42 | } 43 | 44 | /* Swap the data of fields temperature1 and temperature2 */ 45 | void swap_fields(field *temperature1, field *temperature2) 46 | { 47 | double **tmp; 48 | tmp = temperature1->data; 49 | temperature1->data = temperature2->data; 50 | temperature2->data = tmp; 51 | } 52 | 53 | /* Allocate memory for a temperature field and initialise it to zero */ 54 | void allocate_field(field *temperature) 55 | { 56 | // Allocate also ghost layers 57 | temperature->data = 58 | malloc_2d(temperature->nx + 2, temperature->ny + 2); 59 | 60 | // Initialize to zero 61 | memset(temperature->data[0], 0.0, 62 | (temperature->nx + 2) * (temperature->ny + 2) * sizeof(double)); 63 | } 64 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/c/utilities.c: -------------------------------------------------------------------------------- 1 | /* Utility functions for heat equation solver 2 | * NOTE: This file does not need to be edited! 
*/ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "heat.h" 10 | 11 | /* Utility routine for allocating a two dimensional array */ 12 | double **malloc_2d(int nx, int ny) 13 | { 14 | double **array; 15 | int i; 16 | 17 | array = (double **) malloc(nx * sizeof(double *)); 18 | array[0] = (double *) malloc(nx * ny * sizeof(double)); 19 | 20 | for (i = 1; i < nx; i++) { 21 | array[i] = array[0] + i * ny; 22 | } 23 | 24 | return array; 25 | } 26 | 27 | /* Utility routine for deallocating a two dimensional array */ 28 | void free_2d(double **array) 29 | { 30 | free(array[0]); 31 | free(array); 32 | } 33 | 34 | 35 | /* Copy data on temperature1 into temperature2 */ 36 | void copy_field(field *temperature1, field *temperature2) 37 | { 38 | assert(temperature1->nx == temperature2->nx); 39 | assert(temperature1->ny == temperature2->ny); 40 | memcpy(temperature2->data[0], temperature1->data[0], 41 | (temperature1->nx + 2) * (temperature1->ny + 2) * sizeof(double)); 42 | } 43 | 44 | /* Swap the data of fields temperature1 and temperature2 */ 45 | void swap_fields(field *temperature1, field *temperature2) 46 | { 47 | double **tmp; 48 | tmp = temperature1->data; 49 | temperature1->data = temperature2->data; 50 | temperature2->data = tmp; 51 | } 52 | 53 | /* Allocate memory for a temperature field and initialise it to zero */ 54 | void allocate_field(field *temperature) 55 | { 56 | // Allocate also ghost layers 57 | temperature->data = 58 | malloc_2d(temperature->nx + 2, temperature->ny + 2); 59 | 60 | // Initialize to zero 61 | memset(temperature->data[0], 0.0, 62 | (temperature->nx + 2) * (temperature->ny + 2) * sizeof(double)); 63 | } 64 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/Makefile: -------------------------------------------------------------------------------- 1 | COMP=intel 2 | 3 | COMMONDIR=../common 4 | LIBPNGDIR=/appl/opt/libpng 5 | 6 | ifeq ($(COMP),cray) 7 | FC=ftn 8 | CC=cc 9 | FCFLAGS=-O3 10 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 11 | LDFLAGS=-L$(LIBPNGDIR)/lib 12 | LIBS=-lpng -lz 13 | endif 14 | 15 | ifeq ($(COMP),gnu) 16 | FC=mpif90 17 | CC=gcc 18 | FCFLAGS=-O3 19 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 20 | LDFLAGS=-L$(LIBPNGDIR)/lib 21 | LIBS=-lpng -lz 22 | endif 23 | 24 | ifeq ($(COMP),intel) 25 | FC=mpif90 26 | CC=icc 27 | FCFLAGS=-O3 28 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 29 | LDFLAGS=-L$(LIBPNGDIR)/lib 30 | LIBS=-lpng -lz 31 | endif 32 | 33 | EXE=heat_mpi 34 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o 35 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 36 | 37 | 38 | all: $(EXE) 39 | 40 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 41 | core.o: core.F90 heat_mod.o 42 | utilities.o: utilities.F90 heat_mod.o 43 | io.o: io.F90 heat_mod.o pngwriter_mod.o 44 | setup.o: setup.F90 heat_mod.o utilities.o io.o 45 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o 46 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o 47 | 48 | $(EXE): $(OBJS) $(OBJS_PNG) 49 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 50 | 51 | %.o: %.F90 52 | $(FC) $(FCFLAGS) -c $< -o $@ 53 | 54 | %.o: %.c 55 | $(CC) $(CCFLAGS) -c $< -o $@ 56 | 57 | .PHONY: clean 58 | clean: 59 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ 60 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/core.F90: 
-------------------------------------------------------------------------------- 1 | ! Main solver routines for heat equation solver 2 | module core 3 | use heat 4 | 5 | contains 6 | 7 | ! Exchange the boundary data between MPI tasks 8 | subroutine exchange(field0, parallel) 9 | use mpi 10 | 11 | implicit none 12 | 13 | type(field), intent(inout) :: field0 14 | type(parallel_data), intent(in) :: parallel 15 | 16 | integer :: ierr 17 | 18 | ! Send to left, receive from right 19 | call mpi_sendrecv(field0%data(:, 1), field0%nx + 2, MPI_DOUBLE_PRECISION, & 20 | & parallel%nleft, 11, & 21 | & field0%data(:, field0%ny + 1), field0%nx + 2, MPI_DOUBLE_PRECISION, & 22 | & parallel%nright, 11, & 23 | & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) 24 | 25 | ! Send to right, receive from left 26 | call mpi_sendrecv(field0%data(:, field0%ny), field0%nx + 2, MPI_DOUBLE_PRECISION, & 27 | & parallel%nright, 12, & 28 | & field0%data(:, 0), field0%nx + 2, MPI_DOUBLE_PRECISION,& 29 | & parallel%nleft, 12, & 30 | & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) 31 | 32 | end subroutine exchange 33 | 34 | ! Compute one time step of temperature evolution 35 | ! Arguments: 36 | ! curr (type(field)): current temperature values 37 | ! prev (type(field)): values from previous time step 38 | ! a (real(dp)): update equation constant 39 | ! dt (real(dp)): time step value 40 | subroutine evolve(curr, prev, a, dt) 41 | 42 | implicit none 43 | 44 | type(field), intent(inout) :: curr, prev 45 | real(dp) :: a, dt 46 | integer :: i, j, nx, ny 47 | 48 | nx = curr%nx 49 | ny = curr%ny 50 | 51 | do j = 1, ny 52 | do i = 1, nx 53 | curr%data(i, j) = prev%data(i, j) + a * dt * & 54 | & ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + & 55 | & prev%data(i+1, j)) / curr%dx**2 + & 56 | & (prev%data(i, j-1) - 2.0 * prev%data(i, j) + & 57 | & prev%data(i, j+1)) / curr%dy**2) 58 | end do 59 | end do 60 | end subroutine evolve 61 | 62 | end module core 63 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/heat_mod.F90: -------------------------------------------------------------------------------- 1 | ! Field metadata for heat equation solver 2 | module heat 3 | use iso_fortran_env, only : REAL64 4 | implicit none 5 | 6 | integer, parameter :: dp = REAL64 7 | real(dp), parameter :: DX = 0.01, DY = 0.01 ! Fixed grid spacing 8 | 9 | type :: field 10 | integer :: nx ! local dimension of the field 11 | integer :: ny 12 | integer :: nx_full ! global dimension of the field 13 | integer :: ny_full 14 | real(dp) :: dx 15 | real(dp) :: dy 16 | real(dp), dimension(:,:), allocatable :: data 17 | end type field 18 | 19 | type :: parallel_data 20 | integer :: size 21 | integer :: rank 22 | integer :: nleft, nright ! Ranks of neighbouring MPI tasks 23 | end type parallel_data 24 | 25 | contains 26 | ! Initialize the field type metadata 27 | ! Arguments: 28 | ! field0 (type(field)): input field 29 | ! 
nx, ny, dx, dy: field dimensions and spatial step size 30 | subroutine set_field_dimensions(field0, nx, ny, parallel) 31 | implicit none 32 | 33 | type(field), intent(out) :: field0 34 | integer, intent(in) :: nx, ny 35 | type(parallel_data), intent(in) :: parallel 36 | 37 | integer :: nx_local, ny_local 38 | 39 | nx_local = nx 40 | ny_local = ny / parallel%size 41 | 42 | field0%dx = DX 43 | field0%dy = DY 44 | field0%nx = nx_local 45 | field0%ny = ny_local 46 | field0%nx_full = nx 47 | field0%ny_full = ny 48 | 49 | end subroutine set_field_dimensions 50 | 51 | subroutine parallel_setup(parallel, nx, ny) 52 | use mpi 53 | 54 | implicit none 55 | 56 | type(parallel_data), intent(out) :: parallel 57 | integer, intent(in), optional :: nx, ny 58 | 59 | integer :: ny_local 60 | integer :: ierr 61 | 62 | call mpi_comm_size(MPI_COMM_WORLD, parallel%size, ierr) 63 | 64 | if (present(ny)) then 65 | ny_local = ny / parallel%size 66 | if (ny_local * parallel%size /= ny) then 67 | write(*,*) 'Cannot divide grid evenly to processors' 68 | call mpi_abort(MPI_COMM_WORLD, -2, ierr) 69 | end if 70 | end if 71 | 72 | call mpi_comm_rank(MPI_COMM_WORLD, parallel%rank, ierr) 73 | 74 | parallel%nleft = parallel%rank - 1 75 | parallel%nright = parallel%rank + 1 76 | 77 | if (parallel%nleft < 0) then 78 | parallel%nleft = MPI_PROC_NULL 79 | end if 80 | if (parallel%nright > parallel%size - 1) then 81 | parallel%nright = MPI_PROC_NULL 82 | end if 83 | 84 | end subroutine parallel_setup 85 | 86 | end module heat 87 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/main.F90: -------------------------------------------------------------------------------- 1 | ! Heat equation solver in 2D. 2 | 3 | program heat_solve 4 | use heat 5 | use core 6 | use io 7 | use setup 8 | use utilities 9 | use mpi 10 | 11 | implicit none 12 | 13 | real(dp), parameter :: a = 0.5 ! Diffusion constant 14 | type(field) :: current, previous ! Current and previus temperature fields 15 | 16 | real(dp) :: dt ! Time step 17 | integer :: nsteps ! Number of time steps 18 | integer, parameter :: image_interval = 500 ! Image output interval 19 | integer, parameter :: checkpoint_interval = 200 ! restart interval 20 | 21 | type(parallel_data) :: parallelization 22 | integer :: ierr 23 | 24 | integer :: iter 25 | 26 | real(kind=dp) :: start, stop ! Timers 27 | 28 | call mpi_init(ierr) 29 | 30 | call initialize(current, previous, nsteps, parallelization) 31 | 32 | ! Draw the picture of the initial state 33 | call write_field(current, 0, parallelization) 34 | 35 | ! Largest stable time step 36 | dt = current%dx**2 * current%dy**2 / & 37 | & (2.0 * a * (current%dx**2 + current%dy**2)) 38 | 39 | ! Main iteration loop, save a picture every 40 | ! image_interval steps 41 | 42 | start = mpi_wtime() 43 | 44 | do iter = 1, nsteps 45 | call exchange(previous, parallelization) 46 | call evolve(current, previous, a, dt) 47 | if (mod(iter, image_interval) == 0) then 48 | call write_field(current, iter, parallelization) 49 | end if 50 | if (mod(iter, checkpoint_interval) == 0) then 51 | call write_restart(current, parallelization, iter) 52 | end if 53 | call swap_fields(current, previous) 54 | end do 55 | 56 | stop = mpi_wtime() 57 | 58 | if (parallelization % rank == 0) then 59 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.' 
60 | write(*,'(A,G0)') 'Reference value at 5,5: ', previous % data(5,5) 61 | end if 62 | 63 | call finalize(current, previous) 64 | 65 | call mpi_finalize(ierr) 66 | 67 | end program heat_solve 68 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/pngwriter_mod.F90: -------------------------------------------------------------------------------- 1 | ! PNG writer for heat equation solver 2 | module pngwriter 3 | use heat 4 | 5 | contains 6 | 7 | function save_png(data, nx, ny, fname) result(stat) 8 | 9 | use, intrinsic :: ISO_C_BINDING 10 | implicit none 11 | 12 | real(dp), dimension(:,:), intent(in) :: data 13 | integer, intent(in) :: nx, ny 14 | character(len=*), intent(in) :: fname 15 | integer :: stat 16 | 17 | ! Interface for save_png C-function 18 | interface 19 | ! The C-function definition is 20 | ! int save_png(double *data, const int nx, const int ny, 21 | ! const char *fname) 22 | function save_png_c(data, nx, ny, fname, order) & 23 | & bind(C,name="save_png") result(stat) 24 | use, intrinsic :: ISO_C_BINDING 25 | implicit none 26 | real(kind=C_DOUBLE) :: data(*) 27 | integer(kind=C_INT), value, intent(IN) :: nx, ny 28 | character(kind=C_CHAR), intent(IN) :: fname(*) 29 | character(kind=C_CHAR), value, intent(IN) :: order 30 | integer(kind=C_INT) :: stat 31 | end function save_png_c 32 | end interface 33 | 34 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') 35 | if (stat /= 0) then 36 | write(*,*) 'save_png returned error!' 37 | end if 38 | 39 | end function save_png 40 | 41 | end module pngwriter 42 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/solution/Makefile: -------------------------------------------------------------------------------- 1 | COMP=intel 2 | 3 | COMMONDIR=../../common 4 | LIBPNGDIR=/appl/opt/libpng 5 | 6 | ifeq ($(COMP),cray) 7 | FC=ftn 8 | CC=cc 9 | FCFLAGS=-O3 10 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 11 | LDFLAGS=-L$(LIBPNGDIR)/lib 12 | LIBS=-lpng -lz 13 | endif 14 | 15 | ifeq ($(COMP),gnu) 16 | FC=mpif90 17 | CC=gcc 18 | FCFLAGS=-O3 19 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 20 | LDFLAGS=-L$(LIBPNGDIR)/lib 21 | LIBS=-lpng -lz 22 | endif 23 | 24 | ifeq ($(COMP),intel) 25 | FC=mpif90 26 | CC=icc 27 | FCFLAGS=-O3 28 | CCFLAGS=-O3 -I$(LIBPNGDIR)/include -I$(COMMONDIR) 29 | LDFLAGS=-L$(LIBPNGDIR)/lib 30 | LIBS=-lpng -lz 31 | endif 32 | 33 | EXE=heat_mpi 34 | OBJS=main.o heat_mod.o core.o setup.o utilities.o io.o pngwriter_mod.o 35 | OBJS_PNG=$(COMMONDIR)/pngwriter.o 36 | 37 | 38 | all: $(EXE) 39 | 40 | $(COMMONDIR)/pngwriter.o: $(COMMONDIR)/pngwriter.c $(COMMONDIR)/pngwriter.h 41 | core.o: core.F90 heat_mod.o 42 | utilities.o: utilities.F90 heat_mod.o 43 | io.o: io.F90 heat_mod.o pngwriter_mod.o 44 | setup.o: setup.F90 heat_mod.o utilities.o io.o 45 | pngwriter_mod.o: pngwriter_mod.F90 heat_mod.o 46 | main.o: main.F90 heat_mod.o core.o io.o setup.o utilities.o 47 | 48 | $(EXE): $(OBJS) $(OBJS_PNG) 49 | $(FC) $(FCFLAGS) $(OBJS) $(OBJS_PNG) -o $@ $(LDFLAGS) $(LIBS) 50 | 51 | %.o: %.F90 52 | $(FC) $(FCFLAGS) -c $< -o $@ 53 | 54 | %.o: %.c 55 | $(CC) $(CCFLAGS) -c $< -o $@ 56 | 57 | .PHONY: clean 58 | clean: 59 | -/bin/rm -f $(EXE) a.out *.o *.mod *.png *~ 60 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/solution/core.F90: -------------------------------------------------------------------------------- 1 | ! 
Main solver routines for heat equation solver 2 | module core 3 | use heat 4 | 5 | contains 6 | 7 | ! Exchange the boundary data between MPI tasks 8 | subroutine exchange(field0, parallel) 9 | use mpi 10 | 11 | implicit none 12 | 13 | type(field), intent(inout) :: field0 14 | type(parallel_data), intent(in) :: parallel 15 | 16 | integer :: ierr 17 | 18 | ! Send to left, receive from right 19 | call mpi_sendrecv(field0%data(:, 1), field0%nx + 2, MPI_DOUBLE_PRECISION, & 20 | & parallel%nleft, 11, & 21 | & field0%data(:, field0%ny + 1), field0%nx + 2, MPI_DOUBLE_PRECISION, & 22 | & parallel%nright, 11, & 23 | & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) 24 | 25 | ! Send to right, receive from left 26 | call mpi_sendrecv(field0%data(:, field0%ny), field0%nx + 2, MPI_DOUBLE_PRECISION, & 27 | & parallel%nright, 12, & 28 | & field0%data(:, 0), field0%nx + 2, MPI_DOUBLE_PRECISION,& 29 | & parallel%nleft, 12, & 30 | & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) 31 | 32 | end subroutine exchange 33 | 34 | ! Compute one time step of temperature evolution 35 | ! Arguments: 36 | ! curr (type(field)): current temperature values 37 | ! prev (type(field)): values from previous time step 38 | ! a (real(dp)): update equation constant 39 | ! dt (real(dp)): time step value 40 | subroutine evolve(curr, prev, a, dt) 41 | 42 | implicit none 43 | 44 | type(field), intent(inout) :: curr, prev 45 | real(dp) :: a, dt 46 | integer :: i, j, nx, ny 47 | 48 | nx = curr%nx 49 | ny = curr%ny 50 | 51 | do j = 1, ny 52 | do i = 1, nx 53 | curr%data(i, j) = prev%data(i, j) + a * dt * & 54 | & ((prev%data(i-1, j) - 2.0 * prev%data(i, j) + & 55 | & prev%data(i+1, j)) / curr%dx**2 + & 56 | & (prev%data(i, j-1) - 2.0 * prev%data(i, j) + & 57 | & prev%data(i, j+1)) / curr%dy**2) 58 | end do 59 | end do 60 | end subroutine evolve 61 | 62 | end module core 63 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/solution/heat_mod.F90: -------------------------------------------------------------------------------- 1 | ! Field metadata for heat equation solver 2 | module heat 3 | use iso_fortran_env, only : REAL64 4 | implicit none 5 | 6 | integer, parameter :: dp = REAL64 7 | real(dp), parameter :: DX = 0.01, DY = 0.01 ! Fixed grid spacing 8 | 9 | type :: field 10 | integer :: nx ! local dimension of the field 11 | integer :: ny 12 | integer :: nx_full ! global dimension of the field 13 | integer :: ny_full 14 | real(dp) :: dx 15 | real(dp) :: dy 16 | real(dp), dimension(:,:), allocatable :: data 17 | end type field 18 | 19 | type :: parallel_data 20 | integer :: size 21 | integer :: rank 22 | integer :: nleft, nright ! Ranks of neighbouring MPI tasks 23 | end type parallel_data 24 | 25 | contains 26 | ! Initialize the field type metadata 27 | ! Arguments: 28 | ! field0 (type(field)): input field 29 | ! 
nx, ny, dx, dy: field dimensions and spatial step size 30 | subroutine set_field_dimensions(field0, nx, ny, parallel) 31 | implicit none 32 | 33 | type(field), intent(out) :: field0 34 | integer, intent(in) :: nx, ny 35 | type(parallel_data), intent(in) :: parallel 36 | 37 | integer :: nx_local, ny_local 38 | 39 | nx_local = nx 40 | ny_local = ny / parallel%size 41 | 42 | field0%dx = DX 43 | field0%dy = DY 44 | field0%nx = nx_local 45 | field0%ny = ny_local 46 | field0%nx_full = nx 47 | field0%ny_full = ny 48 | 49 | end subroutine set_field_dimensions 50 | 51 | subroutine parallel_setup(parallel, nx, ny) 52 | use mpi 53 | 54 | implicit none 55 | 56 | type(parallel_data), intent(out) :: parallel 57 | integer, intent(in), optional :: nx, ny 58 | 59 | integer :: ny_local 60 | integer :: ierr 61 | 62 | call mpi_comm_size(MPI_COMM_WORLD, parallel%size, ierr) 63 | 64 | if (present(ny)) then 65 | ny_local = ny / parallel%size 66 | if (ny_local * parallel%size /= ny) then 67 | write(*,*) 'Cannot divide grid evenly to processors' 68 | call mpi_abort(MPI_COMM_WORLD, -2, ierr) 69 | end if 70 | end if 71 | 72 | call mpi_comm_rank(MPI_COMM_WORLD, parallel%rank, ierr) 73 | 74 | parallel%nleft = parallel%rank - 1 75 | parallel%nright = parallel%rank + 1 76 | 77 | if (parallel%nleft < 0) then 78 | parallel%nleft = MPI_PROC_NULL 79 | end if 80 | if (parallel%nright > parallel%size - 1) then 81 | parallel%nright = MPI_PROC_NULL 82 | end if 83 | 84 | end subroutine parallel_setup 85 | 86 | end module heat 87 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/solution/main.F90: -------------------------------------------------------------------------------- 1 | ! Heat equation solver in 2D. 2 | 3 | program heat_solve 4 | use heat 5 | use core 6 | use io 7 | use setup 8 | use utilities 9 | use mpi 10 | 11 | implicit none 12 | 13 | real(dp), parameter :: a = 0.5 ! Diffusion constant 14 | type(field) :: current, previous ! Current and previus temperature fields 15 | 16 | real(dp) :: dt ! Time step 17 | integer :: nsteps ! Number of time steps 18 | integer, parameter :: image_interval = 500 ! Image output interval 19 | integer, parameter :: checkpoint_interval = 200 ! restart interval 20 | 21 | type(parallel_data) :: parallelization 22 | integer :: ierr 23 | 24 | integer :: iter 25 | 26 | real(kind=dp) :: start, stop ! Timers 27 | 28 | call mpi_init(ierr) 29 | 30 | call initialize(current, previous, nsteps, parallelization) 31 | 32 | ! Draw the picture of the initial state 33 | call write_field(current, 0, parallelization) 34 | 35 | ! Largest stable time step 36 | dt = current%dx**2 * current%dy**2 / & 37 | & (2.0 * a * (current%dx**2 + current%dy**2)) 38 | 39 | ! Main iteration loop, save a picture every 40 | ! image_interval steps 41 | 42 | start = mpi_wtime() 43 | 44 | do iter = 1, nsteps 45 | call exchange(previous, parallelization) 46 | call evolve(current, previous, a, dt) 47 | if (mod(iter, image_interval) == 0) then 48 | call write_field(current, iter, parallelization) 49 | end if 50 | if (mod(iter, checkpoint_interval) == 0) then 51 | call write_restart(current, parallelization, iter) 52 | end if 53 | call swap_fields(current, previous) 54 | end do 55 | 56 | stop = mpi_wtime() 57 | 58 | if (parallelization % rank == 0) then 59 | write(*,'(A,F7.3,A)') 'Iteration took ', stop - start, ' seconds.' 
60 | write(*,'(A,G0)') 'Reference value at 5,5: ', previous % data(5,5) 61 | end if 62 | 63 | call finalize(current, previous) 64 | 65 | call mpi_finalize(ierr) 66 | 67 | end program heat_solve 68 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/solution/pngwriter_mod.F90: -------------------------------------------------------------------------------- 1 | ! PNG writer for heat equation solver 2 | module pngwriter 3 | use heat 4 | 5 | contains 6 | 7 | function save_png(data, nx, ny, fname) result(stat) 8 | 9 | use, intrinsic :: ISO_C_BINDING 10 | implicit none 11 | 12 | real(dp), dimension(:,:), intent(in) :: data 13 | integer, intent(in) :: nx, ny 14 | character(len=*), intent(in) :: fname 15 | integer :: stat 16 | 17 | ! Interface for save_png C-function 18 | interface 19 | ! The C-function definition is 20 | ! int save_png(double *data, const int nx, const int ny, 21 | ! const char *fname) 22 | function save_png_c(data, nx, ny, fname, order) & 23 | & bind(C,name="save_png") result(stat) 24 | use, intrinsic :: ISO_C_BINDING 25 | implicit none 26 | real(kind=C_DOUBLE) :: data(*) 27 | integer(kind=C_INT), value, intent(IN) :: nx, ny 28 | character(kind=C_CHAR), intent(IN) :: fname(*) 29 | character(kind=C_CHAR), value, intent(IN) :: order 30 | integer(kind=C_INT) :: stat 31 | end function save_png_c 32 | end interface 33 | 34 | stat = save_png_c(data, nx, ny, trim(fname) // C_NULL_CHAR, 'f') 35 | if (stat /= 0) then 36 | write(*,*) 'save_png returned error!' 37 | end if 38 | 39 | end function save_png 40 | 41 | end module pngwriter 42 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/solution/utilities.F90: -------------------------------------------------------------------------------- 1 | ! Utility routines for heat equation solver 2 | ! NOTE: This file does not need to be edited! 3 | module utilities 4 | use heat 5 | 6 | contains 7 | 8 | ! Swap the data fields of two variables of type field 9 | ! Arguments: 10 | ! curr, prev (type(field)): the two variables that are swapped 11 | subroutine swap_fields(curr, prev) 12 | 13 | implicit none 14 | 15 | type(field), intent(inout) :: curr, prev 16 | real(dp), allocatable, dimension(:,:) :: tmp 17 | 18 | call move_alloc(curr%data, tmp) 19 | call move_alloc(prev%data, curr%data) 20 | call move_alloc(tmp, prev%data) 21 | end subroutine swap_fields 22 | 23 | ! Copy the data from one field to another 24 | ! Arguments: 25 | ! from_field (type(field)): variable to copy from 26 | ! to_field (type(field)): variable to copy to 27 | subroutine copy_fields(from_field, to_field) 28 | 29 | implicit none 30 | 31 | type(field), intent(in) :: from_field 32 | type(field), intent(out) :: to_field 33 | 34 | ! Consistency checks 35 | if (.not.allocated(from_field%data)) then 36 | write (*,*) "Can not copy from a field without allocated data" 37 | stop 38 | end if 39 | if (.not.allocated(to_field%data)) then 40 | ! 
Target is not initialize, allocate memory 41 | allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), & 42 | & lbound(from_field%data, 2):ubound(from_field%data, 2))) 43 | else if (any(shape(from_field%data) /= shape(to_field%data))) then 44 | write (*,*) "Wrong field data sizes in copy routine" 45 | print *, shape(from_field%data), shape(to_field%data) 46 | stop 47 | end if 48 | 49 | to_field%data = from_field%data 50 | 51 | to_field%nx = from_field%nx 52 | to_field%ny = from_field%ny 53 | to_field%dx = from_field%dx 54 | to_field%dy = from_field%dy 55 | end subroutine copy_fields 56 | 57 | end module utilities 58 | -------------------------------------------------------------------------------- /parallel-io/heat-restart/fortran/utilities.F90: -------------------------------------------------------------------------------- 1 | ! Utility routines for heat equation solver 2 | ! NOTE: This file does not need to be edited! 3 | module utilities 4 | use heat 5 | 6 | contains 7 | 8 | ! Swap the data fields of two variables of type field 9 | ! Arguments: 10 | ! curr, prev (type(field)): the two variables that are swapped 11 | subroutine swap_fields(curr, prev) 12 | 13 | implicit none 14 | 15 | type(field), intent(inout) :: curr, prev 16 | real(dp), allocatable, dimension(:,:) :: tmp 17 | 18 | call move_alloc(curr%data, tmp) 19 | call move_alloc(prev%data, curr%data) 20 | call move_alloc(tmp, prev%data) 21 | end subroutine swap_fields 22 | 23 | ! Copy the data from one field to another 24 | ! Arguments: 25 | ! from_field (type(field)): variable to copy from 26 | ! to_field (type(field)): variable to copy to 27 | subroutine copy_fields(from_field, to_field) 28 | 29 | implicit none 30 | 31 | type(field), intent(in) :: from_field 32 | type(field), intent(out) :: to_field 33 | 34 | ! Consistency checks 35 | if (.not.allocated(from_field%data)) then 36 | write (*,*) "Can not copy from a field without allocated data" 37 | stop 38 | end if 39 | if (.not.allocated(to_field%data)) then 40 | ! Target is not initialize, allocate memory 41 | allocate(to_field%data(lbound(from_field%data, 1):ubound(from_field%data, 1), & 42 | & lbound(from_field%data, 2):ubound(from_field%data, 2))) 43 | else if (any(shape(from_field%data) /= shape(to_field%data))) then 44 | write (*,*) "Wrong field data sizes in copy routine" 45 | print *, shape(from_field%data), shape(to_field%data) 46 | stop 47 | end if 48 | 49 | to_field%data = from_field%data 50 | 51 | to_field%nx = from_field%nx 52 | to_field%ny = from_field%ny 53 | to_field%dx = from_field%dx 54 | to_field%dy = from_field%dy 55 | end subroutine copy_fields 56 | 57 | end module utilities 58 | -------------------------------------------------------------------------------- /parallel-io/mpi-io/README.md: -------------------------------------------------------------------------------- 1 | ## Simple MPI-IO 2 | 3 | Implement parallel I/O in a test code using MPI-IO. 4 | 5 | Write data from all MPI tasks to a single output file using MPI-IO. Each task 6 | should write their own local part of the data directly to the correct position 7 | in the file. Skeleton code to start from is available in `mpi-io.c` (or 8 | `mpi-io.f90`). 
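Before opening the skeleton, the write pattern this exercise is after can be summarised in a few lines: every rank computes a byte offset from its own rank and writes its local block into the shared file with a collective `MPI_File_write_at_all`, which mirrors the approach used in `solution/mpi-io.c` later in this repository. The sketch below is an illustration only; the file name `sketch.dat` and the fixed `LOCALSIZE` are made up for the example and are not part of the exercise code.

```c
#include <mpi.h>

#define LOCALSIZE 8   /* elements written per task; illustrative value */

int main(int argc, char *argv[])
{
    int rank;
    int buf[LOCALSIZE];
    MPI_File fh;
    MPI_Offset offset;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Fill the local block with globally increasing values */
    for (int i = 0; i < LOCALSIZE; i++) {
        buf[i] = rank * LOCALSIZE + i + 1;
    }

    /* Every rank writes LOCALSIZE ints, so rank r starts at byte
       r * LOCALSIZE * sizeof(int) in the shared file */
    offset = (MPI_Offset)rank * LOCALSIZE * sizeof(int);

    /* All ranks open the same file and write their disjoint regions */
    MPI_File_open(MPI_COMM_WORLD, "sketch.dat",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    MPI_File_write_at_all(fh, offset, buf, LOCALSIZE, MPI_INT,
                          MPI_STATUS_IGNORE);
    MPI_File_close(&fh);

    MPI_Finalize();
    return 0;
}
```

Note that the offset passed to `MPI_File_write_at_all` is given in bytes, which is why it is multiplied by `sizeof(int)`.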
9 | -------------------------------------------------------------------------------- /parallel-io/mpi-io/mpi-io.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define DATASIZE 64 8 | #define WRITER_ID 0 9 | 10 | void mpiio_writer(int, int *, int); 11 | 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int my_id, ntasks, i, localsize; 16 | int *localvector; 17 | 18 | MPI_Init(&argc, &argv); 19 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 20 | MPI_Comm_rank(MPI_COMM_WORLD, &my_id); 21 | 22 | if (ntasks > 64) { 23 | fprintf(stderr, "Datasize (64) should be divisible by number " 24 | "of tasks.\n"); 25 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 26 | } 27 | 28 | if (DATASIZE % ntasks != 0) { 29 | fprintf(stderr, "Datasize (64) should be divisible by number " 30 | "of tasks.\n"); 31 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 32 | } 33 | 34 | localsize = DATASIZE / ntasks; 35 | localvector = (int *) malloc(localsize * sizeof(int)); 36 | 37 | for (i = 0; i < localsize; i++) { 38 | localvector[i] = i + 1 + localsize * my_id; 39 | } 40 | 41 | mpiio_writer(my_id, localvector, localsize); 42 | 43 | free(localvector); 44 | 45 | MPI_Finalize(); 46 | return 0; 47 | } 48 | 49 | void mpiio_writer(int my_id, int *localvector, int localsize) 50 | { 51 | MPI_File fh; 52 | MPI_Offset offset; 53 | 54 | /* TODO: Write the data to an output file "output.dat" using MPI IO. Each 55 | process should write their own local vectors to correct location 56 | of the output file. */ 57 | } 58 | -------------------------------------------------------------------------------- /parallel-io/mpi-io/mpi-io.f90: -------------------------------------------------------------------------------- 1 | program mpiio 2 | use mpi 3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit 4 | implicit none 5 | 6 | integer, parameter :: datasize = 64, writer_id = 0 7 | integer :: rc, my_id, ntasks, localsize, i 8 | integer, dimension(:), allocatable :: localvector 9 | 10 | call mpi_init(rc) 11 | call mpi_comm_size(mpi_comm_world, ntasks, rc) 12 | call mpi_comm_rank(mpi_comm_world, my_id, rc) 13 | 14 | if (ntasks > 64) then 15 | write(error_unit, *) 'Maximum number of tasks is 64!' 16 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 17 | end if 18 | 19 | if (mod(datasize, ntasks) /= 0) then 20 | write(error_unit,*) 'Datasize (64) should be divisible by number of tasks' 21 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 22 | end if 23 | 24 | localsize = datasize / ntasks 25 | allocate(localvector(localsize)) 26 | 27 | localvector = [(i + my_id * localsize, i=1,localsize)] 28 | 29 | call mpiio_writer() 30 | 31 | deallocate(localvector) 32 | call mpi_finalize(rc) 33 | 34 | contains 35 | 36 | subroutine mpiio_writer() 37 | implicit none 38 | integer :: fh, rc, dsize 39 | integer(kind=MPI_OFFSET_KIND) :: offset; 40 | 41 | call mpi_type_size(MPI_INTEGER, dsize, rc) 42 | 43 | ! TODO: write the output file "output.dat" using MPI IO. Each 44 | ! rank should write their own local vectors to correct 45 | ! locations in the output file. 
46 | 47 | end subroutine mpiio_writer 48 | 49 | end program mpiio 50 | -------------------------------------------------------------------------------- /parallel-io/mpi-io/solution/mpi-io.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define DATASIZE 64 8 | #define WRITER_ID 0 9 | 10 | void mpiio_writer(int, int *, int); 11 | 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int my_id, ntasks, i, localsize; 16 | int *localvector; 17 | 18 | MPI_Init(&argc, &argv); 19 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 20 | MPI_Comm_rank(MPI_COMM_WORLD, &my_id); 21 | 22 | if (ntasks > 64) { 23 | fprintf(stderr, "Datasize (64) should be divisible by number " 24 | "of tasks.\n"); 25 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 26 | } 27 | 28 | if (DATASIZE % ntasks != 0) { 29 | fprintf(stderr, "Datasize (64) should be divisible by number " 30 | "of tasks.\n"); 31 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 32 | } 33 | 34 | localsize = DATASIZE / ntasks; 35 | localvector = (int *) malloc(localsize * sizeof(int)); 36 | 37 | for (i = 0; i < localsize; i++) { 38 | localvector[i] = i + 1 + localsize * my_id; 39 | } 40 | 41 | mpiio_writer(my_id, localvector, localsize); 42 | 43 | free(localvector); 44 | 45 | MPI_Finalize(); 46 | return 0; 47 | } 48 | 49 | void mpiio_writer(int my_id, int *localvector, int localsize) 50 | { 51 | MPI_File fh; 52 | MPI_Offset offset; 53 | 54 | MPI_File_open(MPI_COMM_WORLD, "output.dat", 55 | MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); 56 | 57 | offset = my_id * localsize * sizeof(int); 58 | 59 | MPI_File_write_at_all(fh, offset, localvector, 60 | localsize, MPI_INT, MPI_STATUS_IGNORE); 61 | 62 | MPI_File_close(&fh); 63 | } 64 | -------------------------------------------------------------------------------- /parallel-io/mpi-io/solution/mpi-io.f90: -------------------------------------------------------------------------------- 1 | program mpiio 2 | use mpi 3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit 4 | implicit none 5 | 6 | integer, parameter :: datasize = 64, writer_id = 0 7 | integer :: rc, my_id, ntasks, localsize, i 8 | integer, dimension(:), allocatable :: localvector 9 | 10 | call mpi_init(rc) 11 | call mpi_comm_size(mpi_comm_world, ntasks, rc) 12 | call mpi_comm_rank(mpi_comm_world, my_id, rc) 13 | 14 | if (ntasks > 64) then 15 | write(error_unit, *) 'Maximum number of tasks is 64!' 
16 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 17 | end if 18 | 19 | if (mod(datasize, ntasks) /= 0) then 20 | write(error_unit,*) 'Datasize (64) should be divisible by number of tasks' 21 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 22 | end if 23 | 24 | localsize = datasize / ntasks 25 | allocate(localvector(localsize)) 26 | 27 | localvector = [(i + my_id * localsize, i=1,localsize)] 28 | 29 | call mpiio_writer() 30 | 31 | deallocate(localvector) 32 | call mpi_finalize(rc) 33 | 34 | contains 35 | 36 | subroutine mpiio_writer() 37 | implicit none 38 | integer :: fh, rc, dsize 39 | integer(kind=MPI_OFFSET_KIND) :: offset; 40 | 41 | call mpi_type_size(MPI_INTEGER, dsize, rc) 42 | 43 | offset = my_id * localsize * dsize 44 | 45 | call mpi_file_open(MPI_COMM_WORLD, 'output.dat', & 46 | & MPI_MODE_CREATE+MPI_MODE_WRONLY, MPI_INFO_NULL, fh, rc) 47 | call mpi_file_write_at_all(fh, offset, localvector, localsize, & 48 | & MPI_INTEGER, MPI_STATUS_IGNORE, rc) 49 | call mpi_file_close(fh, rc) 50 | end subroutine mpiio_writer 51 | 52 | end program mpiio 53 | -------------------------------------------------------------------------------- /parallel-io/posix/README.md: -------------------------------------------------------------------------------- 1 | ## Parallel I/O with Posix 2 | 3 | Implement parallel I/O in a test code using MPI and normal I/O calls. 4 | 5 | 1. Write data from all MPI tasks to a single file using the spokesman 6 | strategy, i.e. gather data to a single MPI task and write it to a file. 7 | The data should be kept in the order of the MPI ranks. You may start from 8 | the skeleton code in `c/spokesman.c` (or `fortran/spokesman.F90`). 9 | 10 | 2. Verify the above write by reading the file using the same spokesman 11 | strategy, but with a different number of MPI tasks than in writing. 12 | Skeleton code to start from is available in `c/spokesman_reader.c` (or 13 | `fortran/spokesman_reader.F90`). 14 | 15 | 3. Re-implement the write code so that all the MPI tasks write into separate 16 | files (aka "every man for himself" strategy). 
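For step 1 above, the core of the spokesman strategy fits in a few lines: gather the local vectors to one rank (`MPI_Gather` keeps them in rank order) and let that rank write the whole buffer with ordinary stdio calls. The sketch below is only an illustration under assumed sizes and the made-up file name `sketch-single.dat`; the worked solutions for all three steps are under `c/solution/` and `fortran/solution/`.

```c
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define LOCALSIZE 8   /* elements per task; illustrative value */

int main(int argc, char *argv[])
{
    int rank, ntasks;
    int buf[LOCALSIZE];
    int *fullvector = NULL;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ntasks);

    for (int i = 0; i < LOCALSIZE; i++) {
        buf[i] = rank * LOCALSIZE + i + 1;
    }

    /* Rank 0 is the spokesman: it needs room for everybody's block.
       MPI_Gather places the blocks in rank order, which keeps the file
       ordered by MPI rank as the exercise requires. */
    if (rank == 0) {
        fullvector = malloc((size_t)ntasks * LOCALSIZE * sizeof(int));
    }
    MPI_Gather(buf, LOCALSIZE, MPI_INT,
               fullvector, LOCALSIZE, MPI_INT, 0, MPI_COMM_WORLD);

    if (rank == 0) {
        FILE *fp = fopen("sketch-single.dat", "wb");
        if (fp != NULL) {
            fwrite(fullvector, sizeof(int), (size_t)ntasks * LOCALSIZE, fp);
            fclose(fp);
        }
        free(fullvector);
    }

    MPI_Finalize();
    return 0;
}
```

Compared with the MPI-IO version, this funnels all data through a single rank, which is simple but limits scalability at large task counts.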
17 | -------------------------------------------------------------------------------- /parallel-io/posix/c/solution/spokesman.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define DATASIZE 64 8 | #define WRITER_ID 0 9 | 10 | void single_writer(int, int *, int); 11 | 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int my_id, ntasks, i, localsize; 16 | int *localvector; 17 | 18 | MPI_Init(&argc, &argv); 19 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 20 | MPI_Comm_rank(MPI_COMM_WORLD, &my_id); 21 | 22 | if (ntasks > 64) { 23 | fprintf(stderr, "Datasize (64) should be divisible by number " 24 | "of tasks.\n"); 25 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 26 | } 27 | 28 | if (DATASIZE % ntasks != 0) { 29 | fprintf(stderr, "Datasize (64) should be divisible by number " 30 | "of tasks.\n"); 31 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 32 | } 33 | 34 | localsize = DATASIZE / ntasks; 35 | localvector = (int *) malloc(localsize * sizeof(int)); 36 | 37 | for (i = 0; i < localsize; i++) { 38 | localvector[i] = i + 1 + localsize * my_id; 39 | } 40 | 41 | single_writer(my_id, localvector, localsize); 42 | 43 | free(localvector); 44 | 45 | MPI_Finalize(); 46 | return 0; 47 | } 48 | 49 | void single_writer(int my_id, int *localvector, int localsize) 50 | { 51 | FILE *fp; 52 | int *fullvector; 53 | 54 | fullvector = (int *) malloc(DATASIZE * sizeof(int)); 55 | 56 | MPI_Gather(localvector, localsize, MPI_INT, fullvector, localsize, 57 | MPI_INT, WRITER_ID, MPI_COMM_WORLD); 58 | 59 | if (my_id == WRITER_ID) { 60 | if ((fp = fopen("singlewriter.dat", "wb")) == NULL) { 61 | fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno)); 62 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 63 | } else { 64 | fwrite(fullvector, sizeof(int), DATASIZE, fp); 65 | fclose(fp); 66 | printf("Wrote %d elements to file singlewriter.dat\n", DATASIZE); 67 | } 68 | } 69 | 70 | free(fullvector); 71 | } 72 | 73 | -------------------------------------------------------------------------------- /parallel-io/posix/c/spokesman.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define DATASIZE 64 8 | #define WRITER_ID 0 9 | 10 | void single_writer(int, int *, int); 11 | 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int my_id, ntasks, i, localsize; 16 | int *localvector; 17 | 18 | MPI_Init(&argc, &argv); 19 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 20 | MPI_Comm_rank(MPI_COMM_WORLD, &my_id); 21 | 22 | if (ntasks > 64) { 23 | fprintf(stderr, "Datasize (64) should be divisible by number " 24 | "of tasks.\n"); 25 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 26 | } 27 | 28 | if (DATASIZE % ntasks != 0) { 29 | fprintf(stderr, "Datasize (64) should be divisible by number " 30 | "of tasks.\n"); 31 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 32 | } 33 | 34 | localsize = DATASIZE / ntasks; 35 | localvector = (int *) malloc(localsize * sizeof(int)); 36 | 37 | for (i = 0; i < localsize; i++) { 38 | localvector[i] = i + 1 + localsize * my_id; 39 | } 40 | 41 | single_writer(my_id, localvector, localsize); 42 | 43 | free(localvector); 44 | 45 | MPI_Finalize(); 46 | return 0; 47 | } 48 | 49 | void single_writer(int my_id, int *localvector, int localsize) 50 | { 51 | FILE *fp; 52 | int *fullvector; 53 | 54 | /* TODO: Implement a function that will write the data to file so that 55 | a single process does the file io. 
Use rank WRITER_ID as the io rank */ 56 | 57 | free(fullvector); 58 | } 59 | 60 | -------------------------------------------------------------------------------- /parallel-io/posix/c/spokesman_reader.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define DATASIZE 64 8 | #define WRITER_ID 0 9 | 10 | void single_reader(int, int *, int); 11 | void ordered_print(int, int, int *, int); 12 | 13 | int main(int argc, char *argv[]) 14 | { 15 | int my_id, ntasks, localsize; 16 | int *localvector; 17 | 18 | MPI_Init(&argc, &argv); 19 | MPI_Comm_size(MPI_COMM_WORLD, &ntasks); 20 | MPI_Comm_rank(MPI_COMM_WORLD, &my_id); 21 | 22 | if (ntasks > 64) { 23 | fprintf(stderr, "Datasize (64) should be divisible by number " 24 | "of tasks.\n"); 25 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 26 | } 27 | 28 | if (DATASIZE % ntasks != 0) { 29 | fprintf(stderr, "Datasize (64) should be divisible by number " 30 | "of tasks.\n"); 31 | MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); 32 | } 33 | 34 | localsize = DATASIZE / ntasks; 35 | localvector = (int *) malloc(localsize * sizeof(int)); 36 | 37 | single_reader(my_id, localvector, localsize); 38 | 39 | ordered_print(ntasks, my_id, localvector, localsize); 40 | 41 | free(localvector); 42 | 43 | MPI_Finalize(); 44 | return 0; 45 | } 46 | 47 | void single_reader(int my_id, int *localvector, int localsize) 48 | { 49 | FILE *fp; 50 | int *fullvector, nread; 51 | char *fname = "singlewriter.dat"; 52 | 53 | /* TODO: Implement a function that will read the data from a file so that 54 | a single process does the file io. Use rank WRITER_ID as the io rank */ 55 | 56 | free(fullvector); 57 | } 58 | 59 | /* Try to avoid this type of pattern when ever possible. 60 | Here we are using this serialized output just to make the 61 | debugging easier. */ 62 | void ordered_print(int ntasks, int rank, int *buffer, int n) 63 | { 64 | int task, i; 65 | 66 | for (task = 0; task < ntasks; task++) { 67 | if (rank == task) { 68 | printf("Task %i received:", rank); 69 | for (i = 0; i < n; i++) { 70 | printf(" %2i", buffer[i]); 71 | } 72 | printf("\n"); 73 | } 74 | MPI_Barrier(MPI_COMM_WORLD); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /parallel-io/posix/fortran/solution/separate-files.f90: -------------------------------------------------------------------------------- 1 | program pario 2 | use mpi 3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit 4 | implicit none 5 | 6 | integer, parameter :: datasize = 64, writer_id = 0 7 | integer :: rc, my_id, ntasks, localsize, i 8 | integer, dimension(:), allocatable :: localvector 9 | integer, dimension(datasize) :: fullvector 10 | 11 | call mpi_init(rc) 12 | call mpi_comm_size(mpi_comm_world, ntasks, rc) 13 | call mpi_comm_rank(mpi_comm_world, my_id, rc) 14 | 15 | if (ntasks > 64) then 16 | write(error_unit, *) 'Maximum number of tasks is 64!' 
17 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 18 | end if 19 | 20 | if (mod(datasize, ntasks) /= 0) then 21 | write(error_unit,*) 'Datasize (64) should be divisible by number of tasks' 22 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 23 | end if 24 | 25 | localsize = datasize / ntasks 26 | allocate(localvector(localsize)) 27 | 28 | localvector = [(i + my_id * localsize, i=1,localsize)] 29 | 30 | call many_writers() 31 | 32 | deallocate(localvector) 33 | call mpi_finalize(rc) 34 | 35 | contains 36 | 37 | subroutine single_writer() 38 | implicit none 39 | 40 | call mpi_gather(localvector, localsize, mpi_integer, fullvector, & 41 | & localsize, mpi_integer, writer_id, mpi_comm_world, rc) 42 | if (my_id == writer_id) then 43 | open(10, file='singlewriter.dat', status='replace', form='unformatted', & 44 | & access='stream') 45 | write(10, pos=1) fullvector 46 | close (10) 47 | write(output_unit,'(A,I0,A)') 'Wrote ', size(fullvector), & 48 | & ' elements to file singlewriter.dat' 49 | end if 50 | end subroutine single_writer 51 | 52 | subroutine many_writers() 53 | implicit none 54 | character(len=85) :: filename 55 | 56 | write(filename, '(A,I0,A)') 'manywriters-', my_id, '.dat' 57 | 58 | open(my_id+10, file=filename, status='replace', form='unformatted', & 59 | & access='stream') 60 | write(my_id+10, pos=1) localvector 61 | close (my_id+10) 62 | write(output_unit,'(A,I0,A,A)') 'Wrote ', size(localvector), & 63 | & ' elements to file ', filename 64 | end subroutine many_writers 65 | 66 | end program pario 67 | -------------------------------------------------------------------------------- /parallel-io/posix/fortran/solution/spokesman.f90: -------------------------------------------------------------------------------- 1 | program pario 2 | use mpi 3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit 4 | implicit none 5 | 6 | integer, parameter :: datasize = 64, writer_id = 0 7 | integer :: rc, my_id, ntasks, localsize, i 8 | integer, dimension(:), allocatable :: localvector 9 | integer, dimension(datasize) :: fullvector 10 | 11 | call mpi_init(rc) 12 | call mpi_comm_size(mpi_comm_world, ntasks, rc) 13 | call mpi_comm_rank(mpi_comm_world, my_id, rc) 14 | 15 | if (ntasks > 64) then 16 | write(error_unit, *) 'Maximum number of tasks is 64!' 
17 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 18 | end if 19 | 20 | if (mod(datasize, ntasks) /= 0) then 21 | write(error_unit,*) 'Datasize (64) should be divisible by number of tasks' 22 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 23 | end if 24 | 25 | localsize = datasize / ntasks 26 | allocate(localvector(localsize)) 27 | 28 | localvector = [(i + my_id * localsize, i=1,localsize)] 29 | 30 | call single_writer() 31 | 32 | deallocate(localvector) 33 | call mpi_finalize(rc) 34 | 35 | contains 36 | 37 | subroutine single_writer() 38 | implicit none 39 | 40 | call mpi_gather(localvector, localsize, mpi_integer, fullvector, & 41 | & localsize, mpi_integer, writer_id, mpi_comm_world, rc) 42 | if (my_id == writer_id) then 43 | open(10, file='singlewriter.dat', status='replace', form='unformatted', & 44 | & access='stream') 45 | write(10, pos=1) fullvector 46 | close (10) 47 | write(output_unit,'(A,I0,A)') 'Wrote ', size(fullvector), & 48 | & ' elements to file singlewriter.dat' 49 | end if 50 | end subroutine single_writer 51 | 52 | end program pario 53 | -------------------------------------------------------------------------------- /parallel-io/posix/fortran/solution/spokesman_reader.f90: -------------------------------------------------------------------------------- 1 | program pario 2 | use mpi 3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit 4 | implicit none 5 | 6 | integer, parameter :: datasize = 64, writer_id = 0 7 | integer :: rc, my_id, ntasks, localsize, i 8 | integer, dimension(:), allocatable :: localvector 9 | integer, dimension(datasize) :: fullvector 10 | 11 | call mpi_init(rc) 12 | call mpi_comm_size(mpi_comm_world, ntasks, rc) 13 | call mpi_comm_rank(mpi_comm_world, my_id, rc) 14 | 15 | if (ntasks > 64) then 16 | write(error_unit, *) 'Maximum number of tasks is 64!' 
17 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 18 | end if 19 | 20 | if (mod(datasize, ntasks) /= 0) then 21 | write(error_unit, *) 'Datasize (64) should be divisible by number of tasks' 22 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 23 | end if 24 | 25 | localsize = datasize / ntasks 26 | allocate(localvector(localsize)) 27 | 28 | localvector = [(i + my_id * localsize, i=1,localsize)] 29 | 30 | call single_reader() 31 | 32 | call ordered_print() 33 | 34 | deallocate(localvector) 35 | call mpi_finalize(rc) 36 | 37 | contains 38 | 39 | subroutine single_reader() 40 | implicit none 41 | 42 | if (my_id == writer_id) then 43 | open(10, file='singlewriter.dat', status='old', form='unformatted', & 44 | & access='stream') 45 | read(10, pos=1) fullvector 46 | close(10) 47 | write(output_unit,'(A,I0,A)') 'Read ', size(fullvector), & 48 | & ' elements from file singlewriter.dat' 49 | end if 50 | 51 | call mpi_scatter(fullvector, localsize, mpi_integer, localvector, & 52 | & localsize, mpi_integer, writer_id, mpi_comm_world, rc) 53 | 54 | end subroutine single_reader 55 | 56 | subroutine ordered_print 57 | implicit none 58 | integer :: task 59 | 60 | do task = 0, ntasks-1 61 | if (my_id == task) then 62 | write(output_unit, '(A,I0,A)', advance='no') 'Task ', & 63 | & my_id, ' received:' 64 | do i = 1, localsize 65 | write(output_unit, '(I3)', advance='no') localvector(i) 66 | end do 67 | write(output_unit,*) ' ' 68 | end if 69 | call mpi_barrier(MPI_COMM_WORLD, rc) 70 | end do 71 | 72 | end subroutine ordered_print 73 | 74 | 75 | end program pario 76 | -------------------------------------------------------------------------------- /parallel-io/posix/fortran/spokesman.f90: -------------------------------------------------------------------------------- 1 | program pario 2 | use mpi 3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit 4 | implicit none 5 | 6 | integer, parameter :: datasize = 64, writer_id = 0 7 | integer :: rc, my_id, ntasks, localsize, i 8 | integer, dimension(:), allocatable :: localvector 9 | integer, dimension(datasize) :: fullvector 10 | 11 | call mpi_init(rc) 12 | call mpi_comm_size(mpi_comm_world, ntasks, rc) 13 | call mpi_comm_rank(mpi_comm_world, my_id, rc) 14 | 15 | if (ntasks > 64) then 16 | write(error_unit, *) 'Maximum number of tasks is 64!' 17 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 18 | end if 19 | 20 | if (mod(datasize, ntasks) /= 0) then 21 | write(error_unit,*) 'Datasize (64) should be divisible by number of tasks' 22 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 23 | end if 24 | 25 | localsize = datasize / ntasks 26 | allocate(localvector(localsize)) 27 | 28 | localvector = [(i + my_id * localsize, i=1,localsize)] 29 | 30 | call single_writer() 31 | 32 | deallocate(localvector) 33 | call mpi_finalize(rc) 34 | 35 | contains 36 | 37 | subroutine single_writer() 38 | implicit none 39 | 40 | ! TODO: Implement a function that writes the whole array of elements 41 | !
to a file so that a single process is responsible for the file io 42 | 43 | end subroutine single_writer 44 | 45 | end program pario 46 | -------------------------------------------------------------------------------- /parallel-io/posix/fortran/spokesman_reader.f90: -------------------------------------------------------------------------------- 1 | program pario 2 | use mpi 3 | use, intrinsic :: iso_fortran_env, only : error_unit, output_unit 4 | implicit none 5 | 6 | integer, parameter :: datasize = 64, writer_id = 0 7 | integer :: rc, my_id, ntasks, localsize, i 8 | integer, dimension(:), allocatable :: localvector 9 | integer, dimension(datasize) :: fullvector 10 | 11 | call mpi_init(rc) 12 | call mpi_comm_size(mpi_comm_world, ntasks, rc) 13 | call mpi_comm_rank(mpi_comm_world, my_id, rc) 14 | 15 | if (ntasks > 64) then 16 | write(error_unit, *) 'Maximum number of tasks is 64!' 17 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 18 | end if 19 | 20 | if (mod(datasize, ntasks) /= 0) then 21 | write(error_unit, *) 'Datasize (64) should be divisible by number of tasks' 22 | call mpi_abort(MPI_COMM_WORLD, -1, rc) 23 | end if 24 | 25 | localsize = datasize / ntasks 26 | allocate(localvector(localsize)) 27 | 28 | localvector = [(i + my_id * localsize, i=1,localsize)] 29 | 30 | call single_reader() 31 | 32 | call ordered_print() 33 | 34 | deallocate(localvector) 35 | call mpi_finalize(rc) 36 | 37 | contains 38 | 39 | subroutine single_reader() 40 | implicit none 41 | 42 | ! TODO: Implement a function that will read the data from a file so that 43 | ! a single process does the file io. Use rank WRITER_ID as the io rank 44 | 45 | end subroutine single_reader 46 | 47 | subroutine ordered_print 48 | implicit none 49 | integer :: task 50 | 51 | do task = 0, ntasks-1 52 | if (my_id == task) then 53 | write(output_unit, '(A,I0,A)', advance='no') 'Task ', & 54 | & my_id, ' received:' 55 | do i = 1, localsize 56 | write(output_unit, '(I3)', advance='no') localvector(i) 57 | end do 58 | write(output_unit,*) ' ' 59 | end if 60 | call mpi_barrier(MPI_COMM_WORLD, rc) 61 | end do 62 | 63 | end subroutine ordered_print 64 | 65 | 66 | end program pario 67 | --------------------------------------------------------------------------------